- introduce seed for reproducibility in keras

In [1]:
import os
import sys
import time
import random
import datetime
import numpy as np 
import pandas as pd 
from numba import jit
from sklearn.metrics import roc_auc_score, log_loss
from sklearn.model_selection import train_test_split, KFold
from sklearn import preprocessing, decomposition, model_selection, metrics, pipeline

sys.path.append('../input/multilabelstraifier/')
from ml_stratifiers import MultilabelStratifiedKFold

import warnings
warnings.filterwarnings('ignore')
pd.set_option("max_rows", 110)

import tensorflow as tf
from keras.models import Sequential
from tensorflow.keras.layers import *
from tensorflow.keras.models import *
from tensorflow.keras.layers import *
from tensorflow.keras.optimizers import *
from tensorflow.keras.metrics import *
from tensorflow.keras.utils import *
from tensorflow.keras.callbacks import *
from keras.utils import np_utils
from keras.preprocessing import sequence, text
from keras import backend as K
from keras.callbacks import ReduceLROnPlateau, ModelCheckpoint, EarlyStopping
from keras.models import load_model

In [2]:
# 完全な再現はできないが、ブレは抑えることができる
XLA_ACCELERATE = True
tf.config.optimizer.set_jit(XLA_ACCELERATE)

def set_seed(seed=0):
    os.environ["PYTHONHASHSEED"] = str(seed)

    random.seed(seed)
    np.random.seed(seed)
    tf.random.set_seed(seed)

    graph = tf.compat.v1.get_default_graph()
    session_conf = tf.compat.v1.ConfigProto(
        inter_op_parallelism_threads=1, intra_op_parallelism_threads=1
    )
    sess = tf.compat.v1.Session(graph=graph, config=session_conf)

    tf.compat.v1.keras.backend.set_session(sess)

In [3]:
DIR = '../input/atma-retail20210129/'
log_df = pd.read_csv(DIR+'carlog.csv', dtype={ 'value_1': str }, parse_dates=['date'])
test_df = pd.read_csv(DIR+'test.csv')
meta_df = pd.read_csv(DIR+"meta.csv")
display_action_id = pd.read_csv(DIR+"display_action_id.csv")
product_master_df = pd.read_csv(DIR+"product_master.csv", dtype={ 'JAN': str })
user_master = pd.read_csv(DIR+"user_master.csv")
submission = pd.read_csv(DIR+"atmaCup9__sample_submission.csv")
test_sessions = test_df['session_id'].unique()

In [4]:
meta_df["dayofweek"] = pd.to_datetime(meta_df["date"]).dt.dayofweek
meta_df["year"] = pd.to_datetime(meta_df["date"]).dt.year
#meta_df["month"] = pd.to_datetime(meta_df["date"]).dt.month
#meta_df["day"] = pd.to_datetime(meta_df["date"]).dt.day

In [5]:
register_num_id2code = dict()
for ind, i in enumerate(meta_df["register_number"].unique()):
    register_num_id2code[i] = ind
meta_df["register_number"] = meta_df["register_number"].map(register_num_id2code)
n_register = meta_df["register_number"].nunique() + 1
print("number register_number", n_register)

number register_number 224


In [6]:
MAX_SEQ = 64
NFOLDS = 5
target_length = 32

In [7]:
n_category_id_class = product_master_df["category_id"].nunique() + 1
print("number skills", n_category_id_class, product_master_df["category_id"].min(), product_master_df["category_id"].max())

number skills 773 0 771


In [8]:
TARGET_CATEGORIES = [
    # お酒に関するもの
    'ビール系__RTD', 'ビール系__ビール系', 'ビール系__ノンアルコール',
    
    # お菓子に関するもの
    'スナック・キャンディー__スナック', 
    'チョコ・ビスクラ__チョコレート', 
    'スナック・キャンディー__ガム', 
    'スナック・キャンディー__シリアル',
    'アイスクリーム__ノベルティー', 
    '和菓子__米菓',
    
    # 飲料に関するもの
    '水・炭酸水__大型PET（炭酸水）',
    '水・炭酸水__小型PET（炭酸水）',
    '缶飲料__コーヒー（缶）',
    '小型PET__コーヒー（小型PET）',
    '大型PET__無糖茶（大型PET）',
    
    # 麺類
    '麺類__カップ麺',
]

In [9]:
cat2id = dict(zip(product_master_df['category_name'], product_master_df['category_id']))
TARGET_IDS = pd.Series(TARGET_CATEGORIES).map(cat2id).values.tolist()
category_id2code = dict(zip(TARGET_IDS, TARGET_CATEGORIES))

for x in zip(TARGET_IDS, TARGET_CATEGORIES):
    print(x)

def only_purchase_records(input_df: pd.DataFrame) -> pd.DataFrame:
    idx = input_df['kind_1'] == '商品'
    out_df = input_df[idx].reset_index(drop=True)
    return out_df

def create_payment(input_df: pd.DataFrame) -> pd.DataFrame:
    """
    ログデータから session_id / JAN ごとの購買情報に変換します.

    Args:
        input_df:
            レジカートログデータ

    Returns:
        session_id, JAN, n_items (合計購買数) の DataFrame
    """

    # 購買情報は商品のものだけ.
    out_df = only_purchase_records(input_df)
    out_df = out_df.groupby(['session_id', 'value_1'])['n_items'].sum().reset_index()
    out_df = out_df.rename(columns={
        'value_1': 'JAN'
    })
    return out_df

def annot_category(input_df: pd.DataFrame,
                   master_df: pd.DataFrame):
    """
    カテゴリ ID をひも付けます.

    Args:
        input_df:
            変換するデータ.
            `value_1`  or `JAN` を column として持っている必要があります.
        master_df:
            商品マスタのデータフレーム

    Returns:

    """
    input_df = input_df.rename(columns={'value_1': 'JAN'})
    out_df = pd.merge(input_df['JAN'],
                      master_df[['JAN', 'category_id']], on='JAN', how='left')
    return out_df['category_id']

def only_payment_session_record(input_log_df):
    """支払いが紐づくセッションへ絞り込みを行なう"""
    payed_sessions = input_log_df[input_log_df['is_payment'] == 1]['session_id'].unique()
    idx = input_log_df['session_id'].isin(payed_sessions)
    out_df = input_log_df[idx].reset_index(drop=True)
    return out_df

def create_target_from_log(log_df: pd.DataFrame,
                           product_master_df: pd.DataFrame,
                          only_payment=True):

    if only_payment:
        log_df = only_payment_session_record(log_df)
    pay_df = create_payment(log_df)
    pay_df['category_id'] = annot_category(pay_df, master_df=product_master_df)

    # null の category を削除. JAN が紐付かない時に発生する.
    idx_null = pay_df['category_id'].isnull()
    pay_df = pay_df[~idx_null].reset_index(drop=True)
    # Nullが混じっている時 float になるため int へ明示的に戻す
    pay_df['category_id'] = pay_df['category_id'].astype(int)

    idx = pay_df['category_id'].isin(TARGET_IDS)
    target_df = pd.pivot_table(data=pay_df[idx],
                               index='session_id',
                               columns='category_id',
                               values='n_items',
                               aggfunc='sum')

    sessions = sorted(log_df['session_id'].unique())
    print(len(sessions))
    target_df = target_df.reindex(sessions)
    target_df = target_df.fillna(0).astype(int)
    return target_df, pay_df

(171, 'ビール系__RTD')
(173, 'ビール系__ビール系')
(172, 'ビール系__ノンアルコール')
(114, 'スナック・キャンディー__スナック')
(134, 'チョコ・ビスクラ__チョコレート')
(110, 'スナック・キャンディー__ガム')
(113, 'スナック・キャンディー__シリアル')
(38, 'アイスクリーム__ノベルティー')
(376, '和菓子__米菓')
(537, '水・炭酸水__大型PET（炭酸水）')
(539, '水・炭酸水__小型PET（炭酸水）')
(629, '缶飲料__コーヒー（缶）')
(467, '小型PET__コーヒー（小型PET）')
(435, '大型PET__無糖茶（大型PET）')
(768, '麺類__カップ麺')


# data formation

In [10]:
# 完全なデータを持っているログに絞る
test_sessions = test_df['session_id'].unique()
idx_test = log_df['session_id'].isin(test_sessions)
whole_log_df = log_df[~idx_test].reset_index(drop=True)
payment_session_df = only_payment_session_record(whole_log_df)
del whole_log_df

In [11]:
# 商品購買の最後(max spend time)が10分より大きいセッションを取り出す
is_item_record = payment_session_df['kind_1'] == '商品'
max_payed_time = payment_session_df[is_item_record].groupby('session_id')['spend_time'].max()
max_payed_time_over_10min = max_payed_time[max_payed_time > 10 * 60]

train_sessions = max_payed_time_over_10min.index.tolist()
train_whole_log_df = payment_session_df[payment_session_df['session_id'].isin(train_sessions)].reset_index(drop=True)

del max_payed_time, max_payed_time_over_10min, payment_session_df, is_item_record

In [12]:
seed = 103
np.random.seed(seed)
random.seed(seed)

time_elasped_count = meta_df['time_elapsed'].value_counts(normalize=True)

train_time_elapsed = np.random.choice(time_elasped_count.index.astype(int), 
                                      p=time_elasped_count.values, 
                                      size=len(train_sessions))
train_meta_df = pd.DataFrame({
    'session_id': train_sessions,
    'time_elapsed': train_time_elapsed
})

train_meta_df = pd.merge(train_meta_df, 
                         meta_df.drop(columns=['time_elapsed']), 
                         on='session_id', 
                         how='left')
del train_time_elapsed, train_sessions

In [13]:
_df = pd.merge(train_whole_log_df[['session_id', 'spend_time']], train_meta_df, on='session_id', how='left')
idx_show = _df['spend_time'] <= _df['time_elapsed'] * 60

del _df

train_public_df = train_whole_log_df[idx_show].reset_index(drop=True)
train_private_df = train_whole_log_df[~idx_show].reset_index(drop=True)

del idx_show, train_whole_log_df

# テストのログデータと合わせて推論時に見ても良いログ `public_log_df` として保存しておく
public_log_df = pd.concat([
    train_public_df, log_df[log_df['session_id'].isin(test_sessions)]
], axis=0, ignore_index=True)
# meta に紐づく情報は後でよく使うので, テストデータにも meta 情報をマージしておきます. 
# train_meata_df / test_meta_df が今後特徴を作る上で key になるデータになります。
test_meta_df = pd.merge(test_df, meta_df, on='session_id', how='left')

del log_df, train_public_df

In [14]:
train_target_df, _  = create_target_from_log(train_private_df, 
                                             product_master_df=product_master_df,
                                            only_payment=False)

train_target_df[train_target_df >= 1] = 1
train_target_df[train_target_df <= 0] = 0

del train_private_df

366478


In [15]:
public_log_df = public_log_df[public_log_df['kind_1'] == '商品'].reset_index(drop=True)
public_log_df = pd.merge(public_log_df, product_master_df[["JAN", "category_id"]], 
                         left_on="value_1", right_on="JAN",how="left")[["session_id", "value_1", "category_id"]]
public_log_df["category_id"] += 1
public_log_df["value_1"] = public_log_df["value_1"].astype(int)

In [16]:
jan2code = dict()
for ind, i in enumerate(public_log_df["value_1"].unique()):
    jan2code[i] = ind + 1
public_log_df["value_1"] = public_log_df["value_1"].map(jan2code)

n_jan_class = public_log_df["value_1"].nunique() + 1
print("number jans", n_jan_class, public_log_df["value_1"].min(), public_log_df["value_1"].max())

number jans 47005 1 47004


In [17]:
@jit
def make_category(a):
    a = np.nan_to_num(a, nan=0)
    if len(a) < MAX_SEQ:
        a = np.concatenate([[0]*(MAX_SEQ-len(a)), a])
    else:
        a = a[:MAX_SEQ]
    return a

@jit
def make_invisible_category(a):
    return np.array([0]*(MAX_SEQ))

def make_data(public_log_df, feature):
    group = public_log_df.groupby('session_id').apply(lambda r: make_category(r[feature].values))

    train_group = group[group.index.isin(train_meta_df.session_id.unique())]
    test_group = group[group.index.isin(test_meta_df.session_id.unique())]

    train_inv_session = set(train_meta_df.session_id) - set(train_group.index)
    test_inv_session = set(test_meta_df.session_id) - set(test_group.index)

    train_inv_group = meta_df[meta_df.session_id.isin(train_inv_session)].groupby('session_id').apply(lambda x: make_invisible_category(x["user_id"].values))
    test_inv_group = meta_df[meta_df.session_id.isin(test_inv_session)].groupby('session_id').apply(lambda x: make_invisible_category(x["user_id"].values))

    train_group = pd.concat([train_group, train_inv_group]).sort_index()
    test_group = pd.concat([test_group, test_inv_group]).sort_index()
    return train_group, test_group

train_group, test_group = make_data(public_log_df, "category_id")
train_action_group, test_action_group = make_data(public_log_df, "value_1")

In [18]:
test_only_users = set(test_meta_df.user_id) - set(train_meta_df.user_id)

In [19]:
time_sorted_index = train_meta_df.sort_values(["user_id","session_id"]).index
original_index = [i for i in range(train_meta_df.shape[0])]

In [20]:
del meta_df, display_action_id, product_master_df

In [21]:
@jit
def calc_seq_target(c):
    target_nums = 15
    ans = np.ones([target_length, target_nums]) * -1
    ans = ans.reshape(1,target_length, target_nums)
    for i in range(1,len(c)):
        tmp = np.concatenate([ans[-1,1:,:], np.array(c[i-1,:]).reshape(1,target_nums)]).reshape(1,target_length, target_nums)
        ans = np.vstack([ans, tmp])
    return ans

@jit
def calc_test_seq_target(c):
    target_nums = 15
    ans = np.ones([target_length, target_nums]) * -1
    ans = np.concatenate([ans, np.array(c)])
    return ans[-target_length:]

@jit
def calc_test_only_seq_target():
    target_nums = 15
    ans = np.ones([target_length, target_nums]) * -1
    return ans

def make_seq_target(df):
    df_ = df.reset_index().copy()
    df_ = df_.reindex(index = time_sorted_index) # timeでソートしてgroupbyかける
    df_ = pd.merge(df_, train_meta_df[["user_id", "session_id"]], on="session_id", how="left")
    values = df_.groupby('user_id').apply(lambda x: calc_seq_target(x[TARGET_IDS].values))
    values = np.vstack(values)
    vlaues = values[original_index] #もとのindex順に戻す
    return values

def make_test_seq_target(df):
    df_ = df.reset_index().copy()
    df_ = df_.reindex(index = time_sorted_index)
    df_ = pd.merge(df_, train_meta_df[["user_id", "session_id"]], on="session_id", how="left")
    values = df_.groupby('user_id').apply(lambda x: calc_test_seq_target(x[TARGET_IDS].values))
    tmp = pd.DataFrame(test_only_users)
    tmp.columns = ["user_id"]
    values2 = tmp.groupby('user_id').apply(lambda x: calc_test_only_seq_target())
    values = pd.concat([values, values2])
    return values

seq_target = make_seq_target(train_target_df)

test_seq_target_tmp = make_test_seq_target(train_target_df)
test_seq_target = pd.merge(test_meta_df, test_seq_target_tmp.reset_index(), on=["user_id"], how="left")[0]
del test_seq_target_tmp
print(test_seq_target.isnull().sum())
test_seq_target = np.vstack(test_seq_target).reshape(-1, target_length, 15)
#https://stackoverflow.com/questions/42170682/retain-dataframes-index-when-using-groupby-apply-to-generate-series

0


In [22]:
train_cat_df = pd.merge(train_meta_df, user_master, on="user_id", how="left")[["user_id","register_number"]]
test_cat_df = pd.merge(test_meta_df, user_master, on="user_id", how="left")[["user_id","register_number"]]

train_meta_df = pd.merge(train_meta_df, user_master, on="user_id", how="left")[["age", "gender", "time_elapsed",
                                                                                "dayofweek", "hour", "year"]] 

test_meta_df = pd.merge(test_meta_df, user_master, on="user_id", how="left")[["age", "gender", "time_elapsed",
                                                                              "dayofweek", "hour", "year"]]  

In [23]:
le = preprocessing.LabelEncoder()
tmp = pd.concat([train_cat_df, test_cat_df])
le.fit(tmp["user_id"])
train_cat_df["user_id"] = le.transform(train_cat_df["user_id"])
test_cat_df["user_id"] = le.transform(test_cat_df["user_id"])
n_user = tmp["user_id"].nunique() + 1
del tmp

In [24]:
def fe(df, columns):
    tmp = df.copy()
    tmp = pd.get_dummies(tmp, columns=columns)
    return tmp

train_meta_df = fe(train_meta_df, ["time_elapsed", "dayofweek"])
test_meta_df = fe(test_meta_df, ["time_elapsed", "dayofweek"])
train_meta_df["age"] = train_meta_df["age"]/100
test_meta_df["age"] = test_meta_df["age"]/100
train_meta_df["hour"] = train_meta_df["hour"]/24
test_meta_df["hour"] = test_meta_df["hour"]/24
train_meta_df["year2019"] = (train_meta_df["year"] == 2019).astype(int)
test_meta_df["year2019"] = (test_meta_df["year"] == 2019).astype(int)
train_meta_df["year2020"] = (train_meta_df["year"] == 2020).astype(int)
test_meta_df["year2020"] = (test_meta_df["year"] == 2020).astype(int)
del train_meta_df["year"], test_meta_df["year"]

# data transformation

In [25]:
# https://stackoverflow.com/questions/57001209/creating-numpy-matrix-from-nested-arrays-in-a-list
train_group = np.vstack(np.ravel(train_group))
test_group = np.vstack(np.ravel(test_group))
train_action_group = np.vstack(np.ravel(train_action_group))
test_action_group = np.vstack(np.ravel(test_action_group))

In [26]:
# not to increase memory use during training
train_meta_np = np.ndarray(shape=(len(train_meta_df), train_meta_df.shape[1]), dtype=np.float32)
test_meta_np = np.ndarray(shape=(len(test_meta_df), test_meta_df.shape[1]), dtype=np.float32)
train_cat_np = np.ndarray(shape=(len(train_cat_df), train_cat_df.shape[1]), dtype=np.float32)
test_cat_np = np.ndarray(shape=(len(test_cat_df), test_cat_df.shape[1]), dtype=np.float32)

for idx in range(train_meta_df.shape[1]):
    train_meta_np[:,idx] = train_meta_df.iloc[:,idx].astype(np.float32)

for idx in range(test_meta_df.shape[1]):
    test_meta_np[:,idx] = test_meta_df.iloc[:,idx].astype(np.float32)

for idx in range(train_cat_df.shape[1]):
    train_cat_np[:,idx] = train_cat_df.iloc[:,idx].astype(np.float32)
    test_cat_np[:,idx] = test_cat_df.iloc[:,idx].astype(np.float32)

del train_meta_df, test_meta_df, train_cat_df, test_cat_df

train_meta_df = train_meta_np.copy()
test_meta_df = test_meta_np.copy()
train_cat_df = train_cat_np.copy()
test_cat_df = test_cat_np.copy()
del train_meta_np, test_meta_np, train_cat_np, test_cat_np

seq_target_np = np.ndarray(shape=(len(seq_target), seq_target.shape[1], seq_target.shape[2]), dtype=np.float32)
for idx in range(seq_target.shape[1]):
    for idx2 in range(seq_target.shape[2]):
        seq_target_np[:, idx, idx2] = seq_target[:, idx, idx2].astype(np.float32)
del seq_target
seq_target = seq_target_np.copy()
del seq_target_np

test_seq_target_np = np.ndarray(shape=(len(test_seq_target), test_seq_target.shape[1], 
                                       test_seq_target.shape[2]), dtype=np.float32)
for idx in range(test_seq_target.shape[1]):
    for idx2 in range(test_seq_target.shape[2]):
        test_seq_target_np[:, idx, idx2] = test_seq_target[:, idx, idx2].astype(np.float32)
del test_seq_target
test_seq_target = test_seq_target_np.copy()
del test_seq_target_np

In [27]:
train_target_df = train_target_df.reset_index(drop=True).to_numpy()

In [28]:
print(train_target_df.shape)
print(train_group.shape, train_action_group.shape, train_meta_df.shape, seq_target.shape)
print(test_group.shape, test_action_group.shape, test_meta_df.shape, test_seq_target.shape)

(366478, 15)
(366478, 64) (366478, 64) (366478, 16) (366478, 32, 15)
(56486, 64) (56486, 64) (56486, 16) (56486, 32, 15)


# keras transformer

In [29]:
register_nums = n_register
user_nums = n_user
embed_dim = 64  # Embedding size for each token
target_embed_dim = 15 # embedding size for each target seq
num_heads = 8  # Number of attention heads
ff_dim = 128 # Hidden layer size in feed forward network inside transformer
vocab_size = n_category_id_class 
vocab_size2 = n_jan_class
maxlen = MAX_SEQ 
BATCH_SIZE = 1024
TRAIN_EPOCH = 8
DROPOUT = 0.2

In [30]:
# https://keras.io/examples/nlp/text_classification_with_transformer/
# https://www.kaggle.com/gogo827jz/moa-lstm-pure-transformer-fast-and-not-bad
class TransformerBlock(Layer):
    def __init__(self, embed_dim, num_heads, ff_dim, rate=DROPOUT):
        super(TransformerBlock, self).__init__()
        self.att = MultiHeadAttention(num_heads=num_heads, key_dim=embed_dim)
        self.ffn = Sequential(
            [Dense(ff_dim, activation="relu"), Dense(embed_dim),]
        )
        self.layernorm1 = LayerNormalization(epsilon=1e-6)
        self.layernorm2 = LayerNormalization(epsilon=1e-6)
        self.dropout1 = Dropout(rate)
        self.dropout2 = Dropout(rate)

    def call(self, inputs, training):
        attn_output = self.att(inputs, inputs)
        attn_output = self.dropout1(attn_output, training=training)
        out1 = self.layernorm1(inputs + attn_output)
        ffn_output = self.ffn(out1)
        ffn_output = self.dropout2(ffn_output, training=training)
        return self.layernorm2(out1 + ffn_output)
    
class TokenAndPositionEmbedding(Layer):
    def __init__(self, maxlen, vocab_size, embed_dim):
        super(TokenAndPositionEmbedding, self).__init__()
        self.token_emb = Embedding(input_dim=vocab_size, output_dim=embed_dim, mask_zero=True)
        self.pos_emb = Embedding(input_dim=maxlen, output_dim=embed_dim)

    def call(self, x):
        maxlen = tf.shape(x)[-1]
        positions = tf.range(start=0, limit=maxlen, delta=1)
        positions = self.pos_emb(positions)
        x = self.token_emb(x)
        return x + positions

In [31]:
def create_model(maxlen, category_size, action_size, user_size, register_size, embed_dim):
    _input_aux = Input((test_meta_df.shape[1],))  
    x_aux = Dense(16, activation="relu")(_input_aux)
    x_aux = Dropout(DROPOUT)(x_aux)

    inputs = Input(shape=(maxlen,))        
    x_category = TokenAndPositionEmbedding(maxlen, vocab_size, embed_dim)(inputs)
    x_category = TransformerBlock(embed_dim, num_heads, ff_dim)(x_category)
    x_category = TransformerBlock(embed_dim, num_heads, ff_dim)(x_category)
    x_category = TransformerBlock(embed_dim, num_heads, ff_dim)(x_category)
    x_category = LSTM(128)(x_category)
    x_category = Dropout(DROPOUT)(x_category)

    inputs_for_action = Input(shape=(maxlen,))
    x_action = TokenAndPositionEmbedding(maxlen, action_size, embed_dim)(inputs_for_action)
    x_action = TransformerBlock(embed_dim, num_heads, ff_dim)(x_action)
    x_action = TransformerBlock(embed_dim, num_heads, ff_dim)(x_action)
    x_action = TransformerBlock(embed_dim, num_heads, ff_dim)(x_action)
    x_action = LSTM(128)(x_action)
    x_action = Dropout(DROPOUT)(x_action)
    
    inputs_for_target_seq = Input(shape=(target_length,15,))
    x_target_seq = TransformerBlock(target_embed_dim, num_heads, ff_dim)(inputs_for_target_seq)
    x_target_seq = LSTM(32)(x_target_seq)
    x_target_seq = Dropout(DROPOUT)(x_target_seq)
    
    inputs_for_register = Input(shape=(1,))
    x_register = Embedding(input_dim=register_size, output_dim=64)(inputs_for_register)
    x_register = GlobalMaxPooling1D()(x_register)
    
    inputs_for_user = Input(shape=(1,))
    x_user = Embedding(input_dim=user_size, output_dim=64)(inputs_for_user)
    x_user = GlobalMaxPooling1D()(x_user)
    
    x = Concatenate()([x_category, x_action])  
    x = Concatenate()([x, x_target_seq])
    x = Concatenate()([x, x_register])
    x = Concatenate()([x, x_user])
    x = Concatenate()([x, x_aux])
    
    x = Dense(128+128+test_meta_df.shape[1], activation="relu")(x)
    x = Dropout(DROPOUT)(x)
    outputs = Dense(len(TARGET_CATEGORIES), activation="softmax")(x)

    model = Model(inputs=[inputs, inputs_for_action, inputs_for_target_seq, _input_aux, inputs_for_register, inputs_for_user],
                  outputs=outputs)
    
    return model

In [32]:
gru_oof_preds = np.zeros((train_group.shape[0], train_target_df.shape[1]))
gru_test_preds = np.zeros((test_group.shape[0], train_target_df.shape[1]))
oof_losses = []
mskf = MultilabelStratifiedKFold(n_splits=NFOLDS, random_state=0, shuffle=True)
X_test = tf.convert_to_tensor(test_group, dtype=tf.float32)
X_test_action = tf.convert_to_tensor(test_action_group, dtype=tf.float32)
X_test_seq_tar = tf.convert_to_tensor(test_seq_target, dtype=tf.float32)
meta_X_test = tf.convert_to_tensor(test_meta_df, dtype=tf.float32)

user_X_test = tf.convert_to_tensor(test_cat_df[:,0], dtype=tf.float32)
reg_X_test = tf.convert_to_tensor(test_cat_df[:,1], dtype=tf.float32)

for fn, (trn_idx, val_idx) in enumerate(mskf.split(train_group, train_target_df)):
    print('Starting fold: ', fn)    
    X_train, X_val = train_group[trn_idx,:], train_group[val_idx,:]
    X_train_action, X_val_action = train_action_group[trn_idx,:], train_action_group[val_idx,:]
    X_train_seq_tar, X_val_seq_tar = seq_target[trn_idx], seq_target[val_idx]
    meta_X_train, meta_X_val = train_meta_df[trn_idx,:], train_meta_df[val_idx,:]
    user_X_train, user_X_val = train_cat_df[trn_idx,0], train_cat_df[val_idx,0]
    reg_X_train, reg_X_val = train_cat_df[trn_idx,1], train_cat_df[val_idx,1]
    
    y_train, y_val = train_target_df[trn_idx,:], train_target_df[val_idx,:]
    
    X_train = tf.convert_to_tensor(X_train, dtype=tf.float32)
    X_val = tf.convert_to_tensor(X_val, dtype=tf.float32)
    X_train_action = tf.convert_to_tensor(X_train_action, dtype=tf.float32)
    X_val_action = tf.convert_to_tensor(X_val_action, dtype=tf.float32)   
    X_train_seq_tar = tf.convert_to_tensor(X_train_seq_tar, dtype=tf.float32)
    X_val_seq_tar = tf.convert_to_tensor(X_val_seq_tar, dtype=tf.float32)  
    meta_X_train = tf.convert_to_tensor(meta_X_train, dtype=tf.float32)
    meta_X_val = tf.convert_to_tensor(meta_X_val, dtype=tf.float32)
    user_X_train = tf.convert_to_tensor(user_X_train, dtype=tf.float32)
    user_X_val = tf.convert_to_tensor(user_X_val, dtype=tf.float32)
    reg_X_train = tf.convert_to_tensor(reg_X_train, dtype=tf.float32)
    reg_X_val = tf.convert_to_tensor(reg_X_val, dtype=tf.float32)
    
    y_train = tf.convert_to_tensor(y_train, dtype=tf.float32)
    y_val = tf.convert_to_tensor(y_val, dtype=tf.float32)
    
    # modelling
    set_seed()
    model = create_model(maxlen, vocab_size, vocab_size2, user_nums, register_nums, embed_dim)
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=[AUC(multi_label=True)])    
    model.fit([X_train, X_train_action, X_train_seq_tar, meta_X_train, reg_X_train, user_X_train], y_train, 
              validation_data = ([X_val, X_val_action, X_val_seq_tar, meta_X_val, reg_X_val, user_X_val], y_val),
              epochs=TRAIN_EPOCH, batch_size=BATCH_SIZE, verbose=1,
              callbacks = [
                          ReduceLROnPlateau(monitor = 'val_auc', factor = 0.1, patience = 3, verbose = 1, 
                           min_delta = 1e-4, mode = 'max'),
                          ModelCheckpoint(filepath=f'transformer_{fn}.hdf5', verbose = 1, 
                           save_best_only = True, save_weights_only=True, mode = 'max', monitor = 'val_auc')
                          ]
              )
    #

    model.load_weights(f'transformer_{fn}.hdf5')
    gru_preds = model.predict([X_val, X_val_action, X_val_seq_tar,  meta_X_val, reg_X_val, user_X_val], batch_size=BATCH_SIZE)
    gru_oof_preds[val_idx] = gru_preds
    score = roc_auc_score(y_val, gru_preds, average='macro')
    print(score)
    oof_losses.append(score)
    preds = model.predict([X_test, X_test_action, X_test_seq_tar, meta_X_test, reg_X_test, user_X_test], batch_size=BATCH_SIZE)

    gru_test_preds += preds / NFOLDS
    
    K.clear_session()
    
print(oof_losses)
print('Mean OOF loss across folds', np.mean(oof_losses))
print('STD OOF loss across folds', np.std(oof_losses))

Starting fold:  0
Epoch 1/8

Epoch 00001: val_auc improved from -inf to 0.64484, saving model to transformer_0.hdf5
Epoch 2/8

Epoch 00002: val_auc improved from 0.64484 to 0.72723, saving model to transformer_0.hdf5
Epoch 3/8

Epoch 00003: val_auc improved from 0.72723 to 0.75463, saving model to transformer_0.hdf5
Epoch 4/8

Epoch 00004: val_auc improved from 0.75463 to 0.76306, saving model to transformer_0.hdf5
Epoch 5/8

Epoch 00005: val_auc improved from 0.76306 to 0.76934, saving model to transformer_0.hdf5
Epoch 6/8

Epoch 00006: val_auc did not improve from 0.76934
Epoch 7/8

Epoch 00007: val_auc did not improve from 0.76934
Epoch 8/8

Epoch 00008: ReduceLROnPlateau reducing learning rate to 0.00010000000474974513.

Epoch 00008: val_auc did not improve from 0.76934
0.7728541856802419
Starting fold:  1
Epoch 1/8

Epoch 00001: val_auc improved from -inf to 0.63978, saving model to transformer_1.hdf5
Epoch 2/8

Epoch 00002: val_auc improved from 0.63978 to 0.72841, saving model t

- 287/287 [==============================] - 148s 466ms/step - loss: 0.2558 - auc: 0.5117 - val_loss: 0.2065 - val_auc: 0.6458
- 287/287 [==============================] - 152s 473ms/step - loss: 0.2558 - auc: 0.5117 - val_loss: 0.2065 - val_auc: 0.6446
- 287/287 [==============================] - 146s 464ms/step - loss: 0.2558 - auc: 0.5118 - val_loss: 0.2064 - val_auc: 0.6455
- 追加導入後
- 287/287 [==============================] - 147s 417ms/step - loss: 0.2558 - auc: 0.5118 - val_loss: 0.2066 - val_auc: 0.6448
- 287/287 [==============================] - 159s 441ms/step - loss: 0.2558 - auc: 0.5119 - val_loss: 0.2064 - val_auc: 0.6455

# submission

In [33]:
submission.iloc[:,:] = gru_test_preds
submission.to_csv("atmacup9_"+str(score)[:-10]+"_.csv", index=False)