In [1]:
import numpy as np
import pandas as pd
from tqdm import tqdm
import pickle

def _load_pickle(path):
    """Unpickle and return the object stored in the file at ``path``."""
    # SECURITY: pickle.load can execute arbitrary code; only point this at
    # trusted files.
    with open(path, 'rb') as handle:
        return pickle.load(handle)


# Pretrained per-user lookup tables consumed later by pre_X.
user_ele_dict = _load_pickle('../input/pretrained-for-riiid/user_ele_dict.pkl')
user_num_dict = _load_pickle('../input/pretrained-for-riiid/user_num_dict.pkl')


In [2]:
import lightgbm as lgb
# Disabled alternative: load one booster from a single model file instead.
#model = lgb.Booster(model_file='../input/boostingmodel/lgb-model-boosting.txt')

# Load 100 LightGBM boosters from numbered files, keyed "model0" .. "model99".
model_dict = {
    "model{0}".format(model_idx): lgb.Booster(
        model_file='../input/baggingmodels/lgb-model-bagging' + str(model_idx) + '.txt'
    )
    for model_idx in range(100)
}

In [3]:
# Columns read from questions.csv and the dtype each is parsed as.
question_dtype = {
    'question_id':'int16',
    'tags':'object'
}
questions_data = pd.read_csv('../input/riiid-test-answer-prediction/questions.csv',
                             usecols=list(question_dtype),
                             dtype=question_dtype)

# Questions with no tags get the placeholder tag string '92' so that the later
# `tags.split()` always has a string to work on.
# NOTE(review): why tag 92 was chosen as the placeholder is not visible here.
# Assign the result back explicitly: `questions_data.tags.fillna(..., inplace=True)`
# is an in-place call on a chained column access, which warns on pandas 2.x and
# does not update the frame at all under Copy-on-Write.
questions_data['tags'] = questions_data['tags'].fillna('92')

def gen_vec(row, n_tags=188):
    """Return a copy of ``row`` extended with a multi-hot tag vector.

    Parameters
    ----------
    row : pandas.Series
        Must expose ``tags``: a whitespace-separated string of integer tag
        indices (e.g. ``"1 3 187"``).
    n_tags : int, default 188
        Length of the generated vector; every tag index must be < ``n_tags``.

    Returns
    -------
    pandas.Series
        A copy of ``row`` with an extra ``'vec'`` entry holding a float array
        of length ``n_tags`` that is 1.0 at each listed tag index and 0.0
        elsewhere.

    Raises
    ------
    IndexError
        If a tag index is >= ``n_tags``.
    ValueError
        If a token in ``tags`` is not an integer literal.
    """
    tag_vec = np.zeros(n_tags)
    for tag in row.tags.split():
        tag_vec[int(tag)] = 1.0

    # Work on a copy: DataFrame.apply does not support functions that mutate
    # the object they are handed.
    result = row.copy()
    result['vec'] = tag_vec
    return result

# Run gen_vec on every row (axis=1 is the same as axis='columns'), so the
# resulting frame carries the per-question 'vec' entry that gen_vec adds.
questions_data = questions_data.apply(gen_vec, axis=1)

In [4]:
import warnings
warnings.filterwarnings("ignore", message="invalid value encountered")

In [5]:
def pre_X(df, ele_dict=user_ele_dict, num_dict=user_num_dict, q_data=questions_data):
    """Build the ``(len(df), 376)`` feature matrix for a batch of rows.

    Layout of each output row:

    * columns 0-187: element-wise ``ele_dict[user_id] / num_dict[user_id]``
      with NaN entries (0/0) replaced by 0.25; if the user is not a key of
      ``ele_dict`` the whole slice is 0.25.
    * columns 188-375: ``q_data.vec[content_id]`` for the row's question.

    Parameters
    ----------
    df : pandas.DataFrame
        Must have ``user_id`` and ``content_id`` columns.
    ele_dict, num_dict : dict
        Keyed by user id; values are divided element-wise and written into a
        188-wide slice.
        NOTE(review): presumably per-tag correct counts and per-tag attempt
        counts as length-188 arrays; inferred from usage, confirm against the
        code that produced the pickles.
    q_data : pandas.DataFrame
        Must have a ``vec`` column that can be looked up by ``content_id``.

    Returns
    -------
    numpy.ndarray
        Float array of shape ``(len(df), 376)``; empty input gives ``(0, 376)``.

    Raises
    ------
    KeyError
        If a ``content_id`` is missing from ``q_data.vec``, or a user is in
        ``ele_dict`` but not in ``num_dict``.
    """
    n_tags = 188         # width of each half of the feature row
    default_rate = 0.25  # fill value for unseen users and 0/0 ratios

    X = np.zeros((len(df), n_tags * 2))
    tag_vectors = q_data.vec

    # Walk only the two columns that are used instead of df.iterrows(), which
    # builds a Series per row and does not preserve column dtypes.
    for i, (user_id, content_id) in enumerate(zip(df.user_id, df.content_id)):
        mask = tag_vectors[content_id]
        if user_id in ele_dict:
            # 0/0 yields NaN by design (replaced just below); silence the
            # "invalid value" warning here rather than via a global filter.
            with np.errstate(invalid='ignore'):
                ratio = ele_dict[user_id] / num_dict[user_id]
            X[i, :n_tags] = np.nan_to_num(ratio, nan=default_rate)
        else:
            X[i, :n_tags] = default_rate
        X[i, n_tags:] = mask
    return X

In [6]:
import riiideducation
env = riiideducation.make_env()
iter_test = env.iter_test()

# Average over every booster that was loaded, rather than repeating the model
# count here; dict order is insertion order, i.e. model0, model1, ...
bagged_models = list(model_dict.values())

for (test_df, sample_prediction_df) in iter_test:
    # Keep only rows with content_type_id == 0. The .copy() makes the filtered
    # frame independent, so adding a column below does not write to a slice of
    # the original batch (SettingWithCopyWarning).
    test_df = test_df[test_df.content_type_id == 0].copy()
    X_test = pre_X(test_df)
    if len(test_df) > 0:
        # Bagging: element-wise mean of all models' predictions.
        predictions = np.mean([booster.predict(X_test) for booster in bagged_models], axis=0)
    else:
        # No matching rows in this batch: skip the models, submit an empty frame.
        # NOTE(review): assumes env.predict accepts an empty frame - confirm.
        predictions = np.zeros(0)
    test_df['answered_correctly'] = predictions
    env.predict(test_df[['row_id', 'answered_correctly']])