In [None]:
import ast
import gc
import pickle

import lightgbm as lgb
import numpy as np
import pandas as pd
from sklearn.metrics import roc_auc_score

import riiideducation

In [None]:
class user:
    """Running answer statistics for a single user.

    Raw counters are advanced by ``train_update`` / ``test_update`` /
    ``update_answers``; the derived ratios (``user_prof``, ``user_part_prof``,
    ``mean_user_p_e`` and their complements) are only refreshed when
    ``do_the_monster_math`` runs.

    Every ``data`` argument is a mapping (dict or pandas row) indexed by
    column name.
    """

    def __init__(self):
        # Overall counters and the ratios derived from them.
        self.questions_answered = 0
        self.answered_correctly = 0
        self.user_prof = 0.
        self.user_prof_inv = 1.

        # Same statistics broken down by question part (keys 1..7).
        self.questions_per_part = {part: 0 for part in range(1, 8)}
        self.correct_per_part = {part: 0 for part in range(1, 8)}
        self.user_part_prof = {part: 0 for part in range(1, 8)}
        # NOTE(review): starts at 0 while the overall `user_prof_inv` starts
        # at 1. Left untouched because the stored model presumably saw the
        # same defaults during training -- confirm before changing.
        self.user_part_prof_inv = {part: 0 for part in range(1, 8)}

        # Tally of the `prior_question_had_explanation` flag and its mean.
        self.had_explanation = 0
        self.no_explanation = 0
        self.mean_user_p_e = 0.

    def _record_question(self, data):
        """Count one seen question: overall, per part, and explanation flag."""
        part = data['part']
        self.questions_answered += 1
        self.questions_per_part[part] += 1

        if data['prior_question_had_explanation'] == 1:
            self.had_explanation += 1
        else:
            self.no_explanation += 1

    def train_update(self, data):
        """Record a question whose outcome is already known.

        Reads ``data['part']``, ``data['answered_correctly']`` and
        ``data['prior_question_had_explanation']``. Derived ratios are not
        recomputed here.

        Raises:
            KeyError: if ``data['part']`` is not one of the tracked parts.
        """
        self._record_question(data)
        if data['answered_correctly'] == 1:
            self.answered_correctly += 1
            self.correct_per_part[data['part']] += 1

    def test_update(self, data):
        """Record a question whose outcome is not known yet.

        Reads ``data['part']`` and ``data['prior_question_had_explanation']``.
        Correct-answer counters are left alone; ``update_answers`` adds to
        them once the outcome arrives.

        Raises:
            KeyError: if ``data['part']`` is not one of the tracked parts.
        """
        self._record_question(data)

    def update_answers(self, data):
        """Credit one correct answer for ``data['part']`` and refresh the ratios."""
        self.answered_correctly += 1
        self.correct_per_part[data['part']] += 1
        self.do_the_monster_math()

    def do_the_monster_math(self):
        """Recompute every derived ratio from the raw counters.

        A ratio whose denominator is still zero keeps its current value
        instead of raising ``ZeroDivisionError``.
        """
        if self.questions_answered:
            self.user_prof = self.answered_correctly / self.questions_answered
            self.user_prof_inv = 1 - self.user_prof

        for part, seen in self.questions_per_part.items():
            if seen == 0:
                continue
            self.user_part_prof[part] = self.correct_per_part[part] / seen
            self.user_part_prof_inv[part] = 1 - self.user_part_prof[part]

        flagged_rows = self.had_explanation + self.no_explanation
        if flagged_rows:
            self.mean_user_p_e = self.had_explanation / flagged_rows

In [None]:
# %%time
# Inference loop: for every batch served by the competition API, fold the
# previous batch's revealed answers into the per-user state, build the
# per-user features for the new rows, and submit predictions.
questions_df = pd.read_csv('../input/riiid-test-answer-prediction/questions.csv',
                           usecols=['question_id', 'part'],
                           dtype={'question_id': 'int32', 'part': 'int8'})

# NOTE(review): unpickling can execute arbitrary code -- both artifacts below
# must come from a trusted source.
with open('../input/riiid-model/final.pickle', 'rb') as handle:
    train_dict = pickle.load(handle)

model = pd.read_pickle('../input/riiid-model/lgbm_no_comm.pkl')
# print('Load model:', model)

# Per-user feature columns appended to every batch, in this order.
cols = ['user_part_prof', 'user_part_prof_inv', 'user_prof', 'user_prof_inv', 'mean_user_p_e', 'num_qs_ans']
# Full column list handed to the model.
model_inputs = ['user_id', 'content_id'] + cols

env = riiideducation.make_env()
iter_test = env.iter_test()
first_pass = True

for (test_df, sample_prediction_df) in iter_test:
    test_df = test_df.merge(questions_df, left_on='content_id', right_on='question_id', how='left').drop('question_id', axis=1)
    # Assign the result back: an in-place fillna on a `.loc` selection may
    # only touch a temporary copy.
    test_df['prior_question_had_explanation'] = test_df['prior_question_had_explanation'].fillna(False)

    # Fold the previous batch's revealed answers into the user state.
    if not first_pass:
        try:
            # The third-from-last column of the first row holds a list literal
            # with one entry per row of the previous batch (presumably the
            # `prior_group_answers_correct` column -- TODO confirm).
            # literal_eval parses it without executing arbitrary code.
            prev_answers = ast.literal_eval(test_df.iat[0, -3])
            # assign() returns a new frame, so no write lands on a slice of
            # the previous test_df; a length mismatch raises ValueError here.
            old_df = old_df.assign(answered_correctly=prev_answers)
        except Exception as e:
            print('first: ', e)
        else:
            for _, row in old_df.iterrows():
                if row['content_type_id'] != 0:
                    continue
                # Guard each row on its own so one bad row does not discard
                # the updates for the rest of the batch.
                try:
                    if row['answered_correctly'] == 1:
                        train_dict[row['user_id']].update_answers(row)
                    else:
                        train_dict[row['user_id']].do_the_monster_math()
                except Exception as e:
                    print('first: ', e)

    # Remember this batch so its answers can be applied on the next iteration.
    old_df = test_df[['row_id', 'user_id', 'part', 'content_type_id']]

    # Build the feature rows in a list and attach them in one step instead of
    # writing into the frame row by row.
    feature_rows = []
    for _, row in test_df.iterrows():
        # A row whose features cannot be computed keeps NaN in every column.
        features = [np.nan] * len(cols)
        try:
            if row['user_id'] not in train_dict:
                train_dict[row['user_id']] = user()
            state = train_dict[row['user_id']]
            if row['content_type_id'] == 0:
                state.test_update(row)
            features = [state.user_part_prof[row['part']],
                        state.user_part_prof_inv[row['part']],
                        state.user_prof,
                        state.user_prof_inv,
                        state.mean_user_p_e,
                        state.questions_answered]
        except Exception as e:
            print('last: ', e)
        feature_rows.append(features)

    # NOTE(review): float16 cannot represent every integer above 2048, so
    # `num_qs_ans` is rounded for very active users. Kept as-is on the
    # assumption that the model was trained on float16 features -- confirm.
    feature_df = pd.DataFrame(feature_rows, columns=cols, index=test_df.index).astype('float16')
    test_df = test_df.join(feature_df)

    # Correct Data
    test_df[['user_id', 'content_id']] = test_df[['user_id', 'content_id']].astype('category')
    first_pass = False

    # Predictions
    test_df['answered_correctly'] = model.predict(test_df[model_inputs], num_iteration=model.best_iteration)
    env.predict(test_df.loc[test_df['content_type_id'] == 0, ['row_id', 'answered_correctly']])