In [None]:
from fastai.tabular.all import *

import gc

import riiideducation

In [None]:
def get_task_chars(data):
    """Aggregate per-task-container statistics from *data*.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain the columns ``task_container_id``,
        ``prior_question_had_explanation`` and ``answered_correctly``.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``task_container_id`` with two columns:
        ``task_diff_inv`` (1 minus the mean of ``answered_correctly``) and
        ``mean_task_exp`` (mean of ``prior_question_had_explanation`` cast to
        float16).
    """
    # Use the frame passed in; the previous version silently read a global
    # ``train_df`` and ignored its argument.
    mean_task_exp = (
        data[['task_container_id', 'prior_question_had_explanation']]
        .groupby('task_container_id')
        .agg('mean')
        .astype('float16')
    )
    mean_task_exp.columns = ['mean_task_exp']

    task_diff = 1 - (
        data[['task_container_id', 'answered_correctly']]
        .groupby('task_container_id')
        .agg('mean')
    )
    task_diff.columns = ['task_diff_inv']

    return task_diff.join(mean_task_exp)



def get_user_chars(data):
    """Build per-user and per-(user, part) proficiency features from *data*.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain the columns ``user_id``, ``answered_correctly``,
        ``prior_question_had_explanation`` and ``part``.

    Returns
    -------
    pandas.DataFrame
        One row per (user, part) pair seen in *data*, carrying:
        ``user_prof`` / ``user_prof_inv`` (overall mean correctness and its
        complement, float32), ``mean_user_p_e`` (share of the user's rows
        whose ``prior_question_had_explanation`` equals 1 among rows where it
        equals 0 or 1), ``user_id``, ``part``, ``user_part_prof`` /
        ``user_part_prof_inv`` (mean correctness within the part and its
        complement) and ``part_diff`` / ``part_diff_inv`` (mean correctness of
        the part over all users and its complement).
    """
    # Overall per-user accuracy and its complement.
    user_prof = (
        data.groupby('user_id')['answered_correctly'].mean().to_frame('user_prof')
    )
    user_prof['user_prof_inv'] = 1 - user_prof.user_prof
    user_prof = user_prof.astype({'user_prof': 'float32', 'user_prof_inv': 'float32'})

    # Share of answers that followed an explained question.
    had_expl = data['prior_question_had_explanation']
    n_without = (
        data.loc[had_expl == 0, ['answered_correctly', 'user_id']]
        .groupby(['user_id'])
        .agg('count')
    )
    n_with = (
        data.loc[had_expl == 1, ['answered_correctly', 'user_id']]
        .groupby(['user_id'])
        .agg('count')
    )
    # A plain ``n_with / (n_without + n_with)`` aligns on the index, so a user
    # present in only one of the two frames became NaN (and was then replaced
    # by the population mean). Treat the missing side as a zero count so such
    # users get the exact ratio 0.0 or 1.0.
    n_total = n_with.add(n_without, fill_value=0)
    mean_user_p_e = n_with.reindex(n_total.index, fill_value=0) / n_total
    mean_user_p_e.columns = ['mean_user_p_e']
    # Remaining NaNs can only come from a 0/0 ratio; fall back to the mean.
    mean_user_p_e = mean_user_p_e.fillna(mean_user_p_e.mean())

    user_chars = user_prof.join(mean_user_p_e)

    # Per-(user, part) accuracy.
    user_part_chars = (
        data[['user_id', 'answered_correctly', 'part']]
        .groupby(['user_id', 'part'])
        .agg('mean')
    )
    user_part_chars.columns = ['user_part_prof']
    user_part_chars['user_part_prof_inv'] = 1 - user_part_chars.user_part_prof
    user_part_chars = user_part_chars.reset_index()

    # Accuracy of each part across all users.
    part_diff = (
        data[['answered_correctly', 'part']]
        .groupby('part')
        .agg({'answered_correctly': 'mean'})
    )
    part_diff.columns = ['part_diff']
    part_diff['part_diff_inv'] = 1 - part_diff.part_diff

    # ``on=`` cannot be combined with ``right_index=True`` (pandas raises
    # MergeError); ``left_on=`` expresses the same column-to-index join.
    user_part_chars = user_part_chars.merge(
        part_diff, left_on='part', right_index=True, how='left'
    )

    user_chars = user_chars.merge(
        user_part_chars, left_index=True, right_on='user_id', how='left'
    )

    return user_chars



def load_model():
    """Load the exported learner from the fixed input path and return it."""
    learner_path = '../input/riiid-model/fastai_nn_v6_15_epochs_wd'
    learner = load_learner(learner_path)
    return learner


class user:
    """Running per-user counters and the proficiency ratios derived from them.

    Counters are updated one interaction at a time via ``train_update`` /
    ``test_update`` / ``update_answers``; the derived ratios are refreshed by
    ``do_the_monster_math``.

    NOTE(review): instances appear to be stored in a pickled dict elsewhere in
    this file, so the class name and attribute names are kept unchanged.
    """

    def __init__(self):
        # Overall counters and ratios.
        self.questions_answered = 0
        self.answered_correctly = 0
        self.user_prof = 0.
        self.user_prof_inv = 1.

        # Not updated by any method of this class.
        self.user_comm_prof = 0.
        self.user_comm_prof_inv = 0.

        # Per-part counters and ratios, keyed by the integers 0..7.
        self.questions_per_part = dict((x, 0) for x in range(8))
        self.correct_per_part = dict((x, 0) for x in range(8))
        self.user_part_prof = dict((x, 0.) for x in range(8))
        self.user_part_prof_inv = dict((x, 0.) for x in range(8))

        # Counts of interactions with / without a prior explanation.
        self.had_explanation = 0
        self.no_explanation = 0
        self.mean_user_p_e = 0.

    def _record_question(self, data):
        """Count one question: total, per part, and by prior-explanation flag."""
        self.questions_answered += 1
        self.questions_per_part[data['part']] += 1

        if data['prior_question_had_explanation'] == 1:
            self.had_explanation += 1
        else:
            self.no_explanation += 1

    def train_update(self, data):
        """Record one interaction whose outcome is known.

        *data* is indexed by key and must provide ``part``,
        ``answered_correctly`` and ``prior_question_had_explanation``.
        Derived ratios are not refreshed here.
        """
        self._record_question(data)

        if data['answered_correctly'] == 1:
            self.answered_correctly += 1
            self.correct_per_part[data['part']] += 1

    def test_update(self, data):
        """Record one interaction whose outcome is not yet known.

        *data* must provide ``part`` and ``prior_question_had_explanation``.
        Correct-answer counters are left untouched.
        """
        self._record_question(data)

    def update_answers(self, data):
        """Credit one correct answer for ``data['part']`` and refresh ratios."""
        self.answered_correctly += 1
        self.correct_per_part[data['part']] += 1
        self.do_the_monster_math()

    def do_the_monster_math(self):
        """Recompute all ratio attributes from the current counters.

        Any ratio whose denominator is still zero keeps its current value
        instead of raising ZeroDivisionError.
        """
        if self.questions_answered:
            self.user_prof = self.answered_correctly / self.questions_answered
            self.user_prof_inv = 1 - self.user_prof

        for part, asked in self.questions_per_part.items():
            if asked == 0:
                continue
            self.user_part_prof[part] = self.correct_per_part[part] / asked
            self.user_part_prof_inv[part] = 1 - self.user_part_prof[part]

        flagged = self.had_explanation + self.no_explanation
        if flagged:
            self.mean_user_p_e = self.had_explanation / flagged

In [None]:
# %%time
# Inference cell: load lookup data and the model, then for every batch served
# by the competition environment build the per-user features, predict, and
# submit. Per-user state lives in ``train_dict`` (user_id -> ``user``).
import ast  # local to this cell: safe parsing of the previous group's answers

questions_df = pd.read_csv('../input/riiid-test-answer-prediction/questions.csv',
                          usecols=['question_id','part'],
                          dtype={'question_id':'int32','part':'int8'})

# NOTE(review): unpickling executes code embedded in the file; only load
# artefacts produced by a trusted run.
with open('../input/riiid-model/final.pickle', 'rb') as handle:
    train_dict = pickle.load(handle)

try:
    model = load_model()
except Exception as e:
    print('model:  ',e)
    # The loop below cannot run without a model; surface the real error here
    # instead of a later NameError on ``model``.
    raise

env = riiideducation.make_env()
iter_test = env.iter_test()
first_pass = True

for (test_df, sample_prediction_df) in iter_test:
    test_df = test_df.merge(questions_df,left_on='content_id',right_on='question_id',how='left').drop('question_id',axis=1)
    # Assign the filled column back explicitly; an in-place fillna on a
    # ``.loc`` selection may operate on a temporary and leave test_df unchanged.
    test_df['prior_question_had_explanation'] = test_df['prior_question_had_explanation'].fillna(False)

    # Add Features
    if not first_pass:
        try:
            # Third-from-last column of the first row holds the previous
            # group's outcomes as a list literal (presumably
            # ``prior_group_answers_correct`` -- verify against the API).
            # literal_eval parses it without executing arbitrary code.
            old_df['answered_correctly'] = ast.literal_eval(test_df.iat[0,-3])
            for _,row in old_df.iterrows():
                if row['content_type_id']==0:
                    if row['answered_correctly']==1:
                        train_dict[row['user_id']].update_answers(row)
                    else:
                        train_dict[row['user_id']].do_the_monster_math()
        except Exception as e:
            # Best effort: a malformed batch must not abort the submission.
            print('first: ',e)

    # Keep an independent copy so next iteration's column assignment does not
    # write into a slice of this batch's test_df.
    old_df = test_df[['row_id','user_id','part','content_type_id']].copy()

    cols = ['user_part_prof','user_part_prof_inv','user_prof','user_prof_inv','mean_user_p_e','num_qs_ans']
    test_df.loc[:,cols] = np.nan
    test_df[cols] = test_df[cols].astype('float16')

    for index,row in test_df.iterrows():
        try:
            if row['user_id'] not in train_dict:
                train_dict[row['user_id']] = user()
            if row['content_type_id']==0:
                train_dict[row['user_id']].test_update(row)
            # ``index`` is used positionally; the merge above produces a
            # default 0..n-1 index, so label and position coincide.
            # The six feature columns were appended last, hence ``-6:``.
            test_df.iloc[index,-6:] = [train_dict[row['user_id']].user_part_prof[row['part']],
                                       train_dict[row['user_id']].user_part_prof_inv[row['part']],
                                       train_dict[row['user_id']].user_prof,
                                       train_dict[row['user_id']].user_prof_inv,
                                       train_dict[row['user_id']].mean_user_p_e,
                                       train_dict[row['user_id']].questions_answered]

        except Exception as e:
            # Best effort: rows that fail keep NaN features.
            print('last: ',e)

    first_pass = False

    # Predictions
    dl = model.dls.test_dl(test_df[['user_id','content_id','user_part_prof','user_part_prof_inv','user_prof','user_prof_inv','mean_user_p_e','num_qs_ans']])
    preds,_ = model.get_preds(dl=dl)
    # NOTE(review): argmax submits hard 0/1 labels; if the evaluation metric is
    # ranking-based, class probabilities may score better -- confirm.
    test_df['answered_correctly'] = list(map(np.float64, preds.argmax(axis=1)))

    env.predict(test_df.loc[test_df['content_type_id']==0,['row_id', 'answered_correctly']])