A slightly restructured version of this kernel: https://www.kaggle.com/manikanthr5/riiid-sakt-model-inference-public

Using this version, I was able to improve the score to 0.775.

In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Recursively print the full path of every file below the read-only input directory.
for dirname, _, filenames in os.walk('/kaggle/input'):
    full_paths = (os.path.join(dirname, filename) for filename in filenames)
    for full_path in full_paths:
        print(full_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/riiid-test-answer-prediction/example_sample_submission.csv
/kaggle/input/riiid-test-answer-prediction/example_test.csv
/kaggle/input/riiid-test-answer-prediction/questions.csv
/kaggle/input/riiid-test-answer-prediction/train.csv
/kaggle/input/riiid-test-answer-prediction/lectures.csv
/kaggle/input/riiid-test-answer-prediction/riiideducation/competition.cpython-37m-x86_64-linux-gnu.so
/kaggle/input/riiid-test-answer-prediction/riiideducation/__init__.py


In [2]:
import gc
import random
from tqdm.notebook import tqdm
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import train_test_split

import seaborn as sns
import matplotlib.pyplot as plt

import torch
import torch.nn as nn
import torch.nn.utils.rnn as rnn_utils
from torch.autograd import Variable
from torch.utils.data import Dataset, DataLoader
from pathlib import Path
import datatable as dt

### Load Data

In [3]:
# Root of the Kaggle input mount; stop immediately if it is not available.
path = Path('/kaggle') / 'input'
assert path.exists()

In [4]:
%%time

# Columns to read from train.csv.
# NOTE(review): only the KEYS of this mapping are used below (as the column
# selection for fread); the dtype values are never applied to the frame.
data_types_dict = dict(
    content_type_id='bool',
    timestamp='int64',
    user_id='int32',
    content_id='int16',
    answered_correctly='int8',
    prior_question_elapsed_time='float32',
    prior_question_had_explanation='bool',
)
target = 'answered_correctly'

train_csv = path / 'riiid-test-answer-prediction' / 'train.csv'
train_df = dt.fread(train_csv, columns=set(data_types_dict)).to_pandas()

CPU times: user 1min 8s, sys: 11.7 s, total: 1min 19s
Wall time: 1min 59s


In [5]:
# Show the column dtypes and the memory footprint of the freshly loaded frame.
train_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 101230332 entries, 0 to 101230331
Data columns (total 7 columns):
 #   Column                          Dtype  
---  ------                          -----  
 0   timestamp                       int64  
 1   user_id                         int32  
 2   content_id                      int32  
 3   content_type_id                 bool   
 4   answered_correctly              int32  
 5   prior_question_elapsed_time     float64
 6   prior_question_had_explanation  object 
dtypes: bool(1), float64(1), int32(3), int64(1), object(1)
memory usage: 3.5+ GB


In [6]:
%%time

# Keep only the rows whose content_type_id is False
# (presumably the question rows - lectures have no answer to predict).
train_df = train_df.loc[~train_df.content_type_id]

# Order the interactions chronologically and renumber the index from 0.
train_df = train_df.sort_values('timestamp').reset_index(drop=True)

CPU times: user 28.1 s, sys: 8.98 s, total: 37.1 s
Wall time: 38 s


In [7]:
# Both columns have served their purpose (filtering / ordering); drop them in place.
for unused_column in ('timestamp', 'content_type_id'):
    del train_df[unused_column]

### Pre-process

In [8]:
# Number of distinct question ids; sizes the embedding tables of the model.
n_skill = train_df.content_id.nunique()
print(f"number skills {n_skill}")

number skills 13523


In [9]:
%%time

def _user_history(user_rows):
    """Return one user's (content_id array, answered_correctly array) pair."""
    return user_rows['content_id'].values, user_rows['answered_correctly'].values


# One entry per user_id holding that user's interaction history, in the
# (chronological) row order established by the earlier sort.
group = (
    train_df[['user_id', 'content_id', 'answered_correctly']]
    .groupby('user_id')
    .apply(_user_history)
)

del train_df

CPU times: user 38.5 s, sys: 2.1 s, total: 40.6 s
Wall time: 41.7 s


### Data Loaders

In [10]:
MAX_SEQ = 180  # window length, passed as max_seq to the datasets and the model
ACCEPTED_USER_CONTENT_SIZE = 4  # minimum number of interactions a user (or chunk) needs to become a training sample
EMBED_SIZE = 128  # embedding width, passed to the model as embed_dim
BATCH_SIZE = 64  # batch size of the train / validation DataLoaders
DROPOUT = 0.1  # default dropout probability of TransformerBlock, Encoder and SAKTModel

In [11]:
class SAKTDataset(Dataset):
    """Fixed-length training windows built from per-user interaction histories.

    ``group`` maps a user id to a ``(content_id, answered_correctly)`` pair of
    arrays and must expose the ids through ``group.index``. Histories shorter
    than ``ACCEPTED_USER_CONTENT_SIZE`` are skipped; histories longer than
    ``max_seq`` are cut into consecutive chunks of ``max_seq`` items, and the
    remaining tail is kept only if it is itself long enough.
    """

    def __init__(self, group, n_skill, max_seq=100):
        super().__init__()
        self.samples = {}
        self.n_skill = n_skill
        self.max_seq = max_seq
        self.user_ids = []

        for i, user_id in enumerate(group.index):
            if i % 10000 == 0:
                print(f'Processed {i} users')

            content_id, answered_correctly = group[user_id]
            total_questions = len(content_id)

            if total_questions < ACCEPTED_USER_CONTENT_SIZE:
                continue  # too little history to learn from

            if total_questions <= self.max_seq:
                # The whole history fits into one window.
                self._add_sample(f'{user_id}', content_id, answered_correctly)
                continue

            # Long history: emit every full chunk of max_seq interactions ...
            last_pos = total_questions // self.max_seq
            for seq in range(last_pos):
                start = seq * self.max_seq
                end = start + self.max_seq
                self._add_sample(f"{user_id}_{seq}", content_id[start:end], answered_correctly[start:end])

            # ... and the leftover tail when it is long enough on its own.
            tail_start = last_pos * self.max_seq
            if len(content_id[tail_start:]) >= ACCEPTED_USER_CONTENT_SIZE:
                self._add_sample(f"{user_id}_{last_pos + 1}", content_id[tail_start:], answered_correctly[tail_start:])

    def _add_sample(self, key, content_id, answered_correctly):
        """Register one window under ``key`` (key order defines the dataset order)."""
        self.user_ids.append(key)
        self.samples[key] = (content_id, answered_correctly)

    def __len__(self):
        return len(self.user_ids)

    def __getitem__(self, index):
        """Return ``(x, target_id, label)``, each of length ``max_seq - 1``.

        Windows shorter than ``max_seq`` are left-padded with zeros. ``x``
        encodes the previous interaction as ``content_id`` plus ``n_skill``
        when it was answered correctly; ``target_id`` / ``label`` are the
        question and the outcome one step ahead.
        """
        content_id, answered_correctly = self.samples[self.user_ids[index]]
        seq_len = len(content_id)

        content_id_seq = np.zeros(self.max_seq, dtype=int)
        answered_correctly_seq = np.zeros(self.max_seq, dtype=int)
        if seq_len < self.max_seq:
            # Right-align the history; leading positions stay 0 (padding).
            content_id_seq[-seq_len:] = content_id
            answered_correctly_seq[-seq_len:] = answered_correctly
        else:
            content_id_seq[:] = content_id[-self.max_seq:]
            answered_correctly_seq[:] = answered_correctly[-self.max_seq:]

        target_id = content_id_seq[1:]
        label = answered_correctly_seq[1:]

        was_correct = answered_correctly_seq[:-1] == 1
        x = content_id_seq[:-1] + was_correct * self.n_skill

        return x, target_id, label

In [12]:
TEST_SIZE = 0.1  # fraction of users held out for validation
SPLIT_SEED = 42  # fixed seed so the user-level split is the same on every run

# Split at user level: a user's whole history goes either to train or to val.
train, val = train_test_split(group, test_size=TEST_SIZE, random_state=SPLIT_SEED)

In [13]:
# Build the training windows, then wrap them in a shuffling multi-worker loader.
train_dataset = SAKTDataset(train, n_skill, max_seq=MAX_SEQ)
train_dataloader = DataLoader(
    train_dataset,
    batch_size=BATCH_SIZE,
    shuffle=True,
    num_workers=8,
)
# The raw split is no longer needed once the dataset holds its samples.
del train

Processed 0 users
Processed 10000 users
Processed 20000 users
Processed 30000 users
Processed 40000 users
Processed 50000 users
Processed 60000 users
Processed 70000 users
Processed 80000 users
Processed 90000 users
Processed 100000 users
Processed 110000 users
Processed 120000 users
Processed 130000 users
Processed 140000 users
Processed 150000 users
Processed 160000 users
Processed 170000 users
Processed 180000 users
Processed 190000 users
Processed 200000 users
Processed 210000 users
Processed 220000 users
Processed 230000 users
Processed 240000 users
Processed 250000 users
Processed 260000 users
Processed 270000 users
Processed 280000 users
Processed 290000 users
Processed 300000 users
Processed 310000 users
Processed 320000 users
Processed 330000 users
Processed 340000 users
Processed 350000 users


In [14]:
# Build the validation windows. The loader does not shuffle: the epoch-level
# metrics do not need a random order, and a fixed order makes runs repeatable.
val_dataset = SAKTDataset(val, n_skill, max_seq=MAX_SEQ)
val_dataloader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=8)
# The raw split is no longer needed once the dataset holds its samples.
del val

Processed 0 users
Processed 10000 users
Processed 20000 users
Processed 30000 users


In [15]:
# Pull a single batch to sanity-check the tensor shapes (x, target_id, label).
sample_batch = next(iter(train_dataloader))
tuple(tensor.shape for tensor in sample_batch)

(torch.Size([64, 179]), torch.Size([64, 179]), torch.Size([64, 179]))

### Define model

In [16]:
class FFN(nn.Module):
    """Position-wise feed-forward block: Linear -> ReLU -> BatchNorm1d -> Linear -> Dropout.

    Args:
        state_size: size of the last (feature) dimension of the input and output.
        forward_expansion: multiplier for the hidden layer width.
        bn_size: number of features given to ``BatchNorm1d``. For the 3-D input
            used in this notebook this has to match dimension 1 of ``x``
            (presumably the sequence length) - TODO confirm.
        dropout: dropout probability applied to the output.
    """

    def __init__(self, state_size = 200, forward_expansion = 1, bn_size=MAX_SEQ - 1, dropout=0.2):
        super().__init__()
        self.state_size = state_size
        hidden_size = forward_expansion * state_size

        # Sub-modules are registered in the order they are applied in forward().
        self.lr1 = nn.Linear(state_size, hidden_size)
        self.relu = nn.ReLU()
        self.bn = nn.BatchNorm1d(bn_size)
        self.lr2 = nn.Linear(hidden_size, state_size)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        hidden = self.lr1(x)
        hidden = self.relu(hidden)
        hidden = self.bn(hidden)
        out = self.lr2(hidden)
        return self.dropout(out)

FFN()

FFN(
  (lr1): Linear(in_features=200, out_features=200, bias=True)
  (relu): ReLU()
  (bn): BatchNorm1d(179, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (lr2): Linear(in_features=200, out_features=200, bias=True)
  (dropout): Dropout(p=0.2, inplace=False)
)

In [17]:
def future_mask(seq_length):
    """Return a square boolean mask that is True strictly above the diagonal.

    Entry ``[i, j]`` is True when ``j > i``, i.e. when position ``j`` lies in
    the future of position ``i`` and must not be attended to.
    """
    all_ones = torch.ones(seq_length, seq_length)
    return torch.triu(all_ones, diagonal=1).bool()

future_mask(5)

tensor([[False,  True,  True,  True,  True],
        [False, False,  True,  True,  True],
        [False, False, False,  True,  True],
        [False, False, False, False,  True],
        [False, False, False, False, False]])

In [18]:
class TransformerBlock(nn.Module):
    """Multi-head attention followed by a feed-forward block, each with a residual + LayerNorm.

    NOTE: ``forward`` hands its arguments to ``nn.MultiheadAttention``
    positionally as ``(value, key, query)``, while that module's signature is
    ``(query, key, value)``. The tensor passed as ``value`` therefore acts as
    the attention *query* (and is the one used in the residual connection),
    and the tensor passed as ``query`` acts as the attention *value*.
    """

    def __init__(self, embed_dim, heads = 8, dropout = DROPOUT, forward_expansion = 1):
        super().__init__()
        self.multi_att = nn.MultiheadAttention(embed_dim=embed_dim, num_heads=heads, dropout=dropout)
        self.dropout = nn.Dropout(dropout)
        self.layer_normal = nn.LayerNorm(embed_dim)
        self.ffn = FFN(embed_dim, forward_expansion = forward_expansion, dropout=dropout)
        self.layer_normal_2 = nn.LayerNorm(embed_dim)

    def forward(self, value, key, query, att_mask):
        """Return ``(output, attention_weights)``; output is ``[bs, s_len, embed]``."""
        attended, att_weight = self.multi_att(value, key, query, attn_mask=att_mask)

        # Residual connection + norm around the attention, then dropout.
        attended = self.layer_normal(attended + value)
        attended = self.dropout(attended)
        attended = attended.permute(1, 0, 2)  # [s_len, bs, embed] => [bs, s_len, embed]

        # Residual connection + norm around the feed-forward block, then dropout.
        out = self.ffn(attended)
        out = self.layer_normal_2(out + attended)
        out = self.dropout(out)
        return out.squeeze(-1), att_weight
    
class Encoder(nn.Module):
    """Embeds interaction / question sequences and runs them through the transformer blocks.

    Args:
        n_skill: number of distinct question ids.
        max_seq: window length; the model sees ``max_seq - 1`` positions.
        embed_dim: embedding width.
        dropout: dropout probability, used here and forwarded to every block.
        forward_expansion: hidden-width multiplier of the feed-forward blocks.
        num_layers: number of stacked transformer blocks.
        heads: number of attention heads, forwarded to every block.

    NOTE(review): the blocks build their ``FFN`` with its default ``bn_size``
    (derived from the global ``MAX_SEQ``), so ``max_seq`` presumably has to
    equal ``MAX_SEQ`` for the shapes to line up - TODO confirm.
    """

    def __init__(self, n_skill, max_seq=100, embed_dim=128, dropout = DROPOUT, forward_expansion = 1, num_layers=1, heads = 8):
        super(Encoder, self).__init__()
        self.n_skill, self.embed_dim = n_skill, embed_dim
        # Interaction embedding: content_id, offset by n_skill when answered correctly.
        self.embedding = nn.Embedding(2 * n_skill + 1, embed_dim)
        self.pos_embedding = nn.Embedding(max_seq - 1, embed_dim)
        # Embedding of the question to be predicted.
        self.e_embedding = nn.Embedding(n_skill+1, embed_dim)
        # `heads` and `dropout` are passed on so that the constructor arguments
        # actually take effect (previously the blocks silently used their defaults).
        self.layers = nn.ModuleList([
            TransformerBlock(embed_dim, heads=heads, dropout=dropout, forward_expansion=forward_expansion)
            for _ in range(num_layers)
        ])
        self.dropout = nn.Dropout(dropout)

    def forward(self, x, question_ids):
        """Return ``(features, attention_weights_of_last_layer)``.

        ``x`` and ``question_ids`` are integer tensors of shape ``[bs, s_len]``;
        ``features`` has shape ``[bs, s_len, embed]``. The attention weights
        are ``None`` when the encoder has no layers.
        """
        device = x.device
        x = self.embedding(x)
        pos_id = torch.arange(x.size(1)).unsqueeze(0).to(device)
        pos_x = self.pos_embedding(pos_id)
        x = self.dropout(x + pos_x)
        x = x.permute(1, 0, 2) # x: [bs, s_len, embed] => [s_len, bs, embed]
        e = self.e_embedding(question_ids)
        e = e.permute(1, 0, 2)

        # The causal mask only depends on the sequence length: build it once.
        att_mask = future_mask(e.size(0)).to(device)
        att_weight = None
        for layer in self.layers:
            # e is the attention query; x provides keys and values.
            x, att_weight = layer(e, x, x, att_mask=att_mask)
            x = x.permute(1, 0, 2)  # back to [s_len, bs, embed] for the next layer
        x = x.permute(1, 0, 2)  # => [bs, s_len, embed]
        return x, att_weight

class SAKTModel(nn.Module):
    """SAKT model: an ``Encoder`` followed by a linear layer producing one logit per position.

    Args:
        n_skill: number of distinct question ids.
        max_seq: window length handed to the encoder.
        embed_dim: embedding width.
        dropout: dropout probability handed to the encoder.
        forward_expansion: hidden-width multiplier of the feed-forward blocks.
        enc_layers: number of stacked encoder layers.
        heads: number of attention heads, now forwarded to the encoder
            (it was accepted but dropped before).
    """

    def __init__(self, n_skill, max_seq=100, embed_dim=128, dropout = DROPOUT, forward_expansion = 1, enc_layers=1, heads = 8):
        super(SAKTModel, self).__init__()
        self.encoder = Encoder(n_skill, max_seq, embed_dim, dropout, forward_expansion,
                               num_layers=enc_layers, heads=heads)
        self.pred = nn.Linear(embed_dim, 1)

    def forward(self, x, question_ids):
        """Return ``(logits, attention_weights)``; logits have shape ``[bs, s_len]``."""
        x, att_weight = self.encoder(x, question_ids)
        x = self.pred(x)
        # Drop the trailing singleton dimension produced by the 1-unit linear layer.
        return x.squeeze(-1), att_weight

In [19]:
# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

In [20]:
# Main changes are possibility of forward expansion and stacking of encoding layers
def create_model():
    """Build a fresh SAKTModel using the notebook-level hyper-parameters."""
    hyper_params = dict(
        max_seq=MAX_SEQ,
        embed_dim=EMBED_SIZE,
        forward_expansion=1,
        enc_layers=1,
        heads=8,
        dropout=0.1,
    )
    return SAKTModel(n_skill, **hyper_params)

model = create_model()
model

SAKTModel(
  (encoder): Encoder(
    (embedding): Embedding(27047, 128)
    (pos_embedding): Embedding(179, 128)
    (e_embedding): Embedding(13524, 128)
    (layers): ModuleList(
      (0): TransformerBlock(
        (multi_att): MultiheadAttention(
          (out_proj): _LinearWithBias(in_features=128, out_features=128, bias=True)
        )
        (dropout): Dropout(p=0.1, inplace=False)
        (layer_normal): LayerNorm((128,), eps=1e-05, elementwise_affine=True)
        (ffn): FFN(
          (lr1): Linear(in_features=128, out_features=128, bias=True)
          (relu): ReLU()
          (bn): BatchNorm1d(179, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (lr2): Linear(in_features=128, out_features=128, bias=True)
          (dropout): Dropout(p=0.1, inplace=False)
        )
        (layer_normal_2): LayerNorm((128,), eps=1e-05, elementwise_affine=True)
      )
    )
    (dropout): Dropout(p=0.1, inplace=False)
  )
  (pred): Linear(in_features=128, out_featu

In [21]:
# Smoke test: run the untrained model on (x, target_id) of the sample batch and show the logits.
model(*sample_batch[:2])[0]

tensor([[ 0.1994,  0.1229, -0.1232,  ...,  0.3084,  0.4550,  0.4504],
        [ 0.2230, -0.5328, -0.0037,  ..., -0.6866, -0.2128,  0.1493],
        [-0.3094, -0.7514, -0.1331,  ...,  0.8114,  0.3952, -0.0308],
        ...,
        [ 0.2798, -0.6239, -0.3457,  ..., -1.4099, -0.2568,  0.6621],
        [-1.0329, -0.8234, -0.9820,  ..., -0.4109,  0.8155, -1.5426],
        [-0.5693,  0.2341,  0.2793,  ..., -0.4258,  1.3532, -0.5722]],
       grad_fn=<SqueezeBackward1>)

### Training

In [22]:
LR = 2e-3  # Adam learning rate and OneCycleLR max_lr used by do_train()
EPOCHS = 10  # number of epochs run by one do_train() call
MODEL_PATH = '/kaggle/working/sakt.pth'  # file the model state_dict is written to / read from

In [23]:
def load_from_item(item):
    """Move one batch to the global ``device`` and derive the target mask.

    ``item`` is indexed as ``(x, target_id, label)``. Returns
    ``(x, target_id, label, target_mask)`` where ``x`` / ``target_id`` are
    long tensors, ``label`` is a float tensor and ``target_mask`` is True
    wherever ``target_id`` is non-zero.
    """
    x, target_id = (item[position].to(device).long() for position in (0, 1))
    label = item[2].to(device).float()
    target_mask = target_id.ne(0)
    return x, target_id, label, target_mask

def update_stats(tbar, train_loss, loss, output, label, num_corrects, num_total, labels, outs):
    """Accumulate the statistics of one batch and refresh the progress bar.

    Appends the batch loss to ``train_loss`` and the flattened labels / raw
    outputs to ``labels`` / ``outs`` (all modified in place), then returns
    the updated ``(num_corrects, num_total)`` counters. A prediction counts
    as correct when ``sigmoid(output) >= 0.5`` agrees with the label.
    """
    train_loss.append(loss.item())

    predicted = (torch.sigmoid(output) >= 0.5).long()
    batch_hits = (predicted == label).sum().item()

    labels.extend(label.view(-1).detach().cpu().numpy())
    outs.extend(output.view(-1).detach().cpu().numpy())

    tbar.set_description('loss - {:.4f}'.format(loss))
    return num_corrects + batch_hits, num_total + len(label)

def train_epoch(model, dataloader, optim, criterion, scheduler, device="cpu"):
    """Train ``model`` for one pass over ``dataloader``.

    Args:
        model: network called as ``model(x, target_id)``, returning ``(logits, _)``.
        dataloader: iterable of ``(x, target_id, label)`` batches.
        optim: optimizer stepped once per batch.
        criterion: loss applied to the masked logits and labels.
        scheduler: learning-rate scheduler, stepped once per batch.
        device: kept for interface compatibility; batches are moved by
            ``load_from_item``, which uses the notebook-level ``device``.

    Returns:
        The mean of the per-batch training losses (``nan`` for an empty loader).
    """
    model.train()

    batch_losses = []

    tbar = tqdm(dataloader)
    for item in tbar:
        x, target_id, label, target_mask = load_from_item(item)

        optim.zero_grad()
        output, _ = model(x, target_id)

        # Only positions selected by the mask contribute to the loss.
        output = torch.masked_select(output, target_mask)
        label = torch.masked_select(label, target_mask)

        loss = criterion(output, label)
        loss.backward()
        optim.step()
        scheduler.step()

        batch_losses.append(loss.item())
        tbar.set_description('loss - {:.4f}'.format(loss))

    return float(np.mean(batch_losses)) if batch_losses else float('nan')

def val_epoch(model, val_iterator, criterion, device="cpu"):
    """Evaluate ``model`` on ``val_iterator`` and return ``(loss, acc, auc)``.

    ``loss`` is the average of the per-batch losses, ``acc`` the share of
    masked positions predicted correctly at a 0.5 threshold, and ``auc`` the
    ROC AUC over all masked positions. The ``device`` argument is not used
    here; batches are moved by ``load_from_item``.
    """
    model.eval()

    batch_losses, labels, outs = [], [], []
    num_corrects = num_total = 0

    tbar = tqdm(val_iterator)
    for item in tbar:
        x, target_id, label, target_mask = load_from_item(item)

        with torch.no_grad():
            output, atten_weight = model(x, target_id)

        # Only positions selected by the mask are scored.
        output = torch.masked_select(output, target_mask)
        label = torch.masked_select(label, target_mask)

        loss = criterion(output, label)

        num_corrects, num_total = update_stats(
            tbar, batch_losses, loss, output, label, num_corrects, num_total, labels, outs
        )

    acc = num_corrects / num_total
    auc = roc_auc_score(labels, outs)
    mean_loss = np.average(batch_losses)

    return mean_loss, acc, auc


In [24]:
def do_train(best_auc=0.0):
    """Train the global ``model`` for ``EPOCHS`` epochs and checkpoint the best epoch.

    Reads the notebook-level ``LR``, ``EPOCHS``, ``MODEL_PATH``, ``device`` and
    the train / validation dataloaders. The state dict is written to
    ``MODEL_PATH`` only when the validation AUC improves, so the file always
    holds the best model seen (previously it was overwritten after every
    epoch, regardless of the score).

    Args:
        best_auc: validation AUC that has to be beaten before a checkpoint is
            written. Pass the value returned by an earlier call to keep a
            fine-tuning run from overwriting a better checkpoint.

    Returns:
        The best validation AUC reached (never lower than ``best_auc``).
    """
    optimizer = torch.optim.Adam(model.parameters(), lr=LR)
    criterion = nn.BCEWithLogitsLoss()
    scheduler = torch.optim.lr_scheduler.OneCycleLR(optimizer, max_lr=LR,
                                                    steps_per_epoch=len(train_dataloader), epochs=EPOCHS)
    model.to(device)
    criterion.to(device)
    for epoch in range(EPOCHS):
        train_epoch(model, train_dataloader, optimizer, criterion, scheduler, device)
        val_loss, val_acc, val_auc = val_epoch(model, val_dataloader, criterion, device)
        print(f"epoch - {epoch + 1} val_loss - {val_loss:.3f} acc - {val_acc:.3f} auc - {val_auc:.3f}")
        if best_auc < val_auc:
            print(f'epoch - {epoch + 1} best model with val auc: {val_auc}')
            best_auc = val_auc
            # Persist only improved weights.
            torch.save(model.state_dict(), MODEL_PATH)
    return best_auc

In [25]:
# First training run, using the LR / EPOCHS values defined above.
do_train()

HBox(children=(FloatProgress(value=0.0, max=11575.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=1280.0), HTML(value='')))


epoch - 1 val_loss - 0.547 acc - 0.719 auc - 0.755
epoch - 1 best model with val auc: 0.754837215516602


HBox(children=(FloatProgress(value=0.0, max=11575.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=1280.0), HTML(value='')))


epoch - 2 val_loss - 0.543 acc - 0.722 auc - 0.760
epoch - 2 best model with val auc: 0.7597044161053049


HBox(children=(FloatProgress(value=0.0, max=11575.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=1280.0), HTML(value='')))


epoch - 3 val_loss - 0.541 acc - 0.724 auc - 0.761
epoch - 3 best model with val auc: 0.7612668806818147


HBox(children=(FloatProgress(value=0.0, max=11575.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=1280.0), HTML(value='')))


epoch - 4 val_loss - 0.540 acc - 0.725 auc - 0.763
epoch - 4 best model with val auc: 0.7633911043318008


HBox(children=(FloatProgress(value=0.0, max=11575.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=1280.0), HTML(value='')))


epoch - 5 val_loss - 0.538 acc - 0.726 auc - 0.765
epoch - 5 best model with val auc: 0.7654638383587644


HBox(children=(FloatProgress(value=0.0, max=11575.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=1280.0), HTML(value='')))


epoch - 6 val_loss - 0.536 acc - 0.727 auc - 0.767
epoch - 6 best model with val auc: 0.767180146440579


HBox(children=(FloatProgress(value=0.0, max=11575.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=1280.0), HTML(value='')))


epoch - 7 val_loss - 0.536 acc - 0.728 auc - 0.768
epoch - 7 best model with val auc: 0.7682799637083612


HBox(children=(FloatProgress(value=0.0, max=11575.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=1280.0), HTML(value='')))


epoch - 8 val_loss - 0.535 acc - 0.728 auc - 0.769
epoch - 8 best model with val auc: 0.7690251953130177


HBox(children=(FloatProgress(value=0.0, max=11575.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=1280.0), HTML(value='')))


epoch - 9 val_loss - 0.535 acc - 0.729 auc - 0.769
epoch - 9 best model with val auc: 0.769281306088584


HBox(children=(FloatProgress(value=0.0, max=11575.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=1280.0), HTML(value='')))


epoch - 10 val_loss - 0.535 acc - 0.729 auc - 0.769
epoch - 10 best model with val auc: 0.7692956011943171


In [26]:
# Second, shorter run on the already trained model with a 10x smaller learning rate.
# do_train() reads LR and EPOCHS from the notebook globals, so rebinding them here is enough.
LR, EPOCHS = 2e-4, 3

do_train()

HBox(children=(FloatProgress(value=0.0, max=11575.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=1280.0), HTML(value='')))


epoch - 1 val_loss - 0.535 acc - 0.729 auc - 0.769
epoch - 1 best model with val auc: 0.7691931374909687


HBox(children=(FloatProgress(value=0.0, max=11575.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=1280.0), HTML(value='')))


epoch - 2 val_loss - 0.535 acc - 0.729 auc - 0.769
epoch - 2 best model with val auc: 0.7693094268414359


HBox(children=(FloatProgress(value=0.0, max=11575.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=1280.0), HTML(value='')))


epoch - 3 val_loss - 0.534 acc - 0.729 auc - 0.769
epoch - 3 best model with val auc: 0.7693746769622746


### Predict

In [27]:
# Rebuild the architecture and restore the checkpointed weights.
# map_location lets a checkpoint written on GPU be loaded on a CPU-only session too.
model = create_model()
model.load_state_dict(torch.load(MODEL_PATH, map_location=device))
model.to(device)

SAKTModel(
  (encoder): Encoder(
    (embedding): Embedding(27047, 128)
    (pos_embedding): Embedding(179, 128)
    (e_embedding): Embedding(13524, 128)
    (layers): ModuleList(
      (0): TransformerBlock(
        (multi_att): MultiheadAttention(
          (out_proj): _LinearWithBias(in_features=128, out_features=128, bias=True)
        )
        (dropout): Dropout(p=0.1, inplace=False)
        (layer_normal): LayerNorm((128,), eps=1e-05, elementwise_affine=True)
        (ffn): FFN(
          (lr1): Linear(in_features=128, out_features=128, bias=True)
          (relu): ReLU()
          (bn): BatchNorm1d(179, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (lr2): Linear(in_features=128, out_features=128, bias=True)
          (dropout): Dropout(p=0.1, inplace=False)
        )
        (layer_normal_2): LayerNorm((128,), eps=1e-05, elementwise_affine=True)
      )
    )
    (dropout): Dropout(p=0.1, inplace=False)
  )
  (pred): Linear(in_features=128, out_featu

In [28]:
class TestDataset(Dataset):
    """Builds one model input per row of a test batch from the stored user histories.

    ``samples`` maps a user id to ``(content_id, answered_correctly)`` arrays
    and must expose the known ids through ``samples.index``; ``test_df`` needs
    the columns ``user_id`` and ``content_id``.
    """

    def __init__(self, samples, test_df, n_skill, max_seq=100):
        super().__init__()
        self.samples = samples
        self.user_ids = list(test_df["user_id"].unique())
        self.test_df = test_df
        self.n_skill = n_skill
        self.max_seq = max_seq

    def __len__(self):
        return self.test_df.shape[0]

    def __getitem__(self, index):
        """Return ``(x, questions)`` for test row ``index``, each of length ``max_seq - 1``.

        ``x`` encodes the most recent known interactions of the user
        (``content_id`` plus ``n_skill`` when answered correctly, zero-padded
        on the left); ``questions`` is the matching question sequence shifted
        by one step and ending with the question of the test row. Users
        without stored history get all-zero history.
        """
        row = self.test_df.iloc[index]
        user_id, target_id = row['user_id'], row['content_id']

        content_id_seq = np.zeros(self.max_seq, dtype=int)
        answered_correctly_seq = np.zeros(self.max_seq, dtype=int)

        if user_id in self.samples.index:
            content_id, answered_correctly = self.samples[user_id]
            seq_len = len(content_id)

            if seq_len < self.max_seq:
                # Right-align the short history; leading zeros act as padding.
                content_id_seq[-seq_len:] = content_id
                answered_correctly_seq[-seq_len:] = answered_correctly
            else:
                # Use the latest max_seq interactions (views of the stored arrays).
                content_id_seq = content_id[-self.max_seq:]
                answered_correctly_seq = answered_correctly[-self.max_seq:]

        # Copy before the in-place add so the stored history is never modified.
        x = content_id_seq[1:].copy()
        x += (answered_correctly_seq[1:] == 1) * self.n_skill

        questions = np.append(content_id_seq[2:], [target_id])

        return x, questions

In [29]:
import riiideducation

# Create the competition environment and the iterator over the test batches;
# the prediction loop below unpacks each item as (test_df, sample_prediction_df).
env = riiideducation.make_env()
iter_test = env.iter_test()

In [30]:
import psutil

model.eval()

# Previous test batch; its labels only arrive together with the next batch.
prev_test_df = None

for (test_df, sample_prediction_df) in tqdm(iter_test):

    # --- 1. Fold the previous batch (now labelled) into the user histories ---
    # Skipped when memory is nearly exhausted, so the histories stop growing.
    memory_percent = psutil.virtual_memory().percent
    if (prev_test_df is not None) and (memory_percent < 90):
        print(memory_percent)
        # NOTE(review): eval() executes arbitrary text supplied by the API.
        # A literal parser (ast.literal_eval / json.loads) would be the safer
        # choice - left unchanged here, to be confirmed against the data format.
        prev_test_df['answered_correctly'] = eval(test_df['prior_group_answers_correct'].iloc[0])
        prev_test_df = prev_test_df[prev_test_df.content_type_id == False]
        prev_group = prev_test_df[['user_id', 'content_id', 'answered_correctly']].groupby('user_id').apply(lambda r: (
            r['content_id'].values,
            r['answered_correctly'].values))
        for prev_user_id in prev_group.index:
            prev_group_content = prev_group[prev_user_id][0]
            prev_group_answered_correctly = prev_group[prev_user_id][1]
            if prev_user_id in group.index:
                # Known user: append the new interactions to the stored history.
                group[prev_user_id] = (np.append(group[prev_user_id][0], prev_group_content),
                                       np.append(group[prev_user_id][1], prev_group_answered_correctly))
            else:
                group[prev_user_id] = (prev_group_content, prev_group_answered_correctly)

            # Only the latest MAX_SEQ interactions are ever fed to the model.
            if len(group[prev_user_id][0]) > MAX_SEQ:
                new_group_content = group[prev_user_id][0][-MAX_SEQ:]
                new_group_answered_correctly = group[prev_user_id][1][-MAX_SEQ:]
                group[prev_user_id] = (new_group_content, new_group_answered_correctly)

    # --- 2. Predict the question rows of the current batch ---
    prev_test_df = test_df.copy()
    # .copy() so that adding the prediction column below writes to an
    # independent frame instead of a slice of the original batch.
    test_df = test_df[test_df.content_type_id == False].copy()

    if test_df.empty:
        # A batch without question rows would otherwise crash in
        # DataLoader(batch_size=0). Presumably env.predict still has to be
        # called once per batch - TODO confirm against the competition API.
        test_df['answered_correctly'] = np.array([], dtype='float64')
        env.predict(test_df[['row_id', 'answered_correctly']])
        continue

    test_dataset = TestDataset(group, test_df, n_skill, max_seq=MAX_SEQ)
    # One batch containing every row of this test group.
    test_dataloader = DataLoader(test_dataset, batch_size=len(test_df), shuffle=False)

    item = next(iter(test_dataloader))
    x = item[0].to(device).long()
    target_id = item[1].to(device).long()

    with torch.no_grad():
        output, _ = model(x, target_id)

    output = torch.sigmoid(output)
    # The last position holds the prediction for the question of the test row.
    output = output[:, -1]
    test_df['answered_correctly'] = output.cpu().numpy()
    env.predict(test_df.loc[test_df['content_type_id'] == 0, ['row_id', 'answered_correctly']])

HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

28.8
28.8
28.8



In [31]:
# Show the last test batch together with the predicted answered_correctly column.
test_df

Unnamed: 0_level_0,row_id,timestamp,user_id,content_id,content_type_id,task_container_id,prior_question_elapsed_time,prior_question_had_explanation,prior_group_answers_correct,prior_group_responses,answered_correctly
group_num,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
3,74,75311,275030867,8308,0,3,15000.0,False,"[1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0, 1, 1, 0, ...","[0, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 3, 0, 0, ...",0.46337
3,75,31220886463,1305988022,396,0,4163,19000.0,True,,,0.574607
3,76,48613916248,1310228392,11869,0,1458,26333.0,True,,,0.510368
3,77,48613916248,1310228392,11871,0,1458,26333.0,True,,,0.976178
3,78,48613916248,1310228392,11870,0,1458,26333.0,True,,,0.566138
3,79,48613916248,1310228392,11872,0,1458,26333.0,True,,,0.506688
3,80,48613916248,1310228392,11868,0,1458,26333.0,True,,,0.964257
3,81,4693192735,1637273633,5935,0,3149,19000.0,True,,,0.813115
3,82,1254131274,674533997,6000,0,1046,10000.0,True,,,0.338717
3,84,69704234415,2093197291,12611,0,5448,28750.0,True,,,0.836856


In [32]:
# Rebuild a TestDataset from the last batch.
# NOTE(review): the result is not used by any later cell visible here.
test_dataset = TestDataset(group, test_df, n_skill, max_seq=MAX_SEQ)

In [33]:
# Save the per-user histories as a pickle so that other notebooks can reuse them.
group.to_pickle('/kaggle/working/group.pkl')