This notebook shows how to import a DPFA model as a TensorFlow Keras model, train it, and evaluate it on a test set.

# 1. System Setup

In [1]:
from google.colab import drive
import tensorflow as tf
import os
import sys

# Mount point for Google Drive inside the Colab VM.
# NOTE(review): the spelling 'dirve' is kept on purpose - the data files
# loaded later in this notebook are read from under /content/dirve, so
# renaming the mount point here alone would break data loading.
BASE_DIR = '/content/dirve'
# Root folder of the mounted drive. Spelled 'MyDrive' (no space) so it agrees
# with the os.chdir() cell and with the paths the data is actually read from.
CODE_DIR = os.path.join(BASE_DIR, 'MyDrive')
# Location of the dataset CSV files inside the project folder.
DATA_DIR = os.path.join(CODE_DIR, 'dpfa', 'data')

In [2]:
# Check that a GPU is visible to TensorFlow; an empty list here means no GPU
# was detected.
tf.config.list_physical_devices('GPU')

[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]

In [3]:
# Mount Google Drive at BASE_DIR so the project code and data stored there
# are reachable from the Colab filesystem. force_remount=True remounts even
# if the drive is already mounted.
drive.mount(BASE_DIR, force_remount=True)

Mounted at /content/dirve


In [4]:
# Make the project source directory the working directory: the relative path
# ../requirements.txt used below is resolved from here, and the project
# module imports in later cells presumably rely on it as well.
# NOTE(review): the path is built with 'MyDrive' directly rather than from
# CODE_DIR - confirm both refer to the same Drive folder.
os.chdir(os.path.join(BASE_DIR, 'MyDrive', 'dpfa', 'dpfa'))

In [5]:
# only run once at start
!pip install -r ../requirements.txt 

Collecting charset-normalizer==2.0.3
  Downloading charset_normalizer-2.0.3-py3-none-any.whl (35 kB)
Collecting Cython==0.29.24
  Downloading Cython-0.29.24-cp37-cp37m-manylinux1_x86_64.whl (2.0 MB)
[K     |████████████████████████████████| 2.0 MB 12.8 MB/s 
Collecting future==0.18.2
  Downloading future-0.18.2.tar.gz (829 kB)
[K     |████████████████████████████████| 829 kB 46.2 MB/s 
Collecting google-api-core==1.31.0
  Downloading google_api_core-1.31.0-py2.py3-none-any.whl (93 kB)
[K     |████████████████████████████████| 93 kB 1.4 MB/s 
[?25hCollecting google-api-python-client==2.14.0
  Downloading google_api_python_client-2.14.0-py2.py3-none-any.whl (7.1 MB)
[K     |████████████████████████████████| 7.1 MB 28.9 MB/s 
[?25hCollecting google-auth==1.33.1
  Downloading google_auth-1.33.1-py2.py3-none-any.whl (152 kB)
[K     |████████████████████████████████| 152 kB 46.0 MB/s 
[?25hCollecting google-auth-httplib2==0.1.0
  Downloading google_auth_httplib2-0.1.0-py2.py3-none-an

In [5]:
# dpfa
import dpfa_config, preprocessing, model, losses, main

# 2. Load Data

In [6]:
dataset = 'assist2017'

# synthetic5 uses a fixed max_len of 50; every other dataset takes the
# project-wide default from dpfa_config.
max_len = 50 if dataset == 'synthetic5' else dpfa_config.MAX_LEN

# nips2020 is not supported by this notebook.
if dataset == 'nips2020':
    raise NotImplementedError
data_dir = dpfa_config.DATA_DIR

# Positional arguments: dataset name, data directory, cross-validation index.
# The remaining per-dataset defaults are unpacked from dpfa_config.config.
data = preprocessing.DataSet(
    dataset,
    data_dir,
    0,
    max_len=max_len,
    **dpfa_config.config[dataset]
)

In [7]:
# Read the train and test CSV files for the configured dataset and CV index.
data.load_data()

train data in /content/dirve/MyDrive/dpfa/data/assist2017-cv-train-0.csv is loaded
test data in /content/dirve/MyDrive/dpfa/data/assist2017-cv-test-0.csv is loaded


In [8]:
# Print summary statistics (number of attempts, students and items).
data.describe_data()

dataset = assist2017
num of attempts = 393.663 K
num of students = 1709
num of items = 4119


In [9]:
# Encode the raw data, extract and fold the sequences, and build the
# (inputs, targets) pairs for the train and test splits.
(train_inputs, train_targets), (test_inputs, test_targets) = data.preprocess()

input dataset is raw, will perform encode and extract features
problem encoder is created
train and test data are encoded
train and test sequences are extracted
100%|██████████| 1367/1367 [00:00<00:00, 46656.09it/s]
100%|██████████| 342/342 [00:00<00:00, 42254.39it/s]
train and test sequences are folded
train and test inputs and targets are created


# 3. Build Model

In [10]:
# Instantiate the DPFA model.
# NOTE(review): the exact meaning of each option is defined in
# model.DPFABase, which is not visible in this notebook - see that class.
dpfa_model = model.DPFABase(
    hidden_size=16,
    dropout=0.2,
    # taken from the preprocessed dataset rather than hard-coded
    item_vocab_size=data.item_vocab_size,
    regulate_dot_product=False,
    time_decay=False,
    normalize_embedding=True,
    )

In [11]:
batch_size = 512
# Base learning rate of 0.008, scaled by batch_size / 128 and by max_len / 128.
# The expression is kept in this exact order so the float result is unchanged.
lr = 0.008 * batch_size / 128 * max_len / 128

adam = tf.keras.optimizers.Adam(learning_rate=lr)
# NOTE(review): the meaning of the 0.1 argument is defined in
# losses.PaddedBinaryCrossentropyLoss - confirm there.
padded_bce = losses.PaddedBinaryCrossentropyLoss(0.1)

dpfa_model.compile(optimizer=adam, loss=padded_bce)

In [12]:
# Train for 60 epochs with the batch size chosen above. validation_split=0.1
# makes Keras hold out 10% of the training samples for per-epoch validation.
dpfa_model.fit(
    train_inputs,
    train_targets,
    validation_split=0.1,
    epochs=60,
    batch_size=batch_size,
)

Epoch 1/60
Instructions for updating:
The `validate_indices` argument has no effect. Indices are always validated on CPU and never validated on GPU.
Epoch 2/60
Epoch 3/60
Epoch 4/60
Epoch 5/60
Epoch 6/60
Epoch 7/60
Epoch 8/60
Epoch 9/60
Epoch 10/60
Epoch 11/60
Epoch 12/60
Epoch 13/60
Epoch 14/60
Epoch 15/60
Epoch 16/60
Epoch 17/60
Epoch 18/60
Epoch 19/60
Epoch 20/60
Epoch 21/60
Epoch 22/60
Epoch 23/60
Epoch 24/60
Epoch 25/60
Epoch 26/60
Epoch 27/60
Epoch 28/60
Epoch 29/60
Epoch 30/60
Epoch 31/60
Epoch 32/60
Epoch 33/60
Epoch 34/60
Epoch 35/60
Epoch 36/60
Epoch 37/60
Epoch 38/60
Epoch 39/60
Epoch 40/60
Epoch 41/60
Epoch 42/60
Epoch 43/60
Epoch 44/60
Epoch 45/60
Epoch 46/60
Epoch 47/60
Epoch 48/60
Epoch 49/60
Epoch 50/60
Epoch 51/60
Epoch 52/60
Epoch 53/60
Epoch 54/60
Epoch 55/60
Epoch 56/60
Epoch 57/60
Epoch 58/60
Epoch 59/60
Epoch 60/60


<tensorflow.python.keras.callbacks.History at 0x7fb2fb5fd2d0>

In [13]:
# Score the trained model on the test split; the value shown is the AUC.
# test_size is set to the number of rows in test_targets.
# NOTE(review): how test_size is used is defined in main.evaluate - confirm there.
main.evaluate(test_inputs, test_targets, dpfa_model, test_size=test_targets.shape[0])

0.7893170156788926