<a href="https://colab.research.google.com/github/theresaskruzna/riiid_knowledge_tracing/blob/main/03_Model_Building.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
import pandas as pd

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout, BatchNormalization
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint

Building an RNN (LSTM) model for binary classification

In [None]:
def sequential_lstm_model(input_shape, hidden_units=128):
    """Build, compile and summarise a stacked two-layer LSTM binary classifier.

    Architecture: Input -> LSTM (returns sequences) -> Dropout(0.2)
    -> LSTM (returns last state) -> Dropout(0.2) -> BatchNormalization
    -> Dense(1, sigmoid).

    Args:
        input_shape: Shape of a single sample, without the batch dimension.
            For a training array ``X`` pass ``X.shape[1:]``
            (presumably ``(timesteps, n_features)`` -- confirm against the
            data-preparation notebook).
        hidden_units: Number of units in each of the two LSTM layers.

    Returns:
        The compiled ``Sequential`` model. As a side effect the model summary
        is printed.
    """
    model = Sequential()  # linear stack of layers

    # The input layer is driven by the `input_shape` argument rather than a
    # notebook-global array, so the builder works on a fresh kernel and for
    # any dataset. `tf.keras.Input` is reached through the already-imported
    # `tf` module, so no extra import is required.
    model.add(tf.keras.Input(shape=input_shape))

    # First LSTM (long short-term memory) layer returns the full sequence so
    # that a second LSTM can be stacked on top. The Input layer above already
    # fixes the shape, so `input_shape` is not repeated here.
    model.add(LSTM(hidden_units, return_sequences=True))
    model.add(Dropout(0.2))  # randomly drop 20% of activations during training
    # TODO: evaluate recurrent_dropout=0.2 on the LSTM layers.

    # Second LSTM layer returns only its final output, feeding the classifier head.
    model.add(LSTM(hidden_units, return_sequences=False))
    model.add(Dropout(0.2))

    model.add(BatchNormalization())  # stabilise training and improve convergence

    # Single sigmoid unit -> probability in [0, 1] for binary classification;
    # the L2 kernel regulariser penalises large weights to limit overfitting.
    model.add(
        Dense(
            1,
            activation='sigmoid',
            kernel_regularizer=tf.keras.regularizers.l2(0.01),
        )
    )

    model.compile(
        optimizer='adam',
        loss='binary_crossentropy',
        # The explicit metric name is intended to keep the history/log keys
        # ('auc', 'val_auc') stable when this builder is called repeatedly in
        # one session -- NOTE(review): confirm against the installed Keras version.
        metrics=['accuracy', tf.keras.metrics.AUC(name='auc')],
    )
    model.summary()

    return model

Tweak the model by changing the number of hidden units and layers to optimise its architecture.

Early stopping

In [None]:
# Early-stopping callback: halts training once validation accuracy plateaus.
early_stopping = EarlyStopping(
    monitor="val_accuracy",     # metric watched on the validation dataset
    start_from_epoch=50,        # warm-up: improvement is not monitored before this epoch
    patience=10,                # epochs without improvement tolerated before stopping
    min_delta=0.003,            # an improvement must be at least 0.003 (0.3 percentage points)
    restore_best_weights=True,  # on stop, roll back to the weights of the best epoch
)

Model checkpoint

In [None]:
# Checkpoint callback: keeps the best model seen so far on disk.
save = ModelCheckpoint(
    filepath="best_model.keras",  # destination file for the saved model
    monitor="val_accuracy",       # metric watched on the validation set
    save_freq="epoch",            # evaluate the save condition after every epoch
    save_best_only=True,          # overwrite only when the monitored metric improves
    save_weights_only=False,      # store architecture and weights together in one file
)