# CIE 633: Deep Learning Fundamentals - Spring 2021
# Project - Due Date: July 5, 2021
# Name: Muhammad Hamdy AlAref

**Disclaimer:** I relied heavily on the official TensorFlow [tutorial](https://www.tensorflow.org/text/tutorials/text_classification_rnn) on text classification with an RNN.

## Exploring the data

In [None]:
import pandas as pd

# Load the CommonLit readability train and test CSVs into DataFrames
# (paths are relative to the notebook's working directory).
data_train = pd.read_csv('../input/commonlitreadabilityprize/train.csv')
data_test = pd.read_csv('../input/commonlitreadabilityprize/test.csv')

In [None]:
# Preview the first rows of the training table.
data_train.head()

In [None]:
# Summary statistics for every column; include='all' also covers the
# non-numeric columns, not just the numeric ones.
data_train.describe(include='all')

In [None]:
# Pull the text inputs and the targets out of the training table as NumPy
# arrays; the test table only contributes its excerpts.
X_train, y_train = (data_train[column].to_numpy() for column in ('excerpt', 'target'))
X_test = data_test['excerpt'].to_numpy()

# sanity check: print the shape of each array
for i in (X_train, y_train, X_test):
    print(i.shape)

## RNN

In [None]:
import tensorflow as tf

def build_model(X, y, n_lstm_layers=1, n_units_per_layer=64, learning_rate=1e-4, max_epochs=50, patience=3, dropout=0.5, validation_split=0.2):
    """Build, compile and train a bidirectional-LSTM regressor on raw text.

    The network is: TextVectorization (adapted on ``X``) -> Embedding ->
    ``n_lstm_layers`` bidirectional LSTM layers -> Dense(relu) -> Dropout ->
    Dense(1). It is trained with mean squared error and reports RMSE.

    Args:
        X: Training texts; used both to adapt the vectorizer's vocabulary
            and as the model input passed to ``fit``.
        y: Regression targets aligned with ``X``.
        n_lstm_layers: Number of stacked bidirectional LSTM layers
            (expected to be >= 1). Layer ``i`` (0-based) has
            ``n_units_per_layer // 2**i`` units.
        n_units_per_layer: Embedding dimension, width of the first LSTM
            layer and width of the hidden dense layer.
        learning_rate: Learning rate handed to the Adam optimizer.
        max_epochs: Upper bound on the number of training epochs.
        patience: Number of epochs without improvement in validation RMSE
            after which training stops early.
        dropout: Dropout rate applied before the output layer.
        validation_split: Fraction of the data ``fit`` holds out for
            validation. When it is 0 (or otherwise falsy) there is no
            validation metric, so early stopping is not attached and the
            model trains for ``max_epochs`` epochs.

    Returns:
        The trained ``tf.keras.Sequential`` model.
    """
    # Learn the vocabulary from the training texts so that raw strings can be
    # fed straight into the model.
    # NOTE(review): newer TensorFlow releases expose this layer as
    # tf.keras.layers.TextVectorization; the experimental path is kept here
    # to match the environment the notebook was written for.
    encoder = tf.keras.layers.experimental.preprocessing.TextVectorization()
    encoder.adapt(X)

    # Every recurrent layer except the last returns the full sequence so the
    # next recurrent layer has a sequence to consume; widths halve with depth.
    recurrent_layers = [
        tf.keras.layers.Bidirectional(
            tf.keras.layers.LSTM(n_units_per_layer // 2**depth, return_sequences=True))
        for depth in range(n_lstm_layers - 1)
    ]
    recurrent_layers.append(
        tf.keras.layers.Bidirectional(
            tf.keras.layers.LSTM(n_units_per_layer // 2**(n_lstm_layers - 1))))

    model = tf.keras.Sequential([
        encoder,
        # mask_zero=True makes downstream layers skip positions holding
        # index 0, which the vectorizer uses for padding.
        tf.keras.layers.Embedding(input_dim=len(encoder.get_vocabulary()), output_dim=n_units_per_layer, mask_zero=True),
        *recurrent_layers,
        tf.keras.layers.Dense(n_units_per_layer, activation='relu'),
        tf.keras.layers.Dropout(dropout),
        tf.keras.layers.Dense(1),
    ])

    # Use the caller's learning rate; it was previously hard-coded to 1e-4,
    # which made the `learning_rate` argument a no-op.
    model.compile(loss=tf.keras.losses.MeanSquaredError(),
                  optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate),
                  metrics=[tf.keras.metrics.RootMeanSquaredError()])

    # Early stopping watches the validation RMSE, which only exists when some
    # data is held out. Without a validation split the callback could only
    # emit a warning every epoch, so it is left out in that case.
    callbacks = []
    if validation_split:
        callbacks.append(
            tf.keras.callbacks.EarlyStopping(
                monitor='val_root_mean_squared_error',
                patience=patience,
                restore_best_weights=True))

    model.fit(X, y,
              epochs=max_epochs,
              validation_split=validation_split,
              callbacks=callbacks)

    return model

In [None]:
# Depth sweep: train with 1, 2 and 3 stacked LSTM layers (all other settings
# left at their defaults) and compare the training logs.

for n_lstm_layers in (1, 2, 3):
    print(f'Trying {n_lstm_layers} LSTM layer(s)')
    build_model(X_train, y_train, n_lstm_layers=n_lstm_layers)

In [None]:
# Width sweep: with a single LSTM layer, try widths 2**4 .. 2**7 and compare
# the training logs.

for n_units_per_layer in (16, 32, 64, 128):
    print(f'Trying {n_units_per_layer} units per layer')
    build_model(
        X_train,
        y_train,
        n_lstm_layers=1,
        n_units_per_layer=n_units_per_layer,
    )

In [None]:
# Learning-rate sweep: with one 32-unit LSTM layer, try the powers of ten
# from 1e-6 up to 1e-3 and compare the training logs.

for learning_rate in (1e-6, 1e-5, 1e-4, 1e-3):
    print(f'Trying learning rate = {learning_rate:.0e}')
    build_model(
        X_train,
        y_train,
        n_lstm_layers=1,
        n_units_per_layer=32,
        learning_rate=learning_rate,
    )

In [None]:
# Final model: one 32-unit LSTM layer, trained for up to 10 epochs on all of
# the training data (validation_split=0.0 holds nothing out).
predictor = build_model(
    X_train,
    y_train,
    n_lstm_layers=1,
    n_units_per_layer=32,
    learning_rate=1e-4,
    max_epochs=10,
    validation_split=0.0,
)

## Writing predictions

In [None]:
# Start from the sample submission so the output keeps its column layout.
data_predict = pd.read_csv('../input/commonlitreadabilityprize/sample_submission.csv')

# Run the trained model on the test excerpts and overwrite every column after
# the first with its predictions.
y_predict = predictor(X_test)
data_predict.iloc[:, 1:] = y_predict

# index=False keeps the DataFrame's row index out of the written file.
data_predict.to_csv('submission.csv', index=False)