In [11]:
import numpy as np
import pandas as pd
import pickle
import tensorflow as tf
import tensorflow.keras.backend as K
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ModelCheckpoint, LearningRateScheduler
from tensorflow.keras.layers import Input, Embedding, Dense, Flatten, Dropout, GRU, LSTM
from tensorflow.keras.models import Model
from sklearn.preprocessing import OneHotEncoder
from pathlib import Path, PureWindowsPath

In [9]:
# Colab-only step: mount Google Drive under /content/drive; later cells write their
# pickles and model checkpoints below ./drive/MyDrive.
from google.colab import drive
# NOTE(review): force_remount=True presumably forces a fresh mount even when Drive is already mounted - confirm against the Colab docs.
drive.mount('/content/drive', force_remount=True)

Mounted at /content/drive


In [3]:
!unzip ./dev_dataset_csv_small.zip

Archive:  ./dev_dataset_csv_small.zip
  inflating: train_set.csv           
  inflating: val_set.csv             


In [4]:
'''
Define a function for loading the data from a given folder path. 
(note: in this project, training, validation and test datasets are located in different folders)
'''
# You should write some code based on the content of "dataset-visualization.py"

# To make things easier, first create a function to load data for the first problem (beam prediction using past beams).
# Make sure you understand the dataset composition before doing this.
# The 9th column (counting from 1) is the true-label column (we want to predict the first future beam).
# You should one-hot-encode the true-label column. There's an sklearn function for this.

def load_beam_data(path):
    """Read one beam-sequence CSV and split it into model inputs and labels.

    Args:
        path: location of a CSV file containing the columns "Beam 1" ... "Beam 9".

    Returns:
        A tuple ``(features, target)`` of NumPy arrays: ``features`` holds the
        columns "Beam 1".."Beam 8" (one row per sample, 8 columns) and
        ``target`` holds the single column "Beam 9" as a 2-D array with one
        column.
    """
    table = pd.read_csv(path)
    # The eight observed beams are the inputs; the ninth (first future beam) is the label.
    past_beam_cols = [f"Beam {k}" for k in range(1, 9)]
    future_beam_cols = ["Beam 9"]
    return table[past_beam_cols].to_numpy(), table[future_beam_cols].to_numpy()

In [5]:
'''
Define functions for creating a specific type of network architecture.
In this project, we will implement a first architecture based on GRUs (baseline approach) using only the beam indexes as features.
A second architecture (based on 3D CNN) will additionally exploit camera images to improve the prediction accuracy.
'''
# [1] https://arxiv.org/abs/2002.02445; Github: https://github.com/malrabeiah/VABT/tree/master

# Create a method that returns the first model using keras APIs
def build_gru_model(input_size, codebook_size, embed_size=50, hidden_size=20, num_layers=2, dropout=0.2, return_seq=False):
    """Assemble the GRU beam-prediction network with the Keras functional API.

    Pipeline: beam-index input -> embedding -> ``num_layers`` stacked GRUs ->
    flatten -> softmax over the codebook.

    Args:
        input_size: number of beam indices per input sample (sequence length).
        codebook_size: embedding vocabulary size and width of the softmax output.
        embed_size: dimension of each embedding vector.
        hidden_size: number of units in every GRU layer.
        num_layers: how many GRU layers to stack.
        dropout: ``dropout`` argument passed to every GRU layer.
        return_seq: ``return_sequences`` setting of the LAST GRU layer only;
            all earlier layers always return sequences.

    Returns:
        The (uncompiled) Keras ``Model``.
    """
    inputs = Input(shape=(input_size,), name="input_layer")
    # Embedding (as in paper [1]): [batch_size, input_size] -> [batch_size, input_size, embed_size]
    hidden = Embedding(codebook_size, embed_size, name="embedding_layer")(inputs)

    for depth in range(1, num_layers + 1):
        # Intermediate layers must emit the full sequence to feed the next GRU;
        # only the final layer follows the caller's `return_seq` choice.
        emit_sequence = return_seq if depth == num_layers else True
        hidden = GRU(hidden_size, return_sequences=emit_sequence, dropout=dropout,
                     name="recurrent_layer_" + str(depth))(hidden)

    # Flatten so the classifier receives a 2-D tensor whether or not the last GRU returned sequences.
    flat = Flatten()(hidden)
    out = Dense(codebook_size, activation='softmax')(flat)

    return Model(inputs=inputs, outputs=out)

In [6]:
# Load the training and validation splits (note: there's no need to scale the data).
train_path = "./train_set.csv"
val_path = "./val_set.csv"
Xtr, ytr = load_beam_data(train_path)
Xval, yval = load_beam_data(val_path)
# Xts, yts = load_beam_data(test_path) # Test data is formatted in a different way, need to modify the loader
print(f"Training data shape: {Xtr.shape}")
print(f"Validation data shape: {Xval.shape}")

# One-hot encode the targets against the FULL codebook instead of whatever labels happen
# to occur in the training targets. Codeword 0 never appears in the collected data, and any
# other codeword missing from the training labels would silently shrink the one-hot width
# (or make `transform` fail on validation labels). Passing the categories explicitly avoids both.
n_codewords = int(max(Xtr.max(), ytr.max(), Xval.max(), yval.max())) + 1
# Build the category list in the targets' own dtype so the encoder compares like with like.
enc = OneHotEncoder(categories=[np.arange(n_codewords, dtype=ytr.dtype)])
enc.fit(ytr)
ytr_e = enc.transform(ytr).toarray()
yval_e = enc.transform(yval).toarray()
print(f"Encoded training target shape: {ytr_e.shape}")
print(f"Encoded validation target shape: {yval_e.shape}")

Training data shape: (281100, 8)
Validation data shape: (120468, 8)
Encoded training target shape: (281100, 128)
Encoded validation target shape: (120468, 128)


In [8]:
K.clear_session()
input_size = Xtr.shape[1]
# The softmax layer needs exactly one unit per one-hot target column, so take the codebook
# size from the encoded targets. This is a plain int, unlike `np.max(Xtr) + 1`, which is a
# float and only looks at the training features.
codebook_size = ytr_e.shape[1]
# The embedding layer can only look up indices in [0, codebook_size).
assert 0 <= min(Xtr.min(), Xval.min()), "negative beam index in the input features"
assert max(Xtr.max(), Xval.max()) < codebook_size, "beam index outside the codebook"
print(codebook_size)
# create a model to verify the correctness
model = build_gru_model(input_size, int(codebook_size), return_seq=True)
# summary() prints the table itself and returns None, so do not wrap it in print().
model.summary()

128.0
Model: "functional_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_layer (InputLayer)     [(None, 8)]               0         
_________________________________________________________________
embedding_layer (Embedding)  (None, 8, 50)             6400      
_________________________________________________________________
recurrent_layer_1 (GRU)      (None, 8, 20)             4320      
_________________________________________________________________
recurrent_layer_2 (GRU)      (None, 8, 20)             2520      
_________________________________________________________________
flatten (Flatten)            (None, 160)               0         
_________________________________________________________________
dense (Dense)                (None, 128)               20608     
Total params: 33,848
Trainable params: 33,848
Non-trainable params: 0
____________________________________________

In [None]:
####### Parameters Optimization #########
# Candidate values for each hyper-parameter swept by the grid search.
lr_test = np.array([2e-4, 1e-3, 5e-3])
nlayers_test = np.array([2,3,4])
ret_seq_test = np.array([False, True])
embed_size_test = np.array([50, 100])
hidden_size_test = np.array([20, 40])
dropout_test = np.array([0.2, 0.5])
# Number of grid points = product of the sizes of ALL six grids. The layer grid must be
# counted through nlayers_test (not lr_test a second time), otherwise the count is wrong
# as soon as the two grids differ in length.
n_tests = (lr_test.size * nlayers_test.size * ret_seq_test.size
           * embed_size_test.size * hidden_size_test.size * dropout_test.size)
print(f"The nummber of combinations to test is: {n_tests}")

The nummber of combinations to test is: 144


In [None]:
# Smoke test: check that a pickle can be written to the mounted Drive folder.
with Path('./drive/MyDrive/test.pickle').open('wb') as f:
    f.write(pickle.dumps([nlayers_test, lr_test]))

In [None]:
n_epochs = 100
batch_size = 1000
# One axis per hyper-parameter grid (in sweep order) plus a trailing axis holding the
# per-epoch accuracy curve of each configuration. The second axis is sized by
# nlayers_test, the grid that actually indexes it.
grid_shape = (lr_test.size, nlayers_test.size, ret_seq_test.size,
              embed_size_test.size, hidden_size_test.size, dropout_test.size)
tr_accuracy = np.zeros(grid_shape + (n_epochs,))
val_accuracy = np.zeros(tr_accuracy.shape)
print(tr_accuracy.shape)
print(val_accuracy.shape)

results_path = './drive/MyDrive/param_optim.pickle'

# np.ndindex visits the grid with the last axis varying fastest, i.e. in the same order
# as six nested for-loops over the grids.
for i1, i2, i3, i4, i5, i6 in np.ndindex(*grid_shape):
    # Convert NumPy scalars to plain Python values before handing them to Keras.
    lr_i = float(lr_test[i1])
    nlayers_i = int(nlayers_test[i2])
    ret_seq_i = bool(ret_seq_test[i3])
    embed_size_i = int(embed_size_test[i4])
    hidden_size_i = int(hidden_size_test[i5])
    dropout_i = float(dropout_test[i6])

    # Drop the previous model's graph/state before building the next one.
    K.clear_session()
    model = build_gru_model(input_size, int(codebook_size), num_layers=nlayers_i,
                            embed_size=embed_size_i, hidden_size=hidden_size_i,
                            return_seq=ret_seq_i, dropout=dropout_i)
    # compile the model with proper optimizer (`learning_rate` replaces the deprecated `lr` keyword)
    opt = Adam(learning_rate=lr_i, amsgrad=True)
    model.compile(optimizer=opt, loss='categorical_crossentropy', metrics=['accuracy'])
    hist = model.fit(Xtr, ytr_e, validation_data=(Xval, yval_e), batch_size=batch_size, epochs=n_epochs)
    tr_accuracy[i1,i2,i3,i4,i5,i6,:] = hist.history['accuracy']
    val_accuracy[i1,i2,i3,i4,i5,i6,:] = hist.history['val_accuracy']

    # Persist after every configuration: the sweep is long, and an interrupted session
    # would otherwise lose every result gathered so far.
    with open(results_path, 'wb') as f:
        pickle.dump([tr_accuracy, val_accuracy], f)

# Final write (also produces the file if the grid is empty).
with open(results_path, 'wb') as f:
    pickle.dump([tr_accuracy, val_accuracy], f)

In [15]:
# retrain using the best parameters found in the steps above
# Start from a clean Keras session so nothing from earlier models lingers.
K.clear_session()
n_epochs = 200
batch_size = 1000
# best parameters
lr = 1e-3          # initial learning rate passed to Adam (scaled down later by the LR schedule callback)
nlayers = 3        # number of stacked GRU layers
embed_size = 100   # embedding dimension
ret_seq = True     # last GRU returns its full output sequence
hidden_size = 40   # units per GRU layer
dropout = 0.2      # dropout argument of each GRU layer

# The epoch number is part of the file name and save_best_only=True, so a new checkpoint
# file is written each time val_accuracy improves (one file per improving epoch).
model_path = "./drive/MyDrive/model-gru-{epoch:02d}.hdf5"
model_checkpoint = ModelCheckpoint(model_path, monitor="val_accuracy", save_best_only=True, verbose=1)

# Step-decay learning-rate schedule: every `up_epoch` epochs the rate is multiplied by `decay_factor`.
up_epoch = 50
decay_factor = 0.5
def decay_schedule(epoch, lr):
    """Return the learning rate to use for `epoch`.

    Args:
        epoch: epoch index (0-based, as used in the `epoch != 0` check).
        lr: the current learning rate.

    Returns:
        `lr * decay_factor` when `epoch` is a non-zero multiple of `up_epoch`,
        otherwise `lr` unchanged.
    """
    is_decay_epoch = epoch != 0 and epoch % up_epoch == 0
    return lr * decay_factor if is_decay_epoch else lr
# Apply the step-decay schedule at the start of every epoch.
lr_scheduler = LearningRateScheduler(decay_schedule)

model = build_gru_model(input_size, int(codebook_size), num_layers=nlayers, 
                        embed_size=embed_size, hidden_size=hidden_size,
                        return_seq=ret_seq, dropout=dropout)
# `learning_rate` is the supported keyword; the `lr` alias is deprecated.
opt = Adam(learning_rate=lr, amsgrad=True)
model.compile(optimizer=opt, loss='categorical_crossentropy', metrics=['accuracy'])
# `workers` only applies to generator / keras.utils.Sequence inputs, so it is omitted
# for these in-memory NumPy arrays.
hist = model.fit(Xtr, ytr_e, validation_data=(Xval, yval_e), batch_size=batch_size, 
                 epochs=n_epochs, callbacks=[model_checkpoint, lr_scheduler])

Epoch 1/200
Epoch 00001: val_accuracy improved from -inf to 0.73126, saving model to ./drive/MyDrive/model-gru-01.hdf5
Epoch 2/200
Epoch 00002: val_accuracy improved from 0.73126 to 0.79463, saving model to ./drive/MyDrive/model-gru-02.hdf5
Epoch 3/200
Epoch 00003: val_accuracy improved from 0.79463 to 0.81238, saving model to ./drive/MyDrive/model-gru-03.hdf5
Epoch 4/200
Epoch 00004: val_accuracy improved from 0.81238 to 0.82202, saving model to ./drive/MyDrive/model-gru-04.hdf5
Epoch 5/200
Epoch 00005: val_accuracy improved from 0.82202 to 0.82746, saving model to ./drive/MyDrive/model-gru-05.hdf5
Epoch 6/200
Epoch 00006: val_accuracy improved from 0.82746 to 0.83063, saving model to ./drive/MyDrive/model-gru-06.hdf5
Epoch 7/200
Epoch 00007: val_accuracy improved from 0.83063 to 0.83452, saving model to ./drive/MyDrive/model-gru-07.hdf5
Epoch 8/200
Epoch 00008: val_accuracy improved from 0.83452 to 0.83703, saving model to ./drive/MyDrive/model-gru-08.hdf5
Epoch 9/200
Epoch 00009: va