# Task 3: Tell-the-time Neural Network

In [1]:
import tensorflow 
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.models import load_model, Sequential
from tensorflow.keras.datasets import mnist 
from tensorflow.keras.layers import Dense, Dropout, Flatten, Convolution2D, MaxPooling2D, Activation
from tensorflow.keras.optimizers import RMSprop
#from functools import partial

import pandas as pd
from matplotlib import pyplot as plt
import numpy as np

In [2]:
#import the images and labels

#the data files are opened as read-only memory maps ('r' instead of 'r+'): this notebook only ever reads
#from them, and a read-only map protects the files on disk against accidental in-place modification
images = np.load('/content/drive/MyDrive/Colab Notebooks/images.npy', mmap_mode='r')
labels = np.load('/content/drive/MyDrive/Colab Notebooks/labels.npy', mmap_mode='r')
#NOTE(review): the index files are read from the working directory while the data comes from Drive -- confirm both locations
index_train = np.load('index_train_final.npy')    #these are the indexes of the training images in the dataset (valid for the periodic function and multi-head model)
index_test = np.load('index_test_final.npy')      #these are the indexes of the testing images in the dataset (valid for the periodic function and multi-head model)

#create the train, validation and test set 
#uncomment these lines in case you want to create a model on a new training set
#i = int(len(images) * 0.8)   # we want a 80:20 split
#index_list = np.arange(0, len(images), 1)
#index_train = np.random.choice(index_list, size = i, replace = False)   #indexes of the training + validation set
#index_test = np.delete(index_list, index_train)                         #indexes of the test set


#indexing with an index array copies the selected images into memory; the division rescales the pixel values
X_train, X_test = images[index_train] / 255.0, images[index_test] / 255.0    #normalize the input data
X_valid, X_train = X_train[:1000], X_train[1000:]            #the first 1000 training images become the validation set

In [3]:
#give every split the (samples, 150, 150, 1) layout expected by the CNN --> the images are greyscale, so a single channel is enough

x_train, x_test, x_valid = (
    split.reshape(len(split), 150, 150, 1) for split in (X_train, X_test, X_valid)
)

### Treat the problem as a pure regression problem

In [None]:
#build the targets for the regression formulation: hour plus minute/60, as a single number per image

hours, minutes = labels[:, 0], labels[:, 1] / 60   #hour entry, and minute entry rescaled by 1/60
reg_labels = hours + minutes                       #these are the regression labels

#Note the labels are additionally divided by 12 here below, because otherwise our model doesn't learn
y_train = reg_labels[index_train] / 12    #targets for training (+ validation)
y_test = reg_labels[index_test] / 12      #targets for testing
y_valid = y_train[:1000]                  #the first 1000 targets belong to the validation set
y_train = y_train[1000:]


In [None]:
#define the architecture of the regression model in this cell (layers listed in the order they are applied)
model = Sequential([
    #convolutional feature extractor
    Convolution2D(32, kernel_size=(3, 3), strides=3, activation='relu', input_shape=(150, 150, 1)),
    MaxPooling2D(pool_size=(2, 2)),
    Convolution2D(32, kernel_size=(3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Convolution2D(64, kernel_size=(3, 3), activation='relu'),
    Convolution2D(64, kernel_size=(1, 1), activation='relu'),

    #fully connected part
    Flatten(),
    Dense(200, activation='relu'),
    Dropout(0.1),
    Dense(200, activation='relu'),
    Dense(1, activation='sigmoid'),   #single sigmoid output, to be compared with the labels that were divided by 12
])



#we define a customized loss function in order to ensure that network computes the 'common sense' error
#example: if the prediction is 11:00 but the true time is 1:00, this loss function ensures that the error is
# 2 hours, not 10 hours! Based on this error the function then computes the mse
def loss_func(y_true, y_pred):
    """Mean squared 'common sense' (wrap-around) error for labels normalized to the range 0..1.

    For every element the error is taken either directly, |y_true - y_pred|, or around the
    dial, |y_true - y_pred| - 1, whichever has the smaller square. The choice is made
    element by element, so one sample that wraps around does not force the same choice on
    the rest of the batch.

    Parameters
    ----------
    y_true : tensor-like
        True labels (cast to the dtype of ``y_pred``).
    y_pred : tensor-like
        Predicted labels.

    Returns
    -------
    Tensor
        The squared common sense error averaged over the last axis.
    """
    y_pred = tensorflow.convert_to_tensor(y_pred)
    y_true = tensorflow.cast(y_true, y_pred.dtype)   #avoid a dtype mismatch in the subtraction below

    direct = tensorflow.abs(y_true - y_pred)
    wrapped = direct - 1   #-1 instead of -12 since we normalized the labels to be between 0 and 1

    #element-wise choice between the direct and the wrap-around squared error
    squared_error = tensorflow.minimum(tensorflow.square(direct), tensorflow.square(wrapped))
    return tensorflow.math.reduce_mean(squared_error, axis = -1)


In [None]:
#define a stopping condition: monitor the validation loss with a patience of 40 epochs
early_stop = keras.callbacks.EarlyStopping(patience=40, monitor='val_loss')

#compile the model with the plain mean squared error as loss
#NOTE(review): loss_func defined above is not used here, and 'accuracy' is reported on continuous targets -- confirm both are intended
model.compile(
    optimizer='adam',
    loss='mse',
    metrics=['accuracy'],
)

In [None]:
#train the model (early_stop may end the run before the 300 epochs are reached)
history = model.fit(
    x_train,
    y_train,
    validation_data=(x_valid, y_valid),
    batch_size=100,
    epochs=300,
    callbacks=[early_stop],
)

#save history: one row per epoch, written as csv
hist_csv_file = 'regression_history'
hist = pd.DataFrame(history.history)
with open(hist_csv_file, mode='w') as f:
    hist.to_csv(f)

#save model
model.save('Task3_models/regression_model')

In [None]:
#assess the performance of the model
model.evaluate(x_test, y_test)

#model.predict returns one row per image with a single column; flatten it so that it lines up
#element by element with the 1-D array of true labels
y_pred = np.ravel(model.predict(x_test))

#the labels were divided by 12, so * 12 gives hours and a further * 60 gives minutes after 0:00
true_time = np.asarray(y_test) * 12 * 60   #this is the true time expressed in minutes after 0:00
pred_time = y_pred * 12 * 60               #this is the predicted time expressed in minutes after 0:00

#common sense error of each prediction: the smaller of the direct difference and the difference
#measured the other way around the 720-minute dial. The wrap-around term is computed from the
#absolute difference, so it is correct whichever of the two times is the larger one
direct = np.abs(true_time - pred_time)
delta = np.minimum(direct, np.abs(direct - 720))

print('Average time difference is: ' + str(delta.sum() / len(delta)) + ' minutes')

### Treat problem as a classification problem

In [None]:
#create the new labels for the classification problem --> here we have 720 categories, one for each minute past 0:00

#the category of a label is its number of minutes past 0:00 (60 * hour + minute). It is computed directly
#for all labels at once, so nothing depends on a hard-coded number of images. The labels are converted to
#int64 first so that 60 * hour cannot overflow a small integer dtype
labels_int = np.asarray(labels, dtype=np.int64)
labels_cat = 60 * labels_int[:, 0] + labels_int[:, 1]  #each element is an integer category; 0..719 assuming hours 0-11 and minutes 0-59 -- TODO confirm

#create the training, test and validation labels again
y_train, y_test = labels_cat[index_train], labels_cat[index_test]
y_valid, y_train = y_train[:1000], y_train[1000:]

In [None]:
#create the architecture of the classification model (layers listed in the order they are applied)

model = Sequential([
    #convolutional feature extractor
    Convolution2D(32, kernel_size=(3, 3), activation='relu', input_shape=(150, 150, 1)),
    MaxPooling2D(pool_size=(2, 2)),
    Convolution2D(32, kernel_size=(3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Convolution2D(64, kernel_size=(3, 3), activation='relu'),
    Convolution2D(64, kernel_size=(1, 1), activation='relu'),

    #fully connected part
    Flatten(),
    Dense(200, activation='relu'),
    Dropout(0.1),
    Dense(200, activation='relu'),
    Dense(720, activation='softmax'),    #one output per category; softmax since we use sparse_categorical_crossentropy as the loss
])

In [None]:
#compile the model; the labels are integer categories, hence the sparse version of the cross-entropy
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)


#train the model (early_stop is the callback created in the regression section)
history = model.fit(
    x_train,
    y_train,
    validation_data=(x_valid, y_valid),
    batch_size=100,
    epochs=300,
    callbacks=[early_stop],
)

#save history: one row per epoch, written as csv
hist_csv_file = 'classification_history'
hist = pd.DataFrame(history.history)
with open(hist_csv_file, mode='w') as f:
    hist.to_csv(f)

#save model
model.save('Task3_models/classification_model')

In [None]:
#assess the performance of the classification model on the test set
model.evaluate(x=x_test, y=y_test)

We conclude that using 720 categories is overkill for this problem.

### Multi-head model

We create a model consisting of two heads: one head for predicting the hours and another for predicting the minutes. The hour branch acts as a classification model, while the minute branch acts as a regression model.

In [4]:
#split the raw (hour, minute) labels and prepare one target per head of the multi-head model

y_train = labels[index_train]
y_test = labels[index_test]
y_valid, y_train = y_train[:1000], y_train[1000:]

#first column --> hour labels (kept as they are), second column --> minute labels (normalized by dividing by 60)
yh_train, ym_train = y_train[:, 0], y_train[:, 1] / 60     #training labels
yh_valid, ym_valid = y_valid[:, 0], y_valid[:, 1] / 60     #validation labels
yh_test, ym_test = y_test[:, 0], y_test[:, 1] / 60         #test labels
#note we have normalized the minute labels

In [None]:
#create the architecture of the model


#base of the model: convolutional feature extractor shared by both heads
inp = layers.Input(shape=(150, 150, 1))
base = layers.Convolution2D(32, kernel_size = (3, 3), activation = 'relu')(inp)
base = layers.MaxPooling2D(pool_size=2)(base)
base = layers.Convolution2D(32, kernel_size = (3, 3), activation = 'relu')(base)
base = layers.Convolution2D(32, kernel_size = (3, 3), activation = 'relu')(base)
base = layers.MaxPooling2D(pool_size=2)(base)
base = layers.Convolution2D(64, kernel_size = (3, 3), activation = 'relu')(base)
base = layers.Convolution2D(64, kernel_size = (1, 1), activation = 'relu')(base)
base = layers.Flatten()(base)

#hour branch --> acts as classification model (12 softmax outputs)
hour = layers.Dense(200, activation = 'relu')(base)
hour = layers.Dropout(0.1)(hour)
hour = layers.Dense(200, activation = 'relu')(hour)
hour = layers.Dense(12, activation = 'softmax', name = 'hour')(hour)

#minutes branch --> acts as regression model (single softplus output)
minute = layers.Dense(200, activation = 'relu')(base)
minute = layers.Dense(200, activation = 'relu')(minute)
minute = layers.Dense(200, activation = 'relu')(minute)
minute = layers.Dropout(0.1)(minute)
minute = layers.Dense(200, activation = 'relu')(minute)   #was fed the undefined name 'minutel', which raised a NameError
minute = layers.Dense(1, activation = 'softplus', name = 'minute')(minute)

#put all of the parts together; the order of the outputs (hour, minute) fixes the order of the targets and losses
model = tensorflow.keras.Model(inputs = inp, outputs=[hour, minute])

In [None]:
#compile the model. Note a loss for each branch has to be defined (same order as the outputs: hour, minute)

model.compile(loss=['sparse_categorical_crossentropy', 'mse'], optimizer='adam', metrics=['accuracy'])

#keep the History object returned by fit: without this assignment the lines below would save the
#history of the previously trained model (or fail with a NameError on a fresh kernel)
history = model.fit(x_train, [yh_train, ym_train], epochs=100, batch_size=100,
                    validation_data=(x_valid, [yh_valid, ym_valid]),
                    callbacks = [early_stop])

#save history
hist = pd.DataFrame(history.history)
hist_csv_file = 'multi_head_history'
with open(hist_csv_file, mode='w') as f:
    hist.to_csv(f)

#save model 
#NOTE(review): the next cell loads 'Task3_models/multi_head_model.h5' -- confirm which of the two paths is intended
model.save('Task3_models/multi_head_model')

In [None]:
#load the stored multi-head model
#NOTE(review): the training cell saves to 'Task3_models/multi_head_model' (no .h5 suffix) -- confirm this file exists
model = load_model('Task3_models/multi_head_model.h5')

In [None]:
#assess the performance of the model 

model.evaluate(x_test, [yh_test, ym_test])

y_pred = model.predict([x_test])   #y_pred[0] --> output of the hour head, y_pred[1] --> output of the minute head

#same procedure as before when assessing the performance on the test set 
h, ht = np.argmax(y_pred[0], axis = 1), yh_test   #predicted hour (most probable class) and true hour
m, mt = y_pred[1] * 60, ym_test * 60              #predicted and true minutes (the division by 60 is undone)
pred_time = 60 * h + m.T   #predicted time, a single row with one entry per test image
true_time = 60 * ht + mt   #true time

#common sense error: the smaller of the direct difference and the difference around the 720-minute dial
direct = abs(true_time - pred_time[0])
wrapped = abs(direct - 720)
delta = np.where(direct < wrapped, direct, wrapped)
dlist = list(delta)        #same errors as a list, used for the histogram

print('Average time difference is: ' + str(delta.sum() / len(h)) + ' minutes')

In [None]:
#visualize the error distribution of the model: histogram of the common sense errors in dlist

fig, ax = plt.subplots(figsize=(8, 8))
n, bins, patches = ax.hist(dlist, bins=360, density=False, facecolor='purple', alpha=0.75)
ax.set_ylabel('Occurance', fontsize=15)
ax.set_xlabel(r'$\Delta$ t (minutes)', fontsize=15)
ax.set_xlim(0, 100)   #only the first 100 minutes are shown
#plt.savefig('time_distr_mutihead')

### Model using Periodic Function

Here we reformulate the hour ($y_h$) and minute ($y_m$) labels in the following way:
<br> $Y_h = \cos\left(\frac{\pi}{12}\, y_h\right)$
<br> $Y_m = \cos\left(\frac{\pi}{60}\, y_m\right)$

In [None]:
#functions used to reformulate the labels 

def hours_periodic(h):
    """Map an hour label (scalar or NumPy array) to cos(pi/12 * h)."""
    angle = np.pi/12 * h
    return np.cos(angle)

def minutes_periodic(m):
    """Map a minute label (scalar or NumPy array) to cos(pi/60 * m)."""
    angle = np.pi/60 * m
    return np.cos(angle)

In [None]:
#reformulate the labels using the periodic functions 

y_train = labels[index_train]
y_test = labels[index_test]
y_valid, y_train = y_train[:1000], y_train[1000:]

#the new training, validation and test labels: one row per image with the columns
#(encoded hour, encoded minute). The raw label arrays are collected before any name is rebound
y_train, y_valid, y_test = (
    np.transpose([hours_periodic(raw[:, 0]), minutes_periodic(raw[:, 1])])
    for raw in (y_train, y_valid, y_test)
)

In [None]:
#define a custom activation function which is periodic for the output layer 
from tensorflow.keras import backend as K

def cos(x):
    """Return the cosine of x, computed with NumPy."""
    result = np.cos(x)
    return result

def my_function(x):
    """Periodic activation function: the cosine of x, computed with the Keras backend."""
    return K.cos(x)

In [None]:
#create the architecture of the model 
model = Sequential()
#only the first layer declares the input shape; the second convolution used to repeat
#input_shape=(150, 150, 1), which does not describe its real input (the output of the first layer)
model.add(Convolution2D(32, kernel_size = (2,  2), strides = 3,  activation = 'relu', input_shape=(150, 150, 1)))
model.add(Convolution2D(32, kernel_size = (3,  3), activation = 'relu'))
model.add(MaxPooling2D(pool_size=(2,2)))
model.add(Convolution2D(32, kernel_size = (3, 3), activation = 'relu'))
model.add(Convolution2D(32, kernel_size = (3, 3), activation = 'relu'))
model.add(MaxPooling2D(pool_size=(2,2)))
model.add(Convolution2D(64, kernel_size = (3, 3), activation = 'relu'))
model.add(Convolution2D(64, kernel_size = (3, 3), activation = 'relu'))
model.add(Flatten())
model.add(Dense(200, activation = 'relu'))
model.add(Dropout(0.1))
model.add(Dense(200, activation = 'relu'))
model.add(Dense(2, activation = my_function))  #last layer (two outputs: encoded hour and minute) with a custom periodic activation function

In [None]:
#compile the model with the mean squared error between predicted and encoded labels as loss
#NOTE(review): 'accuracy' is reported on continuous targets -- confirm it is intended
model.compile(
    optimizer='adam',
    loss='mse',
    metrics=['accuracy'],
)

In [None]:
#train the model (early_stop is the callback created in the regression section)
history = model.fit(
    x_train,
    y_train,
    validation_data=(x_valid, y_valid),
    batch_size=100,
    epochs=300,
    callbacks=[early_stop],
)

#save history: one row per epoch, written as csv
hist_csv_file = 'periodic_history'
hist = pd.DataFrame(history.history)
with open(hist_csv_file, mode='w') as f:
    hist.to_csv(f)

#save model
model.save('Task3_models/periodic_model')

In [None]:
#in case you want to test the model that is included in the submission
#the custom activation has to be passed under the name it was defined with, so that the stored model can be rebuilt
model = load_model('periodic_model.h5', custom_objects={'my_function': my_function})

In [None]:
#assess the performance of the model

model.evaluate(x_test, y_test)

y_pred = model.predict([x_test])

#undo the cosine encoding with arccos: column 0 holds the hours, column 1 the minutes
hour = np.round(np.arccos(y_pred[:, 0]) * 12 / np.pi)   #the predicted hour is rounded to a whole hour
minute = np.arccos(y_pred[:, 1]) * 60 / np.pi
hour_t = np.arccos(y_test[:, 0]) * 12 / np.pi
minute_t = np.arccos(y_test[:, 1]) * 60 / np.pi
pred = hour*60 + minute        #predicted time in minutes
true = hour_t*60 + minute_t    #true time in minutes

#common sense error of every prediction: the smaller of the direct difference and the
#difference around the 720-minute dial; dlist keeps the individual errors for the histogram
dlist = []
delta = 0
for p, t in zip(pred, true):
    direct = abs(p - t)
    wrapped = abs(direct - 720)
    smallest = direct if direct < wrapped else wrapped
    dlist.append(smallest)
    delta += smallest
delta = delta / len(hour)
print('Average time difference is: ' + str(delta) + ' minutes')

In [None]:
#visualize the error distribution of the model: histogram of the common sense errors in dlist

fig, ax = plt.subplots(figsize=(8, 8))
n, bins, patches = ax.hist(dlist, bins=360, density=False, facecolor='purple', alpha=0.75)
ax.set_ylabel('Occurance', fontsize=15)
ax.set_xlabel(r'$\Delta$ t (minutes)', fontsize=15)
ax.set_xlim(0, 100)   #only the first 100 minutes are shown
#plt.savefig('time_distr_mutihead')