# Helpers

In [None]:
!pip install pickle5

In [None]:
!pip install keras_applications

In [1]:
import os
import pickle5 as pickle
from pathlib import Path
import random
import tensorflow as tf
import keras
import string
from math import ceil
import random
import numpy as np
import sys
from PIL import Image
from tqdm import tqdm
from sklearn.metrics import mean_squared_error as mse

In [2]:
def path_of(location):
    """Resolve ``location`` relative to the directory that holds this code.

    Args:
        location: Relative path (file or folder) to resolve.

    Returns:
        ``location`` joined onto the base directory. The base directory is
        the folder of this source file, or the current working directory
        when ``__file__`` is not defined (notebook cell, interactive shell).
    """
    try:
        base_dir = os.path.dirname(os.path.abspath(__file__))
    except NameError:
        # Notebooks / REPLs do not define __file__; fall back to the CWD
        # instead of failing with a NameError.
        base_dir = os.getcwd()
    return os.path.join(base_dir, location)

def load_pkl(filename):
    """Read a single pickled object from ``filename`` and return it.

    Args:
        filename: Path of the pickle file to read.

    Returns:
        The object produced by ``pickle.load``.
    """
    # NOTE: unpickling can run arbitrary code; only load files you trust.
    with open(filename, "rb") as source:
        return pickle.load(source)

def store_pkl(object, filename):
    """Pickle ``object`` into ``filename`` using the highest protocol.

    Args:
        object: The value to serialise.
        filename: Destination path; an existing file is overwritten.
    """
    with open(filename, "wb") as sink:
        pickle.dump(object, sink, protocol=pickle.HIGHEST_PROTOCOL)

def is_valid_file(filename):
    """Tell whether ``filename`` names an existing regular file.

    The name is first resolved through ``path_of`` and then checked with
    ``Path.is_file``.

    Args:
        filename: Path handed to ``path_of`` for resolution.

    Returns:
        ``True`` if the resolved path is a file, ``False`` otherwise.
    """
    return Path(path_of(filename)).is_file()


def to_same_shape(arr_of_items, required_shape):
    """Truncate or cyclically repeat ``arr_of_items`` to ``required_shape`` items.

    Args:
        arr_of_items: Non-empty sequence supporting ``len``, indexing and
            slicing.
        required_shape: Target number of items.

    Returns:
        ``arr_of_items[:required_shape]`` when the input is longer than
        required; otherwise a new list of ``required_shape`` items obtained
        by cycling through the input from its start.

    Raises:
        ValueError: If ``arr_of_items`` is empty (nothing to repeat).
    """
    item_count = len(arr_of_items)
    if item_count == 0:
        # Raise instead of print + exit(): exit() would terminate the whole
        # interpreter / notebook kernel and cannot be handled by the caller.
        raise ValueError(
            f"Cannot make shape {required_shape}: item is empty"
        )
    if item_count > required_shape:
        return arr_of_items[:required_shape]
    return [arr_of_items[i % item_count] for i in range(required_shape)]

def get_rand_str(size):
    """Return a random string of ``size`` uppercase ASCII letters and digits.

    Characters are drawn one at a time with ``random.choice``.
    """
    alphabet = string.ascii_uppercase + string.digits
    picked = [random.choice(alphabet) for _ in range(size)]
    return "".join(picked)

def save_model(keras_model, save_folder="./models", save_filename=None):
    """Save ``keras_model`` under ``save_folder`` and report the file name.

    Args:
        keras_model: Object exposing a ``save(path)`` method.
        save_folder: Folder (resolved through ``path_of``) to save into.
        save_filename: File name to use. When ``None`` the name is the part
            of this source file's name before its first dot, plus ``.h5``
            (this default needs ``__file__`` to be defined).
    """
    if save_filename is None:
        this_file = os.path.basename(os.path.abspath(__file__))
        stem = this_file.split(".")[0]
        save_filename = stem + ".h5"

    target = path_of(save_folder + "/" + save_filename)
    keras_model.save(target)
    print("\n\n    Saved_model:", save_filename, "\n\n")

# Model

# Train

In [3]:
# ---- Run configuration ----------------------------------------------------
use_precomputed_dict_for_video = False

# Every frame is resized to imageW x imageH pixels (see get_pixels).
imageW = 112
imageH = 112

batch_size = 64

# Root folder; the frames of a movie are looked up in the sub-folder named
# after its movie id (see get_nth_batch).
video_frames_path = "/home/pulkit_2111mt05/Flickscore_final/Dataset/Frames_entropy/"

# ---- Pickled dataset artefacts --------------------------------------------
train_obj = load_pkl("/home/pulkit_2111mt05/Flickscore_final/Dataset/train_v2.obj")
test_obj = load_pkl("/home/pulkit_2111mt05/Flickscore_final/Dataset/test_v2.obj")

# Lookup from user id to that user's embedding vector.
user_enc = load_pkl("/home/pulkit_2111mt05/Flickscore_final/Dataset/new_user_embedding_v2_500.obj")

# Length of one embedding, measured on the first entry
# (915 according to the original author's note).
user_size = len(user_enc.get(list(user_enc.keys())[0]))

def get_pixels(img_file:str):
    """Load an image as a normalised ``(imageH, imageW, 3)`` float array.

    Args:
        img_file: Path of the image file to read.

    Returns:
        ``numpy.ndarray`` of shape ``(imageH, imageW, 3)`` whose values are
        the RGB pixel intensities divided by 255.
    """
    # The context manager closes the underlying file handle; without it one
    # handle per frame stays open until garbage collection.
    with Image.open(img_file) as im:
        # Force three channels so grayscale / palette / RGBA frames also fit
        # the (H, W, 3) layout instead of failing on the channel count.
        im = im.convert("RGB").resize((imageW, imageH))
        # Direct conversion avoids the slow per-pixel getdata() round trip;
        # the resulting array is already laid out as (height, width, 3).
        data = np.asarray(im, dtype=float)
    return data / 255


def make_vec_from_video_folder(folder_path:str):
    """Stack every frame image of one video folder along the channel axis.

    Args:
        folder_path: Directory whose regular files are the frame images.

    Returns:
        The arrays returned by ``get_pixels`` for each file, concatenated on
        the last axis, or ``None`` when the folder contains no files.

    Raises:
        NotADirectoryError: If ``folder_path`` is not an existing directory.
    """
    if not os.path.isdir(folder_path):
        raise NotADirectoryError(f"Invalid folder path: {folder_path}")

    # os.listdir() returns entries in arbitrary order; sort so the channel
    # order of the stacked frames is the same on every run and machine.
    frame_files = sorted(
        os.path.join(folder_path, name)
        for name in os.listdir(folder_path)
        if os.path.isfile(os.path.join(folder_path, name))
    )
    if not frame_files:
        return None

    # One concatenate call instead of re-copying the growing array per frame.
    return np.concatenate([get_pixels(path) for path in frame_files], axis=-1)


def shuffle_single_epoch(ratings):
    """Return a shuffled copy of ``ratings``; the input is left untouched.

    Args:
        ratings: A list (or any object with ``copy()`` that
            ``random.shuffle`` can permute) or a ``numpy.ndarray`` of
            records.

    Returns:
        A new container of the same kind holding the records in random
        order.
    """
    if isinstance(ratings, np.ndarray):
        # random.shuffle swaps elements via views on multi-dimensional
        # arrays, which duplicates some rows and loses others. Shuffle the
        # row indices instead; fancy indexing returns a fresh copy.
        order = list(range(len(ratings)))
        random.shuffle(order)
        return ratings[order]

    data_copied = ratings.copy()
    random.shuffle(data_copied)
    return data_copied

    
def normalize_rate(rate, max_rate=5):
    """Scale ``rate`` down by ``max_rate``.

    Args:
        rate: Rating value to scale.
        max_rate: Divisor; defaults to 5, the value previously hard-coded.

    Returns:
        ``rate / max_rate``.
    """
    return rate / max_rate

def de_normalize_rate(rate, max_rate=5):
    """Scale a normalised ``rate`` back up by ``max_rate``.

    Args:
        rate: Normalised rating value.
        max_rate: Multiplier; defaults to 5, the value previously hard-coded.

    Returns:
        ``rate * max_rate``.
    """
    return rate * max_rate


def get_nth_batch(data, batch_id=None, size=None):
    """Turn ``(user_id, movie_id, rate)`` records into model input arrays.

    Records are skipped when the user has no entry in ``user_enc``, when the
    rate is not one of -1, 0, 1, or when the movie's frames cannot be loaded.

    Args:
        data: Sequence of ``(user_id, movie_id, rate)`` records.
        batch_id: Optional zero-based batch index. When ``None`` (default)
            every record in ``data`` is processed; otherwise only the slice
            belonging to that batch is.
        size: Records per batch, used only together with ``batch_id``;
            defaults to the module-level ``batch_size``.

    Returns:
        Tuple ``(users, movies, nrates)`` of float arrays with one row per
        kept record: the user embeddings, the stacked video frames and the
        one-hot encoded rates (3 columns: rate 0, rate 1, rate -1).
    """
    if batch_id is not None:
        if size is None:
            size = batch_size
        data = data[batch_id * size:(batch_id + 1) * size]

    # One-hot layout used for the labels.
    one_hot_by_rate = {0: [1, 0, 0], 1: [0, 1, 0], -1: [0, 0, 1]}

    users, movies, nrates = [], [], []

    for user_id, movie_id, rate in data:
        user_vec = user_enc.get(user_id)
        if user_vec is None:
            continue

        # Resolve the label before loading frames: an unknown rate used to
        # append a user and a movie without a label, desynchronising the
        # three lists.
        label = one_hot_by_rate.get(int(rate))
        if label is None:
            continue

        try:
            m_vid = make_vec_from_video_folder(video_frames_path + str(movie_id))
        except Exception:
            # Best effort: a movie whose frames are missing or unreadable is
            # skipped. `except Exception` (not a bare except) keeps Ctrl-C
            # and interpreter exit working.
            continue
        if m_vid is None:
            # Folder exists but holds no frame files.
            continue

        users.append(user_vec)
        movies.append(m_vid)
        nrates.append(label)

    users = np.array(users, dtype=float)
    movies = np.array(movies, dtype=float)
    nrates = np.array(nrates, dtype=float)

    assert len(users) == len(movies) == len(nrates)

    return users, movies, nrates



def test(model, test_data):
    """Evaluate ``model`` on ``test_data`` one batch at a time.

    Args:
        model: Object whose ``evaluate([movie, user], rate)`` returns a
            ``(loss, metric)`` pair.
        test_data: Sequence of ``(user_id, movie_id, rate)`` records.

    Returns:
        Tuple ``(metric, loss)``: the means over all evaluated samples.
        Both are ``nan`` when no sample could be evaluated.
        NOTE(review): the first value is whatever single metric the model
        was compiled with; it is only an accuracy if the model was compiled
        that way.
    """
    metric_sum, loss_sum, sample_total = 0.0, 0.0, 0
    batch_count = ceil(len(test_data) / batch_size)

    for batch_id in range(batch_count):
        print(" -> Testing Batch: ", batch_id)
        # Slice here and hand over only this batch's records, so the call
        # matches get_nth_batch's one-argument form.
        start = batch_id * batch_size
        user, movie, rate = get_nth_batch(test_data[start:start + batch_size])
        if len(rate) == 0:
            # Every record of this batch was skipped; nothing to evaluate.
            continue
        loss, acc = model.evaluate([movie, user], rate)
        # Weight by sample count so smaller batches do not skew the mean.
        metric_sum += acc * len(rate)
        loss_sum += loss * len(rate)
        sample_total += len(rate)

    if sample_total == 0:
        return float("nan"), float("nan")
    return metric_sum / sample_total, loss_sum / sample_total



def train(model, data, test_data= None, epochs=50):
    """Train ``model`` batch by batch, checkpointing the best test result.

    Args:
        model: Object exposing ``fit`` and ``save`` (and ``evaluate`` when
            ``test_data`` is given).
        data: Sequence of ``(user_id, movie_id, rate)`` training records;
            reshuffled at the start of every epoch.
        test_data: Optional records evaluated with ``test`` after each epoch.
        epochs: Number of passes over ``data``.

    Side effects:
        Whenever the first value returned by ``test`` exceeds the best seen
        so far, the model is saved to ``./model_vid/model_abc.h5``.
        NOTE(review): this treats the metric as "higher is better"; confirm
        the model is compiled with such a metric.
    """
    best_test_acc = 0
    save_dir = "./model_vid/"
    batch_count = ceil(len(data) / batch_size)

    for epoch_id in range(1, epochs + 1):
        data = shuffle_single_epoch(data)
        print("\n\t---- Starting Epoch:", epoch_id, "----")

        for batch_id in range(batch_count):
            print(" -> Batch: ", batch_id)
            # Slice here and hand over only this batch's records, so the
            # call matches get_nth_batch's one-argument form.
            start = batch_id * batch_size
            user, movie, rate = get_nth_batch(data[start:start + batch_size])
            if len(rate) == 0:
                # Every record of this batch was skipped; nothing to fit.
                continue
            model.fit([movie, user], rate, batch_size=batch_size, epochs=1)

        if test_data is not None:
            test_acc, test_loss = test(model, test_data)
            print("TestAcc after Epoch", epoch_id, ": ", test_acc)
            print("TestLoss after Epoch", epoch_id, ": ", test_loss)
            if test_acc > best_test_acc:
                best_test_acc = test_acc
                # Make sure the checkpoint folder exists before saving.
                os.makedirs(save_dir, exist_ok=True)
                model.save(os.path.join(save_dir, "model_abc.h5"))


In [4]:
import pandas as pd
# Pool both pickled splits into one table (one row per record).
data_obj = test_obj + train_obj
pd_data = pd.DataFrame(data_obj)

# Column 2 holds an indexable value per record; its first element becomes
# the label stored in column 3.
results = [entry[0] for entry in pd_data.iloc[:, 2]]
pd_data[3] = results

# Features: the first two columns. Target: the label column.
x_data = np.array(pd_data.iloc[:, :2])
y_data = np.array(pd_data.iloc[:, 3])

In [5]:
from sklearn.model_selection import train_test_split

# 80/20 split, stratified on the label so both parts keep its distribution.
X_train, X_test, y_train, y_test = train_test_split(
    x_data,
    y_data,
    test_size=0.2,
    stratify=y_data,
)

# Re-attach the label as column 3 and go back to plain arrays of records.
train_obj = pd.DataFrame(X_train)
train_obj[3] = y_train
train_obj = np.array(train_obj)

test_obj = pd.DataFrame(X_test)
test_obj[3] = y_test
test_obj = np.array(test_obj)

In [6]:
def Main_Model(image_dim, user_size=915, cnn_encode_dim=231,
               num_outputs=1, output_activation="sigmoid"):
    """Build the two-branch (video frames + user embedding) Keras model.

    Args:
        image_dim: Three values ``(width, height, channels)`` describing the
            stacked-frame input.
        user_size: Length of the user embedding input. Defaults to 915, the
            value previously hard-coded inside the function.
        cnn_encode_dim: Size of the ``cnn_feature`` layer that ends the CNN
            branch. Defaults to the previously hard-coded 231.
        num_outputs: Units of the final layer (previously fixed to 1).
        output_activation: Activation of the final layer (previously fixed
            to ``"sigmoid"``).

    Returns:
        An uncompiled ``keras.models.Model`` taking ``[frames, user]`` and
        producing ``num_outputs`` values per sample.

    NOTE(review): ``get_nth_batch`` builds 3-element one-hot labels while the
    default head emits a single value; confirm whether the head should be
    built with ``num_outputs=3`` (e.g. with a softmax activation).
    """
    image_width, image_height, image_length = image_dim
    print(image_width , image_height , image_length)

    # ------------------------------- CNN branch -------------------------------
    cnn_input = keras.layers.Input(
        shape=(image_width, image_height, image_length), name="Input_frames")

    # Conv block 1: conv -> relu -> dropout -> 2x2 max-pool -> batch norm
    conv1 = keras.layers.Conv2D(4, kernel_size=(5, 5), strides=(1, 1), padding="same")(cnn_input)
    act_1 = keras.layers.Activation(keras.activations.relu)(conv1)
    drop_1 = keras.layers.Dropout(rate=0.2)(act_1)
    maxpool_1 = keras.layers.MaxPooling2D(pool_size=(2, 2), strides=(2, 2), padding="valid")(drop_1)
    norm_1 = keras.layers.BatchNormalization()(maxpool_1)

    # Conv block 2: strided conv -> relu -> dropout -> batch norm
    conv2 = keras.layers.Conv2D(8, kernel_size=(5, 5), strides=(2, 2), padding="same")(norm_1)
    act_2 = keras.layers.Activation(keras.activations.relu)(conv2)
    drop_2 = keras.layers.Dropout(rate=0.2)(act_2)
    norm_2 = keras.layers.BatchNormalization()(drop_2)

    # Conv block 3: strided conv -> relu -> batch norm
    conv3 = keras.layers.Conv2D(8, kernel_size=(3, 3), strides=(2, 2), padding="same")(norm_2)
    act_3 = keras.layers.Activation(keras.activations.relu)(conv3)
    norm_3 = keras.layers.BatchNormalization()(act_3)

    # Dense encoder on top of the flattened feature maps
    flat = keras.layers.Flatten()(norm_3)
    dense1 = keras.layers.Dense(units=512, activation="tanh")(flat)
    dense2 = keras.layers.Dense(units=256, activation="tanh")(dense1)
    dense3 = keras.layers.Dense(units=cnn_encode_dim, activation="tanh", name="cnn_feature")(dense2)

    cnn_model = keras.models.Model(inputs=cnn_input, outputs=dense3)

    # ------------------------------- User branch ------------------------------
    user1 = keras.layers.Input(shape=(user_size,), name="User_Input")
    user1_norm = keras.layers.BatchNormalization()(user1)
    user_dense = keras.layers.Dense(units=int(user_size*2/3), activation="tanh")(user1_norm)

    user_model = keras.models.Model(inputs=user1, outputs=user_dense)

    # ------------------------------ Fusion head -------------------------------
    combined = keras.layers.concatenate([cnn_model.output, user_model.output])
    combined_norm = keras.layers.BatchNormalization()(combined)
    concat_dense1 = keras.layers.Dense(units=int((cnn_encode_dim + user_size)/2), activation="tanh")(combined_norm)
    concat_dense2 = keras.layers.Dense(units=1024, activation="relu")(concat_dense1)
    concat_dense2 = keras.layers.BatchNormalization()(concat_dense2)
    concat_dense3 = keras.layers.Dense(units=128, activation="relu")(concat_dense2)
    concat_dense3 = keras.layers.BatchNormalization()(concat_dense3)
    concat_linear = keras.layers.Dense(units=num_outputs, activation=output_activation)(concat_dense3)

    model = keras.models.Model(inputs=[cnn_model.input, user_model.input], outputs=concat_linear)

    return model

In [7]:
# NOTE(review): 9 input channels, presumably 3 RGB frames stacked along the
# channel axis (confirm against the frame folders).
model = Main_Model([imageW, imageH, 9])
# summary() prints the table itself and returns None, so wrapping it in
# print() only appended a stray "None" line.
model.summary()

112 112 9
Model: "model_2"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 Input_frames (InputLayer)      [(None, 112, 112, 9  0           []                               
                                )]                                                                
                                                                                                  
 conv2d (Conv2D)                (None, 112, 112, 4)  904         ['Input_frames[0][0]']           
                                                                                                  
 activation (Activation)        (None, 112, 112, 4)  0           ['conv2d[0][0]']                 
                                                                                                  
 dropout (Dropout)              (None, 112, 112, 4)  0           ['activation[0][0

# Training 

In [8]:
# Materialise the whole training split as arrays: user embeddings, stacked
# video frames and one-hot encoded rates (records that cannot be built are
# skipped inside get_nth_batch).
users_train, movies_train, nrates_train = get_nth_batch(train_obj)

In [None]:
from tensorflow.keras.callbacks import EarlyStopping
# Stop when the validation loss has not improved for 12 epochs and restore
# the weights of the best epoch.
early_stopping_callback = EarlyStopping(
    monitor='val_loss',
    mode='min',
    patience=12,
    restore_best_weights=True,
)

# Configure training: MSE loss, RMSprop optimiser, MSE also reported as metric.
model.compile(
    optimizer='RMSprop',
    loss='mse',
    metrics=['mean_squared_error'],
)

# Fit on the prepared training arrays; 10% of them is held out for validation.
model_training_history = model.fit(
    x=[movies_train, users_train],
    y=nrates_train,
    batch_size=16,
    epochs=100,
    shuffle=True,
    validation_split=0.1,
    callbacks=[early_stopping_callback],
)

# Testing

In [11]:
# Build the evaluation arrays from the held-out split, in the same way as
# the training arrays.
users_test, movies_test, nrates_test = get_nth_batch(test_obj)

In [12]:
# Evaluate on the test arrays. The result is later unpacked as
# (loss, metric) when the saved model file is named.
model_evaluation_history = model.evaluate(x = [movies_test,users_test], y = nrates_test)



In [13]:
# Keep the training-set metrics in their own variable so the test-set result
# in `model_evaluation_history` (used to name the saved model file) is not
# overwritten by training-set numbers.
model_train_evaluation_history = model.evaluate(x = [movies_train,users_train], y = nrates_train)



## Save_Model

In [106]:
import datetime as dt
# Get the loss and accuracy from model_evaluation_history.
# Unpack the two values returned by model.evaluate().
# NOTE(review): the model above is compiled with metrics=['mean_squared_error'],
# so the value called "accuracy" here is presumably an MSE (confirm).
model_evaluation_loss, model_evaluation_accuracy = model_evaluation_history

# Timestamp (year_month_day__hour_minute_second) that keeps file names unique.
date_time_format = '%Y_%m_%d__%H_%M_%S'
current_date_time_dt = dt.datetime.now()
current_date_time_string = current_date_time_dt.strftime(date_time_format)

# Descriptive file name: timestamp plus the evaluation numbers.
model_file_name = (
    f'_model_Video_Embeddings___Date_Time_{current_date_time_string}'
    f'___Loss_{model_evaluation_loss}___Accuracy_{model_evaluation_accuracy}.h5'
)

# Write the model into the save folder.
location = "/home/pulkit_2111mt05/MovieLens_final/Save/"
model.save(location + model_file_name)