# Imports
### Python version: 3.6.9
### Keras version: 2.3.1

In [None]:
%tensorflow_version 1.x
import pandas as pd
import keras
import tensorflow as tf
from keras import backend as K
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten, Input, Embedding, BatchNormalization, concatenate, Conv1D, MaxPool1D, LSTM, GRU, Add, Average
from keras.optimizers import Adam
from keras.layers.merge import dot
from keras.models import Model
from sklearn.model_selection import train_test_split

import matplotlib.pyplot as plt
%matplotlib inline

# Loading the MovieLens 1M dataset

In [None]:
DATASET_PATH = 'dataset'

# Options shared by the three MovieLens .dat files:
#  - a multi-character separator such as '::' is only handled by pandas'
#    python parsing engine, so request it explicitly;
#  - latin-1 can decode any byte, so non-ASCII text in the files cannot raise
#    a UnicodeDecodeError (NOTE(review): assumes the files are Latin-1 text,
#    as is commonly reported for MovieLens 1M -- confirm against your copy).
READ_OPTIONS = dict(sep="::", header=None, engine='python', encoding='latin-1')

# The directory and file name must be joined with a separator; plain
# concatenation produced 'datasetratings.dat'.
df = pd.read_csv(DATASET_PATH + '/ratings.dat', names=['UserId', 'MovieId', 'Rating', 'Timestamp'], **READ_OPTIONS)
df_movies = pd.read_csv(DATASET_PATH + '/movies.dat', names=['MovieId', 'Title', 'Genres'], **READ_OPTIONS)
df_users = pd.read_csv(DATASET_PATH + '/users.dat', names=['UserId', 'Gender', 'Age', 'Occupation', 'Zipcode'], **READ_OPTIONS)

In [None]:
# Basic size statistics of the ratings table.
user_count = df['UserId'].unique().size
movie_count = df['MovieId'].unique().size
rating_count = df.shape[0]

print('Number of users:', user_count)
print('Number of movies:', movie_count)
print('Number of ratings:', rating_count)

In [None]:
df['UserId'] = df['UserId'].astype('category').cat.codes.values
df['MovieId'] = df['MovieId'].astype('category').cat.codes.values

In [None]:
def getUserAge(age):
  if (age <= 8):
    return '1-8'
  elif (age <= 16):
    return '9-16'
  elif (age <= 24):
    return '17-24'
  elif (age <= 32):
    return '25-32'
  elif (age <= 40):
    return '33-40'
  elif (age <= 48):
    return '41-48'
  elif (age <= 56):
    return '49-56'

In [None]:
df['UserGender'] = df_users['Gender'].array[df['UserId']]
df['UserOccupation'] = df_users['Occupation'].array[df['UserId']]

df['UserTempAge'] = df_users['Age'].array[df['UserId']]
df['UserAge'] = df['UserTempAge'].apply(getUserAge)
df = df.drop('UserTempAge', axis=1)

df['MovieGenres'] = df_movies['Genres'].array[df['MovieId']]
df['AuxInfo'] = df['UserGender'].astype(str) + '_' + df['UserOccupation'].astype(str) + '_' + df['UserAge']

In [None]:
from sklearn.preprocessing import LabelEncoder

# Turn the two string-valued features into integer class labels.
# Each column gets its own encoder so the label -> original value mapping
# stays available afterwards (encoder for AuxInfo, encoder2 for MovieGenres).
encoder = LabelEncoder()
encoder2 = LabelEncoder()

df['AuxInfo'] = encoder.fit(df['AuxInfo']).transform(df['AuxInfo'])
df['MovieGenres'] = encoder2.fit(df['MovieGenres']).transform(df['MovieGenres'])

# Splitting data into training and test datasets

In [None]:
# Hold out 20% of the ratings for testing (fixed seed for reproducibility).
# 'AuxInfo' has to be part of the feature frame: the model takes it as its
# third input, and the training / evaluation cells read x_train['AuxInfo']
# and x_test['AuxInfo'], which raised KeyError when the column was left out.
feature_columns = ['UserId', 'MovieId', 'AuxInfo']
x_train, x_test, y_train, y_test = train_test_split(df[feature_columns], df[['Rating']], test_size=0.2, random_state=42)

# MLP1+MLP model



In [None]:
# Output dimension of the embedding applied to the auxiliary input.
latent_factors_aux = 128

In [None]:
# Number of distinct ids in the ratings table; used as embedding vocabulary sizes.
num_users = df['UserId'].drop_duplicates().shape[0]
num_movies = df['MovieId'].drop_duplicates().shape[0]

In [None]:
# Auxiliary branch: one integer AuxInfo label per sample -> embedding vector.
# The embedding vocabulary must cover the AuxInfo labels, not the user ids;
# sizing it with num_users only worked because it happened to be larger and
# allocated rows that can never be looked up. max + 1 guarantees every label
# present in df is a valid index.
num_aux = int(df['AuxInfo'].max()) + 1

aux_input = Input(shape=(1,),name='aux_input', dtype='int32')
aux_embedding = Embedding(num_aux, latent_factors_aux, name='aux_embedding')(aux_input)
# Drop the length-1 sequence axis, then regularise with dropout.
aux_flat = Flatten(name='aux_flat')(aux_embedding)
aux_flat = Dropout(0.2)(aux_flat)

In [None]:
# Two-layer MLP on the auxiliary embedding: 128 units (followed by dropout),
# then 64 units so the output can be dotted with the main tower's 64-unit output.
aux_dense1 = Dropout(rate=0.2)(Dense(units=128, activation='relu')(aux_flat))
aux_dense2 = Dense(units=64, activation='relu')(aux_dense1)

In [None]:
# Output dimension shared by the user and movie embeddings (they must match
# for the dot product between the two to be defined).
latent_factors = 480

In [None]:
# User branch: integer user code -> embedding -> flat vector -> dropout.
user_input = Input(name='user_input', shape=(1,), dtype='int32')
user_embedding = Embedding(input_dim=num_users, output_dim=latent_factors, name='user_embedding')(user_input)
user_flat = Dropout(rate=0.2)(Flatten(name='user_flat')(user_embedding))

In [None]:
# Movie branch: integer movie code -> embedding -> flat vector -> dropout.
movie_input = Input(name='movie_input', shape=(1,), dtype='int32')
movie_embedding = Embedding(input_dim=num_movies, output_dim=latent_factors, name='movie_embedding')(movie_input)
movie_flat = Dropout(rate=0.2)(Flatten(name='movie_flat')(movie_embedding))

In [None]:
# Dot product of the user and movie vectors along the feature axis.
# NOTE(review): with axes=1 on the two flattened vectors this should yield a
# single value per sample, so the Dense stack that follows sees one feature --
# confirm this is intended rather than e.g. concatenating the two vectors.
product = dot(inputs=[user_flat, movie_flat], axes=1, name='product')

In [None]:
# Main MLP tower on top of the user/movie interaction: 768 -> 256 -> 128 -> 64
# ReLU units, with 20% dropout after every layer except the last.
dense1 = Dropout(rate=0.2)(Dense(units=768, activation='relu')(product))
dense2 = Dropout(rate=0.2)(Dense(units=256, activation='relu')(dense1))
dense3 = Dropout(rate=0.2)(Dense(units=128, activation='relu')(dense2))
dense4 = Dense(units=64, activation='relu')(dense3)

# Model output: dot product of the main tower and the auxiliary tower outputs.
product2 = dot(inputs=[dense4, aux_dense2], axes=1, name='product2')

In [None]:
# Assemble the three-input model. The auxiliary input tensor is `aux_input`;
# the previously referenced `user_aux_input` was never defined and raised
# NameError. The input order (user, movie, aux) must match the order of the
# arrays passed to fit() / evaluate().
model = Model(inputs=[user_input, movie_input, aux_input], outputs=product2)
model.summary()

In [None]:
# Training hyperparameters.
learning_rate = 1e-4  # step size passed to the Adam optimizer
epochs = 120          # upper bound; the EarlyStopping callback may end training sooner
batch_size = 256      # used for both fit() and evaluate()

In [None]:
# Stop training once val_loss has not improved by at least 2e-4 for
# 4 epochs in a row; verbose=1 logs the epoch at which training stopped.
early_stopping = keras.callbacks.EarlyStopping(monitor='val_loss', min_delta=2e-4, patience=4, verbose=1)
callbacks = [early_stopping]

In [None]:
# Take the metric classes from the same standalone `keras` package that built
# the model. tf.keras metric objects are not instances of this package's
# Metric class, so mixing the two implementations is fragile.
# NOTE(review): the exact failure mode of the old tf.keras metrics was not
# verified here -- confirm the reported RMSE / MAE after this change.
# Metric order (RMSE, MAE) is what the evaluation printout relies on.
model.compile(
    optimizer=Adam(lr=learning_rate),
    loss='mse',
    metrics=[keras.metrics.RootMeanSquaredError(), keras.metrics.MeanAbsoluteError()])

In [None]:
# Train on the (user, movie, aux) inputs in the order the model expects,
# holding out a quarter of the training data for validation.
train_inputs = [x_train[column] for column in ('UserId', 'MovieId', 'AuxInfo')]
fit = model.fit(
    train_inputs,
    y_train,
    epochs=epochs,
    batch_size=batch_size,
    validation_split=0.25,
    shuffle=True,
    callbacks=callbacks,
)

In [None]:
# Score the trained model on the held-out test split; `result` holds the loss
# followed by the compiled metrics.
test_inputs = [x_test[column] for column in ('UserId', 'MovieId', 'AuxInfo')]
result = model.evaluate(test_inputs, y_test, batch_size=batch_size)
print('Test dataset results (test loss, RMSE, MAE): ', result)