In [3]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [4]:
%cp -av /content/drive/MyDrive/rec reco

'/content/drive/MyDrive/rec' -> 'reco'
'/content/drive/MyDrive/rec/evaluate.py' -> 'reco/evaluate.py'
'/content/drive/MyDrive/rec/utils.py' -> 'reco/utils.py'
'/content/drive/MyDrive/rec/users.csv' -> 'reco/users.csv'
'/content/drive/MyDrive/rec/recommend.py' -> 'reco/recommend.py'
'/content/drive/MyDrive/rec/preprocess.py' -> 'reco/preprocess.py'
'/content/drive/MyDrive/rec/vis.py' -> 'reco/vis.py'
'/content/drive/MyDrive/rec/ratings.csv' -> 'reco/ratings.csv'
'/content/drive/MyDrive/rec/items.csv' -> 'reco/items.csv'


In [5]:
import numpy as np
import pickle
import pandas as pd
from IPython.display import SVG, display
import matplotlib.pyplot as plt
import seaborn as sns
from reco.preprocess import encode_user_item, random_split, user_split
from sklearn.neighbors import NearestNeighbors
from reco.evaluate import get_embedding, get_predictions, recommend_topk
from reco.evaluate import precision_at_k

In [6]:
df_ratings = pd.read_csv("reco/ratings.csv")
movies = pd.read_csv("reco/items.csv")

In [7]:
DATA, user_encoder, item_encoder = encode_user_item(df_ratings, "user_id", "movie_id", "rating", "unix_timestamp")

Number of users:  943
Number of items:  1682


In [8]:
DATA.head()

Unnamed: 0,user_id,movie_id,RATING,TIMESTAMP,USER,ITEM
0,196,242,3,881250949,195,241
1,186,302,3,891717742,185,301
2,22,377,1,878887116,21,376
3,244,51,2,880606923,243,50
4,166,346,1,886397596,165,345


In [9]:
n_users = DATA.USER.nunique()
n_items = DATA.ITEM.nunique()
max_rating = DATA.RATING.max()
min_rating = DATA.RATING.min()
n_users

943

In [10]:
train, test = user_split(DATA, [0.9, 0.1])

In [11]:
from keras.models import Model
from keras.layers import Input, Embedding, Flatten, Dot, Add, Lambda, Activation, Reshape, Concatenate, Dense, Dropout
from keras.regularizers import l2
from keras.constraints import non_neg
from keras.optimizers import Adam
from keras.utils.vis_utils import model_to_dot
from reco import vis

In [12]:
def Deep_MF(n_users, n_items, n_factors):
    
    # Item Layer
    item_input = Input(shape=[1], name='Item')
    item_embedding = Embedding(n_items, n_factors, embeddings_regularizer=l2(1e-6),
                               embeddings_initializer='glorot_normal',
                               name='ItemEmbedding')(item_input)
    item_vec = Flatten(name='FlattenItemE')(item_embedding)
    
    # Item Bias
    item_bias = Embedding(n_items, 1, embeddings_regularizer=l2(1e-6), 
                          embeddings_initializer='glorot_normal',
                          name='ItemBias')(item_input)
    item_bias_vec = Flatten(name='FlattenItemBiasE')(item_bias)

    # User Layer
    user_input = Input(shape=[1], name='User')
    user_embedding = Embedding(n_users, n_factors, embeddings_regularizer=l2(1e-6),
                               embeddings_initializer='glorot_normal',
                               name='UserEmbedding')(user_input)
    user_vec = Flatten(name='FlattenUserE')(user_embedding)
    
    # User Bias
    user_bias = Embedding(n_users, 1, embeddings_regularizer=l2(1e-6), 
                        embeddings_initializer='glorot_normal',
                          name='UserBias')(user_input)
    user_bias_vec = Flatten(name='FlattenUserBiasE')(user_bias)

    # Dot Product of Item and User & then Add Bias
    Concat = Concatenate(name='Concat')([item_vec, user_vec])
    ConcatDrop = Dropout(0.5)(Concat)

    kernel_initializer='he_normal'
    
    # Use Dense to learn non-linear dense representation
    Dense_1 = Dense(10, kernel_initializer='glorot_normal', name="Dense1")(ConcatDrop)
    Dense_1_Drop = Dropout(0.5)(Dense_1)
    Dense_2 = Dense(1, kernel_initializer='glorot_normal', name="Dense2")(Dense_1_Drop)

    
    AddBias = Add(name="AddBias")([Dense_2, item_bias_vec, user_bias_vec])
    
    
    
    # Scaling for each user
    y = Activation('sigmoid')(AddBias)
    rating_output = Lambda(lambda x: x * (max_rating - min_rating) + min_rating)(y)
    
    # Model Creation
    model = Model([user_input, item_input], rating_output)
    
    # Compile Model
    model.compile(loss='mean_squared_error', optimizer=Adam(learning_rate=0.001))
    
    return model

In [13]:
n_factors = 50
model = Deep_MF(n_users, n_items, n_factors)
model.summary()

Model: "model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
Item (InputLayer)               [(None, 1)]          0                                            
__________________________________________________________________________________________________
User (InputLayer)               [(None, 1)]          0                                            
__________________________________________________________________________________________________
ItemEmbedding (Embedding)       (None, 1, 50)        84100       Item[0][0]                       
__________________________________________________________________________________________________
UserEmbedding (Embedding)       (None, 1, 50)        47150       User[0][0]                       
______________________________________________________________________________________________

In [14]:
output = model.fit([train.USER, train.ITEM], train.RATING, 
                                  batch_size=128, epochs=5, verbose=1, 
                   validation_data= ([test.USER, test.ITEM], test.RATING))

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [15]:
vis.metrics(output.history)

In [17]:
ranking_topk = recommend_topk(model, DATA, train, k=5)

In [18]:
precision = precision_at_k(test, ranking_topk, k=10)
precision

0.016755037115588543

In [16]:
embedding = model.get_layer(name = "ItemEmbedding").get_weights()[0]

In [43]:
np.save('embedding.npy', embedding)

In [60]:
model_res = NearestNeighbors(n_neighbors=20, algorithm="ball_tree").fit(embedding)

with open('deep_matrix_factorzation.sav', 'wb') as pickle_out:
    pickle.dump(model_res, pickle_out)

distances, indices = model_res.kneighbors(embedding,n_neighbors=10)

In [67]:
recommended = indices[user_encoder.transform([1])]
recommended

array([[   0, 1497, 1525, 1357, 1481, 1397, 1634,  644,    6, 1615]])

In [68]:
recommend_frame = []
for val in recommended[0]:
    movie_idx = DATA.iloc[val]['movie_id']
    idx = movies[movies['movie_id'] == movie_idx].index
    recommend_frame.append({'Title':movies.iloc[idx]['title'].values[0]})
df = pd.DataFrame(recommend_frame,index=range(1,11))
df

Unnamed: 0,Title
1,Kolya (1996)
2,Basquiat (1996)
3,Quiz Show (1994)
4,Bram Stoker's Dracula (1992)
5,Jurassic Park (1993)
6,Mary Poppins (1964)
7,"Philadelphia Story, The (1940)"
8,Money Train (1995)
9,"Hunt for Red October, The (1990)"
10,Ran (1985)
