The collaborative filtering method consists of two parts:
Part 1: Creating the model
Part 2: Training the model and then calculating the RMSE
    
Following is a demonstration of Part 1:

In [16]:
#Importing libraries
import numpy as np
import keras.backend as K
import keras
from keras.layers import Embedding, Reshape, Merge, Dropout, Dense
from keras.models import Sequential



In [17]:
# CFModel and DeepModel
# Collaborative-filtering models assembled from Keras layers: a dot-product model and a multi-layer perceptron variant
# Keras is run on the TensorFlow backend
class CFModel(Sequential):
    """Dot-product collaborative-filtering model built from two embedding branches.

    Background reading: references [1], [2] and [3] listed at the end of the
    document.

    A user index and an item index are each looked up in their own
    ``k_factors``-wide embedding table; the two resulting vectors are merged
    with a dot product, and that single value is the model output.

    Args:
        n_users: Number of rows in the user embedding table.
        m_items: Number of rows in the item embedding table.
        k_factors: Width of every embedding vector.
        **kwargs: Forwarded unchanged to ``Sequential.__init__``.
    """

    def __init__(self, n_users, m_items, k_factors, **kwargs):
        # User branch: one integer index in, one k_factors-long vector out.
        P = Sequential()
        P.add(Embedding(n_users, k_factors, input_length=1))
        # Reshape removes the length-1 sequence axis left by input_length=1.
        P.add(Reshape((k_factors,)))

        # Item branch: same structure, separate embedding table.
        Q = Sequential()
        Q.add(Embedding(m_items, k_factors, input_length=1))
        Q.add(Reshape((k_factors,)))

        super(CFModel, self).__init__(**kwargs)
        # NOTE(review): `Merge` looks like a Keras 1.x layer and running the
        # notebook echoes this line as a warning - confirm the pinned Keras
        # version before upgrading.
        self.add(Merge([P, Q], mode='dot', dot_axes=1))

    def rate(self, user_id, item_id):
        """Return the predicted score for a single (user, item) pair.

        Args:
            user_id: Index fed to the user branch.
            item_id: Index fed to the item branch.

        Returns:
            The first element of the first row of ``self.predict``'s output
            for a batch containing just this one pair.
        """
        return self.predict([np.array([user_id]), np.array([item_id])])[0][0]
# Deep variant: concatenated embeddings followed by dropout and dense layers (ReLU hidden layer, linear output)
class DeepModel(Sequential):
    """Collaborative-filtering model that feeds concatenated embeddings to dense layers.

    Args:
        n_users: Number of rows in the user embedding table.
        m_items: Number of rows in the item embedding table.
        k_factors: Width of each embedding vector and of the hidden dense layer.
        p_dropout: Rate given to both ``Dropout`` layers.
        **kwargs: Forwarded unchanged to ``Sequential.__init__``.
    """

    def __init__(self, n_users, m_items, k_factors, p_dropout=0.1, **kwargs):
        def _tower(table_rows):
            # One index in -> one flat k_factors-long embedding vector out.
            tower = Sequential()
            tower.add(Embedding(table_rows, k_factors, input_length=1))
            tower.add(Reshape((k_factors,)))
            return tower

        # Towers are created user-first, then item, before this model is initialised.
        user_tower = _tower(n_users)
        item_tower = _tower(m_items)

        super(DeepModel, self).__init__(**kwargs)

        head_layers = (
            Merge([user_tower, item_tower], mode='concat'),
            Dropout(p_dropout),
            Dense(k_factors, activation='relu'),
            Dropout(p_dropout),
            Dense(1, activation='linear'),
        )
        for layer in head_layers:
            self.add(layer)

    def rate(self, user_id, item_id):
        """Return the predicted score for a single (user, item) pair."""
        user_batch = np.array([user_id])
        item_batch = np.array([item_id])
        scores = self.predict([user_batch, item_batch])
        return scores[0][0]

In [1]:
#importing the model described
import pandas as pd
from CFmodeldeep import CFModel

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [3]:
# Configuration constants: input files, weights file and model settings
RATINGS_CSV_FILE = 'ml1m_ratings.csv'    # ratings table (userid, movieid, rating columns are read)
USERS_CSV_FILE = 'ml1m_users.csv'        # user table
MOVIES_CSV_FILE = 'ml1m_movies.csv'      # movie table (movieid, title, genre columns are read)
MODEL_WEIGHTS_FILE = 'ml1m_weights.h5'   # weights file passed to load_weights
K_FACTORS = 120                          # passed to CFModel as k_factors (embedding width)
TEST_USER = 3000                         # userid whose ratings and recommendations are inspected

In [5]:
# Load the ratings table and record the largest user and movie ids;
# these are later handed to CFModel as the sizes of its two embedding tables.
ratings = pd.read_csv(RATINGS_CSV_FILE, sep='\t', encoding='latin-1', usecols=['userid', 'movieid', 'rating'])
# max() is unaffected by repeated values, so no de-duplication pass is needed first.
max_userid = ratings['userid'].max()
max_movieid = ratings['movieid'].max()


In [6]:
# Read the user table (tab-separated, Latin-1 encoded), keeping only the listed columns
users = pd.read_csv(
    USERS_CSV_FILE,
    sep='\t',
    encoding='latin-1',
    usecols=['userid', 'gender', 'zipcode', 'age_desc', 'occ_desc'],
)


In [7]:
# Read the movie table (tab-separated, Latin-1 encoded), keeping only the listed columns
movies = pd.read_csv(
    MOVIES_CSV_FILE,
    sep='\t',
    encoding='latin-1',
    usecols=['movieid', 'title', 'genre'],
)

In [8]:
# Instantiate the CFModel imported from CFmodeldeep: the largest user and movie ids
# are passed as n_users and m_items, and K_FACTORS as k_factors
trained_model = CFModel(max_userid, max_movieid, K_FACTORS)

  self.add(Merge([P, Q], mode='dot', dot_axes=1))


In [9]:
# Load the saved weights from the .h5 weights file into the model (requires h5py)
trained_model.load_weights(MODEL_WEIGHTS_FILE)

In [10]:
# Display the row of the users table that belongs to TEST_USER
users[users['userid'] == TEST_USER]

Unnamed: 0,userid,gender,zipcode,age_desc,occ_desc
2999,3000,M,55408,25-34,college/grad student


In [11]:
# Helper around trained_model.rate() that takes the userid and movieid as they appear in the dataset
def predict_rating(userid, movieid):
    """Return the trained model's predicted rating for one user/movie pair.

    Both ids are shifted down by one before being passed to
    ``trained_model.rate``.
    """
    user_index = userid - 1
    movie_index = movieid - 1
    return trained_model.rate(user_index, movie_index)

In [12]:
# Compare TEST_USER's actual ratings with the model's predictions and show
# the ten rows at the top after sorting by actual rating (highest first).
# A single .loc selection plus .copy() gives an independent frame, so adding
# the prediction column never writes into a slice of `ratings`.
user_ratings = ratings.loc[ratings['userid'] == TEST_USER, ['userid', 'movieid', 'rating']].copy()
user_ratings['prediction'] = user_ratings.apply(lambda x: predict_rating(TEST_USER, x['movieid']), axis=1)
(
    user_ratings
    .sort_values(by='rating', ascending=False)
    .merge(movies, on='movieid', how='inner', suffixes=['_u', '_m'])
    .head(10)
)

Unnamed: 0,userid,movieid,rating,prediction,title,genre
0,3000,590,5,3.560065,Dances with Wolves (1990),Adventure|Drama|Western
1,3000,3552,5,3.770743,Caddyshack (1980),Comedy
2,3000,2858,5,4.609284,American Beauty (1999),Comedy|Drama
3,3000,3358,5,3.693714,Defending Your Life (1991),Comedy|Romance
4,3000,2968,5,3.724358,Time Bandits (1981),Adventure|Fantasy|Sci-Fi
5,3000,1307,5,3.907889,When Harry Met Sally... (1989),Comedy|Romance
6,3000,144,5,3.282056,"Brothers McMullen, The (1995)",Comedy
7,3000,1193,5,4.160789,One Flew Over the Cuckoo's Nest (1975),Drama
8,3000,1265,5,4.434525,Groundhog Day (1993),Comedy|Romance
9,3000,733,5,3.30558,"Rock, The (1996)",Action|Adventure|Thriller


In [13]:

# Use the model to determine recommendations for TEST_USER:
# score every movie this user has not rated and show the ten highest predictions
recommendations = (
    ratings.loc[~ratings['movieid'].isin(user_ratings['movieid']), ['movieid']]
    .drop_duplicates()
)
recommendations['prediction'] = recommendations.apply(
    lambda row: predict_rating(TEST_USER, row['movieid']),
    axis=1,
)
(
    recommendations
    .sort_values(by='prediction', ascending=False)
    .merge(movies, on='movieid', how='inner', suffixes=['_u', '_m'])
    .head(10)
)

Unnamed: 0,movieid,prediction,title,genre
0,296,4.674262,Pulp Fiction (1994),Crime|Drama
1,50,4.633973,"Usual Suspects, The (1995)",Crime|Thriller
2,2959,4.633747,Fight Club (1999),Drama
3,1394,4.558996,Raising Arizona (1987),Comedy
4,318,4.551593,"Shawshank Redemption, The (1994)",Drama
5,608,4.485023,Fargo (1996),Crime|Drama|Thriller
6,1617,4.481786,L.A. Confidential (1997),Crime|Film-Noir|Mystery|Thriller
7,3160,4.376641,Magnolia (1999),Drama
8,1288,4.354255,This Is Spinal Tap (1984),Comedy|Drama|Musical
9,1136,4.351563,Monty Python and the Holy Grail (1974),Comedy


#References
Keras Layers
1)https://keras.io/layers/about-keras-layers/
Keras activation
2)https://keras.io/activations/
Keras Sequential Model
3)https://keras.io/models/sequential/

The text in the document by <Romell Segaran,Akshay Singh,Sushant Dhar> is licensed under CC BY 3.0 https://creativecommons.org/licenses/by/3.0/us/

The code in the document by <Romell Segaran,Akshay Singh,Sushant Dhar> is licensed under the MIT License https://opensource.org/licenses/MIT