## A recommender system is nothing but a system which predicts how a user will rate a movie which he/she has not seen yet, Top rated movies are then recommended sequentially  

This Notebook showcase **4 techniques** which are uses for the above stated task 

**Memory based techniques**
1. USER-USER COLLABORATIVE FILTERING
2. ITEM-ITEM COLLABORATIVE FILTERING
3. CONTENT BASED FILTERING

**Model based technique**

4. Matrix Factorization

In [0]:
import numpy as np
import pandas as pd

In [0]:
train = pd.read_csv(r"C:\Users\HP\Desktop\ML\MovieLens\ml-100k\ua.base",sep="\t",names=["user_id","movie_id","ratings","unix_timestamp"])
test = pd.read_csv(r"C:\Users\HP\Desktop\ML\MovieLens\ml-100k\ua.test",sep="\t",names=["user_id","movie_id","ratings","unix_timestamp"])
ratings = pd.read_csv(r"C:\Users\HP\Desktop\ML\MovieLens\ml-100k\u.data",sep="\t",names=["user_id","movie_id","ratings","unix_timestamp"])

In [0]:
print(train.shape)
print(test.shape)

(90570, 4)
(9430, 4)


In [0]:
num_user=ratings["user_id"].unique().shape[0]
num_items=ratings["movie_id"].unique().shape[0]

In [0]:
data_matrix=np.zeros((num_user,num_items))
for row in ratings.itertuples():
    data_matrix[row[1]-1,row[2]-1]=row[3]

In [0]:
data_matrix

array([[5., 3., 4., ..., 0., 0., 0.],
       [4., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [5., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 5., 0., ..., 0., 0., 0.]])

Collaborative Filtering

In [0]:
from sklearn.metrics.pairwise import pairwise_distances 
user_similarity = pairwise_distances(data_matrix, metric='cosine')
item_similarity = pairwise_distances(data_matrix.T, metric='cosine')

In [0]:
user_similarity

array([[0.        , 0.83306902, 0.95254046, ..., 0.85138306, 0.82049212,
        0.60182526],
       [0.83306902, 0.        , 0.88940868, ..., 0.83851522, 0.82773219,
        0.89420212],
       [0.95254046, 0.88940868, 0.        , ..., 0.89875744, 0.86658385,
        0.97344413],
       ...,
       [0.85138306, 0.83851522, 0.89875744, ..., 0.        , 0.8983582 ,
        0.90488042],
       [0.82049212, 0.82773219, 0.86658385, ..., 0.8983582 , 0.        ,
        0.81753534],
       [0.60182526, 0.89420212, 0.97344413, ..., 0.90488042, 0.81753534,
        0.        ]])

In [0]:
item_similarity

array([[0.        , 0.59761782, 0.66975521, ..., 1.        , 0.95281693,
        0.95281693],
       [0.59761782, 0.        , 0.72693082, ..., 1.        , 0.92170064,
        0.92170064],
       [0.66975521, 0.72693082, 0.        , ..., 1.        , 1.        ,
        0.90312495],
       ...,
       [1.        , 1.        , 1.        , ..., 0.        , 1.        ,
        1.        ],
       [0.95281693, 0.92170064, 1.        , ..., 1.        , 0.        ,
        1.        ],
       [0.95281693, 0.92170064, 0.90312495, ..., 1.        , 1.        ,
        0.        ]])

In [0]:

mean_user_rating = data_matrix.mean(axis=1)

In [0]:
def predict(ratings, similarity, type='user'):
    if type == 'user':
        mean_user_rating = ratings.mean(axis=1)
        #We use np.newaxis so that mean_user_rating has same format as ratings
        ratings_diff = (ratings - mean_user_rating[:, np.newaxis])
        pred = mean_user_rating[:, np.newaxis] + similarity.dot(ratings_diff) / np.array([np.abs(similarity).sum(axis=1)]).T
    elif type == 'item':
        pred = ratings.dot(similarity) / np.array([np.abs(similarity).sum(axis=1)])
    return pred

In [0]:
user_prediction = predict(data_matrix, user_similarity, type='user')
item_prediction = predict(data_matrix, item_similarity, type='item')

In [0]:
item_prediction

array([[0.44627765, 0.475473  , 0.50593755, ..., 0.58815455, 0.5731069 ,
        0.56669645],
       [0.10854432, 0.13295661, 0.12558851, ..., 0.13445801, 0.13657587,
        0.13711081],
       [0.08568497, 0.09169006, 0.08764343, ..., 0.08465892, 0.08976784,
        0.09084451],
       ...,
       [0.03230047, 0.0450241 , 0.04292449, ..., 0.05302764, 0.0519099 ,
        0.05228033],
       [0.15777917, 0.17409459, 0.18900003, ..., 0.19979296, 0.19739388,
        0.20003117],
       [0.24767207, 0.24489212, 0.28263031, ..., 0.34410424, 0.33051406,
        0.33102478]])

Content Based Filtering

In [0]:
item_cols = ['movie id','movie title','release date',
'video release date','IMDb URL','unknown','Action',
'Adventure','Animation','Childrens','Comedy','Crime',
'Documentary','Drama','Fantasy','Film-Noir','Horror',
'Musical','Mystery','Romance ','Sci-Fi','Thriller',
'War' ,'Western']
item = pd.read_csv(r"C:\Users\HP\Desktop\ML\MovieLens\ml-100k\u.item", sep='|',
names=item_cols, encoding='latin-1')

In [0]:
item.head()

Unnamed: 0,movie id,movie title,release date,video release date,IMDb URL,unknown,Action,Adventure,Animation,Childrens,...,Fantasy,Film-Noir,Horror,Musical,Mystery,Romance,Sci-Fi,Thriller,War,Western
0,1,Toy Story (1995),01-Jan-1995,,http://us.imdb.com/M/title-exact?Toy%20Story%2...,0,0,0,1,1,...,0,0,0,0,0,0,0,0,0,0
1,2,GoldenEye (1995),01-Jan-1995,,http://us.imdb.com/M/title-exact?GoldenEye%20(...,0,1,1,0,0,...,0,0,0,0,0,0,0,1,0,0
2,3,Four Rooms (1995),01-Jan-1995,,http://us.imdb.com/M/title-exact?Four%20Rooms%...,0,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0
3,4,Get Shorty (1995),01-Jan-1995,,http://us.imdb.com/M/title-exact?Get%20Shorty%...,0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,5,Copycat (1995),01-Jan-1995,,http://us.imdb.com/M/title-exact?Copycat%20(1995),0,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0


In [0]:
item=item.drop(["release date",'video release date','IMDb URL',"movie title"],axis=1)
item.head()

Unnamed: 0,movie id,unknown,Action,Adventure,Animation,Childrens,Comedy,Crime,Documentary,Drama,Fantasy,Film-Noir,Horror,Musical,Mystery,Romance,Sci-Fi,Thriller,War,Western
0,1,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0
1,2,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0
2,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0
3,4,0,1,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0
4,5,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0


In [0]:
item=item.set_index(["movie id"])
item.head()

Unnamed: 0_level_0,unknown,Action,Adventure,Animation,Childrens,Comedy,Crime,Documentary,Drama,Fantasy,Film-Noir,Horror,Musical,Mystery,Romance,Sci-Fi,Thriller,War,Western
movie id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
1,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0
2,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0
3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0
4,0,1,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0
5,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0


In [0]:
for i in range(1,item.shape[0]):
    item.loc[i]=item.loc[i]/item.loc[i].sum()

In [0]:
item.shape

(1682, 19)

In [0]:
data_matrix.shape

(943, 1682)

In [0]:
user_vector=np.dot(data_matrix,np.asarray(item))
user_vector.shape

(943, 19)

In [0]:
user_vector=user_vector/19
user_vector

array([[0.21052632, 4.6754386 , 1.97280702, ..., 4.18245614, 1.63596491,
        0.55263158],
       [0.        , 0.71929825, 0.21052632, ..., 0.87719298, 0.1754386 ,
        0.        ],
       [0.        , 0.68684211, 0.23947368, ..., 1.05701754, 0.23508772,
        0.        ],
       ...,
       [0.        , 0.81578947, 0.42807018, ..., 0.55263158, 0.05263158,
        0.        ],
       [0.        , 1.46929825, 1.05701754, ..., 1.84210526, 0.89035088,
        0.38596491],
       [0.        , 4.60614035, 1.9       , ..., 3.1622807 , 0.91315789,
        0.5745614 ]])

In [0]:
final_score=np.dot(user_vector,item.transpose())
final_score

array([[ 3.89327485,  3.61023392,  4.18245614, ...,  8.98464912,
         9.67192982, 14.26315789],
       [ 0.86403509,  0.60233918,  0.87719298, ...,  3.03289474,
         2.26754386,  4.53070175],
       [ 0.35964912,  0.66111111,  1.05701754, ...,  1.13947368,
         1.07894737,  1.97807018],
       ...,
       [ 0.39883041,  0.59883041,  0.55263158, ...,  0.40789474,
         0.84912281,  0.76315789],
       [ 1.27631579,  1.45614035,  1.84210526, ...,  3.09429825,
         2.28947368,  4.57894737],
       [ 1.88304094,  3.22280702,  3.1622807 , ...,  4.22850877,
         5.0877193 ,  6.29385965]])

Matrix Factorization

In [0]:
class MF():

    def __init__(self, R, K, alpha, beta, iterations):
        """

        Arguments
        - R (ndarray)   : user-item rating matrix
        - K (int)       : number of latent dimensions
        - alpha (float) : learning rate
        - beta (float)  : regularization parameter
        """

        self.R = R
        self.num_users, self.num_items = R.shape
        self.K = K
        self.alpha = alpha
        self.beta = beta
        self.iterations = iterations

    def train(self):
        
        self.P = np.random.normal(scale=1./self.K, size=(self.num_users, self.K))
        self.Q = np.random.normal(scale=1./self.K, size=(self.num_items, self.K))

        
        self.b_u = np.zeros(self.num_users)
        self.b_i = np.zeros(self.num_items)
        self.b = np.mean(self.R[np.where(self.R != 0)])

        
        self.samples = [
            (i, j, self.R[i, j])
            for i in range(self.num_users)
            for j in range(self.num_items)
            if self.R[i, j] > 0
        ]

        
        training_process = []
        for i in range(self.iterations):
            np.random.shuffle(self.samples)
            self.sgd()
            mse = self.mse()
            training_process.append((i, mse))
            if (i+1) % 10 == 0:
                print("Iteration: %d ; error = %.4f" % (i+1, mse))

        return training_process

    def mse(self):
        
        xs, ys = self.R.nonzero()
        predicted = self.full_matrix()
        error = 0
        for x, y in zip(xs, ys):
            error += pow(self.R[x, y] - predicted[x, y], 2)
        return np.sqrt(error)

    def sgd(self):
        """
        stochastic graident descent
        """
        for i, j, r in self.samples:
            
            prediction = self.get_rating(i, j)
            e = (r - prediction)

            
            self.b_u[i] += self.alpha * (e - self.beta * self.b_u[i])
            self.b_i[j] += self.alpha * (e - self.beta * self.b_i[j])

            self.P[i, :] += self.alpha * (e * self.Q[j, :] - self.beta * self.P[i,:])
            self.Q[j, :] += self.alpha * (e * self.P[i, :] - self.beta * self.Q[j,:])

    def get_rating(self, i, j):
        """
        predicted rating of user i and item j
        """
        prediction = self.b + self.b_u[i] + self.b_i[j] + self.P[i, :].dot(self.Q[j, :].T)
        return prediction

    def full_matrix(self):
        """
        full matrix using the resultant biases, P and Q
        """
        return self.b + self.b_u[:,np.newaxis] + self.b_i[np.newaxis:,] + self.P.dot(self.Q.T)

In [0]:
R = np.array([
    [5, 3, 0, 1],
    [4, 0, 0, 1],
    [1, 1, 0, 5],
    [1, 0, 0, 4],
    [0, 1, 5, 4],
])
mf = MF(data_matrix, K=2, alpha=0.1, beta=0.01, iterations=20)

In [0]:
training_process = mf.train()
print(mf.full_matrix())