<a href="https://colab.research.google.com/github/riyajaiswal25/MLProjects/blob/main/Movie_Recommendation_System_using_SVD.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **Movie Recommendation System Using SVD**

***Importing the basic libraries***

In [2]:
import numpy as np
import pandas as pd


***Importing & Parsing the dataset as ratings and movies details***

In [3]:
ratingData = pd.io.parsers.read_csv('ratings.dat',names=['user_id','movie_id','rating','time'], engine='python', delimiter=',')
movieData = pd.io.parsers.read_csv('movies.dat',names=['movie_id','title','genre'], engine='python', delimiter=',')
print(ratingData)

        user_id  movie_id  rating        time
0             1        31     2.5  1260759144
1             1      1029     3.0  1260759179
2             1      1061     3.0  1260759182
3             1      1129     2.0  1260759185
4             1      1172     4.0  1260759205
...         ...       ...     ...         ...
99999       671      6268     2.5  1065579370
100000      671      6269     4.0  1065149201
100001      671      6365     4.0  1070940363
100002      671      6385     2.5  1070979663
100003      671      6565     3.5  1074784724

[100004 rows x 4 columns]


***Create the ratings matrix of shape(mxu)***

In [4]:
ratingMatrix = np.ndarray(shape=(np.max(ratingData.movie_id.values),np.max(ratingData.user_id.values)),dtype=np.uint8)
ratingMatrix[ratingData.movie_id.values-1, ratingData.user_id.values-1] = ratingData.rating.values
print(ratingMatrix)

[[240 151 243 ... 215   4   5]
 [  0  48 173 ...  51 215 127]
 [  0   0 176 ... 243  51 215]
 ...
 [  0   0   0 ...   0   0   0]
 [  0   0   0 ...   0   0   0]
 [  0   0   0 ...   0   0   0]]


***Subtract Mean off - Normalization***

In [5]:
normalizedMatrix = ratingMatrix - np.asarray([(np.mean(ratingMatrix, 1))]).T
print(normalizedMatrix)

[[ 1.61157973e+02  7.21579732e+01  1.64157973e+02 ...  1.36157973e+02
  -7.48420268e+01 -7.38420268e+01]
 [-1.04283159e+02 -5.62831595e+01  6.87168405e+01 ... -5.32831595e+01
   1.10716841e+02  2.27168405e+01]
 [-1.15372578e+02 -1.15372578e+02  6.06274218e+01 ...  1.27627422e+02
  -6.43725782e+01  9.96274218e+01]
 ...
 [ 0.00000000e+00  0.00000000e+00  0.00000000e+00 ...  0.00000000e+00
   0.00000000e+00  0.00000000e+00]
 [ 0.00000000e+00  0.00000000e+00  0.00000000e+00 ...  0.00000000e+00
   0.00000000e+00  0.00000000e+00]
 [-7.45156483e-03 -7.45156483e-03 -7.45156483e-03 ... -7.45156483e-03
  -7.45156483e-03 -7.45156483e-03]]


***Computing SVD***

In [None]:
A = normalizedMatrix.T / np.sqrt(ratingMatrix.shape[0]-1)
U, S, V = np.linalg.svd(A)

***Calculating cosine similarity, sort by most similar and return the top N***

In [None]:
def similar(ratingData, movie_id, top_n):
  index = movie_id-1 #movie starts from 1
  movie_row = ratingData[index, :]
  magnitude = np.sqrt(np.einsum('ij','ij->i', ratingData, ratingData)) #Einstein summation | traditional matrix
  similarity = np.dot(movie_row, ratingData.T) / (magnitude[index]*magnitude)
  sort_indexes = np.argsort(-similarity) #Perform an indirect sort along the given axis (Last axis)
  return sort_indexes[:top_n]


***Select k principal components to represent the movies, a movie_id to find recommendations and print the top_n results***

In [None]:
k = 50
movie_id = 23
top_n = 10