#22 Movie Recommendation System Using SVD

### Importing the basic libraries

In [1]:
import numpy as np
import pandas as pd

### Importing & Parsing the dataset as ratings and movies details

In [5]:
ratingData = pd.io.parsers.read_csv('/content/ratings.dat', 
    names=['user_id', 'movie_id', 'rating', 'time'],
    engine='python', delimiter='::',encoding='latin-1')
movieData = pd.io.parsers.read_csv('/content/movies.dat',
    names=['movie_id', 'title', 'genre'],
    engine='python', delimiter='::',encoding='latin-1')
print(ratingData)

         user_id  movie_id  rating       time
0              1      1193       5  978300760
1              1       661       3  978302109
2              1       914       3  978301968
3              1      3408       4  978300275
4              1      2355       5  978824291
...          ...       ...     ...        ...
1000204     6040      1091       1  956716541
1000205     6040      1094       5  956704887
1000206     6040       562       5  956704746
1000207     6040      1096       4  956715648
1000208     6040      1097       4  956715569

[1000209 rows x 4 columns]


### Create the ratings matrix of shape (m×u)

In [6]:
ratingMatrix = np.ndarray(
    shape=(np.max(ratingData.movie_id.values), np.max(ratingData.user_id.values)),
    dtype=np.uint8)
ratingMatrix[ratingData.movie_id.values-1, ratingData.user_id.values-1] = ratingData.rating.values
print(ratingMatrix)

[[ 5  0  0 ...  0  0  3]
 [ 9  0  0 ...  0  0  0]
 [15  0  0 ...  0  0  0]
 ...
 [ 4  0  0 ...  0  0  0]
 [ 5  0  0 ...  0  0  0]
 [ 3  0  0 ...  0  0  0]]


### Subtract Mean off - Normalization

In [7]:
normalizedMatrix = ratingMatrix - np.asarray([(np.mean(ratingMatrix, 1))]).T
print(normalizedMatrix)

[[ 3.16804636 -1.83195364 -1.83195364 ... -1.83195364 -1.83195364
   1.16804636]
 [ 7.39884106 -1.60115894 -1.60115894 ... -1.60115894 -1.60115894
  -1.60115894]
 [12.82301325 -2.17698675 -2.17698675 ... -2.17698675 -2.17698675
  -2.17698675]
 ...
 [ 3.53824503 -0.46175497 -0.46175497 ... -0.46175497 -0.46175497
  -0.46175497]
 [ 4.51639073 -0.48360927 -0.48360927 ... -0.48360927 -0.48360927
  -0.48360927]
 [ 2.31821192 -0.68178808 -0.68178808 ... -0.68178808 -0.68178808
  -0.68178808]]


### Computing SVD

In [8]:
A = normalizedMatrix.T / np.sqrt(ratingMatrix.shape[0] - 1)
U, S, V = np.linalg.svd(A)

### Calculate cosine similarity, sort by most similar and return the top N

In [9]:
def similar(ratingData, movie_id, top_n):
    index = movie_id - 1 # Movie id starts from 1
    movie_row = ratingData[index, :]
    magnitude = np.sqrt(np.einsum('ij, ij -> i', ratingData, ratingData)) #Einstein summation |  traditional matrix multiplication and is equivalent to np.matmul(a,b)
    similarity = np.dot(movie_row, ratingData.T) / (magnitude[index] * magnitude)
    sort_indexes = np.argsort(-similarity) #Perform an indirect sort along the given axis (Last axis)
    return sort_indexes[:top_n]

### Select k principal components to represent the movies, a movie_id to find recommendations and print the top_n results

In [12]:
k = 50
movie_id = 4
top_n = 5

sliced = V.T[:, :k] # representative data
indexes = similar(sliced, movie_id, top_n)

print('Recommendations for Movie --: {0}: \n'.format(
movieData[movieData.movie_id == movie_id].title.values[0]))
for id in indexes + 1:
    print(movieData[movieData.movie_id == id].title.values[0])

Recommendations for Movie --: Waiting to Exhale (1995): 

Waiting to Exhale (1995)
Nixon (1995)
Home for the Holidays (1995)
Carrington (1995)
Immortal Beloved (1994)
