# Movie Recommendation System Using SVD

### Importing the basic libraries

In [None]:
import numpy as np
import pandas as pd

### Importing and parsing the dataset into ratings and movie details

In [None]:
# Colab-only step: files.upload() opens the browser file picker and blocks until
# a file is chosen. The recorded output shows movies.dat being saved under that
# name, which is the path the parsing cell reads later.
from google.colab import files
uploaded = files.upload()

Saving movies.dat to movies.dat


In [None]:
# Second Colab-only upload, this time for ratings.dat (see the recorded output).
# Note: this rebinds `uploaded`, so the result of any earlier upload call is no
# longer reachable through that name.
from google.colab import files
uploaded = files.upload()

Saving ratings.dat to ratings.dat


In [None]:
# Both .dat files use '::' as the field separator and carry no header row, so
# the column names are supplied explicitly. A multi-character separator is
# handled by pandas' Python parsing engine, hence engine='python'.
ratingData = pd.read_csv(
    'ratings.dat',
    sep='::',
    engine='python',
    names=['user_id', 'movie_id', 'rating', 'time'],
)
movieData = pd.read_csv(
    'movies.dat',
    sep='::',
    engine='python',
    names=['movie_id', 'title', 'genre'],
)
print(ratingData)

         user_id  movie_id  rating       time
0              1      1193       5  978300760
1              1       661       3  978302109
2              1       914       3  978301968
3              1      3408       4  978300275
4              1      2355       5  978824291
...          ...       ...     ...        ...
1000204     6040      1091       1  956716541
1000205     6040      1094       5  956704887
1000206     6040       562       5  956704746
1000207     6040      1096       4  956715648
1000208     6040      1097       4  956715569

[1000209 rows x 4 columns]


### Create the ratings matrix of shape (m × u): one row per movie, one column per user

In [None]:
# Dense movie x user matrix. Ids are 1-based, so movie_id - 1 is the row and
# user_id - 1 is the column; the matrix is sized by the largest id of each kind.
#
# The matrix must start out as zeros: np.ndarray(shape=...) only allocates
# memory without initialising it, so every (movie, user) pair that has no
# rating would otherwise hold an arbitrary byte instead of 0 and corrupt the
# means and the SVD computed from this matrix.
num_movies = ratingData.movie_id.max()
num_users = ratingData.user_id.max()
ratingMatrix = np.zeros(shape=(num_movies, num_users), dtype=np.uint8)

# Scatter each rating into its (movie, user) cell in one vectorised assignment.
ratingMatrix[ratingData.movie_id.values - 1, ratingData.user_id.values - 1] = ratingData.rating.values
print(ratingMatrix)

[[  5  45 193 ... 127   0   3]
 [240  12 186 ... 127   0   0]
 [ 48  10 195 ... 127   0   0]
 ...
 [  0   0   0 ... 175 204  65]
 [  0   0 128 ... 187 204  65]
 [  0   0 128 ... 174 204  65]]


### Normalization: subtract each movie's mean rating

In [None]:
# Mean of every row (axis=1), i.e. one mean per movie taken over all user
# columns, including the cells of users who never rated that movie.
# keepdims=True keeps the result as an (m, 1) column so it broadcasts across
# the user axis when subtracted.
movie_means = ratingMatrix.mean(axis=1, keepdims=True)
normalizedMatrix = ratingMatrix - movie_means
print(normalizedMatrix)

[[ -52.08990066  -12.08990066  135.91009934 ...   69.91009934
   -57.08990066  -54.08990066]
 [ 165.27996689  -62.72003311  111.27996689 ...   52.27996689
   -74.72003311  -74.72003311]
 [ -27.49122517  -65.49122517  119.50877483 ...   51.50877483
   -75.49122517  -75.49122517]
 ...
 [-100.22284768 -100.22284768 -100.22284768 ...   74.77715232
   103.77715232  -35.22284768]
 [ -95.4839404   -95.4839404    32.5160596  ...   91.5160596
   108.5160596   -30.4839404 ]
 [ -88.29536424  -88.29536424   39.70463576 ...   85.70463576
   115.70463576  -23.29536424]]


### Computing SVD

In [None]:
# A has one row per user and one column per movie (transpose of the centred
# matrix), scaled by 1 / sqrt(number_of_movies - 1).
# NOTE(review): the scale is a single scalar, so it only rescales the singular
# values S; the singular vectors used for the recommendations are unaffected.
A = normalizedMatrix.T / np.sqrt(len(ratingMatrix) - 1)

# np.linalg.svd returns the right singular vectors already transposed: each
# ROW of the third result is a singular vector, so `V` here is V^T (numpy's
# `Vh`). full_matrices=True is numpy's default, spelled out for clarity.
U, S, V = np.linalg.svd(A, full_matrices=True)

### Calculate cosine similarity, sort by most similar and return the top N

In [None]:
def similar(ratingData, movie_id, top_n):
    """Return the row indexes of the ``top_n`` rows most cosine-similar to a movie.

    Parameters
    ----------
    ratingData : array-like, 2-D
        One row per movie (row ``i`` belongs to movie id ``i + 1``) and one
        column per feature. Converted to a float array internally.
    movie_id : int
        1-based id of the query movie.
    top_n : int
        Number of row indexes to return.

    Returns
    -------
    numpy.ndarray
        0-based row indexes ordered from most to least similar, at most
        ``top_n`` long. Ties keep ascending index order. Rows with zero norm
        have no defined cosine similarity and are ranked last.

    Raises
    ------
    IndexError
        If ``movie_id`` is not in ``1 .. number of rows``. Without this check
        ``movie_id <= 0`` would silently wrap around to a row at the end.
    """
    ratingData = np.asarray(ratingData, dtype=float)
    index = movie_id - 1  # Movie id starts from 1
    if not 0 <= index < ratingData.shape[0]:
        raise IndexError(
            'movie_id {0} is out of range 1..{1}'.format(movie_id, ratingData.shape[0]))
    movie_row = ratingData[index, :]

    # 'ij, ij -> i' multiplies the array with itself element-wise and sums over
    # the columns: the squared Euclidean norm of every row (not a matrix product).
    magnitude = np.sqrt(np.einsum('ij, ij -> i', ratingData, ratingData))

    dot_products = np.dot(movie_row, ratingData.T)
    denominator = magnitude[index] * magnitude

    # Divide only where the denominator is positive. Zero-norm rows would give
    # 0/0 (NaN plus a RuntimeWarning); they keep the -inf placeholder instead,
    # which sorts them to the end.
    similarity = np.full(dot_products.shape, -np.inf)
    np.divide(dot_products, denominator, out=similarity, where=denominator > 0)

    # Negate for a descending sort; a stable sort makes tie order reproducible.
    sort_indexes = np.argsort(-similarity, kind='stable')
    return sort_indexes[:top_n]

### Select k principal components to represent the movies, a movie_id to find recommendations and print the top_n results

In [None]:
k = 50        # number of principal components kept per movie
movie_id = 2  # 1-based id of the movie to find recommendations for
top_n = 5     # number of recommendations to print

# `V` is the transposed right-singular-vector matrix returned by np.linalg.svd,
# so V.T has one row per movie; its first k columns are the movie's coordinates
# on the k leading components.
sliced = V.T[:, :k] # representative data

# The query movie is its own nearest neighbour (cosine similarity 1), so ask
# for one extra result and drop the query row before keeping top_n.
candidates = similar(sliced, movie_id, top_n + 1)
indexes = candidates[candidates != movie_id - 1][:top_n]

# Build the movie_id -> title lookup once instead of re-scanning the frame for
# every id. drop_duplicates keeps the first row per id, matching `.values[0]`.
titles = movieData.drop_duplicates('movie_id').set_index('movie_id').title

print('Recommendations for Movie {0}: \n'.format(
    titles.get(movie_id, '<unknown movie id {0}>'.format(movie_id))))
# Row index + 1 is the movie id. The matrix has a row for every id up to the
# maximum, so an id may have no entry in movieData; print a placeholder rather
# than failing with an IndexError.
for recommended_id in indexes + 1:
    print(titles.get(recommended_id, '<unknown movie id {0}>'.format(recommended_id)))

Recommendations for Movie Jumanji (1995): 

Jumanji (1995)
Tales from the Hood (1995)
Johnny 100 Pesos (1993)
GoldenEye (1995)
Perez Family, The (1995)
