# Movie Recommendation System Using Singular Value Decomposition

In [1]:
import numpy as np
import pandas as pd

# Importing & Parsing the dataset as rating and movies details

In [2]:
ratingData = pd.io.parsers.read_csv("C:/Users/rushi/OneDrive/Desktop/INTERNSHIP COURSE-Pantech/3 Machine learning-30Days Internship/Unsupervised ML-DayWise/DAY_22_MovieRecommendationSystemUsingSVD/ratings.dat",names=['user_id', 'movie_id', 'rating', 'time'],
                                   engine='python', delimiter='::',encoding='ISO-8859-1')

In [3]:
movieData = pd.io.parsers.read_csv("C:/Users/rushi/OneDrive/Desktop/INTERNSHIP COURSE-Pantech/3 Machine learning-30Days Internship/Unsupervised ML-DayWise/DAY_22_MovieRecommendationSystemUsingSVD/movies.dat",names=['movie_id', 'title', 'genre'],
                                   engine='python', delimiter='::',encoding='ISO-8859-1')

In [4]:
ratingData

Unnamed: 0,user_id,movie_id,rating,time
0,1,1193,5,978300760
1,1,661,3,978302109
2,1,914,3,978301968
3,1,3408,4,978300275
4,1,2355,5,978824291
...,...,...,...,...
1000204,6040,1091,1,956716541
1000205,6040,1094,5,956704887
1000206,6040,562,5,956704746
1000207,6040,1096,4,956715648


In [5]:
movieData

Unnamed: 0,movie_id,title,genre
0,1,Toy Story (1995),Animation|Children's|Comedy
1,2,Jumanji (1995),Adventure|Children's|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama
4,5,Father of the Bride Part II (1995),Comedy
...,...,...,...
3878,3948,Meet the Parents (2000),Comedy
3879,3949,Requiem for a Dream (2000),Drama
3880,3950,Tigerland (2000),Drama
3881,3951,Two Family House (2000),Drama


# Create the rating matrix of shape(mxu)

In [6]:
ratingMatrix = np.ndarray(
    shape=(np.max(ratingData.movie_id.values), np.max(ratingData.user_id.values)),
    dtype=np.uint8)
ratingMatrix[ratingData.movie_id.values-1, ratingData.user_id.values-1] = ratingData.rating.values
print(ratingMatrix)

[[5 0 0 ... 0 0 3]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 ...
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]]


# Subtract Mean off-Normalization

In [7]:
normalizedMatrix = ratingMatrix - np.asarray([(np.mean(ratingMatrix, 1))]).T

print(normalizedMatrix)

[[ 3.57400662 -1.42599338 -1.42599338 ... -1.42599338 -1.42599338
   1.57400662]
 [-0.37152318 -0.37152318 -0.37152318 ... -0.37152318 -0.37152318
  -0.37152318]
 [-0.23874172 -0.23874172 -0.23874172 ... -0.23874172 -0.23874172
  -0.23874172]
 ...
 [-0.03278146 -0.03278146 -0.03278146 ... -0.03278146 -0.03278146
  -0.03278146]
 [-0.02582781 -0.02582781 -0.02582781 ... -0.02582781 -0.02582781
  -0.02582781]
 [-0.24288079 -0.24288079 -0.24288079 ... -0.24288079 -0.24288079
  -0.24288079]]


# Computing SVD

In [8]:
A = normalizedMatrix.T /  np.sqrt(ratingMatrix.shape[0] - 1)
U, S, V = np.linalg.svd(A)

# Calculate cosine similarity, sort by most similar and return the top N

In [9]:
def similar(ratingData, movie_id, top_n=10):
    index = movie_id - 1 # movie id starts from 1
    movie_row =ratingData[index, :]
    
    magnitude = np.sqrt(np.einsum('ij, ij -> i', ratingData, ratingData)) # Einstein summation | traditional matrix multiplication and is equivalent to np.matmul(a,b)
    
    similarity = np.dot(movie_row, ratingData.T) / (magnitude[index] * magnitude)
    
    sort_indexes = np.argsort(-similarity) #Perform an indirect sort along the given axis (Last axis)
    return sort_indexes[:top_n]


# Select k Principal components to represent the movies, a movie_id to find recommendations and print the top_n results

In [11]:
k = 50
movie_id = int(input("Enter the movie_id which you want to similarity : "))
top_n = int(input("Enter the Number which you want to recommendation: "))

sliced = V.T[:, : k] # representative data
indexes = similar(sliced, movie_id, top_n)

print('Recommendation for Movie {0}: \n'.format(
movieData[movieData.movie_id == movie_id].title.values[0]))
for id in indexes + 1:
    print(movieData[movieData.movie_id == id].title.values[0])

Enter the movie_id which you want to similarity : 54
Enter the Number which you want to recommendation: 10
Recommendation for Movie Big Green, The (1995): 

Big Green, The (1995)
Richie Rich (1994)
Heavyweights (1994)
Blank Check (1994)
D3: The Mighty Ducks (1996)
Little Rascals, The (1994)
D2: The Mighty Ducks (1994)
Man of the House (1995)
Theodore Rex (1995)
Jack (1996)


  similarity = np.dot(movie_row, ratingData.T) / (magnitude[index] * magnitude)


In [12]:
k = 50
movie_id = int(input("Enter the movie_id which you want to similarity : "))
top_n = int(input("Enter the Number which you want to recommendation: "))

sliced = V.T[:, : k] # representative data
indexes = similar(sliced, movie_id, top_n)

print('Recommendation for Movie {0}: \n'.format(
movieData[movieData.movie_id == movie_id].title.values[0]))
for id in indexes + 1:
    print(movieData[movieData.movie_id == id].title.values[0])

Enter the movie_id which you want to similarity : 2
Enter the Number which you want to recommendation: 5
Recommendation for Movie Jumanji (1995): 

Jumanji (1995)
Hook (1991)
Indian in the Cupboard, The (1995)
NeverEnding Story II: The Next Chapter, The (1990)
Dragonheart (1996)


  similarity = np.dot(movie_row, ratingData.T) / (magnitude[index] * magnitude)
