#### Copyright (C) 2020 Sobhan Moradiyan Daghigh - All Rights Reserved
#### 12/29/2020
## Implementation of Movie Recommender System - (using MovieLenz dataset)
#### Here we wanna make a item-item recommendation which is a kind of Content base recommendation 
#### Question: find the 10 items which have most similarity to an input movie

In [7]:
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import pairwise_distances

### Reading datasets

In [15]:
r_col = 'user_id | item_id | rating | timestamp'.split(' | ')
raiting = pd.read_csv('movies-100k/u.data', sep='\t', names=r_col)
raiting.head()

Unnamed: 0,user_id,item_id,rating,timestamp
0,196,242,3,881250949
1,186,302,3,891717742
2,22,377,1,878887116
3,244,51,2,880606923
4,166,346,1,886397596


In [16]:
i_col = 'movie_id | movie_title | release date | video release date | IMDb URL | unknown | Action | Adventure | Animation | Children | Comedy | Crime | Documentary | Drama | Fantasy | Film-Noir | Horror | Musical | Mystery | Romance | Sci-Fi | Thriller | War | Western'.split(' | ')
items = pd.read_csv('movies-100k/u.item', sep='|', names=i_col, encoding='latin-1')
items.head()

Unnamed: 0,movie_id,movie_title,release date,video release date,IMDb URL,unknown,Action,Adventure,Animation,Children,...,Fantasy,Film-Noir,Horror,Musical,Mystery,Romance,Sci-Fi,Thriller,War,Western
0,1,Toy Story (1995),01-Jan-1995,,http://us.imdb.com/M/title-exact?Toy%20Story%2...,0,0,0,1,1,...,0,0,0,0,0,0,0,0,0,0
1,2,GoldenEye (1995),01-Jan-1995,,http://us.imdb.com/M/title-exact?GoldenEye%20(...,0,1,1,0,0,...,0,0,0,0,0,0,0,1,0,0
2,3,Four Rooms (1995),01-Jan-1995,,http://us.imdb.com/M/title-exact?Four%20Rooms%...,0,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0
3,4,Get Shorty (1995),01-Jan-1995,,http://us.imdb.com/M/title-exact?Get%20Shorty%...,0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,5,Copycat (1995),01-Jan-1995,,http://us.imdb.com/M/title-exact?Copycat%20(1995),0,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0


### Getting the number of users and items in order to define a similarity matrix

In [22]:
n_users = raiting.user_id.unique().shape[0]
n_items = raiting.item_id.unique().shape[0]

n_items

1682

### Place rates which users submitted for each item in a matrix

In [26]:
data_matrix = np.zeros((n_users, n_items))
for line in raiting.itertuples():
    data_matrix[line[1] - 1, line[2] - 1] = line[3]
    
data_matrix

array([[5., 3., 4., ..., 0., 0., 0.],
       [4., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [5., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 5., 0., ..., 0., 0., 0.]])

### Now calculate the similarity using 'Cosine' method

In [28]:
user_similarity = pairwise_distances(data_matrix, metric='cosine')
item_similarity = pairwise_distances(data_matrix, metric='cosine')

user_similarity

array([[0.        , 0.83306902, 0.95254046, ..., 0.85138306, 0.82049212,
        0.60182526],
       [0.83306902, 0.        , 0.88940868, ..., 0.83851522, 0.82773219,
        0.89420212],
       [0.95254046, 0.88940868, 0.        , ..., 0.89875744, 0.86658385,
        0.97344413],
       ...,
       [0.85138306, 0.83851522, 0.89875744, ..., 0.        , 0.8983582 ,
        0.90488042],
       [0.82049212, 0.82773219, 0.86658385, ..., 0.8983582 , 0.        ,
        0.81753534],
       [0.60182526, 0.89420212, 0.97344413, ..., 0.90488042, 0.81753534,
        0.        ]])

### And finaly define a function for searching item in the similarity matrix

In [35]:
def movie_recommender(movie_name, similarity_matrix):
    
    movie_id = int(items[items.movie_title == movie_name]['movie_id'])
    
    score = similarity_matrix[movie_id - 1]
    
    sorted_score = sorted(score, key=lambda x : x, reverse=True)
    
    movie_indexes = [list(score).index(x) for x in sorted_score]
    
    return items['movie_title'].iloc[movie_indexes]
    

### Yooha :) Did it.
#### The 10 items which have the most rate similarity to 'Mad Love (1995)' movie 

In [39]:
movie_recommender('Mad Love (1995)', item_similarity).unique().tolist()[1:11]

['Executive Decision (1996)',
 'Andre (1994)',
 'Strange Days (1995)',
 'Hour of the Pig, The (1993)',
 'Kicking and Screaming (1995)',
 'Notorious (1946)',
 'Sphere (1998)',
 'Grease (1978)',
 'My Life as a Dog (Mitt liv som hund) (1985)',
 'Time to Kill, A (1996)']

### Thank U.