### Excercise based on [this article](https://www.analyticsvidhya.com/blog/2018/06/comprehensive-guide-recommendation-engine-python/)


In [1]:
# load user data

users = pd.read_csv('ml-100k/u.user', sep = '|', names = ['uid', 'age', 'sex', 'job', 'zip'])
users.head()

Unnamed: 0,uid,age,sex,job,zip
0,1,24,M,technician,85711
1,2,53,F,other,94043
2,3,23,M,writer,32067
3,4,24,M,technician,43537
4,5,33,F,other,15213


In [2]:
# load rating data

ratings = pd.read_csv('ml-100k/u.data', sep = '\t', names = ['uid', 'mid', 'rating', 'timestamp'])
ratings.timestamp = pd.to_datetime(ratings.timestamp, unit = 's')
ratings.head()

Unnamed: 0,uid,mid,rating,timestamp
0,196,242,3,1997-12-04 15:55:49
1,186,302,3,1998-04-04 19:22:22
2,22,377,1,1997-11-07 07:18:36
3,244,51,2,1997-11-27 05:02:03
4,166,346,1,1998-02-02 05:33:16


In [3]:
# load items data

items = pd.read_csv('ml-100k/u.item', sep = '|', names = ['mid', 'title', 'release_date', 'vd_release_date', 'url', 'unkown', 'Action', 'Adventure',
'Animation', 'Children\'s', 'Comedy', 'Crime', 'Documentary', 'Drama', 'Fantasy',
'Film-Noir', 'Horror', 'Musical', 'Mystery', 'Romance', 'Sci-Fi', 'Thriller', 'War', 'Western'], encoding = 'latin-1', parse_dates = ['release_date'])
items.head()

Unnamed: 0,mid,title,release_date,vd_release_date,url,unkown,Action,Adventure,Animation,Children's,Comedy,Crime,Documentary,Drama,Fantasy,Film-Noir,Horror,Musical,Mystery,Romance,Sci-Fi,Thriller,War,Western
0,1,Toy Story (1995),1995-01-01,,http://us.imdb.com/M/title-exact?Toy%20Story%2...,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0
1,2,GoldenEye (1995),1995-01-01,,http://us.imdb.com/M/title-exact?GoldenEye%20(...,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0
2,3,Four Rooms (1995),1995-01-01,,http://us.imdb.com/M/title-exact?Four%20Rooms%...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0
3,4,Get Shorty (1995),1995-01-01,,http://us.imdb.com/M/title-exact?Get%20Shorty%...,0,1,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0
4,5,Copycat (1995),1995-01-01,,http://us.imdb.com/M/title-exact?Copycat%20(1995),0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0


In [4]:
# load training & testing data for ratings

ratings_train = pd.read_csv('ml-100k/ua.base', sep = '\t', names = ['uid', 'mid', 'rating', 'timestamp'])
ratings_test = pd.read_csv('ml-100k/ua.test', sep = '\t', names = ['uid', 'mid', 'rating', 'timestamp'])
ratings_train.timestamp = pd.to_datetime(ratings_train.timestamp, unit = 's')
ratings_test.timestamp = pd.to_datetime(ratings_test.timestamp, unit = 's')
ratings_train.shape,ratings_test.shape


((90570, 4), (9430, 4))

In [5]:
# get unique number of users and movies

n_users = len(ratings.uid.unique())
n_items = len(ratings.mid.unique())

In [6]:
# pivot data ratings df into user by movie rating matrix

data_matrix = ratings.pivot_table(index='uid', columns=['mid'], values='rating', aggfunc='mean').replace(np.nan, 0).values
data_matrix

array([[5., 3., 4., ..., 0., 0., 0.],
       [4., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [5., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 5., 0., ..., 0., 0., 0.]])

In [7]:
# calculate similarities based on distance

from sklearn.metrics.pairwise import pairwise_distances

user_sim = pairwise_distances(data_matrix, metric = 'cosine')
item_sim = pairwise_distances(data_matrix.T, metric = 'cosine')

In [8]:
# build prediction function based on similarity

def predict(ratings, similarity, type = 'user'):
    
    if type == 'user':
        
        mean_user_rating = ratings.mean(axis = 1)
        ratings_diff = ratings - mean_user_rating[:, np.newaxis]
        pred = mean_user_rating[:, np.newaxis] + similarity.dot(ratings_diff) / np.abs(similarity).sum(axis = 1).reshape(-1, 1)
        
        # each matrix element ij represents all users' ratings on movie j, weighted by similarity between user i and all others
        
    elif type =='item':
        
        pred = ratings.dot(similarity) /np.abs(similarity).sum(axis=1).reshape(1, -1)
        
        # each matrix element ij represents user i's ratings on all movies j, weighted by similarity between movie j and all others
        
    return pred

In [9]:
# make predictions

user_pred = predict(data_matrix, user_sim, type = 'user')
item_pred = predict(data_matrix, item_sim, type = 'item')

In [10]:
# test on training data

user_pred[0]

array([2.06532606, 0.73430275, 0.62992381, ..., 0.39359041, 0.39304874,
       0.3927712 ])

In [11]:
# check the most recommended movie for user 1 based on user similarity

items.loc[np.argmax(user_pred[0]),:]

mid                                                               50
title                                               Star Wars (1977)
release_date                                     1977-01-01 00:00:00
vd_release_date                                                  NaN
url                http://us.imdb.com/M/title-exact?Star%20Wars%2...
unkown                                                             0
Action                                                             1
Adventure                                                          1
Animation                                                          0
Children's                                                         0
Comedy                                                             0
Crime                                                              0
Documentary                                                        0
Drama                                                              0
Fantasy                           

In [12]:
# check the most recommended movie for user 1 based on item similarity

items.loc[np.argmax(item_pred[0]),:]

mid                                                        1387
title                                               Fall (1997)
release_date                                1997-06-27 00:00:00
vd_release_date                                             NaN
url                http://us.imdb.com/M/title-exact?Fall+(1997)
unkown                                                        0
Action                                                        0
Adventure                                                     0
Animation                                                     0
Children's                                                    0
Comedy                                                        0
Crime                                                         0
Documentary                                                   0
Drama                                                         0
Fantasy                                                       0
Film-Noir                               

In [26]:
# check all the movies rated 5 by user 1

items[items.mid.isin(ratings.loc[(ratings.uid ==1) & (ratings.rating == 5),:]['mid'].values)]

Unnamed: 0,mid,title,release_date,vd_release_date,url,unkown,Action,Adventure,Animation,Children's,Comedy,Crime,Documentary,Drama,Fantasy,Film-Noir,Horror,Musical,Mystery,Romance,Sci-Fi,Thriller,War,Western
0,1,Toy Story (1995),1995-01-01,,http://us.imdb.com/M/title-exact?Toy%20Story%2...,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0
5,6,Shanghai Triad (Yao a yao yao dao waipo qiao) ...,1995-01-01,,http://us.imdb.com/Title?Yao+a+yao+yao+dao+wai...,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0
8,9,Dead Man Walking (1995),1995-01-01,,http://us.imdb.com/M/title-exact?Dead%20Man%20...,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0
11,12,"Usual Suspects, The (1995)",1995-08-14,,http://us.imdb.com/M/title-exact?Usual%20Suspe...,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0
12,13,Mighty Aphrodite (1995),1995-10-30,,http://us.imdb.com/M/title-exact?Mighty%20Aphr...,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0
13,14,"Postino, Il (1994)",1994-01-01,,"http://us.imdb.com/M/title-exact?Postino,%20Il...",0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0
14,15,Mr. Holland's Opus (1995),1996-01-29,,http://us.imdb.com/M/title-exact?Mr.%20Holland...,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0
15,16,French Twist (Gazon maudit) (1995),1995-01-01,,http://us.imdb.com/M/title-exact?Gazon%20maudi...,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0
18,19,Antonia's Line (1995),1995-01-01,,http://us.imdb.com/M/title-exact?Antonia%20(1995),0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0
31,32,Crumb (1994),1994-01-01,,http://us.imdb.com/M/title-exact?Crumb%20(1994),0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0
