<a href="https://colab.research.google.com/github/rishikesh1410/Projects/blob/master/RecommenderSystemPyTorch.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
from google.colab import drive
drive.mount('/gdrive')

Drive already mounted at /gdrive; to attempt to forcibly remount, call drive.mount("/gdrive", force_remount=True).


In [0]:
from __future__ import print_function, division

# Torch Libraries
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F

# Libraries
import numpy as np
import time
import os
import copy
import cv2
import pandas as pd
import matplotlib.pyplot as plt
from numpy import *
from scipy import optimize
np.random.seed(1)

In [0]:
os.chdir("/gdrive/My Drive/data/movielens")

In [0]:
genome_scores = pd.read_csv('genome_scores.csv')
link = pd.read_csv('link.csv')
tag = pd.read_csv('tag.csv')
genome_tags = pd.read_csv('genome_tags.csv')
movie = pd.read_csv('movie.csv')
rating = pd.read_csv('rating.csv')

In [0]:
def tabular_preview(ratings, n=15):
  
    user_groups = ratings.groupby('userId')['rating'].count()
    top_users = user_groups.sort_values(ascending=False)[:1000]

    movie_groups = ratings.groupby('movieId')['rating'].count()
    top_movies = movie_groups.sort_values(ascending=False)[:1700]

    top = (
        ratings.
        join(top_movies, rsuffix='_r', how='inner', on='movieId').
        join(top_users, rsuffix='_r', how='inner', on='userId'))

    return pd.crosstab(top.movieId, top.userId, top.rating, aggfunc=np.sum)

In [0]:
frame = tabular_preview(rating)

In [0]:
train_data = np.array(frame)

In [0]:
did_rate = np.ndarray(shape = (1700,1000))

In [0]:
for i in range(1700):
  for j in range(1000):
    if np.isnan(train_data[i][j]):
      train_data[i][j]=0

In [0]:
did_rate = (train_data != 0 )* 1

In [11]:
train_data

array([[5. , 4. , 5. , ..., 5. , 3. , 5. ],
       [5. , 0. , 0. , ..., 3. , 2. , 3. ],
       [2. , 0. , 0. , ..., 4. , 2. , 0. ],
       ...,
       [0. , 0. , 0. , ..., 0. , 0. , 4.5],
       [0. , 0. , 0. , ..., 0. , 0. , 0. ],
       [0. , 3. , 0. , ..., 0. , 0. , 0. ]])

In [12]:
did_rate

array([[1, 1, 1, ..., 1, 1, 1],
       [1, 0, 0, ..., 1, 1, 1],
       [1, 0, 0, ..., 1, 1, 0],
       ...,
       [0, 0, 0, ..., 0, 0, 1],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 1, 0, ..., 0, 0, 0]])

In [0]:
def normalize_ratings(ratings, did_rate):
    num_movies = ratings.shape[0]

    ratings_mean = np.zeros(shape = (num_movies, 1))
    ratings_norm = np.zeros(shape = ratings.shape)

    for i in range(num_movies): 
        idx = np.where(did_rate[i] == True)[0]
        ratings_mean[i] = np.mean(ratings[i, idx])
        ratings_norm[i, idx] = ratings[i, idx] - ratings_mean[i]

    return ratings_norm, ratings_mean

In [0]:
rating, rating_mean = normalize_ratings(train_data, did_rate)

In [0]:
num_users = 1000
num_movies = 1700
num_features = 3

In [0]:
movie_features = random.randn(num_movies, num_features)
user_prefs = random.randn(num_users, num_features)

In [23]:
movie_features = torch.tensor(movie_features, requires_grad=True)
user_prefs = torch.tensor(user_prefs, requires_grad=True)
did_rate = torch.tensor(did_rate, dtype=torch.double, requires_grad=False)
rating = torch.tensor(rating, requires_grad=False)
rating_mean = torch.tensor(rating_mean, dtype=torch.double, requires_grad=False)

  This is separate from the ipykernel package so we can avoid doing imports until
  after removing the cwd from sys.path.
  """


In [0]:
def loss(movie_features, user_prefs, ratings, did_rate):

  cost = torch.sum( ((torch.mm(movie_features, torch.t(user_prefs)) * did_rate) - ratings) ** 2 ) / (1700*1000)
  return cost

In [0]:
optimizer = optim.Adam([movie_features, user_prefs], lr=0.001)

In [0]:
def train(epoch):
  minimum_loss = 100
  epoch_min_loss = 0
  for i in range(epoch):
    train_loss = loss(movie_features, user_prefs, rating, did_rate)
    train_loss.backward()
    optimizer.step()
    if train_loss <= minimum_loss:
      minimum_loss = train_loss
      X = movie_features
      theta = user_prefs
      epoch_min_loss = epoch  
    print('Epoch : {} ----- Train Loss : {} ------- Minimum Loss {}' .format(i, train_loss, minimum_loss))

In [27]:
train(1000)

Epoch : 0 ----- Train Loss : 1.990342858157188 ------- Minimum Loss 1.990342858157188
Epoch : 1 ----- Train Loss : 1.985337206954278 ------- Minimum Loss 1.985337206954278
Epoch : 2 ----- Train Loss : 1.9805172691342667 ------- Minimum Loss 1.9805172691342667
Epoch : 3 ----- Train Loss : 1.9757454268056918 ------- Minimum Loss 1.9757454268056918
Epoch : 4 ----- Train Loss : 1.970974167978855 ------- Minimum Loss 1.970974167978855
Epoch : 5 ----- Train Loss : 1.9661818904471102 ------- Minimum Loss 1.9661818904471102
Epoch : 6 ----- Train Loss : 1.9613572871914797 ------- Minimum Loss 1.9613572871914797
Epoch : 7 ----- Train Loss : 1.956493981502662 ------- Minimum Loss 1.956493981502662
Epoch : 8 ----- Train Loss : 1.951588273793094 ------- Minimum Loss 1.951588273793094
Epoch : 9 ----- Train Loss : 1.9466380558559184 ------- Minimum Loss 1.9466380558559184
Epoch : 10 ----- Train Loss : 1.9416422307893613 ------- Minimum Loss 1.9416422307893613
Epoch : 11 ----- Train Loss : 1.936600382

In [28]:
print(user_prefs)

tensor([[ 0.3034, -0.7378, -0.1330],
        [ 0.2981, -0.4893, -0.6389],
        [-0.5103,  0.0422, -0.5147],
        ...,
        [ 0.4630, -0.0205, -0.1432],
        [-0.3668, -0.0026,  0.2953],
        [ 0.0584, -0.4979, -0.3386]], dtype=torch.float64, requires_grad=True)


In [29]:
print(movie_features)

tensor([[-0.5957, -0.2301,  0.1568],
        [-0.6648,  0.3481, -0.5716],
        [-0.4021,  0.3735, -0.3362],
        ...,
        [-0.4084,  0.0929, -0.2248],
        [ 0.0551, -0.1700,  0.2585],
        [ 0.5950,  0.3690, -0.5245]], dtype=torch.float64, requires_grad=True)


In [0]:
predictions = torch.mm(movie_features, torch.t(user_prefs))

In [0]:
predictions = predictions + rating_mean

In [32]:
print(predictions)

tensor([[3.9366, 3.8032, 4.1820,  ..., 3.6749, 4.2338, 3.9951],
        [2.5314, 2.9105, 3.5620,  ..., 2.6808, 2.9880, 2.8953],
        [2.3879, 2.6529, 3.1347,  ..., 2.5950, 2.7878, 2.6451],
        ...,
        [3.4828, 3.6217, 3.9734,  ..., 3.4866, 3.7285, 3.6514],
        [3.7120, 3.5387, 3.4359,  ..., 3.5962, 3.6608, 3.6046],
        [3.8550, 4.2090, 3.8590,  ..., 4.2201, 3.5029, 3.9056]],
       dtype=torch.float64, grad_fn=<AddBackward0>)


In [33]:
all_predictions = torch.tensor(predictions+0.5 , dtype=torch.long)

  """Entry point for launching an IPython kernel.


In [34]:
print(all_predictions)

tensor([[4, 4, 4,  ..., 4, 4, 4],
        [3, 3, 4,  ..., 3, 3, 3],
        [2, 3, 3,  ..., 3, 3, 3],
        ...,
        [3, 4, 4,  ..., 3, 4, 4],
        [4, 4, 3,  ..., 4, 4, 4],
        [4, 4, 4,  ..., 4, 4, 4]])


In [35]:
print(train_data)

[[5.  4.  5.  ... 5.  3.  5. ]
 [5.  0.  0.  ... 3.  2.  3. ]
 [2.  0.  0.  ... 4.  2.  0. ]
 ...
 [0.  0.  0.  ... 0.  0.  4.5]
 [0.  0.  0.  ... 0.  0.  0. ]
 [0.  3.  0.  ... 0.  0.  0. ]]


In [36]:
loss(movie_features, user_prefs, rating, did_rate)

tensor(0.4758, dtype=torch.float64, grad_fn=<DivBackward0>)

In [0]:
pd.DataFrame(movie_features.detach().numpy()).to_csv('new_movie_features.csv')

In [0]:
pd.DataFrame(user_prefs.detach().numpy()).to_csv('new_user_prefs.csv')

In [0]:
!ls

genome_scores.csv  movie_features.csv	      rating.csv
genome_tags.csv    movielens-20m-dataset.zip  rating_with_name.csv
link.csv	   new_movie_features.csv     tag.csv
movie.csv	   new_user_prefs.csv	      user_prefs.csv


In [0]:
user1_preds = all_predictions[:,0]

In [0]:
user1_preds

tensor([4, 3, 3,  ..., 4, 4, 4])

In [0]:
user1_did_rate = did_rate[:,0]

In [0]:
user1_did_rate

tensor([1., 1., 1.,  ..., 0., 0., 0.], dtype=torch.float64)

In [0]:
frame.head()

userId,156,208,359,572,586,741,768,775,903,982,...,136875,136989,137037,137202,137277,137343,137686,137885,138208,138325
movieId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,5.0,4.0,5.0,5.0,2.5,5.0,,4.5,4.0,3.0,...,4.0,2.0,4.0,4.5,4.0,4.0,5.0,5.0,3.0,5.0
2,5.0,,,3.5,3.0,3.0,3.0,2.0,4.0,2.0,...,3.0,2.0,3.0,2.5,2.5,2.5,3.0,3.0,2.0,3.0
3,2.0,,,3.5,2.0,3.0,,3.5,2.0,2.0,...,,,2.0,3.5,3.0,3.5,3.0,4.0,2.0,
4,3.0,,,,,,,,2.0,2.0,...,,,,,,,,2.0,2.0,
5,3.0,,,3.5,3.0,4.0,2.0,1.0,3.0,2.5,...,,4.5,3.0,,3.0,3.5,,3.0,2.0,


In [0]:
all_predictions_arr = all_predictions.numpy()

In [0]:
all_predictions_arr

array([[4, 4, 4, ..., 4, 3, 4],
       [3, 3, 3, ..., 3, 3, 3],
       [3, 3, 3, ..., 2, 2, 3],
       ...,
       [4, 4, 4, ..., 4, 3, 4],
       [4, 4, 4, ..., 4, 3, 4],
       [4, 4, 4, ..., 4, 4, 4]])

In [0]:
result_frame = pd.DataFrame(all_predictions_arr, index=frame.index, columns = frame.columns)

In [0]:
result_frame

userId,156,208,359,572,586,741,768,775,903,982,...,136875,136989,137037,137202,137277,137343,137686,137885,138208,138325
movieId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,4,4,4,4,4,5,4,4,4,4,...,3,3,4,4,4,4,4,4,3,4
2,3,3,3,3,3,3,3,2,3,3,...,3,3,3,3,2,3,3,3,3,3
3,3,3,3,3,3,3,3,3,2,2,...,2,2,2,3,3,3,3,2,2,3
4,3,3,3,2,3,3,3,2,3,3,...,3,2,3,2,2,2,2,3,3,3
5,3,3,3,3,3,4,3,2,2,2,...,2,2,2,3,2,3,3,3,2,2
6,4,4,4,4,4,4,4,4,3,4,...,4,4,4,4,4,4,4,4,3,4
7,4,3,3,3,3,4,3,3,3,3,...,3,2,3,3,3,3,3,3,2,3
9,2,2,2,3,2,3,2,3,2,2,...,2,2,2,3,2,3,3,2,2,2
10,3,3,4,3,3,4,3,3,3,3,...,3,3,3,3,3,3,4,3,3,3
11,4,4,4,4,4,4,4,3,3,3,...,4,3,3,4,3,3,4,4,3,3


In [0]:
frame

userId,156,208,359,572,586,741,768,775,903,982,...,136875,136989,137037,137202,137277,137343,137686,137885,138208,138325
movieId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,5.0,4.0,5.0,5.0,2.5,5.0,,4.5,4.0,3.0,...,4.0,2.0,4.0,4.5,4.0,4.0,5.0,5.0,3.0,5.0
2,5.0,,,3.5,3.0,3.0,3.0,2.0,4.0,2.0,...,3.0,2.0,3.0,2.5,2.5,2.5,3.0,3.0,2.0,3.0
3,2.0,,,3.5,2.0,3.0,,3.5,2.0,2.0,...,,,2.0,3.5,3.0,3.5,3.0,4.0,2.0,
4,3.0,,,,,,,,2.0,2.0,...,,,,,,,,2.0,2.0,
5,3.0,,,3.5,3.0,4.0,2.0,1.0,3.0,2.5,...,,4.5,3.0,,3.0,3.5,,3.0,2.0,
6,4.0,,5.0,,4.5,3.5,4.0,3.5,4.0,3.5,...,4.0,4.0,5.0,3.0,4.0,,4.0,5.0,3.0,4.5
7,4.0,,,,,4.5,,3.0,3.0,2.0,...,,,2.0,3.0,3.0,,3.0,,3.0,
9,3.0,,,,,,3.0,,2.0,2.5,...,,,1.0,,2.0,1.5,,,,
10,4.0,,4.0,,4.0,4.0,4.0,4.5,1.0,3.0,...,3.0,3.0,4.0,4.0,3.0,2.0,4.0,3.0,2.0,3.5
11,5.0,3.0,4.0,3.5,2.5,,,4.0,3.0,3.0,...,,2.5,3.0,3.0,3.0,2.0,4.0,3.0,3.0,


In [0]:
rr = pd.read_csv('rating.csv')
mm = pd.read_csv('movie.csv')

In [0]:
lt=[]
pp = result_frame.copy()
pp['name']="kjhwkjhe"
pp.head()

userId,156,208,359,572,586,741,768,775,903,982,...,136989,137037,137202,137277,137343,137686,137885,138208,138325,name
movieId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,4,4,4,4,4,5,4,4,4,4,...,3,4,4,4,4,4,4,3,4,kjhwkjhe
2,3,3,3,3,3,3,3,2,3,3,...,3,3,3,2,3,3,3,3,3,kjhwkjhe
3,3,3,3,3,3,3,3,3,2,2,...,2,2,3,3,3,3,2,2,3,kjhwkjhe
4,3,3,3,2,3,3,3,2,3,3,...,2,3,2,2,2,2,3,3,3,kjhwkjhe
5,3,3,3,3,3,4,3,2,2,2,...,2,2,3,2,3,3,3,2,2,kjhwkjhe


In [0]:

for i in result_frame.index:
  if i in mm.movieId.values:
    lt.append(mm[mm['movieId'] == i].iloc[0][1])

In [0]:
pp['name']= lt

In [0]:
pp.head()

userId,156,208,359,572,586,741,768,775,903,982,...,136989,137037,137202,137277,137343,137686,137885,138208,138325,name
movieId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,4,4,4,4,4,5,4,4,4,4,...,3,4,4,4,4,4,4,3,4,Toy Story (1995)
2,3,3,3,3,3,3,3,2,3,3,...,3,3,3,2,3,3,3,3,3,Jumanji (1995)
3,3,3,3,3,3,3,3,3,2,2,...,2,2,3,3,3,3,2,2,3,Grumpier Old Men (1995)
4,3,3,3,2,3,3,3,2,3,3,...,2,3,2,2,2,2,3,3,3,Waiting to Exhale (1995)
5,3,3,3,3,3,4,3,2,2,2,...,2,2,3,2,3,3,3,2,2,Father of the Bride Part II (1995)


In [0]:
rr = pp[[156,'name']]

In [0]:
pd.Series(did_rate[:,0])

0       1.0
1       1.0
2       1.0
3       1.0
4       1.0
5       1.0
6       1.0
7       1.0
8       1.0
9       1.0
10      1.0
11      0.0
12      1.0
13      1.0
14      1.0
15      1.0
16      0.0
17      1.0
18      1.0
19      1.0
20      1.0
21      1.0
22      1.0
23      0.0
24      0.0
25      0.0
26      0.0
27      1.0
28      1.0
29      1.0
       ... 
1670    0.0
1671    0.0
1672    0.0
1673    0.0
1674    0.0
1675    0.0
1676    0.0
1677    0.0
1678    0.0
1679    0.0
1680    0.0
1681    0.0
1682    0.0
1683    0.0
1684    0.0
1685    0.0
1686    0.0
1687    0.0
1688    0.0
1689    0.0
1690    0.0
1691    0.0
1692    0.0
1693    0.0
1694    0.0
1695    0.0
1696    0.0
1697    0.0
1698    0.0
1699    0.0
Length: 1700, dtype: float64

In [0]:
len(rr[156]*pd.Series(did_rate[:,0]))

2732

In [0]:
rr

userId,156,name
movieId,Unnamed: 1_level_1,Unnamed: 2_level_1
1,4,Toy Story (1995)
2,3,Jumanji (1995)
3,3,Grumpier Old Men (1995)
4,3,Waiting to Exhale (1995)
5,3,Father of the Bride Part II (1995)
6,4,Heat (1995)
7,4,Sabrina (1995)
9,2,Sudden Death (1995)
10,3,GoldenEye (1995)
11,4,"American President, The (1995)"


In [0]:
dd = result_frame[[156]]

In [0]:
dd.index

Int64Index([    1,     2,     3,     4,     5,     6,     7,     9,    10,
               11,
            ...
            88125, 88129, 89745, 91500, 91529, 92259, 96079, 96610, 98809,
            99114],
           dtype='int64', name='movieId', length=1700)

In [0]:
85414 in movie.movieId

False

In [0]:
for i in dd.index:
  if i in movie.movieId:
    print(i)

1
2
3
4
5
6
7
9
10
11
12
14
15
16
17
18
19
20
21
22
23
24
25
26
28
29
31
32
34
36
39
41
44
45
46
47
48
50
52
58
60
62
65
69
70
73
74
76
79
81
85
86
88
89
94
95
100
101
104
105
107
110
111
112
122
125
135
140
141
144
145
147
150
151
153
154
158
160
161
162
163
164
165
168
169
170
172
173
175
180
181
185
186
193
194
195
196
198
203
204
207
208
215
216
218
222
223
224
225
227
230
231
232
233
234
235
236
237
239
246
247
248
249
252
253
256
257
260
261
262
265
266
267
272
273
276
277
280
281
282
288
289
290
292
293
296
300
303
305
306
307
308
314
315
316
317
318
319
322
327
329
333
337
338
339
342
344
345
348
349
350
351
353
355
356
357
361
362
364
367
368
370
371
372
373
374
376
377
379
380
381
382
383
405
410
412
413
415
417
419
420
426
427
428
431
432
434
435
440
441
442
446
448
450
454
455
457
464
466
468
471
474
475
477
480
481
482
485
489
490
491
492
493
494
497
500
502
507
508
509
514
515
516
517
519
520
524
527
529
531
532
533
534
535
537
538
539
540
541
542
543
546
548
550
551
552


In [0]:
print(j)

1528


In [0]:
movie

Unnamed: 0,movieId,title,genres
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,2,Jumanji (1995),Adventure|Children|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance
4,5,Father of the Bride Part II (1995),Comedy
5,6,Heat (1995),Action|Crime|Thriller
6,7,Sabrina (1995),Comedy|Romance
7,8,Tom and Huck (1995),Adventure|Children
8,9,Sudden Death (1995),Action
9,10,GoldenEye (1995),Action|Adventure|Thriller
