# Collaborative filtering deep dive 


We have a dataset of movie ratings and want to recommend movies to users. The model has no built-in notion of what a movie "means", so we have to construct the features it is going to use.

In [44]:
!pip install fastbook 




In [45]:
import fastbook 
from fastai import * 
from fastai.tabular.all import * 
from fastai.collab import * 

In [46]:
# Fetch the ML_100k archive through fastai's `untar_data`; the returned `path`
# is used below to locate the individual data files (u.data, u.item).
path = untar_data(URLs.ML_100k)

In [47]:
# u.data is tab-separated and has no header row, so the column names are supplied here.
ratings = pd.read_csv(
    path/'u.data',
    sep='\t',
    header=None,
    names=['user', 'movie', 'rating', 'timestamp'],
)

In [48]:
# Preview the first rows of the raw ratings table.
ratings.head()

Unnamed: 0,user,movie,rating,timestamp
0,196,242,3,881250949
1,186,302,3,891717742
2,22,377,1,878887116
3,244,51,2,880606923
4,166,346,1,886397596


# learning the latent factors 


For our model to learn from the data, we have to create some latent factors that will help the model recommend movies.

1. Gradient descent approach:
  Initially, randomly initialise some parameters. These parameters are the latent factors: one set for each user and one set for each movie.

  Each user therefore has a set of numbers (its latent factors) associated with them, and similarly each movie has its own set of latent factors associated with it.

# let's first create the data loaders 

In [49]:
# u.item is pipe-delimited, latin-1 encoded and has no header row.
# Only its first two columns are kept and named 'movie' and 'title'.
movies = pd.read_csv(
    path/'u.item',
    sep='|',
    encoding='latin-1',
    usecols=[0, 1],
    names=['movie', 'title'],
    header=None,
)

Merge the movie titles into the ratings table, joining on the `movie` column that the two tables share.

In [50]:
# No join key is given, so pandas joins on the columns the two frames have in common.
ratings = pd.merge(ratings, movies)
ratings.head()

Unnamed: 0,user,movie,rating,timestamp,title
0,196,242,3,881250949,Kolya (1996)
1,63,242,3,875747190,Kolya (1996)
2,226,242,5,883888671,Kolya (1996)
3,154,242,3,879138235,Kolya (1996)
4,306,242,5,876503793,Kolya (1996)


# Building the dataset from a pandas DataFrame

In [51]:
# Use the human-readable 'title' column as the item column and draw batches of 64 rows.
dls = CollabDataLoaders.from_df(
    ratings,
    item_name='title',
    bs=64,
)

In [52]:
# Display a few sample rows from the data loaders as a sanity check.
dls.show_batch()

Unnamed: 0,user,title,rating
0,277,Love Jones (1997),3
1,411,"Sting, The (1973)",5
2,203,Rumble in the Bronx (1995),4
3,878,"Graduate, The (1967)",4
4,597,Toy Story (1995),3
5,303,Dumb & Dumber (1994),5
6,680,Grosse Pointe Blank (1997),4
7,393,Swimming with Sharks (1995),1
8,715,"Substitute, The (1996)",2
9,110,Naked Gun 33 1/3: The Final Insult (1994),2


In [53]:
# The number of distinct classes per column fixes how many rows each factor matrix needs.
n_users, n_movies = len(dls.classes['user']), len(dls.classes['title'])

In [54]:
# Number of latent factors per user / per movie for the hand-built example below.
n_factors = 5 

In [55]:
 # Randomly initialised factor matrices: one row per user / per movie, n_factors columns each.
 user_factors = torch.randn(n_users, n_factors) 
 movie_factors = torch.randn(n_movies , n_factors )

In [56]:
# Multiplying the transposed factor matrix by a one-hot vector for index 3
# yields the same values as indexing `user_factors[3]` directly (compare the next cell's output).
one_hot_3 = one_hot(3, n_users).float()
user_factors.t() @ one_hot_3

tensor([-0.0485, -0.3958,  0.8114,  2.0313, -4.3449])

In [57]:
# Direct indexing: look up the row of factors for index 3.
user_factors[3]

tensor([-0.0485, -0.3958,  0.8114,  2.0313, -4.3449])

In [58]:
class Example:
  """Tiny class illustrating how `__init__` stores state that a method reads later."""

  def __init__(self, a):
    self.a = a

  def say(self, x):
    """Return a greeting built from the stored value `a` and the argument `x`."""
    greeting = f'hello {self.a} , {x}'
    return greeting


In [59]:
# Instantiate the class; 'som' is stored on the instance as `ex.a`.
ex = Example('som')

In [60]:
# Call the method; it combines the stored value with the argument passed here.
ex.say('nice to meet u')

'hello som , nice to meet u'

# Computing the dot product with a PyTorch module

In [61]:
class DotProduct(Module):
  """Score a (user, movie) pair by the dot product of their embedding vectors."""

  def __init__(self, n_users, n_movies, n_factors):
    # One learnable vector of length n_factors per user index and per movie index.
    self.user_factors = Embedding(n_users, n_factors)
    self.movie_factors = Embedding(n_movies, n_factors)

  def forward(self, x):
    """Return one score per row of `x` (column 0: user index, column 1: movie index)."""
    user_idx, movie_idx = x[:, 0], x[:, 1]
    user_vecs = self.user_factors(user_idx)
    movie_vecs = self.movie_factors(movie_idx)
    # Element-wise product summed over the factor axis is a row-wise dot product.
    elementwise = user_vecs * movie_vecs
    return elementwise.sum(dim=1)

In [62]:
# Grab a single batch to inspect what the model's `forward` will receive as `x`.
x,y = dls.one_batch()

In [63]:
# One row per sample in the batch and two columns (read by `forward` as x[:,0] and x[:,1]).
x.shape

torch.Size([64, 2])

In [64]:
# Build the model with 50 latent factors (the global `n_factors` above is not used here)
# and train it against a mean-squared-error loss.
model = DotProduct(n_users, n_movies, n_factors=50)
learn = Learner(dls, model, loss_func=MSELossFlat())


In [65]:
# Train for 5 epochs with the one-cycle schedule and a peak learning rate of 5e-3.
learn.fit_one_cycle(n_epoch=5, lr_max=5e-3)

epoch,train_loss,valid_loss,time
0,1.360123,1.285815,00:06
1,1.049457,1.079589,00:06
2,0.927138,0.971896,00:06
3,0.826857,0.887556,00:06
4,0.765574,0.86797,00:06


To make the model's job a little easier, we can pass it the range of the targets (`y_range`) and squash its predictions into that range with `sigmoid_range`.


In [69]:
class DotProduct(Module):
  """Dot-product model whose raw score is squashed into `y_range`.

  Note: this redefinition replaces the earlier `DotProduct` class of the same name.
  """

  def __init__(self, n_users, n_movies, n_factors, y_range=(0., 5.5)):
    # One learnable vector of length n_factors per user index and per movie index.
    self.user_factors = Embedding(n_users, n_factors)
    self.movie_factors = Embedding(n_movies, n_factors)
    # (low, high) bounds handed to sigmoid_range in forward.
    self.y_range = y_range

  def forward(self, x):
    """Return one bounded score per row of `x` (column 0: user index, column 1: movie index)."""
    user_idx, movie_idx = x[:, 0], x[:, 1]
    user_vecs = self.user_factors(user_idx)
    movie_vecs = self.movie_factors(movie_idx)
    raw_score = (user_vecs * movie_vecs).sum(dim=1)
    return sigmoid_range(raw_score, *self.y_range)


In [70]:
# Same training recipe as before, now using the range-limited DotProduct (default y_range).
model = DotProduct(n_users, n_movies, n_factors=50)
learn = Learner(dls, model, loss_func=MSELossFlat())
learn.fit_one_cycle(n_epoch=5, lr_max=5e-3)

epoch,train_loss,valid_loss,time
0,1.001092,0.968796,00:07
1,0.832746,0.894806,00:06
2,0.678448,0.86507,00:06
3,0.494312,0.875952,00:06
4,0.366354,0.880783,00:06


In [72]:
class DotProductBias(Module):
  """Dot-product model with an extra learnable bias per user and per movie."""

  def __init__(self, n_users, n_movies, n_factors, y_range=(0,5.5)):
    self.user_factors = Embedding(n_users, n_factors)
    # Width-1 embeddings act as a single scalar bias per user / per movie.
    self.user_bias = Embedding(n_users, 1)
    self.movie_factors = Embedding(n_movies, n_factors)
    self.movie_bias = Embedding(n_movies, 1)
    # (low, high) bounds handed to sigmoid_range in forward.
    self.y_range = y_range

  def forward(self, x):
    """Return a bounded score per row of `x` (column 0: user index, column 1: movie index)."""
    user_idx, movie_idx = x[:, 0], x[:, 1]
    user_vecs = self.user_factors(user_idx)
    movie_vecs = self.movie_factors(movie_idx)
    # keepdim=True keeps a trailing axis of size 1 so the width-1 biases add row by row.
    interaction = (user_vecs * movie_vecs).sum(dim=1, keepdim=True)
    bias = self.user_bias(user_idx) + self.movie_bias(movie_idx)
    return sigmoid_range(interaction + bias, *self.y_range)

In [73]:
# Train the bias-augmented model with the same settings so the loss tables are comparable.
model = DotProductBias(n_users, n_movies, n_factors=50)
learn = Learner(dls, model, loss_func=MSELossFlat())
learn.fit_one_cycle(n_epoch=5, lr_max=5e-3)

epoch,train_loss,valid_loss,time
0,0.970231,0.930188,00:07
1,0.823878,0.856197,00:07
2,0.615829,0.863109,00:07
3,0.402612,0.889062,00:07
4,0.275797,0.896526,00:07


# Weight decay
Instead of improving, the validation loss gets worse after the first couple of epochs while the training loss keeps falling, so the model is overfitting. To avoid this, we should add weight decay to it.