<a href="https://colab.research.google.com/github/rediff-pro/pytorch-learning/blob/dev/Tensorflow_practice.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import torch

In [2]:
if torch.cuda.is_available():
  print('We have GPU')

In [3]:
if torch.cuda.is_available():
  gpu_rand = torch.rand(2, 2, device='cuda')
  print(gpu_rand)
else:
  print('Sorry, CPU only')

Sorry, CPU only


In [4]:
# !wget http://files.grouplens.org/datasets/movielens/ml-latest-small.zip

In [5]:
# !unzip ml-latest-small.zip

In [6]:
from pathlib import Path
import pandas as pd
import numpy as np

In [7]:
PATH = Path('ml-latest-small')
list(PATH.iterdir())

[PosixPath('ml-latest-small/README.txt'),
 PosixPath('ml-latest-small/ratings.csv'),
 PosixPath('ml-latest-small/movies.csv'),
 PosixPath('ml-latest-small/tags.csv'),
 PosixPath('ml-latest-small/links.csv')]

In [8]:
! head $PATH/ratings.csv

userId,movieId,rating,timestamp
1,1,4.0,964982703
1,3,4.0,964981247
1,6,4.0,964982224
1,47,5.0,964983815
1,50,5.0,964982931
1,70,3.0,964982400
1,101,5.0,964980868
1,110,4.0,964982176
1,151,5.0,964984041


In [9]:
data = pd.read_csv(PATH/"ratings.csv")
data.head()

Unnamed: 0,userId,movieId,rating,timestamp
0,1,1,4.0,964982703
1,1,3,4.0,964981247
2,1,6,4.0,964982224
3,1,47,5.0,964983815
4,1,50,5.0,964982931


In [10]:
# spliting the data to training and validation set
np.random.seed(3)
msk = np.random.rand(len(data)) < 0.8
train = data[msk].copy()
val = data[~msk].copy()


In [11]:
# len(data)
# np.random.rand(len(data))

In [12]:
def proc_col(col, train_col=None):
  if train_col is not None:
    uniq = train_col.unique()
  else:
    uniq = col.unique()
  name2idx = {o:i for i,o in enumerate(uniq)}
  return name2idx, np.array([name2idx.get(x, -1) for x in col]), len(uniq)

In [13]:
def encode_data(df, train=None):
  df = df.copy()
  for col_name in ["userId", "movieId"]:
    train_col = None
    if train is not None:
      train_col = train[col_name]
    _, col, _ = proc_col(df[col_name], train_col)
    df[col_name] = col
    df = df[df[col_name] >= 0]
  return df

In [14]:
df_t = pd.read_csv('https://raw.githubusercontent.com/yanneta/pytorch-tutorials/refs/heads/master/images/tiny_training2.csv')
df_v = pd.read_csv('https://raw.githubusercontent.com/yanneta/pytorch-tutorials/refs/heads/master/images/tiny_val2.csv')
print(df_t)
df_t_e = encode_data(df_t)
df_v_e = encode_data(df_v, df_t)
df_v_e

    userId  movieId  rating
0       11        1       4
1       11       23       5
2        2       23       5
3        2        4       3
4       31        1       4
5       31       23       4
6        4        1       5
7        4        3       2
8       52        1       1
9       52        3       4
10      61        3       5
11       7       23       1
12       7        3       3


Unnamed: 0,userId,movieId,rating
0,1,0,5
1,3,1,5


In [15]:
# encoding training and validation data
df_train = encode_data(train)
df_val = encode_data(val, train)


#Embedding Layer

In [16]:
import torch
import torch.nn as nn
import torch.nn.functional as F

In [17]:
embed = nn.Embedding(10, 3)
embed

Embedding(10, 3)

In [18]:
a = torch.LongTensor([[1,2,0,4,5,1]])
embed(a)

tensor([[[ 0.2935,  1.1625,  1.7669],
         [ 0.4320,  0.3402,  1.1141],
         [-0.9455, -0.1803, -1.0241],
         [ 0.4148, -0.2212, -1.3289],
         [-1.5936, -2.5525,  0.8167],
         [ 0.2935,  1.1625,  1.7669]]], grad_fn=<EmbeddingBackward0>)

## Matix factorization Model

In [19]:
class MF(nn.Module):
  def __init__(self, num_users, num_items, emb_size=100):
    super(MF, self).__init__()
    self.user_emb = nn.Embedding(num_users, emb_size)
    self.item_emb = nn.Embedding(num_items, emb_size)
    self.user_emb.weight.data.uniform_(0, 0.05)
    self.item_emb.weight.data.uniform_(0, 0.05)

  def forward(self, u, v):
    u = self.user_emb(u)
    v = self.item_emb(v)
    return (u*v).sum(1)


In [20]:
df_t_e

Unnamed: 0,userId,movieId,rating
0,0,0,4
1,0,1,5
2,1,1,5
3,1,2,3
4,2,0,4
5,2,1,4
6,3,0,5
7,3,3,2
8,4,0,1
9,4,3,4


In [21]:
num_users = 7
num_items = 7
emb_size = 3
user_emb = nn.Embedding(num_users, emb_size)
item_emb = nn.Embedding(num_items, emb_size)
users = torch.LongTensor(df_t_e.userId.values)
items =torch.LongTensor(df_t_e.movieId.values)


In [22]:
U = user_emb(users)
V = item_emb(items)

In [23]:
U

tensor([[-0.5587, -0.0821, -1.4723],
        [-0.5587, -0.0821, -1.4723],
        [-0.5153, -0.1501, -0.8421],
        [-0.5153, -0.1501, -0.8421],
        [-0.7066,  0.6251, -0.4720],
        [-0.7066,  0.6251, -0.4720],
        [-0.2295,  0.5532,  0.6148],
        [-0.2295,  0.5532,  0.6148],
        [ 0.6141, -0.2387,  0.1850],
        [ 0.6141, -0.2387,  0.1850],
        [-0.0204, -0.1396, -1.2185],
        [-0.1890,  2.5102, -1.7167],
        [-0.1890,  2.5102, -1.7167]], grad_fn=<EmbeddingBackward0>)

In [24]:
U*V

tensor([[ 7.4718e-01,  8.4561e-02, -3.2013e+00],
        [ 3.3442e-02,  6.8169e-02,  2.6321e+00],
        [ 3.0846e-02,  1.2466e-01,  1.5056e+00],
        [ 7.5409e-01, -2.5176e-01, -2.1759e+00],
        [ 9.4487e-01, -6.4389e-01, -1.0263e+00],
        [ 4.2290e-02, -5.1908e-01,  8.4385e-01],
        [ 3.0694e-01, -5.6981e-01,  1.3369e+00],
        [-4.7801e-02,  5.9874e-01,  1.6129e+00],
        [-8.2117e-01,  2.4587e-01,  4.0218e-01],
        [ 1.2788e-01, -2.5835e-01,  4.8520e-01],
        [-4.2421e-03, -1.5105e-01, -3.1966e+00],
        [ 1.1311e-02, -2.0843e+00,  3.0691e+00],
        [-3.9355e-02,  2.7167e+00, -4.5033e+00]], grad_fn=<MulBackward0>)

In [25]:
(U*V).sum(1)

tensor([-2.3695,  2.7337,  1.6611, -1.6736, -0.7253,  0.3671,  1.0740,  2.1638,
        -0.1731,  0.3547, -3.3519,  0.9961, -1.8259], grad_fn=<SumBackward1>)

## Training MF model

In [26]:
num_users = len(df_train.userId.unique())
num_items = len(df_train.movieId.unique())
print(num_users, num_items)

610 8998


In [27]:
model = MF(num_users, num_items, emb_size)#.cuda(device='cuda')

In [28]:
def train_epocs(mode, epochs=10, lr=0.01, wd=0.0, unsqueeze=False):
  optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=wd)
  model.train()
  for i in range(epochs):
    users = torch.LongTensor(df_train.userId.values)#.cuda(device='cuda')
    items = torch.LongTensor(df_train.movieId.values)#.cuda(device='cuda')
    ratings = torch.FloatTensor(df_train.rating.values)#.cuda(device='cuda')
    if unsqueeze:
      ratings = ratings.unsqueeze(1)
    y_hat = model(users, items)
    loss = F.mse_loss(y_hat, ratings)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    print(loss.item())
  test_loss(model, unsqueeze)

In [29]:
# Here is what unsqueese does
ratings = torch.FloatTensor(df_train.rating.values)
print(ratings.shape)
ratings = ratings.unsqueeze(1)#.cuda(device='cuda')
print(ratings.shape)

torch.Size([80450])
torch.Size([80450, 1])


In [30]:
def test_loss(mode, unsqueese=False):
  mode.eval()
  users = torch.LongTensor(df_val.userId.values)#.cuda(device='cuda')
  items = torch.LongTensor(df_val.movieId.values)#.cuda(device='cuda')
  ratings = torch.FloatTensor(df_val.rating.values)#.cuda(device='cuda')
  if unsqueese:
    ratings = ratings.unsqueeze(1)
  y_hat = model(users, items)
  loss = F.mse_loss(y_hat, ratings)
  print('test loss %.3f'%loss.item())

In [31]:
train_epocs(model,epochs=10, lr=0.1)

13.332194328308105
13.019166946411133
12.425786018371582
11.520169258117676
10.323539733886719
8.879491806030273
7.25673770904541
5.553677558898926
3.902621269226074
2.4702584743499756
test loss 1.456


In [32]:
train_epocs(model,epochs=15, lr=0.01)

1.44469153881073
1.36446213722229
1.2893205881118774
1.2193461656570435
1.1545863151550293
1.0950323343276978
1.0406187772750854
0.9912350177764893
0.9467227458953857
0.9068748950958252
0.8714406490325928
0.840131402015686
0.8126286268234253
0.788592517375946
0.767673134803772
test loss 0.825


In [33]:
train_epocs(model,epochs=15, lr=0.01)

0.7495211362838745
0.7265082001686096
0.7071058750152588
0.6911022663116455
0.6780299544334412
0.6674059629440308
0.6588312387466431
0.6519656777381897
0.6464911103248596
0.6421186327934265
0.6386099457740784
0.6357762217521667
0.6334622502326965
0.631537139415741
0.6298964023590088
test loss 0.760


### MF with bias

In [34]:
class MF_bias(nn.Module):
  def __init__(self,num_users, num_items, emb_size=100):
    super(MF_bias, self).__init__()
    self.user_emb = nn.Embedding(num_users, emb_size)
    self.user_bias = nn.Embedding(num_users,1)
    self.item_emb = nn.Embedding(num_items, emb_size)
    self.item_bias = nn.Embedding(num_items, 1)
    self.user_emb.weight.data.uniform_(0, 0.05)
    self.item_emb.weight.data.uniform_(0, 0.05)
    self.user_bias.weight.data.uniform_(-0.01, 0.01)
    self.item_bias.weight.data.uniform_(-0.01, 0.01)

  def forward(self, u, v):
    U = self.user_emb(u)
    V = self.item_emb(v)
    b_u = self.user_bias(u).squeeze()
    b_v = self.item_bias(v).squeeze()
    return (U*V).sum(1) + b_u * b_v


In [35]:
model = MF_bias(num_users, num_items, emb_size=100)#.cuda(device='cuda')

In [36]:
train_epocs(model, epochs=10, lr=0.05, wd=1e-5)

12.911827087402344
9.73202896118164
5.168066501617432
1.4599766731262207
1.9565908908843994
3.8386995792388916
2.8605194091796875
1.294800877571106
0.8037036061286926
1.2675602436065674
test loss 2.111


In [37]:
train_epocs(model, epochs=10, lr=0.01, wd=1e-5)

1.9246934652328491
1.3322712182998657
0.9346444010734558
0.748653769493103
0.7300360798835754
0.7805191874504089
0.8164900541305542
0.8072583675384521
0.7633401155471802
0.7111247777938843
test loss 0.794


In [38]:
train_epocs(model, epochs=10, lr=0.001, wd=1e-5)

0.673792839050293
0.6595631241798401
0.6475827097892761
0.6376769542694092
0.6296459436416626
0.6232671737670898
0.6183074712753296
0.6145389080047607
0.6117488741874695
0.6097428202629089
test loss 0.751


## Neural network model

In [39]:
# Note here there is no matrix multiplication, we could potentially make the embeddings of different sizes.
# Here we could get better results by keep playing with regularization.

class CollabFNet(nn.Module):
    def __init__(self, num_users, num_items, emb_size=100, n_hidden=10):
        super(CollabFNet, self).__init__()
        self.user_emb = nn.Embedding(num_users, emb_size)
        self.item_emb = nn.Embedding(num_items, emb_size)
        self.lin1 = nn.Linear(emb_size*2, n_hidden)
        self.lin2 = nn.Linear(n_hidden, 1)
        self.drop1 = nn.Dropout(0.1)

    def forward(self, u, v):
        U = self.user_emb(u)
        V = self.item_emb(v)
        x = F.relu(torch.cat([U, V], dim=1))
        x = self.drop1(x)
        x = F.relu(self.lin1(x))
        x = self.lin2(x)
        return x

In [40]:
model = CollabFNet(num_users, num_items, emb_size=100)#.cuda(device='cuda')

In [41]:
train_epocs(model, epochs=15, lr=0.05, wd=1e-6, unsqueeze=True)#.cuda(device='cuda')

13.235099792480469
6.184718608856201
1.6655224561691284
3.313239574432373
2.3089849948883057
1.1190638542175293
1.5835399627685547
2.052760362625122
1.2912790775299072
0.8884531259536743
1.1780836582183838
1.4153590202331543
1.2370829582214355
0.8643498420715332
0.7515420317649841
test loss 1.041


In [42]:
train_epocs(model, epochs=10, lr=0.01, wd=1e-6, unsqueeze=True)

0.9857784509658813
0.7620552182197571
0.9212272763252258
0.8411545157432556
0.7237251996994019
0.7166762948036194
0.7832443714141846
0.7895985841751099
0.7286323308944702
0.6887142658233643
test loss 0.766


In [43]:
train_epocs(model, epochs=10, lr=0.001, wd=1e-6, unsqueeze=True)

0.699085533618927
0.6862924098968506
0.6815400719642639
0.6845831871032715
0.6893553733825684
0.6874909996986389
0.6836525201797485
0.6818828582763672
0.6806396842002869
0.6808539628982544
test loss 0.751


In [44]:
train_epocs(model, epochs=10, lr=0.001, wd=1e-6, unsqueeze=True)

0.6798769235610962
0.6807288527488708
0.6819581985473633
0.6794312596321106
0.6784835457801819
0.6786403656005859
0.6770465970039368
0.675611138343811
0.6761929392814636
0.6760192513465881
test loss 0.749


In [45]:
import pandas as pd
list(pd.RangeIndex(3))

[0, 1, 2]