**Udemy 8-4. Recommender Systems with Deep Learning Code (Modified)**

The modified version:

- Is faster

- Finds a better answer

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from datetime import datetime
from sklearn.utils import shuffle

In [None]:
# Prefer the first CUDA GPU when one is visible; otherwise fall back to the CPU.
if torch.cuda.is_available():
  device = torch.device("cuda:0")
else:
  device = torch.device("cpu")
print(device)

cuda:0


In [None]:
# Data source: https://grouplens.org/datasets/movielens/
# MovieLens 20M movie ratings. Stable benchmark dataset. 20 million ratings and
# 465,000 tag applications applied to 27,000 movies by 138,000 users. Includes
# tag genome data with 12 million relevance scores across 1,100 tags.
# Released 4/2015; updated 10/2016 to update links.csv and add tag genome data.
# -nc (no-clobber): skip the download when the archive is already present.
!wget -nc https://files.grouplens.org/datasets/movielens/ml-20m.zip

File ‘ml-20m.zip’ already there; not retrieving.



In [None]:
# Extract the downloaded archive.
# -n : never overwrite existing files. If a file already exists, skip the extraction of that file without prompting.
# NOTE(review): the absolute /content/ path presumably matches the working
# directory that wget downloaded into (Colab) - confirm before running elsewhere.
!unzip -n /content/ml-20m.zip

Archive:  /content/ml-20m.zip


In [None]:
# List the working directory to check that the archive and the extracted folder are present.
!ls

ml-20m	ml-20m.zip  sample_data


In [None]:
# Load the ratings table and preview its first five rows.
df = pd.read_csv(filepath_or_buffer="/content/ml-20m/ratings.csv")
df.head(n=5)

Unnamed: 0,userId,movieId,rating,timestamp
0,1,2,3.5,1112486027
1,1,29,3.5,1112484676
2,1,32,3.5,1112484819
3,1,47,3.5,1112484727
4,1,50,3.5,1112484580


In [None]:
# We can't trust the userId and movieId columns to be numbered 0...N-1,
# so let's assign our own ids: convert each column to a categorical and use
# its integer category codes as the new id.

for raw_col, new_col in (("userId", "new_user_id"), ("movieId", "new_movie_id")):
  df[raw_col] = pd.Categorical(df[raw_col])
  df[new_col] = df[raw_col].cat.codes

In [None]:
# Pull the model inputs and targets out of the frame as NumPy arrays.
user_ids = df['new_user_id'].to_numpy()
movie_ids = df['new_movie_id'].to_numpy()
# Shift every rating down by 2.5.
# NOTE(review): presumably this roughly centres the targets around zero - confirm.
ratings = df['rating'].to_numpy() - 2.5

In [None]:
# Get the number of distinct users and movies.
# np.unique counts distinct values inside NumPy instead of building a Python
# set out of every array element, which is slow on arrays this large.
N = len(np.unique(user_ids))
M = len(np.unique(movie_ids))

In [None]:
# Set the embedding dimension (passed to the model as embed_dim)
D = 10

In [None]:
# Make a Neural Network
class Model(nn.Module):
  """Feed-forward rating predictor over (user, movie) index pairs.

  Each user index and each movie index is looked up in its own embedding
  table; the two embedding vectors are concatenated and passed through one
  hidden ReLU layer followed by a linear layer with a single output.

  Args:
    n_users: number of rows in the user embedding table.
    n_movies: number of rows in the movie embedding table.
    embed_dim: length of each embedding vector.
    n_hidden: width of the hidden fully connected layer.
  """

  def __init__(self, n_users, n_movies, embed_dim, n_hidden=1024):
    super().__init__()
    self.N = n_users
    self.M = n_movies
    self.D = embed_dim

    self.u_emb = nn.Embedding(num_embeddings=self.N, embedding_dim=self.D)
    self.m_emb = nn.Embedding(num_embeddings=self.M, embedding_dim=self.D)

    self.fc1 = nn.Linear(in_features=2*self.D, out_features=n_hidden)
    self.fc2 = nn.Linear(in_features=n_hidden, out_features=1)

    # The default N(0,1) embedding initialisation leads to poor results, so
    # re-draw both tables in place with a much smaller standard deviation.
    # nn.init works on the registered parameters directly, so there is no
    # need to build a tensor through NumPy and assign it to `.data`.
    nn.init.normal_(self.u_emb.weight, mean=0.0, std=0.01)
    nn.init.normal_(self.m_emb.weight, mean=0.0, std=0.01)

  def forward(self, u, m):
    """Predict one value per (user, movie) pair.

    Args:
      u: 1-D integer tensor of user indices.
      m: 1-D integer tensor of movie indices, same length as `u`.

    Returns:
      Tensor of shape (number_of_samples, 1).
    """
    u = self.u_emb(u) # the output is (number_of_samples, D)
    m = self.m_emb(m) # the output is (number_of_samples, D)

    # merge
    out = torch.cat((u,m), dim=1) # the output is (number_of_samples, 2D)

    out = self.fc1(out)
    out = F.relu(out)
    out = self.fc2(out)
    return out


In [None]:
# Build the network, move it onto the selected device, and display its layers.
model = Model(N, M, D).to(device)
model

Model(
  (u_emb): Embedding(138493, 10)
  (m_emb): Embedding(26744, 10)
  (fc1): Linear(in_features=20, out_features=1024, bias=True)
  (fc2): Linear(in_features=1024, out_features=1, bias=True)
)

In [None]:
# Loss: mean squared error between predictions and targets.
criterion = nn.MSELoss()
# Optimizer: SGD with momentum.
# (Commented-out alternative: torch.optim.Adam(model.parameters()).)
optimizer = torch.optim.SGD(params=model.parameters(), momentum=0.9, lr=0.08)

In [None]:
# A function to encapsulate the training loop


def _make_batch(users, movies, ratings, device):
  """Turn aligned NumPy slices into (users, movies, targets) tensors on `device`."""
  users = torch.from_numpy(users).long()
  movies = torch.from_numpy(movies).long()
  # reshape targets into a float column of shape (batch, 1)
  targets = torch.from_numpy(ratings).reshape(-1, 1).float()
  return users.to(device), movies.to(device), targets.to(device)


def batch_gd2(model, criterion, optimizer, train_data, test_data, epochs, bs=512):
  """Train `model` with mini-batch gradient descent and evaluate after every epoch.

  Args:
    model: module called as ``model(users, movies)``.
    criterion: loss called as ``criterion(outputs, targets)``.
    optimizer: optimizer over the model's parameters.
    train_data: tuple ``(users, movies, ratings)`` of aligned 1-D arrays.
    test_data: tuple ``(users, movies, ratings)`` of aligned 1-D arrays.
    epochs: number of passes over the training data.
    bs: mini-batch size.

  Returns:
    ``(train_losses, test_losses)``: two NumPy arrays of length `epochs`
    holding, per epoch, the mean of the per-batch losses.

  Notes:
    Batches are moved to the device of the model's first parameter (CPU when
    the model has no parameters). The training order is re-drawn every epoch
    from NumPy's global random state. The model's train/eval mode is restored
    to what it was on entry before returning.
  """
  train_users, train_movies, train_ratings = (np.asarray(a) for a in train_data)
  test_users, test_movies, test_ratings = (np.asarray(a) for a in test_data)

  # Use the device the model already lives on rather than a notebook global.
  first_param = next(model.parameters(), None)
  device = first_param.device if first_param is not None else torch.device("cpu")

  train_losses = np.zeros(epochs)
  test_losses = np.zeros(epochs)

  Ntrain = len(train_users)
  Ntest = len(test_users)
  was_training = model.training

  for it in range(epochs):
    t0 = datetime.now()

    # ---- training pass ----
    model.train()
    train_loss = []

    # Shuffle: visit the training rows in a fresh random order each epoch.
    # Indexing all three arrays with the same permutation keeps them aligned.
    perm = np.random.permutation(Ntrain)

    for start in range(0, Ntrain, bs):
      idx = perm[start:start + bs]
      users, movies, targets = _make_batch(
          train_users[idx], train_movies[idx], train_ratings[idx], device
      )

      # zero the gradients
      optimizer.zero_grad()

      # Forward Pass
      outputs = model(users, movies)
      loss = criterion(outputs, targets)

      # backward pass
      loss.backward()
      optimizer.step()

      train_loss.append(loss.item())

    train_losses[it] = np.mean(train_loss) if train_loss else np.nan

    # ---- evaluation pass ----
    # No gradients are needed here, so skip building the autograd graph.
    model.eval()
    test_loss = []
    with torch.no_grad():
      for start in range(0, Ntest, bs):
        stop = start + bs
        users, movies, targets = _make_batch(
            test_users[start:stop], test_movies[start:stop], test_ratings[start:stop], device
        )
        outputs = model(users, movies)
        test_loss.append(criterion(outputs, targets).item())

    test_losses[it] = np.mean(test_loss) if test_loss else np.nan

    dt = datetime.now() - t0
    # Report the per-epoch means (train_losses / test_losses), not entries of
    # the per-batch lists, which are indexed by batch rather than by epoch.
    print(f"Epoch: {it+1}/{epochs}, Train Loss: {train_losses[it]:.4f}, Test Loss: {test_losses[it]:.4f}, Duration: {dt}")

  model.train(was_training)
  return train_losses, test_losses


In [None]:
# Shuffle data in corresponding order.
# The result is written back to the same three names that are passed in, so
# re-running this cell keeps users, movies and ratings aligned. (Writing the
# movies to a different name would leave `movie_ids` unshuffled while
# `user_ids` and `ratings` get shuffled again on every re-run.)
user_ids, movie_ids, ratings = shuffle(user_ids, movie_ids, ratings)
# `movies_ids` is the name the train/test split cell reads.
movies_ids = movie_ids

In [None]:
# Use the first 80% of the rows for training and the remainder for testing.
Ntrain = int(0.8 * len(ratings))

train_users, test_users = user_ids[:Ntrain], user_ids[Ntrain:]
train_movies, test_movies = movies_ids[:Ntrain], movies_ids[Ntrain:]
train_ratings, test_ratings = ratings[:Ntrain], ratings[Ntrain:]


In [None]:
# Train for 10 epochs with mini-batches of 512 and keep the per-epoch losses.
train_losses, test_losses = batch_gd2(
    model,
    criterion,
    optimizer,
    train_data=(train_users, train_movies, train_ratings),
    test_data=(test_users, test_movies, test_ratings),
    epochs=10,
    bs=512,
)

In [None]:
# Plot the per-epoch training and test loss curves on labelled axes.
fig, ax = plt.subplots()
ax.plot(train_losses, label="train loss")
ax.plot(test_losses, label="test loss")
ax.set_xlabel("epoch")
ax.set_ylabel("MSE loss")
ax.set_title("Training vs. test loss")
ax.legend()
plt.show()

In [None]:
#