In [10]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
import pandas as pd
import numpy as np

In [3]:

# Load MovieLens Small dataset
data = pd.read_csv("../../datasets/ml-latest-small/ratings.csv", sep=",")
#prefs = pd.read_csv("ratings.csv", sep=",")
#prefs = pd.read_csv("drive/MyDrive/PycharmProjects/datasets/ml-latest-small/ratings.csv", sep=",")

data.head()

Unnamed: 0,userId,movieId,rating,timestamp
0,1,1,4.0,964982703
1,1,3,4.0,964981247
2,1,6,4.0,964982224
3,1,47,5.0,964983815
4,1,50,5.0,964982931


In [4]:
# Map user and movie IDs to unique indices
user_ids = data['userId'].unique()
movie_ids = data['movieId'].unique()

user_mapping = {user_id: idx for idx, user_id in enumerate(user_ids)}
movie_mapping = {movie_id: idx for idx, movie_id in enumerate(movie_ids)}

data['userId'] = data['userId'].map(user_mapping)
data['movieId'] = data['movieId'].map(movie_mapping)

In [15]:


class MovieLensDataset(Dataset):
    def __init__(self, dataframe):
        self.data = dataframe[['userId', 'movieId']].values
        self.ratings = dataframe['rating'].values.astype(np.float32)

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        return torch.LongTensor(self.data[idx]), torch.FloatTensor([self.ratings[idx]])


In [46]:
# Define the matrix factorization model
class MatrixFactorization(nn.Module):
    def __init__(self, num_users, num_movies, embedding_size=10):
        super(MatrixFactorization, self).__init__()
        self.user_embedding = nn.Embedding(num_users, embedding_size)
        self.movie_embedding = nn.Embedding(num_movies, embedding_size)

    def forward(self, X):
        user_embedding = self.user_embedding(X[:,0])
        movie_embedding = self.movie_embedding(X[:,1])
        prediction = torch.sum(user_embedding * movie_embedding, dim=1)
        return prediction

In [16]:
# Split the data into training and testing sets
train_data, test_data = train_test_split(data, test_size=0.1)

# Initialize dataset and data loaders
train_dataset = MovieLensDataset(train_data)
test_dataset = MovieLensDataset(test_data)

train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

In [84]:
# Initialize the model and optimizer
num_users = len(user_ids)
num_movies = len(movie_ids)
model = MatrixFactorization(num_users, num_movies, embedding_size=5)
criterion = nn.MSELoss()
optimizer = optim.SGD(model.parameters(), lr=0.03, momentum=0.9)

# Training loop
num_epochs = 20

for epoch in range(num_epochs):
    model.train()
    total_loss = 0.0
    absolute_errors = []

    for batch_idx, (X, y) in enumerate(train_loader):
        optimizer.zero_grad()
        predictions = model(X).squeeze()
        loss = criterion(predictions, y)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
        absolute_errors.extend(torch.abs(predictions - y).tolist()[0])


    print(f"Epoch {epoch + 1}/{num_epochs}, Loss: {total_loss / len(train_loader)}")
    print(f"Train MAE: {np.mean(absolute_errors)}")



  return F.mse_loss(input, target, reduction=self.reduction)


Epoch 1/20, Loss: 14.422742045984954
Train MAE: 3.472874481689411
Epoch 2/20, Loss: 9.16441351671313
Train MAE: 2.60714994117812
Epoch 3/20, Loss: 4.838072515172918
Train MAE: 1.6483400150613363
Epoch 4/20, Loss: 3.368247222328724
Train MAE: 1.334041022049652
Epoch 5/20, Loss: 2.69239187820674
Train MAE: 1.2096932899917519
Epoch 6/20, Loss: 2.297594761394479
Train MAE: 1.117897429833207
Epoch 7/20, Loss: 2.0356852540428445
Train MAE: 1.0475897935129785
Epoch 8/20, Loss: 1.8510580745082985
Train MAE: 1.0017842948991722
Epoch 9/20, Loss: 1.712286000653618
Train MAE: 0.9832450229019762
Epoch 10/20, Loss: 1.6051621564436698
Train MAE: 0.9696744242531993
Epoch 11/20, Loss: 1.520357853813131
Train MAE: 0.9315203795477496
Epoch 12/20, Loss: 1.451890031987084
Train MAE: 0.9082881939743235
Epoch 13/20, Loss: 1.396779225012143
Train MAE: 0.9098692313828394
Epoch 14/20, Loss: 1.3506638863779425
Train MAE: 0.8754684763779267
Epoch 15/20, Loss: 1.3106977058566676
Train MAE: 0.8845962068901433
Epoch

In [85]:
# Evaluation on the test set
model.eval()
test_loss = 0.0
absolute_errors = []
with torch.no_grad():
    for batch_idx, (X, y) in enumerate(test_loader):
        predictions = model(X).squeeze()
        loss = criterion(predictions, y)
        test_loss += loss.item()
        absolute_errors.extend(torch.abs(predictions - y).tolist()[0])

print(f"Test Loss: {test_loss / len(test_loader)}")
print(f"Test MAE: {np.mean(absolute_errors)}")

Test Loss: 1.886280511376224
Test MAE: 1.0429305349879678


  return F.mse_loss(input, target, reduction=self.reduction)


In [93]:
x = torch.tensor([1.0], requires_grad=True)
y = torch.tensor([2.0], requires_grad=True)
z = 3*x**2+y**3

print("x.grad: ", x.grad)
print("y.grad: ", y.grad)
print("z.grad: ", z.grad)



x.grad:  None
y.grad:  None
z.grad:  None


  print("z.grad: ", z.grad)


In [94]:
# calculate the gradient
z.backward()

print("x.grad: ", x.grad)
print("y.grad: ", y.grad)
print("z.grad: ", z.grad)

x.grad:  tensor([6.])
y.grad:  tensor([12.])
z.grad:  None


  print("z.grad: ", z.grad)


In [95]:
a = torch.tensor([1.0])

In [92]:
a

tensor([1.])