In [1]:
import pandas as pd
import numpy as np
import torch
from torch.autograd import Variable
from tqdm.notebook import tqdm
import torch.nn as nn
import torch.optim as optim
from torch.optim import Adam
from torch.utils.data import random_split, DataLoader, Dataset
from sklearn.model_selection import train_test_split
from sklearn import model_selection, metrics, preprocessing
from IPython.display import Image
import matplotlib.pyplot as plt
from prettytable import PrettyTable

In [2]:
df = pd.read_csv("ratings.csv")

In [3]:
df = df.sample(10000)

In [4]:
df

Unnamed: 0,userId,movieId,rating,timestamp
115384,847,69995,3.5,1.377057e+09
58385,462,32,3.0,1.138575e+09
75392,576,688,3.0,8.430574e+08
121822,872,3173,3.0,1.194650e+09
154074,1089,4128,3.0,1.031605e+09
...,...,...,...,...
40175,318,53894,5.0,1.186514e+09
108773,821,849,5.0,8.684012e+08
156383,1115,574,2.0,9.885668e+08
97312,738,64983,1.5,1.248175e+09


In [5]:
class MovieDataset(Dataset):
    def __init__(self, userId, movieId, rating):
        self.userId = userId
        self.movieId = movieId
        self.rating = rating

    def __len__(self):
        return len(self.userId)

    def __getitem__(self, item):
        userId = self.userId[item]
        movieId = self.movieId[item]
        rating = self.rating[item]
        return torch.tensor(userId), torch.tensor(movieId), torch.tensor(rating)

In [6]:
lbl_user = preprocessing.LabelEncoder()
lbl_movie = preprocessing.LabelEncoder()
df.userId = lbl_user.fit_transform(df.userId.values)
df.movieId = lbl_movie.fit_transform(df.movieId.values)
#   Split Data Into Train And Validation Data :
train_set, test_set = train_test_split(df, test_size = 0.1)

In [7]:
#   Create Torch Datasets :
train_dataset = MovieDataset(train_set.userId.values, train_set.movieId.values, train_set.rating.values)
test_dataset = MovieDataset(test_set.userId.values, test_set.movieId.values, test_set.rating.values)
train_loader = DataLoader(train_dataset, batch_size = 8)
test_loader = DataLoader(test_dataset, batch_size = 8)

In [8]:
n_factors = 20
n_users = len(df.userId.unique())
n_items = len(df.movieId.unique())
device = 'cuda' if torch.cuda.is_available() else 'cpu'

In [9]:
class MatrixFactorization(torch.nn.Module):
    def __init__(self, n_users, n_items, n_factors):
        super().__init__()
        #   Create Vector Embeddings
        self.user_factors = torch.nn.Embedding(n_users, n_factors)
        self.movie_factors = torch.nn.Embedding(n_items, n_factors)
        self.mlp = nn.Sequential(
            nn.Linear(n_factors*2, 32),
            nn.ReLU(),
            nn.Linear(32, 16),
            nn.ReLU(),
            nn.Linear(16, 1)
)

    def forward(self, user, movie):
        #   Matrix Multiplication
        user_embedding = self.user_factors(user)
        movie_embedding = self.movie_factors(movie)
        x = torch.cat((user_embedding, movie_embedding), dim=1)
        return self.mlp(x).view(-1)

In [10]:
#   Training Function
accuracies = []  # قائمة لتخزين قيم الدقة
losses = []
def train(num_epochs):
    best_accuracy = 0.0

    print("Begin Training...")
    for epoch in range(1, num_epochs+1):
        running_train_loss = 0.0
        correct_predictions = 0
        total_samples = 0
        #   Training Loop
        for user, movie, rating in tqdm(train_loader, desc="Processing"):
            #   Make Prediction For This Batch
            user, movie, rating = user.cuda(), movie.cuda(), rating.cuda()
            optimizer.zero_grad()
            outputs = model(user, movie).view(-1)

            #   Calculate accuracy
            predicted_labels = torch.round(outputs)  # لتقريب القيم إلى 0 أو 1
            correct_predictions += torch.sum(torch.eq(predicted_labels, rating)).item()
            total_samples += len(rating)

            #   Calculate Loss
            loss = loss_fn(outputs, rating.float())
            loss.backward()     #   Backpropagate The Loss
            optimizer.step()    #   Adjust Parameters Based On The Calcuated Gradients
            running_train_loss += loss.item()   #   Track The Loss Value

        #   Calculate Training Of The Epoch
        train_loss_value = running_train_loss / len(train_loader)
        accuracy_value = correct_predictions / total_samples

        losses.append(train_loss_value)
        accuracies.append(accuracy_value)

        if epoch % 10 == 0:
            print('Batch', epoch, "Training Loss Is: %.4f, Accuracy Is: %.4f" % (train_loss_value, accuracy_value))


In [14]:
num_epochs = 100
model = MatrixFactorization(n_users, n_items, n_factors)
model = model.to(device)
#   MSE Loss
loss_fn = torch.nn.MSELoss()
#   ADAM Optimizer
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

In [15]:
model.train()

MatrixFactorization(
  (user_factors): Embedding(1124, 20)
  (movie_factors): Embedding(3557, 20)
  (mlp): Sequential(
    (0): Linear(in_features=40, out_features=32, bias=True)
    (1): ReLU()
    (2): Linear(in_features=32, out_features=16, bias=True)
    (3): ReLU()
    (4): Linear(in_features=16, out_features=1, bias=True)
  )
)

In [25]:
train(num_epochs)

Begin Training...


Processing:   0%|          | 0/1125 [00:00<?, ?it/s]

Processing:   0%|          | 0/1125 [00:00<?, ?it/s]

Processing:   0%|          | 0/1125 [00:00<?, ?it/s]

Processing:   0%|          | 0/1125 [00:00<?, ?it/s]

Processing:   0%|          | 0/1125 [00:00<?, ?it/s]

Processing:   0%|          | 0/1125 [00:00<?, ?it/s]

Processing:   0%|          | 0/1125 [00:00<?, ?it/s]

Processing:   0%|          | 0/1125 [00:00<?, ?it/s]

Processing:   0%|          | 0/1125 [00:00<?, ?it/s]

Processing:   0%|          | 0/1125 [00:00<?, ?it/s]

Batch 10 Training Loss Is: 0.0234, Accuracy Is: 0.7160


Processing:   0%|          | 0/1125 [00:00<?, ?it/s]

Processing:   0%|          | 0/1125 [00:00<?, ?it/s]

Processing:   0%|          | 0/1125 [00:00<?, ?it/s]

Processing:   0%|          | 0/1125 [00:00<?, ?it/s]

Processing:   0%|          | 0/1125 [00:00<?, ?it/s]

Processing:   0%|          | 0/1125 [00:00<?, ?it/s]

Processing:   0%|          | 0/1125 [00:00<?, ?it/s]

Processing:   0%|          | 0/1125 [00:00<?, ?it/s]

Processing:   0%|          | 0/1125 [00:00<?, ?it/s]

Processing:   0%|          | 0/1125 [00:00<?, ?it/s]

Batch 20 Training Loss Is: 0.0155, Accuracy Is: 0.7176


Processing:   0%|          | 0/1125 [00:00<?, ?it/s]

Processing:   0%|          | 0/1125 [00:00<?, ?it/s]

Processing:   0%|          | 0/1125 [00:00<?, ?it/s]

Processing:   0%|          | 0/1125 [00:00<?, ?it/s]

Processing:   0%|          | 0/1125 [00:00<?, ?it/s]

Processing:   0%|          | 0/1125 [00:00<?, ?it/s]

Processing:   0%|          | 0/1125 [00:00<?, ?it/s]

Processing:   0%|          | 0/1125 [00:00<?, ?it/s]

Processing:   0%|          | 0/1125 [00:00<?, ?it/s]

Processing:   0%|          | 0/1125 [00:00<?, ?it/s]

Batch 30 Training Loss Is: 0.0212, Accuracy Is: 0.7168


Processing:   0%|          | 0/1125 [00:00<?, ?it/s]

Processing:   0%|          | 0/1125 [00:00<?, ?it/s]

Processing:   0%|          | 0/1125 [00:00<?, ?it/s]

Processing:   0%|          | 0/1125 [00:00<?, ?it/s]

Processing:   0%|          | 0/1125 [00:00<?, ?it/s]

Processing:   0%|          | 0/1125 [00:00<?, ?it/s]

Processing:   0%|          | 0/1125 [00:00<?, ?it/s]

Processing:   0%|          | 0/1125 [00:00<?, ?it/s]

Processing:   0%|          | 0/1125 [00:00<?, ?it/s]

Processing:   0%|          | 0/1125 [00:00<?, ?it/s]

Batch 40 Training Loss Is: 0.0206, Accuracy Is: 0.7168


Processing:   0%|          | 0/1125 [00:00<?, ?it/s]

Processing:   0%|          | 0/1125 [00:00<?, ?it/s]

Processing:   0%|          | 0/1125 [00:00<?, ?it/s]

Processing:   0%|          | 0/1125 [00:00<?, ?it/s]

Processing:   0%|          | 0/1125 [00:00<?, ?it/s]

Processing:   0%|          | 0/1125 [00:00<?, ?it/s]

Processing:   0%|          | 0/1125 [00:00<?, ?it/s]

Processing:   0%|          | 0/1125 [00:00<?, ?it/s]

Processing:   0%|          | 0/1125 [00:00<?, ?it/s]

Processing:   0%|          | 0/1125 [00:00<?, ?it/s]

Batch 50 Training Loss Is: 0.0163, Accuracy Is: 0.7176


Processing:   0%|          | 0/1125 [00:00<?, ?it/s]

Processing:   0%|          | 0/1125 [00:00<?, ?it/s]

Processing:   0%|          | 0/1125 [00:00<?, ?it/s]

Processing:   0%|          | 0/1125 [00:00<?, ?it/s]

Processing:   0%|          | 0/1125 [00:00<?, ?it/s]

Processing:   0%|          | 0/1125 [00:00<?, ?it/s]

Processing:   0%|          | 0/1125 [00:00<?, ?it/s]

Processing:   0%|          | 0/1125 [00:00<?, ?it/s]

Processing:   0%|          | 0/1125 [00:00<?, ?it/s]

Processing:   0%|          | 0/1125 [00:00<?, ?it/s]

Batch 60 Training Loss Is: 0.0162, Accuracy Is: 0.7178


Processing:   0%|          | 0/1125 [00:00<?, ?it/s]

Processing:   0%|          | 0/1125 [00:00<?, ?it/s]

Processing:   0%|          | 0/1125 [00:00<?, ?it/s]

Processing:   0%|          | 0/1125 [00:00<?, ?it/s]

Processing:   0%|          | 0/1125 [00:00<?, ?it/s]

Processing:   0%|          | 0/1125 [00:00<?, ?it/s]

Processing:   0%|          | 0/1125 [00:00<?, ?it/s]

Processing:   0%|          | 0/1125 [00:00<?, ?it/s]

Processing:   0%|          | 0/1125 [00:00<?, ?it/s]

Processing:   0%|          | 0/1125 [00:00<?, ?it/s]

Batch 70 Training Loss Is: 0.0179, Accuracy Is: 0.7178


Processing:   0%|          | 0/1125 [00:00<?, ?it/s]

Processing:   0%|          | 0/1125 [00:00<?, ?it/s]

Processing:   0%|          | 0/1125 [00:00<?, ?it/s]

Processing:   0%|          | 0/1125 [00:00<?, ?it/s]

Processing:   0%|          | 0/1125 [00:00<?, ?it/s]

Processing:   0%|          | 0/1125 [00:00<?, ?it/s]

Processing:   0%|          | 0/1125 [00:00<?, ?it/s]

Processing:   0%|          | 0/1125 [00:00<?, ?it/s]

Processing:   0%|          | 0/1125 [00:00<?, ?it/s]

Processing:   0%|          | 0/1125 [00:00<?, ?it/s]

Batch 80 Training Loss Is: 0.0158, Accuracy Is: 0.7180


Processing:   0%|          | 0/1125 [00:00<?, ?it/s]

Processing:   0%|          | 0/1125 [00:00<?, ?it/s]

Processing:   0%|          | 0/1125 [00:00<?, ?it/s]

Processing:   0%|          | 0/1125 [00:00<?, ?it/s]

Processing:   0%|          | 0/1125 [00:00<?, ?it/s]

Processing:   0%|          | 0/1125 [00:00<?, ?it/s]

Processing:   0%|          | 0/1125 [00:00<?, ?it/s]

Processing:   0%|          | 0/1125 [00:00<?, ?it/s]

Processing:   0%|          | 0/1125 [00:00<?, ?it/s]

Processing:   0%|          | 0/1125 [00:00<?, ?it/s]

Batch 90 Training Loss Is: 0.0150, Accuracy Is: 0.7177


Processing:   0%|          | 0/1125 [00:00<?, ?it/s]

Processing:   0%|          | 0/1125 [00:00<?, ?it/s]

Processing:   0%|          | 0/1125 [00:00<?, ?it/s]

Processing:   0%|          | 0/1125 [00:00<?, ?it/s]

Processing:   0%|          | 0/1125 [00:00<?, ?it/s]

Processing:   0%|          | 0/1125 [00:00<?, ?it/s]

Processing:   0%|          | 0/1125 [00:00<?, ?it/s]

Processing:   0%|          | 0/1125 [00:00<?, ?it/s]

Processing:   0%|          | 0/1125 [00:00<?, ?it/s]

Processing:   0%|          | 0/1125 [00:00<?, ?it/s]

Batch 100 Training Loss Is: 0.0158, Accuracy Is: 0.7180


In [26]:
print(losses)

[2.2126709396508004, 1.0671840004854731, 0.9487339729799165, 0.8559593775338596, 0.7735895178516706, 0.6988071717056963, 0.6301849321888553, 0.5662731699082586, 0.5071671762632, 0.4524220416148504, 1.9297831032408608, 1.03779617509577, 0.935380582816071, 0.8496665172941155, 0.7703758065700531, 0.6943834713333182, 0.6213198908600542, 0.5530202116370201, 0.48916212373309664, 0.42822688258356517, 0.3723626943777005, 0.3231767401612467, 0.27909400488519004, 0.24035150252862109, 0.20772106328648, 0.17960733324123754, 0.15593969151957168, 0.1357415981826683, 0.11819445976614952, 0.1025578879824736, 0.09143424571698738, 0.08190984889181951, 0.07453894810958041, 0.0694548168298271, 0.06677982023254865, 0.06851709811575711, 0.06847019216211306, 0.07060798024965657, 0.0660628460927142, 0.06503187796411415, 0.06592373476839727, 0.06854001161456108, 0.0700074272358583, 0.07245203796691364, 0.07054218045663503, 0.06967551610949967, 0.06847194500391682, 0.06807119456637237, 0.06856040589925316, 0.06

In [20]:
#   Set The Model To Evaluation Model
model.eval()

#   Initiate Values
total_loss = 0.0
num_samples = len(test_loader)

with torch.no_grad():
    for(user, movie, rating) in test_loader:
        user, movie, rating = user.cuda(), movie.cuda(), rating.cuda()
        outputs = model(user, movie).view(-1)
        mae = nn.L1Loss()
        loss = mae(outputs, rating)
        total_loss += loss.item()

print(f"MAE= {total_loss/num_samples}")

MAE= 0.9262294380664825


In [21]:
#   Choosing A Random Sample
example_set = test_set.sample(10)

In [22]:
#   Create A PrettyTable Torch Datasets
example_dataset = MovieDataset(example_set.userId.values, example_set.movieId.values, example_set.rating.values)
example_loader = DataLoader(example_dataset, batch_size = 8)

In [23]:
#   Create A PrettyTable To Store Values
table = PrettyTable()
table.field_names = ["User ID", "Actual Rating", "Predicted Rating"]

#   Set The Model To Evaluation Mode
model.eval()

#   Make Predicted
for(user, movie, rating) in example_loader:
    with torch.no_grad():
        user, movie, rating = user.cuda(), movie.cuda(), rating.cuda()
        outputs = model(user, movie).view(-1)

    #   Print Input, Target, And Predicted Values
    for i in range(len(rating)):
        userId = user[i].item()
        rating_value = rating[i].item()
        predicted_value = outputs[i].item()

        table.add_row([userId, rating_value, round(predicted_value*2, 0) / 2])

In [24]:
#   Print The Results
print(table)

+---------+---------------+------------------+
| User ID | Actual Rating | Predicted Rating |
+---------+---------------+------------------+
|   1090  |      4.0      |       5.0        |
|   921   |      1.0      |       3.0        |
|   829   |      3.5      |       2.5        |
|   606   |      0.5      |       2.0        |
|   1123  |      2.5      |       4.0        |
|   842   |      4.0      |       4.0        |
|   946   |      4.0      |       4.0        |
|   664   |      4.5      |       2.0        |
|   497   |      5.0      |       2.0        |
|   130   |      4.0      |       4.5        |
+---------+---------------+------------------+
