In [None]:
import matplotlib.pyplot as plt 
import pandas as pd
import numpy as np
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import NearestNeighbors
from scipy.sparse import csr_matrix

In [None]:
# Map each movieId to its title (approach taken from Spencer Pao's YouTube tutorial).
movie_names = df_movies.set_index('movieId')['title'].to_dict()

# Count distinct users and movies; these sizes are reused for the embedding tables.
n_users = len(df_ratings.userId.unique())
n_items = len(df_ratings.movieId.unique())
n_cells = n_users * n_items

print("Number of unique users:", n_users)
# Fixed: this line previously printed n_users under the "movies" label.
print("Number of unique movies:", n_items)
print("The ful rating matrix will have:", n_cells, 'elements.')
print('.............')
print("Number of ratings:", len(df_ratings))
# Guard the density computation so an empty ratings frame does not divide by zero.
fill_pct = len(df_ratings) / n_cells * 100 if n_cells else 0.0
print("Therefore:", fill_pct, '% of the matrix is filled.')


In [None]:
import torch
from torch.autograd import Variable
from tqdm.notebook import tqdm


class MatrixFactorization(torch.nn.Module):
    """Dot-product matrix factorization model.

    Each user and each item gets an ``n_factors``-dimensional embedding; the
    score for a (user, item) pair is the dot product of the two embeddings.

    Args:
        n_users: Number of rows in the user embedding table.
        n_items: Number of rows in the item embedding table.
        n_factors: Size of each embedding vector (default 20).
    """

    def __init__(self, n_users, n_items, n_factors=20):
        super().__init__()
        # User embedding table.
        self.user_factors = torch.nn.Embedding(n_users, n_factors)
        # Item embedding table.
        self.item_factors = torch.nn.Embedding(n_items, n_factors)
        # Start from small non-negative values drawn uniformly from [0, 0.05].
        torch.nn.init.uniform_(self.user_factors.weight, 0, 0.05)
        torch.nn.init.uniform_(self.item_factors.weight, 0, 0.05)

    def forward(self, data):
        """Score a batch of (user index, item index) pairs.

        Args:
            data: 2-D integer tensor; column 0 holds user indices and column 1
                holds item indices.

        Returns:
            1-D tensor with one dot-product score per row of ``data``.
        """
        users, items = data[:, 0], data[:, 1]
        user_embedding = self.user_factors(users)
        item_embedding = self.item_factors(items)
        # Element-wise product summed over the factor axis == row-wise dot product.
        return (user_embedding * item_embedding).sum(1)

    def predict(self, user, item):
        """Score a single (user index, item index) pair.

        The index tensor is created on the same device as the embedding
        weights, so this also works after the model has been moved to a GPU.

        Returns:
            Tensor of shape ``(1,)`` holding the predicted score.
        """
        device = self.user_factors.weight.device
        pair = torch.tensor([[user, item]], dtype=torch.long, device=device)
        return self.forward(pair)

In [None]:
# Create the dataset wrapper and data loader (necessary for PyTorch)
from torch.utils.data.dataset import Dataset
from torch.utils.data import DataLoader # package that helps transform your data to machine learning readiness

class Loader(Dataset):
    """Torch ``Dataset`` over a ratings table with contiguous user/movie indices.

    Raw ``userId`` / ``movieId`` values are re-encoded to ``0..n-1`` in order of
    first appearance so they can be used directly as embedding row indices.

    Args:
        ratings: DataFrame with ``userId``, ``movieId`` and ``rating`` columns.
            When omitted, the notebook-level ``df_ratings`` frame is used, so
            the original ``Loader()`` call keeps working.

    Attributes set by ``__init__``:
        ratings: Copy of the input frame with both ID columns re-encoded.
        userid2idx / movieid2idx: raw ID -> contiguous index.
        idx2userid / idx2movieid: contiguous index -> raw ID.
        x: ``(n_rows, 2)`` long tensor of (user index, movie index) pairs.
        y: tensor of the corresponding ratings.
    """

    def __init__(self, ratings=None):
        source = df_ratings if ratings is None else ratings
        self.ratings = source.copy()

        # Distinct raw IDs, in order of first appearance.
        users = source.userId.unique()
        movies = source.movieId.unique()

        # Raw ID -> contiguous index. (Previously this mapped position -> position,
        # which sent most raw IDs to None/NaN and the rest to the wrong row.)
        self.userid2idx = {raw_id: idx for idx, raw_id in enumerate(users)}
        self.movieid2idx = {raw_id: idx for idx, raw_id in enumerate(movies)}

        # Contiguous index -> raw ID, for translating predictions back.
        self.idx2userid = {idx: raw_id for raw_id, idx in self.userid2idx.items()}
        self.idx2movieid = {idx: raw_id for raw_id, idx in self.movieid2idx.items()}

        # Replace the raw IDs in the working copy with their contiguous indices.
        self.ratings['userId'] = source['userId'].map(self.userid2idx)
        self.ratings['movieId'] = source['movieId'].map(self.movieid2idx)

        # Select the two feature columns explicitly so the column order expected
        # by the model (user first, movie second) does not depend on the frame's
        # layout or on which extra columns it happens to carry.
        features = self.ratings[['userId', 'movieId']].values
        targets = self.ratings['rating'].values
        self.x = torch.tensor(features, dtype=torch.long)
        self.y = torch.tensor(targets)

    def __getitem__(self, index):
        """Return the ``(features, rating)`` pair stored at ``index``."""
        return (self.x[index], self.y[index])

    def __len__(self):
        """Return the number of rating rows."""
        return len(self.ratings)

In [None]:
# Training length and hardware detection.
num_epochs = 128
cuda = torch.cuda.is_available()

print("Is running on GPU", cuda)

# Factorization model with 8 latent factors per user and per movie.
model = MatrixFactorization(n_users, n_items, n_factors=8)
print(model)
# Show the initial values of every trainable parameter.
for name, param in model.named_parameters():
    if not param.requires_grad:
        continue
    print(name, param.data)

# Move the model to the GPU when one is available.
model = model.cuda() if cuda else model

# Mean squared error loss.
loss_fn = torch.nn.MSELoss()

# Adam optimizer over all model parameters.
optimizer = torch.optim.Adam(params=model.parameters(), lr=0.001)

# Training data, served in shuffled mini-batches of 128 rows.
train_set = Loader()
train_loader = DataLoader(dataset=train_set, batch_size=128, shuffle=True)
     

In [None]:
from tqdm.notebook import tqdm
#Y= Ratings x UserId, MovieId

# Train for num_epochs passes over train_loader, printing the mean batch loss per epoch.
for it in tqdm(range(num_epochs)):
    losses = []
    for x, y in train_loader:
        # Only the device transfer depends on CUDA. The optimisation step below
        # was previously nested under this `if`, so nothing trained on CPU.
        if cuda:
            x, y = x.cuda(), y.cuda()
        x = x.long()  # embedding lookups require integer indices
        optimizer.zero_grad()
        outputs = model(x)
        loss = loss_fn(outputs.squeeze(), y.type(torch.float32))
        losses.append(loss.item())
        loss.backward()
        optimizer.step()
    # An empty loader yields no batches; report NaN instead of dividing by zero.
    mean_loss = sum(losses) / len(losses) if losses else float("nan")
    print("iter #{}".format(it), "Loss:", mean_loss)

In [None]:
# Report the mean loss of the last epoch; an empty list means no batch was processed.
if not losses:
    print("iter #{}".format(it), "Loss: No losses calculated")
else:
    print("iter #{}".format(it), "Loss:", sum(losses) / len(losses))
