In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import pandas as pd
from sklearn.model_selection import train_test_split
from torch.utils.data import Dataset, DataLoader

# Step 1: Load the Dataset
ratings = pd.read_csv('ml-latest-small/tags.csv')  # Contains userId, movieId, rating, timestamp


In [2]:
# Preprocess Data: Map userId and movieId to a continuous range of indexes
user_ids = ratings['userId'].unique()
movie_ids = ratings['movieId'].unique()
user_to_index = {user_id: idx for idx, user_id in enumerate(user_ids)}
movie_to_index = {movie_id: idx for idx, movie_id in enumerate(movie_ids)}

ratings['userId'] = ratings['userId'].apply(lambda x: user_to_index[x])
ratings['movieId'] = ratings['movieId'].apply(lambda x: movie_to_index[x])

# Data Splitting
train_df, test_df = train_test_split(ratings, test_size=0.2, random_state=42)


In [3]:
class MovieLensDataset(Dataset):
    def __init__(self, user_ids, movie_ids, ratings):
        self.user_ids = torch.tensor(user_ids, dtype=torch.long)
        self.movie_ids = torch.tensor(movie_ids, dtype=torch.long)
        self.ratings = torch.tensor(ratings, dtype=torch.float)
    
    def __len__(self):
        return len(self.ratings)
    
    def __getitem__(self, idx):
        return self.user_ids[idx], self.movie_ids[idx], self.ratings[idx]

train_dataset = MovieLensDataset(train_df['userId'].values, train_df['movieId'].values, train_df['rating'].values)
test_dataset = MovieLensDataset(test_df['userId'].values, test_df['movieId'].values, test_df['rating'].values)

train_loader = DataLoader(train_dataset, batch_size=512, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=512)


KeyError: 'rating'

In [None]:
class RecommenderSystem(nn.Module):
    def __init__(self, num_users, num_movies, embedding_size):
        super().__init__()
        self.user_embedding = nn.Embedding(num_users, embedding_size)
        self.movie_embedding = nn.Embedding(num_movies, embedding_size)
        self.fc_layers = nn.Sequential(
            nn.Linear(embedding_size * 2, 128),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(128, 1)
        )
    
    def forward(self, user_ids, movie_ids):
        user_embedded = self.user_embedding(user_ids)
        movie_embedded = self.movie_embedding(movie_ids)
        x = torch.cat([user_embedded, movie_embedded], dim=1)
        x = self.fc_layers(x)
        return x.squeeze()

In [None]:
num_users = len(user_ids)
num_movies = len(movie_ids)
model = RecommenderSystem(num_users, num_movies, embedding_size=50)

# Training and Evaluation Functions
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [None]:
def train(model, loader, criterion, optimizer, epochs=5):
    model.train()
    for epoch in range(epochs):
        total_loss = 0
        for users, movies, ratings in loader:
            optimizer.zero_grad()
            predictions = model(users, movies)
            loss = criterion(predictions, ratings)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
        print(f'Epoch {epoch+1}, Loss: {total_loss / len(loader)}')

train(model, train_loader, criterion, optimizer)

Epoch 1, Loss: 0.891633173696634


In [7]:
# Smaller
import pandas as pd
import torch
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
import torch.nn as nn
import torch.optim as optim

# Load data
ratings = pd.read_csv('ml-latest-small/ratings.csv')
# Assume movies.csv is also loaded if needed for metadata
# movies = pd.read_csv('movies.csv')

# Map user IDs and movie IDs to a continuous range of integers
user_ids = ratings['userId'].unique()
movie_ids = ratings['movieId'].unique()
user_to_index = {user_id: idx for idx, user_id in enumerate(user_ids)}
movie_to_index = {movie_id: idx for idx, movie_id in enumerate(movie_ids)}

ratings['userId'] = ratings['userId'].apply(lambda x: user_to_index[x])
ratings['movieId'] = ratings['movieId'].apply(lambda x: movie_to_index[x])

# Split the dataset
train_df, test_df = train_test_split(ratings, test_size=0.2, random_state=42)


In [8]:
class MovieLensDataset(Dataset):
    def __init__(self, user_ids, movie_ids, ratings):
        self.user_ids = torch.tensor(user_ids, dtype=torch.long)
        self.movie_ids = torch.tensor(movie_ids, dtype=torch.long)
        self.ratings = torch.tensor(ratings, dtype=torch.float)
    
    def __len__(self):
        return len(self.ratings)
    
    def __getitem__(self, idx):
        return self.user_ids[idx], self.movie_ids[idx], self.ratings[idx]

train_dataset = MovieLensDataset(train_df['userId'].values, train_df['movieId'].values, train_df['rating'].values)
test_dataset = MovieLensDataset(test_df['userId'].values, test_df['movieId'].values, test_df['rating'].values)

train_loader = DataLoader(train_dataset, batch_size=512, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=512, shuffle=False)


In [9]:
class RecommenderSystem(nn.Module):
    def __init__(self, num_users, num_movies, embedding_size):
        super().__init__()
        self.user_embedding = nn.Embedding(num_users, embedding_size)
        self.movie_embedding = nn.Embedding(num_movies, embedding_size)
        self.fc_layers = nn.Sequential(
            nn.Linear(embedding_size * 2, 128),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(128, 1)
        )
    
    def forward(self, user_ids, movie_ids):
        user_embedded = self.user_embedding(user_ids)
        movie_embedded = self.movie_embedding(movie_ids)
        x = torch.cat([user_embedded, movie_embedded], dim=1)
        x = self.fc_layers(x)
        return x.squeeze()

In [10]:
num_users = len(user_ids)
num_movies = len(movie_ids)
model = RecommenderSystem(num_users, num_movies, embedding_size=50)

# Training and Evaluation Functions
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [11]:
def train(model, loader, criterion, optimizer, epochs=5):
    model.train()
    for epoch in range(epochs):
        total_loss = 0
        for users, movies, ratings in loader:
            optimizer.zero_grad()
            predictions = model(users, movies)
            loss = criterion(predictions, ratings)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
        print(f'Epoch {epoch+1}, Loss: {total_loss / len(loader)}')

train(model, train_loader, criterion, optimizer)

Epoch 1, Loss: 3.159167663206028
Epoch 2, Loss: 1.3757724354538736
Epoch 3, Loss: 1.2632743082469022
Epoch 4, Loss: 1.1864377301705034
Epoch 5, Loss: 1.1400051132033142


In [14]:
torch.save(model.state_dict(), 'ML_Small_State_Dict.pth')

In [None]:
model2 = RecommenderSystem()
model2.load_state_dict(torch.load("ML_Small_State_Dict.pth"))


In [None]:
# https://towardsdatascience.com/serving-pytorch-models-with-torchserve-6b8e8cbdb632