In [1]:
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
from tqdm import tqdm
import numpy as np
from sklearn import metrics

class MovieDataset(torch.utils.data.Dataset):
    def __init__(self, users, movies, ratings):
        self.users = users
        self.movies = movies
        self.ratings = ratings
        
    def __len__(self):
        return len(self.users)
    
    def __getitem__(self, item):
        user = self.users[item]
        movie = self.movies[item]
        rating = self.ratings[item]
        return {
            'user': torch.tensor(user, dtype=torch.long),
            'movie': torch.tensor(movie, dtype=torch.long),
            'rating': torch.tensor(rating, dtype=torch.float),
        }

class RecSysModel(nn.Module):
    def __init__(self, num_users, num_movies):
        super().__init__()
        self.user_embed = nn.Embedding(num_users, 32)
        self.movie_embed = nn.Embedding(num_movies, 32)
        self.out = nn.Linear(64, 1)
        
    def forward(self, users, movies):
        user_embeds = self.user_embed(users)
        movie_embeds = self.movie_embed(movies)
        output = torch.cat([user_embeds, movie_embeds], dim=1)
        output = self.out(output)
        return output

def train_model(model, train_dataset, valid_dataset, epochs=10, train_bs=512, valid_bs=512, lr=1e-3):
    device = torch.device("cpu")
    model = model.to(device)
    
    train_loader = DataLoader(
        train_dataset, 
        batch_size=train_bs, 
        shuffle=True,
        num_workers=0
    )
    
    valid_loader = DataLoader(
        valid_dataset, 
        batch_size=valid_bs, 
        shuffle=False,
        num_workers=0
    )
    
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=3, gamma=0.7)
    criterion = nn.MSELoss()
    
    best_rmse = float('inf')
    
    for epoch in range(epochs):
        # Training
        model.train()
        train_loss = 0
        train_pbar = tqdm(train_loader, total=len(train_loader))
        
        for batch in train_pbar:
            users = batch["user"].to(device)
            movies = batch["movie"].to(device)
            ratings = batch["rating"].to(device)
            
            optimizer.zero_grad()
            outputs = model(users, movies)
            loss = criterion(outputs, ratings.view(-1, 1))
            loss.backward()
            optimizer.step()
            
            train_loss += loss.item()
            train_pbar.set_description(f"Epoch {epoch+1} - Training Loss: {train_loss/(train_pbar.n+1):.4f}")
            
        scheduler.step()
        
        # Validation
        model.eval()
        valid_loss = 0
        all_preds = []
        all_ratings = []
        
        with torch.no_grad():
            valid_pbar = tqdm(valid_loader, total=len(valid_loader))
            for batch in valid_pbar:
                users = batch["user"].to(device)
                movies = batch["movie"].to(device)
                ratings = batch["rating"].to(device)
                
                outputs = model(users, movies)
                loss = criterion(outputs, ratings.view(-1, 1))
                
                valid_loss += loss.item()
                valid_pbar.set_description(f"Epoch {epoch+1} - Validation Loss: {valid_loss/(valid_pbar.n+1):.4f}")
                
                all_preds.extend(outputs.cpu().numpy().flatten())
                all_ratings.extend(ratings.cpu().numpy().flatten())
        
        # Calculate RMSE
        rmse = np.sqrt(metrics.mean_squared_error(all_ratings, all_preds))
        print(f"Epoch {epoch+1} - Validation RMSE: {rmse:.4f}")
        
        if rmse < best_rmse:
            best_rmse = rmse
            torch.save(model.state_dict(), "best_model.pth")
            print(f"Saved best model with RMSE: {best_rmse:.4f}")
            
    return model

def run_training():
    import pandas as pd
    from sklearn import model_selection, preprocessing
    
    df = pd.read_csv('../data/train_v2.csv')
    
    lbl_user = preprocessing.LabelEncoder()
    lbl_movie = preprocessing.LabelEncoder()
    
    df.user = lbl_user.fit_transform(df.user.values)
    df.movie = lbl_movie.fit_transform(df.movie.values)
    
    df_train, df_valid = model_selection.train_test_split(
        df, 
        test_size=0.2, 
        random_state=42, 
        stratify=df.rating.values
    )
    
    train_dataset = MovieDataset(
        users=df_train.user.values, 
        movies=df_train.movie.values, 
        ratings=df_train.rating.values
    )
    
    valid_dataset = MovieDataset(
        users=df_valid.user.values, 
        movies=df_valid.movie.values, 
        ratings=df_valid.rating.values
    )
    
    model = RecSysModel(
        num_users=len(lbl_user.classes_), 
        num_movies=len(lbl_movie.classes_)
    )
    
    trained_model = train_model(
        model, 
        train_dataset, 
        valid_dataset, 
        epochs=10, 
        train_bs=512, 
        valid_bs=512
    )
    
    return trained_model, lbl_user, lbl_movie

if __name__ == "__main__":
    run_training()

Epoch 1 - Training Loss: 4.4943: 100%|█████████████████████████████████████████████| 1173/1173 [00:36<00:00, 32.56it/s]
Epoch 1 - Validation Loss: 1.2751: 100%|█████████████████████████████████████████████| 294/294 [00:09<00:00, 31.72it/s]


Epoch 1 - Validation RMSE: 1.1236
Saved best model with RMSE: 1.1236


Epoch 2 - Training Loss: 1.0414: 100%|█████████████████████████████████████████████| 1173/1173 [00:46<00:00, 25.24it/s]
Epoch 2 - Validation Loss: 0.9545: 100%|█████████████████████████████████████████████| 294/294 [00:08<00:00, 34.69it/s]


Epoch 2 - Validation RMSE: 0.9720
Saved best model with RMSE: 0.9720


Epoch 3 - Training Loss: 0.8948: 100%|█████████████████████████████████████████████| 1173/1173 [00:42<00:00, 27.90it/s]
Epoch 3 - Validation Loss: 0.8782: 100%|█████████████████████████████████████████████| 294/294 [00:08<00:00, 34.57it/s]


Epoch 3 - Validation RMSE: 0.9357
Saved best model with RMSE: 0.9357


Epoch 4 - Training Loss: 0.8494: 100%|█████████████████████████████████████████████| 1173/1173 [01:05<00:00, 17.82it/s]
Epoch 4 - Validation Loss: 0.8600: 100%|█████████████████████████████████████████████| 294/294 [00:08<00:00, 35.15it/s]


Epoch 4 - Validation RMSE: 0.9231
Saved best model with RMSE: 0.9231


Epoch 5 - Training Loss: 0.8327: 100%|█████████████████████████████████████████████| 1173/1173 [01:01<00:00, 19.17it/s]
Epoch 5 - Validation Loss: 0.8473: 100%|█████████████████████████████████████████████| 294/294 [00:20<00:00, 14.03it/s]


Epoch 5 - Validation RMSE: 0.9178
Saved best model with RMSE: 0.9178


Epoch 6 - Training Loss: 0.8257: 100%|█████████████████████████████████████████████| 1173/1173 [00:55<00:00, 21.06it/s]
Epoch 6 - Validation Loss: 0.8419: 100%|█████████████████████████████████████████████| 294/294 [00:07<00:00, 37.97it/s]


Epoch 6 - Validation RMSE: 0.9148
Saved best model with RMSE: 0.9148


Epoch 7 - Training Loss: 0.8183: 100%|█████████████████████████████████████████████| 1173/1173 [00:29<00:00, 40.19it/s]
Epoch 7 - Validation Loss: 0.8409: 100%|█████████████████████████████████████████████| 294/294 [00:06<00:00, 46.37it/s]


Epoch 7 - Validation RMSE: 0.9129
Saved best model with RMSE: 0.9129


Epoch 8 - Training Loss: 0.8149: 100%|█████████████████████████████████████████████| 1173/1173 [00:30<00:00, 38.91it/s]
Epoch 8 - Validation Loss: 0.8404: 100%|█████████████████████████████████████████████| 294/294 [00:06<00:00, 43.41it/s]


Epoch 8 - Validation RMSE: 0.9126
Saved best model with RMSE: 0.9126


Epoch 9 - Training Loss: 0.8143: 100%|█████████████████████████████████████████████| 1173/1173 [00:38<00:00, 30.10it/s]
Epoch 9 - Validation Loss: 0.8310: 100%|█████████████████████████████████████████████| 294/294 [00:05<00:00, 51.72it/s]


Epoch 9 - Validation RMSE: 0.9121
Saved best model with RMSE: 0.9121


Epoch 10 - Training Loss: 0.8102: 100%|████████████████████████████████████████████| 1173/1173 [00:41<00:00, 28.25it/s]
Epoch 10 - Validation Loss: 0.8295: 100%|████████████████████████████████████████████| 294/294 [00:06<00:00, 45.87it/s]


Epoch 10 - Validation RMSE: 0.9113
Saved best model with RMSE: 0.9113
