In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split

# Wide user x anime matrix; the first CSV column supplies the row index.
interaction_matrix = pd.read_csv("src/models/interaction_matrix.csv", index_col=0)

# Reshape wide -> long (one row per matrix cell), name the three resulting
# columns, and keep only the cells holding a strictly positive rating.
ratings_data = (
    interaction_matrix
    .stack()
    .reset_index()
    .set_axis(["user_id", "anime_id", "rating"], axis=1)
    .loc[lambda frame: frame["rating"] > 0]
)

# Seeded 80/20 split so the generated files are reproducible across runs.
train_data, test_data = train_test_split(
    ratings_data,
    test_size=0.2,
    random_state=42,
)

# Persist both splits without the positional index column.
test_data.to_csv("src/dataset/test_data.csv", index=False)
train_data.to_csv("src/dataset/train_data.csv", index=False)

print("Training and test data generated successfully!")
print(f"Training data shape: {train_data.shape}")
print(f"Test data shape: {test_data.shape}")


In [None]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from models.base_recommender import BaseRecommender

class BaselineModel(BaseRecommender):
    """
    User-mean baseline recommender.

    Each prediction is the mean training rating of the requesting user; users
    that did not appear in the training data receive the global mean training
    rating instead. Item ids are accepted by ``forward`` but do not influence
    the prediction.
    """

    def __init__(self, anime_data):
        """
        Predicts ratings based on user averages and computes MSE.

        Args:
            anime_data: Anime metadata. It is stored on the instance but is not
                read by any method of this class.
        """
        super().__init__()
        self.anime_data = anime_data
        # Both are populated by fit(); None means "not fitted yet".
        self.user_mean_ratings = None
        self.global_mean_rating = None

    def fit(self, train_df):
        """
        Train the baseline model by computing:
        - Mean rating for each user (user-level average)
        - Global mean rating as a fallback

        Args:
            train_df: DataFrame with at least 'user_id' and 'rating' columns.
        """
        self.user_mean_ratings = train_df.groupby('user_id')['rating'].mean().to_dict()
        self.global_mean_rating = train_df['rating'].mean()
        print(f"Global Mean Rating: {self.global_mean_rating:.4f}")

    def forward(self, user_ids, item_ids):
        """
        Predict ratings for user-item pairs based on user mean ratings or global mean.

        Args:
            user_ids: One-dimensional collection of user ids (list, pandas
                Series, NumPy array or torch tensor).
            item_ids: Accepted for interface compatibility; not used.

        Returns:
            A 1-D float32 tensor holding one prediction per entry of ``user_ids``.

        Raises:
            RuntimeError: If called before ``fit``.
        """
        if self.user_mean_ratings is None or self.global_mean_rating is None:
            raise RuntimeError("BaselineModel.fit() must be called before predicting.")

        # Tensors, arrays and Series expose tolist(), which yields plain Python
        # scalars. This matters for torch tensors: iterating one produces 0-d
        # tensors that hash by identity, so they would never match a dict key
        # and every user would silently receive the global mean.
        if hasattr(user_ids, "tolist"):
            user_ids = user_ids.tolist()

        fallback = self.global_mean_rating
        predictions = [self.user_mean_ratings.get(user_id, fallback) for user_id in user_ids]
        return torch.tensor(predictions, dtype=torch.float32)

    def evaluate_mse(self, test_df):
        """
        Evaluate the model by predicting ratings and computing the MSE.

        Args:
            test_df: DataFrame with 'user_id', 'anime_id' and 'rating' columns.

        Returns:
            The mean squared error between predictions and actual ratings, as
            a Python float.
        """
        user_ids = test_df['user_id'].tolist()
        predictions = self.forward(user_ids, test_df['anime_id'].tolist())
        actual_ratings = torch.tensor(test_df['rating'].tolist(), dtype=torch.float32)
        mse = nn.MSELoss()(predictions, actual_ratings).item()
        return mse




In [None]:
# Reload the splits written to disk earlier, plus the anime metadata table.
test_data = pd.read_csv("src/dataset/test_data.csv")
train_data_full = pd.read_csv("src/dataset/train_data.csv")
anime_data = pd.read_csv("src/dataset/anime.csv")

# Hold out 20% of the training rows for validation (seeded for reproducibility).
train_data, val_data = train_test_split(
    train_data_full,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Fit the baseline on the training split only; validation and test rows are
# used purely for scoring.
model = BaselineModel(anime_data)
model.fit(train_data)

# BaselineModel, as defined in this notebook, exposes its MSE computation as
# `evaluate_mse` (it defines no `evaluate`), and that method returns the float
# that the formatted prints below expect.
val_mse = model.evaluate_mse(val_data)
test_mse = model.evaluate_mse(test_data)
print(f"Validation MSE: {val_mse:.4f}")
print(f"Test MSE: {test_mse:.4f}")
