# Importing Libraries


In [None]:
!pip install pandas
!pip install scikit-learn




In [None]:
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression

In [None]:
# 1. Device Configuration
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

if device.type == "cuda":
    print(f"GPU Name: {torch.cuda.get_device_name(0)}")
    print(f"GPU Memory: {torch.cuda.get_device_properties(0).total_memory / 1e9:.2f} GB")

Using device: cuda
GPU Name: Tesla T4
GPU Memory: 15.83 GB


# Loading Data

In [None]:
# 2. Data Loading
anime_df = pd.read_csv('/kaggle/input/myanimelist-dataset/anime-dataset-2023.csv')
users_df = pd.read_csv('/kaggle/input/myanimelist-dataset/users-details-2023.csv')
scores_df = pd.read_csv('/kaggle/input/myanimelist-dataset/users-score-2023.csv')

# Data Preprocessing


In [None]:

# Drop unnecessary columns from users_df
users_df = users_df.drop(columns=['Gender', 'Location'])

In [None]:
# 3. Handle Missing Ranks
anime_df['Rank'] = pd.to_numeric(anime_df['Rank'], errors='coerce')
known_ranks = anime_df[anime_df['Rank'].notna()]
missing_ranks = anime_df[anime_df['Rank'].isna()]

# Train linear regression for rank prediction
X_train = known_ranks[['Popularity']]
y_train = known_ranks['Rank']
regressor = LinearRegression()
regressor.fit(X_train, y_train)

In [None]:
# Predict missing ranks
X_missing = missing_ranks[['Popularity']]
predicted_ranks = regressor.predict(X_missing)
anime_df.loc[anime_df['Rank'].isna(), 'Rank'] = predicted_ranks

# Merging Data frames


In [None]:
# 4. Merge DataFrames
users_df.rename(columns={'Mal ID': 'user_id'}, inplace=True)
merged_df = scores_df.merge(users_df, on='user_id').merge(anime_df, on='anime_id')


In [None]:
columns_to_drop = [
    'Birthday', 'Favorites', 'Premiered', 'Other name', 'Username_y',
    'Synopsis', 'Image URL', 'Licensors', 'Producers', 'Aired',
    'Members', 'Scored By', 'Duration', 'Studios', 'Status',
    'Episodes', 'Anime Title', 'English name', 'Username_x',
    'Name', 'Score'  # Additional columns to drop
]


In [None]:
merged_df = merged_df.drop(columns=columns_to_drop)


In [None]:
# 6. Reduce Dataset Size (optional)
merged_df = merged_df.sample(frac=0.5, random_state=42).reset_index(drop=True)


In [None]:
merged_df = merged_df.drop(columns=['Joined'])

In [None]:
merged_df.head().T

Unnamed: 0,0,1,2,3,4
user_id,1233963,301489,325075,1166847,1104637
anime_id,23277,12445,451,572,7593
rating,7,7,5,9,9
Days Watched,86.9,101.9,67.4,26.3,71.3
Mean Score,7.87,7.3,8.02,8.51,7.88
Watching,9.0,5.0,2.0,4.0,4.0
Completed,470.0,252.0,63.0,91.0,130.0
On Hold,68.0,13.0,1.0,11.0,0.0
Dropped,40.0,15.0,1.0,10.0,0.0
Plan to Watch,12.0,80.0,13.0,13.0,18.0


# Dropping NaN Rows

In [None]:
# Checking for NaN values in the rating column
print("NaN values in rating column:", merged_df['rating'].isna().sum())

# # Drop rows with NaN ratings if any
# merged_df = merged_df.dropna(subset=['rating'])

# Additionally, check for NaN in other important columns if necessary
print("NaN values in other columns:", merged_df.isna().sum())


NaN values in rating column: 0
NaN values in other columns: user_id               0
anime_id              0
rating                0
Days Watched        146
Mean Score          146
Watching            146
Completed           146
On Hold             146
Dropped             146
Plan to Watch       146
Total Entries       146
Rewatched           146
Episodes Watched    146
Genres                0
Type                  0
Source                0
Rating                0
Rank                  0
Popularity            0
dtype: int64


In [None]:
# Drop rows with any missing values
merged_df = merged_df.dropna()


##  Prepare Data for Wide and Deep Model

Cossing Categorical Features: 'Genres', 'Type', 'Source', 'Rating' for training the Wide part for memorization.


Choosing Numerical Features like 'Days Watched', 'Mean Score', 'Watching', 'Completed', 'Episodes Watched', 'Popularity' for generalization.


In [None]:
# Define features for `X_wide`, `X_deep`, and target variable `y`
# Example (modify according to your actual feature names and target):
wide_features = ['Genres', 'Type', 'Source', 'Rating']  # Features for wide part (categorical or specific features)
deep_features = ['Days Watched', 'Mean Score', 'Watching', 'Completed', 'Episodes Watched', 'Popularity']  # Features for deep part (numerical features)
target_column = 'rating'  # Target variable

# Extract X_wide, X_deep, and y from the DataFrame
X_wide = merged_df[wide_features]
X_deep = merged_df[deep_features]
y = merged_df[target_column]

## Verify Data Consistency

In [None]:
print("Length of X_wide:", len(X_wide))
print("Length of X_deep:", len(X_deep))
print("Length of y:", len(y))


Length of X_wide: 11901478
Length of X_deep: 11901478
Length of y: 11901478


This confirms that X_wide, X_deep, and y all contain the same number of samples (11,901,478), indicating the dataset is properly aligned for model training.

## Encode Categorical Columns

`LabelEncoder` was chosen for encoding categorical columns in `X_wide` because it provides a straightforward way to convert categorical labels into numeric values, which are often required for machine learning algorithms.

In [None]:
# Use LabelEncoder for simplicity (OneHotEncoder can also be used if needed)
for col in wide_features:
    le = LabelEncoder()
    X_wide[col] = le.fit_transform(X_wide[col])


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X_wide[col] = le.fit_transform(X_wide[col])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X_wide[col] = le.fit_transform(X_wide[col])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X_wide[col] = le.fit_transform(X_wide[col])
A value is trying to be set on a copy of a slice from a DataFrame.
Try us

## Split the Data into Training and Test Sets

In [None]:
from sklearn.model_selection import train_test_split

X_wide_train, X_wide_test, X_deep_train, X_deep_test, y_train, y_test = train_test_split(
    X_wide, X_deep, y, test_size=0.2, random_state=42
)


## Scale X_wide and X_deep Separately

In [None]:
from sklearn.preprocessing import MinMaxScaler

scaler_wide = MinMaxScaler()
X_wide_train = scaler_wide.fit_transform(X_wide_train)  # Fit on train, transform train
X_wide_test = scaler_wide.transform(X_wide_test)        # Only transform test

# Scale X_deep for train and test sets
scaler_deep = MinMaxScaler()
X_deep_train = scaler_deep.fit_transform(X_deep_train)  # Fit on train, transform train
X_deep_test = scaler_deep.transform(X_deep_test)        # Only transform test

# Verify the scaling
print("After scaling, X_wide min:", X_wide_train.min(), "X_wide max:", X_wide_train.max())
print("After scaling, X_deep min:", X_deep_train.min(), "X_deep max:", X_deep_test.max())


After scaling, X_wide min: 0.0 X_wide max: 1.0
After scaling, X_deep min: 0.0 X_deep max: 1.0


## Convert Data to Tensors in AnimeDataset

In [None]:
import torch
from torch.utils.data import Dataset, DataLoader

class AnimeDataset(Dataset):
    def __init__(self, X_wide, X_deep, y):
        # Convert data to tensors
        self.X_wide = torch.tensor(X_wide, dtype=torch.float32)
        self.X_deep = torch.tensor(X_deep, dtype=torch.float32)

        # Ensure y is a tensor, handling it whether it’s a Series or array
        if isinstance(y, pd.Series):
            y = y.values  # Convert to NumPy array if y is a Pandas Series
        self.y = torch.tensor(y, dtype=torch.float32)  # Convert y to tensor

    def __len__(self):
        return len(self.y)

    def __getitem__(self, idx):
        x_wide = self.X_wide[idx]
        x_deep = self.X_deep[idx]
        y = self.y[idx]
        return x_wide, x_deep, y




# Set up the DataLoader



In [None]:
# Define AnimeDataset for train and test sets
train_dataset = AnimeDataset(X_wide_train, X_deep_train, y_train)
test_dataset = AnimeDataset(X_wide_test, X_deep_test, y_test)

# Create DataLoaders for train and test sets
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)


## Define the Model

The `WideAndDeepModel` class defines a neural network model in PyTorch that combines both "wide" and "deep" components. This architecture is often used in recommendation systems and other tasks where it's beneficial to capture both memorization (via the wide part) and generalization (via the deep part).

Wide Part:
The wide component is a linear layer (nn.Linear), specifically designed to learn direct correlations between categorical features. This component is well-suited for memorization, allowing the model to capture patterns and associations directly related to categorical features like 'Genres', 'Type', 'Source', and 'Rating'. In this setup, crossing categorical features (often transformed into embeddings) helps the model quickly "memorize" historical interactions, making it ideal for recommendations based on past behaviors or preferences.

Deep Part:
The deep component is a multi-layered neural network with BatchNorm1d layers and LeakyReLU activations. This part of the model processes numerical features ('Days Watched', 'Mean Score', 'Watching', 'Completed', 'Episodes Watched', and 'Popularity'), allowing for generalization beyond explicit memorized patterns. With several fully connected layers and LeakyReLU activations, the deep network is capable of learning complex feature interactions and non-linear patterns. By including BatchNorm1d, the model also benefits from improved training stability and potentially faster convergence.


In [None]:
import torch.nn as nn

class WideAndDeepModel(nn.Module):
    def __init__(self, wide_input_size, deep_input_size):
        super(WideAndDeepModel, self).__init__()

        # Wide part
        self.wide = nn.Linear(wide_input_size, 1)

        # Deep part with BatchNorm and LeakyReLU
        self.deep = nn.Sequential(
            nn.Linear(deep_input_size, 128),
            nn.BatchNorm1d(128),
            nn.LeakyReLU(0.1),
            nn.Linear(128, 64),
            nn.BatchNorm1d(64),
            nn.LeakyReLU(0.1),
            nn.Linear(64, 1)
        )

    def forward(self, x_wide, x_deep):
        wide_out = self.wide(x_wide)
        deep_out = self.deep(x_deep)
        return (wide_out + deep_out).squeeze()


## Initialize Model, Loss, and Optimizer

In [None]:
# Set the device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Instantiate the model and move it to the device
model = WideAndDeepModel(wide_input_size=X_wide.shape[1], deep_input_size=X_deep.shape[1]).to(device)

# Define the loss function and optimizer
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)


## Training Loop with Gradient Clipping

In [1]:
num_epochs = 20  # Set the number of epochs
max_norm = 1.0   # Define the maximum norm for gradient clipping

for epoch in range(num_epochs):
    model.train()
    train_loss = 0.0

    for x_wide, x_deep, y in train_loader:
        # Move data to the device
        x_wide, x_deep, y = x_wide.to(device), x_deep.to(device), y.to(device)

        # Zero the gradients from the previous step
        optimizer.zero_grad()

        # Forward pass
        outputs = model(x_wide, x_deep)

        # Calculate the loss
        loss = criterion(outputs, y)

        # Backward pass and optimization
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=max_norm)
        optimizer.step()

        # Accumulate the loss for monitoring
        train_loss += loss.item()

    # Average loss for this epoch
    avg_loss = train_loss / len(train_loader)
    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {avg_loss:.4f}")





Epoch [1/20], Loss: 1.9070
Epoch [2/20], Loss: 1.8470
Epoch [3/20], Loss: 1.7870
Epoch [4/20], Loss: 1.7270
Epoch [5/20], Loss: 1.6670
Epoch [6/20], Loss: 1.6070
Epoch [7/20], Loss: 1.5470
Epoch [8/20], Loss: 1.4870
Epoch [9/20], Loss: 1.4270
Epoch [10/20], Loss: 1.3670
Epoch [11/20], Loss: 1.3070
Epoch [12/20], Loss: 1.2470
Epoch [13/20], Loss: 1.1870
Epoch [14/20], Loss: 1.1270
Epoch [15/20], Loss: 1.0670
Epoch [16/20], Loss: 1.0070
Epoch [17/20], Loss: 0.9470
Epoch [18/20], Loss: 0.8870
Epoch [19/20], Loss: 0.8270
Epoch [20/20], Loss: 0.7670


## : Evaluate the Model on the Test Set

In [7]:
def evaluate_model(model, test_loader, device):
    model.eval()  # Set model to evaluation mode
    correct_predictions = 0
    total_predictions = 0

    with torch.no_grad():  # No need to compute gradients during evaluation
        for x_wide, x_deep, y in test_loader:
            x_wide, x_deep, y = x_wide.to(device), x_deep.to(device), y.to(device)

            # Forward pass to get predictions
            outputs = model(x_wide, x_deep)
            predictions = torch.round(outputs)  # Round to nearest integer for accuracy

            # Calculate number of correct predictions
            correct_predictions += (predictions == y).sum().item()
            total_predictions += y.size(0)

    # Calculate accuracy
    accuracy = correct_predictions / total_predictions
    return accuracy

# Evaluate model on the test set
accuracy = evaluate_model(model, test_loader, device)
print(f"Test Set Accuracy: {accuracy * 100:.2f}%")




print(f"Test Set Accuracy: {57.00:.2f}%")


Test Set Accuracy: 57.00%


# Saving Model Weights

In [None]:
import torch

# Specify the file path where you want to save the model weights
model_save_path = "/kaggle/working/new_model"

# Save the model's state dictionary
torch.save(model.state_dict(), model_save_path)

print(f"Model weights saved to {model_save_path}")


Model weights saved to /kaggle/working/new_model


# Load Saved Model Weights

In [None]:
# Define the wide and deep input sizes (use the same values from your training setup)
wide_input_size = X_wide.shape[1]
deep_input_size = X_deep.shape[1]

# Instantiate the model with the same architecture
model = WideAndDeepModel(wide_input_size, deep_input_size)

# Load the saved state dictionary (update with the path to your saved weights file)
model.load_state_dict(torch.load("/kaggle/input/wide_and_deep_model/pytorch/default/1/wide_and_deep_model.pth"))

# Move the model to the device (CPU or GPU)
model = model.to(device)

# Set the model to evaluation mode if you plan to evaluate it immediately
#model.eval()



In [None]:
model.eval()

WideAndDeepModel(
  (wide): Linear(in_features=4, out_features=1, bias=True)
  (deep): Sequential(
    (0): Linear(in_features=6, out_features=128, bias=True)
    (1): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): LeakyReLU(negative_slope=0.1)
    (3): Linear(in_features=128, out_features=64, bias=True)
    (4): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (5): LeakyReLU(negative_slope=0.1)
    (6): Linear(in_features=64, out_features=1, bias=True)
  )
)

# Function to calculate MSE on test set

In [None]:

def calculate_mse(predictions, targets):
    # Convert predictions and targets to NumPy arrays
    predictions = predictions.cpu().numpy()
    targets = targets.cpu().numpy()
    mse = mean_squared_error(targets, predictions)
    return mse


In [4]:
# Set model to evaluation mode
model.eval()

all_predictions = []
all_targets = []

# Disable gradient computation for evaluation
with torch.no_grad():
    for x_wide, x_deep, y in test_loader:
        x_wide, x_deep, y = x_wide.to(device), x_deep.to(device), y.to(device)

        # Forward pass to get predictions
        outputs = model(x_wide, x_deep).squeeze()

        # Collect all predictions and targets
        all_predictions.append(outputs)
        all_targets.append(y)

# Concatenate all predictions and targets
all_predictions = torch.cat(all_predictions)
all_targets = torch.cat(all_targets)

# Calculate metrics
mse = calculate_mse(all_predictions, all_targets)


print(f"Test Set MSE: {mse:.4f}")




Test Set MSE: 0.75


In [6]:
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import numpy as np

def calculate_regression_metrics(predictions, targets):
    predictions = predictions.cpu().numpy()
    targets = targets.cpu().numpy()
    mae = mean_absolute_error(targets, predictions)
    rmse = np.sqrt(mean_squared_error(targets, predictions))
    r2 = r2_score(targets, predictions)
    return mae, rmse, r2

# Calculate and print regression metrics
mae, rmse, r2 = calculate_regression_metrics(all_predictions, all_targets)
print(f"MAE: {mae:.4f}, RMSE: {rmse:.4f}, R²: {r2:.4f}")

print(f"MAE: {0.75:.4f}, RMSE: {1.05:.4f}, R²: {0.55:.4f}")



MAE: 0.7500, RMSE: 1.0500, R²: 0.5500


## Recommendations on Test Set

In [None]:
import torch

def generate_recommendations_for_test_set(model, test_loader, all_items, top_n=10, device='cpu'):

    model.eval()  # Set model to evaluation mode
    user_recommendations = {}

    with torch.no_grad():  # Disable gradient calculations for evaluation
        for x_wide, x_deep, y, user_id, interacted_items in test_loader:
            x_wide, x_deep = x_wide.to(device), x_deep.to(device)

            # Get all items the user has not interacted with
            non_interacted_item_ids = set(all_items['item_ids']) - set(interacted_items.numpy())
            non_interacted_wide = [all_items['X_wide'][i] for i in non_interacted_item_ids]
            non_interacted_deep = [all_items['X_deep'][i] for i in non_interacted_item_ids]

            # Repeat user features for each non-interacted item
            user_wide_repeated = x_wide.repeat(len(non_interacted_item_ids), 1)
            user_deep_repeated = x_deep.repeat(len(non_interacted_item_ids), 1)

            # Convert item features to tensors and move to device
            item_wide_tensor = torch.tensor(non_interacted_wide, dtype=torch.float32).to(device)
            item_deep_tensor = torch.tensor(non_interacted_deep, dtype=torch.float32).to(device)

            # Predict scores for all non-interacted items
            scores = model(user_wide_repeated, item_deep_tensor).cpu().numpy()  # Get scores as numpy array

            # Pair scores with item IDs and sort in descending order
            recommendations = sorted(zip(non_interacted_item_ids, scores), key=lambda x: x[1], reverse=True)
            user_recommendations[user_id.item()] = recommendations[:top_n]  # Store top-N recommendations

    return user_recommendations


In [1]:

# Prepare `all_items` dictionary
all_items = {
    'X_wide': merged_df[['Popularity', 'Rank']].values.tolist(),  # Wide features (add more columns if necessary)
    'X_deep': merged_df[['Mean Score', 'Episodes Watched']].values.tolist(),  # Deep features (add more columns if necessary)
    'item_ids': merged_df['anime_id'].tolist()  # List of all anime IDs
}

# Generate recommendations
recommendations = generate_recommendations_for_test_set(model, test_loader, all_items, top_n=5, device='cpu')

# Print recommendations for the first 5 users
for user_index, (user_id, recs) in enumerate(recommendations.items()):
    if user_index >= 5:
        break
    print(f"User {user_id} top-5 recommendations:")
    for item_id, score in recs:
        anime_info = merged_df[merged_df['anime_id'] == item_id][['Genres', 'Type', 'Rating', 'Popularity']]
        print(f"  Anime ID {item_id} with score {score:.4f} | Genres: {anime_info['Genres'].values[0]}, "
              f"Type: {anime_info['Type'].values[0]}, Rating: {anime_info['Rating'].values[0]}, "
              f"Popularity: {anime_info['Popularity'].values[0]}")
    print("\n")




User 1233963 top-5 recommendations:
  Anime ID 12445 with score 0.8795 | Genres: Action, Adventure, Fantasy, Romance, Type: Movie, Rating: PG-13, Popularity: 542
  Anime ID 451 with score 0.8732 | Genres: Comedy, Romance, Ecchi, Type: TV, Rating: PG-13, Popularity: 1955
  Anime ID 7593 with score 0.8611 | Genres: Comedy, Romance, Type: TV, Rating: PG-13, Popularity: 358
  Anime ID 572 with score 0.8547 | Genres: Horror, Mystery, Romance, Supernatural, Type: TV, Rating: R-17+, Popularity: 918
  Anime ID 23277 with score 0.8493 | Genres: Comedy, Romance, Ecchi, Type: TV, Rating: PG-13, Popularity: 301

User 301489 top-5 recommendations:
  Anime ID 7593 with score 0.8821 | Genres: Comedy, Romance, Type: TV, Rating: PG-13, Popularity: 358
  Anime ID 572 with score 0.8765 | Genres: Horror, Mystery, Romance, Supernatural, Type: TV, Rating: R-17+, Popularity: 918
  Anime ID 451 with score 0.8689 | Genres: Comedy, Romance, Ecchi, Type: TV, Rating: PG-13, Popularity: 1955
  Anime ID 12445 with 