In [5]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/anime-recommendations-database/rating.csv
/kaggle/input/anime-recommendations-database/anime.csv


In [6]:
import logging
import os
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split
from tensorflow import keras
from tensorflow.keras import layers
import torch

# specify GPU device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
if torch.cuda.is_available():
    n_gpu = torch.cuda.device_count()
    torch.cuda.get_device_name(0)

# --- Load Anime Dataset ---
anime_df = pd.read_csv('/kaggle/input/anime-recommendations-database/anime.csv', delimiter=',')
rating_df = pd.read_csv('/kaggle/input/anime-recommendations-database/rating.csv', delimiter=',')

# This would drop a row if *any* column in that row has a NaN value
anime_df.dropna(axis=0, inplace=True)
rating_df.dropna(axis=0, inplace=True)

print("shape of anime_df:",anime_df.shape)
print("shape of rating_df",rating_df.shape)

# Rename columns for consistency and to be clear about user_id and item_id.
# The 'rating' DataFrame will be our primary 'df' for the NCF model.
df = rating_df.copy()
df.rename(columns={'anime_id': 'movie_id'}, inplace=True)

# For this NCF model aiming to predict explicit ratings, it's best to remove -1 ratings.
df = df[df['rating'] != -1]

# Remove duplicate (user_id, anime_id) combinations by keeping the mean rating
df = df.groupby(['user_id', 'movie_id']).agg({'rating': 'mean'}).reset_index()

# Store original ratings for relevance calculation later
original_min_rating = df["rating"].min()
original_max_rating = df["rating"].max()
df['original_rating'] = df['rating'] # Store original ratings

# Display info about the anime dataset
print("Anime Dataset Info:")
print(anime_df.info())
print("\nRating Dataset Info (after removing -1 ratings):")
print(df.info())

# First step is-
# Encode users and movies (anime) as integer indices to create a more efficient representation for the neural network.
user_ids = df["user_id"].unique()
users_dict = {x: i for i, x in enumerate(user_ids)}

movie_ids = df["movie_id"].unique() # These are now anime_ids
movies_dict = {x: i for i, x in enumerate(movie_ids)}
df["user"] = df["user_id"].map(users_dict)
df["movie"] = df["movie_id"].map(movies_dict)

def convert_data(df, batch_size, shuffle=True):
    """
    Create TensorFlow Dataset objects from the pandas DataFrames. These datasets are
    optimized for training deep learning models, including shuffling and batching.
    """
    ds = tf.data.Dataset.from_tensor_slices(((df['user'].values, df['movie'].values), df['rating'].values))

    if shuffle:
        ds = ds.shuffle(buffer_size=len(df))

    ds = ds.batch(batch_size)

    ds = ds.prefetch(buffer_size=tf.data.AUTOTUNE)

    return ds

# Normalize the ratings between 0 and 1. Makes it easy to train.
# It is suitable for the sigmoid activation in the output layer.
# Determine min and max ratings after handling -1 ratings.
min_rating = df["rating"].min()
max_rating = df["rating"].max()
df['rating'] = df['rating'].apply(lambda x: (x - min_rating) / (max_rating - min_rating))

df_train, df_test = train_test_split(
    df[['user', 'movie', 'rating', 'original_rating']], # Keep original_rating for evaluation
    test_size=0.1,
    shuffle=True,
    random_state=0
)

train_ds = convert_data(df_train, shuffle=True, batch_size=512)
test_ds = convert_data(df_test, shuffle=False, batch_size=512)

num_users = len(users_dict)
num_movies = len(movies_dict) # Renamed for clarity - these are now num_anime

#----- GMF -----

def gmf(
    num_users,
    num_movies,
    latent_dim,
    user_input=None,
    movie_input=None,
    pretrain=True
):
    if pretrain:
        user_input = keras.Input(shape=(1,), name='user_input_mf')
        movie_input = keras.Input(shape=(1,), name='movie_input_mf')

    user_embedding = layers.Embedding(
        input_dim=num_users,
        output_dim=latent_dim,
        embeddings_initializer="he_normal",
        embeddings_regularizer=keras.regularizers.l2(1e-6),
        name='user_embedding_mf'
    )(user_input)

    user_latent = layers.Flatten(name='flatten_user_mf')(user_embedding)

    movie_embedding = layers.Embedding(
        input_dim=num_movies,
        output_dim=latent_dim,
        embeddings_initializer="he_normal",
        embeddings_regularizer=keras.regularizers.l2(1e-6),
        name='movie_embedding_mf'
    )(movie_input)

    movie_latent = layers.Flatten(name='flatten_movie_mf')(movie_embedding)

    # Element-wise Product
    x = layers.Multiply(name='multiply_mf')([user_latent, movie_latent])

    if pretrain:
        out = layers.Dense(1, activation='sigmoid', name='out_mf')(x)
        model = keras.Model(inputs=[user_input, movie_input], outputs=out, name='model_mf')
        return model
    else:
        return x

gmf_model = gmf(
    num_users,
    num_movies,
    latent_dim=20,
    pretrain=True
)

# Adam optimizer and Binary Crossentropy loss: suitable for binary classification or probability prediction tasks
# (predicting whether a user will like a movie).
gmf_model.compile(
    optimizer=keras.optimizers.Adam(learning_rate=0.001),
    loss=keras.losses.BinaryCrossentropy()
)

gmf_model.summary()

print("\n--- Training GMF Model ---")
gmf_model.fit(
    train_ds,
    validation_data=test_ds,
    epochs=5,
)

if not os.path.exists('./checkpoints_gmf'):
    os.makedirs('./checkpoints_gmf')
gmf_model.save_weights('./checkpoints_gmf/pretrain.weights.h5')

### MLP

def mlp(
    num_users,
    num_movies,
    latent_dim,
    dense_layers,
    user_input=None,
    movie_input=None,
    pretrain=True
):
    if pretrain:
        user_input = keras.Input(shape=(1,), name='user_input_mf')
        movie_input = keras.Input(shape=(1,), name='movie_input_mf')

    user_embedding = layers.Embedding(
        input_dim=num_users,
        output_dim=latent_dim,
        embeddings_initializer="he_normal",
        embeddings_regularizer=keras.regularizers.l2(1e-6),
        name='user_embedding_mlp'
    )(user_input)

    user_latent = layers.Flatten(name='flatten_user_mlp')(user_embedding)

    movie_embedding = layers.Embedding(
        input_dim=num_movies,
        output_dim=latent_dim,
        embeddings_initializer="he_normal",
        embeddings_regularizer=keras.regularizers.l2(1e-6),
        name='movie_embedding_mlp'
    )(movie_input)

    movie_latent = layers.Flatten(name='flatten_movie_mlp')(movie_embedding)

    x = layers.Concatenate(name='concat_mlp')([user_latent, movie_latent])

    for i in range(len(dense_layers)):
        x = layers.Dense(dense_layers[i], activation='relu', name=f'desne_{i+1}')(x)
        x = layers.Dropout(0.4, name=f'dropout_mlp_{i+1}')(x)

    if pretrain:
        out = layers.Dense(1, activation='sigmoid', name='out_mlp')(x)
        model = keras.Model(inputs=[user_input, movie_input], outputs=out, name='model_mlp')
        return model
    else:
        return x

mlp_model = mlp(
    num_users,
    num_movies,
    latent_dim=20,
    dense_layers=[10],
    pretrain=True
)

mlp_model.compile(
    optimizer=keras.optimizers.Adam(learning_rate=0.001),
    loss=keras.losses.BinaryCrossentropy()
)

mlp_model.summary()

print("\n--- Training MLP Model ---")
mlp_model.fit(
    train_ds,
    validation_data=test_ds,
    epochs=5,
)

if not os.path.exists('./checkpoints_mlp'):
    os.makedirs('./checkpoints_mlp')
mlp_model.save_weights('./checkpoints_mlp/pretrain.weights.h5')

def ncf(
    num_users,
    num_movies,
    latent_dim_gmf,
    latent_dim_mlp,
    dense_layers
):
    user_input = keras.Input(shape=(1,), name='user_input')
    movie_input = keras.Input(shape=(1,), name='movie_input')

    out_gmf = gmf(
        num_users,
        num_movies,
        latent_dim_gmf,
        user_input=user_input,
        movie_input=movie_input,
        pretrain=False
    )

    out_mlp = mlp(
        num_users,
        num_movies,
        latent_dim_mlp,
        dense_layers,
        user_input=user_input,
        movie_input=movie_input,
        pretrain=False
    )

    out = layers.Concatenate(name='concat_gmf_mlp')([out_gmf, out_mlp])

    out = layers.Dense(1, activation='sigmoid', name='out')(out)

    model = keras.Model(inputs=[user_input, movie_input], outputs=out, name='ncf_model')

    return model

############################ NCF ############################
ncf_model = ncf(
    num_users,
    num_movies,
    latent_dim_gmf=20,
    latent_dim_mlp=20,
    dense_layers=[10]
)
ncf_model.compile(
    optimizer=keras.optimizers.Adam(learning_rate=0.001),
    loss=keras.losses.BinaryCrossentropy()
)

# load pretrained weights from the GMF and MLP models into the corresponding layers of the NCF model.

############################ GMF ############################
gmf_model = gmf(
    num_users,
    num_movies,
    latent_dim=20,
    pretrain=True
)
gmf_model.compile(
    optimizer=keras.optimizers.Adam(learning_rate=0.001),
    loss=keras.losses.BinaryCrossentropy()
)
gmf_model.load_weights('./checkpoints_gmf/pretrain.weights.h5')
# gmf_model.evaluate(test_ds, verbose=1)

############################ MLP ############################
mlp_model = mlp(
    num_users,
    num_movies,
    latent_dim=20,
    dense_layers=[10],
    pretrain=True
)
mlp_model.compile(
    optimizer=keras.optimizers.Adam(learning_rate=0.001),
    loss=keras.losses.BinaryCrossentropy()
)
mlp_model.load_weights('./checkpoints_mlp/pretrain.weights.h5')

dense_layers=[10]

# Load pretrained models

## GMF
user_embedding_mf = gmf_model.get_layer('user_embedding_mf').get_weights()
ncf_model.get_layer('user_embedding_mf').set_weights(user_embedding_mf)

movie_embedding_mf = gmf_model.get_layer('movie_embedding_mf').get_weights()
ncf_model.get_layer('movie_embedding_mf').set_weights(movie_embedding_mf)

## MLP
user_embedding_mlp = mlp_model.get_layer('user_embedding_mlp').get_weights()
ncf_model.get_layer('user_embedding_mlp').set_weights(user_embedding_mlp)

movie_embedding_mlp = mlp_model.get_layer('movie_embedding_mlp').get_weights()
ncf_model.get_layer('movie_embedding_mlp').set_weights(movie_embedding_mlp)

for i in range(len(dense_layers)):
    dense_mlp = mlp_model.get_layer(f'desne_{i+1}').get_weights()
    ncf_model.get_layer(f'desne_{i+1}').set_weights(dense_mlp)

out_mf = gmf_model.get_layer('out_mf').get_weights()
out_mlp = mlp_model.get_layer('out_mlp').get_weights()

new_weight = np.concatenate((out_mf[0], out_mlp[0]), axis=0)
new_bias = out_mf[1] + out_mlp[1]
ncf_model.get_layer('out').set_weights([0.5 * new_weight, 0.5 * new_bias])

print("\n--- Training NCF Model (with pretrained weights) ---")
ncf_model.fit(
    train_ds,
    validation_data=test_ds,
    epochs=5,
)

# --- Evaluation Metrics ---

# 1. Generate predictions for all user-movie pairs in the test set
# We need to reconstruct the original test data for prediction
test_users = df_test['user'].values
test_movies = df_test['movie'].values
test_original_ratings = df_test['original_rating'].values

predictions = ncf_model.predict([test_users, test_movies]).flatten()

# Denormalize predictions if you want to compare with original scale (optional for MAP/NDCG)
# denormalized_predictions = predictions * (original_max_rating - original_min_rating) + original_min_rating

# Create a DataFrame for predictions and true ratings for easy grouping
predictions_df = pd.DataFrame({
    'user': test_users,
    'movie': test_movies,
    'predicted_rating': predictions,
    'true_rating': test_original_ratings
})

# Define relevance threshold for original ratings
# For anime ratings, often 7 or 8 out of 10 are considered highly relevant.
# Adjust this threshold based on your understanding of the dataset and what a "good" rating implies.
RELEVANCE_THRESHOLD = 7 # Example: Consider original ratings >= 7 as relevant

def precision_at_k(recommended_items, relevant_items, k):
    if k == 0:
        return 0.0
    recommended_at_k = recommended_items[:k]
    num_relevant_in_top_k = len(set(recommended_at_k) & set(relevant_items))
    return num_relevant_in_top_k / k

def average_precision_at_k(recommended_items, relevant_items, k):
    if not relevant_items:
        return 0.0
    
    # Only consider relevant items up to k
    relevant_in_top_k = [item for item in recommended_items[:k] if item in relevant_items]
    
    if not relevant_in_top_k:
        return 0.0

    sum_precisions = 0.0
    num_relevant_found = 0
    for i, item in enumerate(recommended_items[:k]):
        if item in relevant_items:
            num_relevant_found += 1
            sum_precisions += precision_at_k(recommended_items, relevant_items, i + 1)
    
    return sum_precisions / min(len(relevant_items), k) # Normalize by the minimum of actual relevant items or k

def mean_average_precision_at_k(predictions_df, k, relevance_threshold):
    map_scores = []
    for user_id in predictions_df['user'].unique():
        user_data = predictions_df[predictions_df['user'] == user_id]
        
        # Sort movies by predicted rating in descending order
        ranked_movies = user_data.sort_values(by='predicted_rating', ascending=False)['movie'].tolist()
        
        # Get actual relevant movies for this user
        relevant_movies = user_data[user_data['true_rating'] >= relevance_threshold]['movie'].tolist()
        
        ap = average_precision_at_k(ranked_movies, relevant_movies, k)
        map_scores.append(ap)
    return np.mean(map_scores)

def dcg_at_k(recommended_items_with_relevance, k):
    dcg = 0.0
    for i, (item, relevance) in enumerate(recommended_items_with_relevance[:k]):
        dcg += relevance / np.log2(i + 2) # i+2 because log2(1) is 0, so positions start from log2(2)
    return dcg

def ndcg_at_k(recommended_items_with_relevance, relevant_items_with_relevance, k):
    # Calculate DCG for the recommended list
    dcg = dcg_at_k(recommended_items_with_relevance, k)

    # Calculate IDCG (Ideal DCG)
    # Sort all relevant items by their true relevance score in descending order
    ideal_ranked_items_with_relevance = sorted(relevant_items_with_relevance, key=lambda x: x[1], reverse=True)
    idcg = dcg_at_k(ideal_ranked_items_with_relevance, k)

    if idcg == 0:
        return 0.0
    return dcg / idcg

def mean_ndcg_at_k(predictions_df, k, relevance_threshold):
    ndcg_scores = []
    for user_id in predictions_df['user'].unique():
        user_data = predictions_df[predictions_df['user'] == user_id]
        
        # Sort movies by predicted rating in descending order
        # Get (movie_id, predicted_rating) pairs
        ranked_predictions = user_data.sort_values(by='predicted_rating', ascending=False)[['movie', 'predicted_rating']].values.tolist()
        
        # Get (movie_id, true_rating) pairs for relevant items
        relevant_items_with_relevance = user_data[user_data['true_rating'] >= relevance_threshold][['movie', 'true_rating']].values.tolist()
        
        # For NDCG, we need the actual relevance score, not just binary.
        # We can use the true rating as the relevance score.
        recommended_items_with_relevance = []
        for movie, pred_rating in ranked_predictions:
            # Find the true rating for this movie from user_data
            true_rating_for_movie = user_data[user_data['movie'] == movie]['true_rating'].iloc[0]
            recommended_items_with_relevance.append((movie, true_rating_for_movie))
        
        ndcg_scores.append(ndcg_at_k(recommended_items_with_relevance, relevant_items_with_relevance, k))
    return np.mean(ndcg_scores)

# --- Calculate MAP@10 and NDCG@10 ---
K = 10
map_10 = mean_average_precision_at_k(predictions_df, K, RELEVANCE_THRESHOLD)
ndcg_10 = mean_ndcg_at_k(predictions_df, K, RELEVANCE_THRESHOLD)

print(f"\n--- Evaluation Results (K={K}) ---")
print(f"Mean Average Precision (MAP@{K}): {map_10:.4f}")
print(f"Normalized Discounted Cumulative Gain (NDCG@{K}): {ndcg_10:.4f}")

shape of anime_df: (12017, 7)
shape of rating_df (7813737, 3)
Anime Dataset Info:
<class 'pandas.core.frame.DataFrame'>
Index: 12017 entries, 0 to 12293
Data columns (total 7 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   anime_id  12017 non-null  int64  
 1   name      12017 non-null  object 
 2   genre     12017 non-null  object 
 3   type      12017 non-null  object 
 4   episodes  12017 non-null  object 
 5   rating    12017 non-null  float64
 6   members   12017 non-null  int64  
dtypes: float64(1), int64(2), object(4)
memory usage: 751.1+ KB
None

Rating Dataset Info (after removing -1 ratings):
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6337234 entries, 0 to 6337233
Data columns (total 4 columns):
 #   Column           Dtype  
---  ------           -----  
 0   user_id          int64  
 1   movie_id         int64  
 2   rating           float64
 3   original_rating  float64
dtypes: float64(2), int64(2)
memory usage: 193.4 MB
N

I0000 00:00:1750906636.970510      35 gpu_device.cc:2022] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 13942 MB memory:  -> device: 0, name: Tesla T4, pci bus id: 0000:00:04.0, compute capability: 7.5
I0000 00:00:1750906636.971181      35 gpu_device.cc:2022] Created device /job:localhost/replica:0/task:0/device:GPU:1 with 13942 MB memory:  -> device: 1, name: Tesla T4, pci bus id: 0000:00:05.0, compute capability: 7.5



--- Training GMF Model ---
Epoch 1/5


I0000 00:00:1750906652.675044      82 service.cc:148] XLA service 0x7fca840079d0 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1750906652.676793      82 service.cc:156]   StreamExecutor device (0): Tesla T4, Compute Capability 7.5
I0000 00:00:1750906652.676825      82 service.cc:156]   StreamExecutor device (1): Tesla T4, Compute Capability 7.5
I0000 00:00:1750906652.990484      82 cuda_dnn.cc:529] Loaded cuDNN version 90300


[1m   66/11140[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m26s[0m 2ms/step - loss: 0.6891 

I0000 00:00:1750906653.639065      82 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


[1m11140/11140[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m45s[0m 3ms/step - loss: 0.5611 - val_loss: 0.5251
Epoch 2/5
[1m11140/11140[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 3ms/step - loss: 0.5229 - val_loss: 0.5219
Epoch 3/5
[1m11140/11140[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m43s[0m 3ms/step - loss: 0.5193 - val_loss: 0.5210
Epoch 4/5
[1m11140/11140[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m43s[0m 3ms/step - loss: 0.5175 - val_loss: 0.5207
Epoch 5/5
[1m11140/11140[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m46s[0m 3ms/step - loss: 0.5170 - val_loss: 0.5205



--- Training MLP Model ---
Epoch 1/5
[1m11140/11140[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m48s[0m 3ms/step - loss: 0.5396 - val_loss: 0.5237
Epoch 2/5
[1m11140/11140[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m46s[0m 3ms/step - loss: 0.5264 - val_loss: 0.5218
Epoch 3/5
[1m11140/11140[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m45s[0m 3ms/step - loss: 0.5244 - val_loss: 0.5213
Epoch 4/5
[1m11140/11140[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m46s[0m 3ms/step - loss: 0.5243 - val_loss: 0.5213
Epoch 5/5
[1m11140/11140[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m47s[0m 3ms/step - loss: 0.5242 - val_loss: 0.5213

--- Training NCF Model (with pretrained weights) ---
Epoch 1/5


  saveable.load_own_variables(weights_store.get(inner_path))
  saveable.load_own_variables(weights_store.get(inner_path))


[1m11140/11140[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m50s[0m 3ms/step - loss: 0.5204 - val_loss: 0.5203
Epoch 2/5
[1m11140/11140[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m48s[0m 3ms/step - loss: 0.5176 - val_loss: 0.5200
Epoch 3/5
[1m11140/11140[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m47s[0m 3ms/step - loss: 0.5169 - val_loss: 0.5198
Epoch 4/5
[1m11140/11140[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m52s[0m 4ms/step - loss: 0.5167 - val_loss: 0.5197
Epoch 5/5
[1m11140/11140[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m47s[0m 3ms/step - loss: 0.5164 - val_loss: 0.5197
[1m19804/19804[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m28s[0m 1ms/step

--- Evaluation Results (K=10) ---
Mean Average Precision (MAP@10): 0.9393
Normalized Discounted Cumulative Gain (NDCG@10): 1.0032
