In [7]:
!pip install tensorflow

Collecting tensorflow
  Downloading tensorflow-2.20.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (4.5 kB)
Collecting absl-py>=1.0.0 (from tensorflow)
  Downloading absl_py-2.3.1-py3-none-any.whl.metadata (3.3 kB)
Collecting astunparse>=1.6.0 (from tensorflow)
  Downloading astunparse-1.6.3-py2.py3-none-any.whl.metadata (4.4 kB)
Collecting flatbuffers>=24.3.25 (from tensorflow)
  Downloading flatbuffers-25.2.10-py2.py3-none-any.whl.metadata (875 bytes)
Collecting gast!=0.5.0,!=0.5.1,!=0.5.2,>=0.2.1 (from tensorflow)
  Downloading gast-0.6.0-py3-none-any.whl.metadata (1.3 kB)
Collecting google_pasta>=0.1.1 (from tensorflow)
  Downloading google_pasta-0.2.0-py3-none-any.whl.metadata (814 bytes)
Collecting libclang>=13.0.0 (from tensorflow)
  Downloading libclang-18.1.1-py2.py3-none-manylinux2010_x86_64.whl.metadata (5.2 kB)
Collecting opt_einsum>=2.3.2 (from tensorflow)
  Downloading opt_einsum-3.4.0-py3-none-any.whl.metadata (6.3 kB)
Collecting protobuf>=5.28.0 (

In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow import keras
# Raw inputs for the collaborative-filtering recommender:
#   data/animes.csv  - anime catalogue; its 'uid' column defines the item set
#   data/reviews.csv - user reviews; 'profile' defines the user set, 'score' is the rating
# The following cells keep ratings as (anime_idx, profile_idx, score) triplets plus a
# per-anime mean-normalized score, instead of materializing dense
# (num_items, num_users) Y / R / Ynorm matrices.
animes_df, reviews_df = (
    pd.read_csv(csv_path) for csv_path in ("data/animes.csv", "data/reviews.csv")
)

2025-11-06 17:39:31.329590: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:31] Could not find cuda drivers on your machine, GPU will not be used.
2025-11-06 17:39:31.941511: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
2025-11-06 17:39:33.685705: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:31] Could not find cuda drivers on your machine, GPU will not be used.


In [2]:
# Map raw identifiers to contiguous 0-based indices (in order of first appearance).
anime_id_to_idx = {aid: idx for idx, aid in enumerate(animes_df['uid'].unique())}
user_id_to_idx  = {uid: idx for idx, uid in enumerate(reviews_df['profile'].unique())}

num_items = len(anime_id_to_idx)

# Attach the indices to every review. A review whose anime_uid does not occur in
# animes_df maps to NaN here (which also turns the column into floats).
reviews_df['anime_idx'] = reviews_df['anime_uid'].map(anime_id_to_idx)
reviews_df['profile_idx'] = reviews_df['profile'].map(user_id_to_idx)

# Mean score per anime index. groupby skips NaN keys, so unmapped reviews do not
# contribute to any mean.
anime_mean_by_idx = reviews_df.groupby('anime_idx')['score'].mean()
anime_means = anime_mean_by_idx.to_dict()

# Dense lookup array over all items; anime without any review keep a mean of 0.
# The index is cast to int64 because it is float whenever anime_idx contains NaN.
anime_means_array = np.zeros(num_items)
anime_means_array[anime_mean_by_idx.index.to_numpy(dtype=np.int64)] = anime_mean_by_idx.to_numpy()

# Normalize ratings (subtract the per-anime mean) in one vectorized step.
# Series.map leaves NaN for unmapped anime instead of raising on a float index,
# which is what the previous row-by-row array lookup would do.
reviews_df['score_norm'] = reviews_df['score'] - reviews_df['anime_idx'].map(anime_mean_by_idx)

# Keep only the needed columns (triplet + normalized score).
ratings_df = reviews_df[['anime_idx', 'profile_idx', 'score', 'score_norm']].copy()

In [3]:

num_users = len(user_id_to_idx) + 1  # +1 reserves the last index for the new user
num_features = 20  # number of latent features per item / per user

# Seed TensorFlow's global generator so the random initialization below is the
# same on every Restart & Run All; re-running this cell also resets the sequence.
tf.random.set_seed(1234)

# Initialize parameters
#   X: one feature row per item   -> (num_items, num_features)
#   W: one feature row per user   -> (num_users, num_features)
#   b: one bias per user          -> (1, num_users)
X = tf.Variable(tf.random.normal((num_items, num_features), dtype=tf.float64), name='X')
W = tf.Variable(tf.random.normal((num_users, num_features), dtype=tf.float64), name='W')
b = tf.Variable(tf.random.normal((1, num_users), dtype=tf.float64), name='b')

E0000 00:00:1762468791.467926    1709 cuda_executor.cc:1309] INTERNAL: CUDA Runtime error: Failed call to cudaGetRuntimeVersion: Error loading CUDA libraries. GPU will not be used.: Error loading CUDA libraries. GPU will not be used.
W0000 00:00:1762468791.483331    1709 gpu_device.cc:2342] Cannot dlopen some GPU libraries. Please make sure the missing libraries mentioned above are installed properly if you would like to use GPU. Follow the guide at https://www.tensorflow.org/install/gpu for how to download and setup the required libraries for your platform.
Skipping registering GPU devices...


In [4]:
# Simulate a brand-new user by appending a handful of hand-picked ratings to the
# training triplets. Edit `new_ratings` to simulate a different user.
new_user_id = "new_user"
new_user_idx = num_users - 1  # the extra slot reserved when num_users was computed

# (anime_idx, profile_idx, score)
new_ratings = [
    (0, new_user_idx, 10),
    (50, new_user_idx, 8),
    (100, new_user_idx, 9),
    (150, new_user_idx, 7),
    (200, new_user_idx, 6),
    (250, new_user_idx, 8),
    (300, new_user_idx, 9),
    (350, new_user_idx, 7),
]

new_df = pd.DataFrame(new_ratings, columns=['anime_idx', 'profile_idx', 'score'])

# Normalize the new ratings with the same per-anime means, in one vectorized lookup.
new_df['score_norm'] = new_df['score'] - anime_means_array[new_df['anime_idx'].to_numpy()]

# Remove any rows already stored for the new user before appending, so that
# re-running this cell replaces the simulated ratings instead of duplicating them.
ratings_df = pd.concat(
    [ratings_df[ratings_df['profile_idx'] != new_user_idx], new_df],
    ignore_index=True,
)


In [5]:
# Keep only the triplets that have an anime index, a profile index and a
# normalized score; any row with a NaN in one of those columns is discarded.
ratings_df = ratings_df.loc[
    ratings_df[['anime_idx', 'profile_idx', 'score_norm']].notna().all(axis=1)
]

In [6]:
def cofi_cost_func_triplet(X, W, b, anime_idx_tensor, user_idx_tensor, ratings_tensor, lambda_):
    """Regularized squared-error cost evaluated on (item, user, rating) triplets.

    Args:
        X: item parameters; rows are selected with ``anime_idx_tensor``.
        W: user parameters; rows are selected with ``user_idx_tensor``.
        b: user biases; entries of ``b[0]`` are selected with ``user_idx_tensor``.
        anime_idx_tensor: item index of each triplet.
        user_idx_tensor: user index of each triplet.
        ratings_tensor: target value of each triplet.
        lambda_: weight of the L2 penalty on ``X`` and ``W`` (``b`` is not penalized).

    Returns:
        Scalar ``0.5 * sum(err**2) + (lambda_/2) * (sum(X**2) + sum(W**2))``.
    """
    # Pull out, per triplet, the item row, the user row and the user's bias.
    item_rows = tf.gather(X, anime_idx_tensor)
    user_rows = tf.gather(W, user_idx_tensor)
    user_bias = tf.gather(b[0], user_idx_tensor)

    # Prediction is the row-wise dot product plus the bias.
    predicted = tf.reduce_sum(item_rows * user_rows, axis=1) + user_bias
    residual = predicted - ratings_tensor

    data_term = 0.5 * tf.reduce_sum(residual**2)
    penalty_term = (lambda_/2) * (tf.reduce_sum(X**2) + tf.reduce_sum(W**2))
    return data_term + penalty_term


In [7]:
# Convert the training triplets to TensorFlow tensors with the dtypes the cost
# function needs. The index columns are cast to int32 in NumPy first, so a
# float-typed column (pandas produces one when an ID mapping yielded NaNs, even
# after those rows were dropped) is still converted to integer indices.
anime_idx_tensor = tf.constant(ratings_df['anime_idx'].to_numpy(dtype=np.int32), dtype=tf.int32)
user_idx_tensor  = tf.constant(ratings_df['profile_idx'].to_numpy(dtype=np.int32), dtype=tf.int32)
ratings_tensor   = tf.constant(ratings_df['score_norm'].to_numpy(), dtype=tf.float64)

optimizer = keras.optimizers.Adam(learning_rate=0.1)
iterations = 1000
lambda_ = 1

# The variables being optimized; built once instead of on every step.
trainable_params = [X, W, b]

# `step` (not `iter`) so the builtin iter() is not shadowed for later cells.
for step in range(iterations):
    # Record the forward pass so the cost can be differentiated w.r.t. X, W and b.
    with tf.GradientTape() as tape:
        cost_value = cofi_cost_func_triplet(
            X, W, b, anime_idx_tensor, user_idx_tensor, ratings_tensor, lambda_
        )
    grads = tape.gradient(cost_value, trainable_params)
    optimizer.apply_gradients(zip(grads, trainable_params))

    # Report progress every 20 steps.
    if step % 20 == 0:
        print(f"Training loss at iteration {step}: {cost_value.numpy():0.2f}")


Training loss at iteration 0: 3007276.51
Training loss at iteration 20: 228995.52
Training loss at iteration 40: 84984.63
Training loss at iteration 60: 43937.67
Training loss at iteration 80: 30593.14
Training loss at iteration 100: 25103.25
Training loss at iteration 120: 22299.08
Training loss at iteration 140: 20648.84
Training loss at iteration 160: 19586.53
Training loss at iteration 180: 18854.38
Training loss at iteration 200: 18323.40
Training loss at iteration 220: 17924.51
Training loss at iteration 240: 17616.82
Training loss at iteration 260: 17375.20
Training loss at iteration 280: 17181.09
Training loss at iteration 300: 17022.16
Training loss at iteration 320: 16889.98
Training loss at iteration 340: 16778.38
Training loss at iteration 360: 16684.61
Training loss at iteration 380: 16602.28
Training loss at iteration 400: 16530.61
Training loss at iteration 420: 16468.23
Training loss at iteration 440: 16411.65
Training loss at iteration 460: 16362.16
Training loss at it

In [8]:
# Get feature vector and bias for the new user
w_new = W[new_user_idx].numpy()   # (num_features,)
b_new = b[0, new_user_idx].numpy()  # scalar

# Predicted rating for every anime; adding the per-anime mean undoes the
# normalization applied to the training scores.
my_predictions = X.numpy().dot(w_new) + b_new + anime_means_array
ix = np.argsort(my_predictions)[::-1]  # anime indices, best prediction first

# anime_idx values are positions in animes_df['uid'].unique(), not row positions
# in animes_df. If animes.csv repeats a uid those two differ, so titles and IDs
# are looked up in a de-duplicated catalogue whose row order matches unique().
anime_catalog = animes_df.drop_duplicates(subset='uid').reset_index(drop=True)

print("\nNew user's actual ratings vs predicted ratings:")
for _, row in new_df.iterrows():
    idx = int(row['anime_idx'])
    actual = row['score']
    predicted = my_predictions[idx]

    # get title from the de-duplicated catalogue
    title = anime_catalog['title'].iloc[idx]

    print(f"Anime idx {idx}: {title}, Actual = {actual}, Predicted = {predicted:.2f}")

# Recommend only anime the new user has not rated yet.
already_rated = set(new_df['anime_idx'].astype(int))
top_unrated = [i for i in ix if i not in already_rated][:10]

print("\nTop 10 anime recommendations for the new user:")
for i in top_unrated:
    anime_id = anime_catalog['uid'].iloc[i]
    title = anime_catalog['title'].iloc[i]
    print(f"Anime ID {anime_id}: {title} (Predicted Rating: {my_predictions[i]:.2f})")


New user's actual ratings vs predicted ratings:
Anime idx 0: Haikyuu!! Second Season, Actual = 10.0, Predicted = 9.95
Anime idx 50: Last Exile, Actual = 8.0, Predicted = 7.91
Anime idx 100: Higashi no Eden, Actual = 9.0, Predicted = 8.83
Anime idx 150: Eikoku Koi Monogatari Emma: Molders-hen, Actual = 7.0, Predicted = 7.36
Anime idx 200: Mobile Suit Zeta Gundam, Actual = 6.0, Predicted = 6.20
Anime idx 250: Drifters, Actual = 8.0, Predicted = 7.88
Anime idx 300: Omae Umasou da na, Actual = 9.0, Predicted = 8.74
Anime idx 350: Toaru Kagaku no Railgun S, Actual = 7.0, Predicted = 7.12

Top 10 anime recommendations for the new user:
Anime ID 21085: Witch Craft Works (Predicted Rating: 11.43)
Anime ID 7311: Suzumiya Haruhi no Shoushitsu (Predicted Rating: 10.92)
Anime ID 34822: Tsuki ga Kirei (Predicted Rating: 10.91)
Anime ID 9756: Mahou Shoujo Madoka★Magica (Predicted Rating: 10.72)
Anime ID 1735: Naruto: Shippuuden (Predicted Rating: 10.48)
Anime ID 5114: Fullmetal Alchemist: Brotherho

In [9]:
# Hand the trained parameters (X, W, b), the per-anime mean array and both
# ID -> index maps to the project's save helper.
# NOTE(review): where and in what format the helper stores them is not visible
# from this notebook - see save_and_load_cf_recommender.
# load_collab_model is imported here as well; it is used in the next cell.
from save_and_load_cf_recommender import save_collab_model, load_collab_model
save_collab_model(X, W, b, anime_means_array, anime_id_to_idx, user_id_to_idx)


Collaborative filtering model saved successfully.


In [10]:
# Read the saved model back; the helper returns its six pieces in the same
# order in which they were passed to save_collab_model.
(
    X_loaded,
    W_loaded,
    b_loaded,
    anime_means_array_loaded,
    anime_id_to_idx_loaded,
    user_id_to_idx_loaded,
) = load_collab_model()


Collaborative filtering model loaded successfully.


In [11]:
# Verify predictions are identical
np.allclose(X.numpy(), X_loaded.numpy())
np.allclose(W.numpy(), W_loaded.numpy())
np.allclose(b.numpy(), b_loaded.numpy())


True