In [1]:
import sys
import os
from google.colab import drive
# Mount Google Drive inside the Colab VM so files stored on Drive are reachable.
drive.mount('/content/drive')

# Project root on Drive. Presumably this is where the local modules imported
# further down (split_temporal_cv, eval_map_k, model_*) live -- confirm.
project_path = '/content/drive/My Drive/CAR_MF'

# Make the project root the working directory so relative paths such as
# './database/...' resolve against it.
os.chdir(project_path)

# Put the project root on the import path. Guarded so that re-running this cell
# does not keep appending duplicate entries to sys.path.
if project_path not in sys.path:
    sys.path.append(project_path)

Mounted at /content/drive


In [2]:
import warnings
warnings.simplefilter("ignore", category=FutureWarning)

In [3]:
import pandas as pd
import numpy as np
import itertools

In [4]:
from split_temporal_cv import create_temporal_folds_with_cold_start

In [5]:
from eval_map_k import map_at_k

In [6]:
from model_car_mf import CAR_MF
from model_car_mf_bias import CAR_MF_Bias
from model_mf import Standard_MF
from model_mf_bias import Standard_MF_Bias

# Import data

In [7]:
# Load the ratings table of the "ml-latest-small" dataset (path is relative to the
# current working directory) and switch the id columns to lower-case names.
ratings_csv_path = './database/ml-latest-small/ratings.csv'
column_aliases = {
    'userId': 'userid',
    'movieId': 'itemid',
    'rating': 'rating',  # identity mapping: the name is unchanged
}
df_ml_small = pd.read_csv(ratings_csv_path).rename(columns=column_aliases)

# Set up "Temporal CV"

In [8]:
# Build the temporal cross-validation split. The helper returns three values that
# the training cell later indexes by fold: per-fold training data, per-fold test
# data and per-fold cold-start information.
# NOTE(review): the second argument (5) is presumably the number of folds --
# confirm against split_temporal_cv.
(
    train_list_ml_small,
    test_list_ml_small,
    cold_start_list_ml_small,
) = create_temporal_folds_with_cold_start(df_ml_small, 5)

In [9]:
# Wrap each per-fold collection in an outer list holding one entry per dataset.
# The training loop looks these up by dataset position, so the order here must
# match the order of names in `dataset_list`.
train_list, test_list, cold_list = (
    [train_list_ml_small],
    [test_list_ml_small],
    [cold_start_list_ml_small],
)

# Initialize params

In [10]:
# Display name -> model class. The keys are used as the 'Model' label in the
# results, and their insertion order is the order in which the hyperparameter
# grid enumerates (and therefore trains) the models.
MODEL_REGISTRY = dict(
    [
        ('MF', Standard_MF),
        ('MF+Bias', Standard_MF_Bias),
        ('CAR_MF', CAR_MF),
        ('CAR_MF+Bias', CAR_MF_Bias),
    ]
)

In [11]:
# Dataset labels, one per entry of train_list / test_list / cold_list.
dataset_list = ['ml_small']

# Candidate values for each hyperparameter. Every list currently holds a single
# value, so the grid contains exactly one configuration per registered model.
latent_list, epoch_list = [50], [100]
learning_rate_list, lambda_rate_list = [0.001], [0.001]

# Cartesian product of model names and hyperparameter values. Each entry is a
# tuple (model_name, latent, learning_rate, lambda_rate, epoch); the rightmost
# field varies fastest, exactly as itertools.product would order it.
hyperparameter_grid = [
    (model_name, latent, learning_rate, lambda_rate, epoch)
    for model_name in MODEL_REGISTRY
    for latent in latent_list
    for learning_rate in learning_rate_list
    for lambda_rate in lambda_rate_list
    for epoch in epoch_list
]

# Train model

In [12]:
import pandas as pd

# Train every model / hyperparameter combination from `hyperparameter_grid` on
# every fold of every dataset, score it with map_at_k, and collect one record
# per run in `all_results` (turned into a DataFrame by the reporting cell).
#
# Inputs defined in earlier cells: dataset_list, train_list, test_list,
# cold_list, hyperparameter_grid, MODEL_REGISTRY, map_at_k.

# Fourth argument of map_at_k (presumably the ranking cutoff k). The 'MAP@5'
# labels in the printed text and in the result columns assume this value, so
# keep them in sync if it is ever changed.
EVAL_TOP_K = 5
# Mini-batch size handed to every model's fit().
FIT_BATCH_SIZE = 512

# The four lists are parallel and matched purely by position. Without this
# check a length mismatch would either silently skip datasets or surface as an
# opaque IndexError in the middle of a long training run.
if not (len(dataset_list) == len(train_list) == len(test_list) == len(cold_list)):
    raise ValueError(
        "dataset_list, train_list, test_list and cold_list must have the same "
        f"length, got {len(dataset_list)}, {len(train_list)}, "
        f"{len(test_list)} and {len(cold_list)}"
    )

all_results = []  # one dict per (dataset, fold, model, hyperparameters) run

for i, dataset_name in enumerate(dataset_list):
    print(f"Data set: {dataset_name}")
    print('===========================================')
    train_list_data = train_list[i]
    test_list_data = test_list[i]
    cold_flag_list_data = cold_list[i]

    # Every training fold needs a matching test fold and cold-start entry;
    # fail before any training starts rather than part-way through.
    n_folds = len(train_list_data)
    if len(test_list_data) < n_folds or len(cold_flag_list_data) < n_folds:
        raise ValueError(
            f"Dataset '{dataset_name}' has {n_folds} training folds but only "
            f"{len(test_list_data)} test folds and "
            f"{len(cold_flag_list_data)} cold-start entries"
        )

    for fold in range(n_folds):
        print(f"Fold: {fold + 1}")
        train_array = train_list_data[fold]
        test_array = test_list_data[fold]
        cold_flag_array = cold_flag_list_data[fold]
        print(f"Shape of data: {train_array.shape}\n")

        for params in hyperparameter_grid:
            model_name, latent, learning_rate, lambda_rate, epoch = params

            print(f"--- Training Model: {model_name} ---")
            print(f"Parameters: K={latent}, LR={learning_rate}, Lambda={lambda_rate}, Epochs={epoch}")

            # A fresh model instance per run, so nothing learned on one fold
            # or configuration carries over to the next.
            ModelClass = MODEL_REGISTRY[model_name]
            model_instance = ModelClass(K=latent, learning_rate=learning_rate, lambda_rate=lambda_rate)
            model_instance.fit(Y=train_array, epochs=epoch, batch_size=FIT_BATCH_SIZE)
            results = model_instance.predict()
            pred_array = results['predictions']

            # map_at_k returns three scores, unpacked here as: overall,
            # cold-start only, and non-cold-start only.
            map_k, map_k_cold, map_k_not_cold = map_at_k(
                pred_array, test_array, cold_flag_array, EVAL_TOP_K
            )

            print(f"Result -> MAP@5: {map_k:.4f}, Cold: {map_k_cold:.4f}, Not Cold: {map_k_not_cold:.4f}\n")

            # Record this run. The key names are consumed by the reporting
            # cell's groupby/agg, so they must stay stable.
            result_entry = {
                'Dataset': dataset_name,
                'Fold': fold + 1,
                'Model': model_name,
                'K': latent,
                'LR': learning_rate,
                'Lambda': lambda_rate,
                'Epochs': epoch,
                'MAP@5': map_k,
                'MAP@5_cold': map_k_cold,
                'MAP@5_not_cold': map_k_not_cold
            }
            all_results.append(result_entry)

    print('===========================================')

Data set: ml_small
Fold: 1
Shape of data: (610, 9724)

--- Training Model: MF ---
Parameters: K=50, LR=0.001, Lambda=0.001, Epochs=100
Epoch 10/100, Train RMSE: 3.7062
Epoch 20/100, Train RMSE: 3.6913
Epoch 30/100, Train RMSE: 3.6623
Epoch 40/100, Train RMSE: 3.6171
Epoch 50/100, Train RMSE: 3.5550
Epoch 60/100, Train RMSE: 3.4760
Epoch 70/100, Train RMSE: 3.3804
Epoch 80/100, Train RMSE: 3.2660
Epoch 90/100, Train RMSE: 3.1367
Epoch 100/100, Train RMSE: 2.9888
Result -> MAP@5: 0.0511, Cold: 0.0417, Not Cold: 0.0647

--- Training Model: MF+Bias ---
Parameters: K=50, LR=0.001, Lambda=0.001, Epochs=100
Epoch 10/100, Train RMSE: 1.0443
Epoch 20/100, Train RMSE: 1.0244
Epoch 30/100, Train RMSE: 0.9949
Epoch 40/100, Train RMSE: 0.9554
Epoch 50/100, Train RMSE: 0.9052
Epoch 60/100, Train RMSE: 0.8464
Epoch 70/100, Train RMSE: 0.7825
Epoch 80/100, Train RMSE: 0.7149
Epoch 90/100, Train RMSE: 0.6482
Epoch 100/100, Train RMSE: 0.5824
Result -> MAP@5: 0.1190, Cold: 0.1329, Not Cold: 0.0985

--- 

In [13]:
# Turn the per-run records gathered during training into a table with one row
# per (dataset, fold, model, hyperparameters) run.
results_df = pd.DataFrame(all_results)

# Detailed view: every run. to_string() keeps pandas from truncating rows or
# columns in the printed output.
print("\n--- Detailed Results for Each Fold ---")
print(results_df.to_string())

# Aggregated view: mean and standard deviation of each metric across folds,
# one row per dataset / model / hyperparameter combination.
print("\n\n--- Aggregated Summary (Mean & Std Dev over Folds) ---")
group_keys = ['Dataset', 'Model', 'K', 'LR', 'Lambda', 'Epochs']
metric_columns = ['MAP@5', 'MAP@5_cold', 'MAP@5_not_cold']

# Named aggregation yields flat "<metric>_<stat>" column names directly, in
# metric-major order (mean then std for each metric), so no multi-level column
# index has to be flattened afterwards.
named_stats = {
    f'{metric}_{stat}': (metric, stat)
    for metric in metric_columns
    for stat in ('mean', 'std')
}
summary_df = results_df.groupby(group_keys).agg(**named_stats).reset_index()

print(summary_df.to_string())


--- Detailed Results for Each Fold ---
     Dataset  Fold        Model   K     LR  Lambda  Epochs     MAP@5  MAP@5_cold  MAP@5_not_cold
0   ml_small     1           MF  50  0.001   0.001     100  0.051056    0.041722        0.064715
1   ml_small     1      MF+Bias  50  0.001   0.001     100  0.118977    0.132944        0.098537
2   ml_small     1       CAR_MF  50  0.001   0.001     100  0.046700    0.037944        0.059512
3   ml_small     1  CAR_MF+Bias  50  0.001   0.001     100  0.200066    0.229667        0.156748
4   ml_small     2           MF  50  0.001   0.001     100  0.056196    0.049259        0.058060
5   ml_small     2      MF+Bias  50  0.001   0.001     100  0.189373    0.210000        0.183831
6   ml_small     2       CAR_MF  50  0.001   0.001     100  0.109020    0.082593        0.116119
7   ml_small     2  CAR_MF+Bias  50  0.001   0.001     100  0.183020    0.187963        0.181692
8   ml_small     3           MF  50  0.001   0.001     100  0.026775    0.048000       

In [14]:
# Bare last expression: the notebook renders the aggregated summary table as the
# cell's output.
summary_df

Unnamed: 0,Dataset,Model,K,LR,Lambda,Epochs,MAP@5_mean,MAP@5_std,MAP@5_cold_mean,MAP@5_cold_std,MAP@5_not_cold_mean,MAP@5_not_cold_std
0,ml_small,CAR_MF,50,0.001,0.001,100,0.104102,0.033693,0.074608,0.055472,0.109319,0.032127
1,ml_small,CAR_MF+Bias,50,0.001,0.001,100,0.17588,0.048358,0.188845,0.057908,0.17401,0.057192
2,ml_small,MF,50,0.001,0.001,100,0.041455,0.018774,0.046774,0.020444,0.040803,0.023736
3,ml_small,MF+Bias,50,0.001,0.001,100,0.07806,0.074426,0.110321,0.07571,0.06866,0.073636
