In [None]:
!pip install cornac
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

import cornac
import os
from cornac.eval_methods import RatioSplit
from cornac.data import Reader, Dataset
from cornac.models import MostPop, MF, PMF, BPR, NeuMF, WMF, HPF, VAECF, NMF, UserKNN,LightGCN
from cornac.models import NMF as CornacNMF
from cornac.metrics import MAE, MSE, RMSE, Precision, Recall, NDCG, AUC, MAP, FMeasure, MRR
from cornac.hyperopt import Discrete, Continuous, GridSearch, RandomSearch
import pickle
from collections import defaultdict
from scipy import stats
from numpy.linalg import norm
import time

Collecting cornac
  Downloading cornac-2.3.3-cp311-cp311-manylinux1_x86_64.whl.metadata (51 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/51.4 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m51.4/51.4 kB[0m [31m4.0 MB/s[0m eta [36m0:00:00[0m
Collecting powerlaw (from cornac)
[0m  Downloading powerlaw-1.5-py3-none-any.whl.metadata (9.3 kB)
Downloading cornac-2.3.3-cp311-cp311-manylinux1_x86_64.whl (31.5 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m31.5/31.5 MB[0m [31m26.5 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading powerlaw-1.5-py3-none-any.whl (24 kB)
Installing collected packages: powerlaw, cornac
Successfully installed cornac-2.3.3 powerlaw-1.5


In [None]:
import psutil
def check_memory():
    """Print the memory psutil reports as available, scaled by 1024**3 bytes."""
    bytes_per_unit = 1024**3
    available = psutil.virtual_memory().available / bytes_per_unit
    print(f"Available memory: {available:.2f} GB")


# Report the headroom once before the data is loaded.
check_memory()


Available memory: 11.37 GB


In [None]:
# Load the preprocessed ratings and keep only the three columns Cornac needs,
# renamed to the (user, item, rating) convention used throughout this notebook.
df = pd.read_csv('preprocessed_book_ratings.csv')

df_cornac = df[['User-ID', 'ISBN', 'Book-Rating']].copy()
df_cornac.columns = ['user', 'item', 'rating']
df_cornac['rating'] = df_cornac['rating'].astype(np.float32)
df_cornac['user'] = df_cornac['user'].astype('object')

# Missing ratings make the split statistics (max / min / global mean) come out
# as NaN and poison any model that trains on rating values, so drop them up
# front and report how many rows were affected.
n_missing_ratings = int(df_cornac['rating'].isna().sum())
if n_missing_ratings:
    print(f"Dropping {n_missing_ratings} rows with missing ratings")
    df_cornac = df_cornac.dropna(subset=['rating']).reset_index(drop=True)

# Build the (user, item, rating) triples column-wise. This yields the same
# tuples as iterating row by row (user IDs stringified) without constructing a
# pandas Series per row.
data = list(zip(
    df_cornac['user'].astype(str).tolist(),
    df_cornac['item'].tolist(),
    df_cornac['rating'].tolist(),
))
dataset = Dataset.from_uir(data)

# Evaluation settings. They are defined before the split and passed to it so
# that changing them here actually takes effect.
# NOTE(review): 1.0 / True are believed to equal RatioSplit's defaults, i.e. the
# values the split used implicitly before - confirm against the Cornac docs.
rating_threshold = 1.0
exclude_unknowns = True

# Create evaluation method with validation split
rs = RatioSplit(
    data=data,
    test_size=0.2,   # 20% for testing
    val_size=0.1,    # 10% for validation
    rating_threshold=rating_threshold,
    exclude_unknowns=exclude_unknowns,
    verbose=True,
    seed=123,
)

print(f"Train set size: {rs.train_size}")
print(f"Validation set size: {rs.val_size}")
print(f"Test set size: {rs.test_size}")
---
Training data:
Number of users = 5255
Number of items = 5229
Number of ratings = 47846
Max rating = nan
Min rating = nan
Global mean = nan
---
Test data:
Number of users = 5255
Number of items = 5229
Number of ratings = 13651
Number of unknown users = 0
Number of unknown items = 0
---
Validation data:
Number of users = 5255
Number of items = 5229
Number of ratings = 6826
---
Total users = 5255
Total items = 5229
Train set size: 47846
Validation set size: 6836
Test set size: 13671


In [None]:
# Re-assert object dtype on the 'user' column (a no-op if it is already object).
df_cornac['user'] = df_cornac['user'].astype('object')

# Rebuild the (user, item, rating) triples column-wise; user IDs are converted
# to strings. This avoids creating one pandas Series per row via iterrows().
data = list(zip(
    df_cornac['user'].astype(str).tolist(),
    df_cornac['item'].tolist(),
    df_cornac['rating'].tolist(),
))

# Sanity check: user IDs in the extracted triples are Python strings.
print(type(data[0][0]))  # expected output: <class 'str'>




<class 'str'>


In [None]:
# Metric every hyperparameter search optimises.
ndcg50 = NDCG(k=50)


def _random_search(model, space, n_trails):
    """Wrap `model` in a RandomSearch over `space`, scored by `ndcg50` using `rs`."""
    return RandomSearch(
        model=model,
        space=space,
        metric=ndcg50,
        eval_method=rs,
        n_trails=n_trails,
    )


# 1. MostPop - baseline without hyperparameters
most_pop = MostPop()

# 2. UserKNN
user_knn_tuned = _random_search(
    UserKNN(seed=123),
    [
        Discrete(name="k", values=[10, 20, 30, 40]),
        Discrete(name="similarity", values=["cosine", "pearson"]),
        Discrete(name="mean_centered", values=[True, False]),
        Continuous(name="amplify", low=0.5, high=2.0),
    ],
    n_trails=10,
)

# 3. BPR
bpr_tuned = _random_search(
    BPR(seed=123),
    [
        Discrete(name="k", values=[10, 20, 50]),
        Continuous(name="learning_rate", low=0.0005, high=0.01),
        Continuous(name="lambda_reg", low=0.0001, high=0.05),
        Discrete(name="max_iter", values=[100, 200]),
    ],
    n_trails=15,
)

# 4. MF
mf_tuned = _random_search(
    MF(seed=123),
    [
        Discrete(name="k", values=[10, 30, 50]),
        Continuous(name="learning_rate", low=0.001, high=0.05),
        Continuous(name="lambda_reg", low=0.0001, high=0.01),
        Discrete(name="max_iter", values=[50, 100]),
    ],
    n_trails=10,
)

# 5. PMF
pmf_tuned = _random_search(
    PMF(seed=123),
    [
        Discrete(name="k", values=[5, 10, 20]),
        Continuous(name="learning_rate", low=0.0005, high=0.005),
        Continuous(name="lambda_reg", low=0.0001, high=0.01),
        Discrete(name="max_iter", values=[50, 100]),
    ],
    n_trails=10,
)

# 6. NMF
nmf_tuned = _random_search(
    NMF(seed=123, verbose=False),
    [
        Discrete(name="k", values=[10, 15, 25]),
        Continuous(name="learning_rate", low=0.001, high=0.01),
        Continuous(name="lambda_u", low=0.01, high=0.1),
        Continuous(name="lambda_v", low=0.01, high=0.1),
        Discrete(name="use_bias", values=[True, False]),
        Discrete(name="max_iter", values=[50, 100]),
    ],
    n_trails=10,
)

# 7. WMF
wmf_tuned = _random_search(
    WMF(seed=123, verbose=False),
    [
        Discrete(name="k", values=[30, 50, 70]),
        Continuous(name="learning_rate", low=0.0005, high=0.005),
        Continuous(name="lambda_u", low=0.001, high=0.05),
        Continuous(name="lambda_v", low=0.001, high=0.05),
        Discrete(name="max_iter", values=[50, 100]),
    ],
    n_trails=10,
)

# 8. HPF - constructed with hierarchical=False and registered under the name "PF"
hpf_tuned = _random_search(
    HPF(seed=123, hierarchical=False, name="PF"),
    [
        Discrete(name="k", values=[30, 50, 70]),
        Discrete(name="max_iter", values=[50, 100, 150]),
    ],
    n_trails=10,
)

# Every model / search defined above, in definition order.
tuned_models = [
    most_pop,
    user_knn_tuned,
    bpr_tuned,
    mf_tuned,
    pmf_tuned,
    nmf_tuned,
    wmf_tuned,
    hpf_tuned,
]

# Full metric suite for the final evaluation: the rating and global ranking
# metrics first, then each top-k metric at every cutoff.
_cutoffs = (5, 10, 20, 50)
all_metrics = [MAE(), MSE(), RMSE(), AUC(), MAP(), MRR()] + [
    metric_cls(k=cutoff)
    for metric_cls in (Precision, Recall, NDCG, FMeasure)
    for cutoff in _cutoffs
]



In [None]:
# Results of the tuning runs, keyed by the model name passed to tune_and_save().
best_models = {}
best_params = {}


def _to_native(value):
    """Return the built-in Python equivalent of a NumPy scalar; pass anything else through."""
    return value.item() if isinstance(value, np.generic) else value


def tune_and_save(model_name, model_obj):
    """Run a single-model Cornac experiment and record the outcome.

    The experiment is built from the notebook-level split `rs` and reports
    `ndcg50`. Afterwards:

    * if `model_obj` exposes `best_params` (the hyperparameter-search
      wrappers), its `best_params` and `best_model` are stored in the
      module-level `best_params` / `best_models` dicts and the parameters are
      written to ``{model_name}_best_params.txt``;
    * otherwise (e.g. MostPop) `model_obj` itself is stored in `best_models`.

    Parameters
    ----------
    model_name : str
        Key under which results are stored; also the prefix of the params file.
    model_obj : object
        A Cornac model or a search wrapper around one.

    Returns
    -------
    None
        Failures are reported (message plus full traceback) rather than
        raised, so one failing model does not stop the notebook.
    """
    import traceback  # only needed on the error path

    try:
        print(f"\n\n======= Tuning {model_name} =======")

        # Single-model experiment on the shared train/validation/test split.
        experiment = cornac.Experiment(
            eval_method=rs,
            models=[model_obj],
            metrics=[ndcg50],
            user_based=True,
        )
        experiment.run()

        if hasattr(model_obj, 'best_params'):
            raw_params = model_obj.best_params
            best_params[model_name] = raw_params
            best_models[model_name] = model_obj.best_model

            if model_obj.best_model is None:
                # A search can finish without selecting any candidate; make
                # that visible instead of silently storing None.
                print(f"Warning: search for {model_name} did not select a best model")

            # NumPy scalars print as e.g. np.int64(50) under recent NumPy;
            # convert them so the log and the saved file hold plain literals.
            if isinstance(raw_params, dict):
                readable_params = {key: _to_native(val) for key, val in raw_params.items()}
            else:
                readable_params = raw_params

            print(f"Best parameters for {model_name}:")
            print(readable_params)

            # Persist the parameters next to the notebook.
            with open(f"{model_name}_best_params.txt", "w") as f:
                f.write(str(readable_params))
        else:
            # Models without hyperparameter tuning (MostPop).
            best_models[model_name] = model_obj
            print(f"Model {model_name} evaluated (no tunable parameters)")

        print(f"Successfully completed {model_name} evaluation")

    except Exception as e:
        # Keep the notebook running, but surface the full traceback so the
        # failure can be diagnosed.
        print(f"Error tuning {model_name}: {str(e)}")
        traceback.print_exc()

In [None]:
# Evaluate the MostPop baseline. It has no hyperparameters, so tune_and_save
# stores the model object itself in best_models.
tune_and_save("MostPop", most_pop)




[MostPop] Training started!

[MostPop] Evaluation started!


Ranking:   0%|          | 0/4577 [00:00<?, ?it/s]

Ranking:   0%|          | 0/3440 [00:00<?, ?it/s]


VALIDATION:
...
        | NDCG@50 | Time (s)
------- + ------- + --------
MostPop |  0.0323 |   1.6197

TEST:
...
        | NDCG@50 | Train (s) | Test (s)
------- + ------- + --------- + --------
MostPop |  0.0362 |    0.0187 |   2.4491

Model MostPop evaluated (no tunable parameters)
Successfully completed MostPop evaluation


In [None]:
# Run the BPR random search (bpr_tuned) and record its best model and
# parameters under the key "BPR".
tune_and_save("BPR", bpr_tuned)




[RandomSearch_BPR] Training started!

[RandomSearch_BPR] Evaluation started!


Ranking:   0%|          | 0/4577 [00:00<?, ?it/s]

Ranking:   0%|          | 0/3440 [00:00<?, ?it/s]


VALIDATION:
...
                 | NDCG@50 | Time (s)
---------------- + ------- + --------
RandomSearch_BPR |  0.0322 |   3.6211

TEST:
...
                 | NDCG@50 | Train (s) | Test (s)
---------------- + ------- + --------- + --------
RandomSearch_BPR |  0.0362 |   48.7440 |   7.4302

Best parameters for BPR:
{'k': np.int64(50), 'lambda_reg': 0.020605126198180043, 'learning_rate': 0.006007095821714796, 'max_iter': np.int64(200)}
Successfully completed BPR evaluation


In [None]:
# Run the UserKNN random search (user_knn_tuned) and record its best model and
# parameters under the key "UserKNN".
tune_and_save("UserKNN", user_knn_tuned)




[RandomSearch_UserKNN] Training started!
Evaluating: {'amplify': 1.5447037783967925, 'k': np.int64(30), 'mean_centered': np.True_, 'similarity': np.str_('cosine')}


  0%|          | 0/5417 [00:00<?, ?it/s]

Evaluating: {'amplify': 1.5363272825402925, 'k': np.int64(20), 'mean_centered': np.False_, 'similarity': np.str_('cosine')}


  0%|          | 0/5417 [00:00<?, ?it/s]

Evaluating: {'amplify': 1.1346596901866914, 'k': np.int64(30), 'mean_centered': np.False_, 'similarity': np.str_('cosine')}


  0%|          | 0/5417 [00:00<?, ?it/s]

Evaluating: {'amplify': 1.3695414455339152, 'k': np.int64(40), 'mean_centered': np.False_, 'similarity': np.str_('cosine')}


  0%|          | 0/5417 [00:00<?, ?it/s]

Evaluating: {'amplify': 1.014767024226304, 'k': np.int64(40), 'mean_centered': np.False_, 'similarity': np.str_('pearson')}


  0%|          | 0/5417 [00:00<?, ?it/s]

Evaluating: {'amplify': 0.8671389154308797, 'k': np.int64(10), 'mean_centered': np.True_, 'similarity': np.str_('cosine')}


  0%|          | 0/5417 [00:00<?, ?it/s]

Evaluating: {'amplify': 1.6069931085980536, 'k': np.int64(40), 'mean_centered': np.False_, 'similarity': np.str_('cosine')}


  0%|          | 0/5417 [00:00<?, ?it/s]

Evaluating: {'amplify': 0.6255897268453765, 'k': np.int64(30), 'mean_centered': np.True_, 'similarity': np.str_('pearson')}


  0%|          | 0/5417 [00:00<?, ?it/s]

Evaluating: {'amplify': 1.4516014378269815, 'k': np.int64(10), 'mean_centered': np.False_, 'similarity': np.str_('pearson')}


  0%|          | 0/5417 [00:00<?, ?it/s]

Evaluating: {'amplify': 1.610444586102182, 'k': np.int64(10), 'mean_centered': np.True_, 'similarity': np.str_('cosine')}


  0%|          | 0/5417 [00:00<?, ?it/s]

Best parameter settings: {'amplify': 1.3695414455339152, 'k': np.int64(40), 'mean_centered': np.False_, 'similarity': np.str_('cosine')}
NDCG@50 = 0.0043

[RandomSearch_UserKNN] Evaluation started!


Ranking:   0%|          | 0/4577 [00:00<?, ?it/s]

Ranking:   0%|          | 0/3440 [00:00<?, ?it/s]


VALIDATION:
...
                     | NDCG@50 | Time (s)
-------------------- + ------- + --------
RandomSearch_UserKNN |  0.0043 |  67.8192

TEST:
...
                     | NDCG@50 | Train (s) | Test (s)
-------------------- + ------- + --------- + --------
RandomSearch_UserKNN |  0.0050 |  668.6722 |  91.9115

Best parameters for UserKNN:
{'amplify': 1.3695414455339152, 'k': np.int64(40), 'mean_centered': np.False_, 'similarity': np.str_('cosine')}
Successfully completed UserKNN evaluation


In [None]:
# Run the MF random search (mf_tuned) and record its best model and parameters
# under the key "MF".
tune_and_save("MF", mf_tuned)




[RandomSearch_MF] Training started!

[RandomSearch_MF] Evaluation started!


Ranking:   0%|          | 0/4577 [00:00<?, ?it/s]

Ranking:   0%|          | 0/3440 [00:00<?, ?it/s]


VALIDATION:
...
                | NDCG@50 | Time (s)
--------------- + ------- + --------
RandomSearch_MF |  0.0044 |   3.4137

TEST:
...
                | NDCG@50 | Train (s) | Test (s)
--------------- + ------- + --------- + --------
RandomSearch_MF |  0.0042 |   33.7073 |   4.1025

Best parameters for MF:
{'k': np.int64(30), 'lambda_reg': 0.00190666813148965, 'learning_rate': 0.009597136051227133, 'max_iter': np.int64(100)}
Successfully completed MF evaluation


In [None]:
# Run the PMF random search (pmf_tuned) and record its best model and
# parameters under the key "PMF".
tune_and_save("PMF", pmf_tuned)




[RandomSearch_PMF] Training started!

[RandomSearch_PMF] Evaluation started!


Ranking:   0%|          | 0/4577 [00:00<?, ?it/s]

Ranking:   0%|          | 0/3440 [00:00<?, ?it/s]


VALIDATION:
...
                 | NDCG@50 | Time (s)
---------------- + ------- + --------
RandomSearch_PMF |  0.0122 |   1.8646

TEST:
...
                 | NDCG@50 | Train (s) | Test (s)
---------------- + ------- + --------- + --------
RandomSearch_PMF |  0.0144 |   40.8985 |   2.6725

Best parameters for PMF:
{'k': np.int64(20), 'lambda_reg': 0.0006908111764347267, 'learning_rate': 0.0022911991489869415, 'max_iter': np.int64(50)}
Successfully completed PMF evaluation


In [None]:
# Run the NMF random search (nmf_tuned).
# NOTE(review): the recorded run of this cell failed with "Buffer dtype
# mismatch, expected 'float' but got 'double'". tune_and_save caught the error,
# so no "NMF" entry was stored in best_models / best_params - root cause still
# to be investigated.
tune_and_save("NMF", nmf_tuned)




[RandomSearch_NMF] Training started!
Error tuning NMF: Buffer dtype mismatch, expected 'float' but got 'double'


In [None]:
# Run the WMF random search (wmf_tuned) and record its best model and
# parameters under the key "WMF".
tune_and_save("WMF", wmf_tuned)




[RandomSearch_WMF] Training started!

[RandomSearch_WMF] Evaluation started!


Ranking:   0%|          | 0/4577 [00:00<?, ?it/s]

Ranking:   0%|          | 0/3440 [00:00<?, ?it/s]


VALIDATION:
...
                 | NDCG@50 | Time (s)
---------------- + ------- + --------
RandomSearch_WMF |  0.0583 |   2.0304

TEST:
...
                 | NDCG@50 | Train (s) | Test (s)
---------------- + ------- + --------- + --------
RandomSearch_WMF |  0.0694 |  261.8878 |   2.8960

Best parameters for WMF:
{'k': np.int64(70), 'lambda_u': 0.035934810705638136, 'lambda_v': 0.021995075383154462, 'learning_rate': 0.003608981847620878, 'max_iter': np.int64(100)}
Successfully completed WMF evaluation


In [None]:
# Run the HPF random search (hpf_tuned) and record its results under the key
# "HPF". The wrapped model was created with hierarchical=False and name="PF",
# which is why the logs label it RandomSearch_PF.
tune_and_save("HPF", hpf_tuned)




[RandomSearch_PF] Training started!
Learning...
Learning completed!
Learning...
Learning completed!
Learning...
Learning completed!
Learning...
Learning completed!
Learning...
Learning completed!
Learning...
Learning completed!
Learning...
Learning completed!
Learning...
Learning completed!
Learning...
Learning completed!
Learning...
Learning completed!

[RandomSearch_PF] Evaluation started!


Ranking:   0%|          | 0/4577 [00:00<?, ?it/s]

Ranking:   0%|          | 0/3440 [00:00<?, ?it/s]


VALIDATION:
...
                | NDCG@50 | Time (s)
--------------- + ------- + --------
RandomSearch_PF |  0.0307 |   2.5400

TEST:
...
                | NDCG@50 | Train (s) | Test (s)
--------------- + ------- + --------- + --------
RandomSearch_PF |  0.0348 |  944.7075 |   3.5912

Best parameters for HPF:
{'k': np.int64(30), 'max_iter': np.int64(150)}
Successfully completed HPF evaluation


In [None]:
# NeuMF is tuned once per architecture: `layers` is fixed on the base model of
# each search because (per the original note) random search gives errors
# otherwise.
# Small architecture: layers=[16, 8]
neumf_small_base = NeuMF(layers=[16, 8], seed=123, backend='pytorch', verbose=False)

neumf_small_space = [
    Discrete(name="num_factors", values=[8, 16]),
    Discrete(name="act_fn", values=["tanh", "relu"]),
    Discrete(name="num_epochs", values=[5, 10]),
    Discrete(name="num_neg", values=[3, 5]),
    Discrete(name="batch_size", values=[128, 256]),
    Continuous(name="lr", low=0.0001, high=0.005),
]

neumf_small_tuned = RandomSearch(
    model=neumf_small_base,
    space=neumf_small_space,
    metric=ndcg50,
    eval_method=rs,
    n_trails=5,
)

# Run the search and record the result under "NeuMF_small".
tune_and_save("NeuMF_small", neumf_small_tuned)




[RandomSearch_NeuMF] Training started!

[RandomSearch_NeuMF] Evaluation started!


Ranking:   0%|          | 0/4430 [00:00<?, ?it/s]

Ranking:   0%|          | 0/3329 [00:00<?, ?it/s]


VALIDATION:
...
                   | NDCG@50 | Time (s)
------------------ + ------- + --------
RandomSearch_NeuMF |  0.0366 |   4.4806

TEST:
...
                   | NDCG@50 | Train (s) | Test (s)
------------------ + ------- + --------- + --------
RandomSearch_NeuMF |  0.0385 |  125.2804 |   5.4742

Best parameters for NeuMF_small:
{'act_fn': np.str_('tanh'), 'batch_size': np.int64(256), 'lr': 0.0015020827412568593, 'num_epochs': np.int64(5), 'num_factors': np.int64(8), 'num_neg': np.int64(3)}
Successfully completed NeuMF_small evaluation


In [None]:
# Medium NeuMF architecture: layers=[32, 16, 8], searched over the same
# hyperparameter ranges as the small variant.
neumf_medium_base = NeuMF(layers=[32, 16, 8], seed=123, backend='pytorch', verbose=False)

neumf_medium_space = [
    Discrete(name="num_factors", values=[8, 16]),
    Discrete(name="act_fn", values=["tanh", "relu"]),
    Discrete(name="num_epochs", values=[5, 10]),
    Discrete(name="num_neg", values=[3, 5]),
    Discrete(name="batch_size", values=[128, 256]),
    Continuous(name="lr", low=0.0001, high=0.005),
]

neumf_medium_tuned = RandomSearch(
    model=neumf_medium_base,
    space=neumf_medium_space,
    metric=ndcg50,
    eval_method=rs,
    n_trails=5,
)

# Run the search and record the result under "NeuMF_medium".
tune_and_save("NeuMF_medium", neumf_medium_tuned)




[RandomSearch_NeuMF] Training started!

[RandomSearch_NeuMF] Evaluation started!


Ranking:   0%|          | 0/4430 [00:00<?, ?it/s]

Ranking:   0%|          | 0/3329 [00:00<?, ?it/s]


VALIDATION:
...
                   | NDCG@50 | Time (s)
------------------ + ------- + --------
RandomSearch_NeuMF |  0.0335 |   4.1725

TEST:
...
                   | NDCG@50 | Train (s) | Test (s)
------------------ + ------- + --------- + --------
RandomSearch_NeuMF |  0.0353 |  119.3781 |   6.2186

Best parameters for NeuMF_medium:
{'act_fn': np.str_('tanh'), 'batch_size': np.int64(256), 'lr': 0.0015020827412568593, 'num_epochs': np.int64(5), 'num_factors': np.int64(8), 'num_neg': np.int64(3)}
Successfully completed NeuMF_medium evaluation


In [None]:
# Compare the two tuned NeuMF architectures (small vs. medium).
# tune_and_save() only records an architecture when its run succeeded, so every
# lookup below tolerates missing entries.
neumf_results = {}
neumf_val_scores = {}

for arch in ("small", "medium"):
    key = f"NeuMF_{arch}"
    if key not in best_models:
        continue
    neumf_results[arch] = (best_models[key], best_params.get(key, "No parameters found"))

    # The search wrapper is expected to expose the validation score of its best
    # trial as `best_score` (the value the search log prints next to "Best
    # parameter settings"); read it defensively in case it is absent.
    search = globals().get(f"neumf_{arch}_tuned")
    score = getattr(search, "best_score", None)
    if score is not None and np.isfinite(score):
        neumf_val_scores[arch] = float(score)

# Keep the per-architecture convenience names this cell has always defined.
if "small" in neumf_results:
    small_model, small_params = neumf_results["small"]
if "medium" in neumf_results:
    medium_model, medium_params = neumf_results["medium"]

# Report parameters together with the validation score; the parameters alone
# cannot tell the architectures apart when both searches pick the same setting.
if neumf_results:
    print("\n========= NeuMF Architecture Comparison =========")
    for arch, (model, params) in neumf_results.items():
        print(f"\nArchitecture: {arch}")
        print(f"Parameters: {params}")
        if arch in neumf_val_scores:
            print(f"Best validation NDCG@50: {neumf_val_scores[arch]:.4f}")

    if neumf_val_scores:
        best_neumf_arch = max(neumf_val_scores, key=neumf_val_scores.get)
        print(f"\nBest NeuMF architecture by validation NDCG@50: {best_neumf_arch}")




Architecture: small
Parameters: {'act_fn': np.str_('tanh'), 'batch_size': np.int64(256), 'lr': 0.0015020827412568593, 'num_epochs': np.int64(5), 'num_factors': np.int64(8), 'num_neg': np.int64(3)}

Architecture: medium
Parameters: {'act_fn': np.str_('tanh'), 'batch_size': np.int64(256), 'lr': 0.0015020827412568593, 'num_epochs': np.int64(5), 'num_factors': np.int64(8), 'num_neg': np.int64(3)}


In [None]:
# Same approach for VAECF: one random search per fixed autoencoder structure.
# Small architecture: autoencoder_structure=[10]
vaecf_small_base = VAECF(autoencoder_structure=[10], seed=123, verbose=False)

vaecf_small_space = [
    Discrete(name="k", values=[8, 10]),
    Discrete(name="act_fn", values=["tanh", "sigmoid"]),
    Discrete(name="likelihood", values=["mult", "pois"]),
    Discrete(name="n_epochs", values=[50, 100]),
    Discrete(name="batch_size", values=[50, 100]),
    Continuous(name="learning_rate", low=0.0001, high=0.005),
    Continuous(name="beta", low=0.5, high=1.5),
]

vaecf_small_tuned = RandomSearch(
    model=vaecf_small_base,
    space=vaecf_small_space,
    metric=ndcg50,
    eval_method=rs,
    n_trails=4,  # fewer trials for this more complex model
)

# Run the search and record the result under "VAECF_small".
tune_and_save("VAECF_small", vaecf_small_tuned)




[RandomSearch_VAECF] Training started!

[RandomSearch_VAECF] Evaluation started!


Ranking:   0%|          | 0/4430 [00:00<?, ?it/s]

Ranking:   0%|          | 0/3329 [00:00<?, ?it/s]


VALIDATION:
...
                   | NDCG@50 | Time (s)
------------------ + ------- + --------
RandomSearch_VAECF |  0.0388 |   3.2505

TEST:
...
                   | NDCG@50 | Train (s) | Test (s)
------------------ + ------- + --------- + --------
RandomSearch_VAECF |  0.0426 |  339.2662 |   4.4681

Best parameters for VAECF_small:
{'act_fn': np.str_('tanh'), 'batch_size': np.int64(100), 'beta': 1.2800277619120792, 'k': np.int64(10), 'learning_rate': 0.00345566571906583, 'likelihood': np.str_('mult'), 'n_epochs': np.int64(100)}
Successfully completed VAECF_small evaluation


In [None]:
# Medium VAECF architecture: autoencoder_structure=[20], searched over the same
# hyperparameter ranges as the small variant.
vaecf_medium_base = VAECF(autoencoder_structure=[20], seed=123, verbose=False)

vaecf_medium_space = [
    Discrete(name="k", values=[8, 10]),
    Discrete(name="act_fn", values=["tanh", "sigmoid"]),
    Discrete(name="likelihood", values=["mult", "pois"]),
    Discrete(name="n_epochs", values=[50, 100]),
    Discrete(name="batch_size", values=[50, 100]),
    Continuous(name="learning_rate", low=0.0001, high=0.005),
    Continuous(name="beta", low=0.5, high=1.5),
]

vaecf_medium_tuned = RandomSearch(
    model=vaecf_medium_base,
    space=vaecf_medium_space,
    metric=ndcg50,
    eval_method=rs,
    n_trails=4,
)

# Run the search and record the result under "VAECF_medium".
tune_and_save("VAECF_medium", vaecf_medium_tuned)




[RandomSearch_VAECF] Training started!

[RandomSearch_VAECF] Evaluation started!


Ranking:   0%|          | 0/4430 [00:00<?, ?it/s]

Ranking:   0%|          | 0/3329 [00:00<?, ?it/s]


VALIDATION:
...
                   | NDCG@50 | Time (s)
------------------ + ------- + --------
RandomSearch_VAECF |  0.0469 |   3.6567

TEST:
...
                   | NDCG@50 | Train (s) | Test (s)
------------------ + ------- + --------- + --------
RandomSearch_VAECF |  0.0534 |  367.4463 |   4.9044

Best parameters for VAECF_medium:
{'act_fn': np.str_('tanh'), 'batch_size': np.int64(100), 'beta': 1.2800277619120792, 'k': np.int64(10), 'learning_rate': 0.00345566571906583, 'likelihood': np.str_('mult'), 'n_epochs': np.int64(100)}
Successfully completed VAECF_medium evaluation


In [None]:
# Deep VAECF architecture: autoencoder_structure=[10, 5], searched over the
# same hyperparameter ranges as the other VAECF variants.
vaecf_deep_base = VAECF(autoencoder_structure=[10, 5], seed=123, verbose=False)

vaecf_deep_space = [
    Discrete(name="k", values=[8, 10]),
    Discrete(name="act_fn", values=["tanh", "sigmoid"]),
    Discrete(name="likelihood", values=["mult", "pois"]),
    Discrete(name="n_epochs", values=[50, 100]),
    Discrete(name="batch_size", values=[50, 100]),
    Continuous(name="learning_rate", low=0.0001, high=0.005),
    Continuous(name="beta", low=0.5, high=1.5),
]

vaecf_deep_tuned = RandomSearch(
    model=vaecf_deep_base,
    space=vaecf_deep_space,
    metric=ndcg50,
    eval_method=rs,
    n_trails=4,
)

# Run the search and record the result under "VAECF_deep".
tune_and_save("VAECF_deep", vaecf_deep_tuned)




[RandomSearch_VAECF] Training started!

[RandomSearch_VAECF] Evaluation started!


Ranking:   0%|          | 0/4430 [00:00<?, ?it/s]

Ranking:   0%|          | 0/3329 [00:00<?, ?it/s]


VALIDATION:
...
                   | NDCG@50 | Time (s)
------------------ + ------- + --------
RandomSearch_VAECF |  0.0401 |   3.6472

TEST:
...
                   | NDCG@50 | Train (s) | Test (s)
------------------ + ------- + --------- + --------
RandomSearch_VAECF |  0.0427 |  357.5700 |   4.6857

Best parameters for VAECF_deep:
{'act_fn': np.str_('tanh'), 'batch_size': np.int64(100), 'beta': 1.2800277619120792, 'k': np.int64(10), 'learning_rate': 0.00345566571906583, 'likelihood': np.str_('mult'), 'n_epochs': np.int64(100)}
Successfully completed VAECF_deep evaluation


In [None]:
# Compare the three tuned VAECF architectures (small, medium, deep).
# tune_and_save() only records an architecture when its run succeeded, so every
# lookup below tolerates missing entries.
vaecf_results = {}
vaecf_val_scores = {}

for arch in ("small", "medium", "deep"):
    key = f"VAECF_{arch}"
    if key not in best_models:
        continue
    vaecf_results[arch] = (best_models[key], best_params.get(key, "No parameters found"))

    # The search wrapper is expected to expose the validation score of its best
    # trial as `best_score` (the value the search log prints next to "Best
    # parameter settings"); read it defensively in case it is absent.
    search = globals().get(f"vaecf_{arch}_tuned")
    score = getattr(search, "best_score", None)
    if score is not None and np.isfinite(score):
        vaecf_val_scores[arch] = float(score)

# Keep the per-architecture convenience names this cell has always defined.
if "small" in vaecf_results:
    small_model, small_params = vaecf_results["small"]
if "medium" in vaecf_results:
    medium_model, medium_params = vaecf_results["medium"]
if "deep" in vaecf_results:
    deep_model, deep_params = vaecf_results["deep"]

# Report parameters together with the validation score; the parameters alone
# cannot tell the architectures apart when every search picks the same setting.
if vaecf_results:
    print("\n========= VAECF Architecture Comparison =========")
    for arch, (model, params) in vaecf_results.items():
        print(f"\nArchitecture: {arch}")
        print(f"Parameters: {params}")
        if arch in vaecf_val_scores:
            print(f"Best validation NDCG@50: {vaecf_val_scores[arch]:.4f}")

    # The winner is computed from the scores rather than asserted in a comment.
    if vaecf_val_scores:
        best_vaecf_arch = max(vaecf_val_scores, key=vaecf_val_scores.get)
        print(f"\nBest VAECF architecture by validation NDCG@50: {best_vaecf_arch}")



Architecture: small
Parameters: {'act_fn': np.str_('tanh'), 'batch_size': np.int64(100), 'beta': 1.2800277619120792, 'k': np.int64(10), 'learning_rate': 0.00345566571906583, 'likelihood': np.str_('mult'), 'n_epochs': np.int64(100)}

Architecture: medium
Parameters: {'act_fn': np.str_('tanh'), 'batch_size': np.int64(100), 'beta': 1.2800277619120792, 'k': np.int64(10), 'learning_rate': 0.00345566571906583, 'likelihood': np.str_('mult'), 'n_epochs': np.int64(100)}

Architecture: deep
Parameters: {'act_fn': np.str_('tanh'), 'batch_size': np.int64(100), 'beta': 1.2800277619120792, 'k': np.int64(10), 'learning_rate': 0.00345566571906583, 'likelihood': np.str_('mult'), 'n_epochs': np.int64(100)}
