# Neural Collaborative Filtering with Randomly Initialized Embeddings By Cluster

For each customer cluster (as defined in step4_customer_segmentation), we will train a separate NCF model.

In [11]:
# Colab-only: mount Google Drive so the project folders under /content/drive are readable.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [12]:
import pandas as pd
import itertools
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import pandas as pd
from sklearn.model_selection import train_test_split
import numpy as np
import copy
import os
from sklearn.metrics import f1_score
from collections import defaultdict
import torch.nn.functional as F

# Root of the shared project folder on Google Drive; the data folder sits directly beneath it.
project_dir = '/content/drive/MyDrive/bt4222_group_6/bt4222_group_6_amazon'
data_dir = f'{project_dir}/data'

# Step 1: Load dataset

Previously, in step1_data_preprocessing.ipynb, we split the df_reviews dataset into training, validation and testing samples for each user in chronological order: the earliest 70% of each user's interactions are used for training, the next 15% for validation and the last 15% for testing.

In [13]:
# Load the train / test / validation splits (CSV files stored under data_dir).
train_data = pd.read_csv(os.path.join(data_dir,"train_data.csv"))
test_data = pd.read_csv(os.path.join(data_dir,"test_data.csv"))
val_data = pd.read_csv(os.path.join(data_dir,"val_data.csv"))

# Full review table with features and cluster labels; in the cells shown here it is only previewed.
df_reviews = pd.read_csv(os.path.join(data_dir,"filtered_reviews_with_features_and_clusters.csv"))
print(df_reviews.head())

   customer_id  product_id  product_parent  \
0        11960  B00LCJAW06       219600481   
1        11960  B008OTSEXY       682436048   
2        11960  B00KJ15KGY        32170248   
3        11960  B008ZL49WQ       614364353   
4        11960  B002WRGE5O       928204157   

                                       product_title product_category  \
0  Persian-Rugs T1007 Abstract Modern Area Rug Ca...        Furniture   
1  Flash Furniture High Back Black Ribbed Upholst...        Furniture   
2  Jackson Pollock Inspired Coffee Glass Table w/...        Furniture   
3                                  Eaze Lounge Chair        Furniture   
4         Walker Edison L-Shaped Glass Computer Desk        Furniture   

   star_rating  helpful_votes  total_votes vine verified_purchase  ...  \
0            4              1            1    N                 Y  ...   
1            4              0            0    N                 Y  ...   
2            4              1            1    N               

# Filtering Away Customers and Products in test and val set that do not appear in its training set

Prepares cluster-specific training, validation, and test data by:
- Extracting the subset of each dataframe corresponding to the given cluster ID.
- Mapping customer_id and product_id to 0-based index values within the cluster.
- Filtering val/test sets to only include users/items present in the cluster's training set.

This prevents cold-start problems during validation and testing. If a customer or product appears only in the validation or test set, the model has never seen it before and cannot generate a valid prediction. Thus we remove any rows in the validation or test sets whose customer or product is absent from the training set.

We do not remove any rows from the training set.

In [14]:
def prepare_cluster_data(cluster_id, train_df, val_df=None, test_df=None):
    """Slice the splits down to one cluster and attach cluster-local indices.

    The training rows of ``cluster_id`` define two 0-based lookup tables
    (customer_id -> user_idx, product_id -> item_idx), numbered in order of
    first appearance. Validation / test rows are kept only when both their
    customer and their product exist in those tables, then indexed with them.

    Args:
        cluster_id: Value of the ``cluster`` column to select.
        train_df: Training frame with ``cluster``, ``customer_id`` and
            ``product_id`` columns.
        val_df: Optional validation frame with the same columns.
        test_df: Optional test frame with the same columns.

    Returns:
        ``(train_cluster, val_cluster, test_cluster, user2idx, item2idx)``;
        ``val_cluster`` / ``test_cluster`` are ``None`` when the matching
        input frame was not supplied.
    """
    in_cluster = train_df['cluster'] == cluster_id
    train_cluster = train_df[in_cluster].reset_index(drop=True)

    # Index tables are local to this cluster and built from training rows only.
    user2idx = dict(
        (customer, position)
        for position, customer in enumerate(train_cluster['customer_id'].unique())
    )
    item2idx = dict(
        (product, position)
        for position, product in enumerate(train_cluster['product_id'].unique())
    )

    train_cluster['user_idx'] = train_cluster['customer_id'].map(user2idx)
    train_cluster['item_idx'] = train_cluster['product_id'].map(item2idx)

    def _restrict_to_known(split_df):
        # Shared path for the validation and test frames.
        if split_df is None:
            return None
        subset = split_df[split_df['cluster'] == cluster_id].reset_index(drop=True)
        known_user = subset['customer_id'].isin(user2idx)
        known_item = subset['product_id'].isin(item2idx)
        subset = subset[known_user & known_item].reset_index(drop=True)
        subset['user_idx'] = subset['customer_id'].map(user2idx)
        subset['item_idx'] = subset['product_id'].map(item2idx)
        return subset

    val_cluster = _restrict_to_known(val_df)
    test_cluster = _restrict_to_known(test_df)

    return train_cluster, val_cluster, test_cluster, user2idx, item2idx

# Create Data Loader

In [15]:
class ReviewsDataset(Dataset):
    """Torch dataset over a reviews frame with cluster-local indices.

    Expects ``data`` to have ``user_idx``, ``item_idx`` and ``star_rating``
    columns. Each item is a dict of 0-d tensors:
    ``customer_id`` (long), ``product_id`` (long) and ``rating`` (float).
    """

    def __init__(self, data):
        self.data = data
        # Convert the three columns once. Building a pandas row with `.iloc`
        # for every sample is by far the slowest part of iterating this
        # dataset, and the resulting tensors are the same.
        self._user_idx = torch.tensor(data['user_idx'].to_numpy(), dtype=torch.long)
        self._item_idx = torch.tensor(data['item_idx'].to_numpy(), dtype=torch.long)
        self._rating = torch.tensor(data['star_rating'].to_numpy(), dtype=torch.float)

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        return {
            'customer_id': self._user_idx[idx],
            'product_id': self._item_idx[idx],
            'rating': self._rating[idx]
        }

# Evaluation Functions

- **ndcg_at_k**: Computes the Normalized Discounted Cumulative Gain (NDCG) at rank k for a single list of relevance. If the list contains fewer than k items, it will use actual_k = min(k, len(relevances)) to ensure fair computation.

- **mean_ndcg_user_at_k**: Computes the mean NDCG@k across all users by grouping predicted scores and relevance labels per user, sorting by prediction, and applying ndcg_at_k. For each user, their items are sorted by predicted scores, and NDCG is computed using `ndcg_at_k` with actual_k = min(k, len(user_items)).

- **mean_precision_user_at_k**: Computes the mean Precision@k across all users.
Precision@k is the proportion of relevant items (e.g., rating ≥ threshold) among the top-k predicted items for each user. For each user, top-k items are selected based on predicted scores. If the user has fewer than k items, actual_k = min(k, len(user_items)) is used.  
  Precision is calculated as:  
  `precision = (# of relevant items among top-k) / actual_k`  
  where an item is considered relevant if `rating ≥ threshold`.

- **mean_recall_user_at_k**: Computes the mean Recall@k across all users.
Recall@k is the proportion of a user's relevant items (rating ≥ threshold) that are retrieved in the top-k predicted list. For each user, top-k items are selected based on predicted scores, and recall is calculated as:  
  `recall = (# of relevant items among top-k) / total number of relevant items for the user`  
  actual_k = min(k, len(user_items)) is used to handle users with fewer than k items.

- **mean_f1_user_at_k**:  
  Computes the mean F1@k across all users, where F1 combines precision and recall.  
  For each user, top-k items are selected (using actual_k = min(k, len(user_items))), and F1 is calculated based on binarized relevance labels (`rating ≥ threshold`).  
  The predicted labels are assumed to be all 1s (e.g top-k are predicted as relevant).

In [16]:
def ndcg_at_k(relevances, k):
    """Normalized Discounted Cumulative Gain at rank ``k`` for one ranked list.

    Args:
        relevances: Graded relevance labels, ordered by the ranking being
            evaluated (best-ranked item first).
        k: Cut-off rank. Lists shorter than ``k`` are scored on their full
            length.

    Returns:
        DCG@k divided by the ideal DCG@k, or 0.0 when the list is empty,
        ``k`` is not positive, or the ideal DCG is zero.
    """
    relevances = np.asarray(relevances, dtype=np.float64)
    actual_k = min(k, len(relevances))
    if actual_k <= 0:
        return 0.0
    discounts = np.log2(np.arange(2, actual_k + 2))
    dcg = np.sum((2 ** relevances[:actual_k] - 1) / discounts)
    # The ideal ranking must be drawn from the FULL list. Sorting only the
    # already-truncated top-k would ignore relevant items ranked below k and
    # inflate the score.
    ideal_relevances = np.sort(relevances)[::-1][:actual_k]
    idcg = np.sum((2 ** ideal_relevances - 1) / discounts)
    return dcg / idcg if idcg > 0 else 0.0

def mean_ndcg_user_at_k(all_users, all_preds, all_labels, k=10):
    """Average per-user NDCG@k.

    Rows are grouped by user, each user's items are ranked by predicted score
    (highest first), and ``ndcg_at_k`` is applied to the resulting relevance
    sequence. Returns 0.0 when there are no rows.
    """
    per_user = defaultdict(list)
    for user, score, relevance in zip(all_users, all_preds, all_labels):
        per_user[user].append((score, relevance))

    if not per_user:
        return 0.0

    per_user_ndcg = []
    for pairs in per_user.values():
        ranked = sorted(pairs, key=lambda pair: pair[0], reverse=True)
        ranked_relevance = [relevance for _, relevance in ranked]
        per_user_ndcg.append(ndcg_at_k(ranked_relevance, k))
    return np.mean(per_user_ndcg)

def mean_precision_user_at_k(all_users, all_preds, all_labels, k=10, threshold=4):
    """Average per-user Precision@k.

    For each user the items are ranked by predicted score (highest first) and
    the share of the top ``min(k, n_items)`` whose label is at least
    ``threshold`` is taken. Returns 0.0 when there are no rows.
    """
    per_user = defaultdict(list)
    for user, score, rating in zip(all_users, all_preds, all_labels):
        per_user[user].append((score, rating))

    if not per_user:
        return 0.0

    precisions = []
    for pairs in per_user.values():
        ranked = sorted(pairs, key=lambda pair: pair[0], reverse=True)
        cutoff = min(k, len(ranked))
        if cutoff > 0:
            hit_flags = [1 if rating >= threshold else 0 for _, rating in ranked[:cutoff]]
            precisions.append(np.sum(hit_flags) / cutoff)
        else:
            precisions.append(0)
    return np.mean(precisions)

def mean_recall_user_at_k(all_users, all_preds, all_labels, k=10, threshold=4):
    """Average per-user Recall@k.

    For each user: the number of relevant items (label >= ``threshold``)
    inside the top ``min(k, n_items)`` ranked by predicted score, divided by
    that user's total number of relevant items. Users with no relevant item
    contribute 0.0. Returns 0.0 when there are no rows.
    """
    grouped = defaultdict(list)
    for user, score, rating in zip(all_users, all_preds, all_labels):
        grouped[user].append((score, rating))

    def _relevance_flags(pairs):
        return [1 if rating >= threshold else 0 for _, rating in pairs]

    recalls = []
    for pairs in grouped.values():
        ranked = sorted(pairs, key=lambda pair: pair[0], reverse=True)
        head = ranked[:min(k, len(ranked))]

        relevant_total = np.sum(_relevance_flags(pairs))
        if relevant_total == 0:
            recalls.append(0.0)
            continue
        recalls.append(np.sum(_relevance_flags(head)) / relevant_total)

    if not recalls:
        return 0.0
    return np.mean(recalls)

def mean_f1_user_at_k(all_users, all_preds, all_labels, k=10, threshold=4):
    """Average per-user F1@k (harmonic mean of Precision@k and Recall@k).

    For each user the items are ranked by predicted score (highest first) and
    the top ``min(k, n_items)`` are treated as the recommended set. With
    ``hits`` = relevant items (label >= ``threshold``) in that set:

        precision = hits / actual_k
        recall    = hits / (all relevant items of the user)
        f1        = 2 * precision * recall / (precision + recall)

    The earlier version scored only the top-k slice against an all-ones
    prediction vector, so recall was 1 whenever there was a hit and relevant
    items outside the top-k were ignored. Recall now uses the user's full
    item list, matching ``mean_recall_user_at_k``. For users with at most
    ``k`` items the value is the same as before.

    Returns:
        Mean F1 over users; 0.0 when there are no rows. A user with no hit
        in the top-k contributes 0.0.
    """
    user_data = defaultdict(list)
    for user, pred, label in zip(all_users, all_preds, all_labels):
        user_data[user].append((pred, label))

    f1_list = []
    for entries in user_data.values():
        entries_sorted = sorted(entries, key=lambda x: x[0], reverse=True)
        actual_k = min(k, len(entries_sorted))
        if actual_k <= 0:
            f1_list.append(0.0)
            continue

        hits = sum(1 for _, label in entries_sorted[:actual_k] if label >= threshold)
        if hits == 0:
            # Precision and recall are both zero (or undefined): F1 is 0.
            f1_list.append(0.0)
            continue

        total_relevant = sum(1 for _, label in entries if label >= threshold)
        precision = hits / actual_k
        recall = hits / total_relevant
        f1_list.append(2 * precision * recall / (precision + recall))
    return np.mean(f1_list) if f1_list else 0.0

# Define the NCF model with GMF and MLP

NCF class implements a Neural Collaborative Filtering model combining:

- **GMF (Generalized Matrix Factorization)**: Element-wise product of user and item embeddings

- **MLP (Multi-Layer Perceptron)**: Concatenated embeddings passed through FC layers

- **Final prediction**: Merges GMF and MLP outputs to produce a predicted rating (1 to 5 scale)

In [17]:
class NCF(nn.Module):
    """Neural Collaborative Filtering model with a GMF branch and an MLP branch.

    * GMF branch: element-wise product of user and item embeddings.
    * MLP branch: concatenated user/item embeddings passed through two
      Linear -> BatchNorm -> ReLU -> Dropout stages (128 then 64 units).
    * Head: both branch outputs are concatenated, passed through one more
      Linear(128) -> BatchNorm -> ReLU -> Dropout stage and a final Linear(1).

    Args:
        num_users: Number of rows in the user embedding tables.
        num_items: Number of rows in the item embedding tables.
        embedding_dim: Width of every embedding table.
        dropout_rate: Dropout probability used after each hidden stage.

    Note:
        The final linear output ``x`` is mapped to ``x * 4 + 1``. No squashing
        non-linearity is applied before that, so the prediction is not bounded
        to the 1-5 rating range.
    """

    def __init__(self, num_users, num_items, embedding_dim, dropout_rate=0.3):
        super().__init__()
        # Randomly initialized embedding layers (separate tables per branch)
        self.customer_embeddings_gmf = nn.Embedding(num_users, embedding_dim)
        self.product_embeddings_gmf = nn.Embedding(num_items, embedding_dim)

        self.customer_embeddings_mlp = nn.Embedding(num_users, embedding_dim)
        self.product_embeddings_mlp = nn.Embedding(num_items, embedding_dim)

        self.fc1_mlp = nn.Linear(2 * embedding_dim, 128)
        self.bn1_mlp = nn.BatchNorm1d(128)
        self.dropout1_mlp = nn.Dropout(dropout_rate)

        self.fc2_mlp = nn.Linear(128, 64)
        self.bn2_mlp = nn.BatchNorm1d(64)
        self.dropout2_mlp = nn.Dropout(dropout_rate)

        self.fc1_combined = nn.Linear(embedding_dim + 64, 128)
        self.bn1_combined = nn.BatchNorm1d(128)
        self.dropout1_combined = nn.Dropout(dropout_rate)

        self.fc2_combined = nn.Linear(128, 1)

    def forward(self, customer_id, product_id):
        """Return one prediction per (customer_id, product_id) pair.

        Args:
            customer_id: Long tensor of user indices.
            product_id: Long tensor of item indices, same length.

        Returns:
            Tensor with the trailing singleton dimension removed, i.e. one
            value per input pair (shape ``(batch,)`` for 1-d index inputs).
        """
        customer_emb_gmf = self.customer_embeddings_gmf(customer_id)
        product_emb_gmf = self.product_embeddings_gmf(product_id)
        gmf_output = customer_emb_gmf * product_emb_gmf

        customer_emb_mlp = self.customer_embeddings_mlp(customer_id)
        product_emb_mlp = self.product_embeddings_mlp(product_id)
        mlp_input = torch.cat([customer_emb_mlp, product_emb_mlp], dim=-1)

        mlp_output = F.relu(self.bn1_mlp(self.fc1_mlp(mlp_input)))
        mlp_output = self.dropout1_mlp(mlp_output)
        mlp_output = F.relu(self.bn2_mlp(self.fc2_mlp(mlp_output)))
        mlp_output = self.dropout2_mlp(mlp_output)

        combined_input = torch.cat([gmf_output, mlp_output], dim=-1)
        combined_output = F.relu(self.bn1_combined(self.fc1_combined(combined_input)))
        combined_output = self.dropout1_combined(combined_output)

        output = self.fc2_combined(combined_output)
        # Squeeze only the feature axis. A bare squeeze() would also drop the
        # batch axis for a batch of one and return a 0-d tensor, which breaks
        # callers that iterate over or extend a list with the predictions.
        return output.squeeze(-1) * 4 + 1

## Model Training




### Grid Search

The grid search algorithm here performs an exhaustive search to identify the best combination of hyperparameters (embedding_dim, learning_rate, batch_size, dropout_rate, num_epochs) for training the NCF model.

For each configuration:

1. A new NCF model is instantiated with the configuration parameters.

2. The model is trained on the training set and evaluated on the validation set.

3. The best model state (with lowest validation loss) is stored using early stopping.

4. The configuration and model weights are saved if it performs better than all previous configurations.

It will then report the best-performing configuration which we will use to train the final model on the combined training and validation data before evaluating it on our test data.



# Separate NCF Models for Each Cluster
In the step4_customer_segmentation.ipynb file, we segmented customers into 4 categories, namely steady and satisfied customers, power buyers, engaged shoppers and casual buyers. Customers in each cluster have distinct purchasing behaviours and preferences, so we build one NCF model per cluster. This allows us to provide tailored recommendations to different customer groups based on their past purchasing behaviour.

The functions that we will be using are:

1. **train_model_for_cluster** : Trains a model on a single cluster's training data (cluster_id) and validates it on that cluster's validation split. It returns the trained model, the best validation loss, the best model state and the validation metrics recorded at the best epoch.

2. **train_all_clusters** : Trains an NCF model for each cluster using its own data splits. It returns four dictionaries keyed by cluster_id: models, validation losses, model weights and validation metrics.

3. **grid_search_by_cluster** : Performs an exhaustive search over the hyperparameter grid (embedding_dim, learning_rate, batch_size, dropout_rate, num_epochs). Every configuration is trained on all clusters, and the configuration with the lowest validation loss (batch-averaged MSE) averaged across clusters is selected. It returns the best hyperparameter configuration, the dictionary of best model states per cluster and the dictionary of validation metrics per cluster.

4. **evaluate_cluster_models** : Evaluates the trained models per cluster on the test data.

In [18]:
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

def train_model_for_cluster(cluster_id, train_df, val_df, config):
    """Train one NCF model on a single cluster with early stopping.

    Args:
        cluster_id: Cluster to train on (value of the ``cluster`` column).
        train_df: Training frame covering all clusters.
        val_df: Validation frame covering all clusters.
        config: Dict with ``embedding_dim``, ``dropout_rate``,
            ``learning_rate``, ``batch_size`` and ``num_epochs``; an optional
            ``patience`` (default 5) is the number of consecutive
            non-improving epochs tolerated before stopping.

    Returns:
        ``(model, best_val_loss, best_model_state, best_val_metrics)``.
        ``best_val_loss`` is the mean of the per-batch validation MSE values
        at the best epoch. ``best_val_metrics`` holds rmse / mse / ndcg@10 /
        precision@10 / recall@10 / f1@10 at that epoch. If no epoch ever
        improves (e.g. ``num_epochs`` is 0 or the loss is NaN), the model is
        returned as it stands, with an infinite loss and empty metrics.

    Raises:
        ValueError: If the cluster has no training rows, or no validation
            rows survive the cold-start filter.
    """
    train_cluster, val_cluster, _, user2idx, item2idx = prepare_cluster_data(
        cluster_id, train_df, val_df, test_df=None
    )
    if len(train_cluster) == 0:
        raise ValueError(f"Cluster {cluster_id} has no training rows.")
    if val_cluster is None or len(val_cluster) == 0:
        # Without validation batches the loss average below would divide by zero.
        raise ValueError(
            f"Cluster {cluster_id} has no validation rows after cold-start filtering; "
            "early stopping needs a non-empty validation set."
        )

    num_users = len(user2idx)
    num_items = len(item2idx)
    model = NCF(num_users, num_items, config['embedding_dim'], config['dropout_rate']).to(device)

    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=config['learning_rate'])

    batch_size = config['batch_size']
    # BatchNorm1d cannot normalise a one-sample batch in training mode, so a
    # trailing batch of exactly one row is dropped instead of crashing the epoch.
    drop_singleton_tail = (
        batch_size > 1
        and len(train_cluster) > 1
        and len(train_cluster) % batch_size == 1
    )
    train_loader = DataLoader(
        ReviewsDataset(train_cluster),
        batch_size=batch_size,
        shuffle=True,
        drop_last=drop_singleton_tail,
    )
    val_loader = DataLoader(ReviewsDataset(val_cluster), batch_size=batch_size)

    best_val_loss = float('inf')
    best_model_state = None
    patience_counter = 0
    best_val_metrics = {}
    patience = config.get('patience', 5)

    for epoch in range(config['num_epochs']):
        model.train()
        for batch in train_loader:
            user = batch['customer_id'].to(device)
            item = batch['product_id'].to(device)
            label = batch['rating'].to(device)

            optimizer.zero_grad()
            # reshape(-1) keeps predictions 1-d even for a batch of one.
            preds = model(user, item).reshape(-1)
            loss = criterion(preds, label)
            loss.backward()
            optimizer.step()

        model.eval()
        val_loss = 0
        val_all_preds, val_all_labels, val_all_users = [], [], []
        with torch.no_grad():
            for batch in val_loader:
                user = batch['customer_id'].to(device)
                item = batch['product_id'].to(device)
                label = batch['rating'].to(device)

                preds = model(user, item).reshape(-1)
                val_loss += criterion(preds, label).item()

                val_all_preds.extend(preds.detach().cpu().numpy())
                val_all_labels.extend(label.detach().cpu().numpy())
                val_all_users.extend(user.detach().cpu().numpy())

        # Mean of per-batch MSE values (not sample-weighted).
        val_loss /= len(val_loader)

        if val_loss < best_val_loss:
            best_val_loss = val_loss
            best_model_state = copy.deepcopy(model.state_dict())
            patience_counter = 0

            # Ranking metrics are only kept for the best epoch, so they are
            # only computed when the epoch actually improves.
            val_all_preds = np.array(val_all_preds)
            val_all_labels = np.array(val_all_labels)
            val_all_users = np.array(val_all_users)
            best_val_metrics = {
                'rmse': np.sqrt(np.mean((val_all_preds - val_all_labels)**2)),
                'mse': val_loss,
                'ndcg@10': mean_ndcg_user_at_k(val_all_users, val_all_preds, val_all_labels, k=10),
                'precision@10': mean_precision_user_at_k(val_all_users, val_all_preds, val_all_labels, k=10, threshold=4),
                'recall@10': mean_recall_user_at_k(val_all_users, val_all_preds, val_all_labels, k=10, threshold=4),
                'f1@10': mean_f1_user_at_k(val_all_users, val_all_preds, val_all_labels, k=10, threshold=4)
            }
        else:
            patience_counter += 1
            if patience_counter >= patience:
                break

    if best_model_state is None:
        # No epoch improved on +inf: keep the current weights rather than
        # calling load_state_dict(None).
        best_model_state = copy.deepcopy(model.state_dict())
    else:
        model.load_state_dict(best_model_state)
    return model, best_val_loss, best_model_state, best_val_metrics

def train_all_clusters(train_data, val_data, config):
    """Train one model per cluster found in ``train_data``.

    Clusters are processed in sorted order of the ``cluster`` column. Returns
    four dicts keyed by cluster id: models, best validation losses, best
    model states and validation metrics.
    """
    models = {}
    losses = {}
    states = {}
    val_metrics = {}
    cluster_ids = sorted(train_data['cluster'].unique())
    for cluster_id in cluster_ids:
        trained = train_model_for_cluster(cluster_id, train_data, val_data, config)
        (
            models[cluster_id],
            losses[cluster_id],
            states[cluster_id],
            val_metrics[cluster_id],
        ) = trained
    return models, losses, states, val_metrics

def grid_search_by_cluster(train_data, val_data, param_grid):
    """Exhaustive grid search; one shared configuration is chosen for all clusters.

    Every combination in ``param_grid`` is used to train a model per cluster.
    The combination with the lowest mean of the per-cluster best validation
    losses wins.

    Args:
        train_data: Training frame covering all clusters.
        val_data: Validation frame covering all clusters.
        param_grid: Dict mapping hyperparameter name to a list of values.

    Returns:
        ``(best_config, best_states, best_metrics)``: the winning config
        dict, plus the per-cluster model states and validation metrics
        obtained with it. All three are ``None`` when no configuration
        produced a finite improvement (for example an empty grid).
    """
    combos = list(itertools.product(*param_grid.values()))
    print("======================= GRID SEARCH =======================")
    # best_metrics is initialised as well, so the return statement cannot hit
    # an unbound local when the loop never improves on +inf.
    best_config, best_loss, best_states, best_metrics = None, float('inf'), None, None
    for vals in combos:
        config = dict(zip(param_grid.keys(), vals))
        print("Running Config:",config)
        _, losses, states, val_metrics = train_all_clusters(train_data, val_data, config)
        avg = np.mean(list(losses.values()))
        if avg < best_loss:
            best_loss, best_config, best_states, best_metrics = avg, config, states, val_metrics
    return best_config, best_states, best_metrics

def evaluate_cluster_models(models, test_data, train_df=None):
    """Evaluate each cluster's model on that cluster's test rows.

    The user/item index tables are rebuilt with ``prepare_cluster_data`` from
    ``train_df``. The test rows are then restricted to customers and products
    present in that cluster's part of ``train_df``.

    Args:
        models: Dict mapping cluster id to a trained model.
        test_data: Test frame covering all clusters.
        train_df: Frame used to rebuild the index tables. When omitted, the
            notebook-level ``train_data`` variable is used, which is what
            this function always did before the parameter existed.
            NOTE(review): the indices only line up with the model's
            embeddings if ``train_df`` yields the same id ordering as the
            data the model was trained on; confirm when passing another frame.

    Returns:
        Dict mapping cluster id to a metrics dict with keys ``rmse``, ``mse``
        (mean of per-batch MSE), ``ndcg@10``, ``precision@10``, ``recall@10``
        and ``f1@10``. A cluster with no usable test rows gets NaN for every
        metric.
    """
    if train_df is None:
        train_df = train_data

    metric_names = ('rmse', 'mse', 'ndcg@10', 'precision@10', 'recall@10', 'f1@10')
    criterion = nn.MSELoss()
    cluster_test_metrics = {}
    for cid, model in models.items():
        model.eval()
        _, _, test_cluster, _, _ = prepare_cluster_data(
            cluster_id=cid,
            train_df=train_df,
            val_df=None,
            test_df=test_data
        )
        if test_cluster is None or len(test_cluster) == 0:
            # Nothing to score: report NaN rather than dividing by zero batches.
            cluster_test_metrics[cid] = {name: float('nan') for name in metric_names}
            continue

        loader = DataLoader(ReviewsDataset(test_cluster), batch_size=512)
        test_loss = 0
        test_preds, test_labels, test_users = [], [], []
        with torch.no_grad():
            for batch in loader:
                user = batch['customer_id'].to(device)
                item = batch['product_id'].to(device)
                label = batch['rating'].to(device)

                # reshape(-1) keeps predictions 1-d even for a batch of one,
                # so extend() below always receives an iterable.
                preds = model(user, item).reshape(-1)
                test_preds.extend(preds.cpu().numpy())
                test_labels.extend(label.cpu().numpy())
                test_users.extend(user.cpu().numpy())
                test_loss += criterion(preds, label).item()

        test_preds = np.array(test_preds)
        test_labels = np.array(test_labels)
        test_users = np.array(test_users)

        cluster_test_metrics[cid] = {
            'rmse': np.sqrt(np.mean((test_preds - test_labels) ** 2)),
            'mse': test_loss / len(loader),
            'ndcg@10': mean_ndcg_user_at_k(test_users, test_preds, test_labels, k=10),
            'precision@10': mean_precision_user_at_k(test_users, test_preds, test_labels, k=10, threshold=4),
            'recall@10': mean_recall_user_at_k(test_users, test_preds, test_labels, k=10, threshold=4),
            'f1@10': mean_f1_user_at_k(test_users, test_preds, test_labels, k=10, threshold=4)
        }
    return cluster_test_metrics


# Final Model Training

We will retrain the NCF model using the optimal hyperparameters identified through Grid Search, this time on the combined training and validation data. Lastly, the model is then evaluated on the test data.

In [19]:
# Step 1: Define hyperparameter grid
# 2 x 1 x 2 x 2 x 2 = 16 combinations; each one trains a model for every cluster.
param_grid = {
    'embedding_dim': [16, 64],          # keep both for low vs high capacity
    'learning_rate': [0.001],           # pick one reliable value
    'batch_size': [128, 512],           # small vs large batch
    'dropout_rate': [0.0, 0.3],         # low vs regular dropout
    'num_epochs': [20, 40]              # moderate vs longer training
}

def run_final_pipeline_by_clusters(train_data, val_data, test_data, param_grid, cluster_col='cluster'):
    """Grid-search, retrain on train+val, evaluate on test, and persist the results.

    Steps:
        1. ``grid_search_by_cluster`` picks one configuration for all clusters.
        2. Per-cluster validation metrics of that configuration are printed,
           appended to ``results_by_cluster.txt`` and saved to
           ``val_results_by_cluster.csv``.
        3. One model per cluster is retrained on train+val with the chosen
           configuration. ``val_data`` is still passed as the validation set,
           so early stopping in this step monitors rows that are also part of
           the training data.
        4. The retrained models are evaluated on ``test_data``; metrics are
           printed, written to the text file and saved to
           ``test_results_by_cluster.csv``.

    Args:
        train_data, val_data, test_data: Frames covering all clusters.
        param_grid: Dict mapping hyperparameter name to a list of values.
        cluster_col: Accepted for interface compatibility; not used by this
            function.

    Raises:
        RuntimeError: If the grid search did not yield a usable configuration.
    """
    results_dir = os.path.join(project_dir, "Model Results/NCF Random Embedding/Clustered Model")
    # Create the output folder up front so neither the text log nor the CSV
    # exports fail on a fresh Drive.
    os.makedirs(results_dir, exist_ok=True)
    output_path = os.path.join(results_dir, "results_by_cluster.txt")

    with open(output_path, "w") as f:

        def _emit(line):
            # Same text goes to the notebook output and to the results file.
            print(line)
            f.write(line)

        best_config, _, best_metrics = grid_search_by_cluster(train_data, val_data, param_grid)
        if best_config is None or best_metrics is None:
            raise RuntimeError("Grid search did not produce a valid configuration.")

        # Validation Metrics
        print("======Validation Metrics By Cluster======\n")
        f.write("===== Validation Metrics For Each Cluster ======\n")
        val_results = []
        for cid, metrics in best_metrics.items():
            row = {"cluster_id":cid}
            row.update(metrics)
            row.update(best_config)
            val_results.append(row)
            _emit(f"Cluster {cid}, MSE: {metrics['mse']:.4f}, RMSE: {metrics['rmse']:.4f}, NDCG@10: {metrics['ndcg@10']}, Precision@10: {metrics['precision@10']}, Recall@10: {metrics['recall@10']}, F1@10: {metrics['f1@10']}\n")
        df_val_results = pd.DataFrame(val_results)
        df_val_results.to_csv(os.path.join(results_dir, "val_results_by_cluster.csv"), index=False)

        train_val_data = pd.concat([train_data, val_data]).reset_index(drop=True)
        final_models, _, _ ,_ = train_all_clusters(train_val_data, val_data, best_config)
        test_results = evaluate_cluster_models(final_models, test_data)

        print("======Test Results By Cluster======\n")
        f.write("===== Test Results For Each Cluster ======\n")
        test_results_list = []
        for cid, test_metrics in test_results.items():
            row = {"cluster_id":cid}
            row.update(test_metrics)
            row.update(best_config)
            test_results_list.append(row)
            _emit(f"Cluster {cid}, MSE: {test_metrics['mse']:.4f}, RMSE: {test_metrics['rmse']}, NDCG@10: {test_metrics['ndcg@10']}, Precision@10: {test_metrics['precision@10']}, Recall@10: {test_metrics['recall@10']}, F1@10: {test_metrics['f1@10']}\n")
        df_test_results = pd.DataFrame(test_results_list)
        df_test_results.to_csv(os.path.join(results_dir, "test_results_by_cluster.csv"), index=False)

In [20]:
# Run the full pipeline: grid search, retraining on train+val, test evaluation, and result export.
run_final_pipeline_by_clusters(train_data, val_data, test_data, param_grid)


Cluster 0, MSE: 0.8315, RMSE: 0.9121, NDCG@10: 0.9968564784461629, Precision@10: 0.8886216619671587, Recall@10: 0.8968319787692818, F1@10: 0.8913584342345331

Cluster 1, MSE: 2.1674, RMSE: 1.4816, NDCG@10: 1.0, Precision@10: 0.6421568627450981, Recall@10: 0.6421568627450981, F1@10: 0.6421568627450981

Cluster 2, MSE: 0.7773, RMSE: 0.8881, NDCG@10: 0.976637795672143, Precision@10: 0.9067006147352968, Recall@10: 0.953757225433526, F1@10: 0.9226680973263769

Cluster 3, MSE: 0.5483, RMSE: 0.7342, NDCG@10: 1.0, Precision@10: 0.9339485186983972, Recall@10: 0.9339485186983972, F1@10: 0.9339485186983972


Cluster 0, MSE: 1.0855, RMSE: 1.0414689779281616, NDCG@10: 0.974413732659043, Precision@10: 0.8862679119168612, Recall@10: 0.9528910863892323, F1@10: 0.9088338205767131

Cluster 1, MSE: 2.4049, RMSE: 1.5416595935821533, NDCG@10: 0.9752309223515311, Precision@10: 0.6177800100452034, Recall@10: 0.694123556002009, F1@10: 0.6432278586974719

Cluster 2, MSE: 0.8751, RMSE: 0.9388381242752075, NDCG

# Check Stored Validation and Testing Results

In [22]:
# step5_2_2 - NCF Model: Random Embeddings By Cluster
# Reload the CSVs written by run_final_pipeline_by_clusters and summarise them.
results_dir = '/content/drive/MyDrive/bt4222_group_6/bt4222_group_6_amazon/Model Results'

# Per-cluster validation metrics (one row per cluster, plus the chosen hyperparameters).
rand_emb_clustered_data_val_results = pd.read_csv(os.path.join(results_dir,"NCF Random Embedding/Clustered Model/val_results_by_cluster.csv"))
print(rand_emb_clustered_data_val_results.shape)
print(rand_emb_clustered_data_val_results.head())


# Per-cluster test metrics, same layout.
rand_emb_by_clustered_data_test_results = pd.read_csv(os.path.join(results_dir,"NCF Random Embedding/Clustered Model/test_results_by_cluster.csv"))
print(rand_emb_by_clustered_data_test_results.shape)
print(rand_emb_by_clustered_data_test_results.head())

# Unweighted column means across clusters. This also averages the non-metric
# numeric columns (cluster_id and the hyperparameter columns).
print("Average Validation Metrics:")
print(rand_emb_clustered_data_val_results.mean(numeric_only=True))

print("\nAverage Test Metrics:")
print(rand_emb_by_clustered_data_test_results.mean(numeric_only=True))

(4, 12)
   cluster_id      rmse       mse   ndcg@10  precision@10  recall@10  \
0           0  0.912108  0.831512  0.996856      0.888622   0.896832   
1           1  1.481553  2.167412  1.000000      0.642157   0.642157   
2           2  0.888148  0.777309  0.976638      0.906701   0.953757   
3           3  0.734164  0.548259  1.000000      0.933949   0.933949   

      f1@10  embedding_dim  learning_rate  batch_size  dropout_rate  \
0  0.891358             16          0.001         128           0.3   
1  0.642157             16          0.001         128           0.3   
2  0.922668             16          0.001         128           0.3   
3  0.933949             16          0.001         128           0.3   

   num_epochs  
0          20  
1          20  
2          20  
3          20  
(4, 12)
   cluster_id      rmse       mse   ndcg@10  precision@10  recall@10  \
0           0  1.041469  1.085503  0.974414      0.886268   0.952891   
1           1  1.541660  2.404885  0.975231