In [32]:
from google.colab import drive
# drive.mount('/content/drive')
drive.mount("/content/drive", force_remount=True)

Mounted at /content/drive


In [33]:
import pandas as pd
import itertools
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import pandas as pd
from sklearn.model_selection import train_test_split
import numpy as np
import copy
import os
from collections import defaultdict
import random
import itertools
from sklearn.metrics import precision_score, recall_score, f1_score
from sklearn.metrics import ndcg_score as sk_ndcg

# Build Custom Customer and Product Embedding
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler

data_dir = '/content/drive/MyDrive/bt4222_group_6/bt4222_group_6_amazon/data'
project_dir = '/content/drive/MyDrive/bt4222_group_6/bt4222_group_6_amazon'

# Loading and splitting the data

In [34]:
df_reviews = pd.read_csv(os.path.join(data_dir,"filtered_reviews_with_features_and_clusters.csv"))

In [35]:
print(df_reviews.head())

   customer_id  product_id  product_parent  \
0        11960  B00LCJAW06       219600481   
1        11960  B008OTSEXY       682436048   
2        11960  B00KJ15KGY        32170248   
3        11960  B008ZL49WQ       614364353   
4        11960  B002WRGE5O       928204157   

                                       product_title product_category  \
0  Persian-Rugs T1007 Abstract Modern Area Rug Ca...        Furniture   
1  Flash Furniture High Back Black Ribbed Upholst...        Furniture   
2  Jackson Pollock Inspired Coffee Glass Table w/...        Furniture   
3                                  Eaze Lounge Chair        Furniture   
4         Walker Edison L-Shaped Glass Computer Desk        Furniture   

   star_rating  helpful_votes  total_votes vine verified_purchase  ...  \
0            4              1            1    N                 Y  ...   
1            4              0            0    N                 Y  ...   
2            4              1            1    N               

# Mapping IDs in df_reviews

ID mapping for users and products in the df_reviews DataFrame, creating new columns for numerical indices corresponding to each unique user and product.

In [36]:
user2idx = {user_id: idx + 1 for idx, user_id in enumerate(df_reviews['customer_id'].unique())}
item2idx = {item_id: idx + 1 for idx, item_id in enumerate(df_reviews['product_id'].unique())}

df_reviews['user_idx'] = df_reviews['customer_id'].map(user2idx)
df_reviews['item_idx'] = df_reviews['product_id'].map(item2idx)

# Splitting the data for LSTM

- **Padding**: Sequences are padded to ensure a fixed length.
- **Data Splitting**: Users are split into training, validation, and test sets (64%, 16%, 20%).

#### Formation of Training, Validation, and Testing Data
1. **Training Data**: From the data, we select the 80% of the interactions to generate sequences. These sequences, along with positive samples (items the user interacted with), and negative samples (randomly selected items the user did not interact with), are created and added to the training data. These sequences allow the model to learn patterns in user behavior.

2. **Validation Data**: 16% of the interactions are used for validation. Similar to training data, we generate sequences, positive and negative samples, to evaluate how well the model generalizes to unseen data during training.

3. **Testing Data**: 24% of the data is used for testing. For each user in the testing data, the first 80% of interactions are used as model input and last 20% as positive labels. 10 negative items are also sampled. The model does not see this data during training. Positive samples are taken from the user's reviews that were held out for testing, and multiple negative samples are randomly chosen from items the user hasn't interacted with.

This approach ensures that the model is trained on sequences of past interactions, with positive samples (items the user interacted with) and negative samples (items the user did not interact with), preventing the model from seeing any cold-start users or products in the validation and test sets. Also ensures that the model is trained, validated, and tested on distinct sets of data, avoiding overfitting and ensuring generalizability.

In [37]:
max_len = 10
min_seq_length = 5
max_interactions = 20

df_sorted = df_reviews.sort_values(by=['user_idx', 'review_date'])
grouped = df_sorted.groupby('user_idx')

all_items = np.array(df_sorted['item_idx'].unique())
filtered_users = [uid for uid, grp in grouped if len(grp) >= min_seq_length]

# 64% train, 16% val, 20% test
train_val_users, test_users = train_test_split(filtered_users, test_size=0.2, random_state=42)
train_users, val_users = train_test_split(train_val_users, test_size=0.2, random_state=42)

def pad_left(seq, max_len, pad_value=0):
    return [pad_value] * (max_len - len(seq)) + seq

train_data, val_data, test_data = [], [], []

for uid in filtered_users:
    group = grouped.get_group(uid).sort_values('review_date').iloc[:max_interactions]
    items = group['item_idx'].tolist()
    user_item_set = set(items)
    neg_pool = np.setdiff1d(all_items, list(user_item_set), assume_unique=True)

    split_point = int(0.8 * len(items))

    for i in range(1, split_point):
        seq = items[max(0, i - max_len):i]
        if len(seq) < 4:
            continue  # Skip short sequences

        seq_padded = pad_left(seq, max_len)
        pos_item = items[i]
        neg_item = np.random.choice(neg_pool)

        if uid in train_users:
            train_data.extend([
                (uid, seq_padded, pos_item, 1),
                (uid, seq_padded, neg_item, 0)
            ])
        elif uid in val_users:
            val_data.extend([
                (uid, seq_padded, pos_item, 1),
                (uid, seq_padded, neg_item, 0)
            ])

    if uid in test_users:
        seq = items[max(0, split_point - max_len):split_point]
        if len(seq) < 4:
            continue

        seq_padded = pad_left(seq, max_len)
        # test positives
        pos_items = items[split_point:]

        for pos_item in pos_items:
            test_data.append((uid, seq_padded, pos_item, 1))

        # sample multiple negatives (e.g., 50)
        neg_items = np.random.choice(neg_pool, size=10, replace=False)
        for neg_item in neg_items:
            test_data.append((uid, seq_padded, neg_item, 0))

print(f"Train Samples: {len(train_data)}")
print(f"Val Samples: {len(val_data)}")
print(f"Test Samples: {len(test_data)}")

Train Samples: 77002
Val Samples: 19204
Test Samples: 111834


In [38]:
import random
test_user_ids = list(set([row[0] for row in test_data]))
# Sample 20% of them
sampled_users = set(random.sample(test_user_ids, int(len(test_user_ids) * 0.2)))
reduced_test_data = [row for row in test_data if row[0] in sampled_users]
test_data = reduced_test_data

print(f"Reduced to {len(test_data)} test samples from {len(sampled_users)} users.")

Reduced to 22361 test samples from 1906 users.


# Building Customer Embeddings

The customer embeddings will constructed by aggregating features within df_reviews by customer_id to find a customer's:

Purchase Frequency (Indicate how active a customer is)
Time Since Last Purchase (Indicate how active a customer is)
Average Star Rating (Overall Customer satisfaction across all of his purchases)
Total Vine Reviews (Measure of Credibility of his Reviews)
Total Helpful Votes (Measure the Credibility of his Reviews)
Total Votes (Measure the Credibility of his Reviews)
Average Sentiment (Overall Customer satisfaction across all of his purchases
These embeddings are more informative than a randomly intialized embedding in typical recommnedation systems. The choice of specific features injects domain knowledge into the model.

In [39]:
def build_customer_embeddings(df_reviews, embedding_dim):
    agg = df_reviews.groupby('customer_id').agg({
        'monthly_purchase_frequency': 'mean',
        'time_since_last_purchase': 'mean',
        'star_rating': 'mean',
        'vine': lambda x: (x == "Y").sum(),
        'helpful_votes': 'sum',
        'total_votes': 'sum',
        'sentiments': lambda x: (x == 'positive').mean()
    }).fillna(0).reset_index()

    cust_ids = agg['customer_id']
    X = StandardScaler().fit_transform(agg.drop(columns='customer_id'))
    num_features = X.shape[1]

    if embedding_dim > num_features:
        raise ValueError(f"Requested embedding_dim={embedding_dim}, but only {num_features} features available.")

    if embedding_dim < num_features:
        pca = PCA(n_components=embedding_dim)
        X = pca.fit_transform(X)
        if (pca.explained_variance_ratio_ > 1e-6).sum() < embedding_dim:
            raise ValueError(f"PCA found fewer than {embedding_dim} meaningful components.")

    df = pd.DataFrame(X, index=cust_ids)
    df.index.name = 'customer_id'
    return df

# Building Product Embeddings

The product embeddings will constructed by aggregating features within df_reviews by product_id to find a product's:
Mean Star Rating that it received (Customer satisfaction)
Total Helpful Votes given to all its reviews (Quality of customer feedback)
Total Votes given to all its reviews (Review Engagement by customers)
Average sentiment (1 is Positive and 0 is Negative)
Total Number of Vine Reviews (Number of Credible Reviews)
Total Product Sales (Demand for Product)

In [40]:
def build_product_embeddings(df_reviews, embedding_dim):
    agg = df_reviews.groupby('product_id').agg({
        'star_rating': 'mean',
        'helpful_votes': 'sum',
        'total_votes': 'sum',
        'sentiments': lambda x: (x == 'positive').mean(),
        'vine': lambda x: (x == 'Y').sum(),
        'product_id': 'count'  # will be renamed
    }).rename(columns={'product_id': 'sales_volume'}).fillna(0).reset_index()

    prod_ids = agg['product_id']
    X = StandardScaler().fit_transform(agg.drop(columns='product_id'))
    num_features = X.shape[1]

    if embedding_dim > num_features:
        raise ValueError(f"Requested embedding_dim={embedding_dim}, but only {num_features} features available.")

    if embedding_dim < num_features:
        pca = PCA(n_components=embedding_dim)
        X = pca.fit_transform(X)
        if (pca.explained_variance_ratio_ > 1e-6).sum() < embedding_dim:
            raise ValueError(f"PCA found fewer than {embedding_dim} meaningful components.")

    df = pd.DataFrame(X, index=prod_ids)
    df.index.name = 'product_id'
    return df


We form two dense matrices: user_embedding_matrix and product_embedding_matrix. which will be concatenated with the LSTM's internal representations (sequence and candidate embeddings) during training to provide the model with more informative input, improving its ability to personalize recommendations based on both temporal behavior and customer/product features.

In [41]:
# Build customer & product embeddings from your reviews
user_embed_df = build_customer_embeddings(df_reviews, embedding_dim=7)
product_embed_df = build_product_embeddings(df_reviews, embedding_dim=6)

# Convert to embedding matrices aligned with user2idx/item2idx
user_embedding_matrix = np.zeros((df_reviews['user_idx'].max() + 1, 7))
for cid, row in user_embed_df.iterrows():
    if cid in user2idx:
        user_embedding_matrix[user2idx[cid]] = row.values

product_embedding_matrix = np.zeros((df_reviews['item_idx'].max() + 1, 6))
for pid, row in product_embed_df.iterrows():
    if pid in item2idx:
        product_embedding_matrix[item2idx[pid]] = row.values

# Creating DataLoader

In [42]:
class PurchaseSequenceDataset(Dataset):
    def __init__(self, data, user_embedding_matrix, product_embedding_matrix):
        self.data = data
        self.user_embedding_matrix = user_embedding_matrix
        self.product_embedding_matrix = product_embedding_matrix

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        user_id, seq, candidate_id, label = self.data[idx][:4]

        customer_vec = torch.FloatTensor(self.user_embedding_matrix[user_id])
        product_vec = torch.FloatTensor(self.product_embedding_matrix[candidate_id])

        return (
            torch.LongTensor(seq),             # inputs
            torch.LongTensor([candidate_id]),  # candidate
            torch.FloatTensor([label]),        # label
            torch.LongTensor([user_id]),       # user_id for ranking
            torch.FloatTensor(customer_vec),   # external user vec
            torch.FloatTensor(product_vec)     # external product vec
        )

batch_size = 64

train_dataset = PurchaseSequenceDataset(train_data, user_embedding_matrix, product_embedding_matrix)
val_dataset   = PurchaseSequenceDataset(val_data, user_embedding_matrix, product_embedding_matrix)
test_dataset  = PurchaseSequenceDataset(test_data, user_embedding_matrix, product_embedding_matrix)

train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader   = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
test_loader  = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

print(f"Training Data Size: {len(train_loader.dataset)}")
print(f"Validation Data Size: {len(val_loader.dataset)}")
print(f"Testing Data Size: {len(test_loader.dataset)}")

Training Data Size: 77002
Validation Data Size: 19204
Testing Data Size: 22361


# Defining the LSTM model with Custom Embeddings

- **Model**: Predict whether a user will purchase a candidate product based on past interactions (purchase history) and custom user/product features  
- **Inputs**: User's past purchases; Candidate product; Customer embedding; Product embedding  
- **Architecture**:  
  - **Embedding Layers**: Convert user sequence and candidate item indices into dense vectors  
  - **LSTM**: Processes the user's purchase sequence to capture sequential dependencies  
  - **Hidden State**: The output from the LSTM is combined with the candidate's embedding and customer/product embeddings  
  - **Fully Connected Layer**: Computes a single logit representing the purchase probability  
- **Activation**: **Sigmoid**: Converts the logit into a probability between 0 and 1  
- **Objective**: Binary classification (purchase or not purchase)  


In [43]:
class PurchaseCandidateLSTM(nn.Module):
    def __init__(self, num_items, embed_dim, hidden_dim, dropout, num_layers,
                 external_user_dim, external_product_dim):
        super(PurchaseCandidateLSTM, self).__init__()
        self.num_layers = num_layers
        self.hidden_dim = hidden_dim

        self.embedding = nn.Embedding(num_embeddings=num_items, embedding_dim=embed_dim, padding_idx=0)
        self.lstm = nn.LSTM(
            input_size=embed_dim,
            hidden_size=hidden_dim,
            num_layers=num_layers,
            batch_first=True,
            dropout=dropout
        )

        self.dropout = nn.Dropout(dropout)
        self.candidate_embedding = nn.Embedding(num_embeddings=num_items, embedding_dim=embed_dim, padding_idx=0)

        total_input_dim = hidden_dim + embed_dim + external_user_dim + external_product_dim
        self.fc = nn.Linear(total_input_dim, 1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, seq, candidate, customer_vec, product_vec):
        seq_embedded = self.embedding(seq)                      # (batch_size, seq_len, embed_dim)
        lstm_out, _ = self.lstm(seq_embedded)                   # (batch_size, seq_len, hidden_dim)
        seq_repr = self.dropout(lstm_out[:, -1, :])             # (batch_size, hidden_dim)

        candidate_emb = self.candidate_embedding(candidate).squeeze(1)  # (batch_size, embed_dim)

        combined = torch.cat([seq_repr, candidate_emb, customer_vec, product_vec], dim=1)
        logits = self.fc(combined)
        return self.sigmoid(logits)

    def init_hidden(self, batch_size, device):
        h0 = torch.zeros(self.num_layers, batch_size, self.hidden_dim).to(device)
        c0 = torch.zeros(self.num_layers, batch_size, self.hidden_dim).to(device)
        return (h0, c0)


# Evaluation Metrics Functions

- ndcg_at_k: Computes the NDCG for a single list of relevance labels at rank k.
- mean_ndcg_user_at_k: Computes the mean NDCG@k across all users.
- mean_precision_user_at_k: Computes the mean Precision@k across users, where Precision@k is the proportion of relevant items in the top k predictions.
- mean_recall_user_at_k: Computes the mean Recall@k across users, where Recall@k is the proportion of relevant items retrieved in the top k predictions.
- mean_f1_user_at_k: Computes the mean F1 score at k across users, comparing the predicted top k items with the true labels.

In [44]:
def acc(outputs, labels):
    predicted = (outputs.squeeze() >= 0.5).float()
    return (predicted == labels.squeeze()).sum().item()

def ndcg_at_k(relevances, k):
    relevances = np.asarray(relevances, dtype=np.float64)[:k]
    if relevances.size == 0:
        return 0.0
    dcg = np.sum((2 ** relevances - 1) / np.log2(np.arange(2, relevances.size + 2)))
    ideal_relevances = np.sort(relevances)[::-1]
    idcg = np.sum((2 ** ideal_relevances - 1) / np.log2(np.arange(2, ideal_relevances.size + 2)))
    return dcg / idcg if idcg > 0 else 0.0

def mean_ndcg_user_at_k(all_users, all_preds, all_labels, k=10):
    user_data = defaultdict(list)
    for user, pred, label in zip(all_users, all_preds, all_labels):
        user_data[user].append((pred, label))

    ndcg_scores = []
    for user, entries in user_data.items():
        ranked = sorted(entries, key=lambda x: x[0], reverse=True)
        relevances = [label for _, label in ranked]
        dcg = ndcg_at_k(relevances, k)
        ndcg_scores.append(dcg)

    return np.mean(ndcg_scores)

def mean_precision_user_at_k(all_users, all_preds, all_labels, k=10):
    user_data = defaultdict(list)
    for user, pred, label in zip(all_users, all_preds, all_labels):
        user_data[user].append((pred, label))

    precision_list = []
    for user, entries in user_data.items():
        ranked = sorted(entries, key=lambda x: x[0], reverse=True)
        top_k = ranked[:k]
        precision = sum(label for _, label in top_k) / k
        precision_list.append(precision)

    return np.mean(precision_list) if precision_list else 0.0

def mean_recall_user_at_k(all_users, all_preds, all_labels, k=10):
    user_data = defaultdict(list)
    for user, pred, label in zip(all_users, all_preds, all_labels):
        user_data[user].append((pred, label))

    recall_list = []
    for user, entries in user_data.items():
        total_positives = sum(label for _, label in entries)
        if total_positives == 0:
            recall_list.append(0.0)
            continue

        ranked = sorted(entries, key=lambda x: x[0], reverse=True)
        top_k = ranked[:k]
        retrieved_positives = sum(label for _, label in top_k)
        recall = retrieved_positives / total_positives
        recall_list.append(recall)

    return np.mean(recall_list)


from sklearn.metrics import f1_score

def mean_f1_user_at_k(all_users, all_preds, all_labels, k=10):
    user_data = defaultdict(list)
    for user, pred, label in zip(all_users, all_preds, all_labels):
        user_data[user].append((pred, label))

    f1_list = []
    for user, entries in user_data.items():
        ranked = sorted(entries, key=lambda x: x[0], reverse=True)
        top_k = ranked[:k]
        y_true = [label for _, label in top_k]
        y_pred = [1] * len(top_k)
        if sum(y_true) == 0:
            f1_list.append(0.0)
        else:
            f1_list.append(f1_score(y_true, y_pred, zero_division=0))

    return np.mean(f1_list)

# Training and Evaluation

In [45]:
def train_lstm_model(model, train_loader, val_loader, device, epochs, lr, clip, patience=3):
    best_val_loss = float('inf')
    patience_counter = 0

    criterion = nn.BCELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=1e-4)

    for epoch in range(epochs):
        model.train()
        train_loss, train_correct, total_train = 0, 0, 0

        for inputs, candidate, labels, user_ids, customer_vec, product_vec in train_loader:
            inputs = inputs.to(device)
            candidate = candidate.to(device)
            labels = labels.to(device)
            customer_vec = customer_vec.to(device)
            product_vec = product_vec.to(device)

            optimizer.zero_grad()
            output = model(inputs, candidate, customer_vec, product_vec)
            loss = criterion(output, labels.float())
            loss.backward()
            nn.utils.clip_grad_norm_(model.parameters(), clip)
            optimizer.step()

            train_loss += loss.item()
            train_correct += acc(output, labels)
            total_train += labels.size(0)

        train_acc = train_correct / total_train
        avg_train_loss = train_loss / len(train_loader)

        # Validation
        model.eval()
        val_loss, val_correct, total_val = 0, 0, 0
        with torch.no_grad():
            for inputs, candidate, labels, user_ids, customer_vec, product_vec in val_loader:
                inputs = inputs.to(device)
                candidate = candidate.to(device)
                labels = labels.to(device)
                customer_vec = customer_vec.to(device)
                product_vec = product_vec.to(device)

                output = model(inputs, candidate, customer_vec, product_vec)
                loss = criterion(output, labels.float())
                val_loss += loss.item()
                val_correct += acc(output, labels)
                total_val += labels.size(0)

        val_acc = val_correct / total_val
        avg_val_loss = val_loss / len(val_loader)

        print(f"Epoch {epoch+1}/{epochs}")
        print(f"\tTrain Loss: {avg_train_loss:.4f} | Val Loss: {avg_val_loss:.4f}")
        print(f"\tTrain Acc: {train_acc*100:.2f}% | Val Acc: {val_acc*100:.2f}%")

        # Early stopping check, when patience = 3
        if avg_val_loss < best_val_loss:
            best_val_loss = avg_val_loss
            patience_counter = 0
        else:
            patience_counter += 1
            if patience_counter >= patience:
                print(f"Early stopping triggered after {epoch+1} epochs.")
                break

In this method, we evaluate the LSTM-based binary classification model that is designed to predict the probability that a user wil purchase each candidate product, given their past purchase sequence. During evaluation, the model scores candidate products for users, including both positive (purchased) and negative (not purchased) items. It then ranks these candidates by predicted probability and computes ranking-based metrics: Precision@K, Recall@K, F1@K, and NDCG@K.

In [46]:
def evaluate_model(model, test_loader, device, k=10, threshold=0.5, set_name="test"):
    model.eval()
    correct, total = 0, 0
    all_users, all_preds, all_labels = [], [], []
    user_results = defaultdict(lambda: {'preds': [], 'labels': []})
    criterion = nn.BCELoss()
    total_loss = 0

    with torch.no_grad():
        for inputs, candidate, labels, user_ids, customer_vec, product_vec in test_loader:
            inputs = inputs.to(device)
            candidate = candidate.to(device)
            labels = labels.to(device)
            customer_vec = customer_vec.to(device)
            product_vec = product_vec.to(device)

            output = model(inputs, candidate, customer_vec, product_vec)
            loss = criterion(output, labels.float())
            total_loss += loss.item()

            preds = output.squeeze().cpu().numpy()
            labs = labels.squeeze().cpu().numpy()
            user_ids = user_ids.squeeze().cpu().numpy()

            preds_binary = (output.squeeze() >= 0.5).float()
            correct += (preds_binary == labels.squeeze()).sum().item()
            total += labels.size(0)

            for u, p, l in zip(user_ids, preds, labs):
                user_results[int(u)]['preds'].append(p)
                user_results[int(u)]['labels'].append(l)

            all_users.extend(user_ids)
            all_preds.extend(preds)
            all_labels.extend(labs)

    # Flat metrics
    binarized_preds = (np.array(all_preds) >= 0.5).astype(int)
    flat_precision = precision_score(all_labels, binarized_preds, zero_division=0)
    flat_recall = recall_score(all_labels, binarized_preds, zero_division=0)
    flat_f1 = f1_score(all_labels, binarized_preds, zero_division=0)

    # Ranking metrics
    prec_at_k = mean_precision_user_at_k(all_users, all_preds, all_labels, k=k)
    rec_at_k = mean_recall_user_at_k(all_users, all_preds, all_labels, k=k)
    f1_at_k = mean_f1_user_at_k(all_users, all_preds, all_labels, k=k)
    ndcg_at_k_val = mean_ndcg_user_at_k(all_users, all_preds, all_labels, k=k)

    # Aggregate & Log
    avg_loss = total_loss / len(test_loader)
    accuracy = correct / total

    print(f"Test Loss: {avg_loss:.4f}")
    print(f"Flat Precision: {flat_precision:.4f} | Recall: {flat_recall:.4f} | F1: {flat_f1:.4f}")
    print(f"Precision@{k}: {prec_at_k:.4f} | Recall@{k}: {rec_at_k:.4f} | F1@{k}: {f1_at_k:.4f} | NDCG@{k}: {ndcg_at_k_val:.4f}")

    metrics = {
        'val_acc': accuracy,
        'loss': avg_loss,
        'flat_precision': flat_precision,
        'flat_recall': flat_recall,
        'flat_f1': flat_f1,
        f'precision@{k}': prec_at_k,
        f'recall@{k}': rec_at_k,
        f'f1@{k}': f1_at_k,
        f'ndcg@{k}': ndcg_at_k_val,
    }

    # Build path and save
    filename = f"{set_name}_evaluation_results.csv"
    output_path = os.path.join(project_dir, "Model Results", "LSTM Custom Embedding", filename)
    pd.DataFrame([metrics]).to_csv(output_path, index=False)
    print(f"Evaluation metrics saved to {filename}")

    return metrics

# Hyperparameters

In [47]:
embedding_dim = 64
hidden_dim = 256
dropout = 0.5
num_layers = 2
clip = 5
epochs = 12
external_embed_dim = 7 + 6  # customer + product vector dim

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
num_items = df_reviews['item_idx'].max() + 1  # assumes item_idx is 0-based mapped

# Initialize the model
model = PurchaseCandidateLSTM(
    num_items=num_items,
    embed_dim=embedding_dim,
    hidden_dim=hidden_dim,
    dropout=dropout,
    num_layers=num_layers,
    external_user_dim=7,
    external_product_dim=6
).to(device)

print(model)

PurchaseCandidateLSTM(
  (embedding): Embedding(73915, 64, padding_idx=0)
  (lstm): LSTM(64, 256, num_layers=2, batch_first=True, dropout=0.5)
  (dropout): Dropout(p=0.5, inplace=False)
  (candidate_embedding): Embedding(73915, 64, padding_idx=0)
  (fc): Linear(in_features=333, out_features=1, bias=True)
  (sigmoid): Sigmoid()
)


In [48]:
train_lstm_model(
    model=model,
    train_loader=train_loader,
    val_loader=val_loader,
    device=device,
    epochs=epochs,
    lr=0.0003,
    clip=clip
)

Epoch 1/12
	Train Loss: 0.5982 | Val Loss: 0.5701
	Train Acc: 69.42% | Val Acc: 73.68%
Epoch 2/12
	Train Loss: 0.5612 | Val Loss: 0.5526
	Train Acc: 73.25% | Val Acc: 74.05%
Epoch 3/12
	Train Loss: 0.5460 | Val Loss: 0.5507
	Train Acc: 74.19% | Val Acc: 73.97%
Epoch 4/12
	Train Loss: 0.5456 | Val Loss: 0.5491
	Train Acc: 74.78% | Val Acc: 75.51%
Epoch 5/12
	Train Loss: 0.5475 | Val Loss: 0.5566
	Train Acc: 75.38% | Val Acc: 74.91%
Epoch 6/12
	Train Loss: 0.5451 | Val Loss: 0.5523
	Train Acc: 75.63% | Val Acc: 75.95%
Epoch 7/12
	Train Loss: 0.5419 | Val Loss: 0.5585
	Train Acc: 75.91% | Val Acc: 76.17%
Early stopping triggered after 7 epochs.


In [49]:
evaluation_results = evaluate_model(
    model=model,
    test_loader=val_loader,
    device=device,
    k=10,  # top-k
    set_name="val"
)

evaluation_results

Test Loss: 0.5585
Flat Precision: 0.8934 | Recall: 0.5943 | F1: 0.7137
Precision@10: 0.3045 | Recall@10: 0.9757 | F1@10: 0.6895 | NDCG@10: 0.9464
Evaluation metrics saved to val_evaluation_results.csv


{'val_acc': 0.7616642366173714,
 'loss': 0.5584850416230996,
 'flat_precision': 0.8933771723813997,
 'flat_recall': 0.5942511976671527,
 'flat_f1': 0.7137406967290012,
 'precision@10': np.float32(0.30451438),
 'recall@10': np.float32(0.975684),
 'f1@10': np.float64(0.6894987041137931),
 'ndcg@10': np.float64(0.9464418822439133)}

In [50]:
evaluation_results = evaluate_model(
    model=model,
    test_loader=test_loader,
    device=device,
    k=10,  # top-k
    set_name="test"
)

evaluation_results

Test Loss: 0.5814
Flat Precision: 0.5775 | Recall: 0.5465 | F1: 0.5616
Precision@10: 0.1661 | Recall@10: 0.9637 | F1@10: 0.2787 | NDCG@10: 0.7926
Evaluation metrics saved to test_evaluation_results.csv


{'val_acc': 0.874021734269487,
 'loss': 0.5814116349390575,
 'flat_precision': 0.5774647887323944,
 'flat_recall': 0.5465010602847622,
 'flat_f1': 0.5615564202334631,
 'precision@10': np.float32(0.16610703),
 'recall@10': np.float32(0.9636674),
 'f1@10': np.float64(0.27866869052598325),
 'ndcg@10': np.float64(0.7925546721470978)}