# Imports

In [1]:
! pip install optuna

Collecting optuna
  Downloading optuna-3.6.1-py3-none-any.whl.metadata (17 kB)
Collecting alembic>=1.5.0 (from optuna)
  Downloading alembic-1.13.2-py3-none-any.whl.metadata (7.4 kB)
Collecting colorlog (from optuna)
  Downloading colorlog-6.8.2-py3-none-any.whl.metadata (10 kB)
Collecting Mako (from alembic>=1.5.0->optuna)
  Downloading Mako-1.3.5-py3-none-any.whl.metadata (2.9 kB)
Downloading optuna-3.6.1-py3-none-any.whl (380 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m380.1/380.1 kB[0m [31m7.9 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading alembic-1.13.2-py3-none-any.whl (232 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m233.0/233.0 kB[0m [31m13.6 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading colorlog-6.8.2-py3-none-any.whl (11 kB)
Downloading Mako-1.3.5-py3-none-any.whl (78 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m78.6/78.6 kB[0m [31m4.6 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: Ma

In [2]:
import pandas as pd
import numpy as np
import os
os.environ['KMP_DUPLICATE_LIB_OK'] = 'True'
export_dir = os.getcwd()
from pathlib import Path
import pickle
from collections import defaultdict
import time
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import copy
import optuna
import logging

import matplotlib.pyplot as plt
import importlib

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from torch.utils.data import DataLoader, TensorDataset


In [3]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [4]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

cpu


In [5]:
torch.manual_seed(42)
np.random.seed(42)

# help functions

In [63]:
# a function that wraps the different recommenders types
# returns user's scores with respect to a certain item or for all items
def recommender_run(user_tensor, recommender, item_tensor = None, item_id= None, wanted_output = 'single', **kw):
    output_type=kw['output_type']
    if output_type == 'single':
        if wanted_output == 'single':
            return recommender(user_tensor, item_tensor)
        else:
            return recommender(user_tensor, item_tensor).squeeze()
    else:
        if wanted_output == 'single':
            return recommender(user_tensor).squeeze()[item_id]
        else:
            return recommender(user_tensor).squeeze()

In [64]:
def get_user_recommended_item(user_tensor, recommender, **kw):
    all_items_tensor = kw['all_items_tensor']
    num_items = kw['num_items']
    user_res = recommender_run(user_tensor, recommender, all_items_tensor, None, 'vector', **kw)[:num_items]
    user_tensor = user_tensor[:num_items]
    user_catalog = torch.ones_like(user_tensor) - user_tensor
    user_recommendations = torch.mul(user_res, user_catalog)
    return torch.argmax(user_recommendations)

In [6]:
def get_index_in_the_list(user_tensor, item_tensor, item_id, model):
    scores = model(user_tensor.unsqueeze(0), item_tensor.unsqueeze(0)).cpu().detach().numpy()
    sorted_indices = np.argsort(-scores)  # Sort in descending order
    return np.where(sorted_indices == item_id)[0][0]

In [7]:
def recommender_evaluations(model):
    counter_10 = 0
    counter_50 = 0
    counter_100 = 0
    RR = 0
    PR = 0
    temp_test_array = np.array(test_df)
    n = temp_test_array.shape[0]
    num_items = len(item_encoder.classes_)

    for i in range(n):
        item_id = temp_test_array[i][-2]
        item_tensor = item_matrix[item_id]
        user_tensor = torch.Tensor(temp_test_array[i][:-2]).to(device)
        user_tensor[item_id] = 0

        index = get_index_in_the_list(user_tensor, item_tensor, item_id, model) + 1
        if index <= 10:
            counter_10 += 1
        if index <= 50:
            counter_50 += 1
        if index <= 100:
            counter_100 += 1
        RR += np.reciprocal(index)
        PR += index / num_items

    return counter_10 / n, counter_50 / n, counter_100 / n, RR / n, PR * 100 / n

In [8]:
#LXR based similarity
def find_LXR_mask(user_tensor, item_id, item_tensor, explainer):
    expl_scores = explainer(user_tensor, item_tensor)
    x_masked = user_tensor*expl_scores
    item_sim_dict = {i: x_masked[i].item() for i in range(len(x_masked))}

    return item_sim_dict

In [9]:
def calculate_pos_neg_k(user_tensor, item_id, items_tensor, num_of_bins, explainer, k=20):
    # Initializing masks and computing the user history size
    POS_masked = user_tensor.clone()
    NEG_masked = user_tensor.clone()
    user_hist_size = int(torch.sum(user_tensor))  # The number of positive interactions

    # Creating bins for evaluation
    bins = [0] + [len(x) for x in np.array_split(np.arange(user_hist_size), num_of_bins, axis=0)]

    POS_at_20 = [0] * (num_of_bins + 1)
    NEG_at_20 = [0] * (num_of_bins + 1)
    total_items = 0

    # Use the explainer to find LXR scores
    sim_items = find_LXR_mask(user_tensor, item_id, items_tensor, explainer)

    # Sort items based on LXR scores (descending for POS, ascending for NEG)
    POS_sim_items = list(sorted(sim_items.items(), key=lambda item: item[1], reverse=True))[:user_hist_size]
    NEG_sim_items = list(sorted(sim_items.items(), key=lambda item: item[1], reverse=False))[:user_hist_size]

    for i in range(len(bins)):
        total_items += bins[i]

        # Mask positive items
        POS_masked = torch.zeros_like(user_tensor, dtype=torch.float32, device=user_tensor.device)
        for j in POS_sim_items[:total_items]:
            POS_masked[j[0]] = 1
        POS_masked = user_tensor - POS_masked  # Remove masked items from user history

        # Mask negative items
        NEG_masked = torch.zeros_like(user_tensor, dtype=torch.float32, device=user_tensor.device)
        for j in NEG_sim_items[:total_items]:
            NEG_masked[j[0]] = 1
        NEG_masked = user_tensor - NEG_masked  # Remove masked items from user history

        # Calculate POS and NEG indices (e.g., ranking of the target item after masking)
        POS_index = get_index_in_the_list(POS_masked, user_tensor, item_id, recommender, **kw_dict) + 1
        NEG_index = get_index_in_the_list(NEG_masked, user_tensor, item_id, recommender, **kw_dict) + 1

        POS_at_20[i] = 1 if POS_index <= 20 else 0
        NEG_at_20[i] = 1 if NEG_index <= 20 else 0

    res = [np.array(POS_at_20), np.array(NEG_at_20)]
    return res


In [10]:
def compute_hr_at_k(model, user_tensor, pos_item_tensor, k):
    model.eval()
    hr_at_k = 0

    # Iterate over each user in the batch
    for i in range(user_tensor.size(0)):
        user_id = user_tensor[i].unsqueeze(0)  # Select the user
        pos_item_id = pos_item_tensor[i].unsqueeze(0)  # Select the positive item for the user

        # Generate scores for all items for this user
        all_item_ids = torch.arange(len(item_encoder.classes_)).to(device)
        scores = model(user_id, all_item_ids)

        # Adjust k to be at most the number of available items
        k_adjusted = min(k, scores.size(0))

        # Get the top-k items
        _, top_k_indices = torch.topk(scores, k_adjusted)

        # Check if the positive item is in the top-k predictions
        if pos_item_id in top_k_indices:
            hr_at_k += 1

    # Compute HR@k
    hr_at_k /= user_tensor.size(0)
    return hr_at_k


In [11]:
def validate_pos_at_20(explainer, test_data, k=20):
    pos_at_20 = 0
    for i, sample in enumerate(test_data):
        user_tensor = torch.Tensor(sample[:-1]).to(device)
        user_id = int(sample[-1])
        top1_item_test = top1_test[user_id]
        item_vector = items_array[top1_item_test]
        items_tensor = torch.Tensor(item_vector).to(device)

        expl_scores = explainer(user_tensor.unsqueeze(0), items_tensor.unsqueeze(0))
        # Calculate POS@20 or similar metric
        # Placeholder logic
        if expl_scores[0][top1_item_test] > 0.5:
            pos_at_20 += 1

    return pos_at_20 / len(test_data)

# data import and process

In [None]:
# ratings_data = pd.read_csv('/content/drive/MyDrive/School/DL_methods_apps/project/ratings_Electronics.csv')
# ratings_data.columns = ['user_id', 'item_id', 'rating', 'timestamp']
# print(ratings_data.user_id.nunique(), ratings_data.item_id.nunique())
# ratings_data.head(1)

4201696 476001


Unnamed: 0,user_id,item_id,rating,timestamp
0,A2CX7LUOHB2NDG,321732944,5.0,1341100800


In [None]:
# #delete users with less than 100 rows
# interaction_counts = ratings_data.groupby('user_id').size()
# active_users = interaction_counts[interaction_counts >= 100].index
# filtered_ratings_data = ratings_data[ratings_data['user_id'].isin(active_users)]

# print(filtered_ratings_data.user_id.nunique(), filtered_ratings_data.item_id.nunique())
# filtered_ratings_data.head(1)

289 22635


Unnamed: 0,user_id,item_id,rating,timestamp
117,AT09WGFUM934H,594481813,3.0,1377907200


In [None]:
# # create a list of all unique users and items
# all_users = filtered_ratings_data['user_id'].unique()
# all_items = filtered_ratings_data['item_id'].unique()

# # create a DataFrame with all possible user-item combinations
# user_item_matrix = pd.DataFrame(index=all_users, columns=all_items)
# user_item_matrix = user_item_matrix.fillna(0)  # Initialize all with 0s

# # Fill the matrix with 1s where a user has rated an item
# for index, row in filtered_ratings_data.iterrows():
#     user_item_matrix.at[row['user_id'], row['item_id']] = 1

# # Reset the index and convert the DataFrame back into a long-form DataFrame
# user_item_df = user_item_matrix.reset_index().melt(id_vars='index', value_name='rating')
# user_item_df.columns = ['user_id', 'item_id', 'rating']

# # Now `user_item_df` contains all combinations of users and items with a 'rating' of 1 or 0.
# print(user_item_df.user_id.nunique(), user_item_df.item_id.nunique(), user_item_df.shape)
# user_item_df.head(1)

289 22635 (6541515, 3)


Unnamed: 0,user_id,item_id,rating
0,AT09WGFUM934H,594481813,1


In [28]:
# # filtered_user_item_df = pd.read_csv('/content/drive/MyDrive/School/DL_methods_apps/project/user_item_df_processed.csv')
# filtered_user_item_df = pd.read_csv('/content/drive/MyDrive/Tamar/project/user_item_df_processed.csv')
# print(filtered_user_item_df.user_id.nunique(), filtered_user_item_df.item_id.nunique(), filtered_user_item_df.shape)

289 22635 (6541515, 3)


In [29]:
# #further filter the data
# filtered_user_item_df.columns = ['user_id','item_id','interaction']

# import random
# sampled_items = random.sample(list(filtered_user_item_df['item_id'].unique()), 1500)
# filtered_user_item_df = filtered_user_item_df[
#     filtered_user_item_df['item_id'].isin(sampled_items)
# ]

In [73]:
filtered_user_item_df = pd.read_csv('/content/drive/MyDrive/Tamar/project/user_item_df_final.csv')

In [74]:
print(filtered_user_item_df.user_id.nunique(), filtered_user_item_df.item_id.nunique(), filtered_user_item_df.shape)

289 1500 (433500, 4)


In [75]:
n_users = 289
n_items = 1500

In [32]:
# Encode user_id and item_id as integers
user_encoder = LabelEncoder()
item_encoder = LabelEncoder()

filtered_user_item_df['user_id'] = user_encoder.fit_transform(filtered_user_item_df['user_id'])
filtered_user_item_df['item_id'] = item_encoder.fit_transform(filtered_user_item_df['item_id'])

# Split the data into training and testing sets
train_df, test_df = train_test_split(filtered_user_item_df, test_size=0.2, random_state=42)

# Prepare the training and testing data
X_train = train_df[['user_id', 'item_id']].values
y_train = train_df['interaction'].values
X_test = test_df[['user_id', 'item_id']].values
y_test = test_df['interaction'].values

num_items = len(item_encoder.classes_)
item_matrix = np.eye(num_items)  # Identity matrix representing one-hot encoded items

# Recommender's Handle (same as paper)

## architecture

In [70]:
# class MLP(nn.Module):
#     def __init__(self, hidden_size, device, **kw):
#         super(MLP, self).__init__()
#         user_size = 289  # Adjusted to the number of users
#         item_size = 1500  # Adjusted to the number of items
#         self.device = device
#         self.hidden_size = hidden_size
#         self.users_fc = nn.Linear(user_size, hidden_size, bias=True).to(self.device)
#         self.items_fc = nn.Linear(item_size, hidden_size, bias=True).to(self.device)
#         self.sigmoid = nn.Sigmoid()

#     def forward(self, user_tensor, item_tensor):
#         # Ensure that tensors are correctly sized and converted to float
#         user_tensor = user_tensor.float().to(self.device)  # [batch_size, num_users]
#         item_tensor = item_tensor.float().to(self.device)  # [batch_size, num_items]

#         user_vec = self.users_fc(user_tensor)
#         item_vec = self.items_fc(item_tensor)

#         # Perform matrix multiplication of the resulting vectors
#         output = torch.matmul(user_vec, item_vec.T).to(self.device)
#         return self.sigmoid(output).to(self.device)

In [6]:
class MLP(nn.Module):
    def __init__(self, user_size, item_size, hidden_size, device):
        super(MLP, self).__init__()
        self.device = device
        self.user_size = user_size
        self.item_size = item_size
        self.hidden_size = hidden_size

        # Linear layers
        self.users_fc = nn.Linear(self.user_size, self.hidden_size, bias=True).to(self.device)
        self.items_fc = nn.Linear(self.item_size, self.hidden_size, bias=True).to(self.device)
        self.sigmoid = nn.Sigmoid()

    def forward(self, user_tensor, item_tensor):
        # Ensure the tensors are on the correct device and have the right dtype
        user_tensor = user_tensor.to(self.device).float()
        item_tensor = item_tensor.to(self.device).float()
        # Forward pass through the linear layers
        user_vec = self.users_fc(user_tensor)  # [batch_size, hidden_size]
        item_vec = self.items_fc(item_tensor)  # [batch_size, hidden_size]
        # Multiply the user and item vectors
        output = torch.sum(user_vec * item_vec, dim=1)  # [batch_size]
        return self.sigmoid(output).to(self.device)


## HP tuning (optuna)

In [38]:
# Global dictionaries for storing results
train_losses_dict = {}
test_losses_dict = {}
HR10_dict = {}

def MLP_objective(trial):
    # Hyperparameters to tune
    lr = trial.suggest_float('learning_rate', 0.001, 0.01)
    batch_size = trial.suggest_categorical('batch_size', [32, 64])
    hidden_dim = trial.suggest_categorical('hidden_dim', [32, 64, 128])
    beta = trial.suggest_float('beta', 0, 4)
    epochs = 10

    num_users = len(user_encoder.classes_)
    num_items = len(item_encoder.classes_)

    # Initialize model, optimizer, and loss function
    model = MLP(hidden_dim, num_users=num_users, num_items=num_items, device=device).to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    criterion = nn.BCELoss()

    train_losses = []
    hr10 = []

    num_training = train_df.shape[0]
    num_batches = int(np.ceil(num_training / batch_size))

    for epoch in range(epochs):
        # Shuffle training data
        perm = np.random.permutation(num_training)
        epoch_loss = 0

        for b in range(num_batches):
            optimizer.zero_grad()

            # Create batch
            batch_idx = perm[b * batch_size:] if (b + 1) * batch_size >= num_training else perm[b * batch_size: (b + 1) * batch_size]
            user_ids = torch.LongTensor(train_df.iloc[batch_idx]['user_id'].values).to(device)
            pos_item_ids = torch.LongTensor(train_df.iloc[batch_idx]['item_id'].values).to(device)

            # One-hot encode the user and item IDs on the same device
            user_tensor = torch.eye(num_users, device=device)[user_ids]
            pos_item_tensor = torch.eye(num_items, device=device)[pos_item_ids]

            # Positive predictions
            pos_output = torch.diagonal(model(user_tensor, pos_item_tensor))

            # Calculate loss
            pos_loss = criterion(pos_output, torch.ones_like(pos_output))
            neg_loss = criterion(pos_output, torch.zeros_like(pos_output))

            batch_loss = pos_loss + beta * neg_loss
            batch_loss.backward()
            optimizer.step()

            epoch_loss += batch_loss.item()

        train_losses.append(epoch_loss / num_batches)
        print(f'Epoch {epoch + 1}/{epochs}, Loss: {epoch_loss / num_batches}')

        # Evaluation phase using HR@10
        hr_at_10 = compute_hr_at_k(model, user_tensor, pos_item_tensor, k=10)
        hr10.append(hr_at_10)
        print(f'Epoch {epoch + 1}/{epochs}, HR@10: {hr_at_10}')

        # Early stopping (if necessary)
        if epoch > 5 and hr10[-1] > max(hr10[:-1]):
            print("Early stopping...")
            break

    # Save the best model and return the best HR@10
    torch.save(model.state_dict(), f'/content/drive/MyDrive/Tamar/project/trained_recommenders/trained_best_rec_trial_{trial.number}_epoch_{epoch + 1}_{max(hr10)}.pth')
    return max(hr10)


In [39]:
study = optuna.create_study(direction='maximize')
study.optimize(MLP_objective, n_trials=10)

# Print the best trial results
print("Best trial:")
trial = study.best_trial
print(f"  HR@10: {trial.value}")
print("  Best hyperparameters: ", trial.params)


[I 2024-08-24 14:18:58,288] A new study created in memory with name: no-name-9961e63a-2394-4989-999d-77c2f800c5c8


Epoch 1/10, Loss: 1.952631319915485
Epoch 1/10, HR@10: 1.0


  output = torch.matmul(user_vec, item_vec.T).to(self.device)


Epoch 2/10, Loss: 1.949782993800527
Epoch 2/10, HR@10: 1.0
Epoch 3/10, Loss: 1.951545982425515
Epoch 3/10, HR@10: 1.0
Epoch 4/10, Loss: 1.950940334845302
Epoch 4/10, HR@10: 1.0
Epoch 5/10, Loss: 1.950622779644598
Epoch 5/10, HR@10: 1.0
Epoch 6/10, Loss: 1.9509793326167917
Epoch 6/10, HR@10: 1.0
Epoch 7/10, Loss: 1.9509649297741862
Epoch 7/10, HR@10: 1.0
Epoch 8/10, Loss: 1.9507046168108755
Epoch 8/10, HR@10: 1.0
Epoch 9/10, Loss: 1.9509396847048068
Epoch 9/10, HR@10: 1.0


[I 2024-08-24 14:23:02,014] Trial 0 finished with value: 1.0 and parameters: {'learning_rate': 0.005393857000190963, 'batch_size': 32, 'hidden_dim': 128, 'beta': 2.050872265667402}. Best is trial 0 with value: 1.0.


Epoch 10/10, Loss: 1.9508680800248566
Epoch 10/10, HR@10: 1.0
Epoch 1/10, Loss: 1.5953341441788913
Epoch 1/10, HR@10: 1.0
Epoch 2/10, Loss: 1.6077741323790662
Epoch 2/10, HR@10: 1.0
Epoch 3/10, Loss: 1.5958032478901663
Epoch 3/10, HR@10: 1.0
Epoch 4/10, Loss: 1.6084311600430115
Epoch 4/10, HR@10: 1.0
Epoch 5/10, Loss: 1.5955437116241034
Epoch 5/10, HR@10: 1.0
Epoch 6/10, Loss: 1.6064329977558205
Epoch 6/10, HR@10: 1.0
Epoch 7/10, Loss: 1.5967309030577537
Epoch 7/10, HR@10: 1.0
Epoch 8/10, Loss: 1.6082174141131154
Epoch 8/10, HR@10: 1.0
Epoch 9/10, Loss: 1.595596050221798
Epoch 9/10, HR@10: 1.0


[I 2024-08-24 14:25:08,796] Trial 1 finished with value: 1.0 and parameters: {'learning_rate': 0.00603354974575049, 'batch_size': 64, 'hidden_dim': 128, 'beta': 1.3258095818037274}. Best is trial 0 with value: 1.0.


Epoch 10/10, Loss: 1.6074561133202558
Epoch 10/10, HR@10: 1.0
Epoch 1/10, Loss: 1.551689158442452
Epoch 1/10, HR@10: 1.0
Epoch 2/10, Loss: 1.5516628171271736
Epoch 2/10, HR@10: 1.0
Epoch 3/10, Loss: 1.5517548518463307
Epoch 3/10, HR@10: 1.0
Epoch 4/10, Loss: 1.5524507851085005
Epoch 4/10, HR@10: 1.0
Epoch 5/10, Loss: 1.5515743584887174
Epoch 5/10, HR@10: 1.0
Epoch 6/10, Loss: 1.5516301868894176
Epoch 6/10, HR@10: 1.0
Epoch 7/10, Loss: 1.5524445159405795
Epoch 7/10, HR@10: 1.0
Epoch 8/10, Loss: 1.551605068277798
Epoch 8/10, HR@10: 1.0
Epoch 9/10, Loss: 1.5516029742068087
Epoch 9/10, HR@10: 1.0


[I 2024-08-24 14:29:11,771] Trial 2 finished with value: 1.0 and parameters: {'learning_rate': 0.006494750497228074, 'batch_size': 32, 'hidden_dim': 32, 'beta': 1.2577495608763574}. Best is trial 0 with value: 1.0.


Epoch 10/10, Loss: 1.5524387131935777
Epoch 10/10, HR@10: 1.0
Epoch 1/10, Loss: 0.9055878495043727
Epoch 1/10, HR@10: 1.0
Epoch 2/10, Loss: 0.905205545917739
Epoch 2/10, HR@10: 1.0
Epoch 3/10, Loss: 0.9051868816785078
Epoch 3/10, HR@10: 1.0
Epoch 4/10, Loss: 0.9050768417039686
Epoch 4/10, HR@10: 1.0
Epoch 5/10, Loss: 0.9051711126367288
Epoch 5/10, HR@10: 1.0
Epoch 6/10, Loss: 0.9051935885924434
Epoch 6/10, HR@10: 1.0
Epoch 7/10, Loss: 0.905189059849352
Epoch 7/10, HR@10: 1.0
Epoch 8/10, Loss: 0.9051912030097544
Epoch 8/10, HR@10: 1.0
Epoch 9/10, Loss: 0.9051947735468605
Epoch 9/10, HR@10: 1.0


[I 2024-08-24 14:31:16,822] Trial 3 finished with value: 1.0 and parameters: {'learning_rate': 0.004775710213802556, 'batch_size': 64, 'hidden_dim': 32, 'beta': 0.4559041576995275}. Best is trial 0 with value: 1.0.


Epoch 10/10, Loss: 0.9051837465225891
Epoch 10/10, HR@10: 1.0
Epoch 1/10, Loss: 2.4716682527355216
Epoch 1/10, HR@10: 1.0
Epoch 2/10, Loss: 2.4658890189797176
Epoch 2/10, HR@10: 1.0
Epoch 3/10, Loss: 2.465682812470927
Epoch 3/10, HR@10: 1.0
Epoch 4/10, Loss: 2.4655494230175
Epoch 4/10, HR@10: 1.0
Epoch 5/10, Loss: 2.465538281711579
Epoch 5/10, HR@10: 1.0
Epoch 6/10, Loss: 2.465533334757046
Epoch 6/10, HR@10: 1.0
Epoch 7/10, Loss: 2.465524849405058
Epoch 7/10, HR@10: 1.0
Epoch 8/10, Loss: 2.465494850109063
Epoch 8/10, HR@10: 1.0
Epoch 9/10, Loss: 2.465473462259842
Epoch 9/10, HR@10: 1.0


[I 2024-08-24 14:33:21,122] Trial 4 finished with value: 1.0 and parameters: {'learning_rate': 0.002132070118787784, 'batch_size': 64, 'hidden_dim': 32, 'beta': 3.8388398498063245}. Best is trial 0 with value: 1.0.


Epoch 10/10, Loss: 2.4654711982732684
Epoch 10/10, HR@10: 1.0
Epoch 1/10, Loss: 2.05895060136939
Epoch 1/10, HR@10: 1.0
Epoch 2/10, Loss: 2.060138047874403
Epoch 2/10, HR@10: 1.0
Epoch 3/10, Loss: 2.060336882105875
Epoch 3/10, HR@10: 1.0
Epoch 4/10, Loss: 2.0606485531765237
Epoch 4/10, HR@10: 1.0
Epoch 5/10, Loss: 2.0603370131502996
Epoch 5/10, HR@10: 1.0
Epoch 6/10, Loss: 2.0600648276105007
Epoch 6/10, HR@10: 1.0
Epoch 7/10, Loss: 2.060023875270829
Epoch 7/10, HR@10: 1.0
Epoch 8/10, Loss: 2.0592277048698406
Epoch 8/10, HR@10: 1.0
Epoch 9/10, Loss: 2.0580463454740516
Epoch 9/10, HR@10: 1.0


[I 2024-08-24 14:37:22,615] Trial 5 finished with value: 1.0 and parameters: {'learning_rate': 0.008910981756659109, 'batch_size': 32, 'hidden_dim': 64, 'beta': 2.303301528634996}. Best is trial 0 with value: 1.0.


Epoch 10/10, Loss: 2.0567969408922253
Epoch 10/10, HR@10: 1.0
Epoch 1/10, Loss: 2.2165515364745287
Epoch 1/10, HR@10: 1.0
Epoch 2/10, Loss: 2.266673566274789
Epoch 2/10, HR@10: 1.0
Epoch 3/10, Loss: 2.2229163497548896
Epoch 3/10, HR@10: 1.0
Epoch 4/10, Loss: 2.262539152950351
Epoch 4/10, HR@10: 1.0
Epoch 5/10, Loss: 2.2248004126051604
Epoch 5/10, HR@10: 1.0
Epoch 6/10, Loss: 2.262939505349184
Epoch 6/10, HR@10: 1.0
Epoch 7/10, Loss: 2.2253171479840588
Epoch 7/10, HR@10: 1.0
Epoch 8/10, Loss: 2.262163670702292
Epoch 8/10, HR@10: 1.0
Epoch 9/10, Loss: 2.224366919495871
Epoch 9/10, HR@10: 1.0


[I 2024-08-24 14:39:27,496] Trial 6 finished with value: 1.0 and parameters: {'learning_rate': 0.007852979391799377, 'batch_size': 64, 'hidden_dim': 128, 'beta': 2.8129727040187045}. Best is trial 0 with value: 1.0.


Epoch 10/10, Loss: 2.2613735494633005
Epoch 10/10, HR@10: 1.0
Epoch 1/10, Loss: 2.2209893522897888
Epoch 1/10, HR@10: 1.0
Epoch 2/10, Loss: 2.2204199395599478
Epoch 2/10, HR@10: 1.0
Epoch 3/10, Loss: 2.220443013497969
Epoch 3/10, HR@10: 1.0
Epoch 4/10, Loss: 2.2203820354275474
Epoch 4/10, HR@10: 1.0
Epoch 5/10, Loss: 2.2203400942601275
Epoch 5/10, HR@10: 1.0
Epoch 6/10, Loss: 2.220296597080403
Epoch 6/10, HR@10: 1.0
Epoch 7/10, Loss: 2.2202831248907966
Epoch 7/10, HR@10: 1.0
Epoch 8/10, Loss: 2.2203042123739647
Epoch 8/10, HR@10: 1.0
Epoch 9/10, Loss: 2.220249520410227
Epoch 9/10, HR@10: 1.0


[I 2024-08-24 14:43:29,390] Trial 7 finished with value: 1.0 and parameters: {'learning_rate': 0.0044661994254919166, 'batch_size': 32, 'hidden_dim': 64, 'beta': 2.8904057892399457}. Best is trial 0 with value: 1.0.


Epoch 10/10, Loss: 2.2202850164005867
Epoch 10/10, HR@10: 1.0
Epoch 1/10, Loss: 2.2922967189043337
Epoch 1/10, HR@10: 1.0
Epoch 2/10, Loss: 2.28888834338937
Epoch 2/10, HR@10: 1.0
Epoch 3/10, Loss: 2.2888172078031435
Epoch 3/10, HR@10: 1.0
Epoch 4/10, Loss: 2.288775116034728
Epoch 4/10, HR@10: 1.0
Epoch 5/10, Loss: 2.288749756199734
Epoch 5/10, HR@10: 1.0
Epoch 6/10, Loss: 2.28873552167783
Epoch 6/10, HR@10: 1.0
Epoch 7/10, Loss: 2.288727140708215
Epoch 7/10, HR@10: 1.0
Epoch 8/10, Loss: 2.288713628041566
Epoch 8/10, HR@10: 1.0
Epoch 9/10, Loss: 2.2887129190993676
Epoch 9/10, HR@10: 1.0


[I 2024-08-24 14:47:32,604] Trial 8 finished with value: 1.0 and parameters: {'learning_rate': 0.0010730416752593671, 'batch_size': 32, 'hidden_dim': 32, 'beta': 3.1394425832029214}. Best is trial 0 with value: 1.0.


Epoch 10/10, Loss: 2.2887122626453333
Epoch 10/10, HR@10: 1.0
Epoch 1/10, Loss: 1.4149559637479048
Epoch 1/10, HR@10: 1.0
Epoch 2/10, Loss: 1.4145129016018108
Epoch 2/10, HR@10: 1.0
Epoch 3/10, Loss: 1.41486539638753
Epoch 3/10, HR@10: 1.0
Epoch 4/10, Loss: 1.414898624981565
Epoch 4/10, HR@10: 1.0
Epoch 5/10, Loss: 1.4145007624268027
Epoch 5/10, HR@10: 1.0
Epoch 6/10, Loss: 1.4148663919687843
Epoch 6/10, HR@10: 1.0
Epoch 7/10, Loss: 1.4149215741886998
Epoch 7/10, HR@10: 1.0
Epoch 8/10, Loss: 1.4144777514041844
Epoch 8/10, HR@10: 1.0
Epoch 9/10, Loss: 1.4149020793231613
Epoch 9/10, HR@10: 1.0


[I 2024-08-24 14:49:37,445] Trial 9 finished with value: 1.0 and parameters: {'learning_rate': 0.0037553251409142997, 'batch_size': 64, 'hidden_dim': 64, 'beta': 1.0411302058083782}. Best is trial 0 with value: 1.0.


Epoch 10/10, Loss: 1.4148879993399652
Epoch 10/10, HR@10: 1.0
Best trial:
  HR@10: 1.0
  Best hyperparameters:  {'learning_rate': 0.005393857000190963, 'batch_size': 32, 'hidden_dim': 128, 'beta': 2.050872265667402}


In [72]:
# Retrieve the best hyperparameters from the Optuna trial
best_params = trial.params

# Initialize the MLPRecommender model with the best hyperparameters
model = MLP(user_size=len(user_encoder.classes_),
                       item_size=len(item_encoder.classes_),
                       hidden_size=best_params['hidden_dim'], device= device).to(device)

# Initialize the optimizer with the best learning rate
optimizer = optim.Adam(model.parameters(), lr=best_params['learning_rate'])

best_beta = best_params['beta']

In [73]:
model.hidden_size

128

In [59]:
print(best_params) #print so we can create the dict

{'learning_rate': 0.005393857000190963, 'batch_size': 32, 'hidden_dim': 128, 'beta': 2.050872265667402}


## loading trained rec

In [7]:
best_params = {'learning_rate': 0.005393857000190963, 'batch_size': 32, 'hidden_dim': 128, 'beta': 2.050872265667402}

In [None]:
#create model based on chosen hp
model = MLP(user_size=len(user_encoder.classes_),
                       item_size=len(item_encoder.classes_),
                       hidden_size=best_params['hidden_dim'], device= device).to(device)

optimizer = optim.Adam(model.parameters(), lr=best_params['learning_rate'])

#load best trained model
model.load_state_dict(torch.load(f'/content/drive/MyDrive/Tamar/project/trained_recommenders/trained_best_rec_trial_0_epoch_10_1.0.pth'), )

# LXR adaptation to amazon dataset

## data handling

In [7]:
filtered_user_item_df

Unnamed: 0,user_id,item_id,interaction
0,274,0,0
1,15,0,0
2,216,0,0
3,19,0,0
4,224,0,0
...,...,...,...
433495,52,1499,0
433496,126,1499,0
433497,195,1499,0
433498,174,1499,0


In [11]:
# Load final df
filtered_user_item_df = pd.read_csv('/content/drive/MyDrive/Tamar/project/user_item_df_final.csv')
filtered_user_item_df = filtered_user_item_df[['user_id','item_id','interaction']].copy()

# Ensure all columns are treated as strings for IDs and integers for interactions
filtered_user_item_df['user_id'] = filtered_user_item_df['user_id'].astype(str)
filtered_user_item_df['item_id'] = filtered_user_item_df['item_id'].astype(str)
filtered_user_item_df['interaction'] = filtered_user_item_df['interaction'].astype(int)

# Encode user_id and item_id as integers across the entire dataset
user_encoder = LabelEncoder()
item_encoder = LabelEncoder()

# Fit the encoders on the entire dataset before splitting
filtered_user_item_df['user_id'] = user_encoder.fit_transform(filtered_user_item_df['user_id'])
filtered_user_item_df['item_id'] = item_encoder.fit_transform(filtered_user_item_df['item_id'])

# Now split the data into training and testing sets
train_df, test_df = train_test_split(filtered_user_item_df, test_size=0.2, random_state=42)

# Prepare PyTorch datasets and data loaders
X_train = train_df[['user_id', 'item_id']].values
y_train = train_df['interaction'].values.astype(float)  # Ensure interaction labels are floats
X_test = test_df[['user_id', 'item_id']].values
y_test = test_df['interaction'].values.astype(float)  # Ensure interaction labels are floats

train_dataset = TensorDataset(torch.tensor(X_train[:, 0], dtype=torch.long),
                              torch.tensor(X_train[:, 1], dtype=torch.long),
                              torch.tensor(y_train, dtype=torch.float))
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)

test_dataset = TensorDataset(torch.tensor(X_test[:, 0], dtype=torch.long),
                             torch.tensor(X_test[:, 1], dtype=torch.long),
                             torch.tensor(y_test, dtype=torch.float))
test_loader = DataLoader(test_dataset, batch_size=512, shuffle=False)


In [12]:
print(filtered_user_item_df.user_id.nunique(), filtered_user_item_df.item_id.nunique(), filtered_user_item_df.shape)

289 1500 (433500, 3)


In [13]:
n_users = 289
n_items = 1500

In [14]:
train_array = train_df.to_numpy()
test_array = test_df.to_numpy()

## architecture + loss

In [35]:
class Explainer(nn.Module):
    def __init__(self, user_size, item_size, hidden_size):
        super(Explainer, self).__init__()
        self.user_size = user_size
        self.item_size = item_size

        # Define the fully connected layers for users and items
        self.users_fc = nn.Linear(in_features=self.user_size, out_features=hidden_size)
        self.items_fc = nn.Linear(in_features=self.item_size, out_features=hidden_size)

        # Define the bottleneck neural network
        self.bottleneck = nn.Sequential(
            nn.Tanh(),
            nn.Linear(in_features=hidden_size * 2, out_features=hidden_size),
            nn.Tanh(),
            nn.Linear(in_features=hidden_size, out_features=1),
            nn.Sigmoid()
        )

    def forward(self, user_tensor, item_tensor):
        # Ensure that user_tensor and item_tensor have the correct dimensions
        if len(user_tensor.shape) == 1:
            user_tensor = user_tensor.unsqueeze(0)
        if len(item_tensor.shape) == 1:
            item_tensor = item_tensor.unsqueeze(0)

        if user_tensor.shape[1] != self.user_size or item_tensor.shape[1] != self.item_size:
            raise ValueError(f"Expected user_tensor shape [batch_size, {self.user_size}] and item_tensor shape [batch_size, {self.item_size}], but got {user_tensor.shape} and {item_tensor.shape}.")

        user_output = self.users_fc(user_tensor.float())
        item_output = self.items_fc(item_tensor.float())

        # Combine the user and item outputs
        combined_output = torch.cat((user_output, item_output), dim=-1)

        # Pass the combined output through the bottleneck network
        expl_scores = self.bottleneck(combined_output)
        return expl_scores


In [20]:
class LXR_loss(nn.Module):
    def __init__(self, lambda_pos, lambda_neg, alpha, recommender):
        super(LXR_loss, self).__init__()
        self.lambda_pos = lambda_pos
        self.lambda_neg = lambda_neg
        self.alpha = alpha
        self.recommender = recommender

    def forward(self, user_tensors, items_tensors, items_ids, pos_masks):
        # print(f"LXR_loss forward pass:")
        # print(f"  user_tensors.shape: {user_tensors.shape}")
        # print(f"  items_tensors.shape: {items_tensors.shape}")

        neg_masks = torch.sub(torch.ones_like(pos_masks), pos_masks)
        x_masked_pos = user_tensors * pos_masks
        x_masked_neg = user_tensors * neg_masks

        # print(f"  x_masked_pos.shape: {x_masked_pos.shape}")
        # print(f"  x_masked_neg.shape: {x_masked_neg.shape}")

        x_masked_res_pos = torch.diag(self.recommender(x_masked_pos.float(), items_tensors.float()))
        x_masked_res_neg = torch.diag(self.recommender(x_masked_neg.float(), items_tensors.float()))

        # print(f"  x_masked_res_pos.shape: {x_masked_res_pos.shape}")
        # print(f"  x_masked_res_neg.shape: {x_masked_res_neg.shape}")

        pos_loss = -torch.mean(torch.log(x_masked_res_pos + 1e-8))
        neg_loss = torch.mean(torch.log(x_masked_res_neg + 1e-8))
        l1 = torch.mean(x_masked_pos[user_tensors > 0])

        combined_loss = self.lambda_pos * pos_loss + self.lambda_neg * neg_loss + self.alpha * l1

        return combined_loss, pos_loss, neg_loss, l1


In [21]:
items_array = np.eye(len(item_encoder.classes_))
all_items_tensor = torch.Tensor(items_array).to(device)
# Convert test_df to a NumPy array
test_array = test_df.values

# Create random_sampled_array
num_of_rand_users = 200  # Number of users for evaluations
random_rows = np.random.choice(test_array.shape[0], num_of_rand_users, replace=False)
random_sampled_array = test_array[random_rows]

## HP tune

In [23]:
import os
os.environ['CUDA_LAUNCH_BLOCKING'] = '1'

In [None]:
pip install wandb

In [39]:
import wandb
import math

In [51]:
def lxr_training(trial):
    learning_rate = trial.suggest_float('learning_rate', 0.001, 0.01)
    alpha = trial.suggest_categorical('alpha', [1])
    lambda_neg = trial.suggest_float('lambda_neg', 0, 50)
    lambda_pos = trial.suggest_float('lambda_pos', 0, 50)
    batch_size = trial.suggest_categorical('batch_size', [32, 64, 16])
    explainer_hidden_size = trial.suggest_categorical('explainer_hidden_size', [32, 64, 128])
    epochs = 40

    path = '/content/drive/MyDrive/Tamar/project/'

    # Initialize WandB for logging
    wandb.init(
        project=f"LXR_ecommerce",
        name=f"trial_{trial.number}",
        config={
            'learning_rate': learning_rate,
            'alpha': alpha,
            'lambda_neg': lambda_neg,
            'lambda_pos': lambda_pos,
            'batch_size': batch_size,
            'explainer_hidden_size': explainer_hidden_size,
            'architecture': 'LXR_combined',
            'activation_function': 'Tanh',
            'loss_type': 'logloss',
            'optimize_for': 'pos_at_20',
            'epochs': epochs
        })

    loader = torch.utils.data.DataLoader(train_array, batch_size=batch_size, shuffle=True, drop_last=True)
    num_batches=math.ceil(train_array.shape[0]/batch_size)

    print(f"user_size: {len(user_encoder.classes_)}, item_size: {len(item_encoder.classes_)}")
    print(f"hidden_size (from best_params): {best_params['hidden_dim']}")

    # Initialize the recommender model
    recommender = MLP(user_size=len(user_encoder.classes_),
                      item_size=len(item_encoder.classes_),
                      hidden_size=best_params['hidden_dim'], device=device).to(device)

    # Load the pre-trained weights for the recommender model
    recommender.load_state_dict(torch.load(f'{path}/trained_recommenders/trained_best_rec_trial_0_epoch_10_1.0.pth', map_location=device))
    recommender.eval()
    print(f"Recommender model initialized on device: {recommender.device}")

    # Initialize the explainer and optimizer
    explainer = Explainer(len(user_encoder.classes_), len(item_encoder.classes_), explainer_hidden_size).to(device)
    optimizer_comb = torch.optim.Adam(explainer.parameters(), lr=learning_rate)

    # Pass the recommender model to the loss function
    loss_func = LXR_loss(lambda_pos, lambda_neg, alpha, recommender)

    # creating top item dicts
    top1_train = {}
    top1_test = {}
    for i in range(train_array.shape[0]): #iterate over all the rows in train
        user_index = train_array[i][-1]
        user_tensor = torch.Tensor(train_array[i][:-1]).to(device)
        top1_train[user_index] = int(get_user_recommended_item(user_tensor, recommender, **kw_dict))
    for i in range(test_array.shape[0]):
        user_index = test_array[i][-1]
        user_tensor = torch.Tensor(test_array[i][:-1]).to(device)
        top1_test[user_index] = int(get_user_recommended_item(user_tensor, recommender, **kw_dict))

    print('======================== new run ========================')

    # Lists to store POS@20 and NEG@20 metrics
    run_pos_at_20 = []
    run_neg_at_20 = []
    metric_for_monitoring = []

    for epoch in range(epochs):
        print(f"starting epoch {epoch}")
        if epoch % 15 == 0 and epoch > 0:  # Reduce learning rate every 15 epochs
            learning_rate *= 0.1
            for param_group in optimizer_comb.param_groups:
                param_group['lr'] = learning_rate

        train_loss = 0
        total_pos_loss, total_neg_loss, total_l1_loss = 0, 0, 0
        explainer.train()

        top1_train = train_df.groupby('user_id')['item_id'].agg(lambda x: x.value_counts().idxmax()).to_dict()

        batch_counter = 1
        for batch_index, samples in enumerate(loader):
            if batch_counter == num_batches:
              print (f'final batch - batch num {num_batches}')
            if samples.shape[0] < batch_size:
              print(f"Skipping batch due to unexpected batch size: {samples.shape}")
              continue
            user_ids = samples[:, 0].to(device).long()
            # print(f"Max user_id: {user_ids.max()}, Min user_id: {user_ids.min()}")

            user_tensors = torch.zeros((user_ids.size(0), len(user_encoder.classes_)), device=device)
            user_tensors.scatter_(1, user_ids.unsqueeze(1), 1)
            # print(f'user tensors shape: {user_tensors.shape}, user_tensors dtype: {user_tensors.dtype}')

            top1_item = np.array([top1_train[int(x)] for x in user_ids.cpu().numpy()])
            # print(f"Top1 items: {top1_item}")
            items_vectors = items_array[top1_item]
            items_tensors = torch.tensor(items_vectors, device=device, dtype=torch.float32)
            # print(f'item tensors shape: {items_tensors.shape}, item_tensors dtype: {items_tensors.dtype}')

            if user_tensors.shape[1] != len(user_encoder.classes_) or items_tensors.shape[1] != len(item_encoder.classes_):
              print(f"Skipping due to shape mismatch: {user_tensors.shape}, {items_tensors.shape}")
              continue

            # assert user_tensors.shape == (batch_size, 289), f"Unexpected user_tensors shape: {user_tensors.shape}"
            # assert items_tensors.shape == (batch_size, 1500), f"Unexpected items_tensors shape: {items_tensors.shape}"

            optimizer_comb.zero_grad()
            # print(user_tensors.shape, items_tensors.shape)
            expl_scores = explainer(user_tensors, items_tensors)

            # Calculate loss using the recommender model
            comb_loss, pos_loss, neg_loss, l1 = loss_func(user_tensors, items_tensors, top1_item, expl_scores)
            n = user_tensors.shape[0]
            train_loss += comb_loss.item() * n
            total_pos_loss += pos_loss.item() * n
            total_neg_loss += neg_loss.item() * n
            total_l1_loss += l1.item() * n

            # Backward pass
            # print(f"Comb Loss: {comb_loss.item()}, Pos Loss: {pos_loss.item()}, Neg Loss: {neg_loss.item()}, L1: {l1.item()}")
            comb_loss.backward()
            optimizer_comb.step()

            batch_counter += 1

        # Evaluate on the test set
        explainer.eval()
        POS_at_20_lxr, NEG_at_20_lxr = np.zeros(11), np.zeros(11)
        for j in range(random_sampled_array.shape[0]):
            user_id = random_sampled_array[j][0] #first element of the row is the user id
            # user_tensor = torch.Tensor(random_sampled_array[j][:-1]).to(device)
            user_tensor = torch.zeros(len(user_encoder.classes_)).to(device)
            user_tensor[user_id] = 1.0

            top1_test = test_df.groupby('user_id')['item_id'].agg(lambda x: x.value_counts().idxmax()).to_dict()
            top1_item_test = top1_test[user_id]
            item_vector = torch.Tensor(items_array[top1_item_test]).to(device)

            pos_neg_res = calculate_pos_neg_k(user_tensor, top1_item_test, item_vector, num_of_bins=10, explainer=explainer, k=20)
            POS_at_20_lxr += pos_neg_res[0]
            NEG_at_20_lxr += pos_neg_res[1]

        last_pos_at_20 = np.mean(POS_at_20_lxr) / random_sampled_array.shape[0]
        last_neg_at_20 = np.mean(NEG_at_20_lxr) / random_sampled_array.shape[0]

        run_pos_at_20.append(last_pos_at_20)
        run_neg_at_20.append(last_neg_at_20)
        metric_for_monitoring.append(last_pos_at_20)

        wandb.log({"val/pos_at_20": last_pos_at_20, "val/neg_at_20": last_neg_at_20})

        # Early stopping
        if epoch >= 5 and all(run_pos_at_20[-i-1] < run_pos_at_20[-i] for i in range(3)) and all(run_neg_at_20[-i-1] > run_neg_at_20[-i] for i in range(3)):
            print(f'Early stop at epoch {epoch}')
            break

    # Return best metric value for optimization
    torch.save(explainer.state_dict(), f'{path}/trained_exp/trained_best_exp_trial_{trial.number}_epoch_{epoch + 1}_{np.min(metric_for_monitoring)}.pth')
    return np.min(metric_for_monitoring)


In [None]:
logger = logging.getLogger()
logger.setLevel(logging.INFO)
logger.addHandler(logging.FileHandler(f"explainer_training.log", mode="w"))

# Create Optuna study and optimize
optuna.logging.enable_propagation()
optuna.logging.disable_default_handler()

study = optuna.create_study(direction='minimize')
logger.info("Start optimization.")
study.optimize(lxr_training, n_trials=20)

# Print the best hyperparameters
print("Best hyperparameters: {}".format(study.best_params))
print("Best metric value: {}".format(study.best_value))

## finetune trained original LXR

### handle pre trained model

In [43]:
#same as the original code
class pre_trained_Explainer(nn.Module):
    def __init__(self, user_size, item_size, hidden_size):
        super(pre_trained_Explainer, self).__init__()

        self.users_fc = nn.Linear(in_features = user_size, out_features=hidden_size).to(device)
        self.items_fc = nn.Linear(in_features = item_size, out_features=hidden_size).to(device)
        self.bottleneck = nn.Sequential(
            nn.Tanh(),
            nn.Linear(in_features = hidden_size*2, out_features=hidden_size).to(device),
            nn.Tanh(),
            nn.Linear(in_features = hidden_size, out_features=user_size).to(device),
            nn.Sigmoid()
        ).to(device)


    def forward(self, user_tensor, item_tensor):
        user_output = self.users_fc(user_tensor.float())
        item_output = self.items_fc(item_tensor.float())
        combined_output = torch.cat((user_output, item_output), dim=-1)
        expl_scores = self.bottleneck(combined_output).to(device)
        return expl_scores

In [66]:
hidden_size = 128
embedding_dim = 64
lxr = pre_trained_Explainer(user_size=hidden_size, item_size=hidden_size, hidden_size=hidden_size) #create model
model_dict = lxr.state_dict() #get the current model's state dict - with the current dimentions

In [67]:
# load pre trained
checkpoints_path = '/content/drive/MyDrive/Tamar/project/original paper LXR/checkpoints/'
specific_lxr = 'LXR_ML1M_MLP_12_39_64_11.59908096547193_0.1414854294885049.pt'
state_dict = torch.load(f'{checkpoints_path}{specific_lxr}', map_location=torch.device('cpu'))

#ignore fc layers that mismatch the current dataset's dimentions
pretrained_dict = {k: v for k, v in state_dict.items() if k in model_dict and v.size() == model_dict[k].size()}
model_dict.update(pretrained_dict)

lxr.load_state_dict(model_dict) #load the state dict of the pretrained model minus the fc layers (due to dimention mismatch)

  state_dict = torch.load(f'{checkpoints_path}{specific_lxr}', map_location=torch.device('cpu'))


<All keys matched successfully>

In [17]:
model_dict.keys()

odict_keys(['users_fc.weight', 'users_fc.bias', 'items_fc.weight', 'items_fc.bias', 'bottleneck.1.weight', 'bottleneck.1.bias', 'bottleneck.3.weight', 'bottleneck.3.bias'])

In [56]:
#freeze all layers except fc
for name, param in lxr.named_parameters():
    if name not in ['users_fc.weight', 'users_fc.bias', 'items_fc.weight', 'items_fc.bias']:
        param.requires_grad = False

### handle data

In [21]:
user_ids = torch.tensor(filtered_user_item_df['user_id'].values, dtype=torch.long)
item_ids = torch.tensor(filtered_user_item_df['item_id'].values, dtype=torch.long)
interactions = torch.tensor(filtered_user_item_df['interaction'].values, dtype=torch.float)

In [22]:
from torch.utils.data import Dataset, DataLoader

class AmazonInteractionDataset(Dataset):
    def __init__(self, user_ids, item_ids, interactions):
        self.user_ids = user_ids
        self.item_ids = item_ids
        self.interactions = interactions

    def __len__(self):
        return len(self.user_ids)

    def __getitem__(self, idx):
        return self.user_ids[idx], self.item_ids[idx], self.interactions[idx]

# Instantiate the dataset
dataset = AmazonInteractionDataset(user_ids, item_ids, interactions)

# Create DataLoader
train_loader = DataLoader(dataset, batch_size=32, shuffle=True)

### architecture of explainer added on top

In [72]:
class New_explainer(nn.Module):
    def __init__(self, user_size, item_size, hidden_size, embedding_dim, lxr_model):
        super(New_explainer, self).__init__()

        self.user_size = user_size
        self.item_size = item_size
        self.hidden_size = hidden_size

        # Embedding layers for users and items
        self.user_embedding = nn.Embedding(user_size, embedding_dim).to(device)
        self.item_embedding = nn.Embedding(item_size, embedding_dim).to(device)

        # LXR model
        self.lxr_model = lxr_model

        # Additional layers to refine the explanation scores
        self.refinement_layer_user = nn.Linear(embedding_dim, embedding_dim).to(device)
        self.refinement_layer_item = nn.Linear(embedding_dim, embedding_dim).to(device)

        self.sigmoid = nn.Sigmoid()

    def forward(self, user_ids, item_ids):
        # Convert IDs to embeddings
        user_tensor = self.user_embedding(user_ids)
        item_tensor = self.item_embedding(item_ids)

        # Get LXR's explanation scores
        with torch.no_grad():
            lxr_scores = self.lxr_model(user_tensor, item_tensor)

        # Refinement through additional layers
        expl_scores_user = self.sigmoid(self.refinement_layer_user(user_tensor))
        expl_scores_item = self.sigmoid(self.refinement_layer_item(item_tensor))

        # Apply the explanation scores to the original inputs
        user_tensor_adjusted = user_tensor * expl_scores_user
        item_tensor_adjusted = item_tensor * expl_scores_item

        return user_tensor_adjusted, item_tensor_adjusted


In [73]:
# Instantiate the new explainer
new_explainer = New_explainer(n_users, n_items, 128, 128, lxr)

# Define an optimizer for the new explainer
optimizer = torch.optim.Adam(new_explainer.parameters(), lr=0.001)

In [49]:
# Loss function (using the unique LXR loss function concept)
def lxr_loss_function(f_x, f_xm):
    return torch.mean((f_x - f_xm) ** 2)  # L2 loss for minimizing the distance between recommender's outputs when given original X and when given X*m

In [74]:
def train(num_epochs, optimizer, train_loader, test_loader, lxr, new_explainer, recommender_model):
    best_val_loss = float('inf')  # Initialize best validation loss
    path = '/content/drive/MyDrive/Tamar/project/trained_exp'

    for epoch in range(num_epochs):
        print(f"Epoch {epoch+1}/{num_epochs}")

        # Training phase
        new_explainer.train()  # Set new explainer to training mode
        running_train_loss = 0.0

        for batch in train_loader:
            user_tensor, item_tensor, interaction_tensor = batch

            optimizer.zero_grad()  # Reset gradients

            # Get the adjusted tensors from the explainer
            user_tensor_adjusted, item_tensor_adjusted = new_explainer(user_tensor, item_tensor)

            # Compute f(x) using the original input (without applying explanation scores)
            f_x = recommender_model(user_tensor, item_tensor)

            # Compute f(x*m) using the adjusted tensors from the new explainer
            f_xm_new = recommender_model(user_tensor_adjusted, item_tensor_adjusted)

            # Compute the loss between f(x) and f(x*m)
            loss = lxr_loss_function(f_x, f_xm_new)
            running_train_loss += loss.item()

            # Backward pass and optimization
            loss.backward()
            optimizer.step()

        # Calculate average training loss for the epoch
        avg_train_loss = running_train_loss / len(train_loader)

        # Validation phase
        new_explainer.eval()  # Set new explainer to evaluation mode
        running_val_loss = 0.0

        with torch.no_grad():  # Disable gradient computation for validation
            for batch in test_loader:
                user_tensor, item_tensor, interaction_tensor = batch

                #get results of explainer (m), recommender and recommender for x*m
                user_tensor_adjusted, item_tensor_adjusted = new_explainer(user_tensor, item_tensor)
                f_x = recommender_model(user_tensor, item_tensor)
                f_xm_new = recommender_model(user_tensor_adjusted, item_tensor_adjusted)

                loss = lxr_loss_function(f_x, f_xm_new)
                running_val_loss += loss.item()

        # Calculate average validation loss for the epoch
        avg_val_loss = running_val_loss / len(test_loader)

        print(f'Epoch {epoch+1}/{num_epochs}, Train Loss: {avg_train_loss:.4f}, Validation Loss: {avg_val_loss:.4f}')

        # Check if the current validation loss is the best we've seen so far
        if avg_val_loss < best_val_loss:
            best_val_loss = avg_val_loss
            # Save the model state
            torch.save(new_explainer.state_dict(), f'{path}/exp_epoch_{epoch}_loss_{best_val_loss:.4f}.pth')
            print(f'Best model in epoch {epoch+1} saved with validation loss: {best_val_loss:.4f}')


### finetune and train top explainer

In [75]:
# Initialize the recommender model
recommender = MLP(user_size=len(user_encoder.classes_),
                  item_size=len(item_encoder.classes_),
                  hidden_size=best_params['hidden_dim'], device=device).to(device)

# Load the pre-trained weights for the recommender model
path = '/content/drive/MyDrive/Tamar/project/'
recommender.load_state_dict(torch.load(f'{path}/trained_recommenders/trained_best_rec_trial_0_epoch_10_1.0.pth', map_location=device))
recommender.eval()
print(f"Recommender model initialized on device: {recommender.device}")

Recommender model initialized on device: cpu


  recommender.load_state_dict(torch.load(f'{path}/trained_recommenders/trained_best_rec_trial_0_epoch_10_1.0.pth', map_location=device))


In [1]:
train(50, optimizer, train_loader, test_loader, lxr, new_explainer, recommender)