In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import torch
import seaborn as sns
import warnings
import datetime
warnings.filterwarnings("ignore")
import tensorflow as tf
from tensorflow.keras import models, layers
import tensorflow_recommenders as tfrs
from data_preprocessing import preprocess_data
from data_preparation import prepare_interaction_data, prepare_product_data, load_processed_data
from utilities import get_product_features, visualisation
from user_tower import UserModel
from item_tower import ItemModel
from two_tower_model import TwoTowerModel as model
from metrics import recall_at_k, precision_at_k

In [2]:
# Run the project's preprocessing step (imported from data_preprocessing).
# NOTE(review): presumably this writes the processed files that
# load_processed_data() reads in the next cell — confirm against the module.
preprocess_data()

Preprocessing data


In [3]:
# Unpack the five objects returned by the project loader, one name per line
# so the order of the returned values is easy to audit.
(
    interactions,
    interactions_valid,
    product_features,
    item_popularity,
    user_popularity,
) = load_processed_data()

In [4]:
# Sanity check: (rows, columns) of the training interactions, the product
# table and the validation interactions, in that order.
tuple(frame.shape for frame in (interactions, product_features, interactions_valid))

((185071, 22), (13623, 20), (5547, 22))

In [5]:
# Apply the same interaction preparation to the training and validation
# frames (both receive the user and item popularity tables), then prepare the
# product table with the item popularity table.
# NOTE: the inputs are rebound to their prepared versions, as before.
interactions, interactions_valid = (
    prepare_interaction_data(frame, user_popularity, item_popularity)
    for frame in (interactions, interactions_valid)
)
product_features = prepare_product_data(product_features, item_popularity)

In [6]:
def _as_tf_dataset(frame):
    """Convert a DataFrame into a tf.data.Dataset sliced from its column lists."""
    return tf.data.Dataset.from_tensor_slices(frame.to_dict("list"))


interactions_tf = _as_tf_dataset(interactions)
interactions_valid_tf = _as_tf_dataset(interactions_valid)
products_tf = _as_tf_dataset(product_features)

In [7]:
# Both towers are given the same embedding width; keep it in one place.
EMBEDDING_DIM = 64

customer_model = UserModel(interactions_tf, embedding_dim=EMBEDDING_DIM)
product_model = ItemModel(products_tf, embedding_dim=EMBEDDING_DIM)

In [8]:
# Replace the raw product dataset with the output of the project helper
# get_product_features (imported from utilities).
# NOTE(review): this rebinds `products_tf` to a transformation of itself, so
# running the cell twice applies the helper twice — re-run the dataset
# construction cell first if this cell needs to be executed again.
products_tf = get_product_features(products_tf)

In [9]:
# The import cell aliases the TwoTowerModel class as `model`, and the original
# `model = model(...)` then rebound that same name to the instance. A second
# run of the cell would therefore call the instance instead of the constructor.
# Importing the class under its real name keeps this cell re-runnable while
# still exposing the instance as `model` for the cells below.
from two_tower_model import TwoTowerModel

model = TwoTowerModel(customer_model, product_model, products_tf)

In [None]:
# Enable mixed precision training through the stable Keras API; the
# `mixed_precision.experimental` module (Policy / set_policy) is deprecated.
# NOTE(review): a global policy only applies to layers created after it is
# set. The towers were built in earlier cells, so move this call above the
# model-construction cells if mixed precision is actually required.
from tensorflow.keras import mixed_precision
mixed_precision.set_global_policy('mixed_float16')

BATCH_SIZE = 128

model.compile(optimizer=tf.keras.optimizers.legacy.Adam(0.01))
# Validation data must be batched like the training data: an unbatched
# dataset would feed one un-batched example per step to a model that is
# trained on batches.
history = model.fit(
    interactions_tf.batch(BATCH_SIZE),
    epochs=20,
    validation_data=interactions_valid_tf.batch(BATCH_SIZE),
)


Epoch 1/20

: 

In [None]:
import json

# Persist the per-epoch metrics recorded by the last fit() call.
training_log = model.history.history
with open('training_history.json', 'w') as history_file:
    json.dump(training_log, history_file)

## Retrieval with KNN using FAISS

In [None]:
import faiss

# Generate item embeddings. FAISS requires a C-contiguous float32 matrix of
# shape (n_items, dim).
item_embeddings = model.get_item_embedding(products_tf)
item_embeddings = np.ascontiguousarray(item_embeddings, dtype='float32')
assert item_embeddings.ndim == 2, "expected one embedding row per item"

# Create the faiss index.
# An exact inner-product index is used instead of IndexFlatL2 so that
# neighbours are ranked by the user·item dot product.
# NOTE(review): this assumes TwoTowerModel scores pairs by dot product (the
# usual choice for two-tower retrieval) — confirm against two_tower_model.py.
# If the towers L2-normalise their outputs, both metrics give the same ranking.
item_embedding_dim = item_embeddings.shape[1]  # Dimensionality of embeddings
index = faiss.IndexFlatIP(item_embedding_dim)  # Build the index

# Add the item embeddings to the index; row i of the index is item_embeddings[i].
index.add(item_embeddings)


In [None]:
# Collapse the validation interactions to one row per visitor, holding the set
# of items that visitor interacted with (the ground truth for recall@k).
# NOTE: this cell rebinds `interactions_valid`; re-run the data preparation
# cell before executing it a second time.

# The original cell selected 'visitor_id' but grouped by 'visitorid', so one of
# the two lookups raised KeyError. Use whichever spelling the frame carries and
# normalise it to 'visitor_id', the key used for the user_popularity merge.
user_col = 'visitor_id' if 'visitor_id' in interactions_valid.columns else 'visitorid'

interactions_valid = (
    interactions_valid[[user_col, 'itemid']]
    .drop_duplicates()
    .groupby(user_col)['itemid']
    .apply(set)
    .reset_index()
    .rename(columns={user_col: 'visitor_id', 'itemid': 'relevant_items'})
    .merge(user_popularity, on='visitor_id', how='left')
)

# `relevant_items` holds variable-size Python sets, which cannot be converted
# to a tensor; it is evaluation ground truth, not a model input, so it is left
# out of the dataset fed to the user tower.
interaction_valid_tf = tf.data.Dataset.from_tensor_slices(
    interactions_valid.drop(columns=['relevant_items']).to_dict("list")
)

In [None]:
# Embed the validation visitors with the model, then materialise the result
# as a float32 NumPy array (the dtype FAISS searches with).
raw_user_embeddings = model.get_user_embedding(interaction_valid_tf)
validation_user_embeddings = np.array(raw_user_embeddings).astype(np.float32)

In [None]:
k_list = [1, 5, 10, 20, 50]

# Map FAISS row positions back to item ids through the product DataFrame.
# The original indexed `products_tf` with `.iloc`, but that name holds the
# tf.data object built earlier, not a DataFrame.
# NOTE(review): assumes row i of the index corresponds to row i of
# `product_features` (i.e. the item embeddings were produced in that order)
# — confirm against get_item_embedding.
item_ids = product_features['itemid'].to_numpy()

# Search once for all users at the largest cut-off instead of once per user
# per k: FAISS returns neighbours best-first, so the top-k for any smaller k
# is a prefix of this result. Clamp to the index size so no -1 padding
# (returned when fewer than k neighbours exist) is looked up.
max_k = min(max(k_list), index.ntotal)
_, neighbour_rows = index.search(
    np.ascontiguousarray(validation_user_embeddings, dtype='float32'), max_k
)

for k in k_list:
    recalls = [
        recall_at_k(relevant_items, item_ids[rows[:k]], k)
        for rows, relevant_items in zip(neighbour_rows, interactions_valid['relevant_items'])
    ]
    print(f"Recall@{k}: {np.mean(recalls)}")
        