In [None]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense
from tensorflow.keras.optimizers import Adam

In [None]:
# Colab-only: mount Google Drive at /content/drive so files stored there can be read.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
def file_path(category, data_type, base_dir='/content/drive/MyDrive/ShoppingPulse/datasets/raw'):
    """Build the path of the flattened interactions parquet file for one split.

    Args:
        category: Category name inserted into the file name
            (``flattened_data_{category}.parquet``).
        data_type: Split name inserted into the directory name
            (``interactions_{data_type}``); the notebook uses ``'train'`` and ``'valid'``.
        base_dir: Root directory holding the ``interactions_*`` folders. Defaults
            to the Drive location originally hard-coded in this notebook, so
            existing two-argument calls return the same path as before.

    Returns:
        The path as a string.
    """
    # Tolerate a trailing slash on a caller-supplied root so the result never contains '//'.
    root = str(base_dir).rstrip('/')
    return f'{root}/interactions_{data_type}/flattened_data_{category}.parquet'

In [None]:
def load_and_preprocess_data(category):
    """Load the training interactions for ``category`` as a user x item matrix.

    Reads the ``'train'`` parquet file located by ``file_path``, normalises the
    column dtypes, and pivots the ratings into a dense table with one row per
    ``user_id`` and one column per ``parent_asin``. Cells with no rating are
    filled with 0.

    Args:
        category: Category name forwarded to ``file_path``.

    Returns:
        A ``(sparse_matrix, R)`` tuple: the pivoted ``DataFrame`` and its
        underlying values array (``sparse_matrix.values``). Despite the name,
        the frame is dense, so memory grows with users x items.
    """
    dataset_path = file_path(category, 'train')
    df = pd.read_parquet(dataset_path)

    df['user_id'] = df['user_id'].astype(str)
    df['parent_asin'] = df['parent_asin'].astype(str)
    df['rating'] = df['rating'].astype(float)
    # Vectorised epoch-milliseconds conversion instead of a per-row Python lambda.
    df['timestamp'] = pd.to_datetime(df['timestamp'].astype('int64'), unit='ms')

    # DataFrame.pivot raises ValueError when a (user, item) pair occurs more
    # than once. Keep only the most recent rating for each pair so the reshape
    # is always well defined; data without duplicates is unaffected.
    df = (
        df.sort_values('timestamp', kind='stable')
        .drop_duplicates(subset=['user_id', 'parent_asin'], keep='last')
    )

    sparse_matrix = df.pivot(index='user_id', columns='parent_asin', values='rating').fillna(0)
    R = sparse_matrix.values

    return sparse_matrix, R

def build_autoencoder(input_dim, encoding_dim):
    """Create a single-hidden-layer dense autoencoder and its encoder half.

    The network maps an ``input_dim``-wide vector through one ReLU layer of
    ``encoding_dim`` units and back out through a sigmoid layer of
    ``input_dim`` units.

    Args:
        input_dim: Width of the input (and reconstructed output) vector.
        encoding_dim: Number of units in the hidden (bottleneck) layer.

    Returns:
        ``(autoencoder, encoder)``. ``autoencoder`` is compiled with a
        default-configured Adam optimizer and mean-squared-error loss.
        ``encoder`` shares the input and hidden layer and is not compiled.
    """
    inputs = Input(shape=(input_dim,))
    bottleneck = Dense(encoding_dim, activation='relu')(inputs)
    # Sigmoid confines every reconstructed value to the (0, 1) interval.
    reconstruction = Dense(input_dim, activation='sigmoid')(bottleneck)

    full_model = Model(inputs, reconstruction)
    encoder_only = Model(inputs, bottleneck)

    full_model.compile(optimizer=Adam(), loss='mean_squared_error')

    return full_model, encoder_only

def train_autoencoder(autoencoder, data, epochs=50, batch_size=256):
    """Fit ``autoencoder`` to reconstruct ``data`` from itself.

    ``data`` is passed as both the input and the target of ``fit``. Shuffling
    is enabled and ``validation_split=0.1`` is forwarded to ``fit``.

    Args:
        autoencoder: Object exposing a Keras-style ``fit`` method.
        data: Training matrix used as input and target.
        epochs: Number of training epochs.
        batch_size: Mini-batch size.

    Returns:
        None. The value returned by ``fit`` is discarded.
    """
    fit_options = dict(
        epochs=epochs,
        batch_size=batch_size,
        shuffle=True,
        validation_split=0.1,
    )
    autoencoder.fit(data, data, **fit_options)

def generate_predictions(autoencoder, data):
    """Return ``autoencoder.predict(data)`` unchanged.

    Args:
        autoencoder: Object exposing a ``predict`` method.
        data: Input passed straight through to ``predict``.

    Returns:
        Whatever ``predict`` returns for ``data``.
    """
    return autoencoder.predict(data)

def autoencoder_model(category, encoding_dim=50, epochs=50, batch_size=256):
    """Train an autoencoder on one category and return its predicted ratings.

    Loads the user x item rating matrix, trains the autoencoder built by
    ``build_autoencoder`` to reconstruct it, and returns the reconstruction
    for every user/item pair.

    Args:
        category: Category name forwarded to ``load_and_preprocess_data``.
        encoding_dim: Size of the hidden layer.
        epochs: Number of training epochs.
        batch_size: Mini-batch size.

    Returns:
        A ``DataFrame`` of predicted ratings with the same index (users) and
        columns (items) as the training matrix, expressed on the original
        rating scale.
    """
    sparse_matrix, R = load_and_preprocess_data(category)

    # The decoder's output layer is a sigmoid, so it can only emit values in
    # (0, 1). Training it directly on raw ratings makes every target above 1
    # unreachable and saturates the output. Scale the matrix into [0, 1] for
    # training and undo the scaling on the predictions.
    rating_scale = float(R.max()) if R.size else 1.0
    if rating_scale <= 0:
        # All-zero matrix: nothing to scale, and avoid dividing by zero.
        rating_scale = 1.0
    R_scaled = R / rating_scale

    input_dim = R.shape[1]
    # Only the full autoencoder is needed here; the encoder half is unused.
    autoencoder, _ = build_autoencoder(input_dim, encoding_dim)

    train_autoencoder(autoencoder, R_scaled, epochs=epochs, batch_size=batch_size)

    predicted_ratings = generate_predictions(autoencoder, R_scaled) * rating_scale
    predicted_ratings_df = pd.DataFrame(predicted_ratings, columns=sparse_matrix.columns, index=sparse_matrix.index)

    return predicted_ratings_df

In [None]:
def evaluate_recommendations(predicted_ratings_df, validation_df, top_k=10):
    """Score top-k recommendations against held-out purchases (recall@k).

    For every user present in both inputs, the ``top_k`` highest-scored items
    from ``predicted_ratings_df`` are taken as the recommendation list. The
    score is the fraction of that user's validation items that appear in the
    list.

    Without a cutoff the "recommendations" are the entire catalogue, so every
    validation item that exists as a column counts as a hit and the score
    carries no information. ``top_k`` therefore defaults to 10.

    Args:
        predicted_ratings_df: DataFrame indexed by user with one column per
            item, holding predicted scores.
        validation_df: DataFrame with ``user_id`` and ``parent_asin`` columns
            listing held-out interactions.
        top_k: Number of highest-scored items to recommend per user. Pass
            ``None`` to use the full ranked catalogue (the previous behaviour).

    Returns:
        Dict mapping each shared user to a score in [0, 1].

    Raises:
        ValueError: If ``top_k`` is given and is smaller than 1.

    NOTE(review): items the user already rated in training are not excluded
    from the ranking here; confirm whether they should be filtered upstream.
    """
    if top_k is not None and top_k < 1:
        raise ValueError('top_k must be a positive integer or None')

    # Group validation rows once instead of re-filtering the frame per user.
    has_predictions = validation_df['user_id'].isin(predicted_ratings_df.index)
    purchases_by_user = validation_df.loc[has_predictions].groupby('user_id')['parent_asin']

    results = {}
    for user, purchased in purchases_by_user:
        actual_purchases = set(purchased)
        ranked_items = predicted_ratings_df.loc[user].sort_values(ascending=False).index
        # Slicing with None keeps the whole ranking.
        recommended_items = set(ranked_items[:top_k])
        hit_count = len(recommended_items & actual_purchases)
        results[user] = hit_count / len(actual_purchases) if actual_purchases else 0

    return results

In [None]:
# Train on one category, score the predictions against the validation split,
# and report a summary rather than dumping the full per-user dict (which
# floods the output with user IDs).
category = 'Gift_Cards'
predicted_ratings_df = autoencoder_model(category)
validation_df = pd.read_parquet(file_path(category, 'valid'))
evaluation_results = evaluate_recommendations(predicted_ratings_df, validation_df)

score_by_user = pd.Series(evaluation_results, dtype=float)
print(f'Users evaluated: {len(score_by_user)}')
print(f'Mean score: {score_by_user.mean():.4f}')

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
{'AGNZ4YBIVFMEC3GSE4HTBKE5HZXQ': 1.0, 'AG7B4VDNPDBU3RZYHIG2AAZGO3BQ': 1.0, 'AEDWAKTXYZAIGTQ3YLZ7ECFSE3CQ': 1.0, 'AHRRKCWZCZYYPKJY6WJ42XH3S5XA': 1.0, 'AFS7KWQZOOL4K7S2DTTXMFRIXBKA': 1.0, 'AGMNXLIXTD5S4LYANYEPZ3JSETBQ': 1.0, 'AFQIT7VBC6OROF5QNF2B22OU7UOQ': 1.0, 'AFWHB37TASLR6LHXDDHHEC63MROQ': 1.0, 'AHYNEYSZQ3T6H6EEOMLDWNUDQBAA': 1.0, 'AG6HGZC63SAIVTIT6IRBDB2VQV5Q': 1.0, 'AEIM5F3P2LDHS6LB3YK4DF5OZYRQ': 1.0, '