In [52]:
import pandas as pd
from scipy.sparse import coo_matrix
import implicit
import pickle
import os

# Output directory for the pickled artifacts; created if it does not exist yet.
MODEL_PATH = '../model'
os.makedirs(MODEL_PATH, exist_ok=True)

# Read the raw transactions table.
data = pd.read_csv('../data/transactions.csv')

# Collapse the transactions to one row per (customer_id, product_id) pair;
# 'count' holds how many rows that pair had, i.e. every row is treated as one
# purchase.
interaction_data = (
    data
    .groupby(['customer_id', 'product_id'])
    .size()
    .reset_index(name='count')
)

# Re-encode both id columns as pandas categoricals so that every distinct id
# gets an integer code usable as a matrix index.
customer_ids, product_ids = (
    interaction_data[column].astype("category")
    for column in ('customer_id', 'product_id')
)
customer_codes, product_codes = customer_ids.cat.codes, product_ids.cat.codes

# Sparse customer x product matrix: row index = customer code,
# column index = product code, stored value = purchase count.
interaction_matrix = coo_matrix(
    (interaction_data['count'], (customer_codes, product_codes))
)

# Lookup tables keyed by integer code that give back the original id
# (code -> customer_id and code -> product_id).
customer_map, product_map = (
    dict(enumerate(encoded.cat.categories))
    for encoded in (customer_ids, product_ids)
)

# Pickle the interaction matrix and the two code -> id dictionaries into
# MODEL_PATH, one file per object, in the order listed.
for _file_name, _artifact in (
    ('interaction_matrix.pkl', interaction_matrix),
    ('customer_map.pkl', customer_map),
    ('product_map.pkl', product_map),
):
    with open(os.path.join(MODEL_PATH, _file_name), 'wb') as f:
        pickle.dump(_artifact, f)

# Convert the customer x product matrix to CSR; this is the form passed to fit().
# NOTE(review): implicit >= 0.5 expects a user-item matrix in fit(), while
# earlier releases expected item-user - confirm the installed version matches
# the customer-rows / product-columns orientation used here.
interaction_matrix_csr = interaction_matrix.tocsr()

# ALS model with 20 latent factors, L2 regularization 0.1, 30 training iterations.
als_model = implicit.als.AlternatingLeastSquares(
    factors=20,
    regularization=0.1,
    iterations=30,
)
als_model.fit(interaction_matrix_csr)

# Persist the trained model next to the other artifacts as a pickle file.
with open(os.path.join(MODEL_PATH, 'als_model.pkl'), mode='wb') as f:
    pickle.dump(als_model, f)


  0%|          | 0/30 [00:00<?, ?it/s]