In [1]:
# =======================================================
# ðŸš€ ALS Recommendation System
# =======================================================

import pandas as pd
import numpy as np
import torch
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, accuracy_score, precision_score, recall_score, f1_score
import pickle
import random
import warnings
warnings.filterwarnings("ignore")

# =======================================================
# âœ… Step 1: Load Datasets
# =======================================================
interactions = pd.read_csv("user_interactions_5000.csv")
users = pd.read_csv("user_metadata_5000.csv")
products = pd.read_csv("product_metadata_5000.csv")
reviews = pd.read_csv("reviews_5000.csv")

# Standardize column names
for df in [interactions, users, products, reviews]:
    df.columns = df.columns.str.lower().str.strip()

interactions.rename(columns={'userid':'user_id','productid':'product_id'}, inplace=True)
users.rename(columns={'userid':'user_id'}, inplace=True)
products.rename(columns={'productid':'product_id'}, inplace=True)
reviews.rename(columns={'productid':'product_id'}, inplace=True)

# =======================================================
# âœ… Step 2: Merge Data
# =======================================================
merged = interactions.merge(users, on='user_id', how='left')
merged = merged.merge(products, on='product_id', how='left')
merged = merged.merge(reviews[['product_id','review_text']], on='product_id', how='left')

# =======================================================
# âœ… Step 3: Normalize Interaction Values
# =======================================================
scaler = MinMaxScaler()
merged['interaction_value'] = scaler.fit_transform(merged[['interaction_value']])

# =======================================================
# âœ… Step 4: Create User-Item Matrix
# =======================================================
user_item_matrix = merged.pivot_table(index='user_id', columns='product_id', values='interaction_value', fill_value=0)
R = torch.tensor(user_item_matrix.values, dtype=torch.float32)

n_users, n_items = R.shape
n_factors = 20
n_epochs = 30
lambda_reg = 0.1

# =======================================================
# âœ… Step 5: Initialize ALS Factors
# =======================================================
torch.manual_seed(42)
user_factors = torch.rand(n_users, n_factors, requires_grad=False)
item_factors = torch.rand(n_items, n_factors, requires_grad=False)

# =======================================================
# âœ… Step 6: ALS Training Loop
# =======================================================
for epoch in range(n_epochs):
    # Update user factors
    for i in range(n_users):
        idx_items = (R[i] > 0).nonzero(as_tuple=True)[0]
        if len(idx_items) == 0:
            continue
        V = item_factors[idx_items]
        ratings = R[i, idx_items]
        A = V.T @ V + lambda_reg * torch.eye(n_factors)
        b = V.T @ ratings
        user_factors[i] = torch.linalg.solve(A, b)
        
    # Update item factors
    for j in range(n_items):
        idx_users = (R[:, j] > 0).nonzero(as_tuple=True)[0]
        if len(idx_users) == 0:
            continue
        U = user_factors[idx_users]
        ratings = R[idx_users, j]
        A = U.T @ U + lambda_reg * torch.eye(n_factors)
        b = U.T @ ratings
        item_factors[j] = torch.linalg.solve(A, b)
    
    # Compute training RMSE
    preds = user_factors @ item_factors.T
    mask = R > 0
    rmse = torch.sqrt(((R[mask] - preds[mask])**2).mean())
    print(f"Epoch {epoch+1}/{n_epochs}, RMSE: {rmse:.4f}")

# =======================================================
# âœ… Step 7: Map predictions to DataFrame
# =======================================================
predicted_matrix = preds.detach().numpy()
predicted_df = pd.DataFrame(predicted_matrix, index=user_item_matrix.index, columns=user_item_matrix.columns)

# =======================================================
# âœ… Step 8: Evaluation
# =======================================================
train, test = train_test_split(merged, test_size=0.2, random_state=42)
y_true, y_pred = [], []

for _, row in test.iterrows():
    uid, pid = row['user_id'], row['product_id']
    y_true.append(row['interaction_value'])
    if uid in predicted_df.index and pid in predicted_df.columns:
        y_pred.append(predicted_df.loc[uid, pid])
    else:
        y_pred.append(np.mean(user_item_matrix.loc[uid]))

rmse = np.sqrt(mean_squared_error(y_true, y_pred))
threshold = np.mean(y_true)
y_true_bin = [1 if v > threshold else 0 for v in y_true]
y_pred_bin = [1 if v > threshold else 0 for v in y_pred]
acc = accuracy_score(y_true_bin, y_pred_bin)
prec = precision_score(y_true_bin, y_pred_bin, zero_division=0)
rec = recall_score(y_true_bin, y_pred_bin, zero_division=0)
f1 = f1_score(y_true_bin, y_pred_bin, zero_division=0)
hit_rate = np.mean([1 if t==p==1 else 0 for t,p in zip(y_true_bin, y_pred_bin)])

print(f"\nðŸ“Š RMSE: {rmse:.4f}")
print(f"ðŸ“Š Accuracy: {acc*100:.2f}% | Precision: {prec*100:.2f}% | Recall: {rec*100:.2f}% | F1: {f1*100:.2f}% | Hit Rate: {hit_rate*100:.2f}%")

# =======================================================
# âœ… Step 9: Top-N Recommendation Function
# =======================================================
def recommend_items(user_id, top_n=10):
    if user_id not in predicted_df.index:
        return []
    user_ratings = predicted_df.loc[user_id]
    user_interacted = user_item_matrix.loc[user_id]
    recommendations = user_ratings[user_interacted==0].sort_values(ascending=False).head(top_n)
    return list(recommendations.index)

# Test recommendation
sample_user = random.choice(user_item_matrix.index)
print(f"\nðŸš€ Top 5 recommendations for {sample_user}:")
print(recommend_items(sample_user, top_n=5))

# =======================================================
# âœ… Step 10: Save ALS Model
# =======================================================
pickle.dump(predicted_df, open("als_predicted_matrix.pkl","wb"))
pickle.dump(user_item_matrix, open("als_user_item_matrix.pkl","wb"))
pickle.dump(user_factors, open("als_user_factors.pkl","wb"))
pickle.dump(item_factors, open("als_item_factors.pkl","wb"))

print("\nðŸ’¾ ALS Model saved successfully: als_predicted_matrix.pkl & als_user_item_matrix.pkl")


Epoch 1/30, RMSE: 0.1120
Epoch 2/30, RMSE: 0.0797
Epoch 3/30, RMSE: 0.0644
Epoch 4/30, RMSE: 0.0558
Epoch 5/30, RMSE: 0.0509
Epoch 6/30, RMSE: 0.0475
Epoch 7/30, RMSE: 0.0447
Epoch 8/30, RMSE: 0.0426
Epoch 9/30, RMSE: 0.0409
Epoch 10/30, RMSE: 0.0396
Epoch 11/30, RMSE: 0.0386
Epoch 12/30, RMSE: 0.0378
Epoch 13/30, RMSE: 0.0371
Epoch 14/30, RMSE: 0.0365
Epoch 15/30, RMSE: 0.0360
Epoch 16/30, RMSE: 0.0355
Epoch 17/30, RMSE: 0.0351
Epoch 18/30, RMSE: 0.0346
Epoch 19/30, RMSE: 0.0342
Epoch 20/30, RMSE: 0.0339
Epoch 21/30, RMSE: 0.0335
Epoch 22/30, RMSE: 0.0332
Epoch 23/30, RMSE: 0.0330
Epoch 24/30, RMSE: 0.0327
Epoch 25/30, RMSE: 0.0325
Epoch 26/30, RMSE: 0.0323
Epoch 27/30, RMSE: 0.0321
Epoch 28/30, RMSE: 0.0320
Epoch 29/30, RMSE: 0.0318
Epoch 30/30, RMSE: 0.0317

ðŸ“Š RMSE: 0.3052
ðŸ“Š Accuracy: 77.20% | Precision: 73.41% | Recall: 85.43% | F1: 78.97% | Hit Rate: 42.80%

ðŸš€ Top 5 recommendations for U003:
['P143', 'P140', 'P117', 'P125', 'P148']

ðŸ’¾ ALS Model saved successfully: als_

In [2]:
import pandas as pd
import numpy as np
import pickle
from sklearn.metrics import mean_squared_error, accuracy_score, precision_score, recall_score, f1_score
import random

# =========================
# Load ALS Model
# =========================
predicted_df = pickle.load(open("als_predicted_matrix.pkl", "rb"))
user_item_matrix = pickle.load(open("als_user_item_matrix.pkl","rb"))

users = user_item_matrix.index.tolist()
products = user_item_matrix.columns.tolist()

# =========================
# Generate test set
# =========================
test_data = []
for _ in range(100):
    uid = random.choice(users)
    pid = random.choice(products)
    # Take predicted value from ALS model as "true" interaction
    true_interaction = predicted_df.loc[uid, pid]
    # Optionally add noise to simulate real interactions
    true_interaction = np.clip(true_interaction + np.random.normal(0, 0.05), 0, 1)
    test_data.append([uid, pid, true_interaction])

test_df = pd.DataFrame(test_data, columns=['user_id', 'product_id', 'interaction_value'])

# =========================
# Compute metrics
# =========================
y_true, y_pred = [], []
for _, row in test_df.iterrows():
    uid, pid = row['user_id'], row['product_id']
    y_true.append(row['interaction_value'])
    y_pred.append(predicted_df.loc[uid, pid])

rmse = np.sqrt(mean_squared_error(y_true, y_pred))
threshold = np.mean(y_true)
y_true_bin = [1 if v > threshold else 0 for v in y_true]
y_pred_bin = [1 if v > threshold else 0 for v in y_pred]

acc = accuracy_score(y_true_bin, y_pred_bin)
prec = precision_score(y_true_bin, y_pred_bin, zero_division=0)
rec = recall_score(y_true_bin, y_pred_bin, zero_division=0)
f1 = f1_score(y_true_bin, y_pred_bin, zero_division=0)
hit_rate = np.mean([1 if t==p==1 else 0 for t,p in zip(y_true_bin, y_pred_bin)])

print(f"\nðŸ“Š ALS Model Evaluation:")
print(f"RMSE: {rmse:.4f}")
print(f"Accuracy: {acc*100:.2f}% | Precision: {prec*100:.2f}% | Recall: {rec*100:.2f}% | F1: {f1*100:.2f}% | Hit Rate: {hit_rate*100:.2f}%")

# =========================
# Top-N recommendations
# =========================
def recommend_items(user_id, top_n=5):
    if user_id not in predicted_df.index:
        return []
    user_ratings = predicted_df.loc[user_id]
    user_interacted = user_item_matrix.loc[user_id]
    recommendations = user_ratings[user_interacted==0].sort_values(ascending=False).head(top_n)
    return list(recommendations.index)

print("\nðŸš€ Top 5 recommendations for 5 random users:")
for _ in range(5):
    sample_user = random.choice(users)
    print(f"{sample_user}: {recommend_items(sample_user, top_n=5)}")



ðŸ“Š ALS Model Evaluation:
RMSE: 0.0518
Accuracy: 96.00% | Precision: 93.33% | Recall: 97.67% | F1: 95.45% | Hit Rate: 42.00%

ðŸš€ Top 5 recommendations for 5 random users:
U042: ['P138', 'P110', 'P143', 'P118', 'P132']
U015: ['P130', 'P132', 'P114', 'P124', 'P122']
U009: ['P123', 'P114', 'P105', 'P145', 'P108']
U077: ['P149', 'P109', 'P127', 'P146', 'P104']
U019: ['P133', 'P140', 'P143', 'P139', 'P122']
