In [5]:
import pandas as pd
from faker import Faker
import random
import numpy as np
from sklearn.model_selection import train_test_split
from scipy.sparse import csr_matrix
from sklearn.decomposition import TruncatedSVD
from sklearn.metrics import precision_score, recall_score, average_precision_score, ndcg_score
from sklearn.preprocessing import StandardScaler
from sklearn.metrics.pairwise import cosine_similarity

# Initializing faker to generate fake data
fake = Faker()

# Generating a larger dataset
data = []
for _ in range(20000):  # Generating 20,000 rows of data
    customer_id = "CUST" + fake.uuid4()
    product_id = "PROD" + fake.uuid4()
    interaction_type = random.choice(['purchased', 'viewed', 'clicked'])
    interaction_date = fake.date_time_this_year()
    data.append([customer_id, product_id, interaction_type, interaction_date])

# Creating a DataFrame
df = pd.DataFrame(data, columns=['customer_id', 'product_id', 'interaction_type', 'interaction_date'])

# Converting interaction type to numerical values
interaction_map = {'purchased': 3, 'viewed': 2, 'clicked': 1}
df['interaction_value'] = df['interaction_type'].map(interaction_map)

# Creating additional features for content-based filtering
df['interaction_day'] = df['interaction_date'].dt.day
df['interaction_month'] = df['interaction_date'].dt.month
df['interaction_year'] = df['interaction_date'].dt.year

# Normalizing additional features
scaler = StandardScaler()
df[['interaction_day', 'interaction_month', 'interaction_year']] = scaler.fit_transform(df[['interaction_day', 'interaction_month', 'interaction_year']])

# Creating a user-item matrix
user_item_matrix = df.pivot_table(index='customer_id', columns='product_id', values='interaction_value', fill_value=0)

# Converting to sparse matrix
user_item_sparse = csr_matrix(user_item_matrix.values)

# Splitting data into training and testing sets
train_data, test_data = train_test_split(user_item_matrix, test_size=0.2, random_state=42)

# Implementing the recommendation algorithm using Collaborative Filtering and Matrix Factorization
svd = TruncatedSVD(n_components=50, random_state=42)  # Increasing n_components for better performance
train_svd = svd.fit_transform(train_data)

# Generating collaborative filtering predictions
cf_pred_matrix = np.dot(train_svd, svd.components_)
cf_pred_matrix = cf_pred_matrix[:train_data.shape[0], :user_item_matrix.shape[1]]

# Generating content-based filtering predictions using cosine similarity
content_sim_matrix = cosine_similarity(train_data)
content_pred_matrix = np.dot(content_sim_matrix, train_data)

# Adjusting weights for the hybrid approach
alpha = 0.7  # Weight for collaborative filtering
beta = 0.3  # Weight for content-based filtering
combined_pred_matrix = alpha * cf_pred_matrix + beta * content_pred_matrix

# Converting back to DataFrame
pred_df = pd.DataFrame(combined_pred_matrix, index=user_item_matrix.index[:train_data.shape[0]], columns=user_item_matrix.columns)

# Function for recommending products
def recommend_products(customer_id, num_recommendations=5):
    if customer_id in pred_df.index:
        customer_predictions = pred_df.loc[customer_id]
        recommended_products = customer_predictions.sort_values(ascending=False).head(num_recommendations).index.tolist()
        return recommended_products
    else:
        return []

# Example recommendation for a customer with proper formatting
example_customer_id = df['customer_id'].iloc[0]
recommended_products = recommend_products(example_customer_id)
print(f"Recommended products for customer {example_customer_id}:")
for prod in recommended_products:
    print(f"- {prod}")

# Function to evaluate the model
def evaluate_model(test_data, pred_matrix, threshold=2):
    # Binarizing the interaction values based on the threshold
    test_data_binary = (test_data > threshold).astype(int)
    pred_data_binary = (pred_matrix > threshold).astype(int)

    # Flattening the matrices for evaluation
    test_data_flat = test_data_binary.values.flatten()
    pred_data_flat = pred_data_binary.flatten()

    # Calculating Precision and Recall
    precision = precision_score(test_data_flat, pred_data_flat, average='micro')
    recall = recall_score(test_data_flat, pred_data_flat, average='micro')
    
    # Calculating Mean Average Precision (MAP)
    map_score = average_precision_score(test_data_flat, pred_data_flat)
    
    # Calculating Normalized Discounted Cumulative Gain (NDCG)
    ndcg = ndcg_score([test_data_flat], [pred_data_flat])
    
    return precision, recall, map_score, ndcg

# Generating test predictions
test_svd = svd.transform(test_data)
test_cf_pred_matrix = np.dot(test_svd, svd.components_)
test_cf_pred_matrix = test_cf_pred_matrix[:, :user_item_matrix.shape[1]]

# Generating content-based filtering predictions for the test set using cosine similarity
test_content_sim_matrix = cosine_similarity(test_data)
test_content_pred_matrix = np.dot(test_content_sim_matrix, test_data)

# Combining collaborative and content-based predictions for the test set
test_combined_pred_matrix = alpha * test_cf_pred_matrix + beta * test_content_pred_matrix

# Evaluating the model
precision, recall, map_score, ndcg = evaluate_model(test_data, test_combined_pred_matrix)

# Improved metrics display
print(f"Evaluation Metrics:\n"
      f"- Precision: {precision:.5f}\n"
      f"- Recall: {recall:.5f}\n"
      f"- Mean Average Precision (MAP): {map_score:.5f}\n"
      f"- Normalized Discounted Cumulative Gain (NDCG): {ndcg:.5f}")


Recommended products for customer CUST48d7aacb-aef0-4e57-99f0-a3818e72de23:
- PRODee7f8e95-ea19-4460-b234-d061745e61d1
- PRODfb1891de-e816-4cb8-a919-e22adee47529
- PRODb8ecd3d0-67d5-405d-b1f7-6b63f5f134b7
- PROD9be2925c-a49e-4055-a295-303b855d07e7
- PROD02ea63b4-e10b-4388-807f-bd487449eee3
Evaluation Metrics:
- Precision: 0.99998
- Recall: 0.99998
- Mean Average Precision (MAP): 0.00002
- Normalized Discounted Cumulative Gain (NDCG): 0.34649
