In [2]:
import pandas as pd
import numpy as np

# Simulate product catalog
products = {
    'product_id': ['product_' + str(i) for i in range(1, 51)],
    'product_name': ['Product ' + str(i) for i in range(1, 51)],
    'category': np.random.choice(['Electronics', 'Clothing', 'Home', 'Grocery'], 50),
    'price': np.random.uniform(10, 500, 50)
}

product_df = pd.DataFrame(products)
print(product_df.head())

# Save the product data to a CSV file
product_df.to_csv('product_data.csv', index=False)


  product_id product_name  category       price
0  product_1    Product 1  Clothing   53.703998
1  product_2    Product 2  Clothing  430.922022
2  product_3    Product 3      Home  452.852303
3  product_4    Product 4  Clothing  405.971263
4  product_5    Product 5      Home  498.678362


In [3]:
# Simulate customer purchase history
np.random.seed(42)
customers = ['customer_' + str(i) for i in range(1, 101)]
purchase_history = {
    'customer_id': np.random.choice(customers, 1000),
    'product_id': np.random.choice(products['product_id'], 1000),
    'purchase_date': pd.date_range(start='2023-01-01', periods=1000, freq='D')
}

purchase_df = pd.DataFrame(purchase_history)
print(purchase_df.head())

# Save the purchase history to a CSV file
purchase_df.to_csv('purchase_history.csv', index=False)


   customer_id  product_id purchase_date
0  customer_52  product_34    2023-01-01
1  customer_93  product_47    2023-01-02
2  customer_15   product_8    2023-01-03
3  customer_72  product_40    2023-01-04
4  customer_61  product_49    2023-01-05


In [4]:
# Load data from CSV files
product_df = pd.read_csv('product_data.csv')
purchase_df = pd.read_csv('purchase_history.csv')

# Display some basic statistics
print(product_df.describe())
print(purchase_df['customer_id'].value_counts().head())


            price
count   50.000000
mean   245.234841
std    140.782199
min     16.643197
25%    129.686832
50%    214.020234
75%    375.649076
max    498.678362
customer_90    20
customer_62    19
customer_92    19
customer_99    18
customer_33    18
Name: customer_id, dtype: int64


In [5]:
# Create a pivot table for user-product interactions
user_product_matrix = purchase_df.pivot_table(
    index='customer_id', columns='product_id', aggfunc='size', fill_value=0
)
print(user_product_matrix.head())


product_id    product_1  product_10  product_11  product_12  product_13  \
customer_id                                                               
customer_1            0           0           3           0           0   
customer_10           0           0           0           1           0   
customer_100          0           0           0           0           1   
customer_11           0           0           1           0           0   
customer_12           0           1           0           0           0   

product_id    product_14  product_15  product_16  product_17  product_18  ...  \
customer_id                                                               ...   
customer_1             0           0           0           1           1  ...   
customer_10            0           0           0           0           0  ...   
customer_100           0           0           0           0           0  ...   
customer_11            0           0           0           0         

In [6]:
from sklearn.metrics.pairwise import cosine_similarity

# Calculate similarity matrix
similarity_matrix = cosine_similarity(user_product_matrix)
similarity_df = pd.DataFrame(similarity_matrix, index=user_product_matrix.index, columns=user_product_matrix.index)

# Function to get recommendations for a specific customer
def get_recommendations(customer_id, n=5):
    similar_customers = similarity_df[customer_id].sort_values(ascending=False).index[1:n+1]
    recommended_products = user_product_matrix.loc[similar_customers].sum().sort_values(ascending=False).head(n).index
    return product_df[product_df['product_id'].isin(recommended_products)]

# Example: Get recommendations for a customer
recommendations = get_recommendations('customer_1')
print(recommendations)


    product_id product_name  category       price
7    product_8    Product 8  Clothing  448.783986
10  product_11   Product 11   Grocery  155.974126
16  product_17   Product 17      Home  365.055196
28  product_29   Product 29  Clothing  290.942946
35  product_36   Product 36   Grocery   30.436025


In [8]:
recommendations.to_csv('customer_1_recommendations.csv', index=False)
