In [3]:
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import StandardScaler

# Read the three input tables from CSV files in the working directory
customers = pd.read_csv('Customers.csv')
products = pd.read_csv('Products.csv')
transactions = pd.read_csv('Transactions.csv')

# Attach the customer columns to every transaction (inner join on CustomerID)
customer_transactions = transactions.merge(customers, on='CustomerID')

# Build the customer-by-product matrix: one row per CustomerID, one column
# per ProductID, each cell holding the number of transaction rows for that
# pair (0 where the pair never occurs)
customer_product_matrix = (
    customer_transactions
    .groupby(['CustomerID', 'ProductID'])
    .size()
    .unstack(fill_value=0)
)

# Standardize every product column before customers are compared
scaler = StandardScaler()
customer_product_matrix_scaled = scaler.fit_transform(customer_product_matrix)

# Pairwise cosine similarity between the scaled customer rows
similarity_matrix = cosine_similarity(customer_product_matrix_scaled)

# Label both axes of the similarity scores with the customer IDs
similarity_df = pd.DataFrame(
    similarity_matrix,
    index=customer_product_matrix.index,
    columns=customer_product_matrix.index,
)

# Define a function to get top 3 similar customers
def get_top_3_similar(customers_df, similarity_df, customer_id):
    """Return the three highest similarity scores for ``customer_id``.

    The column of ``similarity_df`` labelled ``customer_id`` is ordered from
    the highest to the lowest score, the customer's own entry is removed and
    the first three remaining rows are kept. Fewer than three rows come back
    when fewer than three other customers are present.

    Args:
        customers_df: Not used by this function; kept in the signature for
            existing callers.
        similarity_df: DataFrame of pairwise scores whose column and index
            labels are customer IDs.
        customer_id: Label of the customer to look up.

    Returns:
        A Series indexed by the other customers' IDs, holding their scores
        in descending order.

    Raises:
        KeyError: If ``customer_id`` is not a column of ``similarity_df`` or
            is missing from its index.
    """
    scores = similarity_df[customer_id]
    ranked = scores.sort_values(ascending=False)
    # The customer's own entry must not be reported as a lookalike.
    return ranked.drop(customer_id).iloc[:3]

# Get top 3 similar customers for the first 20 customers
lookalike_map = {}
for customer_id in customers['CustomerID'].head(20):  # First 20 customers
    # A customer that never appears in the transactions has no column in the
    # similarity matrix; record an empty list for them instead of letting a
    # KeyError abort the whole export.
    if customer_id not in similarity_df.columns:
        lookalike_map[customer_id] = []
        continue
    top_3 = get_top_3_similar(customers, similarity_df, customer_id)
    # Pair each lookalike ID with its score. zip() also copes with fewer than
    # three matches, where indexing positions 0..2 raised IndexError.
    lookalike_map[customer_id] = list(zip(top_3.index, top_3.values))

# Flatten the map into one row per (customer, lookalike, score) triple
lookalike_data = [
    [cust_id, lookalike_id, score]
    for cust_id, matches in lookalike_map.items()
    for lookalike_id, score in matches
]

# Create a new DataFrame for the Lookalike.csv
lookalike_df = pd.DataFrame(lookalike_data, columns=['cust_id', 'lookalike_customer', 'score'])

# Export the lookalike recommendations to CSV (without the row index)
lookalike_df.to_csv('Amjad_PV_Lookalike.csv', index=False)
