# In [None]:
# Import necessary libraries
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import StandardScaler

# Read the three input tables (customers, products, transactions) from CSV.
customers, products, transactions = (
    pd.read_csv(csv_name)
    for csv_name in ('Customers.csv', 'Products.csv', 'Transactions.csv')
)

# Build one summary row per customer for the lookalike model:
#   TotalValue       - sum of the customer's transaction values
#   TransactionCount - number of transactions
#   ProductID        - most frequently purchased product (the first value
#                      returned by mode() when several products tie)
transaction_summary = (
    transactions
    .groupby('CustomerID')
    .agg(
        TotalValue=('TotalValue', 'sum'),
        TransactionCount=('TransactionID', 'count'),
        ProductID=('ProductID', lambda purchased: purchased.mode()[0]),
    )
    .reset_index()
)

# Attach the summary to the customer profiles. This is an inner join, so
# customers with no transactions do not appear in customer_data.
customer_data = pd.merge(customers, transaction_summary, on='CustomerID')

# Bring in the product attributes of each customer's most-purchased product.
# Left join: customers are kept even when the product has no match.
customer_data = pd.merge(customer_data, products, how='left', on='ProductID')

# Numeric features the similarity is computed on.
features = customer_data.loc[:, ['TotalValue', 'TransactionCount', 'Price']]

# Standardise the features so each one is on a comparable scale.
scaler = StandardScaler()
scaled_features = scaler.fit_transform(features)

# Pairwise cosine similarity between customers; row/column i corresponds to
# row i of customer_data.
similarity_matrix = cosine_similarity(scaled_features)

# Function to get lookalikes
def get_lookalikes(customer_id, top_n=3):
    """Return the customers most similar to ``customer_id``.

    Reads the module-level ``customer_data`` DataFrame (for the
    ``CustomerID`` column) and ``similarity_matrix``; row ``i`` of the
    matrix is expected to correspond to the ``i``-th row of
    ``customer_data``.

    Args:
        customer_id: Value to look up in ``customer_data['CustomerID']``.
            If it occurs more than once, the first occurrence is used.
        top_n: Maximum number of lookalikes to return. Values <= 0 yield an
            empty list; fewer than ``top_n`` pairs are returned when there
            are not enough other customers.

    Returns:
        A list of ``(CustomerID, similarity_score)`` tuples ordered from
        most to least similar. The queried customer's own row is never
        included.

    Raises:
        IndexError: If ``customer_id`` is not present in ``customer_data``.
    """
    # Look up the row *position* (not the index label) so the value is valid
    # for both the NumPy matrix and ``iloc`` whatever index the frame carries.
    positions = (customer_data['CustomerID'] == customer_id).to_numpy().nonzero()[0]
    if positions.size == 0:
        raise IndexError(f"CustomerID {customer_id!r} not found in customer_data")
    customer_index = int(positions[0])

    row_scores = similarity_matrix[customer_index]

    # Sort by descending similarity. A stable sort keeps tied customers in
    # their original row order, so results are deterministic.
    ranked = (-row_scores).argsort(kind='stable')

    # Drop the customer's own row explicitly instead of assuming it sorts
    # last: another customer can tie with (or, through rounding, exceed) the
    # self-similarity score, which would otherwise return the customer as
    # their own lookalike and discard a genuine match.
    ranked = ranked[ranked != customer_index][:max(top_n, 0)]

    lookalike_ids = customer_data['CustomerID'].iloc[ranked]
    return list(zip(lookalike_ids, row_scores[ranked]))

# Map each of the first 20 customers in customer_data to its lookalike list
# of (CustomerID, score) pairs.
lookalike_results = {
    customer_id: get_lookalikes(customer_id)
    for customer_id in customer_data['CustomerID'].head(20)
}

# Flatten the mapping into one [customer, lookalike, score] row per pair.
lookalike_data = []
for customer_id, lookalikes in lookalike_results.items():
    lookalike_data.extend(
        [customer_id, lookalike_id, score] for lookalike_id, score in lookalikes
    )

# Tabulate the rows and write them out without the index column.
lookalike_df = pd.DataFrame(
    lookalike_data,
    columns=['CustomerID', 'LookalikeID', 'SimilarityScore'],
)
lookalike_df.to_csv('Lookalike.csv', index=False)

# Show the lookalike table for the first 20 customers.
print(lookalike_df)