In [17]:
pip install scikit-learn




[notice] A new release of pip is available: 24.3.1 -> 25.0
[notice] To update, run: python.exe -m pip install --upgrade pip





In [16]:
# Import Libraries
import os
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import StandardScaler

# Load the customer, product and transaction datasets.
# DATA_DIR is the single place to edit when the CSV files live somewhere else;
# the resolved paths are identical to the previously hard-coded ones.
DATA_DIR = "D:/zeotap"
customers = pd.read_csv(f"{DATA_DIR}/Customers - Customers.csv")
products = pd.read_csv(f"{DATA_DIR}/Products - Products.csv")
transactions = pd.read_csv(f"{DATA_DIR}/Transactions - Transactions.csv")

# Enrich every transaction row with its customer and product attributes.
# Left joins keep all transactions even when a lookup row is missing.
data = (
    transactions
    .merge(customers, on="CustomerID", how="left")
    .merge(products, on="ProductID", how="left")
)

# Build one row per customer:
#   1. aggregate spending and transaction counts,
#   2. attach the customer's Region from the customer dataset,
#   3. one-hot encode Region (drop_first=True omits the first Region level).
customer_summary = (
    data.groupby("CustomerID")
    .agg(
        TotalSpending=("TotalValue", "sum"),
        AvgTransactionValue=("TotalValue", "mean"),
        TotalTransactions=("TransactionID", "count"),
    )
    .reset_index()
    .merge(customers[["CustomerID", "Region"]], on="CustomerID", how="left")
    .pipe(pd.get_dummies, columns=["Region"], drop_first=True)
)

# Standardize the numeric features so they share a comparable scale.
numerical_columns = ["TotalSpending", "AvgTransactionValue", "TotalTransactions"]
scaler = StandardScaler()
customer_summary[numerical_columns] = scaler.fit_transform(
    customer_summary[numerical_columns]
)

# Calculate pairwise cosine similarity between customers.
# Every column except the CustomerID key is a feature. Selecting them by name
# instead of by position (columns[4:]) avoids silently skipping or
# double-counting a feature if the summary layout ever changes.
feature_columns = customer_summary.columns.drop("CustomerID")

# Cast to float so the one-hot Region columns (which may be boolean) and the
# scaled numeric columns form a single numeric matrix.
similarity_matrix = cosine_similarity(customer_summary[feature_columns].astype(float))

# Label both axes with CustomerID so scores can be looked up by customer.
similarity_df = pd.DataFrame(
    similarity_matrix,
    index=customer_summary['CustomerID'],
    columns=customer_summary['CustomerID'],
)

# Get the top 3 most similar customers for the first 20 customers.
# NOTE(review): "first 20" is positional; it equals C0001-C0020 only if the
# customer file is ordered by CustomerID -- confirm against the data.
NUM_TARGET_CUSTOMERS = 20
TOP_N_LOOKALIKES = 3

lookalike_map = {}
for customer_id in customers['CustomerID'][:NUM_TARGET_CUSTOMERS]:
    # Customers without any transaction are absent from the similarity matrix.
    if customer_id in similarity_df.index:
        # Remove the customer itself by label before ranking. Slicing off the
        # first of nlargest(4) is only right when the self-score is ranked
        # first; with ties or float rounding it drops a real lookalike and
        # keeps the customer as its own match.
        similar_customers = (
            similarity_df[customer_id]
            .drop(customer_id)
            .nlargest(TOP_N_LOOKALIKES)
        )
        # Stored as a list of (SimilarCustomerID, score) pairs, best first.
        lookalike_map[customer_id] = list(similar_customers.items())
        
# Flatten the lookalike map into one record per (customer, lookalike) pair
lookalike_data = []
for customer_id, lookalikes in lookalike_map.items():
    lookalike_data.extend(
        {
            "CustomerID": customer_id,
            "SimilarCustomerID": similar_customer,
            "Score": score,
        }
        for similar_customer, score in lookalikes
    )
# Write the results to Lookalike.csv

# Build the DataFrame that is saved below. It was previously never created in
# this notebook, so the cell failed with NameError on a fresh kernel. Explicit
# columns keep the CSV header stable even if no lookalikes were found.
lookalike_df = pd.DataFrame(
    lookalike_data,
    columns=["CustomerID", "SimilarCustomerID", "Score"],
)

# Folder path
folder_path = "D:/zeotap/model_csv"

# Create the folder if it does not exist yet; exist_ok=True makes this a
# single call with no separate existence check.
os.makedirs(folder_path, exist_ok=True)

# Define the full file path for saving
output_path = os.path.join(folder_path, "Lookalike.csv")

# Save the DataFrame to CSV (without the positional index column)
lookalike_df.to_csv(output_path, index=False)
print(f"Lookalike recommendations saved to '{output_path}'.")



Lookalike recommendations saved to 'D:/zeotap/model_csv\Lookalike.csv'.
