In [2]:
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import StandardScaler
import pandas as pd

# Load the three raw tables from CSV files in the working directory.
transactions = pd.read_csv("Transactions.csv")
customers = pd.read_csv("Customers.csv")
products = pd.read_csv("Products.csv")

# Merge datasets for Lookalike Model: start from the transactions and attach
# the matching customer columns, then the matching product columns. Left
# joins keep every transaction row even when no customer/product row matches.
data = (
    transactions
    .merge(customers, on="CustomerID", how="left")
    .merge(products, on="ProductID", how="left")
)

# Feature engineering for similarity calculation
def _most_frequent(values):
    """Return the most frequent non-null value, or NaN if every value is null.

    ``Series.mode()`` ignores nulls by default, so for an all-null group it
    returns an empty Series and ``mode()[0]`` would raise. That situation can
    arise here because the left merges leave Region/Category empty for
    transactions with no matching customer/product row. When several values
    tie, the first entry returned by ``mode()`` is used.
    """
    modes = values.mode()
    return modes.iloc[0] if not modes.empty else float("nan")


# One row per CustomerID: summed TotalValue and Quantity, plus the most
# frequent Region and Category across that customer's transactions.
customer_features = data.groupby('CustomerID').agg({
    'TotalValue': 'sum',
    'Quantity': 'sum',
    'Region': _most_frequent,
    'Category': _most_frequent,
}).reset_index()

# Encode categorical columns as one-hot indicator columns.
# dtype=float keeps the feature frame purely numeric: on pandas >= 2.0 the
# default indicator dtype is bool, and a frame mixing float and bool columns
# turns into an object-dtype array when `.values` is taken downstream.
# NOTE(review): drop_first=True gives the first Region/Category level an
# all-zero encoding, so two customers sharing that level gain nothing in a
# cosine score while customers sharing any other level do — confirm intended.
customer_features = pd.get_dummies(
    customer_features,
    columns=['Region', 'Category'],
    drop_first=True,
    dtype=float,
)

# Normalize numerical columns: fit a StandardScaler on the two aggregated
# totals and write the scaled values back into the same columns.
numerical_cols = ['TotalValue', 'Quantity']
scaler = StandardScaler()
scaled_totals = scaler.fit_transform(customer_features[numerical_cols])
customer_features[numerical_cols] = scaled_totals

# Calculate cosine similarity
# The IDs are set aside and every remaining column is used as a feature, so
# row/column k of the similarity matrix lines up with position k of
# customer_ids.
customer_ids = customer_features['CustomerID']
features_matrix = customer_features.drop(columns=['CustomerID']).to_numpy()
similarity_matrix = cosine_similarity(features_matrix)

# Find top 3 similar customers for each customer
# Maps each CustomerID to a list of up to three (other CustomerID, score)
# tuples, best match first.
lookalikes = {}
for i, customer_id in enumerate(customer_ids):
    # Drop the customer's own entry by position instead of assuming it sorts
    # first: another customer can tie at the top score (identical feature
    # rows), and an all-zero feature row does not score highest against
    # itself. Skipping "the first result" would then keep the customer in
    # their own list and discard a genuine match.
    ranked_others = sorted(
        ((j, score) for j, score in enumerate(similarity_matrix[i]) if j != i),
        key=lambda pair: pair[1],
        reverse=True,
    )
    # .iloc makes the positional lookup explicit (independent of the Series
    # index labels); float() stores plain Python numbers so the list is
    # serialised as bare numbers (NumPy 2 prints its scalars as np.float64(...)).
    lookalikes[customer_id] = [
        (customer_ids.iloc[j], float(score)) for j, score in ranked_others[:3]
    ]

# Save the Lookalike data
# One output row per customer; the "Lookalikes" cell holds that customer's
# list of (CustomerID, score) tuples taken from the `lookalikes` dict.
lookalike_lists = [lookalikes[cid] for cid in customer_ids]
lookalike_df = pd.DataFrame(
    {"CustomerID": customer_ids, "Lookalikes": lookalike_lists}
)
lookalike_df.to_csv("Lookalike.csv", index=False)
print("Lookalike Model completed! Results saved in Lookalike.csv")


Lookalike Model completed! Results saved in Lookalike.csv


In [3]:
# Re-load the three raw tables from CSV files in the working directory.
transactions = pd.read_csv("Transactions.csv")
customers = pd.read_csv("Customers.csv")
products = pd.read_csv("Products.csv")

# Merge datasets for Lookalike Model: transactions first, then the matching
# customer columns and product columns. Left joins keep every transaction
# row even when no customer/product row matches.
data = (
    transactions
    .merge(customers, on="CustomerID", how="left")
    .merge(products, on="ProductID", how="left")
)


In [4]:
# Feature engineering for similarity calculation
def _most_frequent(values):
    """Return the most frequent non-null value, or NaN if every value is null.

    ``Series.mode()`` ignores nulls by default, so for an all-null group it
    returns an empty Series and ``mode()[0]`` would raise. That situation can
    arise here because the left merges leave Region/Category empty for
    transactions with no matching customer/product row. When several values
    tie, the first entry returned by ``mode()`` is used.
    """
    modes = values.mode()
    return modes.iloc[0] if not modes.empty else float("nan")


# One row per CustomerID: summed TotalValue and Quantity, plus the most
# frequent Region and Category across that customer's transactions.
customer_features = data.groupby('CustomerID').agg({
    'TotalValue': 'sum',
    'Quantity': 'sum',
    'Region': _most_frequent,
    'Category': _most_frequent,
}).reset_index()



In [5]:
# Encode categorical columns as one-hot indicator columns.
# dtype=float keeps the feature frame purely numeric: on pandas >= 2.0 the
# default indicator dtype is bool, and a frame mixing float and bool columns
# turns into an object-dtype array when `.values` is taken downstream.
# NOTE(review): drop_first=True gives the first Region/Category level an
# all-zero encoding, so two customers sharing that level gain nothing in a
# cosine score while customers sharing any other level do — confirm intended.
customer_features = pd.get_dummies(
    customer_features,
    columns=['Region', 'Category'],
    drop_first=True,
    dtype=float,
)

In [6]:
# Normalize numerical columns: fit a StandardScaler on the two aggregated
# totals and write the scaled values back into the same columns.
numerical_cols = ['TotalValue', 'Quantity']
scaler = StandardScaler()
scaled_totals = scaler.fit_transform(customer_features[numerical_cols])
customer_features[numerical_cols] = scaled_totals

In [7]:
# Calculate cosine similarity
# The IDs are set aside and every remaining column is used as a feature, so
# row/column k of the similarity matrix lines up with position k of
# customer_ids.
customer_ids = customer_features['CustomerID']
features_matrix = customer_features.drop(columns=['CustomerID']).to_numpy()
similarity_matrix = cosine_similarity(features_matrix)

In [9]:
# Find top 3 similar customers for the first 20 customers
N_CUSTOMERS = 20  # how many leading customers (by position in customer_ids) to report
TOP_K = 3         # lookalikes kept per customer
# Written to its own file so the all-customer results in Lookalike.csv are
# not overwritten; this is the file name the completion message reports.
OUTPUT_PATH = "Lookalike_first_20.csv"

first_customer_ids = customer_ids.iloc[:N_CUSTOMERS]

# Maps each reported CustomerID to a list of up to TOP_K
# (other CustomerID, score) tuples, best match first. Candidates are drawn
# from all customers, not only the first N_CUSTOMERS.
lookalikes = {}
for i, customer_id in enumerate(first_customer_ids):
    # Drop the customer's own entry by position instead of assuming it sorts
    # first: another customer can tie at the top score (identical feature
    # rows), and an all-zero feature row does not score highest against
    # itself. Skipping "the first result" would then keep the customer in
    # their own list and discard a genuine match.
    ranked_others = sorted(
        ((j, score) for j, score in enumerate(similarity_matrix[i]) if j != i),
        key=lambda pair: pair[1],
        reverse=True,
    )
    # .iloc makes the positional lookup explicit; float() stores plain Python
    # numbers so the CSV cell holds bare numbers (NumPy 2 prints its scalars
    # as np.float64(...)).
    lookalikes[customer_id] = [
        (customer_ids.iloc[j], float(score)) for j, score in ranked_others[:TOP_K]
    ]

# Save the Lookalike data for the first 20 customers
lookalike_df = pd.DataFrame({
    "CustomerID": first_customer_ids,
    "Lookalikes": [lookalikes[cid] for cid in first_customer_ids],
})
lookalike_df.to_csv(OUTPUT_PATH, index=False)
print(f"Lookalike Model completed for the first {N_CUSTOMERS} customers! Results saved in {OUTPUT_PATH}")


Lookalike Model completed for the first 20 customers! Results saved in Lookalike_first_20.csv
