In [1]:
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import StandardScaler
import pandas as pd

In [3]:
# Load processed data
data = pd.read_csv("Processed_Data.csv")

# Feature preparation
features = data.groupby('CustomerID').agg({
    'Region': 'first',
    'Category': lambda x: ' '.join(x),
    'TotalValue': 'sum',
    'Quantity': 'sum'
}).reset_index()

In [4]:
# One-hot encode categorical features
features = pd.get_dummies(features, columns=['Region', 'Category'])
scaler = StandardScaler()
scaled_features = scaler.fit_transform(features.iloc[:, 2:])

In [5]:
# Calculate similarity scores
similarity_matrix = cosine_similarity(scaled_features)
similarity_df = pd.DataFrame(similarity_matrix, index=features['CustomerID'], columns=features['CustomerID'])

In [6]:
# Recommend top 3 similar customers
lookalike_results = {}
for customer in features['CustomerID'][:20]:
    similar_customers = similarity_df[customer].sort_values(ascending=False)[1:4]
    lookalike_results[customer] = list(zip(similar_customers.index, similar_customers.values))

In [7]:
# Save Lookalike.csv
lookalike_df = pd.DataFrame([
    {"CustomerID": k, "Recommendations": v} for k, v in lookalike_results.items()
])
lookalike_df.to_csv("Lookalike.csv", index=False)

In [8]:
# Display Lookalike Recommendations
print(lookalike_df.head())

  CustomerID                                    Recommendations
0      C0001  [(C0151, 0.027632487574177963), (C0150, 0.0205...
1      C0002  [(C0133, 0.9487182950854319), (C0097, 0.067033...
2      C0003  [(C0151, 0.02236232969799536), (C0112, 0.01779...
3      C0004  [(C0165, 0.03365373625138064), (C0082, 0.02477...
4      C0005  [(C0197, 0.9462399473655353), (C0097, 0.081022...


In [9]:
with open("lookalike_results.pdf", "w") as file:
    file.write(lookalike_df.head().to_string())