In [17]:
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import StandardScaler
import import_ipynb
from Siddharth_Sanghavi_EDA import merged_data

In [18]:
# Build one row per customer: total TotalValue, total Quantity and mean Price
# across all of that customer's rows in merged_data.
customer_features = (
    merged_data
    .groupby("CustomerID")
    .agg(
        TotalValue=('TotalValue', 'sum'),
        Quantity=('Quantity', 'sum'),
        Price=('Price', 'mean'),
    )
    .reset_index()  # bring CustomerID back as the first column
)

In [19]:
# Standardize the numeric feature columns (everything except the CustomerID
# identifier) so that no single feature dominates the similarity computation.
feature_columns = customer_features.drop(columns="CustomerID")
scaler = StandardScaler()
features_scaled = scaler.fit_transform(feature_columns)

In [20]:
# Pairwise cosine similarity between every pair of rows in features_scaled;
# entry [a, b] is the similarity between the customers at row positions a and b.
similarity_matrix = cosine_similarity(X=features_scaled)

In [21]:
# Recommend the top 3 most similar *other* customers for each customer.
#
# The customer itself is excluded explicitly by row position rather than by
# assuming it always sorts last: with ties at the maximum similarity, or an
# all-zero scaled feature row (self-similarity 0), a positional slice such as
# [-4:-1] can return the customer as its own lookalike and drop a real one.
TOP_N_LOOKALIKES = 3

# Native Python IDs, looked up by row position. This avoids building a
# mixed-dtype row Series per lookup (slow, and it would upcast numeric IDs).
customer_ids = customer_features['CustomerID'].tolist()

recommendations = {}
for idx, customer_id in enumerate(customer_ids):
    scores = similarity_matrix[idx]
    # Best-first ranking; TOP_N + 1 candidates are enough because at most one
    # of them (the customer itself) is discarded.
    ranked = scores.argsort()[::-1][:TOP_N_LOOKALIKES + 1]
    neighbours = [i for i in ranked if i != idx][:TOP_N_LOOKALIKES]
    # Store plain floats so the scores print and serialise as bare numbers.
    recommendations[customer_id] = [
        (customer_ids[i], float(scores[i]))
        for i in neighbours
    ]

In [22]:
# Build the lookalike map: customer ID -> list of (lookalike ID, similarity score)
# pairs, copied from the recommendations computed above.
lookalike_map = {
    cust_id: [(other_id, score) for other_id, score in recs]
    for cust_id, recs in recommendations.items()
}

In [23]:
# Format the results as a DataFrame for the first 20 customers (by row order
# in customer_features). Each row holds the customer ID and the string form of
# its list of (lookalike ID, similarity score) pairs.

# Compute the first-20 ID set once instead of re-slicing and scanning the
# column for every entry in lookalike_map.
first_20_ids = set(customer_features['CustomerID'].head(20))

lookalike_final = pd.DataFrame(
    [
        # float(...) keeps NumPy scalar reprs such as "np.float64(0.98)" out of
        # the stringified list, so the saved text contains bare numbers.
        (cust_id, str([(other_id, float(score)) for other_id, score in recs]))
        for cust_id, recs in lookalike_map.items()
        if cust_id in first_20_ids
    ],
    columns=["CustomerID", "Lookalikes"],
)

In [24]:
# Write the lookalike table to CSV in the working directory, without the
# DataFrame index column.
output_csv = "Siddharth_Sanghavi_Lookalike.csv"
lookalike_final.to_csv(output_csv, index=False)