In [1]:
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import LabelEncoder, MinMaxScaler

In [2]:
# Read the three input tables from CSV files in the current working directory.
# The files are read in the same order as the names on the left-hand side.
customers, products, transactions = (
    pd.read_csv(csv_name)
    for csv_name in ('Customers.csv', 'Products.csv', 'Transactions.csv')
)

In [3]:
# Attach customer attributes (joined on CustomerID) and then product attributes
# (joined on ProductID) to every transaction. Both joins are left joins, so a
# transaction row is kept even when its customer or product has no match.
merged_data = pd.merge(
    pd.merge(transactions, customers, how='left', on='CustomerID'),
    products,
    how='left',
    on='ProductID',
)

In [4]:
# Encode the two categorical columns as integer codes.
# Each column gets its own encoder: re-fitting a single LabelEncoder on the
# second column overwrites the classes learned for the first one, which makes
# it impossible to map Region codes back to region names afterwards.
region_encoder = LabelEncoder()
category_encoder = LabelEncoder()
merged_data['Region_encoded'] = region_encoder.fit_transform(merged_data['Region'])
merged_data['Category_encoded'] = category_encoder.fit_transform(merged_data['Category'])

# Backward-compatible alias: the previously shared encoder ended up fitted on
# Category, so keep `label_encoder` bound to an encoder in that same state.
label_encoder = category_encoder

In [6]:
# Collapse the transaction-level table to one feature row per customer.
# CustomerID stays a regular column (as_index=False) so later cells can look
# it up by row position.
#
# 'Price_y' carries the suffix pandas' merge gives to the right-hand frame's
# column when both sides share a name -- presumably the Price column from the
# products table; confirm against the CSV headers.
#
# NOTE(review): Category_encoded holds arbitrary integer label codes, so its
# per-customer mean has no natural meaning as a quantity -- consider replacing
# it with per-category shares or counts.
customer_features = merged_data.groupby('CustomerID', as_index=False).agg(
    Region_encoded=('Region_encoded', 'first'),
    Category_encoded=('Category_encoded', 'mean'),
    Price_y=('Price_y', 'mean'),
    TotalValue=('TotalValue', 'sum'),
    Quantity=('Quantity', 'sum'),
)

In [7]:
# Rescale every feature column with MinMaxScaler so that no single feature
# dominates the similarity purely because of its units, then compute the
# pairwise cosine similarity between all customer rows.
# Row/column k of `cosine_sim` corresponds to row k of `customer_features`.
feature_matrix = customer_features.drop('CustomerID', axis=1)
scaler = MinMaxScaler()
normalized_features = scaler.fit(feature_matrix).transform(feature_matrix)
cosine_sim = cosine_similarity(normalized_features)

In [8]:
# For each of the first 20 customers (by row position in `customer_features`),
# collect the 3 most similar *other* customers as (CustomerID, score) tuples.
lookalike_recommendations = {}

# Pull the IDs out once; row k of `cosine_sim` matches position k in this list.
customer_ids = customer_features['CustomerID'].tolist()

# Bound the loop by the number of customers so a small dataset does not raise
# an IndexError.
for i in range(min(20, len(customer_ids))):
    customer_id = customer_ids[i]

    # Exclude the customer itself by index rather than assuming it sorts first:
    # an earlier row with an identical score, or an all-zero scaled row (whose
    # self-similarity is 0), would otherwise let the customer show up in its
    # own recommendation list.
    # Scores are cast to plain floats so their text form is a bare number
    # regardless of the installed NumPy version.
    similarity_scores = [
        (j, float(score)) for j, score in enumerate(cosine_sim[i]) if j != i
    ]
    # Stable sort: ties keep their original row order.
    similarity_scores.sort(key=lambda pair: pair[1], reverse=True)

    recommendations = [(customer_ids[j], score) for j, score in similarity_scores[:3]]
    lookalike_recommendations[customer_id] = recommendations

In [9]:
# Flatten the recommendation mapping into a two-column table: one row per
# customer, with that customer's recommendation list stored as its string form.
lookalike_df = pd.DataFrame(
    [(customer_id, str(recs)) for customer_id, recs in lookalike_recommendations.items()],
    columns=['CustomerID', 'Recommendations'],
)

In [10]:
# Write the lookalike table to disk, omitting the DataFrame's row index.
lookalike_df.to_csv(path_or_buf='Kumar_Shivam_Lookalike.csv', index=False)