In [25]:
#Task 2
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import StandardScaler


# Load the three input tables. The paths point into /content/drive/MyDrive,
# i.e. they presumably require Google Drive to be mounted in Colab first.
customers, transactions, products = (
    pd.read_csv(csv_path)
    for csv_path in (
        '/content/drive/MyDrive/Space/Customers.csv',
        '/content/drive/MyDrive/Space/Transactions.csv',
        '/content/drive/MyDrive/Space/Products.csv',
    )
)


# Per-customer purchase summary: total spend, mean transaction value and
# total number of units bought. Named aggregation yields the final column
# names directly, in the order listed here.
customer_transactions = (
    transactions
    .groupby('CustomerID')
    .agg(
        TotalSpending=('TotalValue', 'sum'),
        AvgTransactionValue=('TotalValue', 'mean'),
        TotalQuantity=('Quantity', 'sum'),
    )
    .reset_index()
)


# Join every purchase with its product row so each transaction carries the
# product's Category.
merged_data = transactions.merge(products, on='ProductID')

# Count transactions per (customer, category) pair ...
category_counts = (
    merged_data
    .groupby(['CustomerID', 'Category'])
    .size()
    .reset_index(name='Count')
)
# ... and keep, for each customer, the row with the highest count
# (idxmax returns the first such row when several categories tie).
top_category_rows = category_counts.groupby('CustomerID')['Count'].idxmax()
favorite_category = category_counts.loc[top_category_rows, ['CustomerID', 'Category']]

# Assemble the profile table. The first merge is an inner join, so customers
# without any transaction are not part of the profiles; the second is a left
# join, so a customer without a favourite category keeps a NaN Category.
customer_profiles = (
    customers
    .merge(customer_transactions, on='CustomerID')
    .merge(favorite_category, on='CustomerID', how='left')
)


# Build the numeric feature matrix used for the similarity computation.
#
# Region and Category are nominal (unordered) labels, so they are one-hot
# encoded rather than mapped to integer category codes: integer codes impose
# an arbitrary order and distance between labels (code 0 would look "closer"
# to code 1 than to code 3), which distorts any distance/similarity measure.
# customer_profiles itself is left untouched, so this step can be re-run and
# the original labels stay available for inspection.
numeric_columns = ['TotalSpending', 'AvgTransactionValue', 'TotalQuantity']
categorical_columns = ['Region', 'Category']
features = pd.get_dummies(
    customer_profiles[numeric_columns + categorical_columns],
    columns=categorical_columns,
    dtype=float,  # keep every column float so the scaler gets a purely numeric frame
)
# A missing label (e.g. NaN Category from the left join) gets no indicator
# column of its own; such a row is simply all zeros across that label's columns.

# Standardise every column to zero mean / unit variance so that large-valued
# columns such as TotalSpending do not dominate the similarity.
scaler = StandardScaler()
features_scaled = scaler.fit_transform(features)

# Pairwise cosine similarity between the scaled feature vectors of all
# customers; row i holds customer i's similarity to every customer,
# including itself.
similarity_matrix = cosine_similarity(features_scaled)

# Getting the top 3 lookalikes for the first 20 customers (row order of
# customer_profiles).
NUM_TARGET_CUSTOMERS = 20
TOP_K = 3

# Pull the IDs out once instead of materialising a full row per lookup.
customer_ids = customer_profiles['CustomerID'].tolist()

lookalike_results = {}
# Bounded by the table size so a short table does not raise IndexError.
for i in range(min(NUM_TARGET_CUSTOMERS, len(customer_ids))):
    similarities = similarity_matrix[i]
    # Rank all customers from most to least similar and drop the customer's
    # own row *by position*. Relying on "self is always the last element of
    # argsort" is unsafe: another customer can tie at the maximum, or rounding
    # can leave the diagonal marginally below a neighbour's score, in which
    # case the customer would be reported as its own lookalike.
    ranked = [idx for idx in similarities.argsort()[::-1] if idx != i]
    lookalike_results[customer_ids[i]] = [
        (customer_ids[idx], float(similarities[idx])) for idx in ranked[:TOP_K]
    ]

# Write the results to Lookalike.csv: a header row followed by one row per
# (customer, lookalike) pair, in the iteration order of lookalike_results.
import csv

lookalike_rows = (
    [cust_id, lookalike_id, score]
    for cust_id, lookalikes in lookalike_results.items()
    for lookalike_id, score in lookalikes
)
with open('Lookalike.csv', 'w', newline='') as out_file:
    writer = csv.writer(out_file)
    writer.writerow(['CustomerID', 'LookalikeID', 'SimilarityScore'])
    writer.writerows(lookalike_rows)

print("Lookalike.csv has been created.")

Lookalike.csv has been created.


In [24]:
# Colab-only step: hand the generated CSV to the browser as a download.
from google.colab import files

lookalike_csv = 'Lookalike.csv'
files.download(lookalike_csv)

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>