In [1]:
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import StandardScaler

# Read the three raw tables.
# NOTE(review): the transactions file is addressed by an absolute path while the
# other two are resolved against the working directory — confirm this is intended.
customers = pd.read_csv("Customers.csv")
products = pd.read_csv("Products.csv")
transactions = pd.read_csv("/content/Transactions - Transactions.csv")

# Enrich every transaction row with its customer and product attributes.
# Left joins keep each transaction even when the lookup table has no matching key.
transactions_with_customers = pd.merge(transactions, customers, on="CustomerID", how="left")
merged_data = pd.merge(transactions_with_customers, products, on="ProductID", how="left")

# Feature engineering: collapse the merged transaction table to one row per customer.
# "Price_x" is the left-hand price column after the merge, i.e. the one coming from
# the transactions side (presumably Products.csv also has a Price column, which is
# why pandas added the suffix — verify against the raw files).
per_customer = merged_data.groupby("CustomerID")
customer_features = per_customer.agg(
    TotalSpent=pd.NamedAgg(column="TotalValue", aggfunc="sum"),        # total value of all purchases
    TotalQuantity=pd.NamedAgg(column="Quantity", aggfunc="sum"),       # total units bought
    AveragePrice=pd.NamedAgg(column="Price_x", aggfunc="mean"),        # mean price per transaction row
    Transactions=pd.NamedAgg(column="TransactionID", aggfunc="count"), # number of transactions
).reset_index()

# Standardize the numeric feature columns so that no single feature dominates the
# similarity purely because of its scale. Everything except the CustomerID key
# column is treated as a feature.
numeric_features = customer_features.drop(columns="CustomerID")
scaler = StandardScaler()
scaled_features = scaler.fit_transform(numeric_features)

# Pairwise cosine similarity between customers; row i / column j follow the row
# order of customer_features.
similarity_matrix = cosine_similarity(scaled_features)

# Generate lookalike recommendations for the first 20 customers.
# Result: lookalikes maps each target CustomerID to a list of up to TOP_K
# (similar CustomerID, similarity score) tuples, best match first.
NUM_TARGET_CUSTOMERS = 20  # how many customers (in customer_features row order) get recommendations
TOP_K = 3                  # how many lookalikes to keep per customer

customer_ids = customer_features["CustomerID"]
lookalikes = {}
for i, customer_id in enumerate(customer_ids[:NUM_TARGET_CUSTOMERS]):
    # Exclude the customer itself by row position. Slicing off the first sorted
    # entry is not reliable: another customer can tie with (or, through floating
    # point error, exceed) the self-similarity, and an all-zero scaled feature
    # vector has self-similarity 0, so the customer is not guaranteed to rank first.
    candidates = [
        (j, score) for j, score in enumerate(similarity_matrix[i]) if j != i
    ]
    # Highest similarity first; the sort is stable, so ties keep row order.
    top_matches = sorted(candidates, key=lambda pair: -pair[1])[:TOP_K]
    # .iloc because j is a row position in similarity_matrix, not an index label.
    lookalikes[customer_id] = [
        (customer_ids.iloc[j], score) for j, score in top_matches
    ]

# Flatten the {customer: [(similar_id, score), ...]} mapping into one record per
# (customer, similar customer) pair so it can be written as a flat table.
lookalike_data = [
    {"CustomerID": cust_id, "SimilarCustomerID": rec_id, "SimilarityScore": score}
    for cust_id, recommendations in lookalikes.items()
    for rec_id, score in recommendations
]

# Persist the recommendations; index=False keeps the row index out of the CSV.
lookalike_df = pd.DataFrame(lookalike_data)
lookalike_df.to_csv("Lookalike.csv", index=False)

print("Lookalike recommendations saved to Lookalike.csv")


Lookalike recommendations saved to Lookalike.csv


Download Lookalike.csv by running the cell below.


In [2]:
# Download Lookalike.csv through Colab's file helper. This cell needs the
# google.colab package, so it is expected to work only inside a Colab runtime,
# and it assumes Lookalike.csv already exists in the working directory.
from google.colab import files
files.download("Lookalike.csv")


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>