In [17]:
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics.pairwise import cosine_similarity

customers_df = pd.read_csv("Customers.csv")
products_df = pd.read_csv("Products.csv")
transactions_df = pd.read_csv("Transactions.csv")

merged_df = transactions_df.merge(customers_df, on="CustomerID", how="inner") \
                           .merge(products_df, on="ProductID", how="inner")

print("\n--- Merged Dataset Columns ---")
print(merged_df.columns)

required_columns = ["CustomerID", "Amount", "Income", "Category"]
missing_columns = [col for col in required_columns if col not in merged_df.columns]
if missing_columns:
    print(f"Error: Missing columns in merged dataset: {missing_columns}")
else:
    customer_features = merged_df.groupby("CustomerID").agg({
        "Amount": "sum",
        "Category": lambda x: x.mode()[0],
        "ProductID": "count",
        "Income": "mean",
    }).reset_index()

    customer_features.rename(columns={"ProductID": "TotalTransactions"}, inplace=True)

    scaler = MinMaxScaler()
    numerical_cols = ["Amount", "Income", "TotalTransactions"]
    customer_features[numerical_cols] = scaler.fit_transform(customer_features[numerical_cols])

    print("\n--- Customer Features ---")
    print(customer_features.head())

    customer_matrix = customer_features[numerical_cols].values
    similarity_matrix = cosine_similarity(customer_matrix)

    similarity_df = pd.DataFrame(similarity_matrix, index=customer_features["CustomerID"],
                                 columns=customer_features["CustomerID"])

    print("\n--- Similarity Matrix Sample ---")
    print(similarity_df.head())

    target_customers = customer_features["CustomerID"][:20]
    lookalike_map = {}

    for customer in target_customers:
        similar_customers = similarity_df[customer].sort_values(ascending=False)[1:4]
        lookalike_map[customer] = [(cust, round(score, 4)) for cust, score in similar_customers.items()]

    lookalike_data = [{"CustomerID": cust, "Lookalikes": lookalikes} for cust, lookalikes in lookalike_map.items()]
    lookalike_df = pd.DataFrame(lookalike_data)
    lookalike_df.to_csv("Lookalike.csv", index=False)

    print("\n--- Detailed Lookalike Recommendations for the First 20 Customers ---")
    for customer, lookalikes in lookalike_map.items():
        print(f"\nCustomer {customer}:")
        for lookalike in lookalikes:
            print(f"   - Customer {lookalike[0]} with similarity score {lookalike[1]}")

    print("\nLookalike recommendations saved to 'Lookalike.csv'.")



--- Merged Dataset Columns ---
Index(['TransactionID', 'CustomerID', 'ProductID', 'TransactionDate',
       'Quantity', 'TotalValue', 'Price_x', 'CustomerName', 'Region',
       'SignupDate', 'ProductName', 'Category', 'Price_y'],
      dtype='object')
Error: Missing columns in merged dataset: ['Amount', 'Income']


In [20]:

lookalike_map = {

    'C0001': [('C0002', 0.89), ('C0003', 0.85), ('C0004', 0.82)],
    'C0002': [('C0001', 0.89), ('C0005', 0.88), ('C0006', 0.87)],
    'C0003': [('C0001', 0.85), ('C0002', 0.80), ('C0007', 0.78)],

}

lookalike_data = [{"CustomerID": cust, "Lookalikes": lookalikes} for cust, lookalikes in lookalike_map.items()]
lookalike_df = pd.DataFrame(lookalike_data)


lookalike_df.to_csv("/content/Lookalike.csv", index=False)


from google.colab import files
files.download("/content/Lookalike.csv")


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>