In [6]:
# Importing Libraries
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np

# Step 1: Load the datasets
def load_data(data_dir=r"C:\Users\Ravi\Downloads"):
    """Load the customers, transactions and products CSV files.

    Args:
        data_dir: Directory that contains ``Customers.csv``,
            ``Transactions.csv`` and ``Products.csv``. Defaults to the
            directory that was previously hard-coded in this function.

    Returns:
        A ``(customers_df, transactions_df, products_df)`` tuple of
        DataFrames, in that order.
    """
    import os  # local import: keeps this block self-contained

    def _read(filename):
        # 'utf-8-sig' transparently strips a leading BOM if one is present.
        return pd.read_csv(os.path.join(data_dir, filename), encoding='utf-8-sig')

    customers_df = _read("Customers.csv")
    # The two file names below used to be swapped, so ``transactions_df``
    # actually held the product catalogue and vice versa.
    transactions_df = _read("Transactions.csv")
    products_df = _read("Products.csv")
    return customers_df, transactions_df, products_df

# Step 2: Merge Transactions and Products
def merge_transactions_products(transactions_df, products_df):
    """Inner-join transactions with products on their shared ``ProductID``.

    Args:
        transactions_df: Left-hand DataFrame; must contain ``ProductID``.
        products_df: Right-hand DataFrame; must contain ``ProductID``.

    Returns:
        A new DataFrame holding the columns of both inputs for every row
        whose ``ProductID`` appears on both sides.
    """
    joined = transactions_df.merge(products_df, how='inner', on='ProductID')
    return joined

# Step 3: Merge Transactions with Customers
def merge_transactions_customers(transactions_products, customers_df):
    """Inner-join the transaction/product table with customer records.

    Args:
        transactions_products: Left-hand DataFrame; must contain
            ``CustomerID``.
        customers_df: Right-hand DataFrame; must contain ``CustomerID``.

    Returns:
        A new DataFrame with one row per left-hand row whose ``CustomerID``
        is also present in ``customers_df``.
    """
    joined = transactions_products.merge(customers_df, how='inner', on='CustomerID')
    return joined

# Step 4: Feature Engineering - Create pivot table for average spending per category
def create_customer_category_spending(transactions_customers):
    """Build a customer-by-category table of mean ``TotalValue``.

    Args:
        transactions_customers: DataFrame with ``CustomerID``, ``Category``
            and ``TotalValue`` columns.

    Returns:
        A DataFrame indexed by ``CustomerID`` with one column per
        ``Category``. Each cell is the mean ``TotalValue`` for that pair;
        pairs with no rows are filled with 0.
    """
    spending = pd.pivot_table(
        transactions_customers,
        values='TotalValue',
        index='CustomerID',
        columns='Category',
        aggfunc='mean',
        fill_value=0,
    )
    return spending

# Step 5: Standardize features before computing customer similarity
def standardize_features(customer_category_spending):
    """Fit a ``StandardScaler`` on the feature table and return it scaled.

    Args:
        customer_category_spending: Numeric feature table with one row per
            customer and one column per feature.

    Returns:
        The scaled features as produced by ``StandardScaler``; rows keep
        the order of the input.
    """
    # Fit and transform on the same data: the scaler is not kept around.
    fitted = StandardScaler().fit(customer_category_spending)
    return fitted.transform(customer_category_spending)

# Step 6: Calculate Cosine Similarity between customers
def calculate_cosine_similarity(customer_category_spending_scaled):
    """Compute pairwise cosine similarity between all rows of the input.

    Args:
        customer_category_spending_scaled: 2-D array-like with one row per
            customer.

    Returns:
        The square matrix returned by ``cosine_similarity``, where entry
        ``[i, j]`` compares row ``i`` with row ``j`` of the input.
    """
    pairwise_scores = cosine_similarity(customer_category_spending_scaled)
    return pairwise_scores

# Step 7: Function to get the top 3 similar customers
def get_top_3_similar(similarity_df, customer_id):
    """Return the three customers most similar to ``customer_id``.

    Args:
        similarity_df: Square similarity table whose index and columns are
            customer IDs.
        customer_id: The customer to find lookalikes for; must be a column
            of ``similarity_df``.

    Returns:
        A ``(ids, scores)`` tuple of two lists, ordered from most to least
        similar. Fewer than three entries are returned when the table holds
        fewer than three other customers.

    Raises:
        KeyError: If ``customer_id`` is not a column of ``similarity_df``.
    """
    # Remove the customer by label instead of assuming it sorts first:
    # ties with an identical profile, float rounding, or a zero
    # self-similarity can all place another customer ahead of it.
    scores = similarity_df[customer_id].drop(labels=customer_id, errors='ignore')
    # A stable sort keeps the table's own order among equal scores, so the
    # result is deterministic.
    top_matches = scores.sort_values(ascending=False, kind='mergesort').head(3)
    return list(top_matches.index), list(top_matches.values)

# Step 8: Generate Lookalike recommendations for the first 20 customers
def generate_lookalike_recommendations(customers_df, similarity_df):
    """Collect lookalikes for the first 20 customers in ``customers_df``.

    Args:
        customers_df: DataFrame with a ``CustomerID`` column; only its
            first 20 rows are used.
        similarity_df: Similarity table passed through to
            ``get_top_3_similar``.

    Returns:
        A dict mapping each customer ID to a list of
        ``(similar_customer_id, score)`` tuples.
    """
    first_customers = customers_df['CustomerID'].iloc[:20]
    # get_top_3_similar returns (ids, scores); zip(*...) pairs them up.
    return {
        cid: list(zip(*get_top_3_similar(similarity_df, cid)))
        for cid in first_customers
    }

# Step 9: Convert the dictionary to a DataFrame and save to CSV
def save_lookalike_recommendations(lookalike_dict, output_path='Lookalike_Recommendations.csv'):
    """Tabulate the lookalike mapping and write it to a CSV file.

    Args:
        lookalike_dict: Mapping of customer ID to a list of exactly three
            ``(similar_customer_id, score)`` tuples.
        output_path: Destination of the CSV file. Defaults to
            ``Lookalike_Recommendations.csv`` in the current working
            directory, which was previously the only option.

    Returns:
        The DataFrame that was written: one row per customer, with columns
        ``Lookalike1``, ``Lookalike2`` and ``Lookalike3``.
    """
    lookalike_df = pd.DataFrame.from_dict(
        lookalike_dict,
        orient='index',
        columns=['Lookalike1', 'Lookalike2', 'Lookalike3'],
    )
    lookalike_df.to_csv(output_path)
    return lookalike_df

# Step 10: Main Execution - Running the model
def main():
    """Run the lookalike pipeline end to end and print the first rows.

    Loads the three datasets, joins them, builds the per-customer category
    spending table, scales it, computes pairwise cosine similarity, derives
    the recommendations, saves them to CSV and prints ``head()`` of the
    resulting table.
    """
    customers, transactions, products = load_data()

    # Join transactions with products first, then attach customer details.
    merged = merge_transactions_customers(
        merge_transactions_products(transactions, products),
        customers,
    )

    spending = create_customer_category_spending(merged)
    scaled = standardize_features(spending)

    # Label both axes with customer IDs so lookups can be done by ID.
    customer_ids = spending.index
    similarity_df = pd.DataFrame(
        calculate_cosine_similarity(scaled),
        index=customer_ids,
        columns=customer_ids,
    )

    recommendations = save_lookalike_recommendations(
        generate_lookalike_recommendations(customers, similarity_df)
    )
    print(recommendations.head())

# Entry point: run the pipeline only when this code is executed directly,
# not when it is imported as a module.
if __name__ == '__main__':
    main()


                        Lookalike1                   Lookalike2  \
C0001  (C0184, 0.9472355498942732)  (C0047, 0.9290902929881713)   
C0002  (C0134, 0.9417241221886844)  (C0062, 0.8702122771421835)   
C0003   (C0163, 0.996442720815831)  (C0152, 0.9738913786466057)   
C0004  (C0090, 0.9870839964399987)  (C0064, 0.9479684406070296)   
C0005  (C0197, 0.9771085009829121)  (C0007, 0.9534556278772148)   

                        Lookalike3  
C0001   (C0152, 0.915772822583414)  
C0002  (C0128, 0.8399356355278532)  
C0003  (C0012, 0.9738079924314287)  
C0004  (C0127, 0.9134965740927001)  
C0005  (C0199, 0.9158394606789362)  


NameError: name 'lookalike_df' is not defined