***Name: Kalhar Patel***

Mail: kalharpatel9@gmail.com

***Task 2: Lookalike Model***

In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.metrics.pairwise import cosine_similarity

The *build_lookalike_model()* function builds a customer lookalike model. It first loads the transaction, product, and customer data from CSV files. It then aggregates each customer's purchase behavior: the number of transactions, total and average spend, and total and average quantity purchased. Next, it captures product category preferences by merging the transaction data with the product information and building a crosstab that counts each customer's purchases per product category. These features are merged into a single profile per customer and standardized so that they are on a comparable scale. The function then computes a cosine similarity matrix that measures how similar customers are in their purchase behavior and preferences. For the first 20 customers, it identifies the top 3 most similar customers and records their customer IDs and similarity scores. Finally, these lookalike recommendations are returned as a DataFrame that lists the original customer, each similar customer, and the corresponding similarity score, providing a foundation for targeted marketing or personalized recommendations.

In [2]:
async def build_lookalike_model(data_dir='/content', n_customers=20, top_n=3):
    """Build lookalike recommendations from customer transaction history.

    Each customer is profiled by purchase aggregates (transaction count, total
    and mean ``TotalValue``, total and mean ``Quantity``) plus a count of
    purchases per product ``Category``. Profiles are standardized and compared
    with cosine similarity.

    The function is declared ``async`` only so that existing
    ``await build_lookalike_model()`` call sites keep working; nothing inside
    it awaits.

    Parameters
    ----------
    data_dir : str, default '/content'
        Directory containing ``Customers.csv``, ``Products.csv`` and
        ``Transactions.csv``.
    n_customers : int, default 20
        Number of leading rows of the profile table to produce
        recommendations for. Capped at the number of available profiles.
    top_n : int, default 3
        Number of most-similar customers to report per customer.

    Returns
    -------
    pandas.DataFrame
        Long-format table with columns ``CustomerID``, ``SimilarCustomerID``
        and ``SimilarityScore``, ordered per customer from most to least
        similar. Empty (with the same columns) when there are no profiles.

    Raises
    ------
    ValueError
        If ``n_customers`` or ``top_n`` is negative.
    """
    if n_customers < 0 or top_n < 0:
        raise ValueError('n_customers and top_n must be non-negative')

    # Read the CSV files
    # NOTE: customers_df is not used by any feature below; the read is kept so
    # the function's file requirements stay the same as before.
    customers_df = pd.read_csv(f'{data_dir}/Customers.csv', encoding='utf8')
    products_df = pd.read_csv(f'{data_dir}/Products.csv', encoding='utf8')
    transactions_df = pd.read_csv(f'{data_dir}/Transactions.csv', encoding='utf8')

    # Feature Engineering
    # 1. Customer Purchase Behavior (named aggregation yields flat column names)
    customer_features = transactions_df.groupby('CustomerID').agg(
        total_transactions=('TransactionID', 'count'),
        total_spend=('TotalValue', 'sum'),
        avg_transaction_value=('TotalValue', 'mean'),
        total_quantity=('Quantity', 'sum'),
        avg_quantity=('Quantity', 'mean'),
    ).reset_index()

    # 2. Product Category Preferences
    # Cast both join keys to str so the merge does not fail on mismatched dtypes.
    transactions_df['ProductID'] = transactions_df['ProductID'].astype(str)
    products_df['ProductID'] = products_df['ProductID'].astype(str)

    # Merge to get categories and then count purchases per (customer, category)
    category_data = transactions_df.merge(
        products_df[['ProductID', 'Category']], on='ProductID', how='left'
    )
    category_pivot = pd.crosstab(
        category_data['CustomerID'],
        category_data['Category']
    )

    # 3. Merge all features
    customer_profiles = customer_features.merge(category_pivot, on='CustomerID')

    result_columns = ['CustomerID', 'SimilarCustomerID', 'SimilarityScore']
    if customer_profiles.empty:
        # Nothing to compare; the scaler would raise on zero samples.
        return pd.DataFrame(columns=result_columns)

    # 4. Scale features
    scaler = StandardScaler()
    scaled_features = scaler.fit_transform(customer_profiles.drop(columns='CustomerID'))

    # Calculate similarity matrix
    similarity_matrix = cosine_similarity(scaled_features)

    # Positional lookup of IDs, aligned with the rows of similarity_matrix
    customer_ids = customer_profiles['CustomerID'].to_numpy()

    def get_top_similar(customer_idx, similarity_scores):
        """Return (customer_id, score) for the top_n most similar other customers."""
        ranked = np.argsort(similarity_scores)[::-1]
        # Drop the customer's own row explicitly: it is not guaranteed to rank
        # first when another profile ties with it.
        ranked = ranked[ranked != customer_idx][:top_n]
        return [(customer_ids[idx], similarity_scores[idx]) for idx in ranked]

    # Generate recommendations for the first n_customers profiles
    rows = []
    for i in range(min(n_customers, len(customer_profiles))):
        for sim_cust_id, score in get_top_similar(i, similarity_matrix[i]):
            rows.append([customer_ids[i], sim_cust_id, score])

    # Long-format result; writing it out (e.g. to Lookalike.csv) is left to the caller
    lookalike_df = pd.DataFrame(rows, columns=result_columns)
    return lookalike_df


In [3]:
# Execute the model
# build_lookalike_model is declared `async def`, so calling it yields a coroutine
# that must be awaited. A bare top-level `await` like this is not valid in a
# plain Python script; it relies on the notebook's (IPython) support for
# awaiting at the top level of a cell.
lookalike_results = await build_lookalike_model()
# Emit the plain-text representation of the resulting DataFrame
print(lookalike_results)

   CustomerID SimilarCustomerID  SimilarityScore
0       C0001             C0069         0.924680
1       C0001             C0035         0.799590
2       C0001             C0127         0.788884
3       C0002             C0062         0.891223
4       C0002             C0031         0.850468
5       C0002             C0133         0.845447
6       C0003             C0144         0.828690
7       C0003             C0166         0.776008
8       C0003             C0129         0.724981
9       C0004             C0113         0.926389
10      C0004             C0017         0.919186
11      C0004             C0065         0.900377
12      C0005             C0095         0.905358
13      C0005             C0007         0.902686
14      C0005             C0035         0.890939
15      C0006             C0185         0.881737
16      C0006             C0196         0.861784
17      C0006             C0079         0.850102
18      C0007             C0005         0.902686
19      C0007       