Importing Libraries

In [34]:
from collections import Counter

import pandas as pd
from mlxtend.frequent_patterns import apriori, association_rules
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import StandardScaler

Load the datasets

In [35]:
# Read the three raw tables; the paths are relative to the notebook's working directory.
customers, products, transactions = map(
    pd.read_csv,
    ('Datasets/Customers.csv', 'Datasets/Products.csv', 'Datasets/Transactions.csv'),
)

Feature Engineering

In [36]:
# Per-customer totals over all transactions: summed Quantity and summed TotalValue.
# reset_index() turns the CustomerID group key back into a regular column.
customer_features = (
    transactions
    .groupby('CustomerID')
    .agg(Quantity=('Quantity', 'sum'), TotalValue=('TotalValue', 'sum'))
    .reset_index()
)

In [37]:
# Attach the customer attributes to the aggregated totals, joining on CustomerID.
# The join is an inner join, so only customers present in both frames are kept.
# NOTE: this cell reassigns customer_features from itself, so running it a second
# time without re-running the aggregation cell merges the customer columns again.
customer_features = customer_features.merge(customers, how='inner', on='CustomerID')

In [38]:
# Standardise the two aggregated columns (Quantity, TotalValue) with StandardScaler;
# the fitted scaler is kept in `scaler` and the transformed values in `scaled_features`.
scaler = StandardScaler()
spend_columns = customer_features.loc[:, ['Quantity', 'TotalValue']]
scaled_features = scaler.fit_transform(spend_columns)

COSINE Similarity Model

In [39]:
# Pairwise cosine similarity between every pair of rows of scaled_features.
# Row/column i of the result corresponds to row i of customer_features.
similarity_matrix = cosine_similarity(X=scaled_features, Y=None)

In [40]:
# Recommend the top 3 most similar customers for every customer (cosine similarity).
#
# Result: lookalike_results_cosine maps CustomerID -> list of up to three
# (other CustomerID, similarity score) tuples, best match first.
#
# The customer's own row is removed explicitly instead of assuming it is the last
# entry of an ascending argsort: another customer can tie with the self-similarity
# (or edge past it through floating-point rounding), in which case slicing
# [-4:-1] would return the customer itself and drop a genuine lookalike.
lookalike_results_cosine = {}
# Positional lookup table, so index i always means "row i of the similarity matrix"
# regardless of the DataFrame's index labels.
customer_ids = customer_features['CustomerID'].to_numpy()
for idx, customer_id in enumerate(customer_features['CustomerID']):
    scores = similarity_matrix[idx]
    # Highest similarity first; the stable sort keeps ties in row order.
    ranked = (-scores).argsort(kind='stable')
    top_matches = ranked[ranked != idx][:3]  # drop self, keep the best three
    lookalike_results_cosine[customer_id] = [
        (customer_ids[i], scores[i]) for i in top_matches
    ]

APRIORI Model

In [41]:
# Customer x product matrix of Quantity values for the apriori step; customer/product
# pairs with no transactions are filled with 0.
# NOTE(review): aggfunc is not specified, so the pandas default aggregation is used
# for customers who bought the same product more than once (mean, per the pandas
# docs) - confirm whether summed quantities are wanted instead.
basket = pd.pivot_table(
    transactions,
    index='CustomerID',
    columns='ProductID',
    values='Quantity',
    fill_value=0,
)

In [42]:
# Mine frequent itemsets from the boolean "customer bought product" matrix
# (minimum support 0.05), then derive association rules using lift as the metric
# with a threshold of 1.0.
purchased_flags = basket.gt(0)
frequent_itemsets = apriori(purchased_flags, min_support=0.05, use_colnames=True)
rules = association_rules(frequent_itemsets, metric="lift", min_threshold=1.0)

In [43]:
# Extract lookalikes based on shared product patterns.
#
# For every customer, each rule whose antecedents are all among the customer's
# purchased products "votes" for every other customer who bought one of that
# rule's consequents. The vote count is the similarity score.
#
# Result: lookalike_results_apriori maps CustomerID -> list of up to three
# (other CustomerID, vote count) tuples, highest count first; ties keep the
# order in which the customers were first encountered.

# Loop-invariant work is done once up front instead of once per
# customer x rule x consequent: the boolean purchase matrix, the list of buyers
# of each product, and the (antecedents, consequents) pairs of every rule.
bought = basket > 0
buyers_by_product = {
    product: bought.index[bought[product].to_numpy()].tolist()
    for product in bought.columns
}
rule_pairs = list(zip(rules['antecedents'], rules['consequents']))

lookalike_results_apriori = {}
for customer_id in basket.index:
    purchase_row = bought.loc[customer_id]
    purchased_products = set(purchase_row.index[purchase_row.to_numpy()])

    # Counter avoids building an empty Series when no rule applies.
    votes = Counter()
    for antecedents, consequents in rule_pairs:
        if set(antecedents).issubset(purchased_products):
            for consequent in consequents:
                votes.update(buyers_by_product[consequent])

    # A customer is never their own lookalike.
    votes.pop(customer_id, None)
    lookalike_results_apriori[customer_id] = votes.most_common(3)

Save Results

In [44]:
# Collect the results of both models for the first 20 customers of customer_features.
# A customer missing from either result dict gets an empty list for that model.
lookalike_combined = {
    customer_id: {
        'Cosine_Similarity': lookalike_results_cosine.get(customer_id, []),
        'Apriori': lookalike_results_apriori.get(customer_id, []),
    }
    for customer_id in customer_features['CustomerID'].head(20)
}

In [45]:
# Save the combined results to a CSV file: one row per customer with the columns
# 'CustomerID' and 'Lookalikes'.
#
# The 'Lookalikes' cell is the text form of a nested dict
# {model name: [(other CustomerID, score), ...]}. Scores produced by numpy/pandas
# are numpy scalars, and numpy >= 2 renders those as e.g. "np.float64(0.98)", so
# every value is converted to a built-in Python scalar first. This keeps the file
# contents independent of the installed numpy version.
def _to_builtin(value):
    """Return `value` as a built-in Python scalar if it is a numpy scalar, else unchanged."""
    return value.item() if hasattr(value, 'item') else value


lookalike_combined_df = pd.DataFrame([
    {
        'CustomerID': customer_id,
        'Lookalikes': {
            model: [(_to_builtin(other), _to_builtin(score)) for other, score in matches]
            for model, matches in by_model.items()
        },
    }
    for customer_id, by_model in lookalike_combined.items()
])

lookalike_combined_df.to_csv('Lookalike.csv', index=False)

print("Lookalike recommendations (Cosine Similarity & Apriori) saved to 'Lookalike.csv'.")


Lookalike recommendations (Cosine Similarity & Apriori) saved to 'Lookalike.csv'.
