In [2]:
# import relevant libraries

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from imblearn.over_sampling import SMOTE

In [3]:
# load dataframes and relevant csvs

spend_revenue = pd.read_csv(r"/Users/adityamxr/Desktop/finding-marketing-insights/data-analysis/spend_revenue.csv")
sales_full_merged = pd.read_csv(r"/Users/adityamxr/Desktop/finding-marketing-insights/data-analysis/sales_full_merged.csv")
rfm = pd.read_csv(r"/Users/adityamxr/Desktop/finding-marketing-insights/models/rfm.csv")

## K-Means Clustering to Understand RFM profiles + Strategy

In [4]:
# prepare the data (RFM values)
rfm_for_clustering = rfm[['Recency', 'Frequency', 'Monetary']]

In [5]:
# standardize the RFM values (important for K-Means to work effectively)
scaler = StandardScaler()
rfm_scaled = scaler.fit_transform(rfm_for_clustering)

In [6]:
# apply K-Means clustering

# define the number of clusters
kmeans = KMeans(n_clusters=4, random_state=42)
rfm['KMeans_Cluster'] = kmeans.fit_predict(rfm_scaled)

# analyze cluster profiles
# group by the clusters and calculate the mean Recency, Frequency, and Monetary for each cluster
cluster_profiles = rfm.groupby('KMeans_Cluster').agg({
    'Recency': 'mean',
    'Frequency': 'mean',
    'Monetary': 'mean',
    'CustomerID': 'count'  # Count the number of customers in each cluster
}).rename(columns={'CustomerID': 'Number_of_Customers'}).reset_index()

In [7]:
# display cluster profiles to understand the characteristics
print(cluster_profiles)

# define strategies for each K-Means cluster based on their profiles
# assign strategies to clusters 0, 1, 2, 3
cluster_strategy = {
    0: "High spenders with frequent purchases, engage with personalized offers to retain them.",
    1: "Infrequent purchasers with low spend, use incentives like discounts or loyalty programs.",
    2: "Moderate spenders, encourage repeat purchases with targeted marketing.",
    3: "New or inactive customers, focus on reactivation campaigns with special offers."
}

# map strategies to each K-Means cluster
rfm['Cluster_Strategy'] = rfm['KMeans_Cluster'].map(cluster_strategy)

   KMeans_Cluster     Recency   Frequency      Monetary  Number_of_Customers
0               0   78.437247   12.892038   1950.349249                  741
1               1   81.553672   55.977401   9559.543873                  177
2               2  255.108456   10.194853   1562.488956                  544
3               3   80.833333  270.666667  53059.966194                    6


In [8]:
# display the RFM dataframe with the new clusters and strategies
rfm.head()

Unnamed: 0,CustomerID,Recency,Frequency,Monetary,R_Score,F_Score,M_Score,RFM_Score,Segment,Strategy,KMeans_Cluster,Cluster_Strategy
0,12346,107,1,171.725887,3,1,1,5,Silver,Provide incentives like discounts or free ship...,0,"High spenders with frequent purchases, engage ..."
1,12347,59,31,10632.890268,3,4,4,11,Premium,Focus on maintaining a strong relationship thr...,1,"Infrequent purchasers with low spend, use ince..."
2,12348,73,8,1341.274742,3,2,2,7,Gold,Engage more with targeted offers and loyalty p...,0,"High spenders with frequent purchases, engage ..."
3,12350,17,11,1080.966849,4,2,2,8,Gold,Engage more with targeted offers and loyalty p...,0,"High spenders with frequent purchases, engage ..."
4,12356,107,13,1648.281319,3,3,3,9,Gold,Engage more with targeted offers and loyalty p...,0,"High spenders with frequent purchases, engage ..."
