# Customer Segmentation (RFM + KMeans)
**Author:** Mohsin Iqbal

In [None]:
# Notebook-wide settings: the input CSV, the folder that receives saved
# figures, and the file name used for the exported per-customer segments.
import os

DATA_PATH = 'data/ecommerce_synthetic.csv'
SAVE_DIR = 'assets'
SEGMENTS_CSV = 'segments.csv'

# Create the output folder up front; exist_ok makes re-running this cell safe.
os.makedirs(SAVE_DIR, exist_ok=True)

In [None]:
import pandas as pd, numpy as np, matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans

# Load the transaction file. InvoiceDate is parsed into datetimes at read
# time so later cells can do date arithmetic on it directly.
df = pd.read_csv(
    DATA_PATH,
    parse_dates=['InvoiceDate'],
)
df.head()  # quick preview of the first rows

## Compute RFM (Recency, Frequency, Monetary)

In [None]:
# Build one RFM row per customer (the result is indexed by CustomerID).
#
# The snapshot is placed one day after the latest invoice in the data, so the
# most recently active customer gets Recency == 1 instead of 0.
snapshot_date = df['InvoiceDate'].max() + pd.Timedelta(days=1)

# Named aggregation ties every output column to its source column and
# reducer explicitly, so the labels cannot drift out of step with the
# aggregations the way a positional `rfm.columns = [...]` relabel can.
rfm = df.groupby('CustomerID').agg(
    # Whole days between the snapshot and the customer's latest invoice.
    Recency=('InvoiceDate', lambda dates: (snapshot_date - dates.max()).days),
    # Number of rows recorded for the customer.
    Frequency=('CustomerID', 'count'),
    # Sum of Amount over the customer's rows.
    Monetary=('Amount', 'sum'),
)
rfm.head()

## Scale & Cluster

In [None]:
from sklearn.preprocessing import StandardScaler

# Feature matrix: the three RFM columns, in a fixed order.
X = rfm[['Recency', 'Frequency', 'Monetary']]

# Standardise the features before clustering so that the column with the
# largest raw scale does not dominate the distances KMeans works with.
Xs = StandardScaler().fit_transform(X)

# Four clusters; n_init=20 runs 20 initialisations and keeps the best one,
# and the fixed random_state makes the assignment repeatable.
km = KMeans(
    n_clusters=4,
    n_init=20,
    random_state=42,
)
labels = km.fit(Xs).labels_

# Attach each customer's cluster id to the RFM table.
rfm['Segment'] = labels
rfm.head()

## Profile & Save

In [None]:
# Per-segment summary: mean Recency / Frequency / Monetary, plus the number
# of customers that landed in each segment.
by_segment = rfm.groupby('Segment')
profile = by_segment[['Recency', 'Frequency', 'Monetary']].mean()
profile['Count'] = by_segment['Segment'].count()
print(profile)

# Export the per-customer table, segment label included (the index is
# written as the first column).
rfm.to_csv(SEGMENTS_CSV)
profile

## Simple Visual

In [None]:
# Recency vs Monetary scatter with one series per segment, saved as a PNG
# inside SAVE_DIR.
fig, ax = plt.subplots()

# groupby yields the segments in sorted key order, each with its own rows.
for segment_id, members in rfm.groupby('Segment'):
    ax.scatter(
        members['Recency'],
        members['Monetary'],
        alpha=0.4,
        label=f'Segment {segment_id}',
    )

ax.set_xlabel('Recency (days)')
ax.set_ylabel('Monetary')
ax.set_title('Segments: Recency vs Monetary')
ax.legend()
fig.savefig(os.path.join(SAVE_DIR, 'segments_scatter.png'))