# 03 – Customer Segmentation (RFM)

In [None]:
import pandas as pd, numpy as np
import matplotlib.pyplot as plt
import plotly.express as px
from pathlib import Path
# Data locations, given as relative paths (resolved against the current
# working directory when the notebook runs).
DATA_RAW = Path('../data/ecommerce_raw.csv')
DATA_CLEAN = Path('../data/ecommerce_clean.csv')
# Output directory for exported files; created up front (including any
# missing parents) so later writes do not fail, and left alone if it exists.
EXPORTS = Path('../data/exports')
EXPORTS.mkdir(parents=True, exist_ok=True)


In [None]:
# Build one RFM (Recency, Frequency, Monetary) row per customer from the
# cleaned order data, score each dimension from 1 to 5, and export the table.
df = pd.read_csv(DATA_CLEAN, parse_dates=['Order Date'])

# Recency is measured against the latest order date in the dataset rather
# than the current date, so the result depends only on the input file.
max_date = df['Order Date'].max()
rfm = (df.groupby('Customer ID')
         .agg(Recency=('Order Date', lambda s: (max_date - s.max()).days),
              Frequency=('Order ID','nunique'),
              Monetary=('Sales','sum'))
         .reset_index())


# Scores
# Each metric is split into quintiles. All three are binned on
# rank(method='first') instead of the raw values: ranks are unique, so heavily
# tied values (e.g. many customers sharing the same recency) cannot produce
# duplicate bin edges, which would make pd.qcut raise
# "ValueError: Bin edges must be unique". Ties are broken by row order.
# Recency labels are reversed: a smaller recency (more recent order) gets a
# higher score.
rfm['R_Score'] = pd.qcut(rfm['Recency'].rank(method='first'), 5, labels=[5,4,3,2,1]).astype(int)
rfm['F_Score'] = pd.qcut(rfm['Frequency'].rank(method='first'), 5, labels=[1,2,3,4,5]).astype(int)
rfm['M_Score'] = pd.qcut(rfm['Monetary'].rank(method='first'), 5, labels=[1,2,3,4,5]).astype(int)
# Combined score is the plain sum of the three component scores (range 3-15).
rfm['RFM_Score'] = rfm[['R_Score','F_Score','M_Score']].sum(axis=1)
rfm.to_csv(EXPORTS / 'rfm_scores.csv', index=False)
print('Saved ->', EXPORTS / 'rfm_scores.csv')
rfm.head()
