# RFM Analysis

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
from datetime import datetime

In [3]:
# Load the raw transaction export; column 0 of the CSV is used as the row index.
df = pd.read_csv(
    'online_retail.csv',
    index_col=0,
)

In [4]:
df.head()

Unnamed: 0,Invoice,StockCode,Description,Quantity,InvoiceDate,Price,Customer ID,Country
0,489434,85048,15CM CHRISTMAS GLASS BALL 20 LIGHTS,12,2009-12-01 07:45:00,6.95,13085.0,United Kingdom
1,489434,79323P,PINK CHERRY LIGHTS,12,2009-12-01 07:45:00,6.75,13085.0,United Kingdom
2,489434,79323W,WHITE CHERRY LIGHTS,12,2009-12-01 07:45:00,6.75,13085.0,United Kingdom
3,489434,22041,"RECORD FRAME 7"" SINGLE SIZE",48,2009-12-01 07:45:00,2.1,13085.0,United Kingdom
4,489434,21232,STRAWBERRY CERAMIC TRINKET BOX,24,2009-12-01 07:45:00,1.25,13085.0,United Kingdom


## Clean & transform the data

In [5]:
# Parse the invoice timestamps into datetimes so date arithmetic works later on.
df = df.assign(InvoiceDate=pd.to_datetime(df['InvoiceDate']))

In [6]:
# Keep only the rows that have a customer ID
has_customer = df['Customer ID'].notna()
df = df[has_customer]

In [7]:
# Fix Customer ID: the column loads as float (e.g. 13085.0); convert it to an
# integer-like string label (e.g. '13085').
# Built with .assign() so a new frame is produced instead of writing into the
# boolean-filtered frame, which triggers SettingWithCopyWarning.
df = df.assign(**{'Customer ID': df['Customer ID'].astype(int).astype(str)})

In [8]:
df.dtypes

Invoice                object
StockCode              object
Description            object
Quantity                int64
InvoiceDate    datetime64[ns]
Price                 float64
Customer ID            object
Country                object
dtype: object

In [9]:
# Returned orders are out of scope for this analysis: keep only the invoices
# whose number does not contain a 'C'.
returned = df['Invoice'].str.contains('C')
df = df.loc[~returned]

In [10]:
df[df['Quantity'] < 0]

Unnamed: 0,Invoice,StockCode,Description,Quantity,InvoiceDate,Price,Customer ID,Country


In [11]:
df.head(1)

Unnamed: 0,Invoice,StockCode,Description,Quantity,InvoiceDate,Price,Customer ID,Country
0,489434,85048,15CM CHRISTMAS GLASS BALL 20 LIGHTS,12,2009-12-01 07:45:00,6.95,13085,United Kingdom


In [12]:
# Adding derived columns
# Amount = line total (quantity x unit price) for each invoice row.
# .assign() returns a new frame, so nothing is written into the filtered frame
# (avoids SettingWithCopyWarning).
df = df.assign(Amount=df['Quantity'] * df['Price'])
df.head()

Unnamed: 0,Invoice,StockCode,Description,Quantity,InvoiceDate,Price,Customer ID,Country,Amount
0,489434,85048,15CM CHRISTMAS GLASS BALL 20 LIGHTS,12,2009-12-01 07:45:00,6.95,13085,United Kingdom,83.4
1,489434,79323P,PINK CHERRY LIGHTS,12,2009-12-01 07:45:00,6.75,13085,United Kingdom,81.0
2,489434,79323W,WHITE CHERRY LIGHTS,12,2009-12-01 07:45:00,6.75,13085,United Kingdom,81.0
3,489434,22041,"RECORD FRAME 7"" SINGLE SIZE",48,2009-12-01 07:45:00,2.1,13085,United Kingdom,100.8
4,489434,21232,STRAWBERRY CERAMIC TRINKET BOX,24,2009-12-01 07:45:00,1.25,13085,United Kingdom,30.0


## RFM model

In [14]:
# Latest date in the dataset
df['InvoiceDate'].max()

Timestamp('2011-12-09 12:50:00')

In [15]:
# Reference "today" for the recency calculation: 2011-12-12, shortly after the
# latest invoice in the data.
today_date = datetime(year=2011, month=12, day=12)

In [16]:
# One row per customer, using pandas' built-in aggregations instead of
# per-group Python lambdas:
#   InvoiceDate -> whole days between today_date and the customer's last invoice
#   Invoice     -> number of distinct invoices
#   Amount      -> total amount spent
rfm = (
    df.groupby('Customer ID')
      .agg({'InvoiceDate': 'max', 'Invoice': 'nunique', 'Amount': 'sum'})
      # Turn the last-purchase timestamp into a day count in one vectorised step.
      .assign(InvoiceDate=lambda agg: (pd.Timestamp(today_date) - agg['InvoiceDate']).dt.days)
)

In [17]:
# Give the aggregated columns their RFM names.
rfm = rfm.rename(columns={
    'InvoiceDate': 'recency',
    'Invoice': 'frequency',
    'Amount': 'monetary',
})
rfm

Unnamed: 0_level_0,recency,frequency,monetary
Customer ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
12346,327,12,77556.46
12347,4,8,5633.32
12348,77,5,2019.40
12349,20,4,4428.69
12350,312,1,334.40
...,...,...,...
18283,5,22,2736.65
18284,433,1,461.68
18285,662,1,427.00
18286,478,2,1296.43


In [18]:
# Scoring: split each metric into quartiles and label them 1-4 (4 = best).
rfm = rfm.assign(
    # Labels are reversed for recency: the smallest day counts get score 4.
    recency_score=pd.qcut(rfm['recency'], q=4, labels=[4, 3, 2, 1]),
    # Frequency is ranked with method="first" so tied values get distinct
    # ranks before being cut into quartiles.
    frequency_score=pd.qcut(rfm['frequency'].rank(method="first"), q=4, labels=[1, 2, 3, 4]),
    monetary_score=pd.qcut(rfm['monetary'], q=4, labels=[1, 2, 3, 4]),
)
rfm

Unnamed: 0_level_0,recency,frequency,monetary,recency_score,frequency_score,monetary_score
Customer ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
12346,327,12,77556.46,2,4,4
12347,4,8,5633.32,4,4,4
12348,77,5,2019.40,3,3,3
12349,20,4,4428.69,4,3,4
12350,312,1,334.40,2,1,1
...,...,...,...,...,...,...
18283,5,22,2736.65,4,4,4
18284,433,1,461.68,1,2,2
18285,662,1,427.00,1,2,2
18286,478,2,1296.43,1,2,3


In [19]:
# Segmenting: join the three scores into an 'R-F-M' code such as '4-3-4'.
# Vectorised string concatenation replaces the row-by-row apply().
rfm['RFM Segment'] = (
    rfm['recency_score'].astype(int).astype(str)
    + '-' + rfm['frequency_score'].astype(int).astype(str)
    + '-' + rfm['monetary_score'].astype(int).astype(str)
)

In [20]:
rfm

Unnamed: 0_level_0,recency,frequency,monetary,recency_score,frequency_score,monetary_score,RFM Segment
Customer ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
12346,327,12,77556.46,2,4,4,2-4-4
12347,4,8,5633.32,4,4,4,4-4-4
12348,77,5,2019.40,3,3,3,3-3-3
12349,20,4,4428.69,4,3,4,4-3-4
12350,312,1,334.40,2,1,1,2-1-1
...,...,...,...,...,...,...,...
18283,5,22,2736.65,4,4,4,4-4-4
18284,433,1,461.68,1,2,2,1-2-2
18285,662,1,427.00,1,2,2,1-2-2
18286,478,2,1296.43,1,2,3,1-2-3


## Segment the customer base by RFM score

#### 1. Core - Your Best Customers
- Who They Are: Highly engaged customers who have bought most recently and most often, and who have generated the most revenue.
- Marketing Strategies: Focus on loyalty programs and new product introductions. These customers have proven to have a higher willingness to pay, so don't use discount pricing to generate incremental sales. Instead, focus on value added offers through product recommendations based on previous purchases.

#### 2. Loyal - Your Most Loyal Customers
- Who They Are: Customers who buy the most often from your store.
- Marketing Strategies: Loyalty programs are effective for these repeat visitors. Advocacy programs and reviews are also common loyalty strategies. Lastly, consider rewarding these customers with Free Shipping or other like benefits.

#### 3. Whales - Your Highest Paying Customers
- Who They Are: Customers who have generated the most revenue for your store.
- Marketing Strategies: These customers have demonstrated a high willingness to pay. Consider premium offers, subscription tiers, luxury products, or value add cross/up-sells to increase AOV. Don't waste margin on discounts. 

#### 4. Promising - Faithful customers
- Who They Are: Customers who return often, but do not spend a lot.
- Marketing Strategies: You've already succeeded in creating loyalty. Focus on increasing monetization through product recommendations based on past purchases and incentives tied to spending thresholds (pegged to your store AOV).

#### 5. Rookies - Your Newest Customers
- Who They Are: First time buyers on your site.
- Marketing Strategies: Most customers never graduate to loyal. Having clear strategies in place for first time buyers such as triggered welcome emails will pay dividends.

In [21]:
# Segment the customer base by RFM score
def segments(df):
    """Return the named segment for a single customer row.

    Parameters
    ----------
    df : row-like mapping (for example one row passed by ``DataFrame.apply``)
        Read by key: 'RFM Segment', 'frequency_score', 'monetary_score' and,
        only when the frequency score is 1, 'recency_score'.

    Returns
    -------
    str
        One of 'Core', 'Loyal', 'Whales', 'Promising', 'Rookies' or 'Others'.
        The rules are checked in that order and the first match wins.
    """
    # Top score on all three dimensions.
    if df['RFM Segment'] == '4-4-4':
        return 'Core'

    freq = df['frequency_score']
    money = df['monetary_score']
    top_frequency = freq == 4

    if top_frequency and money == 3:
        return 'Loyal'
    if money == 4:
        return 'Whales'
    if top_frequency and money < 3:
        return 'Promising'
    # Recency is only looked up for customers in the lowest frequency quartile.
    if freq == 1 and df['recency_score'] == 4:
        return 'Rookies'
    return 'Others'

# Label every customer by running segments() on each row.
rfm = rfm.assign(Segment=rfm.apply(segments, axis=1))

In [22]:
rfm

Unnamed: 0_level_0,recency,frequency,monetary,recency_score,frequency_score,monetary_score,RFM Segment,Segment
Customer ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
12346,327,12,77556.46,2,4,4,2-4-4,Whales
12347,4,8,5633.32,4,4,4,4-4-4,Core
12348,77,5,2019.40,3,3,3,3-3-3,Others
12349,20,4,4428.69,4,3,4,4-3-4,Whales
12350,312,1,334.40,2,1,1,2-1-1,Others
...,...,...,...,...,...,...,...,...
18283,5,22,2736.65,4,4,4,4-4-4,Core
18284,433,1,461.68,1,2,2,1-2-2,Others
18285,662,1,427.00,1,2,2,1-2-2,Others
18286,478,2,1296.43,1,2,3,1-2-3,Others


In [23]:
# Per-segment profile: mean recency, frequency and monetary value, plus the
# number of customers in each segment, rounded to one decimal place.
(
    rfm.groupby('Segment')
       .agg({
           'recency': 'mean',
           'frequency': 'mean',
           'monetary': ['mean', 'count'],
       })
       .round(1)
)

Unnamed: 0_level_0,recency,frequency,monetary,monetary
Unnamed: 0_level_1,mean,mean,mean,count
Segment,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
Core,11.7,24.6,14670.6,653
Loyal,92.6,8.8,1738.8,284
Others,263.5,2.4,679.7,4010
Promising,129.9,8.7,699.0,25
Rookies,16.3,1.0,287.3,92
Whales,120.4,10.2,5997.9,817
