In [1]:
#Prepare basket variables
#Rename variables
#Create an RFM model with 3 levels
#Define 3 segments
#Prepare the final overview table

Directory, Libraries, and data

In [2]:
# Switch the working directory to the RFM course folder so that the relative
# path used to load customer_data.csv in the loading cell resolves.
# NOTE(review): hardcoded absolute path under /content/drive — presumably a
# Google Drive mount in Colab; confirm/adjust before running anywhere else.
%cd /content/drive/MyDrive/Business Analyst course/Segmentation/RFM

/content/drive/MyDrive/Business Analyst course/Segmentation/RFM


In [3]:
#Libraries
import pandas as pd
import matplotlib.pyplot as plt
from datetime import timedelta

In [4]:
# Load the data: read the customer CSV and discard every row that contains
# at least one missing value
df = pd.read_csv('customer_data.csv').dropna(axis=0, how='any')
df.head(5)

Unnamed: 0,customer_id,revenue,most_recent_visit,number_of_orders,recency_days
0,22086,777,5/14/2006,9,232
1,2290,1555,9/8/2006,16,115
2,26377,336,11/19/2006,5,43
3,24650,1189,10/29/2006,12,64
4,12883,1229,12/9/2006,12,23


In [5]:
# Basket variable: average revenue per order, stored as the "monetary"
# dimension used by the RFM scoring below
df['monetary'] = df['revenue'].div(df['number_of_orders'])
df.head(n=2)

Unnamed: 0,customer_id,revenue,most_recent_visit,number_of_orders,recency_days,monetary
0,22086,777,5/14/2006,9,232,86.333333
1,2290,1555,9/8/2006,16,115,97.1875


In [6]:
# Drop the columns that are no longer needed: revenue has been folded into
# "monetary", and most_recent_visit is not read by any later cell
df = df.drop(columns=['revenue', 'most_recent_visit'])
df.head(5)

Unnamed: 0,customer_id,number_of_orders,recency_days,monetary
0,22086,9,232,86.333333
1,2290,16,115,97.1875
2,26377,5,43,67.2
3,24650,12,64,99.083333
4,12883,12,23,102.416667


In [7]:
# Rename the raw columns to the RFM vocabulary used from here on.
# The result is reassigned instead of using inplace=True, which keeps this
# cell in the same "df = df.<method>(...)" form as the drop cell above.
df = df.rename(columns={
    'number_of_orders': 'frequency',
    'recency_days': 'recency',
})
df.head()

Unnamed: 0,customer_id,frequency,recency,monetary
0,22086,9,232,86.333333
1,2290,16,115,97.1875
2,26377,5,43,67.2
3,24650,12,64,99.083333
4,12883,12,23,102.416667


# RFM Preparation

In [8]:
# Score every customer from 1 to 3 on each RFM dimension using terciles.
# Frequency and monetary: the lowest tercile is labelled 1, the highest 3.
df['F'] = pd.qcut(df['frequency'], 3, labels=range(1, 4))
df['M'] = pd.qcut(df['monetary'], 3, labels=range(1, 4))
# Recency: the labels run 3 -> 1, so the lowest recency values get score 3.
df['R'] = pd.qcut(df['recency'], 3, labels=range(3, 0, -1))
df.head(n=3)

Unnamed: 0,customer_id,frequency,recency,monetary,F,M,R
0,22086,9,232,86.333333,2,1,1
1,2290,16,115,97.1875,3,2,2
2,26377,5,43,67.2,1,1,3


In [9]:
# RFM score: add the three 1-3 scores into a single total per customer.
# R, F and M hold the labels produced by pd.qcut, so they are cast to
# integers first; the row-wise sum is then an explicit integer addition
# instead of depending on how pandas coerces label columns in a reduction.
df['RFM'] = df[['R', 'F', 'M']].astype(int).sum(axis=1)
df.head(3)

Unnamed: 0,customer_id,frequency,recency,monetary,F,M,R,RFM
0,22086,9,232,86.333333,2,1,1,4
1,2290,16,115,97.1875,3,2,2,7
2,26377,5,43,67.2,1,1,3,5


In [13]:
#Create the RFM function
def rfm_segment(df, excellent_min=7, good_min=4):
    """Map one customer's RFM score to a segment name.

    Parameters
    ----------
    df : mapping or pandas.Series
        A single row (despite the name, not a whole DataFrame). Only its
        'RFM' entry is read.
    excellent_min : number, default 7
        Lowest RFM score that is labelled 'Excellent'.
    good_min : number, default 4
        Lowest RFM score that is labelled 'Good'.

    Returns
    -------
    str
        'Excellent' when the score is at least ``excellent_min``, 'Good' when
        it is at least ``good_min`` (and below ``excellent_min``), otherwise
        'Bad'.
    """
    score = df['RFM']
    # Tiers are checked from the top down, so each test only needs a lower
    # bound: reaching the second test already implies score < excellent_min.
    if score >= excellent_min:
        return 'Excellent'
    if score >= good_min:
        return 'Good'
    # Everything below good_min. A NaN score also ends up here, because both
    # comparisons above evaluate to False.
    return 'Bad'

# RFM segmentation

In [14]:
# Label every customer by running rfm_segment on each row of the table
df['RFM_level'] = df.apply(rfm_segment, axis='columns')
df.head(5)

Unnamed: 0,customer_id,frequency,recency,monetary,F,M,R,RFM,RFM_level
0,22086,9,232,86.333333,2,1,1,4,Good
1,2290,16,115,97.1875,3,2,2,7,Excellent
2,26377,5,43,67.2,1,1,3,5,Good
3,24650,12,64,99.083333,3,2,3,8,Excellent
4,12883,12,23,102.416667,3,2,3,8,Excellent


In [16]:
# Segment overview: per RFM_level, the mean recency, mean frequency and mean
# monetary value, plus the number of non-null monetary rows in the segment
(
    df
    .groupby(by='RFM_level')
    .agg({'recency': 'mean', 'frequency': 'mean', 'monetary': ['mean', 'count']})
)

Unnamed: 0_level_0,recency,frequency,monetary,monetary
Unnamed: 0_level_1,mean,mean,mean,count
RFM_level,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
Bad,362.096867,6.232481,71.650599,2426
Excellent,99.744546,11.900979,104.214756,14805
Good,215.264143,8.868104,92.317502,22768
