<h1>Customer Segmentation using RFM Analysis</h1>

In [2]:
import matplotlib as plt
import pandas as pd
import numpy as np

In [3]:
#path of the raw orders export; point this at your own source file
SOURCE_CSV = 'C:/Users/toshiba/Desktop/AT&R/FP_src_RFM.csv'

#load the orders and show the inferred column types
df = pd.read_csv(SOURCE_CSV)
df.dtypes

Order ID                int64
Order Date             object
Full Name (Billing)    object
Phone (Billing)        object
Order Total Amount      int64
dtype: object

In [4]:
#customer ID = company prefix ('FP' in this case) + last 7 characters of the billing phone
df['Customer ID'] = 'FP' + df['Phone (Billing)'].str[-7:].astype(str)

In [5]:
#re-check the column types; 'Customer ID' should now be listed
df.dtypes

Order ID                int64
Order Date             object
Full Name (Billing)    object
Phone (Billing)        object
Order Total Amount      int64
Customer ID            object
dtype: object

In [6]:
#cast 'Order Date' to datetime64[ns]; the recency calculation needs real dates, not strings
df = df.astype({'Order Date': 'datetime64[ns]'})

In [7]:
#re-check the column types; 'Order Date' should now be datetime64[ns]
df.dtypes

Order ID                        int64
Order Date             datetime64[ns]
Full Name (Billing)            object
Phone (Billing)                object
Order Total Amount              int64
Customer ID                    object
dtype: object

In [8]:
#here recency, frequency, monetary, and avg. order values are calculated for each customer

#the latest order date in the dataset is the reference point for recency; it is the
#same for every customer, so it is computed once instead of once per group
snapshot_date = df['Order Date'].max().date()

#helper column: the agg dict can hold each column key only once, so the order total
#is duplicated to get both its sum and its mean
df['Avg Amount'] = df['Order Total Amount']

#NOTE: customers are keyed on ID and billing name together, so the same ID under
#two different name spellings produces two separate rows
df_RFM = (
    df.groupby(['Customer ID', 'Full Name (Billing)'])
      .agg({
          #days between the dataset's latest order and this customer's latest order
          'Order Date': lambda y: (snapshot_date - y.max().date()).days,
          #number of distinct orders placed
          'Order ID': 'nunique',
          #total amount spent
          'Order Total Amount': lambda y: round(y.sum(), 2),
          #mean amount per order row
          'Avg Amount': lambda y: round(y.mean(), 2),
      })
      #rename by source column name rather than by position
      .rename(columns={
          'Order Date': 'Recency',
          'Order ID': 'Frequency',
          'Order Total Amount': 'Monetary',
          'Avg Amount': 'Avg. Order value',
      })
)

In [9]:
#quintile cut-offs (20/40/60/80th percentiles) of each RFM metric, stored as
#{metric: {level: cut-off value}} for the scoring functions
rfm_metrics = df_RFM[['Recency', 'Frequency', 'Monetary']]
quantiles = rfm_metrics.quantile([.2, .4, .6, .8]).to_dict()

In [10]:
#scoring functions: map a raw R, F or M value to a 1-5 score using the quantile cut-offs

def Rscore(x, cutoffs=None):
    """Score a recency value from 5 (lowest recency) down to 1 (highest).

    Parameters
    ----------
    x : number
        Recency value to score. It is compared with ``<=`` against each cut-off.
    cutoffs : dict, optional
        Mapping shaped like the notebook-level ``quantiles`` dict, i.e.
        ``{'Recency': {0.2: ..., 0.4: ..., 0.6: ..., 0.8: ...}}``. When omitted,
        the notebook-level ``quantiles`` is used, so ``Rscore(x)`` keeps working.

    Returns
    -------
    int
        5 if ``x`` is at or below the 0.2 cut-off, then 4, 3, 2 for the 0.4,
        0.6 and 0.8 cut-offs, otherwise 1.
    """
    table = quantiles if cutoffs is None else cutoffs
    bounds = table['Recency']
    #walk the cut-offs from lowest to highest; the first one that x does not exceed wins
    for score, level in ((5, .2), (4, .4), (3, .6), (2, .8)):
        if x <= bounds[level]:
            return score
    #above every cut-off (a NaN also lands here, since all comparisons with NaN are False)
    return 1
    
def FMscore(x, c, cutoffs=None):
    """Score a frequency or monetary value from 1 (lowest) up to 5 (highest).

    Parameters
    ----------
    x : number
        Value to score. It is compared with ``<=`` against each cut-off.
    c : str
        Key selecting which metric's cut-offs to use (the notebook passes
        ``'Frequency'`` or ``'Monetary'``).
    cutoffs : dict, optional
        Mapping shaped like the notebook-level ``quantiles`` dict, i.e.
        ``{c: {0.2: ..., 0.4: ..., 0.6: ..., 0.8: ...}}``. When omitted, the
        notebook-level ``quantiles`` is used, so ``FMscore(x, c)`` keeps working.

    Returns
    -------
    int
        1 if ``x`` is at or below the 0.2 cut-off, then 2, 3, 4 for the 0.4,
        0.6 and 0.8 cut-offs, otherwise 5.
    """
    table = quantiles if cutoffs is None else cutoffs
    bounds = table[c]
    #walk the cut-offs from lowest to highest; the first one that x does not exceed wins
    for score, level in ((1, .2), (2, .4), (3, .6), (4, .8)):
        if x <= bounds[level]:
            return score
    #above every cut-off (a NaN also lands here, since all comparisons with NaN are False)
    return 5

In [11]:
#score every customer on each RFM dimension; the metric name is passed on to FMscore
df_RFM['R'] = df_RFM['Recency'].apply(Rscore)
df_RFM['F'] = df_RFM['Frequency'].apply(FMscore, args=('Frequency',))
df_RFM['M'] = df_RFM['Monetary'].apply(FMscore, args=('Monetary',))

In [12]:
#RFM Score: the R, F and M scores joined as text, in that order
r_digit, f_digit, m_digit = (df_RFM[col].map(str) for col in ('R', 'F', 'M'))
df_RFM['RFM Score'] = r_digit + f_digit + m_digit

In [13]:
#segment names keyed by a regex over the two-character code "<R score><F score>"
segt_map = {
    #low recency score (R = 1 or 2)
    r'[1-2][1-2]': 'Hibernating',
    r'[1-2][3-4]': 'At Risk',
    r'[1-2]5': 'Can\'t Lose',
    #middle recency score (R = 3)
    r'3[1-2]': 'About to Sleep',
    r'33': 'Need Attention',
    r'[3-4][4-5]': 'Loyal Customers',
    #high recency score (R = 4 or 5)
    r'41': 'Promising Customers',
    r'51': 'New Customers',
    r'[4-5][2-3]': 'Potential Loyal Customers',
    r'5[4-5]': 'Best Customers'
}

#build the R+F code for every customer, then swap each code for its segment name
rf_code = df_RFM['R'].map(str) + df_RFM['F'].map(str)
df_RFM['Segment'] = rf_code.replace(segt_map, regex=True)


In [14]:
#path of the output file; point this at your own destination
DEST_CSV = 'C:/Users/toshiba/Desktop/AT&R/FP_dest_RFM.csv'

#write the scored and segmented customer table
df_RFM.to_csv(DEST_CSV)