# Import libraries

In [6]:
# Import libraries
import pandas as pd
from datetime import timedelta
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
import squarify

# Import Data

In [7]:
# Import Data
# The CSV location can be overridden with the PURCHASE_DATA_CSV environment
# variable so the notebook is not tied to a single machine. When the variable
# is unset, the original path is used unchanged.
import os

PURCHASE_CSV = os.environ.get(
    "PURCHASE_DATA_CSV",
    "/Users/nikita.maloo/Desktop/P/RFM-Customer-Segmentation/purchase_data.csv",
)
# index_col=0 uses the first CSV column as the row index.
purchase = pd.read_csv(PURCHASE_CSV, index_col=0)

In [8]:
purchase.head()

Unnamed: 0,Invoice_Id,Amount,Purchase_Date,Customer_Id
0,272352054.0,140.01,2019-05-05,Customer: 1
1,299813385.0,37.44,2019-11-16,Customer: 1
2,313339477.0,26.93,2020-03-06,Customer: 2
3,315150354.0,27.4,2020-03-22,Customer: 2
4,305737473.0,28.42,2019-12-13,Customer: 3


In [9]:
purchase.shape

(971102, 4)

In [10]:
#Check unique number of values for each column
def unique_counts(purchase):
    """Print one line per column of ``purchase`` with its distinct-value count.

    Each printed line has the form ``<column>  : <count>``. Nothing is returned.
    """
    for column_name in purchase.columns:
        distinct_values = purchase[column_name].nunique()
        print(column_name, " :", distinct_values)
unique_counts(purchase)

Invoice_Id  : 644545
Amount  : 30214
Purchase_Date  : 423
Customer_Id  : 338190


In [11]:
#Easy way to check the number of unique values
purchase.nunique()

Invoice_Id       644545
Amount            30214
Purchase_Date       423
Customer_Id      338190
dtype: int64

In [12]:
#Check all the data types
purchase.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 971102 entries, 0 to 971101
Data columns (total 4 columns):
Invoice_Id       970778 non-null float64
Amount           971102 non-null float64
Purchase_Date    971102 non-null object
Customer_Id      971102 non-null object
dtypes: float64(2), object(2)
memory usage: 37.0+ MB


In [13]:
#Converting the 'Purchase_Date' type into datetime
purchase['Purchase_Date'] = pd.to_datetime(purchase['Purchase_Date'])

In [14]:
purchase.dtypes

Invoice_Id              float64
Amount                  float64
Purchase_Date    datetime64[ns]
Customer_Id              object
dtype: object

# RFM Customer Segmentation

In [15]:
#RFM segmentation starts from here
# Recency is measured against a fixed snapshot date -- the day after the latest
# purchase in the data -- instead of date.today(), so re-running the notebook
# on a later day reproduces the same Recency values. Every Recency value and
# every quartile cut-off move by the same constant, so the quartile scores are
# the same as with today's date.
from datetime import date  # not used in this cell any more; kept in case other code refers to `date`
now = (purchase['Purchase_Date'].max() + timedelta(days=1)).date()
print(now)

2020-06-18


# RFM Table

In [None]:
#create a RFM table
# One row per customer, built with pandas' built-in reducers instead of Python
# lambdas that are called once per customer:
#   Purchase_Date -> whole days between `now` and the customer's latest purchase
#   Invoice_Id    -> number of purchase rows (rows with a missing invoice id
#                    are counted too, exactly as len() counted them)
#   Amount        -> total amount spent
rfmTable = purchase.groupby('Customer_Id').agg({'Purchase_Date': 'max',
                                                'Invoice_Id': 'size',
                                                'Amount': 'sum'})
# Drop the time of day before subtracting so the result is a whole number of
# days, matching `(now - last_purchase.date()).days`.
rfmTable['Purchase_Date'] = (pd.Timestamp(now)
                             - rfmTable['Purchase_Date'].dt.normalize()).dt.days

In [None]:
rfmTable.head(3)

In [None]:
# Move Customer_Id from the index into a regular column. The guard keeps the
# cell safe to re-run: a second reset_index() would add a stray 'index' column.
if rfmTable.index.name == 'Customer_Id':
    rfmTable = rfmTable.reset_index()

# Recency , Frequency & Monetary Value

In [None]:
#Renaming the columns to the appropriate names
#Recency — number of days since the last purchase
#Frequency — number of transactions made over a given period
#Monetary — amount spent over a given period of time
rfmTable.rename(columns = {'Purchase_Date' : 'Recency',
                           'Invoice_Id' : 'Frequency',
                           'Amount' : 'Monetary_Value'},inplace = True) 

In [None]:
rfmTable.head(3)

In [None]:
##pip install chart_studio

In [None]:
##rfmTable.iplot(kind = 'scatter', x = 'Recency', y = 'Customer_Id', mode = 'markers', size = 5)

In [None]:
##sns.distplot(rfmTable['Recency'], kde = False , bins = 40)

In [None]:
##sns.distplot(rfmTable['Monetary_Value'], kde = False , bins = 10)

##plt.xlim(0, 100)

In [None]:
# sns.distplot(rfmTable['Recency'], kde = False)

In [None]:
# sns.distplot(rfmTable['Frequency'], kde = False , bins = 1000)
# ##sns.plt.ylim(0, 20)
# plt.xlim(0, 30)

In [None]:
# sns.distplot(rfmTable['Monetary_Value'], kde = False , bins = 1000)
# plt.xlim(0, 2000)

In [None]:
#Split the metrics into segments by using quartiles
# Restrict the computation to the three RFM metrics: rfmTable also holds the
# string column Customer_Id, and DataFrame.quantile raises TypeError on
# non-numeric columns in pandas >= 2.0 (numeric_only now defaults to False).
rfm_metrics = ['Recency', 'Frequency', 'Monetary_Value']
quantiles = rfmTable[rfm_metrics].quantile(q=[0.25, 0.5, 0.75])
# Nested dict of the form {metric: {0.25: value, 0.5: value, 0.75: value}}.
quantiles = quantiles.to_dict()
print(quantiles)

# Segmented RFM Table

In [None]:
#create a segmented RFM Table
# Work on a copy: plain assignment would only alias rfmTable, so every column
# added to segmented_rfm afterwards would also appear in rfmTable.
segmented_rfm = rfmTable.copy()
segmented_rfm.head()

In [None]:
#logic to define the best customers
#The lowest recency, highest frequency and monetary amounts are our best customers.
def RScore(x,d):
    """Return the recency quartile score (1-4) for the value ``x``.

    ``d`` is a nested mapping whose ``d['Recency']`` entry holds the cut-offs
    under the keys 0.25, 0.50 and 0.75. Smaller values score better: 1 when
    ``x`` is at or below the 0.25 cut-off, 4 when it is above the 0.75 one.
    """
    cutoffs = d['Recency']
    for score, quantile in enumerate((0.25, 0.50, 0.75), start=1):
        if x <= cutoffs[quantile]:
            return score
    return 4
    
def FScore(x,d):
    """Return the frequency quartile score (1-4) for the value ``x``.

    ``d`` is a nested mapping whose ``d['Frequency']`` entry holds the cut-offs
    under the keys 0.25, 0.50 and 0.75. Larger values score better: 4 when
    ``x`` is at or below the 0.25 cut-off, 1 when it is above the 0.75 one.
    """
    cutoffs = d['Frequency']
    for quantile, score in ((0.25, 4), (0.50, 3), (0.75, 2)):
        if x <= cutoffs[quantile]:
            return score
    return 1

def MScore(x,d):
    """Return the monetary quartile score (1-4) for the value ``x``.

    ``d`` is a nested mapping whose ``d['Monetary_Value']`` entry holds the
    cut-offs under the keys 0.25, 0.50 and 0.75. Larger values score better:
    4 when ``x`` is at or below the 0.25 cut-off, 1 when it is above the 0.75
    one.
    """
    cutoffs = d['Monetary_Value']
    if x <= cutoffs[0.25]:
        return 4
    if x <= cutoffs[0.50]:
        return 3
    return 2 if x <= cutoffs[0.75] else 1

    

In [None]:
# Score every customer on each metric and store the result in a new column of
# the segmented RFM table: (source metric, new score column, scoring function).
for metric, score_column, scorer in (('Recency', 'r_quartile', RScore),
                                     ('Frequency', 'f_quartile', FScore),
                                     ('Monetary_Value', 'm_quartile', MScore)):
    segmented_rfm[score_column] = segmented_rfm[metric].apply(scorer, args=(quantiles,))
segmented_rfm.head()

# RFM Distribution Plots

In [None]:
# Three stacked panels, one per RFM metric, drawn on explicit axes.
# NOTE(review): sns.distplot is deprecated in newer seaborn releases in favour
# of histplot/displot -- confirm the installed version before switching.
fig, (recency_ax, frequency_ax, monetary_ax) = plt.subplots(3, 1, figsize=(12, 10))

# Distribution of Recency
sns.distplot(segmented_rfm['Recency'], ax=recency_ax)

# Distribution of Frequency, with the x-axis limited to 0-60
sns.distplot(segmented_rfm['Frequency'], bins=500, ax=frequency_ax)
frequency_ax.set_xlim(0, 60)

# Distribution of Monetary_Value, with the x-axis limited to 0-2000
sns.distplot(segmented_rfm['Monetary_Value'], bins=500, ax=monetary_ax)
monetary_ax.set_xlim(0, 2000)

# Show the plot
plt.show()

# Create RFM Segments

In [None]:
#Combine RFM Segment
# Concatenate the three quartile scores as text, e.g. scores 1, 4, 2 -> '142'.
score_text = segmented_rfm[['r_quartile', 'f_quartile', 'm_quartile']].astype(str)
segmented_rfm['RFM_Segment'] = (score_text['r_quartile']
                                + score_text['f_quartile']
                                + score_text['m_quartile'])
segmented_rfm.head()

# RFM Score

In [None]:
# Summing the Score
# Calculate RFM_Score
segmented_rfm['RFM_Score'] = segmented_rfm[['r_quartile','f_quartile','m_quartile']].sum(axis=1)
segmented_rfm.head()

In [None]:
# Count number of unique segments(RFM_Segment)
# Grouping by RFM_Segment and taking nunique() of that same column yields 1 for
# every group, so the sum printed below equals the number of distinct segment
# codes present in the table.
rfm_count_unique = segmented_rfm.groupby('RFM_Segment')['RFM_Segment'].nunique()
print(rfm_count_unique.sum())

In [None]:
segmented_rfm.nunique()

# Top 10 Best Customers

In [None]:
#Who are the top 10 of our best customers!
segmented_rfm[segmented_rfm['RFM_Segment']=='111'].sort_values('Monetary_Value', ascending=False).head(10)

In [None]:
segmented_rfm.head(3)

In [None]:
segmented_rfm.info()

In [None]:
# # Define Segment_Level function
# def Segment_Level(segmented_rfm):
#     if (segmented_rfm['RFM_Segment'] == '111'):
#         return 'Best Customers'
    
#     elif segmented_rfm['RFM_Segment'] == '444': 
#         return 'Lost Cheap Customers' 
    
#     elif segmented_rfm['RFM_Segment'] == '311': 
#         return 'Almost Lost' 
    
#     elif segmented_rfm['RFM_Segment'] == '411': 
#         return 'Lost Customers'   
    
#     elif segmented_rfm['RFM_Segment'][1] == '1' and not segmented_rfm['RFM_Segment'] == '111' and not segmented_rfm['RFM_Segment'][1:] == '13'and not segmented_rfm['RFM_Segment'][1:] == '14'and not segmented_rfm['RFM_Segment'] == '311' and not segmented_rfm['RFM_Segment'] == '411':
#         return 'Loyal Customers'
    
#     elif segmented_rfm['RFM_Segment'][2] == '1' and not segmented_rfm['RFM_Segment'] == '111' and not segmented_rfm['RFM_Segment'] == '311' and not segmented_rfm['RFM_Segment'] == '411':
#         return 'Big Spenders'
    
#     elif segmented_rfm['RFM_Segment'][1:] == '13' or segmented_rfm['RFM_Segment'][1:] == '14':
#         return 'Faithful customers'
    
#     elif segmented_rfm['RFM_Segment'][0:1] == '14': 
#         return 'Rookies-Newest Customers'
    
#     elif segmented_rfm['RFM_Segment'][0:1] == '44' and not segmented_rfm['RFM_Segment'] == '444': 
#         return 'Slipping-Once Loyal, Now Gone'
    
#     else:
#         return 'Others'

# Segment Level

In [None]:
# Define Segment_Level function
# Segment label for every RFM code that has one. Any code not listed here is
# labelled 'Others' by Segment_Level.
_SEGMENT_LABELS = {
    code: label
    for label, codes in (
        ('Champions', ['111']),
        ('Loyal Customers', ['112', '211', '121']),
        ('Potential Loyalist', ['131', '141']),
        ('Recent Customers', ['142', '143', '144']),
        ('Promising', ['221', '222', '122', '212', '123', '132', '213', '231',
                       '321', '312', '223', '322', '232']),
        ('Needs Attention', ['431', '341']),
        ('About to Sleep', ['332', '333', '331', '313', '323', '343', '233']),
        # '213' used to be listed here as well, but the 'Promising' test ran
        # first, so that entry could never match. It is dropped so the table
        # states the assignment that actually happens.
        ('Price Sensitive', ['114', '113', '124', '214']),
        ('Cant Lose Them', ['411', '421', '311']),
        ('Hibernating', ['433', '434', '443', '344', '334']),
        ('Lost', ['444']),
    )
    for code in codes
}


def Segment_Level(segmented_rfm):
    """Return the segment label for one customer row.

    Parameters
    ----------
    segmented_rfm : mapping-like row (e.g. a pandas Series or a dict)
        Must provide the key 'RFM_Segment', the three-character code built
        from the recency, frequency and monetary quartile scores.

    Returns
    -------
    str
        The label registered for that code, or 'Others' when the code has no
        entry in the lookup table.
    """
    return _SEGMENT_LABELS.get(segmented_rfm['RFM_Segment'], 'Others')
               
    
# Create a new variable RFM_Level
# Segment_Level only reads row['RFM_Segment'], so mapping over that single
# column yields the same labels as DataFrame.apply(axis=1) without building a
# Series object for every row of the table.
segmented_rfm['Segment_Level'] = segmented_rfm['RFM_Segment'].map(
    lambda code: Segment_Level({'RFM_Segment': code})
)


# Print the header with top 5 rows to the console
segmented_rfm.head()

In [None]:
segmented_rfm.to_csv('segmented_rfm.csv')

In [None]:
# Per-segment summary: mean Recency, mean Frequency, and both the mean and the
# row count of Monetary_Value, all rounded to one decimal place. Because
# Monetary_Value is given a list of reducers, the resulting columns form a
# two-level index (metric, reducer).
rfm_level_agg = segmented_rfm.groupby('Segment_Level').agg({
    'Recency': 'mean',
    'Frequency': 'mean',
    'Monetary_Value': ['mean', 'count']
}).round(1)

# Print the aggregated dataset
print(rfm_level_agg)

In [None]:
# Flatten the two-level column index left by the aggregation. The guard keeps
# the cell safe to re-run: droplevel() raises on an index with only one level.
if rfm_level_agg.columns.nlevels > 1:
    rfm_level_agg.columns = rfm_level_agg.columns.droplevel()

In [None]:
print(rfm_level_agg)

In [None]:
rfm_level_agg.columns = ['RecencyMean','FrequencyMean','MonetaryMean', 'Count']

In [None]:
print(rfm_level_agg)

# Plot the Treemap

In [None]:
#Create the plot
fig, ax = plt.subplots(figsize=(15, 5))

# Colour of each segment, keyed by segment name. Labels and colours are looked
# up from the aggregated table's index instead of being listed by position, so
# a segment that is absent from the data cannot shift the labels or colours of
# the remaining tiles.
segment_colors = {
    'About to Sleep': "orange",
    'Cant Lose Them': "blue",
    'Champions': "green",
    'Hibernating': "grey",
    'Lost': "red",
    'Loyal Customers': "yellow",
    'Needs Attention': "Aqua",
    'Others': "Pink",
    'Potential Loyalist': "Teal",
    'Price Sensitive': "Gold",
    'Promising': "violet",
    'Recent Customers': "navy",
}
segments = list(rfm_level_agg.index)

# Tile area is proportional to the number of customers in the segment.
squarify.plot(sizes=rfm_level_agg['Count'],
              label=segments,
              # Unknown segment names fall back to a neutral colour.
              color=[segment_colors.get(name, "lightgrey") for name in segments],
              alpha=.8,
              ax=ax)

plt.title("RFM Segments",fontsize=25,fontweight="bold")
plt.axis('off')
plt.show()
