# Purpose of RFM Analysis

RFM analysis helps determine which customers are more valuable to the business by segmenting customers according to three criteria: Recency, Frequency and Monetary.

In this way, marketing strategies can be created more effectively.

## Importing Libraries

In [1]:
import datetime as dt
import warnings

import pandas as pd

# Suppress every warning for the rest of the session.
warnings.filterwarnings('ignore')

# Display settings: no column limit, wide output, and no wrapping of wide
# frames across several printed blocks.
pd.set_option("display.max_columns", None)
pd.set_option("display.width", 500)
pd.set_option("display.expand_frame_repr", False)

## Reading and Examining the Dataset

In [2]:
# Keep the raw load in df_ untouched and do all further work on an
# independent copy.
df_ = pd.read_csv("/kaggle/input/customer-segmentation-rfm/customer_segmentation_10k.csv")
df = df_.copy(deep=True)
df.head(5)

Unnamed: 0,customer_id,qtt_order,total_spent,last_order
0,0,370,189719.04,2022-10-30 07:00:00
1,1,48,159092.71,2022-06-30 12:00:00
2,2,4,28239.3,2022-08-21 01:00:00
3,3,157,378630.99,2022-06-30 04:00:00
4,4,16,84763.2,2022-03-18 20:00:00


In [3]:
# Number of missing values in each column.
df.isna().sum()

customer_id    0
qtt_order      0
total_spent    0
last_order     0
dtype: int64

In [4]:
# Summary statistics, one row per numeric column.
df.describe().transpose()

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
customer_id,10000.0,4999.5,2886.89568,0.0,2499.75,4999.5,7499.25,9999.0
qtt_order,10000.0,102.7768,135.589727,0.0,25.0,54.0,118.0,1274.0
total_spent,10000.0,226520.034685,252625.646706,833.02,63397.4675,138215.65,288853.235,2181515.12


The summary above shows that some customers have a `qtt_order` of zero. A customer with no orders has no purchase behaviour to score, so we remove those rows from the dataset.

In [5]:
# Keep only customers with at least one order. The explicit copy makes df an
# independent frame, so the column assignments in later cells do not write to
# a slice of the original data.
df = df.loc[df['qtt_order'] > 0].copy()

In [6]:
# Inspect the column dtypes before converting last_order.
df.dtypes

customer_id      int64
qtt_order        int64
total_spent    float64
last_order      object
dtype: object

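We convert the `last_order` column to datetime and check the most recent order in the data. We then fix the analysis date (`today_date`) shortly after that order, so every recency value is positive.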

In [7]:
# Parse last_order into datetime values so date arithmetic works on it.
df['last_order'] = pd.to_datetime(df['last_order'])
# Most recent order timestamp in the data.
df['last_order'].max()

Timestamp('2023-01-07 17:00:00')

In [8]:
# Reference date that recency is measured against.
today_date = dt.datetime(year=2023, month=1, day=9)

We create our recency, frequency and monetary variables.

Recency, Frequency, and Monetary (**RFM**) analysis is a method used for customer segmentation and behavior analysis. Each component is defined as follows:

**1.** **Recency:** This refers to how recently a customer made a purchase. Customers who have made a purchase more recently tend to have a higher loyalty to the business. This metric is used to understand customer engagement.

**2.** **Frequency:** This measures how many times a customer has made purchases within a specific period. Customers who shop more frequently are generally considered more valuable to the business. This serves as an indicator of customer loyalty.

**3.** **Monetary:** This represents the total amount of money a customer has spent within a specific period. Customers who spend more are viewed as some of the most valuable clients for the business.

In [9]:
# Recency: whole days between the analysis date and each customer's last
# order, computed on the whole column at once instead of row by row.
df['recency'] = (today_date - df['last_order']).dt.days
# Frequency and monetary are taken directly from the source columns.
df['frequency'] = df['qtt_order']
df['monetary'] = df['total_spent']
# Explicit copy: score columns are added to rfm afterwards, and they should be
# written to an independent frame rather than to a slice of df.
rfm = df[['customer_id', 'recency', 'frequency', 'monetary']].copy()
rfm.head()

Unnamed: 0,customer_id,recency,frequency,monetary
0,0,70,370,189719.04
1,1,192,48,159092.71
2,2,140,4,28239.3
3,3,192,157,378630.99
4,4,296,16,84763.2


In [10]:
# Split each metric into five quantile bins.
# Recency labels run 5 -> 1, so the smallest recency values score 5.
rfm['recency_score'] = pd.qcut(rfm['recency'], 5, labels=[5, 4, 3, 2, 1])
# Frequency is binned on its rank (ties share the lowest rank) rather than on
# the raw order count.
rfm['frequency_score'] = pd.qcut(rfm['frequency'].rank(method='min'), 5, labels=[1, 2, 3, 4, 5])
rfm['monetary_score'] = pd.qcut(rfm['monetary'], 5, labels=[1, 2, 3, 4, 5])
rfm.head()

Unnamed: 0,customer_id,recency,frequency,monetary,recency_score,frequency_score,monetary_score
0,0,70,370,189719.04,5,5,4
1,1,192,48,159092.71,3,3,3
2,2,140,4,28239.3,4,1,1
3,3,192,157,378630.99,3,5,5
4,4,296,16,84763.2,2,1,2


In [11]:
# Summary statistics of the RFM table, one row per numeric column.
rfm.describe().transpose()

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
customer_id,9985.0,4998.189084,2886.868345,0.0,2498.0,4996.0,7498.0,9999.0
recency,9985.0,213.963345,92.04404,1.0,136.0,221.0,291.0,408.0
frequency,9985.0,102.931197,135.632965,1.0,25.0,54.0,118.0,1274.0
monetary,9985.0,226794.454758,252710.715381,833.02,63582.77,138330.44,289040.79,2181515.12


In [12]:
# Two-character code: recency score followed by frequency score.
rfm['rf_score'] = rfm['recency_score'].astype(str).add(rfm['frequency_score'].astype(str))

In [13]:
# Regex pattern -> segment name. Each pattern is matched against the
# two-character rf_score (first character recency, second frequency).
seg_map = {
    # Both characters are 1 or 2.
    r'[1-2][1-2]': 'hibernating',
    r'[1-2][3-4]': 'at_Risk',
    r'[1-2]5': 'cant_loose',
    r'3[1-2]': 'about_to_sleep',
    # Both characters are 3.
    r'33': 'need_attention',
    r'[3-4][4-5]': 'loyal_customers',
    r'41': 'promising',
    r'51': 'new_customers',
    r'[4-5][2-3]': 'potential_loyalists',
    r'5[4-5]': 'champions',
}

In [14]:
# Translate every rf_score into its segment name via the regex mapping.
rfm['segment'] = rfm['rf_score'].replace(regex=seg_map)

In [15]:
# Average recency, frequency and monetary value within each segment.
rfm.groupby('segment')[['recency', 'frequency', 'monetary']].mean()

Unnamed: 0_level_0,recency,frequency,monetary
segment,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
about_to_sleep,221.329365,21.026455,160712.618519
at_Risk,306.508452,77.783638,227885.14307
cant_loose,305.620199,306.216216,321558.833585
champions,83.331823,214.851642,340267.283262
hibernating,309.531198,19.863611,141999.742281
loyal_customers,185.20425,211.605077,301316.650809
need_attention,218.172589,55.27665,225756.396929
new_customers,84.211144,10.994135,139871.108328
potential_loyalists,117.238614,43.456062,205740.483303
promising,152.306849,11.336986,149327.320247


## Wrapping the Entire Process in a Function

In [16]:
def rfm_segments(year, month, day, path="customer_segmentation_10k.csv"):
    """Build the RFM table, quintile scores and segment label per customer.

    Args:
        year: Year of the analysis date that recency is measured against.
        month: Month of the analysis date.
        day: Day of the analysis date.
        path: CSV file to read. It must provide the columns ``customer_id``,
            ``qtt_order``, ``total_spent`` and ``last_order``. Defaults to
            the file name this function previously hard-coded.

    Returns:
        A DataFrame with ``customer_id``, ``recency``, ``frequency``,
        ``monetary``, the three ``*_score`` columns, ``rf_score`` and
        ``segment``. Rows with ``qtt_order`` of zero are excluded and the
        remaining rows keep their index from the CSV.
    """
    df = pd.read_csv(path)
    # Copy after filtering so the assignments below never write to a slice.
    df = df.loc[df['qtt_order'] > 0].copy()
    df['last_order'] = pd.to_datetime(df['last_order'])
    today_date = dt.datetime(year, month, day)

    # RFM metrics. Recency is the number of whole days between the analysis
    # date and the last order, computed on the whole column at once.
    rfm = df[['customer_id']].copy()
    rfm['recency'] = (today_date - df['last_order']).dt.days
    rfm['frequency'] = df['qtt_order']
    rfm['monetary'] = df['total_spent']

    # Quintile scores. Recency labels are reversed so the smallest recency
    # values score 5; frequency is binned on its rank (ties share the lowest
    # rank) instead of the raw count.
    rfm['recency_score'] = pd.qcut(x=rfm['recency'], q=5, labels=[5, 4, 3, 2, 1])
    rfm['frequency_score'] = pd.qcut(
        x=rfm['frequency'].rank(method='min'), q=5, labels=[1, 2, 3, 4, 5]
    )
    rfm['monetary_score'] = pd.qcut(x=rfm['monetary'], q=5, labels=[1, 2, 3, 4, 5])

    # Segmentation uses only recency and frequency: rf_score is the recency
    # score followed by the frequency score, and each regex below is matched
    # against that two-character code.
    rfm['rf_score'] = rfm['recency_score'].astype(str) + rfm['frequency_score'].astype(str)
    seg_map = {
        r'[1-2][1-2]': 'hibernating',  # both characters are 1 or 2
        r'[1-2][3-4]': 'at_Risk',
        r'[1-2]5': 'cant_loose',
        r'3[1-2]': 'about_to_sleep',
        r'33': 'need_attention',  # both characters are 3
        r'[3-4][4-5]': 'loyal_customers',
        r'41': 'promising',
        r'51': 'new_customers',
        r'[4-5][2-3]': 'potential_loyalists',
        r'5[4-5]': 'champions',
    }
    rfm['segment'] = rfm['rf_score'].replace(seg_map, regex=True)
    return rfm


## Customer Lists for the Advertising Department




In [17]:
def adv_lists(data=None, output_dir=""):
    """Write one CSV of customer ids for each RFM segment.

    For every segment name a file ``<segment>.csv`` is written containing a
    single ``customer_id`` column with the customers in that segment. A
    segment with no customers still gets a file holding only the header.

    Args:
        data: Frame with ``customer_id`` and ``segment`` columns. When
            omitted, the module-level ``rfm`` frame is used.
        output_dir: Directory the files are written to. The default empty
            string writes them to the current working directory.
    """
    import os

    if data is None:
        data = rfm

    segment_names = (
        'about_to_sleep',
        'at_Risk',
        'cant_loose',
        'champions',
        'hibernating',
        'loyal_customers',
        'need_attention',
        'new_customers',
        'potential_loyalists',
        'promising',
    )
    # One pass per segment keeps selection and export together, so each file
    # always holds that segment's ids (about_to_sleep.csv previously received
    # the entire RFM table).
    for segment in segment_names:
        customer_ids = data.loc[data['segment'] == segment, 'customer_id']
        customer_ids.to_csv(os.path.join(output_dir, f"{segment}.csv"), index=False)