### importing libraries 

In [31]:
import pandas as pd 
import plotly.express as px
import plotly.io as pio
import plotly.graph_objects as go
pio.templates.default = "plotly_white"

In [32]:

data = pd.read_csv("rfm_data.csv")
data.head()

Unnamed: 0,CustomerID,PurchaseDate,TransactionAmount,ProductInformation,OrderID,Location
0,8814,2023-04-11,943.31,Product C,890075,Tokyo
1,2188,2023-04-11,463.7,Product A,176819,London
2,4608,2023-04-11,80.28,Product A,340062,New York
3,2559,2023-04-11,221.29,Product A,239145,London
4,9482,2023-04-11,739.56,Product A,194545,Paris


In [33]:
print(data.dtypes)


CustomerID              int64
PurchaseDate           object
TransactionAmount     float64
ProductInformation     object
OrderID                 int64
Location               object
dtype: object


## Calculating RFM Values


In [34]:
from datetime import datetime 

In [35]:
# convert 'PurchseDate' to datetime 
data['PurchaseDate'] = pd.to_datetime(data['PurchaseDate'])

In [36]:
# Calculate Recency 
data['Recency'] = (pd.Timestamp.now() - data['PurchaseDate']).dt.days


In [37]:
# Calculate Frequency
frequency_data = data.groupby ('CustomerID')['OrderID'].count().reset_index()
frequency_data.rename(columns={'OrderID':'Frequency'}, inplace=True)
data = data.merge(frequency_data, on = 'CustomerID', how = 'left')

In [38]:
# calculate Monetary value 
monetary_data =  data.groupby('CustomerID')['TransactionAmount'].sum().reset_index()
monetary_data.rename(columns={'TransactionAmount':'MonetaryValue'}, inplace=True)
data = data.merge(monetary_data, on='CustomerID', how='left')

In [39]:
data.head()

Unnamed: 0,CustomerID,PurchaseDate,TransactionAmount,ProductInformation,OrderID,Location,Recency,Frequency,MonetaryValue
0,8814,2023-04-11,943.31,Product C,890075,Tokyo,462,1,943.31
1,2188,2023-04-11,463.7,Product A,176819,London,462,1,463.7
2,4608,2023-04-11,80.28,Product A,340062,New York,462,1,80.28
3,2559,2023-04-11,221.29,Product A,239145,London,462,1,221.29
4,9482,2023-04-11,739.56,Product A,194545,Paris,462,1,739.56


## Calculating RFM Scores


In [40]:
# Define scoring criteria for each RFM value 
recency_scores = [5,4,3,2,1]
frequency_scores = [1,2,3,4,5]
Monetary_scores = [1,2,3,4,5]


#Calculate RFM scores 
data['RecencyScore'] = pd.cut(data['Recency'], bins=5, labels=recency_scores)
data['FrequencyScore'] = pd.cut(data['Frequency'], bins=5, labels=frequency_scores)
data['MonetaryScore'] = pd.cut(data['MonetaryValue'], bins=5, labels=Monetary_scores)

In [41]:
# Convert RFM scores to numeric type 

data['RecencyScore']=data['RecencyScore'].astype(int)
data['FrequencyScore']=data['FrequencyScore'].astype(int)
data['MonetaryScore']=data['MonetaryScore'].astype(int)

#### RFM Value Segmentation

In [42]:
# Calculate RFM score by combining the individual scores 
data['RFM_Score'] = data['RecencyScore'] + data['FrequencyScore'] + data['MonetaryScore']

# Create RFM segments based on the RFM scores 
segment_label = ['Low-value', 'Mid-Value', 'High-Value']
data['value segment'] = pd.qcut(data['RFM_Score'], q=3, labels = segment_label)


In [43]:
data.head()

Unnamed: 0,CustomerID,PurchaseDate,TransactionAmount,ProductInformation,OrderID,Location,Recency,Frequency,MonetaryValue,RecencyScore,FrequencyScore,MonetaryScore,RFM_Score,value segment
0,8814,2023-04-11,943.31,Product C,890075,Tokyo,462,1,943.31,1,1,2,4,Low-value
1,2188,2023-04-11,463.7,Product A,176819,London,462,1,463.7,1,1,1,3,Low-value
2,4608,2023-04-11,80.28,Product A,340062,New York,462,1,80.28,1,1,1,3,Low-value
3,2559,2023-04-11,221.29,Product A,239145,London,462,1,221.29,1,1,1,3,Low-value
4,9482,2023-04-11,739.56,Product A,194545,Paris,462,1,739.56,1,1,2,4,Low-value


#### Now let’s have a look at the segment distribution:

In [44]:
# RFM Segment Distribution

segments_counts = data['value segment'].value_counts().reset_index()
segments_counts.columns = ['value segment', 'Count']  

# Create the bar chart

pastel_colors = px.colors.qualitative.Pastel

fig_segment_dist = px.bar(segments_counts, x = 'value segment', y = 'Count',
                    color = 'value segment', color_discrete_sequence=pastel_colors,title='RFM Value Segment Distribution')

# update the layout 
fig_segment_dist.update_layout(xaxis_title='RFM value segment', yaxis_title='Count', showlegend = False)

# show the figure 
fig_segment_dist.show() 








In [45]:
# create a new column for RFM Customer Segments 


data['RFM Customer Segments']=''


# Assign RFM segments based on the RFM Score 

data.loc[data['RFM_Score'] >=9, 'RFM Customer Segments'] = 'Champions'
data.loc[(data['RFM_Score'] >=6) & (data['RFM_Score'] <9), 'RFM Customer Segments'] = 'Potential Loyalists'
data.loc[(data['RFM_Score'] >=5) & (data['RFM_Score'] <6), 'RFM Customer Segments'] = 'At Risk Customers'
data.loc[(data['RFM_Score'] >=4) & (data['RFM_Score'] <5), 'RFM Customer Segments'] = "Can't lose"
data.loc[(data['RFM_Score'] >=3) & (data['RFM_Score'] <4), 'RFM Customer Segments'] = 'Lost'

# print the updated data with RFM segments 
print(data[['CustomerID', 'RFM Customer Segments']])

     CustomerID RFM Customer Segments
0          8814            Can't lose
1          2188                  Lost
2          4608                  Lost
3          2559                  Lost
4          9482            Can't lose
..          ...                   ...
995        2970   Potential Loyalists
996        6669   Potential Loyalists
997        8836   Potential Loyalists
998        1440   Potential Loyalists
999        4759   Potential Loyalists

[1000 rows x 2 columns]


In [46]:
data.head()

Unnamed: 0,CustomerID,PurchaseDate,TransactionAmount,ProductInformation,OrderID,Location,Recency,Frequency,MonetaryValue,RecencyScore,FrequencyScore,MonetaryScore,RFM_Score,value segment,RFM Customer Segments
0,8814,2023-04-11,943.31,Product C,890075,Tokyo,462,1,943.31,1,1,2,4,Low-value,Can't lose
1,2188,2023-04-11,463.7,Product A,176819,London,462,1,463.7,1,1,1,3,Low-value,Lost
2,4608,2023-04-11,80.28,Product A,340062,New York,462,1,80.28,1,1,1,3,Low-value,Lost
3,2559,2023-04-11,221.29,Product A,239145,London,462,1,221.29,1,1,1,3,Low-value,Lost
4,9482,2023-04-11,739.56,Product A,194545,Paris,462,1,739.56,1,1,2,4,Low-value,Can't lose


## RFM Analysis

##### Now let’s analyze the distribution of customers across different RFM customer segments within each value segment:

In [47]:
segment_product_counts = data.groupby(['value segment', 'RFM Customer Segments']).size().reset_index(name='Count')

segment_product_counts = segment_product_counts.sort_values('Count', ascending=False)

fig_treemap_segment_product = px.treemap(segment_product_counts, path = ['value segment', 'RFM Customer Segments'], 
                                         values = 'Count', color = 'value segment', color_discrete_sequence=px.colors.qualitative.Pastel, 
                                          title = 'RFM Customer Segments by Value')
fig_treemap_segment_product.show()









#### Now let’s analyze the distribution of RFM values within the Champions segment:

In [48]:
# Filter the data to include only the customers in the champion segment
champions_segment = data[data['RFM Customer Segments'] == 'Champions'] 

fig = go.Figure()
fig.add_trace(go.Box(y=champions_segment['RecencyScore'], name='Recency'))
fig.add_trace(go.Box(y=champions_segment['FrequencyScore'], name='Frequency'))
fig.add_trace(go.Box(y=champions_segment['MonetaryScore'], name='Monetary'))


fig.update_layout(title = 'Distribution of RFM Values within Champions Segment', yaxis_title = 'RFM Value',showlegend=True )
fig.show()


 ##### Now let's analyze the correlation of the recency, frequency, and monetary scores within the champions segment
 

In [49]:
correlation_matrix = champions_segment[['RecencyScore', 'FrequencyScore', 'MonetaryScore']].corr()

# Visualize the correlation matrix using a heatmap 

fig_heatmap = go.Figure(data=go.Heatmap(
    z=correlation_matrix.values,
    x=correlation_matrix.columns, 
    y=correlation_matrix.columns,
    colorscale='RdBu', 
    colorbar=dict( title='Correlation Matrix of RFM Values within Champions Segment' )
))
fig_heatmap.show()

#### Now let’s have a look at the number of customers in all the segments 

In [51]:
import plotly.colors

pastel_colors = plotly.colors.qualitative.Pastel


segments_counts = data['RFM Customer Segments'].value_counts()

# Create a bar chart to compare segment counts
fig = go.Figure(data=[go.Bar(x=segments_counts.index, y=segments_counts.values,
                        marker=dict(color=pastel_colors))])


#set the color of the Champions segment as a different color    
champions_color = 'rgb(158, 202, 225)'
fig.update_traces(marker_color=[champions_color if segment == 'Champions' else pastel_colors[i]
                                for i, segment in enumerate(segments_counts.index)], 
                                marker_line_color = 'rgb(8,48, 107)',
                                marker_line_width = 1.5, opacity=0.6)

# update the layout 
fig.update_layout(title='Comaparison of RFM Segments', xaxis_title = 'RFM Segments', yaxis_title = 'Number of Customers',
                  showlegend=False)

fig.show()

#### Now let’s have a look at the recency, frequency, and monetary scores of all the segments:



In [55]:
# Calculate the average Recency, Frequency and Monetary Scores for each segment

segment_scores = data.groupby('RFM Customer Segments')[['RecencyScore', 'FrequencyScore','MonetaryScore']].mean().reset_index()


fig = go.Figure()

# Add bars for Recency Score 
fig.add_trace(go.Bar(
    x = segment_scores['RFM Customer Segments'],
    y = segment_scores['RecencyScore'],
    name='Recency Score',
    marker_color = 'rgb(158,202,225)' 
))


# Add bars for Frequency Score 
fig.add_trace(go.Bar(
    x = segment_scores['RFM Customer Segments'],
    y = segment_scores['FrequencyScore'],
    name='Frequency Score',
    marker_color = 'rgb(94,158,217)' 
))


# Add bars for Monetary Score 
fig.add_trace(go.Bar(
    x = segment_scores['RFM Customer Segments'],
    y = segment_scores['MonetaryScore'],
    name='Monetary Score',
    marker_color = 'rgb(32,102,148)' 
))


# update the layout 


fig.update_layout(title='Comparison of RFM Segments based on Recency, Frequency, and Monetary Scores', 
                xaxis_title='RFM Segments', 
                yaxis_title = 'Score', 
                barmode = 'group',
                showlegend = True
            )

fig.show()


### SUMMARY

#### RFM Analysis is used to understand and segment customers based on their buying behaviour. RFM stands for recency, frequency, and monetary value, which are three key metrics that provide information about customer engagement, loyalty, and value to a business. I hope you liked this article on RFM Analysis using Python. Feel free to ask valuable questions in the comments section below.