<a href="https://colab.research.google.com/github/priyal6/EDA/blob/main/RFM.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [4]:
import pandas as pd
import plotly.express as px
import plotly.io as pio
import plotly.graph_objects as go
import plotly.graph_objects as go

# Instead of 'pio.template.default', use 'pio.templates.default':
pio.templates.default = "plotly_white"

data = pd.read_csv("rfm_data.csv")
data.head()

Unnamed: 0,CustomerID,PurchaseDate,TransactionAmount,ProductInformation,OrderID,Location
0,8814,2023-04-11,943.31,Product C,890075,Tokyo
1,2188,2023-04-11,463.7,Product A,176819,London
2,4608,2023-04-11,80.28,Product A,340062,New York
3,2559,2023-04-11,221.29,Product A,239145,London
4,9482,2023-04-11,739.56,Product A,194545,Paris


In [5]:
from datetime import datetime
import pandas as pd

# Convert 'PurchaseDate' to datetime
data['PurchaseDate'] = pd.to_datetime(data['PurchaseDate'])

# Calculate Recency using the dt accessor
data['Recency'] = (datetime.now() - data['PurchaseDate']).dt.days

#calculation of frequency
frequency_data = data.groupby('CustomerID')['OrderID'].count().reset_index()
frequency_data.rename(columns={'OrderID': 'Frequency'},inplace=True)
data = data.merge(frequency_data, on = 'CustomerID', how='left')

#monetary
monetary_data = data.groupby('CustomerID')['TransactionAmount'].sum().reset_index()
monetary_data.rename(columns={'TransactionAmount': 'MonetaryValue'}, inplace= True)
data = data.merge(monetary_data, on = 'CustomerID', how = 'left')

In [6]:
print(data.head())

   CustomerID PurchaseDate  TransactionAmount ProductInformation  OrderID  \
0        8814   2023-04-11             943.31          Product C   890075   
1        2188   2023-04-11             463.70          Product A   176819   
2        4608   2023-04-11              80.28          Product A   340062   
3        2559   2023-04-11             221.29          Product A   239145   
4        9482   2023-04-11             739.56          Product A   194545   

   Location  Recency  Frequency  MonetaryValue  
0     Tokyo      655          1         943.31  
1    London      655          1         463.70  
2  New York      655          1          80.28  
3    London      655          1         221.29  
4     Paris      655          1         739.56  


In [7]:
recency_scores = [5, 4, 3, 2, 1]  # Higher score for lower recency (more recent)
frequency_scores = [1, 2, 3, 4, 5]  # Higher score for higher frequency
monetary_scores = [1, 2, 3, 4, 5]

In [9]:
data['RecencyScore'] = pd.cut(data['Recency'], bins= 5, labels = recency_scores)
data['FrequencyScore'] =  pd.cut(data['Frequency'], bins = 5, labels = frequency_scores)
data['MonetaryScore'] = pd.cut(data['MonetaryValue'], bins = 5, labels = monetary_scores)

In [11]:
data['RecencyScore'] = pd.cut(data['Recency'], bins=5, labels=recency_scores, duplicates='drop').astype(int)
data['FrequencyScore'] = pd.cut(data['Frequency'], bins=5, labels=frequency_scores, duplicates='drop').astype(int)
data['MonetaryScore'] = pd.cut(data['MonetaryValue'], bins=5, labels=monetary_scores, duplicates='drop').astype(int)

data['RFM_Score'] = data['RecencyScore'] + data['FrequencyScore'] + data['MonetaryScore']

segment_labels = ['Low-Value','Mid-Value', 'High-Value']
data['Value Segment'] = pd.qcut(data['RFM_Score'], q=3, labels=segment_labels)

In [12]:
print(data.head())

   CustomerID PurchaseDate  TransactionAmount ProductInformation  OrderID  \
0        8814   2023-04-11             943.31          Product C   890075   
1        2188   2023-04-11             463.70          Product A   176819   
2        4608   2023-04-11              80.28          Product A   340062   
3        2559   2023-04-11             221.29          Product A   239145   
4        9482   2023-04-11             739.56          Product A   194545   

   Location  Recency  Frequency  MonetaryValue  RecencyScore  FrequencyScore  \
0     Tokyo      655          1         943.31             1               1   
1    London      655          1         463.70             1               1   
2  New York      655          1          80.28             1               1   
3    London      655          1         221.29             1               1   
4     Paris      655          1         739.56             1               1   

   MonetaryScore  RFM_Score Value Segment  
0           

In [13]:
segment_counts = data['Value Segment'].value_counts().reset_index()
segment_counts.columns = ['Value Segment', 'Count']

pastel_colors = px.colors.qualitative.Pastel

fig_segment_dist = px.bar(segment_counts, x = 'Value Segment', y = 'Count',
                          color = 'Value Segment', color_discrete_sequence = pastel_colors,
                          title = 'RFM Value Segment Distribution')

fig_segment_dist.update_layout(xaxis_title='RFM Value Segment',
                              yaxis_title='Count',
                              showlegend=False)
fig_segment_dist.show()