In [115]:
!pip install plotly 



In [116]:
import pandas as pd
from datetime import timedelta
import plotly.express as px
import plotly.graph_objects as go
import plotly.colors

In [117]:
# Read the cleaned transaction export into a DataFrame and preview its first rows.
# NOTE(review): the path is absolute and specific to one machine, so this cell
# will not run elsewhere without editing it.
data = pd.read_csv(filepath_or_buffer='/Users/tanyasharan/Downloads/cleaned_dataset2.csv')
data.head(n=5)

Unnamed: 0,Account No,Balance,Amount,Third Party Account No,Third Party Name,DateTime
0,839964633.0,3385.0,2602.0,MTrx,Boots,01/01/2025 00:00
1,793069740.0,3318.0,2780.0,MTrx,Mountain Warehouse,01/01/2025 00:00
2,793069740.0,2278.0,-1040.0,705519819,P2P,01/01/2025 00:00
3,705519819.0,2726.0,1040.0,793069740,P2P,01/01/2025 00:00
4,726302171.0,3253.5,1542.5,MTrx,Craftastic,01/01/2025 00:00


In [118]:
# Parse the 'DateTime' strings into timestamps. With errors='coerce', any value
# that cannot be parsed becomes NaT instead of raising.
# NOTE(review): the previewed values look like '01/01/2025 00:00', which is
# ambiguous between day-first and month-first; confirm the intended format,
# because coerced failures would be silent.
data['DateTime'] = pd.to_datetime(data['DateTime'], errors='coerce')

# Recency is measured from one day after the most recent transaction in the data.
latest_transaction = data['DateTime'].max()
reference_date = latest_transaction + pd.Timedelta(days=1)
reference_date

Timestamp('2025-12-12 21:21:00')

In [119]:
# Keep only the rows where money left the account (Amount < 0) and store the
# amounts as positive magnitudes. `assign` returns a new frame, so `data`
# itself is left untouched.
spending_data = (
    data
    .loc[data['Amount'].lt(0)]
    .assign(Amount=lambda txns: txns['Amount'].abs())
)


In [120]:
# Calculate RFM metrics: one row per account.
# Named aggregation is used so the result has flat, explicitly named columns
# instead of a MultiIndex containing a '<lambda>' level that would have to be
# renamed by position afterwards.
rfm = (
    spending_data
    .groupby('Account No')
    .agg(
        # Recency: whole days between reference_date and the account's last spend
        Recency=('DateTime', lambda x: (reference_date - x.max()).days),
        # Frequency: number of spending transactions (non-null amounts)
        Frequency=('Amount', 'count'),
        # Monetary: total amount spent
        Monetary=('Amount', 'sum'),
    )
    .reset_index()
)

In [121]:
# Give the four columns of the aggregated frame flat, readable names.
# The assignment is positional: account key first, then the three metrics.
rfm.columns = pd.Index(['Account No', 'Recency', 'Frequency', 'Monetary'])
# Preview the first rows of the RFM table
rfm.head(n=5)

Unnamed: 0,Account No,Recency,Frequency,Monetary
0,100100738.0,8,128,8700.91
1,100837224.0,5,99,6739.93
2,101348775.0,1,434,18226.22
3,103439190.0,1,536,18886.59
4,104009728.0,1,433,27773.9


In [122]:
# Quartile cut points (25th, 50th and 75th percentiles) for each RFM metric.
# The result is indexed by the quantile level, with one column per metric.
quantiles = rfm.loc[:, ['Recency', 'Frequency', 'Monetary']].quantile([0.25, 0.50, 0.75])

# Assign RFM scores
def RScore(x, p, d):
    """Convert a raw metric value into a quartile score from 1 to 4.

    Parameters
    ----------
    x : number
        The value to score.
    p : str
        Name of the metric; used as the key into ``d``. The scale is reversed
        when it equals ``'Recency'`` (smaller values score higher).
    d : mapping
        Cut points such that ``d[p][0.25]``, ``d[p][0.50]`` and ``d[p][0.75]``
        give the quartile thresholds for metric ``p``.

    Returns
    -------
    int
        For ``'Recency'``: 4 when ``x`` is at or below the first cut point,
        down to 1 when it is above the last. For any other metric: 1 when
        ``x`` is at or below the first cut point, up to 4 when above the last.
    """
    cuts = d[p]

    # Count how many cut points x lies strictly above (0..3). A value that
    # compares False against every threshold (e.g. NaN) ends up in the top band.
    band = 0
    for level in (0.25, 0.50, 0.75):
        if x <= cuts[level]:
            break
        band += 1

    # Recency is "lower is better", so its scale runs in the opposite direction.
    return 4 - band if p == 'Recency' else band + 1

# Score every metric against its own quartile cut points, producing the
# 'R', 'F' and 'M' columns (each an integer from 1 to 4).
for score_column, metric in (('R', 'Recency'), ('F', 'Frequency'), ('M', 'Monetary')):
    rfm[score_column] = rfm[metric].apply(RScore, args=(metric, quantiles))

# Three-character code formed by concatenating the R, F and M digits.
score_digits = rfm[['R', 'F', 'M']].astype(str)
rfm['RFM Segment'] = score_digits['R'] + score_digits['F'] + score_digits['M']

# Overall score: the sum of the three individual scores.
rfm['RFM Score'] = rfm['R'] + rfm['F'] + rfm['M']

rfm.head(n=5)

Unnamed: 0,Account No,Recency,Frequency,Monetary,R,F,M,RFM Segment,RFM Score
0,100100738.0,8,128,8700.91,1,1,1,111,3
1,100837224.0,5,99,6739.93,2,1,1,211,4
2,101348775.0,1,434,18226.22,4,4,2,442,10
3,103439190.0,1,536,18886.59,4,4,2,442,10
4,104009728.0,1,433,27773.9,4,4,4,444,12


In [124]:
# Segment names, ordered from the lowest score band to the highest.
segment_labels = ['Emerging Customers', 'Everyday Banking Customers', 'Established Customers']


def assign_segment(score):
    """Map a total RFM score to one of the three labels in ``segment_labels``.

    Scores up to and including 5 get the first label, scores up to and
    including 9 get the second, and anything else gets the third.
    """
    # Walk the inclusive upper bounds of the first two bands in ascending order.
    for upper_bound, label in zip((5, 9), segment_labels):
        if score <= upper_bound:
            return label
    # Everything that matched neither band falls into the top segment.
    return segment_labels[2]
    
# Label every customer with the segment that matches their total RFM score.
rfm['Segment'] = rfm['RFM Score'].map(assign_segment)
rfm.head(n=5)

Unnamed: 0,Account No,Recency,Frequency,Monetary,R,F,M,RFM Segment,RFM Score,Segment
0,100100738.0,8,128,8700.91,1,1,1,111,3,Emerging Customers
1,100837224.0,5,99,6739.93,2,1,1,211,4,Emerging Customers
2,101348775.0,1,434,18226.22,4,4,2,442,10,Established Customers
3,103439190.0,1,536,18886.59,4,4,2,442,10,Established Customers
4,104009728.0,1,433,27773.9,4,4,4,444,12,Established Customers


In [126]:

# Count how many customers share each three-digit RFM code, largest group first.
segment_counts = rfm['RFM Segment'].value_counts().reset_index()
segment_counts.columns = ['Segment', 'Count']
segment_counts = segment_counts.sort_values('Count', ascending=False)

# Bar chart of customers per RFM code; bar colour also encodes the count.
fig = px.bar(segment_counts, x='Segment', y='Count', text='Count', 
             labels={'Segment': 'RFM Segment', 'Count': 'Number of Customers'},
             color='Count', color_continuous_scale='blues')

# Customize layout
fig.update_layout(title='Customer Segmentation by RFM Segment', title_x=0.5, title_font_size=20)

# The RFM codes are digit-only strings (e.g. '444'), which Plotly's axis-type
# auto-detection would treat as numbers and place on a linear axis, discarding
# the sort by count. Force a categorical axis so each code is one bar, in the
# order of the sorted frame.
fig.update_xaxes(type='category')

# Show plot
fig.show()


In [111]:
# Re-label customers with a five-tier segmentation based on the total RFM score.
# This replaces whatever was previously stored in the 'Segment' column.
rfm['Segment'] = ''

# Each rule writes to the same 'Segment' column. The top tier previously
# targeted a misspelled column ('_Segments'), which left customers scoring 9
# or more with an empty segment and added a stray column to the frame.
rfm.loc[rfm['RFM Score'] >= 9, 'Segment'] = 'Loyal'
rfm.loc[(rfm['RFM Score'] >= 6) & (rfm['RFM Score'] < 9), 'Segment'] = 'Highly Engaged Customers'
rfm.loc[(rfm['RFM Score'] >= 5) & (rfm['RFM Score'] < 6), 'Segment'] = 'Valuable but At Risk Churners'
rfm.loc[(rfm['RFM Score'] >= 4) & (rfm['RFM Score'] < 5), 'Segment'] = "Dormant Customers"
rfm.loc[(rfm['RFM Score'] >= 3) & (rfm['RFM Score'] < 4), 'Segment'] = "Churning Customers"

# Number of customers in each of the five segments.
segment_counts = rfm['Segment'].value_counts().reset_index()

In [112]:
# Name the two columns of the counts table (positional: label first, then tally).
segment_counts.columns = pd.Index(['Segment', 'Count'])
# Order the segments from most to fewest customers.
segment_counts = segment_counts.sort_values(by='Count', ascending=False)

In [113]:
# Treemap with one tile per segment: tile area and colour both reflect the
# number of customers in that segment.
fig_treemap = px.treemap(
    data_frame=segment_counts,
    path=['Segment'],
    values='Count',
    color='Count',
    color_continuous_scale='blues',
    title='TreeMap: Customer Segmentation by RFM Segment',
)
fig_treemap.show()