In [None]:
%matplotlib inline
import warnings
# NOTE(review): this silences every warning for the whole session, including
# numpy RuntimeWarnings (e.g. division by zero in the ratio calculations) and
# pandas deprecation notices — consider narrowing it to specific categories.
warnings.filterwarnings('ignore')

import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.ticker as mtick # For specifying the axes tick format 
import numpy as np
import seaborn as sns
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.cluster import KMeans
from mpl_toolkits.mplot3d import Axes3D
import plotly.graph_objects as go

# Use matplotlib's built-in 'ggplot' style sheet for all figures in this notebook.
plt.style.use('ggplot')

In [None]:
# Load the merged RFM transaction table.
# The cells below refer to this frame as `rfm_merged_df`, so it must be bound
# under that name for the notebook to run on a fresh kernel; `df` is kept as
# an alias of the same object for any code that still uses the short name.
rfm_merged_df = pd.read_csv('rfm_merged_df.csv')
df = rfm_merged_df

In [None]:
# Repeat rate: customers with two or more distinct transactions,
# divided by the total number of distinct customers.

# size of the customer base (distinct customer_id values)
total_customers = rfm_merged_df['customer_id'].nunique()

# distinct transaction_id values per customer
transaction_counts = rfm_merged_df.groupby('customer_id')['transaction_id'].nunique()

# number of customers whose transaction count reaches 2
customer_two_trans = transaction_counts.loc[lambda counts: counts >= 2].count()

# the ratio itself
repeat_rate = customer_two_trans / total_customers

print("Repeat rate:", repeat_rate)

In [None]:
# churn rate = 1 - repeat_rate
# i.e. the share of customers with fewer than two distinct transactions

churn_rate = 1 - repeat_rate
churn_rate

In [None]:
# average customer lifespan (ACL) = 1 / churn rate
#
# A churn rate of exactly 0 (every customer made a repeat purchase) leaves the
# lifespan undefined under this formula. Report NaN in that case instead of
# dividing by zero, which would otherwise yield a silent `inf` that propagates
# into CLV. This is the same guard the per-cluster ACL calculation uses.

acl = 1 / churn_rate if churn_rate != 0 else np.nan
acl

In [None]:
# average purchase frequency (APF) = total number of purchases / total number of customers
# A purchase is counted as one distinct transaction_id.

total_purchases = rfm_merged_df['transaction_id'].nunique()

# NOTE(review): total_customers is a notebook-level variable set in an earlier
# cell; make sure it holds the overall distinct-customer count when this runs.
apf = total_purchases / total_customers
apf

In [None]:
# Avg. Purchase Value (APV) = Total Revenue / Total Number of Purchases
# Total revenue is the sum of the sales_amount column over all rows.

total_revenue = rfm_merged_df['sales_amount'].sum()

# total_purchases is the distinct transaction_id count from the APF cell
apv = total_revenue / total_purchases
apv

In [None]:
# customer lifetime value (CLV) = Avg. Purchase Value * Avg. Purchase Frequency * Avg. Customer Lifespan
# Uses the notebook-level apv, apf and acl values computed in the cells above.

clv = apv * apf * acl
clv

In [None]:
# Average customer lifespan (ACL) for each cluster.
#
# The per-cluster arithmetic lives in a helper so that its intermediate values
# stay local. Computing them inline in a loop would overwrite the notebook-level
# total_customers / repeat_rate / churn_rate / acl (which describe the whole
# customer base) with the values of whichever cluster happened to come last.


def compute_cluster_acl(cluster_transactions):
    """Return the average customer lifespan (1 / churn rate) for one cluster.

    Parameters
    ----------
    cluster_transactions : pandas.DataFrame
        Rows belonging to a single cluster. Must contain the 'customer_id'
        and 'transaction_id' columns.

    Returns
    -------
    float
        1 / (1 - repeat rate), where the repeat rate is the share of the
        cluster's distinct customers that have at least two distinct
        transactions. NaN when the cluster has no customers or when the
        churn rate is 0 (every customer is a repeat customer).
    """
    n_customers = cluster_transactions['customer_id'].nunique()
    if n_customers == 0:
        # Nothing to divide by; the lifespan is undefined for an empty cluster.
        return np.nan

    # Distinct transactions per customer inside this cluster
    per_customer = cluster_transactions.groupby('customer_id')['transaction_id'].nunique()
    repeat_customers = per_customer[per_customer >= 2].count()

    cluster_repeat_rate = repeat_customers / n_customers
    cluster_churn_rate = 1 - cluster_repeat_rate

    # A zero churn rate makes 1 / churn undefined; report NaN instead.
    return 1 / cluster_churn_rate if cluster_churn_rate != 0 else np.nan


# One record per cluster, in order of first appearance in the data
cluster_lifespan_data = [
    {
        'Cluster': cluster_label,
        'ACL': compute_cluster_acl(
            rfm_merged_df[rfm_merged_df['cluster'] == cluster_label]
        ),
    }
    for cluster_label in rfm_merged_df['cluster'].unique()
]

# Convert the list of dictionaries to a DataFrame
cluster_lifespan_df = pd.DataFrame(cluster_lifespan_data)