# 05 – Commercial Insights

This analysis expands on the monthly subscription activity framework established earlier.  
It focuses on revenue quality, customer value segmentation, acquisition impact, billing mix, and churn resilience.



In [10]:
import os
import pandas as pd
import numpy as np
from statsmodels.tsa.holtwinters import ExponentialSmoothing

DATA_DIR = "data/processed"

customers_df = pd.read_csv(os.path.join(DATA_DIR, "customers.csv"))
plans_df = pd.read_csv(os.path.join(DATA_DIR, "plans.csv"))
subs_df = pd.read_csv(os.path.join(DATA_DIR, "subscriptions.csv"))
activity_df = pd.read_csv(os.path.join(DATA_DIR, "monthly_activity.csv"))

activity_df["month"] = pd.to_datetime(activity_df["month"])
activity_df["cohort_month"] = pd.to_datetime(activity_df["cohort_month"])
customers_df["signup_date"] = pd.to_datetime(customers_df["signup_date"])
subs_df["start_date"] = pd.to_datetime(subs_df["start_date"])
subs_df["cancel_date"] = pd.to_datetime(subs_df["cancel_date"])

# enrich activity with plan and customer attributes
activity_enriched = (
    activity_df
    .merge(plans_df, on="plan_id", how="left")
    .merge(
        customers_df[["customer_id", "country", "acquisition_channel", "device_type"]],
        on="customer_id",
        how="left"
    )
)

# derive cohort age in months
activity_enriched["cohort_age_months"] = (
    (activity_enriched["month"].dt.year - activity_enriched["cohort_month"].dt.year) * 12 +
    (activity_enriched["month"].dt.month - activity_enriched["cohort_month"].dt.month)
)


## Monthly MRR and Growth

This section tracks recurring revenue on a month-start basis and calculates month-on-month changes from the prior period’s MRR base.



In [11]:
monthly_mrr = (
    activity_enriched
    .groupby("month", as_index=False)["mrr_usd"]
    .sum()
    .rename(columns={"mrr_usd": "total_mrr_usd"})
    .sort_values("month")
)

monthly_mrr["mom_growth_pct"] = (
    monthly_mrr["total_mrr_usd"].pct_change() * 100
).round(2)

monthly_mrr


Unnamed: 0,month,total_mrr_usd,mom_growth_pct
0,2022-01-01,825.28,
1,2022-02-01,3302.68,300.19
2,2022-03-01,6467.76,95.83
3,2022-04-01,10917.28,68.8
4,2022-05-01,13995.52,28.2
5,2022-06-01,19154.48,36.86
6,2022-07-01,23539.6,22.89
7,2022-08-01,29030.2,23.32
8,2022-09-01,33083.76,13.96
9,2022-10-01,36049.52,8.96


## Revenue Mix by Plan and Billing Period


In [12]:
plan_mix = (
    activity_enriched
    .groupby(["plan_name", "billing_period"], as_index=False)["mrr_usd"]
    .sum()
    .rename(columns={"mrr_usd": "total_mrr_usd"})
)

total_mrr = plan_mix["total_mrr_usd"].sum()
plan_mix["revenue_share_pct"] = (plan_mix["total_mrr_usd"] / total_mrr * 100).round(2)

plan_mix.sort_values("revenue_share_pct", ascending=False)


Unnamed: 0,plan_name,billing_period,total_mrr_usd,revenue_share_pct
3,Premium,monthly,1448825.0,32.77
2,Premium,annual,1145482.88,25.91
5,Standard,monthly,638970.0,14.45
4,Standard,annual,577224.96,13.05
1,Basic,monthly,353684.0,8.0
0,Basic,annual,257498.36,5.82


## Churn Health – Logo and Revenue Churn



Churn is measured in two ways: logo churn (cancelled subscriptions) and gross MRR churn (monthly revenue lost from cancellations).


In [13]:
# active subs per month
active_by_month = (
    activity_enriched[activity_enriched["is_active"]]
    .groupby("month", as_index=False)["subscription_id"]
    .nunique()
    .rename(columns={"subscription_id": "active_subscriptions"})
)

# subs that churned in a given month
churned = activity_enriched[activity_enriched["churned_this_month"]]

churned_subs = (
    churned
    .groupby("month", as_index=False)["subscription_id"]
    .nunique()
    .rename(columns={"subscription_id": "churned_subscriptions"})
)

# churned MRR in that month (based on that month's MRR)
churned_mrr = (
    churned
    .groupby("month", as_index=False)["mrr_usd"]
    .sum()
    .rename(columns={"mrr_usd": "churned_mrr_usd"})
)

# total MRR by month
mrr_by_month = (
    activity_enriched
    .groupby("month", as_index=False)["mrr_usd"]
    .sum()
    .rename(columns={"mrr_usd": "total_mrr_usd"})
)

churn_summary = (
    active_by_month
    .merge(churned_subs, on="month", how="left")
    .merge(churned_mrr, on="month", how="left")
    .merge(mrr_by_month, on="month", how="left")
)

churn_summary["churned_subscriptions"] = churn_summary["churned_subscriptions"].fillna(0).astype(int)
churn_summary["churned_mrr_usd"] = churn_summary["churned_mrr_usd"].fillna(0)

# logo churn vs previous month's active base
churn_summary["logo_churn_rate_pct"] = (
    churn_summary["churned_subscriptions"] /
    churn_summary["active_subscriptions"].shift(1)
    * 100
).round(2)

# gross MRR churn vs previous month's MRR
churn_summary["gross_mrr_churn_rate_pct"] = (
    churn_summary["churned_mrr_usd"] /
    churn_summary["total_mrr_usd"].shift(1)
    * 100
).round(2)

churn_summary


Unnamed: 0,month,active_subscriptions,churned_subscriptions,churned_mrr_usd,total_mrr_usd,logo_churn_rate_pct,gross_mrr_churn_rate_pct
0,2022-01-01,10,3,133.84,825.28,,
1,2022-02-01,38,6,355.84,3302.68,60.0,43.12
2,2022-03-01,84,7,606.68,6467.76,18.42,18.37
3,2022-04-01,149,16,1109.52,10917.28,19.05,17.15
4,2022-05-01,212,5,327.76,13995.52,3.36,3.0
5,2022-06-01,285,13,998.44,19154.48,6.13,7.13
6,2022-07-01,351,20,1423.28,23539.6,7.02,7.43
7,2022-08-01,435,23,1365.8,29030.2,6.55,5.8
8,2022-09-01,488,23,1453.04,33083.76,5.29,5.01
9,2022-10-01,544,17,1205.36,36049.52,3.48,3.64


## Cohort Value (Average Cumulative Revenue by Cohort Age)


LTV progression is approximated by cumulative MRR at each subscription’s cohort age, aligned to period-level cohort reporting rather than unit economics.


In [14]:
# cumulative revenue per subscription over time
activity_enriched = activity_enriched.sort_values(["subscription_id", "month"])
activity_enriched["cumulative_revenue_usd"] = (
    activity_enriched
    .groupby("subscription_id")["mrr_usd"]
    .cumsum()
)

cohort_ltv = (
    activity_enriched
    .groupby(["cohort_month", "cohort_age_months"], as_index=False)["cumulative_revenue_usd"]
    .mean()
    .rename(columns={"cumulative_revenue_usd": "avg_cumulative_revenue_usd"})
    .sort_values(["cohort_month", "cohort_age_months"])
)

cohort_ltv


Unnamed: 0,cohort_month,cohort_age_months,avg_cumulative_revenue_usd
0,2022-01-01,0,63.483077
1,2022-01-01,1,116.670769
2,2022-01-01,2,169.858462
3,2022-01-01,3,213.892308
4,2022-01-01,4,250.240000
...,...,...,...
1076,2025-08-01,1,124.739130
1077,2025-08-01,2,183.588913
1078,2025-09-01,0,64.098343
1079,2025-09-01,1,120.787182


## Subscription Segments (Value vs Churn Behaviour)




In [15]:
sub_metrics = (
    activity_enriched
    .groupby("subscription_id")
    .agg(
        mrr_mean=("mrr_usd", "mean"),
        mrr_peak=("mrr_usd", "max"),
        months_active=("is_active", "sum"),
        churn_events=("churned_this_month", "sum"),
    )
    .reset_index()
)

q25 = sub_metrics["mrr_mean"].quantile(0.25)
q75 = sub_metrics["mrr_mean"].quantile(0.75)

def assign_segment(row):
    if row["mrr_mean"] >= q75:
        return "High value"
    if row["churn_events"] > 0:
        return "At risk"
    if row["mrr_mean"] <= q25:
        return "Low value"
    return "Stable mid-tier"

sub_metrics["segment"] = sub_metrics.apply(assign_segment, axis=1)

segment_summary = (
    sub_metrics
    .groupby("segment", as_index=False)
    .agg(
        subscriptions=("subscription_id", "nunique"),
        avg_mrr_mean=("mrr_mean", "mean"),
        avg_months_active=("months_active", "mean"),
    )
    .sort_values("subscriptions", ascending=False)
)

segment_summary


Unnamed: 0,segment,subscriptions,avg_mrr_mean,avg_months_active
0,At risk,2159,18.098437,7.525706
1,High value,1826,92.174243,15.123768
2,Stable mid-tier,1468,34.765858,15.154632


## Channel Performance and Billing Mix


This section compares channel-level signups, subscription conversions, and the proportion of activity months attributed to monthly versus annual billing cycles.


In [16]:
# signups per channel
signup_by_channel = (
    customers_df
    .groupby("acquisition_channel", as_index=False)["customer_id"]
    .nunique()
    .rename(columns={"customer_id": "signups"})
)

# subscriptions per channel
subs_by_channel = (
    subs_df
    .merge(customers_df[["customer_id", "acquisition_channel"]], on="customer_id", how="left")
    .groupby("acquisition_channel", as_index=False)["subscription_id"]
    .nunique()
    .rename(columns={"subscription_id": "subscriptions"})
)

channel_conv = signup_by_channel.merge(subs_by_channel, on="acquisition_channel", how="left")
channel_conv["subscriptions"] = channel_conv["subscriptions"].fillna(0).astype(int)
channel_conv["conversion_rate_pct"] = (
    channel_conv["subscriptions"] / channel_conv["signups"] * 100
).round(2)

# billing mix by channel
billing_mix = (
    activity_enriched
    .groupby(["acquisition_channel", "billing_period"], as_index=False)["subscription_id"]
    .nunique()
    .rename(columns={"subscription_id": "unique_subscriptions"})
)

total_per_channel = (
    billing_mix
    .groupby("acquisition_channel", as_index=False)["unique_subscriptions"]
    .sum()
    .rename(columns={"unique_subscriptions": "total_subscriptions"})
)

billing_mix = billing_mix.merge(total_per_channel, on="acquisition_channel", how="left")
billing_mix["share_pct"] = (
    billing_mix["unique_subscriptions"] / billing_mix["total_subscriptions"] * 100
).round(2)

channel_conv, billing_mix.sort_values(["acquisition_channel", "billing_period"])


(  acquisition_channel  signups  subscriptions  conversion_rate_pct
 0               Email     1139            768                67.43
 1             Organic     2850           1949                68.39
 2            Paid Ads     1994           1350                67.70
 3         Partnership      819            547                66.79
 4            Referral     1198            839                70.03,
   acquisition_channel billing_period  unique_subscriptions  \
 0               Email         annual                   370   
 1               Email        monthly                   398   
 2             Organic         annual                   922   
 3             Organic        monthly                  1027   
 4            Paid Ads         annual                   689   
 5            Paid Ads        monthly                   661   
 6         Partnership         annual                   268   
 7         Partnership        monthly                   279   
 8            Referral  

## Baseline 12-Month MRR Forecast


In [17]:
# baseline 12-month forecast
mrr_series = monthly_mrr.set_index("month")["total_mrr_usd"]

forecast_model = ExponentialSmoothing(
    mrr_series,
    trend="add",
    seasonal=None
).fit(optimized=True)

forecast_12m = forecast_model.forecast(12).reset_index()
forecast_12m.columns = ["month", "forecast_mrr_usd"]

forecast_12m


  self._init_dates(dates, freq)


Unnamed: 0,month,forecast_mrr_usd
0,2025-11-01,206399.071538
1,2025-12-01,210909.383109
2,2026-01-01,215419.69468
3,2026-02-01,219930.006252
4,2026-03-01,224440.317823
5,2026-04-01,228950.629394
6,2026-05-01,233460.940965
7,2026-06-01,237971.252536
8,2026-07-01,242481.564107
9,2026-08-01,246991.875678


## Net Revenue Retention (NRR)


In [18]:
# Net Revenue Retention (MRR recovery vs previous month base)
activity_enriched = activity_enriched.sort_values(["subscription_id", "month"])
activity_enriched["cumulative_mrr_usd"] = activity_enriched.groupby("subscription_id")["mrr_usd"].cumsum()

monthly_mrr = (
    activity_enriched
    .groupby("month", as_index=False)["mrr_usd"]
    .sum()
    .rename(columns={"mrr_usd": "total_mrr_usd"})
    .sort_values("month")
)

monthly_mrr["nrr_pct"] = (
    monthly_mrr["total_mrr_usd"] / monthly_mrr["total_mrr_usd"].shift(1) * 100
).round(2)

monthly_mrr


Unnamed: 0,month,total_mrr_usd,nrr_pct
0,2022-01-01,825.28,
1,2022-02-01,3302.68,400.19
2,2022-03-01,6467.76,195.83
3,2022-04-01,10917.28,168.8
4,2022-05-01,13995.52,128.2
5,2022-06-01,19154.48,136.86
6,2022-07-01,23539.6,122.89
7,2022-08-01,29030.2,123.32
8,2022-09-01,33083.76,113.96
9,2022-10-01,36049.52,108.96
