In [None]:
"""
Calculate the trailing one year member renewal rate.

- TODO: make the trailing period configurable.

Any time a member renews and they've previously had an active membership, consider it a renewal.
As a result, the renewal rate can retroactively change.
"""
import pandas as pd

# Root directory that every data file in this notebook is read from / written to.
DATA_HOME = "~/chtl/data"
# NOTE(review): unpickling can execute arbitrary code; only load files produced by this project.
users = pd.read_pickle(DATA_HOME + '/processed/users.pkl')
# Show the column types so the date/string columns used below can be checked.
users.dtypes

In [None]:
# Some exploration of the data. Do users WITHOUT a 'First Membership Started' date ever have a
# 'Current Membership Type' other than 'Prospective'?
#
# Nope! Just 'Prospective'

set(users.loc[users['First Membership Started'].isna(), 'Current Membership Type'])

In [None]:
# Everyone whose current membership type is anything other than 'Prospective'.
activated_users = users.loc[users['Current Membership Type'].ne('Prospective')]
len(activated_users)

In [None]:
# Do memberships that look expired carry a different status?
#
# No: the 'Current Membership Type' doesn't change when a membership expires.
from datetime import datetime

# Activated users whose 'Expiration' is already in the past.
expired_users = activated_users.loc[activated_users['Expiration'].lt(datetime.now())]
set(expired_users['Current Membership Type'])

In [None]:
#
# This gets complicated pretty quickly. For now, just exclude members whose first membership
# started within the last year. Doing it properly would mean excluding members based on
# their membership type (e.g. 'Standard (Monthly)').
from datetime import datetime, timedelta

# Cut-off: 52 weeks before the moment this cell runs.
year_ago = datetime.now() - timedelta(weeks=52)

# Users whose first membership started before the cut-off.
expirable_users = activated_users.loc[activated_users['First Membership Started'].lt(year_ago)]
expirable_users

In [None]:
# Lifetime renewal rate: share of members who started more than a year ago and are not expired.
#
# The numerator must be drawn from the same cohort as the denominator. `expired_users` covers
# every activated user (including ones who joined within the last year), so restrict it to the
# users who are also in `expirable_users` before dividing.
#
# NOTE: the previously recorded figure (15.3% as of 2025) was computed with all expired users in
# the numerator; re-run this cell to refresh the number.
expired_expirable_users = expired_users[expired_users['First Membership Started'] < year_ago]
f'{1 - (len(expired_expirable_users)/len(expirable_users)):.1%}'

In [None]:
# "How many people are renewing this month" is a different question than
# "how many people eventually renew given a long enough tail".
#
# Let's start with a trailing measure: people who renewed within the "registration period".
#
# When a member renews twice, we lose that granularity. We'd have to look at the actual
# transactions. Maybe for each member, create a group of "membership created",
# "membership renewed", etc.

# Group transactions by membership ID?
txns = pd.read_pickle(DATA_HOME + '/processed/transactions.pkl')
# set(txns[~txns['Transaction Type'].isna()]['Transaction Type'])
# {'Check Out', 'Check Out (Renewal)', 'Donation', 'Late Fee', 'Loan Fee', 'Loan Return',
# 'Membership Change', 'Membership Extension', 'Membership Fee', 'Other'}

# set(txns[~txns['Item Type'].isna()]['Item Type'])
# Nothing? Nothing.

# Candidate renewals: membership changes to anything other than 'Prospective'.
renewals_maybe = txns.loc[
    txns['Transaction Type'].eq('Membership Change') & txns['Membership Type'].ne('Prospective')
]


In [None]:
# Inspect the transactions recorded as 'Membership Extension'.
txns.loc[txns['Transaction Type'].eq('Membership Extension')]

In [None]:
# Spot-check a single user record by membership ID.
# users[users['Membership ID'] == '8366']
users.loc[users['Membership ID'].eq('2291')]


In [None]:
# Only the last expression of a cell is rendered, so the first lookup has to be displayed
# explicitly; otherwise its result is computed and silently discarded.
display(txns[txns['Transaction ID'] == 2134991])
# First 30 transactions of one member, for comparison with the transaction above.
txns[txns['Member ID'] == '8366'][:30]

In [None]:
# Yikes. All the ways you can renew a membership.
#
# Do my best....
#
# 
"""
Transaction Type = 'Membership Fee'
* If it's on the same day as "First Membership Started", then it is the first membership.
* Otherwise it is the renewal.

Transaction Type = 'Membership Change'
* 'Membership Type' = 'Prospective' then it's the membership creation
* 'Membership Type' != 'Prospective' then it is a renewal, or an actual change of membership. We'll assume renewal.

- For each member it might be better to group the transactions into a "creation", and "fee", and "change".
- Each time there's a fee or change, we should compare it to the expiration date and the expected renewal
period.

The best I think I can do is:

for each member
    iterate through their transactions, and create a renewal log.
      If it's less than a year then it's an early renewal. If the membership type was the same then it
        maybe extends by a year?
      If it's special-automated-processes then it's an auto-renew

What will I do with a log?  For each month, look at the members that were expiring in the X few months (based on
the renewal log), and then calculate how many renewed within those X few months (or within a year).
"""
# Load the processed inventory table and list its column types.
inv = pd.read_pickle(DATA_HOME + '/processed/inventory.pkl')
inv.dtypes

In [None]:
# Data structure I want is a row for each time a member renewed, including:
#
# * Member ID
# * Date
# * Previous Expiration Date
# * New Expiration Date
# * Renewal Date
# 
# Take a slice of the users who had expiration time in the preceding window
# Then take a slice of the user who renewed during that time.
from datetime import timedelta

# Membership-related transactions only, ordered by member and then chronologically,
# grouped so that each member's transactions can be walked oldest-first.
membership_txns_by_member_asc = (
    txns.loc[txns['Transaction Type'].isin(['Membership Change', 'Membership Fee'])]
    .sort_values(by=['Member ID', 'Date'])
    .groupby(['Member ID'])
)

# How far a renewal pushes the expiration date out, keyed by membership type.
membership_type_to_extension_amount = {
    'regular': timedelta(weeks=52),
    'Standard (Annual)': timedelta(weeks=52),
    'Flexible': timedelta(weeks=52),
    'Sustaining (Annual)': timedelta(weeks=52),
    'Standard (Monthly)': timedelta(days=31),
    # 'Extremely Overdue Items': timedelta(weeks=500),
    # 'Shop Use Only': timedelta(weeks=500),
}

def get_renewal(txn, prev_txn, prev_renewal):
    """Turn one membership transaction into a renewal-log record, or None if it is not a renewal.

    Args:
        txn: The transaction being examined; must support lookup of 'Member ID',
            'Transaction Type', 'Membership Type' and 'Date'.
        prev_txn: The member's preceding transaction, or None for the first one.
        prev_renewal: The renewal record to extend from (a dict as returned by this
            function), or None when there is none.

    Returns:
        A dict with 'Member ID', 'Date', 'Membership Type' and 'Expiration Date',
        or None when the transaction is skipped.

    Raises:
        KeyError: If the transaction's membership type has no entry in
            `membership_type_to_extension_amount`.
    """
    # Skip account creation, banning users, shop users
    if txn['Transaction Type'] == 'Membership Change' and txn['Membership Type'] in {'Prospective', 'ExtremelyOverdueItems', 'Shop Use Only', 'Discontinued'}:
        return None
    # Try to avoid double entries when there's both a 'Membership Fee' and 'Membership Change' for the same action
    if txn['Transaction Type'] == 'Membership Fee' and prev_txn is not None and prev_renewal is not None and ((txn['Date'] - prev_txn['Date']) < timedelta(minutes=5)):
        return None

    ext_amount = membership_type_to_extension_amount[txn['Membership Type']]
    if prev_renewal is None:
        # Extend from current date
        new_exp = txn['Date'] + ext_amount
    else:
        # If the membership type stays the same, by default it extends out a year from greater of current
        # expiration date, or current date.
        if prev_renewal['Membership Type'] == txn['Membership Type']:
            new_exp = max(txn['Date'], prev_renewal['Expiration Date']) + ext_amount
        else:
            # Otherwise it always extends from current date
            new_exp = txn['Date'] + ext_amount
    return {
        # Read the ID from the transaction itself rather than from a loop variable in the
        # enclosing notebook scope, so the function works wherever it is called.
        'Member ID': txn['Member ID'],
        'Date': txn['Date'],
        'Membership Type': txn['Membership Type'],
        'Expiration Date': new_exp
    }
            
# Build the renewal log: one record per membership transaction that counts as a renewal.
# Records are collected in a list first and converted to a DataFrame once at the end.
renewal_records = []

for (member_id,), member_txns in membership_txns_by_member_asc:
    prev_txn = None
    # Result for the immediately preceding transaction (None if it was skipped). This is the
    # signal get_renewal uses to drop a 'Membership Fee' that duplicates the previous entry.
    prev_txn_renewal = None
    # Most recent renewal actually logged for this member. It must survive skipped
    # transactions, otherwise the next renewal forgets the member's current expiration date.
    last_renewal = None
    for _, txn in member_txns.iterrows():
        renewal = get_renewal(txn, prev_txn, prev_txn_renewal)
        if renewal is not None and last_renewal is not prev_txn_renewal:
            # The preceding transaction was skipped but the member does have an earlier
            # renewal: recompute the expiration relative to that renewal. Passing
            # prev_txn=None keeps the duplicate-fee check from firing on this second call.
            renewal = get_renewal(txn, None, last_renewal)
        if renewal is not None:
            renewal_records.append(renewal)
            last_renewal = renewal
        prev_txn = txn
        prev_txn_renewal = renewal

renewal_log = pd.DataFrame(renewal_records)
renewal_log

In [None]:
# Spot-check the renewal log entries of a single member.
renewal_log.loc[renewal_log['Member ID'].eq('8037')]

In [None]:
# All transactions of member 2291.
txns.loc[txns['Member ID'].eq('2291')]

In [None]:
# All transactions of member 8037, to compare against that member's renewal log entries.
txns.loc[txns['Member ID'].eq('8037')]

In [None]:
from datetime import datetime, timedelta
import matplotlib.ticker as mtick
import pandas as pd

# grace_period: how far back from each plotted month the expiration window ends.
# averaging_period: width of the expiration window.
grace_period = timedelta(weeks=26)
averaging_period = timedelta(weeks=13)

# We didn't start seeing expiring memberships until a year after we started in late July 2016.
# The graph only starts after the initial grace period and ends before it, so that we always have a complete
# data set and are comparing apples to apples.
ix = pd.period_range(start=datetime(2017, 8, 1) + grace_period, end=datetime.now() - grace_period, freq="M")
series = []
member_series = []  # NOTE(review): never populated or read in this cell.
for p in ix:
    now = p.start_time
    # Start at the averaging period less the grace period, so we can check the full grace period now.
    period_start = now - averaging_period - grace_period
    period_end = now - grace_period

    # Log entries whose membership expired inside [period_start, period_end).
    trailing = renewal_log[(renewal_log['Expiration Date'] >= period_start) & (renewal_log['Expiration Date'] < period_end)]
    member_ids = set(trailing['Member ID'])
    # Of those members, the ones holding an entry that expires on/after period_end and that was
    # recorded before `now`. Members expiring early in the window therefore get slightly longer
    # than the grace period to renew.
    renewed_member_ids = set(
        renewal_log[
            renewal_log['Member ID'].isin(member_ids) & (renewal_log['Expiration Date'] >= period_end) & (renewal_log['Date'] < now)
        ]['Member ID']
    )
    # print(now, len(member_ids), renewed_member_ids)
    if member_ids:
        series.append(len(renewed_member_ids)/len(member_ids))
    else:
        # No membership expired in the window: the rate is undefined, not 0%. NaN leaves a gap
        # in the line instead of drawing a misleading dip to zero.
        series.append(float('nan'))

graph = pd.DataFrame(series, index=ix).plot(title=f"Preceding {averaging_period.days // 30} Month Average Renewal Rate, Up To {grace_period.days // 30} Months After Expiration", xlabel="Year", ylabel="%", legend=False)
graph.yaxis.set_major_formatter(mtick.PercentFormatter(xmax=1))
# Each point is the share of members with a membership that expired during the averaging window
# who had a later-expiring membership recorded before the plotted month.


In [None]:
# Persist the renewal log next to the other processed data. The path is built from DATA_HOME
# (same location as before) so that moving the data directory only requires one change.
renewal_log.to_csv(f"{DATA_HOME}/processed/renewal_log.csv")