# AR2 Full Model Optimization for MX

The first release of the AutoRegression model used autoregression to forecast retention only, and only for low-tenure cohorts (fewer than 5 months of actuals).

This version, AutoRegression2, is a fuller model that uses autoregression to forecast both borrower retention and default rates, which were previously the two largest sources of error in our forecasts.

More documentation on the model and methodology can be found at: **TODO: add link**

In [1]:
from models import AutoRegression, AutoRegression2, PowerSlope
from dbm import DBM

import pandas as pd
import numpy as np
import plotly
from plotly import graph_objects as go
from plotly.subplots import make_subplots

In [2]:
# Input frame handed to both the PowerSlope (m1) and AutoRegression2 (m2) models below.
data = pd.read_csv('data/mx_7-5.csv')

In [3]:
# Separate historical extract; only used to build the `mh` PowerSlope model.
historical_data = pd.read_csv('data/historicals/mx_historic_data_2017-01-01.csv')

## Model Setup

#### Historical

In [4]:
# PowerSlope instance over the historical extract; referenced later only for
# plotting historical default-rate curves (no forecast/backtest is run on it here).
mh = PowerSlope(historical_data, market='mx')

Clean data spans 2017-11 to 2022-05
Total # of cohorts: 55



#### PowerSlope

In [93]:
# Baseline model: PowerSlope on the current MX data.
m1 = PowerSlope(
    data,
    market='mx',
    ltv_expected='mx_ltv_expected_6-24-22.csv',
)

# Forecast and 3-month hold-out backtest; results are stored on the model
# object so the comparison cells below can read them.
m1.forecast = m1.forecast_data(
    m1.data, min_months=3, n_months=50,
)
m1.backtest, m1.backtest_report = m1.backtest_data(
    m1.data, hold_months=3, min_months=2,
)

Clean data spans 2020-09 to 2022-05
Total # of cohorts: 21

Backtesting 3 months.
17 cohorts will be backtested.



Covariance of the parameters could not be estimated



#### AutoRegression

In [94]:
# Candidate model: AutoRegression2 on the same data and LTV expectations as m1.
m2 = AutoRegression2(
    data,
    market='mx',
    ltv_expected='mx_ltv_expected_6-24-22.csv',
)

# NOTE(review): unlike the PowerSlope forecast, no n_months is passed here, so
# the model's default horizon applies -- confirm it matches the 50 months used for m1.
m2.forecast = m2.forecast_data(
    m2.data, min_months=3,
)
m2.backtest, m2.backtest_report = m2.backtest_data(
    m2.data, hold_months=3, min_months=2,
)

Clean data spans 2020-09 to 2022-05
Total # of cohorts: 21

Backtesting 3 months.
17 cohorts will be backtested.


## Backtest Comparison

### Retention

In [95]:
# Column plotted by the comparison cell that follows.
param = 'borrower_retention'

In [96]:
# Colour lookup keyed by curve position: Dark24, then Safe, then Vivid.
colors = dict(enumerate([
    *plotly.colors.qualitative.Dark24,
    *plotly.colors.qualitative.Safe,
    *plotly.colors.qualitative.Vivid,
]))

fig = make_subplots(rows=1, cols=2, shared_xaxes=True,
                    subplot_titles=('PowerSlope', 'AutoRegression'))

# Both panels are built identically; only the model, the subplot column and the
# legend visibility differ. Right-hand traces are hidden from the legend and
# share a legendgroup with the left-hand trace at the same position
# (this assumes both backtests list cohorts in the same order).
for model, col, show_legend in ((m1, 1, True), (m2, 2, False)):
    curves = []
    for cohort in model.backtest.cohort.unique():
        c_data = model.backtest[model.backtest.cohort == cohort]

        # actuals for this cohort, taken from the model's full data
        actual = model.data[model.data.cohort == cohort][param]
        actual.name = cohort + '-actual'
        curves.append(actual)

        # backtest rows flagged as forecast for this cohort
        forecast = c_data[c_data.data_type == 'forecast'][param]
        forecast.name = cohort + '-forecast'
        curves.append(forecast)

    for i, curve in enumerate(curves):
        # Wrap around the palette rather than raising KeyError when there are
        # more curves than colours.
        c = colors[i % len(colors)]
        if 'forecast' in curve.name:
            mode, line = 'lines', dict(width=3, dash='dash', color=c)
        elif curve.notnull().any():
            mode, line = 'markers+lines', dict(width=2, color=c)
        else:
            continue  # actuals are entirely null: nothing to draw
        fig.add_trace(
            go.Scatter(name=curve.name, x=curve.index, y=curve, mode=mode, line=line,
                       legendgroup=f'{i}', showlegend=show_legend),
            row=1, col=col)

fig.update_xaxes(range=[0, 24])
fig.layout.yaxis1.title = 'Borrower Retention (%)'
fig.layout.xaxis1.title = 'Month'
fig.layout.xaxis2.title = 'Month'
fig.layout.yaxis1.tickformat = ".1%"
fig.layout.yaxis2.tickformat = ".1%"
fig.show()

### Default Rate

In [97]:
# Column plotted by the comparison cell that follows.
param = 'default_rate_365dpd'

In [98]:
# Colour lookup keyed by curve position: Dark24, then Safe, then Vivid.
colors = dict(enumerate([
    *plotly.colors.qualitative.Dark24,
    *plotly.colors.qualitative.Safe,
    *plotly.colors.qualitative.Vivid,
]))

fig = make_subplots(rows=1, cols=2, shared_xaxes=True,
                    subplot_titles=('PowerSlope', 'AutoRegression'))

# Both panels are built identically; only the model, the subplot column and the
# legend visibility differ. Right-hand traces are hidden from the legend and
# share a legendgroup with the left-hand trace at the same position
# (this assumes both backtests list cohorts in the same order).
for model, col, show_legend in ((m1, 1, True), (m2, 2, False)):
    curves = []
    for cohort in model.backtest.cohort.unique():
        c_data = model.backtest[model.backtest.cohort == cohort]

        # actuals for this cohort, taken from the model's full data
        actual = model.data[model.data.cohort == cohort][param]
        actual.name = cohort + '-actual'
        curves.append(actual)

        # backtest rows flagged as forecast for this cohort
        forecast = c_data[c_data.data_type == 'forecast'][param]
        forecast.name = cohort + '-forecast'
        curves.append(forecast)

    for i, curve in enumerate(curves):
        # Wrap around the palette rather than raising KeyError when there are
        # more curves than colours.
        c = colors[i % len(colors)]
        if 'forecast' in curve.name:
            mode, line = 'lines', dict(width=3, dash='dash', color=c)
        elif curve.notnull().any():
            mode, line = 'markers+lines', dict(width=2, color=c)
        else:
            continue  # actuals are entirely null: nothing to draw
        fig.add_trace(
            go.Scatter(name=curve.name, x=curve.index, y=curve, mode=mode, line=line,
                       legendgroup=f'{i}', showlegend=show_legend),
            row=1, col=col)

fig.update_xaxes(range=[0, 50])
fig.update_yaxes(range=[.03, .18])
fig.layout.yaxis1.title = 'Default Rate (%)'
fig.layout.xaxis1.title = 'Month'
fig.layout.xaxis2.title = 'Month'
fig.layout.yaxis1.tickformat = ".1%"
fig.layout.yaxis2.tickformat = ".1%"
fig.show()

In [99]:
# Same metric as the comparison above, but drawn from the historical-data model (mh).
# NOTE(review): 'clean' presumably selects the model's cleaned dataset -- confirm against plot_cohorts.
mh.plot_cohorts('default_rate_365dpd', 'clean')

### LTV

In [100]:
# Column plotted by the comparison cell that follows.
param = 'cumulative_ltv_per_original'

In [101]:
# Colour lookup keyed by curve position: Dark24, then Safe, then Vivid.
colors = dict(enumerate([
    *plotly.colors.qualitative.Dark24,
    *plotly.colors.qualitative.Safe,
    *plotly.colors.qualitative.Vivid,
]))

fig = make_subplots(rows=1, cols=2, shared_xaxes=True,
                    subplot_titles=('PowerSlope', 'AutoRegression'))

# Both panels are built identically; only the model, the subplot column and the
# legend visibility differ. Right-hand traces are hidden from the legend and
# share a legendgroup with the left-hand trace at the same position
# (this assumes both backtests list cohorts in the same order).
for model, col, show_legend in ((m1, 1, True), (m2, 2, False)):
    curves = []
    for cohort in model.backtest.cohort.unique():
        c_data = model.backtest[model.backtest.cohort == cohort]

        # actuals for this cohort, taken from the model's full data
        actual = model.data[model.data.cohort == cohort][param]
        actual.name = cohort + '-actual'
        curves.append(actual)

        # backtest rows flagged as forecast for this cohort
        forecast = c_data[c_data.data_type == 'forecast'][param]
        forecast.name = cohort + '-forecast'
        curves.append(forecast)

    for i, curve in enumerate(curves):
        # Wrap around the palette rather than raising KeyError when there are
        # more curves than colours.
        c = colors[i % len(colors)]
        if 'forecast' in curve.name:
            mode, line = 'lines', dict(width=3, dash='dash', color=c)
        elif curve.notnull().any():
            mode, line = 'markers+lines', dict(width=2, color=c)
        else:
            continue  # actuals are entirely null: nothing to draw
        fig.add_trace(
            go.Scatter(name=curve.name, x=curve.index, y=curve, mode=mode, line=line,
                       legendgroup=f'{i}', showlegend=show_legend),
            row=1, col=col)

fig.update_xaxes(range=[0, 36])
fig.update_yaxes(range=[-5, 95])
fig.layout.yaxis1.title = 'Cumulative LTV ($)'
fig.layout.xaxis1.title = 'Month'
fig.layout.xaxis2.title = 'Month'
fig.show()

### LTV Forecast Errors

In [102]:
# PowerSlope: plot the 'cumulative_ltv_per_original-me' column of the backtest report.
# NOTE(review): the '-me' suffix presumably denotes mean error -- confirm in the models package.
m1.plot_cohorts('cumulative_ltv_per_original-me', 'backtest_report')

In [103]:
# Mean of the cumulative-LTV RMSE column in the PowerSlope backtest report.
rmse_column = 'cumulative_ltv_per_original-rmse'
mean_rmse1 = m1.backtest_report[rmse_column].mean()
print(f'PowerSlope Mean RMSE: ${round(mean_rmse1, 2)}')

PowerSlope Mean RMSE: $0.63


In [104]:
# AutoRegression2: plot the 'cumulative_ltv_per_original-me' column of the backtest report.
# NOTE(review): the '-me' suffix presumably denotes mean error -- confirm in the models package.
m2.plot_cohorts('cumulative_ltv_per_original-me', 'backtest_report')

In [105]:
# Mean of the cumulative-LTV RMSE column in the AutoRegression2 backtest report.
rmse_column = 'cumulative_ltv_per_original-rmse'
mean_rmse2 = m2.backtest_report[rmse_column].mean()
print(f'AutoRegression Mean RMSE: ${round(mean_rmse2, 2)}')

AutoRegression Mean RMSE: $0.38


## Forecast 

In [106]:
# Column plotted by both forecast comparison figures below.
param = 'cumulative_ltv_per_original'

In [107]:
# Colour lookup keyed by curve position: Dark24, then Safe, then Vivid.
colors = dict(enumerate([
    *plotly.colors.qualitative.Dark24,
    *plotly.colors.qualitative.Safe,
    *plotly.colors.qualitative.Vivid,
]))

fig = make_subplots(rows=1, cols=2, shared_xaxes=True,
                    subplot_titles=('PowerSlope', 'AutoRegression'))

# Both panels are built identically; only the model, the subplot column and the
# legend visibility differ. Right-hand traces are hidden from the legend and
# share a legendgroup with the left-hand trace at the same position
# (this assumes both forecasts list cohorts in the same order).
for model, col, show_legend in ((m1, 1, True), (m2, 2, False)):
    curves = []
    for cohort in model.forecast.cohort.unique():
        c_data = model.forecast[model.forecast.cohort == cohort]

        # actuals for this cohort, taken from the model's full data
        actual = model.data[model.data.cohort == cohort][param]
        actual.name = cohort + '-actual'
        curves.append(actual)

        # forecast rows for this cohort
        forecast = c_data[c_data.data_type == 'forecast'][param]
        forecast.name = cohort + '-forecast'
        curves.append(forecast)

    for i, curve in enumerate(curves):
        # Wrap around the palette rather than raising KeyError when there are
        # more curves than colours.
        c = colors[i % len(colors)]
        if 'forecast' in curve.name:
            mode, line = 'lines', dict(width=3, dash='dash', color=c)
        elif curve.notnull().any():
            mode, line = 'markers+lines', dict(width=2, color=c)
        else:
            continue  # actuals are entirely null: nothing to draw
        fig.add_trace(
            go.Scatter(name=curve.name, x=curve.index, y=curve, mode=mode, line=line,
                       legendgroup=f'{i}', showlegend=show_legend),
            row=1, col=col)

fig.update_xaxes(range=[0, 36])
fig.update_yaxes(range=[-5, 115])
fig.layout.yaxis1.title = 'Cumulative LTV ($)'
fig.layout.xaxis1.title = 'Month'
fig.layout.xaxis2.title = 'Month'
fig.show()

## Extending Forecast to 2 MONTHS OUT

In [108]:
# Re-run both forecasts with min_months lowered from 3 to 2, on a common
# 50-month horizon. This overwrites the forecasts produced in Model Setup.
for model in (m1, m2):
    model.forecast = model.forecast_data(model.data, min_months=2, n_months=50)


Covariance of the parameters could not be estimated



In [109]:
# Colour lookup keyed by curve position: Dark24, then Safe, then Vivid.
colors = dict(enumerate([
    *plotly.colors.qualitative.Dark24,
    *plotly.colors.qualitative.Safe,
    *plotly.colors.qualitative.Vivid,
]))

fig = make_subplots(rows=1, cols=2, shared_xaxes=True,
                    subplot_titles=('PowerSlope', 'AutoRegression'))

# Both panels are built identically; only the model, the subplot column and the
# legend visibility differ. Right-hand traces are hidden from the legend and
# share a legendgroup with the left-hand trace at the same position
# (this assumes both forecasts list cohorts in the same order).
for model, col, show_legend in ((m1, 1, True), (m2, 2, False)):
    curves = []
    for cohort in model.forecast.cohort.unique():
        c_data = model.forecast[model.forecast.cohort == cohort]

        # actuals for this cohort, taken from the model's full data
        actual = model.data[model.data.cohort == cohort][param]
        actual.name = cohort + '-actual'
        curves.append(actual)

        # forecast rows for this cohort
        forecast = c_data[c_data.data_type == 'forecast'][param]
        forecast.name = cohort + '-forecast'
        curves.append(forecast)

    for i, curve in enumerate(curves):
        # Wrap around the palette rather than raising KeyError when there are
        # more curves than colours.
        c = colors[i % len(colors)]
        if 'forecast' in curve.name:
            mode, line = 'lines', dict(width=3, dash='dash', color=c)
        elif curve.notnull().any():
            mode, line = 'markers+lines', dict(width=2, color=c)
        else:
            continue  # actuals are entirely null: nothing to draw
        fig.add_trace(
            go.Scatter(name=curve.name, x=curve.index, y=curve, mode=mode, line=line,
                       legendgroup=f'{i}', showlegend=show_legend),
            row=1, col=col)

fig.update_xaxes(range=[0, 36])
fig.update_yaxes(range=[-5, 115])
fig.layout.yaxis1.title = 'Cumulative LTV ($)'
fig.layout.xaxis1.title = 'Month'
fig.layout.xaxis2.title = 'Month'
fig.show()

In [111]:
# Forecast rows at index 50 for each model, keyed by cohort, with the LTV
# column renamed after the model that produced it.
df1 = (
    m1.forecast
    .loc[m1.forecast.index == 50, ['cohort', 'cumulative_ltv_per_original']]
    .set_index('cohort')
    .rename(columns={'cumulative_ltv_per_original': 'PowerSlope LTV'})
)
df2 = (
    m2.forecast
    .loc[m2.forecast.index == 50, ['cohort', 'cumulative_ltv_per_original']]
    .set_index('cohort')
    .rename(columns={'cumulative_ltv_per_original': 'AutoRegression LTV'})
)

# Side-by-side table plus the PowerSlope-minus-AutoRegression gap.
summary = pd.concat([df1, df2], axis=1).assign(
    difference=lambda frame: frame['PowerSlope LTV'] - frame['AutoRegression LTV']
)
summary

Unnamed: 0_level_0,PowerSlope LTV,AutoRegression LTV,difference
cohort,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2020-09,90.001506,95.770148,-5.768643
2020-10,86.379468,93.766762,-7.387294
2020-11,72.912404,79.882428,-6.970024
2020-12,56.794817,58.2199,-1.425082
2021-01,51.271491,52.803558,-1.532067
2021-02,51.07387,53.232478,-2.158608
2021-03,62.257875,69.127631,-6.869756
2021-04,80.439016,86.039304,-5.600288
2021-05,103.04801,113.153662,-10.105652
2021-06,108.472744,117.249064,-8.77632


In [112]:
# One line per model, read from the matching column of `summary`.
series_by_model = {
    'PowerSlope': 'PowerSlope LTV',
    'AutoRegression': 'AutoRegression LTV',
}
traces = [
    go.Scatter(name=label, x=summary.index, y=summary[column], mode='markers+lines')
    for label, column in series_by_model.items()
]

fig = go.Figure(traces)
fig.update_layout(
    title='50mo LTV - PowerSlope vs AR',
    xaxis_title='Cohort',
    yaxis_title='Cumulative LTV ($)',
)
fig.update_yaxes(range=[0, 130])
fig.show()

### AutoRegression 7dpd Default Rates by Cohort (Months 1, 3, 6, 12)

In [123]:
# Forecast months to compare across cohorts (one line per month).
months = [1, 3, 6, 12]

# Values are read from the AutoRegression2 forecast, so take the cohort list
# from that same frame; iterating another model's cohorts would raise KeyError
# on any cohort missing here. Each cohort is sliced once, outside the month loop.
cohorts = list(m2.forecast.cohort.unique())
forecast_by_cohort = {c: m2.forecast[m2.forecast.cohort == c] for c in cohorts}

traces = []
for m in months:
    y = [forecast_by_cohort[c].loc[m, 'default_rate_7dpd'] for c in cohorts]
    traces.append(go.Scatter(name=f'Month {m}', x=cohorts, y=y, mode='markers+lines'))

fig = go.Figure(traces)

fig.update_layout(
    xaxis=dict(title='Cohort'),
    yaxis=dict(title='7dpd Default Rate', tickformat='.1%'))
fig.show()