In [19]:
import pandas as pd
import numpy as np
import plotly
from plotly import graph_objects as go

from base import DataManager
from dbm import DBM
from pyltv2 import PowerSlope, Rolling

In [20]:
mx_data = pd.read_csv('data/mx_6-7.csv')

In [None]:
m = DataManager(mx_data, market='mx')

In [None]:
m.plot_cohorts('default_rate_7dpd', 'clean')

In [None]:
backtest_months = 6
weights=[.5, .25]

In [None]:
m1 = PowerSlope(mx_data, market='mx')
m1.forecast = m1.forecast_data(m1.data)
m1.backtest, m1.backtest_report = m1.backtest_data(m1.data, hold_months=backtest_months, 
                                                   min_months=5)

In [None]:
m2 = Rolling(mx_data, market='mx')
m2.forecast = m2.forecast_data(m2.data)
m2.backtest, m2.backtest_report = m2.backtest_data(m2.data, hold_months=backtest_months, 
                                                   min_months=5, weights=weights)

In [None]:
m1.plot_cohorts('default_rate_7dpd-me', 'backtest_report')

In [None]:
m2.plot_cohorts('default_rate_7dpd-me', 'backtest_report')

In [None]:
cols = [c for c in m1.backtest_report.columns if 'default' in c and 'mape' in c]

m1.backtest_report[cols].mean().sort_values(ascending=False)[:10]

In [None]:
cols = [c for c in m2.backtest_report.columns if 'default' in c and 'mape' in c]

m2.backtest_report[cols].mean().sort_values(ascending=False)[:10]

In [None]:
m1.plot_cohorts('default_rate_7dpd', 'backtest')

In [None]:
m2.plot_cohorts('default_rate_7dpd', 'backtest')

In [21]:
dbm = DBM(user='kenny.liao')

In [30]:
sql="""
select 
    (TO_CHAR(DATE_TRUNC('month', (LOCAL_FIRST_LOAN_DISBURSED_TIME::TIMESTAMP_NTZ) ), 'YYYY-MM')) as "Local Disbursement Month",
    count(LOAN_APPLICATION_ID)
from 
    business_db.credit.loan_basic_mx as loans
where 
    still_owed > 0 
and datediff(day, local_due_date, convert_timezone('America/Mexico_City', current_timestamp())) <7
and LOCAL_FIRST_LOAN_DISBURSED_TIME::TIMESTAMP_NTZ >= TO_TIMESTAMP('09/01/2020')

group by
    TO_CHAR(DATE_TRUNC('month', (LOCAL_FIRST_LOAN_DISBURSED_TIME::TIMESTAMP_NTZ) ), 'YYYY-MM')
"""

results = dbm.query_db(sql)

In [33]:
results = results.sort_values('Local Disbursement Month').reset_index(drop=True)

In [34]:
results.drop('inde')

Unnamed: 0,index,Local Disbursement Month,COUNT(LOAN_APPLICATION_ID)
0,15,2020-09,2025
1,14,2020-10,2849
2,17,2020-11,2501
3,21,2020-12,1888
4,20,2021-01,3414
5,8,2021-02,5368
6,3,2021-03,5954
7,7,2021-04,5293
8,13,2021-05,3894
9,0,2021-06,4127


In [24]:
results.loc[3]

IS_ATLAS                                                       True
LOAN_APPLICATION_ID                                 844424938959911
PERSON_ID                                           844424936709743
SURVEY_INSTANCE_ID                                                1
LOCAL_APPLICATION_TIME             2022-05-13 20:35:51.668061-05:00
LOCAL_DISBURSEMENT_TIME                   2022-05-13 21:21:51-05:00
LOAN_NUMBER                                                       1
LOC_LOAN_NUMBER                                                   0
STATUS                                                           13
LOAN_STATUS                                                    LATE
LOAN_TYPE_ID                                                1009054
REPAYMENT_STRATEGY                                             None
TARGET_REPAYMENT_DATE                                          None
CONSTANT_DAILY_INTEREST_RATE                                   None
DAYS_BORROWED                                   