In [1]:
import os
# Move the working directory one level up; later cells read files through relative
# paths such as "notebooks/outputs/df_bins.csv" and "data/info_contracts.csv".
# NOTE(review): this relative chdir is not idempotent - every re-run of the cell
# climbs one more directory level. Presumably the kernel starts inside notebooks/ - confirm.
os.chdir('../')

## Imports

In [2]:
import pandas as pd
import datetime as dt

from risk_suite.economics import EconomicsCalculator

## Data Collection

#### Score Imobiliária

In [12]:
# Load the score/segment table (predictions, target, old and optimal segments)
# keyed by activation month and agency id, then preview the first rows.
df_bins = pd.read_csv("notebooks/outputs/df_bins.csv")
df_bins.head()

Unnamed: 0,dt_ativacao,id_imobiliaria,predictions,target,old_segments,optimal_bins,optimal_segments,old_transformed_segments
0,2022-01,38,0.033534,0,B,0.001813,1-baixo,B
1,2022-01,50,0.033887,0,B,0.001813,1-baixo,B
2,2022-01,63,0.031284,0,A,0.001813,1-baixo,A
3,2022-01,81,0.135855,0,E,0.055794,3-alto,C
4,2022-01,95,0.097252,0,E,0.055794,3-alto,C


In [13]:
# Contract -> agency lookup: keep only the join keys, rename the contract id to the
# name used by the economics tables, and reduce the activation date to a "YYYY-MM" label.
info_contracts = (
    pd.read_csv("data/info_contracts.csv")
    .loc[:, ["dt_ativacao", "id_imobiliaria", "id_contrato"]]
    .rename(columns={"id_contrato": "contract_id"})
    .assign(
        dt_ativacao=lambda frame: pd.to_datetime(frame["dt_ativacao"]).dt.strftime("%Y-%m")
    )
)

In [14]:
# Number of df_bins rows that fall in each optimal segment.
df_bins.groupby("optimal_segments")["id_imobiliaria"].size()

optimal_segments
1-baixo    1103
2-medio     717
3-alto      466
Name: id_imobiliaria, dtype: int64

In [15]:
# Attach the score/segment columns to every contract. The default inner join keeps
# only contracts whose (activation month, agency) pair is present in df_bins.
df = pd.merge(
    left=info_contracts,
    right=df_bins,
    on=["dt_ativacao", "id_imobiliaria"],
)
df.head()

Unnamed: 0,dt_ativacao,id_imobiliaria,contract_id,predictions,target,old_segments,optimal_bins,optimal_segments,old_transformed_segments
0,2022-01,38,472480,0.033534,0,B,0.001813,1-baixo,B
1,2022-01,38,473211,0.033534,0,B,0.001813,1-baixo,B
2,2022-01,38,477113,0.033534,0,B,0.001813,1-baixo,B
3,2022-01,38,479053,0.033534,0,B,0.001813,1-baixo,B
4,2022-01,38,479355,0.033534,0,B,0.001813,1-baixo,B


#### Risco atual

In [19]:
# Activation-month calendar with the labels "2022-01" .. "2022-06".
# A monthly period range states both end points explicitly. The previous
# date_range("2022-01", "2022-07", freq="M") produced the same six labels, but only
# because month-end stamps exclude 2022-07-01, and it relied on the "M" offset
# alias that pandas 2.2 deprecated in favour of "ME".
calendar = pd.DataFrame(
    pd.period_range("2022-01", "2022-06", freq="M").strftime("%Y-%m"),
    columns=["dt_ativacao"],
)

In [20]:
# Legacy agency risk codes collapsed into three segments: 2 -> A, 3..6 -> B, 7 -> C.
# Any other code has no entry, so the lookup below yields None for it.
risk_code_to_segment = {2: "A", 3: "B", 4: "B", 5: "B", 6: "B", 7: "C"}

old_score = (
    pd.read_csv("data/old_risco_imob.csv")
    .loc[:, ["id_imobiliaria", "risco_imobiliaria"]]
    # Rows with risk code 0 are discarded before segmenting.
    .query("risco_imobiliaria != 0")
    .assign(
        segments=lambda scores: scores["risco_imobiliaria"].map(
            lambda code: risk_code_to_segment.get(code)
        )
    )
    # Repeat every agency row once per calendar month so it can be joined on
    # (dt_ativacao, id_imobiliaria) like the new score.
    .merge(calendar, how="cross")
)
old_score.head()

Unnamed: 0,id_imobiliaria,risco_imobiliaria,segments,dt_ativacao
0,30,6,B,2022-01
1,30,6,B,2022-02
2,30,6,B,2022-03
3,30,6,B,2022-04
4,30,6,B,2022-05


In [21]:
# Number of old_score rows per legacy segment. Each agency appears once per calendar
# month after the cross join, and rows whose segment is None are not counted.
old_score.groupby("segments")["id_imobiliaria"].size()

segments
A     288
B    2928
C    2154
Name: id_imobiliaria, dtype: int64

In [22]:
# Attach the legacy segment to every contract through its (activation month, agency)
# pair; the default inner join drops contracts without a legacy score.
agency_segments = old_score[["dt_ativacao", "id_imobiliaria", "segments"]]
old_risk = pd.merge(
    info_contracts,
    agency_segments,
    on=["dt_ativacao", "id_imobiliaria"],
)
old_risk.head()

Unnamed: 0,dt_ativacao,id_imobiliaria,contract_id,segments
0,2022-01,30,480160,B
1,2022-02,30,501215,B
2,2022-02,30,503926,B
3,2022-02,30,511972,B
4,2022-02,30,534486,B


#### Calculadora

In [7]:
# Contract attributes kept for the economics report.
base_features = [
    'contract_id',
    'activation_date',
    'churn_date',
    'activation_month',
    'activation_quarter',
    'score_serasa',
    'rating',
    'rental_value',
]

contracts = (
    pd.read_parquet('../risk_suite/data/contracts.parquet')
    .loc[:, base_features]
    # A contract needs both an activation date and a rating to be reported.
    .dropna(subset=['activation_date', 'rating'])
    # Keep contracts activated from 2020Q1 onwards.
    .loc[lambda frame: frame['activation_quarter'] >= pd.Period('2020Q1')]
    # Collapse the E sub-ratings into a single 'E' value.
    .replace(['E1', 'E2', 'E3'], 'E')
)

# Event tables handed to the economics calculator.
defaults = pd.read_parquet('../risk_suite/data/defaults.parquet')
recoveries = pd.read_parquet('../risk_suite/data/recoveries.parquet')
revenues = pd.read_parquet('../risk_suite/data/revenues.parquet')

## Auxiliary Functions

In [8]:
def report_economics2(aggkeys, revenues_pivot, defaults_pivot, recoveries_pivot):
    """Summarise revenue, default and recovery economics per group.

    Every pivot is summed across its columns (``axis=1``) to obtain one total per
    row, and those row totals are then aggregated per ``aggkeys`` group.

    Parameters
    ----------
    aggkeys : str or list of str
        Grouping key(s) handed to ``groupby`` on each pivot.
    revenues_pivot, defaults_pivot, recoveries_pivot : pandas.DataFrame
        Numeric frames that can all be grouped by ``aggkeys``.
        NOTE(review): presumably one row per contract, as produced by
        ``EconomicsCalculator`` - confirm.

    Returns
    -------
    pandas.DataFrame
        One row per group with the columns ``n_contracts`` (rows in the group),
        ``revenue_value``, ``default_value``, ``recovery_value`` (mean row totals),
        ``prob_default`` (share of rows whose default total is > 0),
        ``recovery_efficiency`` (recovery / default; NaN or inf when the group's
        mean default is 0), ``unit_economics`` (revenue - default + recovery) and
        ``aggregate_margin`` (unit economics * n_contracts). The result is sorted
        by the first grouping level ascending and any further levels descending.
    """
    revenue_total = revenues_pivot.sum(axis=1)
    default_total = defaults_pivot.sum(axis=1)
    recovery_total = recoveries_pivot.sum(axis=1)

    economics_df = pd.DataFrame(
        {
            'n_contracts': revenues_pivot.groupby(aggkeys).size(),
            'revenue_value': revenue_total.groupby(aggkeys).mean(),
            'prob_default': (default_total > 0).groupby(aggkeys).mean(),
            'default_value': default_total.groupby(aggkeys).mean(),
            'recovery_value': recovery_total.groupby(aggkeys).mean(),
        }
    )

    economics_df = (
        economics_df
        .assign(recovery_efficiency=lambda x: x['recovery_value'] / x['default_value'])
        .assign(unit_economics=lambda x: x['revenue_value'] - x['default_value'] + x['recovery_value'])
        .assign(aggregate_margin=lambda x: x['unit_economics'] * x['n_contracts'])
    )

    # Derive the sort order from the actual number of index levels. The previous
    # hard-coded [True, False] only matched two grouping keys: with one key it
    # worked only because a non-empty list is truthy, and with three or more keys
    # its length no longer matched the index levels.
    n_levels = economics_df.index.nlevels
    if n_levels == 1:
        return economics_df.sort_index(ascending=True)
    return economics_df.sort_index(ascending=[True] + [False] * (n_levels - 1))

## Economics report

In [16]:
# Join the contract attributes onto the contract-level score frame.
# The score frame is rebuilt from its inputs instead of reusing the previous value
# of `df`: `df = contracts.merge(df, ...)` was self-referential, so running the cell
# a second time merged `contracts` into an already merged frame and produced
# `_x`/`_y` suffixed duplicate columns.
df = contracts.merge(
    info_contracts.merge(df_bins, on=["dt_ativacao", "id_imobiliaria"]),
    on="contract_id",
)

In [17]:
# Build the economics calculator over the scored contracts and the three event tables.
econ_calculator = EconomicsCalculator(df, defaults, recoveries, revenues, max_history_date='2023-01')

# NOTE(review): these are underscore-prefixed (private) builders of EconomicsCalculator.
# Their results are grouped by segment columns in report_economics2, so presumably each
# returns a pivot indexed by the contract attributes - confirm against risk_suite.
defaults_pivot = econ_calculator._build_defaults_pivot()
recoveries_pivot = econ_calculator._build_recoveries_pivot()
revenues_pivot = econ_calculator._build_revenues_pivot()

  event_data


#### Optimal Segmentation

In [55]:
# Economics summary with one row per optimal segment.
report_optimal = report_economics2(["optimal_segments"], revenues_pivot, defaults_pivot, recoveries_pivot)
report_optimal

Unnamed: 0_level_0,n_contracts,revenue_value,prob_default,default_value,recovery_value,recovery_efficiency,unit_economics,aggregate_margin
optimal_segments,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
1-baixo,8099,1152.191756,0.056797,291.666103,91.62078,0.314129,952.146433,7711433.96
2-medio,9977,1146.940793,0.101834,631.766618,175.677666,0.278074,690.85184,6892628.81
3-alto,16803,1153.008011,0.127358,899.80487,249.654122,0.277454,502.857263,8449510.59


In [50]:
# Economics summary per (optimal segment, contract rating) pair; pivoted in the cells below.
report_opt_rating = report_economics2(["optimal_segments", "rating"], revenues_pivot, defaults_pivot, recoveries_pivot)

In [51]:
# Unit economics by optimal segment (rows) and contract rating (columns)
pd.pivot_table(report_opt_rating, values="unit_economics", index="optimal_segments", columns="rating")

rating,A,B,C,D,E
optimal_segments,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1-baixo,1161.125322,1018.422711,900.751947,814.199218,818.05121
2-medio,1075.60328,728.449647,717.307818,597.56894,253.621442
3-alto,950.43948,728.502169,481.633062,202.002717,8.350721


In [59]:
# Default probability by optimal segment (rows) and contract rating (columns)
pd.pivot_table(report_opt_rating, values="prob_default", index="optimal_segments", columns="rating")

rating,A,B,C,D,E
optimal_segments,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1-baixo,0.027516,0.036288,0.05806,0.081458,0.106187
2-medio,0.038015,0.075751,0.095043,0.139188,0.19246
3-alto,0.054757,0.090189,0.126928,0.173615,0.218679


In [63]:
# Recovery (collection) efficiency by optimal segment (rows) and contract rating (columns)
pd.pivot_table(report_opt_rating, values="recovery_efficiency", index="optimal_segments", columns="rating")

rating,A,B,C,D,E
optimal_segments,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1-baixo,0.362364,0.293756,0.268552,0.34212,0.357697
2-medio,0.218537,0.246845,0.300014,0.297219,0.278451
3-alto,0.310057,0.287935,0.256003,0.266812,0.292936


In [62]:
# Contract mix (number of contracts) by optimal segment (rows) and contract rating (columns)
pd.pivot_table(report_opt_rating, values="n_contracts", index="optimal_segments", columns="rating")

rating,A,B,C,D,E
optimal_segments,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1-baixo,1381,1929,2773,933,1083
2-medio,1552,2297,3409,1207,1512
3-alto,2575,3914,5641,2039,2634


#### Old Segmentation

In [40]:
# Economics summary with one row per value of the old_segments column (A-E).
report_old = report_economics2(["old_segments"], revenues_pivot, defaults_pivot, recoveries_pivot)
report_old

Unnamed: 0_level_0,n_contracts,revenue_value,prob_default,default_value,recovery_value,recovery_efficiency,unit_economics,aggregate_margin
old_segments,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
A,1271,1335.677663,0.052714,270.501857,98.33598,0.363532,1163.511786,1478823.48
B,3354,1214.062275,0.064103,347.241855,107.34912,0.309148,974.169541,3267364.64
C,5833,1140.376235,0.065832,353.497725,107.667615,0.304578,894.546125,5217887.55
D,5967,1132.072073,0.108765,696.862427,195.004444,0.279832,630.214091,3760487.48
E,18454,1136.454054,0.124688,870.064657,239.138413,0.274851,505.52781,9329010.21


##### Transformed

In [41]:
# Economics summary with one row per value of the old_transformed_segments column (A-C).
report_old_transformed = report_economics2(["old_transformed_segments"], revenues_pivot, defaults_pivot, recoveries_pivot)
report_old_transformed

Unnamed: 0_level_0,n_contracts,revenue_value,prob_default,default_value,recovery_value,recovery_efficiency,unit_economics,aggregate_margin
old_transformed_segments,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
A,1271,1335.677663,0.052714,270.501857,98.33598,0.363532,1163.511786,1478823.48
B,15154,1153.415172,0.082354,487.315528,141.986648,0.291365,808.086292,12245739.67
C,18454,1136.454054,0.124688,870.064657,239.138413,0.274851,505.52781,9329010.21


#### Risco Atual

In [68]:
# Join the contract attributes onto the contracts that carry a legacy risk segment.
old_risk_2 = pd.merge(left=contracts, right=old_risk, on="contract_id")

In [69]:
# Same calculator setup as for the new score, but over the contracts segmented by the legacy risk.
econ_calculator_old = EconomicsCalculator(old_risk_2, defaults, recoveries, revenues, max_history_date='2023-01')

# NOTE(review): underscore-prefixed (private) builders of EconomicsCalculator;
# presumably each returns a pivot that can be grouped by "segments" and "rating" - confirm.
defaults_pivot_old = econ_calculator_old._build_defaults_pivot()
recoveries_pivot_old = econ_calculator_old._build_recoveries_pivot()
revenues_pivot_old = econ_calculator_old._build_revenues_pivot()

  event_data


In [84]:
# Economics summary per (legacy segment, contract rating) pair; pivoted in the next cell.
report_old_rating = report_economics2(["segments", "rating"], revenues_pivot_old, defaults_pivot_old, recoveries_pivot_old)

# Economics summary with one row per legacy segment.
report_old_risk = report_economics2(["segments"], revenues_pivot_old, defaults_pivot_old, recoveries_pivot_old)
report_old_risk

Unnamed: 0_level_0,n_contracts,revenue_value,prob_default,default_value,recovery_value,recovery_efficiency,unit_economics,aggregate_margin
segments,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
A,1570,1137.982134,0.026752,164.519268,28.650713,0.174148,1002.11358,1573318.32
B,24365,1191.220415,0.071086,454.496445,143.236561,0.315154,879.960531,21440238.33
C,22243,1124.420053,0.136762,875.796947,236.96691,0.270573,485.590016,10800978.73


In [86]:
# Contract mix (number of contracts) by legacy segment (rows) and contract rating (columns).
# The earlier comment described this as default probability, but the pivoted value is n_contracts.
pd.pivot_table(report_old_rating, values="n_contracts", index="segments", columns="rating")

rating,A,B,C,D,E
segments,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
A,308,356,492,199,215
B,3956,5522,8168,3017,3702
C,3253,5181,7704,2647,3458
