# Module `card.columntransformers`

In [1]:
from pprint import pprint
import json
import pandas as pd
# Printed frames in this notebook should show every column on a single row of text.
pd.options.display.max_columns = None        # no cap on the number of columns shown
pd.options.display.expand_frame_repr = False  # keep wide frames on one line instead of wrapping

## Class `ColTransCPD`

In [2]:
from pcl_monitor.utils.gen_test_data import generate_card_population
from pcl_monitor.scoring_engines.models.model_card import pd_aged
from pcl_monitor.scoring_engines.card.columntransformers import ColTransCPD
# Inputs: the generated card population and the 'params' entry of version '1.0' of `pd_aged`.
X = generate_card_population()
model_dict = pd_aged['1.0']['params']

# Fit the transformer on the population and transform it in the same call.
ct = ColTransCPD(model_dict=model_dict)
X_t = ct.fit_transform(X)

# Print the model parameters, then the frame before and after the transform.
# Each print joins its label and payload with a newline, so stdout is laid out
# exactly as if the label and the payload were printed separately.
print('cpd model:', json.dumps(model_dict, indent=2), sep='\n')
print('\nInput risk drivers', X, sep='\n')
print('Scored dataset', X_t, sep='\n')

cpd model:
{
  "x1": 1.2,
  "x2": 0.4,
  "x3": 0.9,
  "x4": 1.4
}

Input risk drivers
        ID  ts        x1        x2        x3        x4
0      101   1  0.542920 -2.594727 -1.211555 -0.974667
1      101   2 -1.701920 -2.656467 -2.875507 -3.084002
2      101   3 -1.291964 -2.016528 -1.018625 -1.476503
3      101   4 -0.083126 -1.280486 -2.764880 -2.980897
4      101   5 -1.568973  0.148923 -3.000109 -1.220337
...    ...  ..       ...       ...       ...       ...
14846  999  14 -0.158710 -2.032610 -2.058786 -3.936424
14847  999  15 -2.729289 -1.728126 -2.270658 -0.985560
14848  999  16 -0.678296 -1.585302 -1.580627 -2.689342
14849  999  17 -1.759399 -1.455608 -2.334335 -1.754033
14850  999  18 -3.465415 -3.047529 -1.151100 -3.009175

[14851 rows x 6 columns]
Scored dataset
        ID  ts        x1        x2        x3        x4       cpd
0      101   1  0.542920 -2.594727 -1.211555 -0.974667  0.055132
1      101   2 -1.701920 -2.656467 -2.875507 -3.084002  0.000045
2      101   3 -1.

## Class `ColTransUPD`

In [3]:
from pcl_monitor.utils.gen_test_data import generate_card_population
from pcl_monitor.scoring_engines.models.model_card import pd_aged
from pcl_monitor.scoring_engines.card.columntransformers import ColTransCPD, ColTransUPD

# Inputs: the 'params' entry of version '1.0' of `pd_aged` and the generated card population.
model_dict = pd_aged['1.0']['params']
X = generate_card_population()

# ColTransUPD is fed the output of ColTransCPD here, so build that frame first.
ct = ColTransUPD(closure_rate=0.0005)
X_cpd = ColTransCPD(model_dict=model_dict).fit_transform(X)

# Second stage: fit and transform the frame produced by the first stage, then print it.
X_t = ct.fit_transform(X_cpd)
print(X_t)

        ID  ts        x1        x2        x3        x4       cpd       upd     cumpd
0      101   1  0.542920 -2.594727 -1.211555 -0.974667  0.055132  0.055132  0.055132
1      101   2 -1.701920 -2.656467 -2.875507 -3.084002  0.000045  0.000042  0.055174
2      101   3 -1.291964 -2.016528 -1.018625 -1.476503  0.004769  0.004501  0.059675
3      101   4 -0.083126 -1.280486 -2.764880 -2.980897  0.000693  0.000651  0.060326
4      101   5 -1.568973  0.148923 -3.000109 -1.220337  0.001962  0.001840  0.062166
...    ...  ..       ...       ...       ...       ...       ...       ...       ...
14846  999  14 -0.158710 -2.032610 -2.058786 -3.936424  0.000232  0.000213  0.074819
14847  999  15 -2.729289 -1.728126 -2.270658 -0.985560  0.000617  0.000567  0.075386
14848  999  16 -0.678296 -1.585302 -1.580627 -2.689342  0.001311  0.001203  0.076589
14849  999  17 -1.759399 -1.455608 -2.334335 -1.754033  0.000710  0.000650  0.077238
14850  999  18 -3.465415 -3.047529 -1.151100 -3.009175  0.000024 

## Pipeline

In [7]:
from pcl_monitor.utils.gen_test_data import generate_card_population
from pcl_monitor.scoring_engines.models.model_card import pd_aged
from pcl_monitor.scoring_engines.card.columntransformers import ColTransCPD, ColTransUPD
from sklearn.pipeline import Pipeline


# Inputs: the 'params' entry of version '1.0' of `pd_aged` and the generated card population.
model_dict = pd_aged['1.0']['params']
X = generate_card_population()

# Chain both transformers so a single fit_transform call runs them in order:
# ColTransCPD first, then ColTransUPD on its output.
pipeline = Pipeline(
    steps=[
        ('ColTransCPD', ColTransCPD(model_dict=model_dict)),
        ('ColTransUPD', ColTransUPD(closure_rate=0.0005)),
    ]
)
X_t = pipeline.fit_transform(X)

# Print the input frame followed by the pipeline output, one after the other.
print(X, X_t, sep='\n')

        ID  ts        x1        x2        x3        x4
0      101   1  0.542920 -2.594727 -1.211555 -0.974667
1      101   2 -1.701920 -2.656467 -2.875507 -3.084002
2      101   3 -1.291964 -2.016528 -1.018625 -1.476503
3      101   4 -0.083126 -1.280486 -2.764880 -2.980897
4      101   5 -1.568973  0.148923 -3.000109 -1.220337
...    ...  ..       ...       ...       ...       ...
14846  999  14 -0.158710 -2.032610 -2.058786 -3.936424
14847  999  15 -2.729289 -1.728126 -2.270658 -0.985560
14848  999  16 -0.678296 -1.585302 -1.580627 -2.689342
14849  999  17 -1.759399 -1.455608 -2.334335 -1.754033
14850  999  18 -3.465415 -3.047529 -1.151100 -3.009175

[14851 rows x 6 columns]
        ID  ts        x1        x2        x3        x4       cpd       upd     cumpd
0      101   1  0.542920 -2.594727 -1.211555 -0.974667  0.055132  0.055132  0.055132
1      101   2 -1.701920 -2.656467 -2.875507 -3.084002  0.000045  0.000042  0.055174
2      101   3 -1.291964 -2.016528 -1.018625 -1.476503  0.0