In [55]:
# --- Standard library ---
import warnings

# --- Third-party ---
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import f1_score, roc_auc_score

# Seed NumPy's legacy global RNG so any code drawing from it is repeatable.
np.random.seed(0)

# Suppress every warning for the rest of the session. The filter is installed
# only after the imports above, so warnings raised while importing still show.
warnings.filterwarnings('ignore')

In [56]:
# NOTE(review): `datasets` looks like a project-local module here (its Dataset
# takes a `dataset=` key), not the Hugging Face package of the same name — confirm.
from datasets import Dataset

# Select the dataset registered under the key 'f'.
# NOTE(review): the feature names in the printed trees (ExternalRiskEstimate,
# NumTotalTrades, ...) suggest a credit-risk dataset — confirm what 'f' maps to.
D = Dataset(dataset='f')
# split=True returns four objects; the names suggest train/test features and
# train/test labels, in that order — confirm against Dataset.get_dataset.
X_tr, X_ts, y_tr, y_ts = D.get_dataset(split=True)
# Dataset-specific keyword arguments, later unpacked into Action(..., **params).
params = D.params

In [57]:
from ract import Action

# Build the Action object from the training features, forwarding the
# dataset-specific keyword arguments held in `params`.
#   y_target=0      -> the evaluation cells read this back as `action.y_target`
#                      and explain only the rows NOT predicted as this class.
#   cost_type='MPS' -> selects the cost function by name
#                      (NOTE(review): meaning of 'MPS' is defined in ract — confirm).
#   cost_budget=0.3 -> NOTE(review): presumably the maximum cost allowed for an
#                      action — confirm in ract.
action = Action(X_tr, y_target=0, cost_type='MPS', cost_budget=0.3, **params)

In [58]:
from ract import RecourseTreeClassifier

# Tree classifier that holds the Action object built above; it is reachable
# afterwards as `clf.action` (later cells set `clf.action.alpha` before fitting).
# max_depth=2 — the trees printed by this notebook have two levels of splits.
clf = RecourseTreeClassifier(action, max_depth=2)

In [59]:
# Set alpha on the classifier's Action object before fitting.
# NOTE(review): alpha presumably weights the recourse term of the training
# objective, so 0.0 would act as a plain accuracy-only baseline — confirm in ract.
clf.action.alpha = 0.0
clf = clf.fit(X_tr, y_tr)

# Held-out metrics, evaluated in the same order as they are reported.
acc_ts = clf.score(X_ts, y_ts)
f1_ts = f1_score(y_ts, clf.predict(X_ts))
# Column 1 of predict_proba is used as the ranking score for the AUC.
auc_ts = roc_auc_score(y_ts, clf.predict_proba(X_ts)[:, 1])

print('Acc.: {:.4} | F1: {:.4} | AUC: {:.4}'.format(acc_ts, f1_ts, auc_ts))

Acc.: 0.7253 | F1: 0.7504 | AUC: 0.7224


In [60]:
# Print the fitted tree as indented if/else rules with a prediction and a
# percentage at each leaf.
clf.print_tree()

- If ExternalRiskEstimate <= 73:
	- If NumTotalTrades <= 70:
		- predict: Bad (70.2%)
	- Else:
		- predict: Good (66.7%)
- Else:
	- If AverageMInFile <= 32:
		- predict: Bad (60.8%)
	- Else:
		- predict: Good (71.7%)


In [61]:
# Test rows whose current prediction differs from the target class are the
# ones that are asked for an action.
needs_recourse = clf.predict(X_ts) != action.y_target
X_target = X_ts[needs_recourse]
results = clf.explain_action(X_target)

# Per-row flag returned by explain_action.
# NOTE(review): presumably True when the proposed action is valid within the
# cost budget — confirm in ract.
is_valid = results['cost-validity']
n_test = X_ts.shape[0]
n_invalid = (~is_valid).sum()

# Validity is measured over the explained rows only; Recourse is measured over
# the whole test set (every test row except those flagged invalid).
summary = 'Cost: {:.4} | Validity: {:.1%} ({}/{}) | Recourse: {:.1%} ({}/{})'
print(summary.format(
    results['cost'].mean(),
    is_valid.mean(),
    is_valid.sum(),
    is_valid.shape[0],
    1 - n_invalid / n_test,
    n_test - n_invalid,
    n_test,
))

Cost: 0.0 | Validity: 0.0% (0/1432) | Recourse: 42.0% (1036/2468)


In [68]:
# Set alpha on the classifier's Action object, then refit on the same data.
# NOTE(review): alpha presumably weights the recourse term of the training
# objective — confirm in ract.
clf.action.alpha = 0.1
clf = clf.fit(X_tr, y_tr)

# Held-out metrics, evaluated in the same order as they are reported.
acc_ts = clf.score(X_ts, y_ts)
f1_ts = f1_score(y_ts, clf.predict(X_ts))
# Column 1 of predict_proba is used as the ranking score for the AUC.
auc_ts = roc_auc_score(y_ts, clf.predict_proba(X_ts)[:, 1])

print('Acc.: {:.4} | F1: {:.4} | AUC: {:.4}'.format(acc_ts, f1_ts, auc_ts))

Acc.: 0.7184 | F1: 0.7243 | AUC: 0.7187


In [69]:
# Print the tree fitted with alpha = 0.1 as indented if/else rules with a
# prediction and a percentage at each leaf.
clf.print_tree()

- If ExternalRiskEstimate <= 70:
	- If NetFractionRevolvingBurden <= 150:
		- predict: Bad (74.1%)
	- Else:
		- predict: Good (0.0%)
- Else:
	- If NetFractionRevolvingBurden <= 63:
		- predict: Good (68.0%)
	- Else:
		- predict: Bad (60.5%)


In [70]:
# Test rows whose current prediction differs from the target class are the
# ones that are asked for an action.
needs_recourse = clf.predict(X_ts) != action.y_target
X_target = X_ts[needs_recourse]
results = clf.explain_action(X_target)

# Per-row flag returned by explain_action.
# NOTE(review): presumably True when the proposed action is valid within the
# cost budget — confirm in ract.
is_valid = results['cost-validity']
n_test = X_ts.shape[0]
n_invalid = (~is_valid).sum()

# Validity is measured over the explained rows only; Recourse is measured over
# the whole test set (every test row except those flagged invalid).
summary = 'Cost: {:.4} | Validity: {:.1%} ({}/{}) | Recourse: {:.1%} ({}/{})'
print(summary.format(
    results['cost'].mean(),
    is_valid.mean(),
    is_valid.sum(),
    is_valid.shape[0],
    1 - n_invalid / n_test,
    n_test - n_invalid,
    n_test,
))

Cost: 0.3568 | Validity: 50.7% (627/1237) | Recourse: 75.3% (1858/2468)
