# CE-OCL Case Study

In [148]:
import warnings
warnings.filterwarnings("ignore")
import numpy as np
import pandas as pd
import opticl as oc
import ce_helpers
import os

## Preparation

### Data prep

The (preprocessed) data can be downloaded from: https://datahub.io/machine-learning/credit-g

In [149]:
# Model family used for the counterfactual classifier.
# Choices named by the author: 'svm', 'linear', 'mlp', 'rf'.
alg = "mlp"

In [150]:
# Read the German credit data and recode the textual target in one chain.
dataset_name = 'german_credit'
recode = {"class": {"bad": 0, "good": 1}}  # bad -> 0, good -> 1
df = pd.read_csv('./data/credit-g_csv.csv').replace(recode)

df.head()

Unnamed: 0,checking_status,duration,credit_history,purpose,credit_amount,savings_status,employment,installment_commitment,personal_status,other_parties,...,property_magnitude,age,other_payment_plans,housing,existing_credits,job,num_dependents,own_telephone,foreign_worker,class
0,<0,6,critical/other existing credit,radio/tv,1169,no known savings,>=7,4,male single,none,...,real estate,67,none,own,2,skilled,1,yes,yes,1
1,0<=X<200,48,existing paid,radio/tv,5951,<100,1<=X<4,2,female div/dep/mar,none,...,real estate,22,none,own,1,skilled,1,none,yes,0
2,no checking,12,critical/other existing credit,education,2096,<100,4<=X<7,2,male single,none,...,real estate,49,none,own,1,unskilled resident,2,none,yes,1
3,<0,42,existing paid,furniture/equipment,7882,<100,4<=X<7,2,male single,guarantor,...,life insurance,45,none,for free,1,skilled,2,none,yes,1
4,<0,24,delayed previously,new car,4870,<100,1<=X<4,3,male single,none,...,no known property,53,none,for free,2,skilled,2,none,yes,0


In [151]:
# Name of the outcome column.
target = 'class'

# Features handled as continuous/numerical.
numerical = [
    'duration',
    'credit_amount',
    'installment_commitment',
    'age',
    'residence_since',
    'existing_credits',
    'num_dependents',
]

# Integer features.
# Alternatives the author tried: additionally 'age' and 'duration', or an empty list.
F_int = [
    'num_dependents',
    'existing_credits',
    'residence_since',
]

In [152]:
# Separate the outcome column from the predictors.
y = df[target]
X = df.drop(columns=[target])

# prep_data is asked for one-hot encoding and scaling. Note that X is rebound
# to the first value it returns; F_b and data_pip are passed on to
# ce_helpers.opt in the optimization cells.
(X, X_train, X_test,
 y_train, y_test,
 F_b, data_pip) = ce_helpers.prep_data(X, y, numerical, one_hot_encoding=True, scaling=True)


### Train predictive models

In [153]:
# Run tag handed to train_models / perf_trained_models.
version = '%s_v1' % dataset_name
alg_list_cf = [alg]      # algorithm(s) for the counterfactual classifier
alg_list_dr = ['mlp']    # algorithm(s) for the duration model (author's other options: 'linear', 'svm', 'rf')

# Binary classifier for the credit class, using all columns of X_train.
classifier_spec = {
    'task': 'binary',
    'X features': X_train.columns,
    'class': target,
    'alg_list': alg_list_cf,
    'X_train': X_train,
    'X_test': X_test,
    'y_train': y_train,
    'y_test': y_test,
}

# Continuous model of 'duration' with 'credit_amount' as its only listed feature.
duration_spec = {
    'task': 'continuous',
    'X features': ['credit_amount'],
    'class': 'duration',
    'alg_list': alg_list_dr,
    'X_train': X_train,
    'X_test': X_test,
    'y_train': X_train['duration'],
    'y_test': X_test['duration'],
}

outcome_dict = {
    'counterfactual_german': classifier_spec,
    'duration': duration_spec,
}

# Train the models on every run; comment this call out to reuse models
# that were trained and saved earlier.
ce_helpers.train_models(outcome_dict, version)

performance = ce_helpers.perf_trained_models(version, outcome_dict)
performance

Learning a constraint for counterfactual_german
Training mlp
------------- Initialize grid  ----------------
------------- Running model  ----------------
Algorithm = mlp, metric = None
saving... results/mlp_counterfactual_german_trained.pkl
------------- Model evaluation  ----------------
-------------------training evaluation-----------------------
Train Score: 1.0
-------------------testing evaluation-----------------------
Test Score: 0.6924404761904761

Learning a constraint for duration
Training mlp
------------- Initialize grid  ----------------
------------- Running model  ----------------
Algorithm = mlp, metric = None
saving... results/mlp_duration_trained.pkl
------------- Model evaluation  ----------------
-------------------training evaluation-----------------------
Train MSE: 0.01739157551354364
Train R2: 0.44060188793999855
-------------------testing evaluation-----------------------
Test MSE: 0.020288389477880774
Test R2: 0.3799990633454411

Saving the performance...
Do

Unnamed: 0,save_path,seed,cv_folds,parameters,best_params,valid_score,train_score,test_score,outcome,alg,task
0,results/mlp/german_credit_v1_counterfactual_ge...,0,5,"{'hidden_layer_sizes': [(10,), (20,), (50,), (...","{'hidden_layer_sizes': (100,)}",0.678274,1.0,0.69244,counterfactual_german,mlp,binary
1,results/mlp/german_credit_v1_duration_model.csv,0,5,"{'hidden_layer_sizes': [(10,), (20,), (50,), (...","{'hidden_layer_sizes': (100,)}",-0.017651,0.017392,0.020288,duration,mlp,continuous


In [154]:
# Algorithm whose trained model is loaded for each outcome.
algorithms = {
    'counterfactual_german': alg,
    'duration': 'mlp',
}

# NOTE(review): the author's remark on this call was
# "it should be X_test instead of X" - confirm what load_model evaluates on.
y_pred, y_pred_0, X_test_0, models = ce_helpers.load_model(
    algorithms, outcome_dict, 'counterfactual_german')

X_test_0.head()  # not the cell's last expression, so nothing is displayed
clf, clf_Duration = models['counterfactual_german'], models['duration']

In [155]:
## for coherence
F_r = numerical
categorical = df.columns.difference(numerical + [target])

# Encoded (non-numerical, non-target) columns of the training frame,
# computed once instead of once per categorical feature.
encoded_cols = list(X_train.columns.difference(numerical + [target]))

# Map every categorical feature to its encoded columns, i.e. those whose
# name starts with '<feature>_'.
F_coh = {
    feature: [col for col in encoded_cols if col.startswith('%s_' % feature)]
    for feature in categorical
}

In [156]:
# Show the categorical feature names: the columns of df that are neither
# in `numerical` nor the target.
categorical

Index(['checking_status', 'credit_history', 'employment', 'foreign_worker',
       'housing', 'job', 'other_parties', 'other_payment_plans',
       'own_telephone', 'personal_status', 'property_magnitude', 'purpose',
       'savings_status'],
      dtype='object')

### Prep for Optimization

Define what the counterfactual constraint should look like.

In [157]:
# Algorithms chosen for the classifier and for the duration (SCM) model.
algorithm, SCM_algo = algorithms['counterfactual_german'], algorithms['duration']

# Only the classifier outcome is embedded as a constraint here;
# objectives_embed is left empty.
constraints_embed = ['counterfactual_german']
objectives_embed = {}

In [158]:
# Rows of the performance table that belong to the chosen classifier algorithm.
classifier_perf = performance[performance['alg'] == algorithm]

model_master = oc.model_selection(classifier_perf, constraints_embed, objectives_embed)
model_master['lb'] = 0.5  # this can be changed but it is generally equal to 0.5
model_master['ub'] = None
model_master['SCM_counterfactuals'] = None
# A single cell holding the full list of X's column names.
model_master['features'] = [list(X.columns)]
model_master

                 outcome model_type  \
0  counterfactual_german        mlp   

                                           save_path    task  objective  
0  results/mlp/german_credit_v1_counterfactual_ge...  binary          0  


Unnamed: 0,outcome,model_type,save_path,task,objective,lb,ub,SCM_counterfactuals,features
0,counterfactual_german,mlp,results/mlp/german_credit_v1_counterfactual_ge...,binary,0,0.5,,,"[checking_status_0<=X<200, checking_status_<0,..."


X1: the points in X whose label is 1. They are used to define the trust region.

In [159]:
# np.where returns a 1-tuple; its first entry holds the positions where y == 1.
y_ix_1 = np.where(y == 1)
positive_rows = y_ix_1[0]

# Positional selection from X, copied and re-indexed from 0.
X1 = X.iloc[positive_rows].copy().reset_index(drop=True)

### Factual instance

In [160]:
# Factual instance: row `u_index` of X_test_0 (returned by ce_helpers.load_model).
# Indices the author noted -- mlp: 1, 5, 13, 15, 17; gbm: 7 (cart for SCM).
u_index = 2
u = X_test_0.iloc[u_index, :]
print(u)

# predict() is given a single row and returns a length-1 array. Take the
# scalar explicitly: formatting the array itself with %d relies on implicit
# array-to-int conversion, which NumPy 1.25 deprecated.
predicted_label = clf.predict([u])[0]
print('predicted label: %d' % predicted_label)

checking_status_0<=X<200       0.000000
checking_status_<0             1.000000
checking_status_>=200          0.000000
checking_status_no checking    0.000000
credit_history_all paid        0.000000
                                 ...   
installment_commitment         1.000000
age                            0.107143
residence_since                1.000000
existing_credits               0.000000
num_dependents                 0.000000
Name: 775, Length: 61, dtype: float64
predicted label: 0


## Part A: validity, proximity

In [161]:
# Part A settings: sp and mu switched off, no trust region, one counterfactual.
sp, mu = False, 0
tr_region, enlarge_tr = False, False
num_counterfactuals = 1

In [162]:
# No restrictions in Part A; every list is a separate empty list.
#   L      - features that can only increase (become larger)
#   I      - immutable features
#   Pers_I - conditionally mutable features
#   P      - NOTE(review): meaning is defined by ce_helpers.opt; confirm
L, I, Pers_I, P = [], [], [], []

### Optimization

In [163]:
# Solve for the Part A counterfactual(s) with the settings defined above.
CEs, CEs_, final_model = ce_helpers.opt(
    X, X1, u,
    F_r, F_b, F_int, F_coh,
    I, L, Pers_I, P,
    sp, mu, tr_region, enlarge_tr, num_counterfactuals,
    model_master, data_pip,
)

Embedding constraints for counterfactual_german
['checking_status_0<=X<200', 'checking_status_<0', 'checking_status_>=200', 'checking_status_no checking', 'credit_history_all paid', 'credit_history_critical/other existing credit', 'credit_history_delayed previously', 'credit_history_existing paid', 'credit_history_no credits/all paid', 'employment_1<=X<4', 'employment_4<=X<7', 'employment_<1', 'employment_>=7', 'employment_unemployed', 'foreign_worker_no', 'foreign_worker_yes', 'housing_for free', 'housing_own', 'housing_rent', 'job_high qualif/self emp/mgmt', 'job_skilled', 'job_unemp/unskilled non res', 'job_unskilled resident', 'other_parties_co applicant', 'other_parties_guarantor', 'other_parties_none', 'other_payment_plans_bank', 'other_payment_plans_none', 'other_payment_plans_stores', 'own_telephone_none', 'own_telephone_yes', 'personal_status_female div/dep/mar', 'personal_status_male div/sep', 'personal_status_male mar/wid', 'personal_status_male single', 'property_magnitude_

### Visualize DF

In [164]:
# First table returned by ce_helpers.opt; the displayed output also carries a
# `scaled_distance` column (values appear to be on the scaled feature range).
CEs

Unnamed: 0,checking_status_0<=X<200,checking_status_<0,checking_status_>=200,checking_status_no checking,credit_history_all paid,credit_history_critical/other existing credit,credit_history_delayed previously,credit_history_existing paid,credit_history_no credits/all paid,employment_1<=X<4,...,savings_status_>=1000,savings_status_no known savings,duration,credit_amount,installment_commitment,age,residence_since,existing_credits,num_dependents,scaled_distance
original,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,...,0.0,1.0,0.2941,0.0699,1.0,0.1071,1.0,0.0,0.0,0.0
sol0,-0.0,1.0,-0.0,-0.0,-0.0,-0.0,-0.0,1.0,-0.0,1.0,...,-0.0,1.0,0.2941,-0.4828,0.5518,0.1071,1.0,-0.0,-0.0,1.0009


In [165]:
# Second table returned by ce_helpers.opt; judging by the displayed output the
# numerical columns are on their original scale here (TODO confirm).
CEs_

Unnamed: 0,checking_status_0<=X<200,checking_status_<0,checking_status_>=200,checking_status_no checking,credit_history_all paid,credit_history_critical/other existing credit,credit_history_delayed previously,credit_history_existing paid,credit_history_no credits/all paid,employment_1<=X<4,...,savings_status_<100,savings_status_>=1000,savings_status_no known savings,duration,credit_amount,installment_commitment,age,residence_since,existing_credits,num_dependents
original,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,...,0.0,0.0,1.0,23.9988,1371.2631,4.0,24.9976,4.0,1.0,1.0
sol0,-0.0,1.0,-0.0,-0.0,-0.0,-0.0,-0.0,1.0,-0.0,1.0,...,-0.0,-0.0,1.0,23.9988,-7288.9932,2.6554,24.9976,4.0,1.0,1.0


In [166]:
# Readable Part A table; with only_changes=True the displayed output shows '-'
# for values that match the original instance.
df_1 = ce_helpers.vis_dataframe(df, CEs_, F_r, F_coh, target, only_changes=True)
df_1

Unnamed: 0,duration,credit_amount,installment_commitment,age,residence_since,existing_credits,num_dependents,checking_status,credit_history,employment,foreign_worker,housing,job,other_parties,other_payment_plans,own_telephone,personal_status,property_magnitude,purpose,savings_status
original,24.0,1371.26,4.0,25.0,4.0,1.0,1.0,<0,existing paid,1<=X<4,yes,rent,skilled,none,none,none,female div/dep/mar,real estate,new car,no known savings
sol0,-,-7288.99,2.66,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-


### Evaluation

In [167]:
# Full table (only_changes=False) used for scoring; this rebinds CEs.
CEs = ce_helpers.vis_dataframe(df, CEs_, F_r, F_coh, target, only_changes=False)

# Score the Part A counterfactuals and start the cumulative table with them.
CE_perf = ce_helpers.evaluation(
    clf, CEs, numerical, categorical, CEs_=CEs_, rounding=True
).set_index(pd.Index(['Part A']))
CE_perf

Unnamed: 0,validity,cat_prox,cont_prox,sparsity,cat_diver,cont_diver,cont_count_divers
Part A,1,1.0,-8661.59,0.9,,,


---

## Part B: validity, proximity, sparsity

In [168]:
# Part B settings: sp switched on with mu = 10000, one counterfactual.
# enlarge_tr is not set here; the value from the Part A cell stays in effect.
sp, mu = True, 10000
tr_region = False
num_counterfactuals = 1

In [169]:
# No restrictions in Part B; every list is a separate empty list.
#   L      - features that can only increase (become larger)
#   I      - immutable features
#   Pers_I - conditionally mutable features
#   P      - NOTE(review): meaning is defined by ce_helpers.opt; confirm
L, I, Pers_I, P = [], [], [], []

### Optimization

In [170]:
# Solve for the Part B counterfactual(s) with the settings defined above.
CEs, CEs_, final_model = ce_helpers.opt(
    X, X1, u,
    F_r, F_b, F_int, F_coh,
    I, L, Pers_I, P,
    sp, mu, tr_region, enlarge_tr, num_counterfactuals,
    model_master, data_pip,
)

Embedding constraints for counterfactual_german
['checking_status_0<=X<200', 'checking_status_<0', 'checking_status_>=200', 'checking_status_no checking', 'credit_history_all paid', 'credit_history_critical/other existing credit', 'credit_history_delayed previously', 'credit_history_existing paid', 'credit_history_no credits/all paid', 'employment_1<=X<4', 'employment_4<=X<7', 'employment_<1', 'employment_>=7', 'employment_unemployed', 'foreign_worker_no', 'foreign_worker_yes', 'housing_for free', 'housing_own', 'housing_rent', 'job_high qualif/self emp/mgmt', 'job_skilled', 'job_unemp/unskilled non res', 'job_unskilled resident', 'other_parties_co applicant', 'other_parties_guarantor', 'other_parties_none', 'other_payment_plans_bank', 'other_payment_plans_none', 'other_payment_plans_stores', 'own_telephone_none', 'own_telephone_yes', 'personal_status_female div/dep/mar', 'personal_status_male div/sep', 'personal_status_male mar/wid', 'personal_status_male single', 'property_magnitude_

### Visualize DF

In [171]:
# Readable Part B table; with only_changes=True the displayed output shows '-'
# for values that match the original instance.
df_2 = ce_helpers.vis_dataframe(df, CEs_, F_r, F_coh, target, only_changes=True)
df_2

Unnamed: 0,duration,credit_amount,installment_commitment,age,residence_since,existing_credits,num_dependents,checking_status,credit_history,employment,foreign_worker,housing,job,other_parties,other_payment_plans,own_telephone,personal_status,property_magnitude,purpose,savings_status
original,24.0,1371.26,4.0,25.0,4.0,1.0,1.0,<0,existing paid,1<=X<4,yes,rent,skilled,none,none,none,female div/dep/mar,real estate,new car,no known savings
sol0,-,-16721.73,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-


### Evaluation

In [172]:
# Full table (only_changes=False) used for scoring; this rebinds CEs.
CEs = ce_helpers.vis_dataframe(df, CEs_, F_r, F_coh, target, only_changes=False)

# Score this part and label its row.
part_perf = ce_helpers.evaluation(clf, CEs, numerical, categorical, CEs_=CEs_, rounding=True)
part_perf.index = pd.Index(['Part B'])

# Replace any 'Part B' row left by an earlier run of this cell. Blindly
# appending made the cell fail on re-run, because the relabelling then saw
# more rows than labels.
CE_perf = pd.concat([CE_perf.drop(index='Part B', errors='ignore'), part_perf])
CE_perf

Unnamed: 0,validity,cat_prox,cont_prox,sparsity,cat_diver,cont_diver,cont_count_divers
Part A,1,1.0,-8661.59,0.9,,,
Part B,1,1.0,-18092.99,0.95,,,


---

## Part C: validity, proximity, sparsity, diversity

In [173]:
# Part C settings: three counterfactuals are requested.
# mu and enlarge_tr are not set here; the values assigned in earlier cells
# stay in effect.
sp, tr_region = True, False
num_counterfactuals = 3

In [174]:
# No restrictions in Part C; every list is a separate empty list.
#   L      - features that can only increase (become larger)
#   I      - immutable features
#   Pers_I - conditionally mutable features
#   P      - NOTE(review): meaning is defined by ce_helpers.opt; confirm
L, I, Pers_I, P = [], [], [], []

### Optimization

In [175]:
# Solve for the Part C counterfactuals with the settings defined above.
CEs, CEs_, final_model = ce_helpers.opt(
    X, X1, u,
    F_r, F_b, F_int, F_coh,
    I, L, Pers_I, P,
    sp, mu, tr_region, enlarge_tr, num_counterfactuals,
    model_master, data_pip,
)

Embedding constraints for counterfactual_german
['checking_status_0<=X<200', 'checking_status_<0', 'checking_status_>=200', 'checking_status_no checking', 'credit_history_all paid', 'credit_history_critical/other existing credit', 'credit_history_delayed previously', 'credit_history_existing paid', 'credit_history_no credits/all paid', 'employment_1<=X<4', 'employment_4<=X<7', 'employment_<1', 'employment_>=7', 'employment_unemployed', 'foreign_worker_no', 'foreign_worker_yes', 'housing_for free', 'housing_own', 'housing_rent', 'job_high qualif/self emp/mgmt', 'job_skilled', 'job_unemp/unskilled non res', 'job_unskilled resident', 'other_parties_co applicant', 'other_parties_guarantor', 'other_parties_none', 'other_payment_plans_bank', 'other_payment_plans_none', 'other_payment_plans_stores', 'own_telephone_none', 'own_telephone_yes', 'personal_status_female div/dep/mar', 'personal_status_male div/sep', 'personal_status_male mar/wid', 'personal_status_male single', 'property_magnitude_

### Visualize DF

In [176]:
# Readable Part C table; with only_changes=True the displayed output shows '-'
# for values that match the original instance.
df_3 = ce_helpers.vis_dataframe(df, CEs_, F_r, F_coh, target, only_changes=True)
df_3

Unnamed: 0,duration,credit_amount,installment_commitment,age,residence_since,existing_credits,num_dependents,checking_status,credit_history,employment,foreign_worker,housing,job,other_parties,other_payment_plans,own_telephone,personal_status,property_magnitude,purpose,savings_status
original,24.0,1371.26,4.0,25.0,4.0,1.0,1.0,<0,existing paid,1<=X<4,yes,rent,skilled,none,none,none,female div/dep/mar,real estate,new car,no known savings
sol0,-,-16721.73,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-
sol1,23.73,-16488.26,-,28.25,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-
sol2,-,-,0.4,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-


### Evaluation

In [177]:
# Full table (only_changes=False) used for scoring; this rebinds CEs.
CEs = ce_helpers.vis_dataframe(df, CEs_, F_r, F_coh, target, only_changes=False)

# Score this part and label its row.
part_perf = ce_helpers.evaluation(clf, CEs, numerical, categorical, CEs_=CEs_, rounding=True)
part_perf.index = pd.Index(['Part C'])

# Replace any 'Part C' row left by an earlier run of this cell. Blindly
# appending made the cell fail on re-run, because the relabelling then saw
# more rows than labels.
CE_perf = pd.concat([CE_perf.drop(index='Part C', errors='ignore'), part_perf])
CE_perf

Unnamed: 0,validity,cat_prox,cont_prox,sparsity,cat_diver,cont_diver,cont_count_divers
Part A,1,1.0,-8661.59,0.9,,,
Part B,1,1.0,-18092.99,0.95,,,
Part C,1,1.0,-11986.54,0.92,0.0,12066.74,0.15


---

## Part D: validity, proximity, sparsity, diversity, actionability

In [178]:
# Part D settings: three counterfactuals, no trust region.
# mu and enlarge_tr are not set here; the values assigned in earlier cells
# stay in effect.
sp, tr_region = True, False
num_counterfactuals = 3

In [179]:
# features that can only increase (become larger)
# (the author also tried restricting only 'residence_since')
L = ['age', 'residence_since']

# immutable features, given as encoded column names ('<feature>_<level>')
I = (
    ['personal_status_' + level
     for level in ('male div/sep', 'male mar/wid', 'male single')]
    + ['purpose_' + level
       for level in ('domestic appliance', 'education', 'furniture/equipment',
                     'new car', 'other', 'radio/tv', 'repairs', 'retraining',
                     'used car')]
    + ['foreign_worker_yes']
)

# variables that must be considered for person specific immutable features
employment = ['employment_' + level
              for level in ('unemployed', '<1', '1<=X<4', '4<=X<7', '>=7')]
Pers_I = [employment]

# NOTE(review): the meaning of P is defined by ce_helpers.opt; confirm.
P = ['duration', 'installment_commitment', 'num_dependents', 'credit_amount', 'existing_credits']

### Optimization

In [180]:
# Solve for the Part D counterfactuals with the restrictions defined above.
CEs, CEs_, final_model = ce_helpers.opt(
    X, X1, u,
    F_r, F_b, F_int, F_coh,
    I, L, Pers_I, P,
    sp, mu, tr_region, enlarge_tr, num_counterfactuals,
    model_master, data_pip,
)

Embedding constraints for counterfactual_german
['checking_status_0<=X<200', 'checking_status_<0', 'checking_status_>=200', 'checking_status_no checking', 'credit_history_all paid', 'credit_history_critical/other existing credit', 'credit_history_delayed previously', 'credit_history_existing paid', 'credit_history_no credits/all paid', 'employment_1<=X<4', 'employment_4<=X<7', 'employment_<1', 'employment_>=7', 'employment_unemployed', 'foreign_worker_no', 'foreign_worker_yes', 'housing_for free', 'housing_own', 'housing_rent', 'job_high qualif/self emp/mgmt', 'job_skilled', 'job_unemp/unskilled non res', 'job_unskilled resident', 'other_parties_co applicant', 'other_parties_guarantor', 'other_parties_none', 'other_payment_plans_bank', 'other_payment_plans_none', 'other_payment_plans_stores', 'own_telephone_none', 'own_telephone_yes', 'personal_status_female div/dep/mar', 'personal_status_male div/sep', 'personal_status_male mar/wid', 'personal_status_male single', 'property_magnitude_

### Visualize DF

In [181]:
# Readable Part D table; with only_changes=True the displayed output shows '-'
# for values that match the original instance.
df_4 = ce_helpers.vis_dataframe(df, CEs_, F_r, F_coh, target, only_changes=True)
df_4

Unnamed: 0,duration,credit_amount,installment_commitment,age,residence_since,existing_credits,num_dependents,checking_status,credit_history,employment,foreign_worker,housing,job,other_parties,other_payment_plans,own_telephone,personal_status,property_magnitude,purpose,savings_status
original,24.0,1371.26,4.0,25.0,4.0,1.0,1.0,<0,existing paid,1<=X<4,yes,rent,skilled,none,none,none,female div/dep/mar,real estate,new car,no known savings
sol0,-,-,-,-,10.0,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-
sol1,-,-,-,163.55,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-
sol2,26.2,-,-,165.63,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-


### Evaluation

In [182]:
# Full table (only_changes=False) used for scoring; this rebinds CEs.
CEs = ce_helpers.vis_dataframe(df, CEs_, F_r, F_coh, target, only_changes=False)

# Score this part and label its row.
part_perf = ce_helpers.evaluation(clf, CEs, numerical, categorical, CEs_=CEs_, rounding=True)
part_perf.index = pd.Index(['Part D'])

# Replace any 'Part D' row left by an earlier run of this cell. Blindly
# appending made the cell fail on re-run, because the relabelling then saw
# more rows than labels.
CE_perf = pd.concat([CE_perf.drop(index='Part D', errors='ignore'), part_perf])
CE_perf

Unnamed: 0,validity,cat_prox,cont_prox,sparsity,cat_diver,cont_diver,cont_count_divers
Part A,1,1.0,-8661.59,0.9,,,
Part B,1,1.0,-18092.99,0.95,,,
Part C,1,1.0,-11986.54,0.92,0.0,12066.74,0.15
Part D,1,1.0,-95.79,0.93,0.0,99.22,0.12


---

## Part E: validity, proximity, sparsity, diversity, actionability, trust region

In [183]:
# Part E settings: trust region switched on (not enlarged), three counterfactuals.
# mu is not set here; the value assigned in an earlier cell stays in effect.
sp, tr_region = True, True
enlarge_tr = False
num_counterfactuals = 3

In [184]:
# features that can only increase (become larger)
L = ['age', 'residence_since']

# immutable features, given as encoded column names ('<feature>_<level>')
I = (
    ['personal_status_' + level
     for level in ('male div/sep', 'male mar/wid', 'male single')]
    + ['purpose_' + level
       for level in ('domestic appliance', 'education', 'furniture/equipment',
                     'new car', 'other', 'radio/tv', 'repairs', 'retraining',
                     'used car')]
    + ['foreign_worker_yes']
)

# variables that must be considered for person specific immutable features
employment = ['employment_' + level
              for level in ('unemployed', '<1', '1<=X<4', '4<=X<7', '>=7')]
Pers_I = [employment]

# NOTE(review): the meaning of P is defined by ce_helpers.opt; confirm.
P = ['duration', 'installment_commitment', 'num_dependents', 'credit_amount', 'existing_credits']

### Optimization

In [185]:
# First attempt uses the trust region as configured. If the solve raises,
# retry once with enlarge_tr switched on.
try:
    CEs, CEs_, final_model = ce_helpers.opt(X, X1, u, F_r, F_b, F_int, F_coh, I, L, Pers_I, P, sp, mu,
                                            tr_region, enlarge_tr, num_counterfactuals, model_master, data_pip)
except Exception as err:
    # Catch Exception rather than using a bare `except:`, so KeyboardInterrupt
    # and SystemExit still propagate, and report why the first attempt failed.
    print('----TRUST REGION IS BEING ENLARGED----')
    print('first attempt failed with: %r' % (err,))
    enlarge_tr = True
    CEs, CEs_, final_model = ce_helpers.opt(X, X1, u, F_r, F_b, F_int, F_coh, I, L, Pers_I, P, sp, mu,
                                            tr_region, enlarge_tr, num_counterfactuals, model_master, data_pip)

Generating constraints for the trust region using 700 samples.
The trust region is not being enlarged.
... Trust region defined.
Embedding constraints for counterfactual_german
['checking_status_0<=X<200', 'checking_status_<0', 'checking_status_>=200', 'checking_status_no checking', 'credit_history_all paid', 'credit_history_critical/other existing credit', 'credit_history_delayed previously', 'credit_history_existing paid', 'credit_history_no credits/all paid', 'employment_1<=X<4', 'employment_4<=X<7', 'employment_<1', 'employment_>=7', 'employment_unemployed', 'foreign_worker_no', 'foreign_worker_yes', 'housing_for free', 'housing_own', 'housing_rent', 'job_high qualif/self emp/mgmt', 'job_skilled', 'job_unemp/unskilled non res', 'job_unskilled resident', 'other_parties_co applicant', 'other_parties_guarantor', 'other_parties_none', 'other_payment_plans_bank', 'other_payment_plans_none', 'other_payment_plans_stores', 'own_telephone_none', 'own_telephone_yes', 'personal_status_female 

#### Visualize DF

In [186]:
# Readable Part E table; with only_changes=True the displayed output shows '-'
# for values that match the original instance.
df_5 = ce_helpers.vis_dataframe(df, CEs_, F_r, F_coh, target, only_changes=True)
df_5

Unnamed: 0,duration,credit_amount,installment_commitment,age,residence_since,existing_credits,num_dependents,checking_status,credit_history,employment,foreign_worker,housing,job,other_parties,other_payment_plans,own_telephone,personal_status,property_magnitude,purpose,savings_status
original,24.0,1371.26,4.0,25.0,4.0,1.0,1.0,<0,existing paid,1<=X<4,yes,rent,skilled,none,none,none,female div/dep/mar,real estate,new car,no known savings
sol0,15.0,1402.6,2.0,28.0,-,-,-,-,-,-,-,-,-,-,-,-,-,car,-,<100
sol1,22.0,1283.52,-,-,-,-,-,no checking,-,4<=X<7,-,-,-,-,-,-,-,life insurance,-,-
sol2,12.0,1893.04,-,29.0,-,-,-,-,-,-,-,own,-,guarantor,-,yes,-,life insurance,-,<100


### Evaluation

In [187]:
# Full table (only_changes=False) used for scoring; this rebinds CEs.
CEs = ce_helpers.vis_dataframe(df, CEs_, F_r, F_coh, target, only_changes=False)

# Score this part and label its row.
part_perf = ce_helpers.evaluation(clf, CEs, numerical, categorical, CEs_=CEs_, rounding=True)
part_perf.index = pd.Index(['Part E'])

# Replace any 'Part E' row left by an earlier run of this cell. Blindly
# appending made the cell fail on re-run, because the relabelling then saw
# more rows than labels.
CE_perf = pd.concat([CE_perf.drop(index='Part E', errors='ignore'), part_perf])
CE_perf

Unnamed: 0,validity,cat_prox,cont_prox,sparsity,cat_diver,cont_diver,cont_count_divers
Part A,1,1.0,-8661.59,0.9,,,
Part B,1,1.0,-18092.99,0.95,,,
Part C,1,1.0,-11986.54,0.92,0.0,12066.74,0.15
Part D,1,1.0,-95.79,0.93,0.0,99.22,0.12
Part E,1,0.74,-224.29,0.68,0.36,417.01,0.42


---

## Part F: validity, proximity, sparsity, diversity, actionability, trust region, causality

In [188]:
# Part F settings: trust region switched on (not enlarged), three counterfactuals.
# mu is not set here; the value assigned in an earlier cell stays in effect.
sp, tr_region = True, True
enlarge_tr = False
num_counterfactuals = 3

In [189]:
# features that can only increase (become larger)
L = ['age', 'residence_since']

# immutable features, given as encoded column names ('<feature>_<level>')
I = (
    ['personal_status_' + level
     for level in ('male div/sep', 'male mar/wid', 'male single')]
    + ['purpose_' + level
       for level in ('domestic appliance', 'education', 'furniture/equipment',
                     'new car', 'other', 'radio/tv', 'repairs', 'retraining',
                     'used car')]
    + ['foreign_worker_yes']
)

# variables that must be considered for person specific immutable features
employment = ['employment_' + level
              for level in ('unemployed', '<1', '1<=X<4', '4<=X<7', '>=7')]
Pers_I = [employment]

# NOTE(review): the meaning of P is defined by ce_helpers.opt; confirm.
P = ['duration', 'installment_commitment', 'num_dependents', 'credit_amount', 'existing_credits']

### Define constraints

In [190]:
# For Part F the 'duration' outcome is listed alongside the classifier.
constraints_embed = ['duration', 'counterfactual_german']

In [191]:
# Select the classifier row and the SCM (non-classifier) row(s) from the
# performance table and stack them. pd.concat replaces DataFrame.append,
# which was removed in pandas 2.0; the original index labels are kept,
# exactly as append did.
model_master_causality = pd.concat([
    oc.model_selection(performance[(performance['alg']==algorithm) & (performance['outcome']=='counterfactual_german')], constraints_embed, objectives_embed),
    oc.model_selection(performance[(performance['alg']==SCM_algo) & (performance['outcome']!='counterfactual_german')], constraints_embed, objectives_embed),
])
model_master_causality['SCM_counterfactuals'] = None
# For the duration model: its prediction from u's credit_amount minus u's duration.
model_master_causality.loc[model_master_causality['outcome']=='duration','SCM_counterfactuals'] =  clf_Duration.predict(pd.DataFrame([u['credit_amount']], columns=['credit_amount'])) - u['duration']
model_master_causality['lb'] = 0.50
model_master_causality['ub'] = None

# Attach the feature list of each model: all columns of X for the classifier ...
df_arr = pd.DataFrame({'features': [list(X.columns)]})
result_1 = pd.merge(
    model_master_causality.loc[model_master_causality['outcome']=='counterfactual_german',:] ,
    df_arr,
    how='left',
    left_index=True, # Merge on both indexes, since right only has 0...
    right_index=True # all the other rows will be NaN
)
# ... and only 'credit_amount' for the duration model.
df_arr = pd.DataFrame({'features':[['credit_amount']]})
result_2 = pd.merge(
    model_master_causality.loc[model_master_causality['outcome']=='duration',:].reset_index(drop=True) ,
    df_arr,
    how='left',
    left_index=True, # Merge on both indexes, since right only has 0...
    right_index=True # all the other rows will be NaN
)
model_master_causality = pd.concat([result_1, result_2])
# model_master is rebound to the first (classifier) row only.
model_master = pd.DataFrame(model_master_causality.iloc[0, :]).T
model_master_causality

                 outcome model_type  \
0  counterfactual_german        mlp   

                                           save_path    task  objective  
0  results/mlp/german_credit_v1_counterfactual_ge...  binary          0  
    outcome model_type                                        save_path  \
0  duration        mlp  results/mlp/german_credit_v1_duration_model.csv   

         task  objective  
0  continuous          0  


Unnamed: 0,outcome,model_type,save_path,task,objective,SCM_counterfactuals,lb,ub,features
0,counterfactual_german,mlp,results/mlp/german_credit_v1_counterfactual_ge...,binary,0,,0.5,,"[checking_status_0<=X<200, checking_status_<0,..."
0,duration,mlp,results/mlp/german_credit_v1_duration_model.csv,continuous,0,-0.137631,0.5,,[credit_amount]


In [192]:
# First attempt uses the trust region as configured. If the solve raises,
# retry once with enlarge_tr switched on.
try:
    CEs, CEs_, final_model = ce_helpers.opt(X, X1, u, F_r, F_b, F_int, F_coh, I, L, Pers_I, P, sp, mu,
                                            tr_region, enlarge_tr, num_counterfactuals, model_master_causality, data_pip)
except Exception as err:
    # Catch Exception rather than using a bare `except:`, so KeyboardInterrupt
    # and SystemExit still propagate, and report why the first attempt failed.
    print('----TRUST REGION IS BEING ENLARGED----')
    print('first attempt failed with: %r' % (err,))
    enlarge_tr = True
    CEs, CEs_, final_model = ce_helpers.opt(X, X1, u, F_r, F_b, F_int, F_coh, I, L, Pers_I, P, sp, mu,
                                            tr_region, enlarge_tr, num_counterfactuals, model_master_causality, data_pip)

Generating constraints for the trust region using 700 samples.
The trust region is not being enlarged.
... Trust region defined.
Embedding constraints for counterfactual_german
['checking_status_0<=X<200', 'checking_status_<0', 'checking_status_>=200', 'checking_status_no checking', 'credit_history_all paid', 'credit_history_critical/other existing credit', 'credit_history_delayed previously', 'credit_history_existing paid', 'credit_history_no credits/all paid', 'employment_1<=X<4', 'employment_4<=X<7', 'employment_<1', 'employment_>=7', 'employment_unemployed', 'foreign_worker_no', 'foreign_worker_yes', 'housing_for free', 'housing_own', 'housing_rent', 'job_high qualif/self emp/mgmt', 'job_skilled', 'job_unemp/unskilled non res', 'job_unskilled resident', 'other_parties_co applicant', 'other_parties_guarantor', 'other_parties_none', 'other_payment_plans_bank', 'other_payment_plans_none', 'other_payment_plans_stores', 'own_telephone_none', 'own_telephone_yes', 'personal_status_female 

#### Visualize DF

In [193]:
# Readable Part F table; with only_changes=True the displayed output shows '-'
# for values that match the original instance.
df_6 = ce_helpers.vis_dataframe(df, CEs_, F_r,F_coh, target, only_changes=True)
df_6

Unnamed: 0,duration,credit_amount,installment_commitment,age,residence_since,existing_credits,num_dependents,checking_status,credit_history,employment,foreign_worker,housing,job,other_parties,other_payment_plans,own_telephone,personal_status,property_magnitude,purpose,savings_status
original,24.0,1371.26,4.0,25.0,4.0,1.0,1.0,<0,existing paid,1<=X<4,yes,rent,skilled,none,none,none,female div/dep/mar,real estate,new car,no known savings
sol0,-,-,2.0,28.0,-,-,-,-,-,-,-,-,-,-,-,-,-,car,-,<100
sol1,-,-,-,-,-,-,-,no checking,-,4<=X<7,-,-,-,-,-,-,-,life insurance,-,-
sol2,-,-,2.0,-,-,-,-,-,-,-,-,-,-,-,-,-,-,car,-,<100


### Evaluation

In [194]:
# Full table (only_changes=False) used for scoring; this rebinds CEs.
CEs = ce_helpers.vis_dataframe(df, CEs_, F_r, F_coh, target, only_changes=False)

# Score this part and label its row.
part_perf = ce_helpers.evaluation(clf, CEs, numerical, categorical, CEs_=CEs_, rounding=True)
part_perf.index = pd.Index(['Part F'])

# Replace any 'Part F' row left by an earlier run of this cell. Blindly
# appending made the cell fail on re-run, because the relabelling then saw
# more rows than labels.
CE_perf = pd.concat([CE_perf.drop(index='Part F', errors='ignore'), part_perf])
CE_perf

Unnamed: 0,validity,cat_prox,cont_prox,sparsity,cat_diver,cont_diver,cont_count_divers
Part A,1,1.0,-8661.59,0.9,,,
Part B,1,1.0,-18092.99,0.95,,,
Part C,1,1.0,-11986.54,0.92,0.0,12066.74,0.15
Part D,1,1.0,-95.79,0.93,0.0,99.22,0.12
Part E,1,0.74,-224.29,0.68,0.36,417.01,0.42
Part F,1,0.82,-2.33,0.83,0.21,3.33,0.2


## Table

In [195]:
# Stack the per-part change tables (Parts A to F) row-wise, in order.
part_tables = [df_1, df_2, df_3, df_4, df_5, df_6]
df_complete = pd.concat(part_tables, axis=0)
df_complete

Unnamed: 0,duration,credit_amount,installment_commitment,age,residence_since,existing_credits,num_dependents,checking_status,credit_history,employment,foreign_worker,housing,job,other_parties,other_payment_plans,own_telephone,personal_status,property_magnitude,purpose,savings_status
original,24.0,1371.26,4.0,25.0,4.0,1.0,1.0,<0,existing paid,1<=X<4,yes,rent,skilled,none,none,none,female div/dep/mar,real estate,new car,no known savings
sol0,-,-7288.99,2.66,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-
original,24.0,1371.26,4.0,25.0,4.0,1.0,1.0,<0,existing paid,1<=X<4,yes,rent,skilled,none,none,none,female div/dep/mar,real estate,new car,no known savings
sol0,-,-16721.73,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-
original,24.0,1371.26,4.0,25.0,4.0,1.0,1.0,<0,existing paid,1<=X<4,yes,rent,skilled,none,none,none,female div/dep/mar,real estate,new car,no known savings
sol0,-,-16721.73,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-
sol1,23.73,-16488.26,-,28.25,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-
sol2,-,-,0.4,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-
original,24.0,1371.26,4.0,25.0,4.0,1.0,1.0,<0,existing paid,1<=X<4,yes,rent,skilled,none,none,none,female div/dep/mar,real estate,new car,no known savings
sol0,-,-,-,-,10.0,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-


In [196]:
# Output folder for this dataset/algorithm pair. exist_ok=True replaces the
# separate existence check and creates the folder only when it is missing.
out_dir = 'results/%s/%s/' % (dataset_name, alg)
os.makedirs(out_dir, exist_ok=True)

# Write the combined counterfactual table and the evaluation table, keeping
# their index labels.
df_complete.to_csv(out_dir + 'CE-OCL_%s_%s.csv' % (alg, dataset_name), index=True)
CE_perf.to_csv(out_dir + 'CE-OCL_%s_%s_eval.csv' % (alg, dataset_name), index=True)