# Metrics for counterfactuals

In [27]:
import numpy as np
import pandas as pd
import re
from statistics import mean
import json

In [28]:
from PythonScripts.metrics_perturbed import CategoricalMetrics, NumericMetrics

## Load data

### Dataset 

In [29]:
# Choose which dataset's config to load; available choices: iris, adult, auto.
dataset_name = 'adult'

config_path = '../Configs/' + dataset_name + '.json'
with open(config_path) as config_file:
    config = json.load(config_file)

# Read the original and the perturbed CSVs named in the config.
dataset = pd.read_csv('../Data/' + config['filtered_data_with_headers'], header=0)
perturbed_dataset = pd.read_csv('../Data/' + config['perturbed_data'], header=0)

# Settings that drive the slicing below.
n_rows = config['sample']
n_features = config['num_features']
target_col = config['target_col']

# Original data: first `sample` rows as an array, leading columns as features.
dataset = dataset.values[:n_rows]
X = dataset[:, :n_features]
labels = dataset[:, target_col - 1]

# Perturbed data: same row and feature slicing.
perturbed_dataset = perturbed_dataset.values[:n_rows]
perturbed_X = perturbed_dataset[:, :n_features]
# NOTE(review): labels are read from column `target_col` here but from
# `target_col - 1` for the original data -- confirm the perturbed file really
# keeps its label one column further to the right.
perturbed_labels = perturbed_dataset[:, target_col]

# Feature identifiers 1..num_features (inclusive).
features = np.arange(1, n_features + 1)

### Paths

In [30]:
# Read the decision paths and the local bin table from ../Outputs.
paths = pd.read_csv('../Outputs/' + config['perturbed_paths'], header=0)
bins = pd.read_csv('../Outputs/' + config['perturbed_local_bins'], header=0)

# Continue with plain NumPy arrays.
paths = paths.values
bin_vals = bins.values

# Map the first column of each bin row to its second column. The value is
# stored untouched when the config has a 'factors' entry and cast to float
# otherwise.
keep_raw_values = 'factors' in config
bin_dict = {
    row[0]: (row[1] if keep_raw_values else float(row[1]))
    for row in bin_vals
}

# Case-insensitive pattern used to pick apart the nodes of a path.
regex = re.compile(config['path_regex'], flags=re.I)

### Depths

In [31]:
# Load the tree depths file and collapse its values into one 1-D array.
depths_frame = pd.read_csv('../Outputs/' + config['tree_depths'], header=0)
depths = depths_frame.values.flatten()

## Compute metrics

In [32]:
def _parse_path(path, pattern, bin_lookup):
    """Parse one comma-separated path string into a list of node tuples.

    Each node is matched against ``pattern``; the resulting tuple is
    ``(int(group 1), bin_lookup[group 2], group 3)``.

    Raises:
        ValueError: if a node does not match ``pattern``. Without this check
            the failure would surface as an AttributeError on ``None`` that
            does not say which node or path was malformed.
        KeyError: if group 2 of a node is not a key of ``bin_lookup``.
    """
    parsed = []
    for node in path.split(","):
        match = pattern.match(node)
        if match is None:
            raise ValueError(
                "Node {!r} in path {!r} does not match config['path_regex']".format(node, path)
            )
        parsed.append((int(match.group(1)), bin_lookup[match.group(2)], match.group(3)))
    return parsed


# Parse the first two columns of `paths`. Column 0 is paired with the original
# labels below and column 1 with the perturbed labels.
path_list = [
    [_parse_path(path, regex, bin_dict) for path in paths[:, column]]
    for column in range(2)
]

# A 'factors' entry in the config selects the categorical metrics, which also
# receive the factors table; otherwise the numeric metrics are used.
if 'factors' in config:
    factors = pd.read_csv('../Outputs/'+config['factors'], header = 0)
    factors = factors.values
    metrics = CategoricalMetrics(path_list[0], labels, features, factors)
    perturbed_metrics = CategoricalMetrics(path_list[1], perturbed_labels, features, factors)
else:
    metrics = NumericMetrics(path_list[0], labels, features)
    perturbed_metrics = NumericMetrics(path_list[1], perturbed_labels, features)

## Results

### Decision set size

In [33]:
# Printed order: original data first, perturbed data second.
original_size = metrics.decision_paths_size()
perturbed_size = perturbed_metrics.decision_paths_size()
print(original_size, perturbed_size)

1000 1000


### Decision set length

In [34]:
# Printed order: original data first, perturbed data second.
original_length = metrics.decision_paths_length()
perturbed_length = perturbed_metrics.decision_paths_length()
print(original_length, perturbed_length)

2896 2877


### Average rule length

In [35]:
# Printed order: original data first, perturbed data second.
original_rule_length = metrics.average_rule_length()
perturbed_rule_length = perturbed_metrics.average_rule_length()
print(original_rule_length, perturbed_rule_length)

2.896 2.877


### Average distinct features

In [36]:
# Printed order: original data first, perturbed data second.
original_distinct = metrics.average_distinct_features()
perturbed_distinct = perturbed_metrics.average_distinct_features()
print(original_distinct, perturbed_distinct)

2.791 2.732


### Inter-class overlap

In [37]:
# Each metrics object is evaluated on its own feature matrix.
# Printed order: original data first, perturbed data second.
original_interclass = metrics.interclass_overlap(X)
perturbed_interclass = perturbed_metrics.interclass_overlap(perturbed_X)
print(original_interclass, perturbed_interclass)

7985 1178040


### Intra-class overlap

In [38]:
# Each metrics object is evaluated on its own feature matrix.
# Printed order: original data first, perturbed data second.
original_intraclass = metrics.intraclass_overlap(X)
perturbed_intraclass = perturbed_metrics.intraclass_overlap(perturbed_X)
print(original_intraclass, perturbed_intraclass)

101029 1593042


### Total number of classes covered

In [39]:
# Printed order: original data first, perturbed data second.
original_classes = metrics.num_classes_covered()
perturbed_classes = perturbed_metrics.num_classes_covered()
print(original_classes, perturbed_classes)

2 2


### Correct cover

In [40]:
# Each metrics object is evaluated on its own feature matrix.
# Printed order: original data first, perturbed data second.
original_correct = metrics.total_correct_cover(X)
perturbed_correct = perturbed_metrics.total_correct_cover(perturbed_X)
print(original_correct, perturbed_correct)

0.8728537087912088 0.6746115110136242


### Incorrect cover

In [41]:
# Each metrics object is evaluated on its own feature matrix.
# Printed order: original data first, perturbed data second.
original_incorrect = metrics.total_incorrect_cover(X)
perturbed_incorrect = perturbed_metrics.total_incorrect_cover(perturbed_X)
print(original_incorrect, perturbed_incorrect)

0.1271462912087912 0.32538848898637585


### Mean rank

In [42]:
# Printed order: original data first, perturbed data second.
original_rank = metrics.mean_rank()
perturbed_rank = perturbed_metrics.mean_rank()
print(original_rank, perturbed_rank)

[0.08977901 0.08114641 0.06042818 0.17990331 0.00379834 0.09979282
 0.18197514 0.07838398 0.02313536 0.00103591 0.10082873 0.03487569
 0.05110497 0.01381215] [0.14529023 0.06812652 0.06777894 0.17761557 0.00382343 0.12304484
 0.17761557 0.07438304 0.0132082  0.00208551 0.07855405 0.0177268
 0.0420577  0.00868961]


### Feature frequencies at all depths

In [43]:
# One printout per metrics object: original data first, then perturbed data.
for metric_set in (metrics, perturbed_metrics):
    print(metric_set.frequency_at_all_depths())

[[0.041      0.025      0.002      0.231      0.008      0.213
  0.212      0.127      0.005      0.         0.097      0.01
  0.027      0.002     ]
 [0.09873708 0.11710677 0.0608496  0.19862227 0.00229621 0.05625718
  0.15729047 0.04822044 0.02525832 0.00229621 0.13318025 0.0413318
  0.04592423 0.01262916]
 [0.1245614  0.10877193 0.10877193 0.1245614  0.00175439 0.03157895
  0.15087719 0.06140351 0.04035088 0.00175439 0.1        0.06315789
  0.05964912 0.02280702]
 [0.16319444 0.125      0.09722222 0.07986111 0.         0.02083333
  0.19097222 0.05208333 0.03125    0.         0.06944444 0.03819444
  0.10416667 0.02777778]
 [0.09016393 0.04918033 0.14754098 0.1147541  0.         0.00819672
  0.25409836 0.06557377 0.04918033 0.         0.00819672 0.05737705
  0.10655738 0.04918033]
 [0.05405405 0.10810811 0.2972973  0.16216216 0.         0.05405405
  0.13513514 0.         0.05405405 0.         0.02702703 0.02702703
  0.08108108 0.        ]
 [0.14285714 0.         0.14285714 0.42857143 

## Comparison

In [44]:
# When the config has no 'factors' entry, build a Comparison from the original
# and perturbed metrics objects and call its print_change_of_class().
# NOTE(review): `Comparison` is not imported in any cell visible here, so this
# branch raises NameError on a fresh kernel unless the name is defined
# elsewhere -- confirm which module provides it and import it with the other
# metrics classes.
if 'factors' not in config:
    comp = Comparison(metrics, perturbed_metrics)
    comp.print_change_of_class()