# Analysis of Results

In [2]:
import os
from pathlib import Path

import pandas as pd

In [3]:
# Columns kept for the analysis, in the order they are displayed.
column_order = ['Explainer','Model','Dataset','Alpha','Param Config','Coverage',
                'Validity', 'Confidence','Sparsity','Proximity L1','Proximity L2', 'Proximity DTW', 'yNN']

# Location of the aggregated evaluation results. Set the CONFETTI_RESULTS_CSV
# environment variable to point at the file on another machine; the fallback is
# the path the notebook was originally run with, so existing runs are unchanged.
RESULTS_CSV = Path(os.environ.get(
    'CONFETTI_RESULTS_CSV',
    '/Users/alan.paredes/Desktop/confetti/benchmark/evaluations/all_evaluation_results.csv',
))

df = pd.read_csv(RESULTS_CSV)
# Selecting by label raises a KeyError if the CSV lacks any of the expected columns.
results = df[column_order]

In [4]:
# Keep only the rows whose Explainer name contains 'Confetti Optimized'
# (case-insensitive; rows with a missing Explainer are excluded).
is_confetti = results['Explainer'].str.contains('Confetti Optimized', case=False, na=False)
confetti_results = results.loc[is_confetti]

# Split on the 'Alpha' flag: rows where it is True go to results_alphas,
# rows where it is False go to results_thetas.
alpha_flag = confetti_results['Alpha']
results_alphas = confetti_results.loc[alpha_flag == True]
results_thetas = confetti_results.loc[alpha_flag == False]

## FCN Results

In [5]:
# Restrict both parameter sweeps to the rows whose Model is 'fcn'.
fcn_alphas = results_alphas.loc[results_alphas['Model'].eq('fcn')]
fcn_thetas = results_thetas.loc[results_thetas['Model'].eq('fcn')]

### Is there a trade-off between Sparsity and Confidence?

Increasing the value of parameter $\alpha$ places greater emphasis on confidence during optimization. In theory, this leads to more confident explanations, but potentially at the expense of sparsity.

In [6]:
# Mean Confidence and Sparsity per 'Param Config' value of the FCN alpha rows;
# the grouping key is exposed as an 'Alpha' column in the result.
sparsity_confidence_tradeoff_fcn = (
    fcn_alphas
    .groupby('Param Config')[['Confidence', 'Sparsity']]
    .mean()
    .reset_index()
    .rename(columns={'Param Config': 'Alpha'})
)
sparsity_confidence_tradeoff_fcn

Unnamed: 0,Alpha,Confidence,Sparsity
0,0.0,0.58613,0.877996
1,0.1,0.699091,0.855778
2,0.3,0.706571,0.854514
3,0.5,0.706057,0.85329
4,0.7,0.706668,0.853624
5,0.9,0.703002,0.854933
6,1.0,0.750051,0.820321


### Is there a trade-off between Confidence and Coverage?

As the value of parameter $\theta$ increases, the required confidence threshold becomes higher. This can result in more confident explanations, but may reduce coverage by making it harder to find valid counterfactuals for some instances.

In [7]:
# Mean Confidence and Coverage per 'Param Config' value of the FCN theta rows.
# as_index=False keeps the grouping key as a regular column, which is then
# relabelled 'Theta'.
confidence_coverage_tradeoff_fcn = (
    fcn_thetas
    .groupby('Param Config', as_index=False)
    .agg({'Confidence': 'mean', 'Coverage': 'mean'})
    .rename(columns={'Param Config': 'Theta'})
)
confidence_coverage_tradeoff_fcn

Unnamed: 0,Theta,Confidence,Coverage
0,0.55,0.735919,99.404762
1,0.65,0.804366,99.404762
2,0.75,0.865282,98.214286
3,0.85,0.924346,98.214286
4,0.95,0.976844,97.619048


### Is there a trade-off between Sparsity and Proximity?

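Fewer changes to the input (i.e., higher sparsity) typically lead to a counterfactual that is closer to the original instance, so sparsity and proximity are expected to move together. Because increasing the parameter $\alpha$ shifts the emphasis of the optimization from sparsity toward confidence, higher values of $\alpha$ should, in theory, yield counterfactuals that are less sparse and therefore farther from the original instance.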

In [8]:
# Mean Sparsity and mean of each proximity column per 'Param Config' value of
# the FCN alpha rows; the grouping key is exposed as an 'Alpha' column.
proximity_sparsity_tradeoff_fcn = (
    fcn_alphas
    .groupby('Param Config')
    .agg(dict.fromkeys(['Sparsity', 'Proximity L1', 'Proximity L2', 'Proximity DTW'], 'mean'))
    .reset_index()
    .rename(columns={'Param Config': 'Alpha'})
)
proximity_sparsity_tradeoff_fcn

Unnamed: 0,Alpha,Sparsity,Proximity L1,Proximity L2,Proximity DTW
0,0.0,0.877996,99.336514,16.059606,15.490128
1,0.1,0.855778,109.715119,16.756481,16.118825
2,0.3,0.854514,111.53745,16.864634,16.22983
3,0.5,0.85329,111.650035,16.854173,16.22823
4,0.7,0.853624,111.187449,16.829234,16.189499
5,0.9,0.854933,110.534459,16.81444,16.183381
6,1.0,0.820321,121.982337,17.322259,16.619745


### Is there a trade-off between Confidence and Proximity?

As the value of parameter $\theta$ increases, the required confidence for the target class becomes higher. This can lead to counterfactuals that more closely resemble the nearest unlike neighbor, but often at the cost of being less proximate to the original instance.

In [9]:
# Mean Confidence and mean of each proximity column per 'Param Config' value of
# the FCN theta rows. The group index is named 'Theta' before being turned
# back into a regular column.
proximity_confidence_tradeoff_fcn = (
    fcn_thetas
    .groupby('Param Config')
    .agg({column: 'mean'
          for column in ('Confidence', 'Proximity L1', 'Proximity L2', 'Proximity DTW')})
    .rename_axis('Theta')
    .reset_index()
)
proximity_confidence_tradeoff_fcn

Unnamed: 0,Theta,Confidence,Proximity L1,Proximity L2,Proximity DTW
0,0.55,0.735919,114.034765,17.124106,16.46494
1,0.65,0.804366,119.632767,17.914094,17.27585
2,0.75,0.865282,120.787211,18.150429,17.446291
3,0.85,0.924346,129.187229,18.857921,18.14655
4,0.95,0.976844,147.828728,20.137653,19.400299


## ResNet Results

In [20]:
# Restrict both parameter sweeps to the rows whose Model is 'resnet'.
resnet_alphas = results_alphas.loc[lambda frame: frame['Model'] == 'resnet']
resnet_thetas = results_thetas.loc[lambda frame: frame['Model'] == 'resnet']

In [21]:
# Show the 'resnet' rows of the alpha split for inspection.
# NOTE(review): every row in the saved output reports Validity 0.0 — confirm
# this is expected for these runs before interpreting the other metrics.
resnet_alphas

Unnamed: 0,Explainer,Model,Dataset,Alpha,Param Config,Coverage,Validity,Confidence,Sparsity,Proximity L1,Proximity L2,Proximity DTW,yNN
138,Confetti Optimized (alpha=0.0),resnet,ArticularyWordRecognition,True,0.0,100.0,0.0,0.075271,1.0,0.0,0.0,0.0,1.0
139,Confetti Optimized (alpha=0.1),resnet,ArticularyWordRecognition,True,0.1,100.0,0.0,0.072888,0.997932,1.124238,0.609823,0.609823,1.0
140,Confetti Optimized (alpha=0.3),resnet,ArticularyWordRecognition,True,0.3,100.0,0.0,0.072728,0.998117,1.117726,0.653181,0.653181,1.0
141,Confetti Optimized (alpha=0.5),resnet,ArticularyWordRecognition,True,0.5,100.0,0.0,0.072608,0.998025,1.15892,0.661674,0.661674,1.0
142,Confetti Optimized (alpha=0.7),resnet,ArticularyWordRecognition,True,0.7,100.0,0.0,0.072737,0.998148,1.124572,0.655808,0.655808,1.0
143,Confetti Optimized (alpha=0.9),resnet,ArticularyWordRecognition,True,0.9,100.0,0.0,0.072643,0.998056,1.157744,0.660445,0.660445,1.0
144,Confetti Optimized (alpha=1.0),resnet,ArticularyWordRecognition,True,1.0,100.0,0.0,0.070823,0.996296,2.007992,0.960853,0.949286,1.0
159,Confetti Optimized (alpha=0.1),resnet,BasicMotions,True,0.1,100.0,0.0,0.042505,0.998042,1.904461,1.409833,1.409833,1.0
160,Confetti Optimized (alpha=0.0),resnet,BasicMotions,True,0.0,100.0,0.0,0.043149,0.999917,0.038219,0.029292,0.029292,1.0
161,Confetti Optimized (alpha=0.3),resnet,BasicMotions,True,0.3,100.0,0.0,0.042616,0.998042,1.904262,1.363991,1.363991,1.0


In [22]:
# Show the 'resnet' rows of the theta split for inspection.
# NOTE(review): every row in the saved output reports Validity 0.0 — confirm
# this is expected for these runs before interpreting the other metrics.
resnet_thetas

Unnamed: 0,Explainer,Model,Dataset,Alpha,Param Config,Coverage,Validity,Confidence,Sparsity,Proximity L1,Proximity L2,Proximity DTW,yNN
300,Confetti Optimized (theta=0.55),resnet,ArticularyWordRecognition,False,0.55,100.0,0.0,0.072608,0.998025,1.15892,0.661674,0.661674,1.0
301,Confetti Optimized (theta=0.65),resnet,ArticularyWordRecognition,False,0.65,100.0,0.0,0.072608,0.998025,1.15892,0.661674,0.661674,1.0
303,Confetti Optimized (theta=0.75),resnet,ArticularyWordRecognition,False,0.75,96.0,0.0,0.074642,0.997654,1.357769,0.74025,0.74025,1.0
305,Confetti Optimized (theta=0.85),resnet,ArticularyWordRecognition,False,0.85,92.0,0.0,0.072779,0.989558,5.771165,1.463584,1.463257,1.0
307,Confetti Optimized (theta=0.95),resnet,ArticularyWordRecognition,False,0.95,52.0,0.0,0.034624,0.973555,13.169596,2.580141,2.54405,1.0
316,Confetti Optimized (theta=0.55),resnet,BasicMotions,False,0.55,100.0,0.0,0.036084,0.997792,3.48919,2.386236,2.386236,1.0
317,Confetti Optimized (theta=0.65),resnet,BasicMotions,False,0.65,100.0,0.0,0.036084,0.997792,3.48919,2.386236,2.386236,1.0
319,Confetti Optimized (theta=0.75),resnet,BasicMotions,False,0.75,97.5,0.0,0.037224,0.996875,6.116905,3.511443,3.511443,1.0
320,Confetti Optimized (theta=0.85),resnet,BasicMotions,False,0.85,97.5,0.0,0.027226,0.9955,14.668662,5.511981,5.414993,1.0
321,Confetti Optimized (theta=0.95),resnet,BasicMotions,False,0.95,97.5,0.0,0.015283,0.992236,24.571977,7.69944,7.54917,1.0
