In [1]:
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np
import tqdm
def nop(it, *_args, **_kwargs):
    """Return ``it`` untouched, ignoring every extra positional/keyword argument.

    Assigned to ``tqdm.tqdm`` right after this definition, so code that wraps
    an iterable through ``tqdm.tqdm(...)`` simply gets the iterable back.
    """
    return it

tqdm.tqdm = nop
from QUERYLANG import QueryLang, Instances
import queries
from optimization import get_implication_graph, l0_metric, batch_cfs_generation

from tqdm.notebook import tqdm as tqdm_notebook
import pickle
from sklearn import metrics


import warnings
warnings.filterwarnings('ignore')

# Load trained models

In [2]:
# Relative paths to the pickled trained models and the pickled test instances.
models_file = 'models/Adult Income/adult_models.pkl'
instances_file ='Instances/Adult Income/adult_test_instances.pkl'

# NOTE: pickle.load can execute arbitrary code embedded in the file; only load
# artifacts that come from a trusted source.
# The context managers guarantee the file handles are closed once the objects
# are loaded, and `models` / `instances` are only ever bound to the loaded data
# (never to the intermediate file objects).
with open(models_file, 'rb') as models_fh:
    models = pickle.load(models_fh)
with open(instances_file, 'rb') as instances_fh:
    instances = pickle.load(instances_fh)

# Init CfDB object 

In [3]:
# Build the query interface from the loaded test instances and trained models;
# every later cell goes through this `querylang` object.
querylang = QueryLang(instances, models)

# False Positive predictions

In [4]:
# Selects the ids of false-positive predictions (Instances.income = 0 but
# Predictions.Label = 1) made by classifiers 0 and 1, keeping at most five per
# classifier via ROW_NUMBER() partitioned by ClassifierId.
# NOTE(review): the window has no ORDER BY, so which rows receive rank <= 5 is
# left to the query engine — confirm whether a deterministic ordering is wanted.
prediction_q = ''' 
SELECT T.PredictionId
FROM (
    SELECT Predictions.PredictionId , ROW_NUMBER() OVER(PARTITION BY Predictions.ClassifierId) AS rank
    FROM Instances, Predictions
    WHERE Instances.InstanceId = Predictions.InstanceId
      AND Instances.income = 0 and  Predictions.Label = 1
      AND Predictions.ClassifierId IN (0,1)) as T
WHERE T.rank <= 5
'''

## Counterfactual View

In [5]:
# Counterfactual constraint: counterfactuals that do not change the gender and the race.
# (Plain assignment: the previous `cfs_q = cfs_query=(...)` form was a chained
# assignment that also bound an unused global named `cfs_query`.)
cfs_q = (queries.AND_NOT, {'features': ['gender', 'race']})

# Create one counterfactual view per CF type over the same predictions and
# constraint. The order is kept as-is because each call reports the next
# numbered pair of relations (my_cfs_1, my_cfs_2, my_cfs_3), and later cells
# refer to those numbers.
for cf_type in ('GrowingSpheresCFs', 'CecCFs', 'DiverseCFs'):
    querylang.create_cfs_view(cf_type=cf_type, prediction_query=prediction_q, cfs_query=cfs_q)

Your relations names are: my_cfs_1 and my_prediction_cfs_1
Your relations names are: my_cfs_2 and my_prediction_cfs_2
Your relations names are: my_cfs_3 and my_prediction_cfs_3


## Analysis: CF - L0

In [6]:
# Number of the view pair to analyse: my_cfs_<suffix> / my_prediction_cfs_<suffix>.
suffix = 1

# Inner query: COUNT(*) of my_cfs rows per (classifier, counterfactual), named L0.
# Outer query: average of that count per classifier.
Q = f''' 
SELECT ClassifierId, AVG(L0) as L0
FROM (
    SELECT ClassifierId, my_cfs_{suffix}.CfId , COUNT(*) AS L0
    FROM  my_cfs_{suffix}, my_prediction_cfs_{suffix}, Predictions
    WHERE my_cfs_{suffix}.CfId = my_prediction_cfs_{suffix}.CfId 
      AND my_prediction_cfs_{suffix}.PredictionId = Predictions.PredictionId
      GROUP BY ClassifierId, my_cfs_{suffix}.CfId ) AS T
GROUP BY ClassifierId
'''

# Readable labels for the numeric classifier ids in the result.
classifier_names = {0: 'Random Forest', 1: 'Linear'}
df = querylang.execute(Q, parallel=False).replace({'ClassifierId': classifier_names})

# Shown as the cell output with a friendlier header; `df` itself keeps the
# original `ClassifierId` column name because rename returns a new frame.
df.rename(columns={'ClassifierId': 'Classifier'})

  0%|          | 0/10 [00:00<?, ?it/s]

Unnamed: 0,Classifier,L0
0,Random Forest,1.4
1,Linear,1.8


# CFs

In [7]:
# Fetch every distinct counterfactual id stored in the selected view
# (`suffix` comes from the L0 analysis cell).
Q = f'''
SELECT distinct CfId
FROM my_cfs_{suffix}
'''
cf_id_frame = querylang.execute(Q, parallel=False)

# Flatten the single-column result into a 1-D array of ids.
cf_ids = cf_id_frame.values.reshape(-1)

# Display the reconstructed counterfactuals for those ids as the cell output.
querylang.reconstruct(cf_ids)

Unnamed: 0,CfId,education,gender,marital_status,occupation,race,workclass,age,hours_per_week
0,1,Some-college,Male,Married,Professional,White,Private,46,41
1,2,Some-college,Male,Married,Professional,White,Private,51,45
2,3,HS-grad,Male,Married,Sales,White,Private,45,55
3,4,Some-college,Male,Married,Blue-Collar,White,Self-Employed,54,50
4,5,Assoc,Male,Married,Blue-Collar,White,Self-Employed,61,50
5,6,Masters,Male,Separated,Professional,White,Private,46,41
6,7,Bachelors,Male,Divorced,Other/Unknown,White,Other/Unknown,63,40
7,8,Some-college,Male,Married,Service,White,Private,51,45
8,9,School,Female,Single,Professional,White,Government,24,99
9,10,HS-grad,Male,Married,Service,White,Government,56,40


# EFFICIENT EVALUATION - for CeC and Random Forest

In [8]:
# Ids of at most five false-positive predictions (Instances.income = 0 but
# Predictions.Label = 1) made by classifier 0 only.
# NOTE(review): LIMIT is applied without ORDER BY, so the five rows returned
# depend on the query engine — confirm whether that is acceptable.
# NOTE(review): `prediction_efficient` is not referenced by any later cell
# visible in this notebook (the view-creation cell passes `prediction_q`) —
# confirm whether it was meant to be used there.
prediction_efficient = ''' 
SELECT Predictions.PredictionId
    FROM Instances, Predictions
    WHERE Instances.InstanceId = Predictions.InstanceId
      AND Instances.income = 0 and  Predictions.Label = 1
      AND Predictions.ClassifierId = 0
LIMIT 5
'''

# First entry registered under 'CecCFs'; presumably the CeC generator bound to
# classifier 0 (Random Forest) — TODO confirm the meaning of index 0.
cf_alg = querylang.CF_generations['CecCFs'][0]
# Element [1] of the entry for model 0; it is handed to get_implication_graph
# as `encoder` further down — TODO confirm the layout of querylang.models.
encoder = querylang.models[0][1]

In [9]:
# A query looking for CFs that change gender and race.
# (Plain assignments: the previous `cfs_q1 = cfs_query=(...)` form was a chained
# assignment that also rebound an unused global named `cfs_query`.)
cfs_q1 = (queries.AND, {'features': ['gender', 'race']})
# A query looking for CFs that change gender or race.
cfs_q2 = (queries.OR, {'features': ['gender', 'race']})

# Create one CeC view per constraint, in this order: the calls report
# sequentially numbered relations (my_cfs_4 then my_cfs_5) that the next cell
# refers to by number.
# NOTE(review): `prediction_q` is used here although `prediction_efficient` is
# defined in the preceding cell and never referenced — confirm which is intended.
for view_constraint in (cfs_q1, cfs_q2):
    querylang.create_cfs_view(cf_type='CecCFs', prediction_query=prediction_q, cfs_query=view_constraint)

Your relations names are: my_cfs_4 and my_prediction_cfs_4
Your relations names are: my_cfs_5 and my_prediction_cfs_5


In [10]:
# Union of the two CeC views my_cfs_4 and my_cfs_5.
Q = """ 
SELECT * FROM  my_cfs_4

UNION

SELECT * FROM  my_cfs_5
"""

# Extract the expressions of the query and build the implication graph over them.
expressions = querylang.get_expressions(Q)
implication_graph = get_implication_graph(
    encoder,
    expressions,
    is_hashing=True,
    is_atom_implication=True,
)

# Generate the counterfactuals in one batch using the L0 metric.
# NOTE(review): the literal 5 is an unnamed positional argument — confirm its meaning.
cf_map, cfs = batch_cfs_generation(implication_graph, l0_metric, 5, querylang, cf_alg)

# cf_map is indexed by (view, prediction id): here view 4 and the prediction
# identified by 68; the matching entry of `cfs` is displayed as the cell output.
view_and_prediction = (4, 68)
cfs[cf_map[view_and_prediction]]

  0%|          | 0/12 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

Unnamed: 0,education,gender,marital_status,occupation,race,workclass,age,hours_per_week
0,Bachelors,Female,Separated,Sales,Other,Private,45,55
