In [32]:
import numpy 
import pandas as pd
import dice_ml
import warnings
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor

# Notebook-wide settings.
# Every warning category is silenced for the rest of the session.
warnings.filterwarnings(action="ignore")
# Let pandas print up to 500 rows before truncating a displayed frame.
pd.set_option("display.max_rows", 500)

In [19]:
# Load the raw table; the first CSV column becomes the row index.
data = pd.read_csv(
    '../data/TotalClothingValue.csv',
    index_col=0,
)

In [20]:
# Only TSV is studied from here on, so the columns listed below are discarded
# (presumably the other survey outcomes -- confirm against the data dictionary).
# Note: `data` is overwritten, so re-running this cell without reloading the CSV
# raises KeyError because the columns are already gone.
data = data.drop(
    ['TPV', 'TCV', 'TSL', 'MC', 'SwC'],
    axis=1,
)

In [26]:
# The regression target is the TSV column ...
target = data.loc[:, 'TSV']
# ... and every other column name is used as a predictor.
features = data.columns.drop('TSV').tolist()

In [29]:
# Predictor matrix: the full table with the TSV target column removed.
datasetX = data.drop(columns='TSV')

In [31]:
# Hold out 20% of the rows for testing; the fixed seed keeps the split stable
# across runs.
x_train, x_test, y_train, y_test = train_test_split(
    datasetX,
    target,
    random_state=0,
    test_size=0.2,
)

In [33]:
# Columns of x_train that are not listed in `features` are treated as categorical.
# With `features` defined earlier in this notebook as every predictor column,
# this difference is an empty Index, so the 'cat' branch below receives no
# columns and every predictor is standardised as a numeric feature.
categorical_features = x_train.columns.difference(features)

# Preprocessing pipelines for numeric and categorical data.
numeric_transformer = Pipeline(steps=[
    ('scaler', StandardScaler()),
])

categorical_transformer = Pipeline(steps=[
    # Categories unseen during fit are ignored at transform time instead of
    # raising an error.
    ('onehot', OneHotEncoder(handle_unknown='ignore')),
])

transformations = ColumnTransformer(
    transformers=[
        ('num', numeric_transformer, features),
        ('cat', categorical_transformer, categorical_features),
    ])

# Preprocessing and estimator are bundled in one pipeline so that raw feature
# frames can be passed directly to `model`.
# random_state is fixed (matching the seed used for the train/test split) so
# that the fitted forest -- and anything computed from its predictions -- is
# reproducible across kernel restarts.
regr = Pipeline(steps=[('preprocessor', transformations),
                       ('regressor', RandomForestRegressor(random_state=0))])
model = regr.fit(x_train, y_train)

In [34]:
# DiCE data interface: the full frame (predictors plus TSV), with every
# predictor declared as a continuous feature.
d = dice_ml.Data(
    dataframe=data,
    continuous_features=features,
    outcome_name='TSV',
)
# DiCE model interface wrapping the fitted scikit-learn pipeline as a regressor.
m = dice_ml.Model(
    model=model,
    backend='sklearn',
    model_type='regressor',
)

In [35]:
# Build the counterfactual explainer from the data and model interfaces,
# selecting DiCE's 'genetic' generation method.
exp = dice_ml.Dice(d, m, method='genetic')

In [44]:
# Take three held-out rows (positions 12, 13 and 14 of the test set) as the
# instances to explain.
query_instances = x_test.iloc[12:15]

In [59]:
# Save the first query instance as CSV text (row index included).
# NOTE(review): the target path has no '.csv' extension, unlike the
# counterfactual export elsewhere in this notebook -- confirm this is intended.
query_instances.iloc[:1].to_csv('../results/exp1_query')

In [45]:
# Request four counterfactuals per query instance whose predicted TSV falls
# inside the range [0.0, 2.0].
cf = exp.generate_counterfactuals(
    query_instances=query_instances,
    total_CFs=4,
    desired_range=[0.0, 2.0],
)

100%|██████████| 3/3 [00:13<00:00,  4.51s/it]


In [46]:
# Display each query instance followed by its counterfactual set; with
# show_only_changes=True, feature values left unchanged are rendered as '-'.
cf.visualize_as_dataframe(show_only_changes=True)

Query instance (original outcome : -1)


Unnamed: 0,DAY,School,SchoolType,StartTime,AvgMaxDailyTemp,AvgMinDailyTemp,AvgIndoorRelativeHumidity,IndoorTempDuringSurvey,Grade,Age,Gender,FormalClothing,TotalCLOwithChair,TSV
0,2.0,4.0,1.0,1.0,19.6,6.6,59.599998,14.8,3.0,8.0,1.0,1.0,1.69,-0.910667



Diverse Counterfactual set (new outcome: [0.0, 2.0])


Unnamed: 0,DAY,School,SchoolType,StartTime,AvgMaxDailyTemp,AvgMinDailyTemp,AvgIndoorRelativeHumidity,IndoorTempDuringSurvey,Grade,Age,Gender,FormalClothing,TotalCLOwithChair,TSV
0,3.0,2.0,-,-,22.9,6.6,57.2,15.0,-,-,-,-,1.38,0.01
0,3.0,-,-,-,20.0,8.8,60.5,13.6,-,9.0,-,-,1.55,0.0
0,3.0,3.0,-,-,21.2,5.6,62.3,14.0,-,-,0.0,-,1.69,0.01
0,-,2.0,-,-,22.2,5.2,56.9,13.5,4.0,-,-,-,1.55,0.3423333333333332


Query instance (original outcome : -1)


Unnamed: 0,DAY,School,SchoolType,StartTime,AvgMaxDailyTemp,AvgMinDailyTemp,AvgIndoorRelativeHumidity,IndoorTempDuringSurvey,Grade,Age,Gender,FormalClothing,TotalCLOwithChair,TSV
0,2.0,1.0,0.0,5.0,20.5,5.0,57.529999,15.3,5.0,9.0,0.0,0.0,1.26,-0.51



Diverse Counterfactual set (new outcome: [0.0, 2.0])


Unnamed: 0,DAY,School,SchoolType,StartTime,AvgMaxDailyTemp,AvgMinDailyTemp,AvgIndoorRelativeHumidity,IndoorTempDuringSurvey,Grade,Age,Gender,FormalClothing,TotalCLOwithChair,TSV
0,-,-,-,-,-,-,57.5,15.3,-,11.0,-,-,1.21,0.0
0,-,-,-,-,-,-,57.5,15.3,-,10.0,1.0,-,1.07,0.008
0,-,-,-,-,-,-,57.5,15.3,-,11.0,1.0,-,1.07,0.0
0,3.0,-,-,3.0,22.2,5.1,55.1,14.5,4.0,-,-,-,1.21,0.03


Query instance (original outcome : 0)


Unnamed: 0,DAY,School,SchoolType,StartTime,AvgMaxDailyTemp,AvgMinDailyTemp,AvgIndoorRelativeHumidity,IndoorTempDuringSurvey,Grade,Age,Gender,FormalClothing,TotalCLOwithChair,TSV
0,1.0,4.0,1.0,3.0,22.4,4.7,72.0,14.5,3.0,9.0,1.0,1.0,1.5,-0.236667



Diverse Counterfactual set (new outcome: [0.0, 2.0])


Unnamed: 0,DAY,School,SchoolType,StartTime,AvgMaxDailyTemp,AvgMinDailyTemp,AvgIndoorRelativeHumidity,IndoorTempDuringSurvey,Grade,Age,Gender,FormalClothing,TotalCLOwithChair,TSV
0,-,-,-,-,22.4,4.7,-,-,-,8.0,0.0,-,1.71,0.475
0,-,-,-,5.0,22.4,4.7,68.1,13.1,5.0,11.0,-,-,1.66,0.121
0,5.0,-,-,1.0,20.6,4.6,68.8,13.7,-,-,-,-,1.55,0.0
0,2.0,3.0,-,1.0,22.4,10.1,74.2,17.0,-,-,-,-,1.58,0.135


In [51]:
# Export the counterfactuals produced for the first query instance, without
# writing the row index.
cf.cf_examples_list[0].final_cfs_df.to_csv(
    '../results/exp1_cfs.csv',
    index=False,
)