# The interpretations using LIME

# Heart dataset

In [1]:
import pandas as pd
import numpy as np
from datetime import timedelta, date
import datetime as dt
import sys
sys.path.append('./explanation_helper')
import expla_helper as explanation

# Read the heart CSV and pass it through the project's to_float helper.
heart = explanation.to_float(
    pd.read_csv('data/datasets_33180_43520_heart.csv', sep=',')
)

# Print the resulting column dtypes (the recorded output lists every feature
# column as float64 and `target` as int64).
print(heart.dtypes)

age         float64
sex         float64
cp          float64
trestbps    float64
chol        float64
fbs         float64
restecg     float64
thalach     float64
exang       float64
oldpeak     float64
slope       float64
ca          float64
thal        float64
target        int64
dtype: object


# Wine dataset

In [2]:
import pandas as pd
import numpy as np
from datetime import timedelta, date
import datetime as dt
import sys
sys.path.append('./explanation_helper')
import expla_helper as explanation


# Read the wine CSV and pass it through the project's to_float helper.
# Disabled step kept for reference:
#   wine.drop(['Unnamed: 0'], axis = 1, inplace = True)
wine = explanation.to_float(
    pd.read_csv('data/wine_limpo.csv', sep=',')
)

# Print the resulting column dtypes (the recorded output lists every feature
# column as float64 and `target` as int64).
print(wine.dtypes)

fixed acidity           float64
volatile acidity        float64
citric acid             float64
residual sugar          float64
chlorides               float64
free sulfur dioxide     float64
total sulfur dioxide    float64
density                 float64
pH                      float64
sulphates               float64
alcohol                 float64
target                    int64
dtype: object


# Diabetes dataset

In [3]:
import pandas as pd
import numpy as np
from datetime import timedelta, date
import datetime as dt

import sys
sys.path.append('./explanation_helper')
import expla_helper as explanation


# Read the diabetes CSV and pass it through the project's to_float helper.
# Disabled step kept for reference:
#   diabetes.drop(['Unnamed: 0'], axis = 1, inplace = True)
diabetes = explanation.to_float(
    pd.read_csv('data/diabetes_limpo.csv', sep=',')
)

# Print the resulting column dtypes (the recorded output lists every feature
# column as float64 and `target` as int64).
print(diabetes.dtypes)


Pregnancies                 float64
Glucose                     float64
BloodPressure               float64
SkinThickness               float64
Insulin                     float64
BMI                         float64
DiabetesPedigreeFunction    float64
Age                         float64
target                        int64
dtype: object


# LIME 

## Choosing 4 instances (TP, FP, TN, FN) of each dataset randomly from DT to be our baseline

Code to choose 4 instances (TP, FP, TN, FN) from each dataset at random. We chose the decision tree as the baseline classifier; its predictions determine the confusion-matrix category of each instance.

### Get the samples from the baseline

In [4]:
import numpy as np
import pandas as pd
import lime
import lime.lime_tabular
import sys
sys.path.append('./ml_helper')
import ml_helper_train_model as model_train
import ml_helper_serialize_model as serializer
sys.path.append('./explanation_helper')
import expla_helper as explanation
from sklearn.model_selection import train_test_split

# Fix NumPy's global seed before any sampling happens.
# NOTE(review): presumably sample_baseline_model draws from NumPy's global RNG,
# which would make the picked instances repeatable — confirm in expla_helper.
np.random.seed(1)

# (CSV file, serialized decision-tree baseline) for each data set, in the
# order heart, wine, diabetes. The order is kept because the sampling calls
# run one after another under the single seed set above.
baseline_inputs = (
    ('data/datasets_33180_43520_heart.csv', 'serialized_model/DT_heart.pkl'),
    ('data/wine_limpo.csv', 'serialized_model/DT_wine2.pkl'),
    ('data/diabetes_limpo.csv', 'serialized_model/DT_diabetes2.pkl'),
)

loaded_frames = []
baseline_samples = []
for csv_path, model_path in baseline_inputs:
    # The frame is handed to the sampler exactly as read (no to_float here).
    frame = pd.read_csv(csv_path, sep=',')
    model = serializer.load_model(file_name=model_path)
    loaded_frames.append(frame)
    # The recorded output of this cell lists one FN, FP, TN and TP row
    # (real_label / pred_label / mat_conf_mod) per data set.
    baseline_samples.append(explanation.sample_baseline_model(model, frame))

# Bind the results to the names the later cells read.
heart, wine, diabetes = loaded_frames
heart_sample, wine_sample, diabetes_sample = baseline_samples


     real_label  pred_label mat_conf_mod
143           1           0           FN
203           0           1           FP
242           0           0           TN
96            1           1           TP
      real_label  pred_label mat_conf_mod
86             1           0           FN
1458           0           1           FP
791            0           0           TN
1003           1           1           TP
     real_label  pred_label mat_conf_mod
291           1           0           FN
725           0           1           FP
60            0           0           TN
755           1           1           TP


## MLP

### Heart dataset - explanation

In [5]:
import pandas as pd
import numpy as np
import lime
import lime.lime_tabular
import sys
sys.path.append('./ml_helper')
import ml_helper_train_model as model_train
import ml_helper_serialize_model as serializer
sys.path.append('./explanation_helper')
import expla_helper as explanation
from sklearn.model_selection import train_test_split

# Read the heart data set and pass it through the project's to_float helper.
heart = pd.read_csv('data/datasets_33180_43520_heart.csv', sep = ',')
heart = explanation.to_float(heart)

# Restore the serialized MLP model that will be explained.
model_file_name = 'serialized_model/MLP_heart.pkl'
model = serializer.load_model(model_file_name)

# The explainer call below receives a deep copy rather than `heart` itself.
dataset = heart.copy(deep = True)
# Class display names in Portuguese ('saudável' = healthy, 'doente' = sick);
# presumably ordered as label 0, label 1 — TODO confirm.
target_names = np.array(['saudável', 'doente'])

# Build the '<model>#<dataset>' tag used in the output file name: strip the
# directory prefix and the '.pkl' suffix, then swap '_' for '#'.
# A single replace('_', '#') already converts every underscore.
folder = 'serialized_explanation/'
model_dataset = model_file_name.replace('serialized_model/','').replace('.pkl', '').replace('_', '#')

# One explain_it call per entry of heart_sample (built in the baseline cell).
# NOTE(review): each entry is used as the instance index — confirm that
# iterating heart_sample yields row indices and not column labels.
for sample in heart_sample:
    instance_index = sample
    # Every column except the target counts as a feature.
    num_features = dataset.columns.size - 1

    output_file = """{0}{1}#instance:{2}#num_features:{3}.html""".format(folder,
                                                                          model_dataset,
                                                                          instance_index,
                                                                          num_features)

    # presumably writes the LIME explanation to output_file — confirm in
    # expla_helper.explain_it.
    explanation.explain_it(model,
                           dataset,
                           target_names,
                           num_features,
                           instance_index,
                           output_file)


### Wine dataset - explanation

In [6]:
# import numpy as np
import lime
import lime.lime_tabular
import sys
sys.path.append('./ml_helper')
import ml_helper_train_model as model_train
import ml_helper_serialize_model as serializer
sys.path.append('./explanation_helper')
import expla_helper as explanation
from sklearn.model_selection import train_test_split

# Read the wine data set and pass it through the project's to_float helper.
# Disabled step kept for reference:
#   wine.drop(['Unnamed: 0'], axis = 1, inplace = True)
wine = explanation.to_float(
    pd.read_csv('data/wine_limpo.csv', sep=',')
)

# Restore the serialized MLP model that will be explained.
model_file_name = 'serialized_model/MLP_wine2.pkl'
model = serializer.load_model(model_file_name)

# The explainer call below receives a deep copy rather than `wine` itself.
dataset = wine.copy(deep=True)
# Class display names in Portuguese ('ruim' = bad, 'bom' = good);
# presumably ordered as label 0, label 1 — TODO confirm.
target_names = np.array(['ruim', 'bom'])

# Build the '<model>#<dataset>' tag used in the output file name: strip the
# directory prefix and the '.pkl' suffix, then swap '_' for '#'.
folder = 'serialized_explanation/'
model_dataset = model_file_name.replace('serialized_model/', '')
model_dataset = model_dataset.replace('.pkl', '')
model_dataset = model_dataset.replace('_', '#')

# One explain_it call per entry of wine_sample (built in the baseline cell).
# NOTE(review): each entry is used as the instance index — confirm that
# iterating wine_sample yields row indices and not column labels.
for sample in wine_sample:
    instance_index = sample
    # Every column except the target counts as a feature.
    num_features = dataset.columns.size - 1
    output_file = """{0}{1}#instance:{2}#num_features:{3}.html""".format(
        folder, model_dataset, instance_index, num_features
    )
    # presumably writes the LIME explanation to output_file — confirm in
    # expla_helper.explain_it.
    explanation.explain_it(
        model, dataset, target_names, num_features, instance_index, output_file
    )


### Diabetes dataset

In [7]:
# import numpy as np
import lime
import lime.lime_tabular
import sys
sys.path.append('./ml_helper')
import ml_helper_train_model as model_train
import ml_helper_serialize_model as serializer
sys.path.append('./explanation_helper')
import expla_helper as explanation
from sklearn.model_selection import train_test_split

# Read the diabetes data set and pass it through the project's to_float helper.
# Disabled step kept for reference:
#   diabetes.drop(['Unnamed: 0'], axis = 1, inplace = True)
diabetes = explanation.to_float(
    pd.read_csv('data/diabetes_limpo.csv', sep=',')
)

# Restore the serialized MLP model that will be explained.
model_file_name = 'serialized_model/MLP_diabetes2.pkl'
model = serializer.load_model(model_file_name)

# The explainer call below receives a deep copy rather than `diabetes` itself.
dataset = diabetes.copy(deep=True)
# Class display names in Portuguese ('saudável' = healthy, 'doente' = sick);
# presumably ordered as label 0, label 1 — TODO confirm.
target_names = np.array(['saudável', 'doente'])

# Build the '<model>#<dataset>' tag used in the output file name: strip the
# directory prefix and the '.pkl' suffix, then swap '_' for '#'.
folder = 'serialized_explanation/'
model_dataset = model_file_name.replace('serialized_model/', '')
model_dataset = model_dataset.replace('.pkl', '')
model_dataset = model_dataset.replace('_', '#')

# One explain_it call per entry of diabetes_sample (built in the baseline cell).
# NOTE(review): each entry is used as the instance index — confirm that
# iterating diabetes_sample yields row indices and not column labels.
for sample in diabetes_sample:
    instance_index = sample
    # Every column except the target counts as a feature.
    num_features = dataset.columns.size - 1
    output_file = """{0}{1}#instance:{2}#num_features:{3}.html""".format(
        folder, model_dataset, instance_index, num_features
    )
    # presumably writes the LIME explanation to output_file — confirm in
    # expla_helper.explain_it.
    explanation.explain_it(
        model, dataset, target_names, num_features, instance_index, output_file
    )


## Random Forest

### Heart dataset

In [8]:
# import numpy as np
import lime
import lime.lime_tabular
import sys
sys.path.append('./ml_helper')
import ml_helper_train_model as model_train
import ml_helper_serialize_model as serializer
sys.path.append('./explanation_helper')
import expla_helper as explanation
from sklearn.model_selection import train_test_split

# Read the heart data set and pass it through the project's to_float helper.
heart = explanation.to_float(
    pd.read_csv('data/datasets_33180_43520_heart.csv', sep=',')
)

# Restore the serialized Random Forest model that will be explained.
model_file_name = 'serialized_model/RF_heart.pkl'
model = serializer.load_model(model_file_name)

# The explainer call below receives a deep copy rather than `heart` itself.
dataset = heart.copy(deep=True)
# Class display names in Portuguese ('saudável' = healthy, 'doente' = sick);
# presumably ordered as label 0, label 1 — TODO confirm.
target_names = np.array(['saudável', 'doente'])

# Build the '<model>#<dataset>' tag used in the output file name: strip the
# directory prefix and the '.pkl' suffix, then swap '_' for '#'.
folder = 'serialized_explanation/'
model_dataset = model_file_name.replace('serialized_model/', '')
model_dataset = model_dataset.replace('.pkl', '')
model_dataset = model_dataset.replace('_', '#')

# One explain_it call per entry of heart_sample (built in the baseline cell).
# NOTE(review): each entry is used as the instance index — confirm that
# iterating heart_sample yields row indices and not column labels.
for sample in heart_sample:
    instance_index = sample
    # Every column except the target counts as a feature.
    num_features = dataset.columns.size - 1
    output_file = """{0}{1}#instance:{2}#num_features:{3}.html""".format(
        folder, model_dataset, instance_index, num_features
    )
    # presumably writes the LIME explanation to output_file — confirm in
    # expla_helper.explain_it.
    explanation.explain_it(
        model, dataset, target_names, num_features, instance_index, output_file
    )


### Wine dataset

In [9]:
# import numpy as np
import lime
import lime.lime_tabular
import sys
sys.path.append('./ml_helper')
import ml_helper_train_model as model_train
import ml_helper_serialize_model as serializer
sys.path.append('./explanation_helper')
import expla_helper as explanation
from sklearn.model_selection import train_test_split

# Read the wine data set and pass it through the project's to_float helper.
# Disabled step kept for reference:
#   wine.drop(['Unnamed: 0'], axis = 1, inplace = True)
wine = explanation.to_float(
    pd.read_csv('data/wine_limpo.csv', sep=',')
)

# Restore the serialized Random Forest model that will be explained.
model_file_name = 'serialized_model/RF_wine2.pkl'
model = serializer.load_model(model_file_name)

# The explainer call below receives a deep copy rather than `wine` itself.
dataset = wine.copy(deep=True)
# Class display names in Portuguese ('ruim' = bad, 'bom' = good);
# presumably ordered as label 0, label 1 — TODO confirm.
target_names = np.array(['ruim', 'bom'])

# Build the '<model>#<dataset>' tag used in the output file name: strip the
# directory prefix and the '.pkl' suffix, then swap '_' for '#'.
folder = 'serialized_explanation/'
model_dataset = model_file_name.replace('serialized_model/', '')
model_dataset = model_dataset.replace('.pkl', '')
model_dataset = model_dataset.replace('_', '#')

# One explain_it call per entry of wine_sample (built in the baseline cell).
# NOTE(review): each entry is used as the instance index — confirm that
# iterating wine_sample yields row indices and not column labels.
for sample in wine_sample:
    instance_index = sample
    # Every column except the target counts as a feature.
    num_features = dataset.columns.size - 1
    output_file = """{0}{1}#instance:{2}#num_features:{3}.html""".format(
        folder, model_dataset, instance_index, num_features
    )
    # presumably writes the LIME explanation to output_file — confirm in
    # expla_helper.explain_it.
    explanation.explain_it(
        model, dataset, target_names, num_features, instance_index, output_file
    )


### Diabetes dataset

In [10]:
# import numpy as np
import lime
import lime.lime_tabular
import sys
sys.path.append('./ml_helper')
import ml_helper_train_model as model_train
import ml_helper_serialize_model as serializer
sys.path.append('./explanation_helper')
import expla_helper as explanation
from sklearn.model_selection import train_test_split

# Read the diabetes data set and pass it through the project's to_float helper.
# Disabled step kept for reference:
#   diabetes.drop(['Unnamed: 0'], axis = 1, inplace = True)
diabetes = explanation.to_float(
    pd.read_csv('data/diabetes_limpo.csv', sep=',')
)

# Restore the serialized Random Forest model that will be explained.
model_file_name = 'serialized_model/RF_diabetes2.pkl'
model = serializer.load_model(model_file_name)

# The explainer call below receives a deep copy rather than `diabetes` itself.
dataset = diabetes.copy(deep=True)
# Class display names in Portuguese ('saudável' = healthy, 'doente' = sick);
# presumably ordered as label 0, label 1 — TODO confirm.
target_names = np.array(['saudável', 'doente'])

# Build the '<model>#<dataset>' tag used in the output file name: strip the
# directory prefix and the '.pkl' suffix, then swap '_' for '#'.
folder = 'serialized_explanation/'
model_dataset = model_file_name.replace('serialized_model/', '')
model_dataset = model_dataset.replace('.pkl', '')
model_dataset = model_dataset.replace('_', '#')

# One explain_it call per entry of diabetes_sample (built in the baseline cell).
# NOTE(review): each entry is used as the instance index — confirm that
# iterating diabetes_sample yields row indices and not column labels.
for sample in diabetes_sample:
    instance_index = sample
    # Every column except the target counts as a feature.
    num_features = dataset.columns.size - 1
    output_file = """{0}{1}#instance:{2}#num_features:{3}.html""".format(
        folder, model_dataset, instance_index, num_features
    )
    # presumably writes the LIME explanation to output_file — confirm in
    # expla_helper.explain_it.
    explanation.explain_it(
        model, dataset, target_names, num_features, instance_index, output_file
    )


## Naive Bayes

### Heart dataset

In [11]:
# import numpy as np
import lime
import lime.lime_tabular
import sys
sys.path.append('./ml_helper')
import ml_helper_train_model as model_train
import ml_helper_serialize_model as serializer
sys.path.append('./explanation_helper')
import expla_helper as explanation
from sklearn.model_selection import train_test_split

# Read the heart data set and pass it through the project's to_float helper.
heart = explanation.to_float(
    pd.read_csv('data/datasets_33180_43520_heart.csv', sep=',')
)

# Restore the serialized Naive Bayes model that will be explained.
model_file_name = 'serialized_model/NaiveBayes_heart.pkl'
model = serializer.load_model(model_file_name)

# The explainer call below receives a deep copy rather than `heart` itself.
dataset = heart.copy(deep=True)
# Class display names in Portuguese ('saudável' = healthy, 'doente' = sick);
# presumably ordered as label 0, label 1 — TODO confirm.
target_names = np.array(['saudável', 'doente'])

# Build the '<model>#<dataset>' tag used in the output file name: strip the
# directory prefix and the '.pkl' suffix, then swap '_' for '#'.
folder = 'serialized_explanation/'
model_dataset = model_file_name.replace('serialized_model/', '')
model_dataset = model_dataset.replace('.pkl', '')
model_dataset = model_dataset.replace('_', '#')

# One explain_it call per entry of heart_sample (built in the baseline cell).
# NOTE(review): each entry is used as the instance index — confirm that
# iterating heart_sample yields row indices and not column labels.
for sample in heart_sample:
    instance_index = sample
    # Every column except the target counts as a feature.
    num_features = dataset.columns.size - 1
    output_file = """{0}{1}#instance:{2}#num_features:{3}.html""".format(
        folder, model_dataset, instance_index, num_features
    )
    # presumably writes the LIME explanation to output_file — confirm in
    # expla_helper.explain_it.
    explanation.explain_it(
        model, dataset, target_names, num_features, instance_index, output_file
    )


### Wine dataset

In [12]:
# import numpy as np
import lime
import lime.lime_tabular
import sys
sys.path.append('./ml_helper')
import ml_helper_train_model as model_train
import ml_helper_serialize_model as serializer
sys.path.append('./explanation_helper')
import expla_helper as explanation
from sklearn.model_selection import train_test_split

# Read the wine data set and pass it through the project's to_float helper.
# Disabled step kept for reference:
#   wine.drop(['Unnamed: 0'], axis = 1, inplace = True)
wine = explanation.to_float(
    pd.read_csv('data/wine_limpo.csv', sep=',')
)

# Restore the serialized Naive Bayes model that will be explained.
model_file_name = 'serialized_model/NaiveBayes_wine2.pkl'
model = serializer.load_model(model_file_name)

# The explainer call below receives a deep copy rather than `wine` itself.
dataset = wine.copy(deep=True)
# Class display names in Portuguese ('ruim' = bad, 'bom' = good);
# presumably ordered as label 0, label 1 — TODO confirm.
target_names = np.array(['ruim', 'bom'])

# Build the '<model>#<dataset>' tag used in the output file name: strip the
# directory prefix and the '.pkl' suffix, then swap '_' for '#'.
folder = 'serialized_explanation/'
model_dataset = model_file_name.replace('serialized_model/', '')
model_dataset = model_dataset.replace('.pkl', '')
model_dataset = model_dataset.replace('_', '#')

# One explain_it call per entry of wine_sample (built in the baseline cell).
# NOTE(review): each entry is used as the instance index — confirm that
# iterating wine_sample yields row indices and not column labels.
for sample in wine_sample:
    instance_index = sample
    # Every column except the target counts as a feature.
    num_features = dataset.columns.size - 1
    output_file = """{0}{1}#instance:{2}#num_features:{3}.html""".format(
        folder, model_dataset, instance_index, num_features
    )
    # presumably writes the LIME explanation to output_file — confirm in
    # expla_helper.explain_it.
    explanation.explain_it(
        model, dataset, target_names, num_features, instance_index, output_file
    )


### Diabetes dataset

In [13]:
# import numpy as np
import lime
import lime.lime_tabular
import sys
sys.path.append('./ml_helper')
import ml_helper_train_model as model_train
import ml_helper_serialize_model as serializer
sys.path.append('./explanation_helper')
import expla_helper as explanation
from sklearn.model_selection import train_test_split

# Read the diabetes data set and pass it through the project's to_float helper.
# Disabled step kept for reference:
#   diabetes.drop(['Unnamed: 0'], axis = 1, inplace = True)
diabetes = explanation.to_float(
    pd.read_csv('data/diabetes_limpo.csv', sep=',')
)

# Restore the serialized Naive Bayes model that will be explained.
model_file_name = 'serialized_model/NaiveBayes_diabetes2.pkl'
model = serializer.load_model(model_file_name)

# The explainer call below receives a deep copy rather than `diabetes` itself.
dataset = diabetes.copy(deep=True)
# Class display names in Portuguese ('saudável' = healthy, 'doente' = sick);
# presumably ordered as label 0, label 1 — TODO confirm.
target_names = np.array(['saudável', 'doente'])

# Build the '<model>#<dataset>' tag used in the output file name: strip the
# directory prefix and the '.pkl' suffix, then swap '_' for '#'.
folder = 'serialized_explanation/'
model_dataset = model_file_name.replace('serialized_model/', '')
model_dataset = model_dataset.replace('.pkl', '')
model_dataset = model_dataset.replace('_', '#')

# One explain_it call per entry of diabetes_sample (built in the baseline cell).
# NOTE(review): each entry is used as the instance index — confirm that
# iterating diabetes_sample yields row indices and not column labels.
for sample in diabetes_sample:
    instance_index = sample
    # Every column except the target counts as a feature.
    num_features = dataset.columns.size - 1
    output_file = """{0}{1}#instance:{2}#num_features:{3}.html""".format(
        folder, model_dataset, instance_index, num_features
    )
    # presumably writes the LIME explanation to output_file — confirm in
    # expla_helper.explain_it.
    explanation.explain_it(
        model, dataset, target_names, num_features, instance_index, output_file
    )


## Decision Tree

### Heart dataset

In [14]:
# import numpy as np
import lime
import lime.lime_tabular
import sys
sys.path.append('./ml_helper')
import ml_helper_train_model as model_train
import ml_helper_serialize_model as serializer
sys.path.append('./explanation_helper')
import expla_helper as explanation
from sklearn.model_selection import train_test_split

# Read the heart data set and pass it through the project's to_float helper.
heart = explanation.to_float(
    pd.read_csv('data/datasets_33180_43520_heart.csv', sep=',')
)

# Restore the serialized Decision Tree model that will be explained.
model_file_name = 'serialized_model/DT_heart.pkl'
model = serializer.load_model(model_file_name)

# The explainer call below receives a deep copy rather than `heart` itself.
dataset = heart.copy(deep=True)
# Class display names in Portuguese ('saudável' = healthy, 'doente' = sick);
# presumably ordered as label 0, label 1 — TODO confirm.
target_names = np.array(['saudável', 'doente'])

# Build the '<model>#<dataset>' tag used in the output file name: strip the
# directory prefix and the '.pkl' suffix, then swap '_' for '#'.
folder = 'serialized_explanation/'
model_dataset = model_file_name.replace('serialized_model/', '')
model_dataset = model_dataset.replace('.pkl', '')
model_dataset = model_dataset.replace('_', '#')

# One explain_it call per entry of heart_sample (built in the baseline cell).
# NOTE(review): each entry is used as the instance index — confirm that
# iterating heart_sample yields row indices and not column labels.
for sample in heart_sample:
    instance_index = sample
    # Every column except the target counts as a feature.
    num_features = dataset.columns.size - 1
    output_file = """{0}{1}#instance:{2}#num_features:{3}.html""".format(
        folder, model_dataset, instance_index, num_features
    )
    # presumably writes the LIME explanation to output_file — confirm in
    # expla_helper.explain_it.
    explanation.explain_it(
        model, dataset, target_names, num_features, instance_index, output_file
    )


### Wine dataset

In [15]:
# import numpy as np
import lime
import lime.lime_tabular
import sys
sys.path.append('./ml_helper')
import ml_helper_train_model as model_train
import ml_helper_serialize_model as serializer
sys.path.append('./explanation_helper')
import expla_helper as explanation
from sklearn.model_selection import train_test_split

# Read the wine data set and pass it through the project's to_float helper.
# Disabled step kept for reference:
#   wine.drop(['Unnamed: 0'], axis = 1, inplace = True)
wine = explanation.to_float(
    pd.read_csv('data/wine_limpo.csv', sep=',')
)

# Restore the serialized Decision Tree model that will be explained.
model_file_name = 'serialized_model/DT_wine2.pkl'
model = serializer.load_model(model_file_name)

# The explainer call below receives a deep copy rather than `wine` itself.
dataset = wine.copy(deep=True)
# Class display names in Portuguese ('ruim' = bad, 'bom' = good);
# presumably ordered as label 0, label 1 — TODO confirm.
target_names = np.array(['ruim', 'bom'])

# Build the '<model>#<dataset>' tag used in the output file name: strip the
# directory prefix and the '.pkl' suffix, then swap '_' for '#'.
folder = 'serialized_explanation/'
model_dataset = model_file_name.replace('serialized_model/', '')
model_dataset = model_dataset.replace('.pkl', '')
model_dataset = model_dataset.replace('_', '#')

# One explain_it call per entry of wine_sample (built in the baseline cell).
# NOTE(review): each entry is used as the instance index — confirm that
# iterating wine_sample yields row indices and not column labels.
for sample in wine_sample:
    instance_index = sample
    # Every column except the target counts as a feature.
    num_features = dataset.columns.size - 1
    output_file = """{0}{1}#instance:{2}#num_features:{3}.html""".format(
        folder, model_dataset, instance_index, num_features
    )
    # presumably writes the LIME explanation to output_file — confirm in
    # expla_helper.explain_it.
    explanation.explain_it(
        model, dataset, target_names, num_features, instance_index, output_file
    )


### Diabetes dataset

In [16]:
# import numpy as np
import lime
import lime.lime_tabular
import sys
sys.path.append('./ml_helper')
import ml_helper_train_model as model_train
import ml_helper_serialize_model as serializer
sys.path.append('./explanation_helper')
import expla_helper as explanation
from sklearn.model_selection import train_test_split

# Read the diabetes data set and pass it through the project's to_float helper.
# Disabled step kept for reference:
#   diabetes.drop(['Unnamed: 0'], axis = 1, inplace = True)
diabetes = explanation.to_float(
    pd.read_csv('data/diabetes_limpo.csv', sep=',')
)

# Restore the serialized Decision Tree model that will be explained.
model_file_name = 'serialized_model/DT_diabetes2.pkl'
model = serializer.load_model(model_file_name)

# The explainer call below receives a deep copy rather than `diabetes` itself.
dataset = diabetes.copy(deep=True)
# Class display names in Portuguese ('saudável' = healthy, 'doente' = sick);
# presumably ordered as label 0, label 1 — TODO confirm.
target_names = np.array(['saudável', 'doente'])

# Build the '<model>#<dataset>' tag used in the output file name: strip the
# directory prefix and the '.pkl' suffix, then swap '_' for '#'.
folder = 'serialized_explanation/'
model_dataset = model_file_name.replace('serialized_model/', '')
model_dataset = model_dataset.replace('.pkl', '')
model_dataset = model_dataset.replace('_', '#')

# One explain_it call per entry of diabetes_sample (built in the baseline cell).
# NOTE(review): each entry is used as the instance index — confirm that
# iterating diabetes_sample yields row indices and not column labels.
for sample in diabetes_sample:
    instance_index = sample
    # Every column except the target counts as a feature.
    num_features = dataset.columns.size - 1
    output_file = """{0}{1}#instance:{2}#num_features:{3}.html""".format(
        folder, model_dataset, instance_index, num_features
    )
    # presumably writes the LIME explanation to output_file — confirm in
    # expla_helper.explain_it.
    explanation.explain_it(
        model, dataset, target_names, num_features, instance_index, output_file
    )


## KNN

### Heart dataset

In [17]:
# import numpy as np
import lime
import lime.lime_tabular
import sys
sys.path.append('./ml_helper')
import ml_helper_train_model as model_train
import ml_helper_serialize_model as serializer
sys.path.append('./explanation_helper')
import expla_helper as explanation
from sklearn.model_selection import train_test_split

# Read the heart data set and pass it through the project's to_float helper.
heart = explanation.to_float(
    pd.read_csv('data/datasets_33180_43520_heart.csv', sep=',')
)

# Restore the serialized KNN model that will be explained.
model_file_name = 'serialized_model/KNN_heart.pkl'
model = serializer.load_model(model_file_name)

# The explainer call below receives a deep copy rather than `heart` itself.
dataset = heart.copy(deep=True)
# Class display names in Portuguese ('saudável' = healthy, 'doente' = sick);
# presumably ordered as label 0, label 1 — TODO confirm.
target_names = np.array(['saudável', 'doente'])

# Build the '<model>#<dataset>' tag used in the output file name: strip the
# directory prefix and the '.pkl' suffix, then swap '_' for '#'.
folder = 'serialized_explanation/'
model_dataset = model_file_name.replace('serialized_model/', '')
model_dataset = model_dataset.replace('.pkl', '')
model_dataset = model_dataset.replace('_', '#')

# One explain_it call per entry of heart_sample (built in the baseline cell).
# NOTE(review): each entry is used as the instance index — confirm that
# iterating heart_sample yields row indices and not column labels.
for sample in heart_sample:
    instance_index = sample
    # Every column except the target counts as a feature.
    num_features = dataset.columns.size - 1
    output_file = """{0}{1}#instance:{2}#num_features:{3}.html""".format(
        folder, model_dataset, instance_index, num_features
    )
    # presumably writes the LIME explanation to output_file — confirm in
    # expla_helper.explain_it.
    explanation.explain_it(
        model, dataset, target_names, num_features, instance_index, output_file
    )


### Wine dataset

In [18]:
# import numpy as np
import lime
import lime.lime_tabular
import sys
sys.path.append('./ml_helper')
import ml_helper_train_model as model_train
import ml_helper_serialize_model as serializer
sys.path.append('./explanation_helper')
import expla_helper as explanation
from sklearn.model_selection import train_test_split

# Read the wine data set and pass it through the project's to_float helper.
# Disabled step kept for reference:
#   wine.drop(['Unnamed: 0'], axis = 1, inplace = True)
wine = explanation.to_float(
    pd.read_csv('data/wine_limpo.csv', sep=',')
)

# Restore the serialized KNN model that will be explained.
model_file_name = 'serialized_model/KNN_wine2.pkl'
model = serializer.load_model(model_file_name)

# The explainer call below receives a deep copy rather than `wine` itself.
dataset = wine.copy(deep=True)
# Class display names in Portuguese ('ruim' = bad, 'bom' = good);
# presumably ordered as label 0, label 1 — TODO confirm.
target_names = np.array(['ruim', 'bom'])

# Build the '<model>#<dataset>' tag used in the output file name: strip the
# directory prefix and the '.pkl' suffix, then swap '_' for '#'.
folder = 'serialized_explanation/'
model_dataset = model_file_name.replace('serialized_model/', '')
model_dataset = model_dataset.replace('.pkl', '')
model_dataset = model_dataset.replace('_', '#')

# One explain_it call per entry of wine_sample (built in the baseline cell).
# NOTE(review): each entry is used as the instance index — confirm that
# iterating wine_sample yields row indices and not column labels.
for sample in wine_sample:
    instance_index = sample
    # Every column except the target counts as a feature.
    num_features = dataset.columns.size - 1
    output_file = """{0}{1}#instance:{2}#num_features:{3}.html""".format(
        folder, model_dataset, instance_index, num_features
    )
    # presumably writes the LIME explanation to output_file — confirm in
    # expla_helper.explain_it.
    explanation.explain_it(
        model, dataset, target_names, num_features, instance_index, output_file
    )


### Diabetes dataset

In [19]:
# import numpy as np
import lime
import lime.lime_tabular
import sys
sys.path.append('./ml_helper')
import ml_helper_train_model as model_train
import ml_helper_serialize_model as serializer
sys.path.append('./explanation_helper')
import expla_helper as explanation
from sklearn.model_selection import train_test_split

# Read the diabetes data set and pass it through the project's to_float helper.
# Disabled step kept for reference:
#   diabetes.drop(['Unnamed: 0'], axis = 1, inplace = True)
diabetes = explanation.to_float(
    pd.read_csv('data/diabetes_limpo.csv', sep=',')
)

# Restore the serialized KNN model that will be explained.
model_file_name = 'serialized_model/KNN_diabetes2.pkl'
model = serializer.load_model(model_file_name)

# The explainer call below receives a deep copy rather than `diabetes` itself.
dataset = diabetes.copy(deep=True)
# Class display names in Portuguese ('saudável' = healthy, 'doente' = sick);
# presumably ordered as label 0, label 1 — TODO confirm.
target_names = np.array(['saudável', 'doente'])

# Build the '<model>#<dataset>' tag used in the output file name: strip the
# directory prefix and the '.pkl' suffix, then swap '_' for '#'.
folder = 'serialized_explanation/'
model_dataset = model_file_name.replace('serialized_model/', '')
model_dataset = model_dataset.replace('.pkl', '')
model_dataset = model_dataset.replace('_', '#')

# One explain_it call per entry of diabetes_sample (built in the baseline cell).
# NOTE(review): each entry is used as the instance index — confirm that
# iterating diabetes_sample yields row indices and not column labels.
for sample in diabetes_sample:
    instance_index = sample
    # Every column except the target counts as a feature.
    num_features = dataset.columns.size - 1
    output_file = """{0}{1}#instance:{2}#num_features:{3}.html""".format(
        folder, model_dataset, instance_index, num_features
    )
    # presumably writes the LIME explanation to output_file — confirm in
    # expla_helper.explain_it.
    explanation.explain_it(
        model, dataset, target_names, num_features, instance_index, output_file
    )


## Gaussian Process

### Heart dataset

In [20]:
# import numpy as np
import lime
import lime.lime_tabular
import sys
sys.path.append('./ml_helper')
import ml_helper_train_model as model_train
import ml_helper_serialize_model as serializer
sys.path.append('./explanation_helper')
import expla_helper as explanation
from sklearn.model_selection import train_test_split

# Read the heart data set and pass it through the project's to_float helper.
heart = explanation.to_float(
    pd.read_csv('data/datasets_33180_43520_heart.csv', sep=',')
)

# Restore the serialized Gaussian Process classifier that will be explained.
model_file_name = 'serialized_model/GaussianProcessClassifier_heart.pkl'
model = serializer.load_model(model_file_name)

# The explainer call below receives a deep copy rather than `heart` itself.
dataset = heart.copy(deep=True)
# Class display names in Portuguese ('saudável' = healthy, 'doente' = sick);
# presumably ordered as label 0, label 1 — TODO confirm.
target_names = np.array(['saudável', 'doente'])

# Build the '<model>#<dataset>' tag used in the output file name: strip the
# directory prefix and the '.pkl' suffix, then swap '_' for '#'.
folder = 'serialized_explanation/'
model_dataset = model_file_name.replace('serialized_model/', '')
model_dataset = model_dataset.replace('.pkl', '')
model_dataset = model_dataset.replace('_', '#')

# One explain_it call per entry of heart_sample (built in the baseline cell).
# NOTE(review): each entry is used as the instance index — confirm that
# iterating heart_sample yields row indices and not column labels.
for sample in heart_sample:
    instance_index = sample
    # Every column except the target counts as a feature.
    num_features = dataset.columns.size - 1
    output_file = """{0}{1}#instance:{2}#num_features:{3}.html""".format(
        folder, model_dataset, instance_index, num_features
    )
    # presumably writes the LIME explanation to output_file — confirm in
    # expla_helper.explain_it.
    explanation.explain_it(
        model, dataset, target_names, num_features, instance_index, output_file
    )


### Wine dataset

In [21]:
# import numpy as np
import lime
import lime.lime_tabular
import sys
sys.path.append('./ml_helper')
import ml_helper_train_model as model_train
import ml_helper_serialize_model as serializer
sys.path.append('./explanation_helper')
import expla_helper as explanation
from sklearn.model_selection import train_test_split

# Read the wine data set and pass it through the project's to_float helper.
# Disabled step kept for reference:
#   wine.drop(['Unnamed: 0'], axis = 1, inplace = True)
wine = explanation.to_float(
    pd.read_csv('data/wine_limpo.csv', sep=',')
)

# Restore the serialized Gaussian Process classifier that will be explained.
model_file_name = 'serialized_model/GaussianProcessClassifier_wine2.pkl'
model = serializer.load_model(model_file_name)

# The explainer call below receives a deep copy rather than `wine` itself.
dataset = wine.copy(deep=True)
# Class display names in Portuguese ('ruim' = bad, 'bom' = good);
# presumably ordered as label 0, label 1 — TODO confirm.
target_names = np.array(['ruim', 'bom'])

# Build the '<model>#<dataset>' tag used in the output file name: strip the
# directory prefix and the '.pkl' suffix, then swap '_' for '#'.
folder = 'serialized_explanation/'
model_dataset = model_file_name.replace('serialized_model/', '')
model_dataset = model_dataset.replace('.pkl', '')
model_dataset = model_dataset.replace('_', '#')

# One explain_it call per entry of wine_sample (built in the baseline cell).
# NOTE(review): each entry is used as the instance index — confirm that
# iterating wine_sample yields row indices and not column labels.
for sample in wine_sample:
    instance_index = sample
    # Every column except the target counts as a feature.
    num_features = dataset.columns.size - 1
    output_file = """{0}{1}#instance:{2}#num_features:{3}.html""".format(
        folder, model_dataset, instance_index, num_features
    )
    # presumably writes the LIME explanation to output_file — confirm in
    # expla_helper.explain_it.
    explanation.explain_it(
        model, dataset, target_names, num_features, instance_index, output_file
    )


### Diabetes dataset

In [22]:
# import numpy as np
import lime
import lime.lime_tabular
import sys
sys.path.append('./ml_helper')
import ml_helper_train_model as model_train
import ml_helper_serialize_model as serializer
sys.path.append('./explanation_helper')
import expla_helper as explanation
from sklearn.model_selection import train_test_split

# Read the diabetes data set and pass it through the project's to_float helper.
# Disabled step kept for reference:
#   diabetes.drop(['Unnamed: 0'], axis = 1, inplace = True)
diabetes = explanation.to_float(
    pd.read_csv('data/diabetes_limpo.csv', sep=',')
)

# Restore the serialized Gaussian Process classifier that will be explained.
model_file_name = 'serialized_model/GaussianProcessClassifier_diabetes2.pkl'
model = serializer.load_model(model_file_name)

# The explainer call below receives a deep copy rather than `diabetes` itself.
dataset = diabetes.copy(deep=True)
# Class display names in Portuguese ('saudável' = healthy, 'doente' = sick);
# presumably ordered as label 0, label 1 — TODO confirm.
target_names = np.array(['saudável', 'doente'])

# Build the '<model>#<dataset>' tag used in the output file name: strip the
# directory prefix and the '.pkl' suffix, then swap '_' for '#'.
folder = 'serialized_explanation/'
model_dataset = model_file_name.replace('serialized_model/', '')
model_dataset = model_dataset.replace('.pkl', '')
model_dataset = model_dataset.replace('_', '#')

# One explain_it call per entry of diabetes_sample (built in the baseline cell).
# NOTE(review): each entry is used as the instance index — confirm that
# iterating diabetes_sample yields row indices and not column labels.
for sample in diabetes_sample:
    instance_index = sample
    # Every column except the target counts as a feature.
    num_features = dataset.columns.size - 1
    output_file = """{0}{1}#instance:{2}#num_features:{3}.html""".format(
        folder, model_dataset, instance_index, num_features
    )
    # presumably writes the LIME explanation to output_file — confirm in
    # expla_helper.explain_it.
    explanation.explain_it(
        model, dataset, target_names, num_features, instance_index, output_file
    )


## SVM

### Heart dataset

In [23]:
# import numpy as np
import lime
import lime.lime_tabular
import sys
sys.path.append('./ml_helper')
import ml_helper_train_model as model_train
import ml_helper_serialize_model as serializer
sys.path.append('./explanation_helper')
import expla_helper as explanation
from sklearn.model_selection import train_test_split

# --- LIME explanations: SVM on the heart data ---
# Relies on names from earlier cells: `pd`, `np`, and `heart_sample`
# (the iterable of instance indices to explain).

# Reload the CSV and pass it through the project's to_float helper.
heart = explanation.to_float(
    pd.read_csv('data/datasets_33180_43520_heart.csv', sep=',')
)

# NOTE(review): the .pkl extension suggests a pickle-based loader -- only
# load model files from a trusted source.
model_file_name = 'serialized_model/svm_heart.pkl'
model = serializer.load_model(model_file_name)

# Work on a private copy so the loaded frame itself is never handed to LIME.
dataset = heart.copy(deep=True)
# Class labels shown in the report (Portuguese: "healthy", "sick").
target_names = np.array(['saudável', 'doente'])

# Output naming: strip the directory and extension from the model path and
# swap '_' for '#', which is also the field separator in the file name.
folder = 'serialized_explanation/'
model_dataset = (
    model_file_name
    .replace('serialized_model/', '')
    .replace('.pkl', '')
    .replace('_', '#')
)

for sample in heart_sample:
    instance_index = sample
    # Explain with every column except the target. Kept inside the loop so it
    # is re-read from `dataset` on each pass, exactly as before.
    num_features = len(dataset.columns) - 1

    # One HTML report per (model, instance, feature-count) combination.
    output_file = '{0}{1}#instance:{2}#num_features:{3}.html'.format(
        folder, model_dataset, instance_index, num_features
    )

    explanation.explain_it(
        model, dataset, target_names, num_features, instance_index, output_file
    )


### Wine dataset

In [24]:
# import numpy as np
import lime
import lime.lime_tabular
import sys
sys.path.append('./ml_helper')
import ml_helper_train_model as model_train
import ml_helper_serialize_model as serializer
sys.path.append('./explanation_helper')
import expla_helper as explanation
from sklearn.model_selection import train_test_split

# --- LIME explanations: SVM on the wine data ---
# Relies on names from earlier cells: `pd`, `np`, and `wine_sample`
# (the iterable of instance indices to explain).

# Reload the cleaned CSV and pass it through the project's to_float helper.
# (Dropping an 'Unnamed: 0' column is intentionally left disabled.)
wine = explanation.to_float(pd.read_csv('data/wine_limpo.csv', sep=','))

# NOTE(review): the .pkl extension suggests a pickle-based loader -- only
# load model files from a trusted source.
model_file_name = 'serialized_model/svm_wine2.pkl'
model = serializer.load_model(model_file_name)

# Work on a private copy so the loaded frame itself is never handed to LIME.
dataset = wine.copy(deep=True)
# Class labels shown in the report (Portuguese: "bad", "good").
target_names = np.array(['ruim', 'bom'])

# Output naming: strip the directory and extension from the model path and
# swap '_' for '#', which is also the field separator in the file name.
folder = 'serialized_explanation/'
model_dataset = (
    model_file_name
    .replace('serialized_model/', '')
    .replace('.pkl', '')
    .replace('_', '#')
)

for sample in wine_sample:
    instance_index = sample
    # Explain with every column except the target. Kept inside the loop so it
    # is re-read from `dataset` on each pass, exactly as before.
    num_features = len(dataset.columns) - 1

    # One HTML report per (model, instance, feature-count) combination.
    output_file = '{0}{1}#instance:{2}#num_features:{3}.html'.format(
        folder, model_dataset, instance_index, num_features
    )

    explanation.explain_it(
        model, dataset, target_names, num_features, instance_index, output_file
    )


### Diabetes dataset

In [25]:
# import numpy as np
import lime
import lime.lime_tabular
import sys
sys.path.append('./ml_helper')
import ml_helper_train_model as model_train
import ml_helper_serialize_model as serializer
sys.path.append('./explanation_helper')
import expla_helper as explanation
from sklearn.model_selection import train_test_split

# --- LIME explanations: SVM on the diabetes data ---
# Relies on names from earlier cells: `pd`, `np`, and `diabetes_sample`
# (the iterable of instance indices to explain).

# Reload the cleaned CSV and pass it through the project's to_float helper.
# (Dropping an 'Unnamed: 0' column is intentionally left disabled.)
diabetes = explanation.to_float(pd.read_csv('data/diabetes_limpo.csv', sep=','))

# NOTE(review): the .pkl extension suggests a pickle-based loader -- only
# load model files from a trusted source.
model_file_name = 'serialized_model/svm_diabetes2.pkl'
model = serializer.load_model(model_file_name)

# Work on a private copy so the loaded frame itself is never handed to LIME.
dataset = diabetes.copy(deep=True)
# Class labels shown in the report (Portuguese: "healthy", "sick").
target_names = np.array(['saudável', 'doente'])

# Output naming: strip the directory and extension from the model path and
# swap '_' for '#', which is also the field separator in the file name.
folder = 'serialized_explanation/'
model_dataset = (
    model_file_name
    .replace('serialized_model/', '')
    .replace('.pkl', '')
    .replace('_', '#')
)

for sample in diabetes_sample:
    instance_index = sample
    # Explain with every column except the target. Kept inside the loop so it
    # is re-read from `dataset` on each pass, exactly as before.
    num_features = len(dataset.columns) - 1

    # One HTML report per (model, instance, feature-count) combination.
    output_file = '{0}{1}#instance:{2}#num_features:{3}.html'.format(
        folder, model_dataset, instance_index, num_features
    )

    explanation.explain_it(
        model, dataset, target_names, num_features, instance_index, output_file
    )
