# XAI Neural Net Creation

## setup

In [16]:
%load_ext autoreload
%autoreload 2
%reload_ext autoreload

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [17]:
import os
import sys
currDir = os.path.dirname(os.path.realpath("__file__"))
rootDir = os.path.abspath(os.path.join(currDir, '..'))
sys.path.insert(1, rootDir)

In [18]:
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import RMSprop,Adam
from tensorflow.keras.utils import to_categorical

## generate artificial dataset

In [20]:

from src.data.make_dataset import make_dataset_1, make_dataset_2
from src.utils.file_utils import save_df

df1 = make_dataset_1()
df2 = make_dataset_2()
# save_df(df1, "dataset1.csv")
# save_df(df2, "dataset2.csv")

successfully generated dataset | num_rows: 2600000
successfully generated dataset | num_rows: 504000


In [5]:
df2.describe()
df1.describe()

Unnamed: 0,mode,ei,to,td,tf,vers,consumption
count,2600000.0,2600000.0,2600000.0,2600000.0,2600000.0,2600000.0,2600000.0
mean,11.6,1.510916,29.4,65.05,10.5,4.5,129.6824
std,8.002501,1.132716,38.91838,37.52776,5.766282,2.872282,313.1455
min,5.0,0.5899,1.0,0.1,1.0,0.0,6.479167e-06
25%,5.0,0.933,3.0,32.575,5.75,2.0,5.455395
50%,6.5,1.354955,4.5,65.05,10.5,4.5,28.29305
75%,21.0,1.354955,55.0,97.525,15.25,7.0,115.7842
max,26.0,4.814286,120.0,130.0,20.0,9.0,6292.0


In [6]:
# from src.utils.file_utils import load_df
# df1, df2 = load_df("dataset1.csv"), load_df("dataset2.csv")

## create neural nets

In [21]:
# def build_model():    
#     model = Sequential()
#     model.add(Dense(6, input_dim=5, activation="relu"))#hidden layer
#     model.add(Dense(10, activation="relu"))#hidden layer
#     model.add(Dense(1, activation='sigmoid'))#output layer

#     optimizer = RMSprop(0.001)
#     model.compile(loss='mse', optimizer=optimizer, metrics=['mse', 'mae', 'mape'])
#     return model

def build_model(num_features, num_classes):
    model = Sequential()
    model.add(Dense(32, input_shape=(num_features,), activation='relu'))
    model.add(Dense(16, activation='relu'))
    model.add(Dense(num_classes, activation='softmax'))
    model.compile(loss='categorical_crossentropy', # Cross-entropy
                optimizer='adam', metrics=['accuracy'])
    return model

## train

In [22]:
def prepare_df(df, y_column):
    temp_df = df.copy()
    temp_df.sample(frac=1)
    
    y = to_categorical(
        temp_df[y_column].values)
    temp_df.drop(columns=[y_column], inplace=True, axis=1)
    
    x = temp_df.values
    scaler = StandardScaler()
    scaler.fit(x)
    x = scaler.transform(x)
    
    return x,y

x1, y1 = prepare_df(df1, "vers")
x_train1, x_test1, y_train1, y_test1 = train_test_split(x1, y1, test_size=0.20)

x2, y2 = prepare_df(df2, "vers")
x_train2, x_test2, y_train2, y_test2 = train_test_split(x2, y2, test_size=0.20)

In [9]:
model1 = build_model(6,10)
history1 = model1.fit(x_train1, y_train1, epochs=10, validation_split=0.2, batch_size=128, verbose=2)

Train on 1664000 samples, validate on 416000 samples
Epoch 1/10
1664000/1664000 - 10s - loss: 1.0975 - accuracy: 0.6136 - val_loss: 0.7232 - val_accuracy: 0.7383
Epoch 2/10
1664000/1664000 - 11s - loss: 0.6351 - accuracy: 0.7708 - val_loss: 0.5701 - val_accuracy: 0.7774
Epoch 3/10
1664000/1664000 - 10s - loss: 0.5356 - accuracy: 0.8001 - val_loss: 0.5138 - val_accuracy: 0.8050
Epoch 4/10
1664000/1664000 - 10s - loss: 0.4844 - accuracy: 0.8190 - val_loss: 0.4608 - val_accuracy: 0.8256
Epoch 5/10
1664000/1664000 - 10s - loss: 0.4458 - accuracy: 0.8327 - val_loss: 0.4235 - val_accuracy: 0.8360
Epoch 6/10
1664000/1664000 - 9s - loss: 0.4140 - accuracy: 0.8435 - val_loss: 0.4161 - val_accuracy: 0.8341
Epoch 7/10
1664000/1664000 - 9s - loss: 0.3911 - accuracy: 0.8509 - val_loss: 0.3778 - val_accuracy: 0.8542
Epoch 8/10
1664000/1664000 - 9s - loss: 0.3723 - accuracy: 0.8577 - val_loss: 0.3616 - val_accuracy: 0.8598
Epoch 9/10
1664000/1664000 - 9s - loss: 0.3562 - accuracy: 0.8634 - val_loss: 

In [10]:
model2 = build_model(5,7)
history = model2.fit(x_train2, y_train2, epochs=15, validation_split=0.2, batch_size=128, verbose=2)

Train on 322560 samples, validate on 80640 samples
Epoch 1/15
322560/322560 - 2s - loss: 1.3116 - accuracy: 0.4365 - val_loss: 1.0893 - val_accuracy: 0.5226
Epoch 2/15
322560/322560 - 2s - loss: 0.9939 - accuracy: 0.5559 - val_loss: 0.9198 - val_accuracy: 0.6249
Epoch 3/15
322560/322560 - 2s - loss: 0.8855 - accuracy: 0.6116 - val_loss: 0.8605 - val_accuracy: 0.6006
Epoch 4/15
322560/322560 - 2s - loss: 0.8170 - accuracy: 0.6631 - val_loss: 0.7822 - val_accuracy: 0.7040
Epoch 5/15
322560/322560 - 2s - loss: 0.7586 - accuracy: 0.7065 - val_loss: 0.7336 - val_accuracy: 0.7082
Epoch 6/15
322560/322560 - 2s - loss: 0.7049 - accuracy: 0.7449 - val_loss: 0.6829 - val_accuracy: 0.7425
Epoch 7/15
322560/322560 - 2s - loss: 0.6566 - accuracy: 0.7772 - val_loss: 0.6244 - val_accuracy: 0.8170
Epoch 8/15
322560/322560 - 2s - loss: 0.6116 - accuracy: 0.8056 - val_loss: 0.5961 - val_accuracy: 0.7902
Epoch 9/15
322560/322560 - 2s - loss: 0.5736 - accuracy: 0.8252 - val_loss: 0.5545 - val_accuracy: 0.

## test

In [11]:
print(model1.evaluate(x_test1, y_test1))
print(model2.evaluate(x_test2, y_test2))
# [0.09895923781607832, 0.85723215]

[0.33882213577949083, 0.8641346]
[0.4020612595384083, 0.8942163]


In [12]:
preds = model1.predict_classes(x_test1)
# print(preds)
for i in range(len(y_test2[:10])):
    print(preds[i], list(y_test2[i]).index(1))

1 6
2 3
7 6
1 4
4 2
5 2
9 1
4 4
5 1
3 3


In [13]:
# from utils.file_utils import

In [29]:
import lime
import lime.lime_tabular
explainerx1 = lime.lime_tabular.LimeTabularExplainer(training_data=x_train1, mode='classification')
explanationx1 = explainerx1.explain_instance(x_test1[0], model1.predict_proba, num_features=5)
explainerx2 = lime.lime_tabular.LimeTabularExplainer(training_data=x_train2, mode='classification')
explanationx2 = explainerx2.explain_instance(x_test2[0], model2.predict_proba, num_features=5)

explainery1 = lime.lime_tabular.LimeTabularExplainer(training_data=x_train1, mode='classification')
explanationy1 = explainery1.explain_instance(y_test1[0], model1.predict_proba, num_features=5)
explainery2 = lime.lime_tabular.LimeTabularExplainer(training_data=x_train2, mode='classification')
explanationy2 = explainery2.explain_instance(y_test2[0], model2.predict_proba, num_features=5)
# print(explanation.as_list())
explanationx1.as_pyplot_figure()
explanationx2.as_pyplot_figure()

explanationy1.as_pyplot_figure()
explanationy2.as_pyplot_figure()

from matplotlib import pyplot as plt
# plt.savefig()
#this is to save the figures

KeyError: 6