# XAI Neural Net Creation

## setup

In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import os
import sys
# A notebook kernel defines no __file__, so the literal string "__file__" is
# resolved against the current working directory; its dirname is therefore the
# directory the kernel is running in.
_kernel_probe = os.path.realpath("__file__")
currDir = os.path.dirname(_kernel_probe)

# The project root is taken to be one level above that directory; putting it on
# sys.path is what makes the `src.*` imports in later cells resolvable.
_parent_of_curr = os.path.join(currDir, '..')
rootDir = os.path.abspath(_parent_of_curr)
sys.path.insert(1, rootDir)

In [3]:
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import RMSprop,Adam
from tensorflow.keras.utils import to_categorical



## generate artificial dataset

In [42]:
from src.data.make_dataset import make_dataset_1, make_dataset_2
from src.utils.file_utils import save_df

# Build both artificial datasets in memory. Judging by the recorded output,
# each generator reports the number of rows it produced.
df1 = make_dataset_1()
df2 = make_dataset_2()
# Persisting to disk is switched off here.
# NOTE(review): a later cell reads "dataset1.csv" / "dataset2.csv" through
# load_df, so these files presumably have to be written once before that cell
# can run on a fresh checkout -- confirm.
# save_df(df1, "dataset1.csv")
# save_df(df2, "dataset2.csv")

successfully generated dataset | num_rows: 2600000
successfully generated dataset | num_rows: 504000


In [5]:
# A cell only renders its final expression, so a bare `df2.describe()` on the
# first line is computed and thrown away. Show it explicitly with `display`
# (provided as a builtin by the IPython kernel) and leave the df1 summary as
# the cell's regular output.
display(df2.describe())
df1.describe()

Unnamed: 0,mode,ei,to,td,tf,vers,consumption
count,2600000.0,2600000.0,2600000.0,2600000.0,2600000.0,2600000.0,2600000.0
mean,11.6,1.510916,29.4,65.05,10.5,4.5,148.4044
std,8.002501,1.132716,38.91838,37.52776,5.766282,2.872282,328.2216
min,5.0,0.5899,1.0,0.1,1.0,0.0,6.479167e-06
25%,5.0,0.933,3.0,32.575,5.75,2.0,6.713139
50%,6.5,1.354955,4.5,65.05,10.5,4.5,35.09333
75%,21.0,1.354955,55.0,97.525,15.25,7.0,142.8353
max,26.0,4.814286,120.0,130.0,20.0,9.0,6292.0


In [6]:
from src.utils.file_utils import load_df
# Rebinds df1/df2 to whatever is stored under these file names, replacing the
# frames produced by the generation cell.
# NOTE(review): the save_df calls in the generation cell are commented out, so
# these files presumably stem from an earlier run and may not match the current
# generators -- confirm before trusting downstream results.
df1, df2 = load_df("dataset1.csv"), load_df("dataset2.csv")

## create neural nets

In [59]:
# def build_model():    
#     model = Sequential()
#     model.add(Dense(6, input_dim=5, activation="relu"))#hidden layer
#     model.add(Dense(10, activation="relu"))#hidden layer
#     model.add(Dense(1, activation='sigmoid'))#output layer

#     optimizer = RMSprop(0.001)
#     model.compile(loss='mse', optimizer=optimizer, metrics=['mse', 'mae', 'mape'])
#     return model

def build_model(num_features, num_classes):
    """Create and compile a small dense softmax classifier.

    Args:
        num_features: width of one input sample (number of feature columns).
        num_classes: number of output units, one per class.

    Returns:
        A compiled ``Sequential`` model with two ReLU hidden layers (32 and 16
        units) and a softmax output layer, using the Adam optimizer,
        categorical cross-entropy loss and accuracy as the tracked metric.
    """
    layer_stack = [
        Dense(32, input_shape=(num_features,), activation='relu'),
        Dense(16, activation='relu'),
        Dense(num_classes, activation='softmax'),
    ]
    classifier = Sequential(layer_stack)
    classifier.compile(
        optimizer='adam',
        loss='categorical_crossentropy',
        metrics=['accuracy'],
    )
    return classifier

## train

In [43]:
def prepare_df(df, y_column, random_state=None):
    """Turn a labelled frame into shuffled, scaled features and one-hot targets.

    Args:
        df: pandas DataFrame holding the label column plus the feature
            columns. It is not modified.
        y_column: name of the label column. Its values are passed to
            ``to_categorical``, so they are assumed to be non-negative integer
            class ids (TODO confirm for every dataset).
        random_state: optional seed forwarded to ``DataFrame.sample`` so the
            row order can be reproduced. ``None`` (the default) gives a
            different shuffle on every call.

    Returns:
        Tuple ``(x, y)``: ``x`` is the feature array after ``StandardScaler``
        and ``y`` the one-hot encoded labels, both in the same shuffled row
        order.
    """
    # sample() returns a new frame and leaves its input untouched; the result
    # has to be kept, otherwise no shuffling takes place at all.
    shuffled = df.sample(frac=1, random_state=random_state)

    y = to_categorical(shuffled[y_column].values)

    # Everything except the label column is treated as a feature.
    features = shuffled.drop(columns=[y_column])

    # NOTE(review): the scaler is fitted on all rows before any train/test
    # split, so held-out rows influence the scaling statistics, and the fitted
    # scaler is not returned for reuse on new data. Changing that would alter
    # the return value, so it is only flagged here.
    x = StandardScaler().fit_transform(features.values)

    return x, y

# Fixed seed so the 80/20 train/test partition is the same on every run;
# without it each execution of this cell evaluates on a different test set.
SPLIT_SEED = 42

x1, y1 = prepare_df(df1, "vers")
x_train1, x_test1, y_train1, y_test1 = train_test_split(
    x1, y1, test_size=0.20, random_state=SPLIT_SEED)

x2, y2 = prepare_df(df2, "vers")
x_train2, x_test2, y_train2, y_test2 = train_test_split(
    x2, y2, test_size=0.20, random_state=SPLIT_SEED)

In [60]:
# Size the network from the prepared arrays instead of hard-coded counts, so
# the input and output widths keep matching the data when the dataset changes.
model1 = build_model(x_train1.shape[1], y_train1.shape[1])
history1 = model1.fit(x_train1, y_train1, epochs=10, validation_split=0.2, batch_size=128, verbose=2)

Train on 1664000 samples, validate on 416000 samples
Epoch 1/10
1664000/1664000 - 43s - loss: 1.0511 - accuracy: 0.6320 - val_loss: 0.6964 - val_accuracy: 0.7648
Epoch 2/10
1664000/1664000 - 34s - loss: 0.6022 - accuracy: 0.7837 - val_loss: 0.5295 - val_accuracy: 0.8039
Epoch 3/10
1664000/1664000 - 38s - loss: 0.4954 - accuracy: 0.8130 - val_loss: 0.4711 - val_accuracy: 0.8212
Epoch 4/10
1664000/1664000 - 39s - loss: 0.4474 - accuracy: 0.8281 - val_loss: 0.4388 - val_accuracy: 0.8288
Epoch 5/10
1664000/1664000 - 40s - loss: 0.4188 - accuracy: 0.8376 - val_loss: 0.4031 - val_accuracy: 0.8490
Epoch 6/10
1664000/1664000 - 45s - loss: 0.3955 - accuracy: 0.8452 - val_loss: 0.3891 - val_accuracy: 0.8444
Epoch 7/10
1664000/1664000 - 36s - loss: 0.3790 - accuracy: 0.8499 - val_loss: 0.3752 - val_accuracy: 0.8557
Epoch 8/10
1664000/1664000 - 25s - loss: 0.3664 - accuracy: 0.8550 - val_loss: 0.3859 - val_accuracy: 0.8458
Epoch 9/10
1664000/1664000 - 37s - loss: 0.3573 - accuracy: 0.8593 - val_lo

In [66]:
# Size the network from the prepared arrays instead of hard-coded counts, so
# the input and output widths keep matching the data when the dataset changes.
model2 = build_model(x_train2.shape[1], y_train2.shape[1])
history2 = model2.fit(x_train2, y_train2, epochs=15, validation_split=0.2, batch_size=128, verbose=2)
# The first model stores its training log as `history1`; `history` is kept as
# an alias in case other cells still refer to the old name.
history = history2

Train on 322560 samples, validate on 80640 samples
Epoch 1/15
322560/322560 - 5s - loss: 1.3234 - accuracy: 0.4344 - val_loss: 1.0813 - val_accuracy: 0.5489
Epoch 2/15
322560/322560 - 4s - loss: 0.9854 - accuracy: 0.5679 - val_loss: 0.9069 - val_accuracy: 0.6536
Epoch 3/15
322560/322560 - 4s - loss: 0.8565 - accuracy: 0.6565 - val_loss: 0.8161 - val_accuracy: 0.6689
Epoch 4/15
322560/322560 - 4s - loss: 0.7729 - accuracy: 0.7157 - val_loss: 0.7424 - val_accuracy: 0.7190
Epoch 5/15
322560/322560 - 4s - loss: 0.7089 - accuracy: 0.7520 - val_loss: 0.6945 - val_accuracy: 0.7386
Epoch 6/15
322560/322560 - 4s - loss: 0.6559 - accuracy: 0.7826 - val_loss: 0.6265 - val_accuracy: 0.8509
Epoch 7/15
322560/322560 - 4s - loss: 0.6119 - accuracy: 0.8053 - val_loss: 0.5908 - val_accuracy: 0.7916
Epoch 8/15
322560/322560 - 4s - loss: 0.5749 - accuracy: 0.8241 - val_loss: 0.5651 - val_accuracy: 0.8217
Epoch 9/15
322560/322560 - 4s - loss: 0.5425 - accuracy: 0.8392 - val_loss: 0.5469 - val_accuracy: 0.

## test

In [67]:
# Held-out performance of both classifiers. With the loss and single metric
# configured in build_model, evaluate() reports [loss, accuracy].
test_scores1 = model1.evaluate(x_test1, y_test1)
print(test_scores1)

test_scores2 = model2.evaluate(x_test2, y_test2)
print(test_scores2)
# result noted from an earlier run: [0.09895923781607832, 0.85723215]

[0.34577132857785775, 0.8594904]
[0.42430493939017494, 0.8750099]


In [35]:
# Sequential.predict_classes is deprecated and absent from newer TensorFlow
# releases; taking the argmax over the softmax outputs gives the same class
# ids on every version.
preds = model2.predict(x_test2).argmax(axis=-1)

# Spot-check the first ten test rows: predicted class next to the true class
# (the position of the 1 in the one-hot target row).
true_labels = y_test2[:10].argmax(axis=-1)
for predicted, actual in zip(preds, true_labels):
    print(predicted, actual)

8 8
4 4
7 2
4 4
9 8
2 2
4 4
6 6
5 9
0 2


In [None]:
from src.utils.file_utils import