# XAI Neural Net Creation

## setup

In [1]:
# Enable IPython's autoreload extension in mode 2, which re-imports modules
# before executing each cell, so edits to the project modules imported below
# are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import os
import sys
# Make the repository root (the parent of the notebook's working directory)
# importable so the `src.*` packages resolve.
#
# Notebooks do not define `__file__`. The previous code passed the *string*
# "__file__" to realpath, whose dirname is simply the resolved working
# directory, so state that directly instead of relying on the trick.
currDir = os.path.realpath(os.getcwd())
rootDir = os.path.abspath(os.path.join(currDir, '..'))
# Guard the insert so re-running this cell does not stack duplicate entries.
if rootDir not in sys.path:
    sys.path.insert(1, rootDir)

In [6]:
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import RMSprop,Adam
from tensorflow.keras.utils import to_categorical

from src.data.make_dataset import make_dataset_1, make_dataset_2
from src.utils.file_utils import save_df, save_model

## generate artificial dataset

In [8]:
# Build both artificial datasets first, then hand each one to save_df under
# its own file name (same call order as before: generate, generate, save, save).
df1, df2 = make_dataset_1(), make_dataset_2()
for _frame, _csv_name in ((df1, "dataset1.csv"), (df2, "dataset2.csv")):
    save_df(_frame, _csv_name)

successfully generated dataset | num_rows: 2600000
successfully generated dataset | num_rows: 504000
df successfully saved | filename: dataset1.csv, dir: C:\Users\archg\school\senoir\xai-senior-design\data
df successfully saved | filename: dataset2.csv, dir: C:\Users\archg\school\senoir\xai-senior-design\data


In [9]:
# Only a cell's final expression is rendered, so the former standalone
# `df2.describe()` line was computed and silently discarded. Use IPython's
# display() (available as a builtin inside the kernel) to show both summaries.
display(df1.describe(), df2.describe())

Unnamed: 0,mode,ei,to,td,tf,vers
count,2600000.0,2600000.0,2600000.0,2600000.0,2600000.0,2600000.0
mean,11.6,1.854442,267.7224,71.555,12.16666,4.5
std,8.002501,3.186181,1555.065,47.02838,11.25711,2.872282
min,5.0,0.347982,1.0,0.1,1.0,0.0
25%,5.0,0.933,3.0,34.3,3.968246,2.0
50%,6.5,1.354955,5.0,68.5,9.0,4.5
75%,21.0,1.354955,55.0,102.7,17.0,7.0
max,26.0,23.17735,14400.0,260.0,60.0,9.0


In [11]:
# Read both datasets back through load_df, rebinding df1/df2 to the loaded
# frames.
from src.utils.file_utils import load_df

df1 = load_df("dataset1.csv")
df2 = load_df("dataset2.csv")

## create neural nets

In [12]:
# def build_model():    
#     model = Sequential()
#     model.add(Dense(6, input_dim=5, activation="relu"))#hidden layer
#     model.add(Dense(10, activation="relu"))#hidden layer
#     model.add(Dense(1, activation='sigmoid'))#output layer

#     optimizer = RMSprop(0.001)
#     model.compile(loss='mse', optimizer=optimizer, metrics=['mse', 'mae', 'mape'])
#     return model

def build_model(num_features, num_classes):
    """Assemble and compile a small feed-forward softmax classifier.

    Args:
        num_features: Number of input columns; used as the first layer's
            input shape ``(num_features,)``.
        num_classes: Number of units in the softmax output layer.

    Returns:
        A compiled Keras ``Sequential`` model with the layout
        Dense(32, relu) -> Dense(16, relu) -> Dense(num_classes, softmax),
        trained with categorical cross-entropy, the Adam optimizer and an
        accuracy metric.
    """
    layer_stack = [
        Dense(32, input_shape=(num_features,), activation='relu'),
        Dense(16, activation='relu'),
        Dense(num_classes, activation='softmax'),
    ]
    classifier = Sequential(layer_stack)
    classifier.compile(
        optimizer='adam',
        loss='categorical_crossentropy',  # pairs with one-hot encoded targets
        metrics=['accuracy'],
    )
    return classifier

## train

In [13]:
def prepare_df(df, y_column, random_state=None):
    """Shuffle a frame and split it into standardized features and one-hot labels.

    Args:
        df: Source DataFrame; it is not modified.
        y_column: Name of the label column. Its values are passed straight to
            ``to_categorical``, so they are expected to be non-negative
            integer class ids.
        random_state: Optional seed forwarded to ``DataFrame.sample`` to make
            the shuffle reproducible. Defaults to ``None`` (unseeded).

    Returns:
        Tuple ``(x, y, features)``: the standardized feature matrix, the
        one-hot label matrix, and the list of feature column names in the
        same order as the columns of ``x``.
    """
    # `sample` returns a new frame rather than shuffling in place; the
    # previous code dropped that return value, so rows were never shuffled.
    shuffled = df.sample(frac=1, random_state=random_state)

    y = to_categorical(shuffled[y_column].values)

    # Non-mutating drop: every remaining column is treated as a feature.
    feature_df = shuffled.drop(columns=[y_column])
    features = list(feature_df.columns)

    # NOTE(review): the scaler is fitted on every row, including rows that
    # callers later hold out for testing. Fit on the training split only if
    # that leakage matters for the experiment.
    x = StandardScaler().fit_transform(feature_df.values)

    return x, y, features

# Dataset 1: prepare, split 80/20, and save the scaled feature matrices.
x1, y1, features1 = prepare_df(df1, "vers")
x_train1, x_test1, y_train1, y_test1 = train_test_split(x1, y1, test_size=0.20)
norm_train1, norm_test1 = pd.DataFrame(x_train1, columns=features1), pd.DataFrame(x_test1, columns=features1)
save_df(norm_train1, "norm_train1.csv")
# Fixed: the test file used to be written from the training frame.
save_df(norm_test1, "norm_test1.csv")

# Dataset 2: same pipeline.
x2, y2, features2 = prepare_df(df2, "vers")
x_train2, x_test2, y_train2, y_test2 = train_test_split(x2, y2, test_size=0.20)
norm_train2, norm_test2 = pd.DataFrame(x_train2, columns=features2), pd.DataFrame(x_test2, columns=features2)
save_df(norm_train2, "norm_train2.csv")
# Fixed: the test file used to be written from the training frame.
save_df(norm_test2, "norm_test2.csv")

df successfully saved | filename: norm_train1.csv, dir: C:\Users\archg\school\senoir\xai-senior-design\data
df successfully saved | filename: norm_train2.csv, dir: C:\Users\archg\school\senoir\xai-senior-design\data


In [15]:
# Size the network from the prepared arrays instead of hard-coding 5 inputs
# and 10 classes, so the cell keeps working if the dataset's columns or
# label range change.
model1 = build_model(x_train1.shape[1], y_train1.shape[1])
history1 = model1.fit(x_train1, y_train1, epochs=10, validation_split=0.2, batch_size=128, verbose=2)

Train on 1664000 samples, validate on 416000 samples
Epoch 1/10
1664000/1664000 - 15s - loss: 0.9167 - accuracy: 0.6658 - val_loss: 0.6793 - val_accuracy: 0.7699
Epoch 2/10
1664000/1664000 - 14s - loss: 0.6293 - accuracy: 0.7701 - val_loss: 0.5958 - val_accuracy: 0.7756
Epoch 3/10
1664000/1664000 - 14s - loss: 0.5820 - accuracy: 0.7800 - val_loss: 0.5685 - val_accuracy: 0.7815
Epoch 4/10
1664000/1664000 - 15s - loss: 0.5636 - accuracy: 0.7821 - val_loss: 0.5624 - val_accuracy: 0.7837
Epoch 5/10
1664000/1664000 - 15s - loss: 0.5522 - accuracy: 0.7831 - val_loss: 0.5587 - val_accuracy: 0.7889
Epoch 6/10
1664000/1664000 - 15s - loss: 0.5438 - accuracy: 0.7843 - val_loss: 0.5420 - val_accuracy: 0.7770
Epoch 7/10
1664000/1664000 - 15s - loss: 0.5378 - accuracy: 0.7855 - val_loss: 0.5311 - val_accuracy: 0.7922
Epoch 8/10
1664000/1664000 - 15s - loss: 0.5341 - accuracy: 0.7864 - val_loss: 0.5315 - val_accuracy: 0.7827
Epoch 9/10
1664000/1664000 - 15s - loss: 0.5286 - accuracy: 0.7875 - val_lo

In [16]:
# Size the network from the prepared arrays instead of hard-coding 4 inputs
# and 7 classes, so the cell keeps working if the dataset's columns or
# label range change.
model2 = build_model(x_train2.shape[1], y_train2.shape[1])
history = model2.fit(x_train2, y_train2, epochs=15, validation_split=0.2, batch_size=128, verbose=2)

Train on 322560 samples, validate on 80640 samples
Epoch 1/15
322560/322560 - 3s - loss: 0.6130 - accuracy: 0.8152 - val_loss: 0.2700 - val_accuracy: 0.9323
Epoch 2/15
322560/322560 - 3s - loss: 0.2169 - accuracy: 0.9461 - val_loss: 0.1857 - val_accuracy: 0.9556
Epoch 3/15
322560/322560 - 3s - loss: 0.1710 - accuracy: 0.9524 - val_loss: 0.1628 - val_accuracy: 0.9518
Epoch 4/15
322560/322560 - 3s - loss: 0.1567 - accuracy: 0.9550 - val_loss: 0.1509 - val_accuracy: 0.9543
Epoch 5/15
322560/322560 - 3s - loss: 0.1499 - accuracy: 0.9568 - val_loss: 0.1459 - val_accuracy: 0.9587
Epoch 6/15
322560/322560 - 3s - loss: 0.1463 - accuracy: 0.9583 - val_loss: 0.1428 - val_accuracy: 0.9589
Epoch 7/15
322560/322560 - 3s - loss: 0.1437 - accuracy: 0.9591 - val_loss: 0.1402 - val_accuracy: 0.9616
Epoch 8/15
322560/322560 - 3s - loss: 0.1412 - accuracy: 0.9597 - val_loss: 0.1389 - val_accuracy: 0.9581
Epoch 9/15
322560/322560 - 3s - loss: 0.1394 - accuracy: 0.9604 - val_loss: 0.1408 - val_accuracy: 0.

## test

In [17]:
# Score each model on its own held-out split and print what evaluate()
# returns (with build_model's compile settings: loss, then accuracy).
for _model, _x_test, _y_test in (
    (model1, x_test1, y_test1),
    (model2, x_test2, y_test2),
):
    print(_model.evaluate(_x_test, _y_test))
# NOTE(review): figures below look like they were kept from an earlier run — confirm or remove.
# [0.09895923781607832, 0.85723215]

[0.5345235340760305, 0.7747423]
[0.13025165943990624, 0.9620536]


In [18]:
# `Sequential.predict_classes` is deprecated and absent from newer
# TensorFlow releases; for a softmax output the equivalent is the argmax of
# the predicted probabilities.
preds = model2.predict(x_test2).argmax(axis=-1)
# Print predicted vs. true class for the first 10 test rows. The true class
# is the position of the 1 in each one-hot label row.
for predicted_class, onehot_row in zip(preds[:10], y_test2[:10]):
    print(predicted_class, onehot_row.argmax())

5 5
0 0
2 2
2 2
0 0
1 1
4 4
5 5
3 3
1 1


In [19]:
# Hand both trained networks to save_model, each under its own .h5 file name.
for _trained, _h5_name in ((model1, "model1.h5"), (model2, "model2.h5")):
    save_model(_trained, _h5_name)

model successfully saved | file_location: C:\Users\archg\school\senoir\xai-senior-design\models\model1.h5
model successfully saved | file_location: C:\Users\archg\school\senoir\xai-senior-design\models\model2.h5
