# XAI Neural Net Creation

## setup

In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import os
import sys
currDir = os.path.dirname(os.path.realpath("__file__"))
rootDir = os.path.abspath(os.path.join(currDir, '..'))
sys.path.insert(1, rootDir)

In [3]:
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import RMSprop,Adam
from tensorflow.keras.utils import to_categorical

from src.data.make_dataset import make_dataset_1, make_dataset_2
from src.utils.file_utils import save_df, save_model



## generate artificial dataset

In [4]:
df1 = make_dataset_1()
df2 = make_dataset_2()
save_df(df1, "dataset1.csv")
save_df(df2, "dataset2.csv")

successfully generated dataset | num_rows: 2600000
successfully generated dataset | num_rows: 504000
df successfully saved | filename: dataset1.csv, dir: /Users/test/Documents/GitHub/xai-senior-design/data
df successfully saved | filename: dataset2.csv, dir: /Users/test/Documents/GitHub/xai-senior-design/data


In [5]:
df2.describe()
df1.describe()

Unnamed: 0,mode,ei,to,td,tf,vers
count,2600000.0,2600000.0,2600000.0,2600000.0,2600000.0,2600000.0
mean,11.6,1.854442,267.7224,71.555,12.16666,4.5
std,8.002501,3.186181,1555.065,47.02838,11.25711,2.872282
min,5.0,0.347982,1.0,0.1,1.0,0.0
25%,5.0,0.933,3.0,34.3,3.968246,2.0
50%,6.5,1.354955,5.0,68.5,9.0,4.5
75%,21.0,1.354955,55.0,102.7,17.0,7.0
max,26.0,23.17735,14400.0,260.0,60.0,9.0


In [4]:
from src.utils.file_utils import load_df
df1, df2 = load_df("dataset1.csv"), load_df("dataset2.csv")

## create neural nets

In [69]:
# def build_model():    
#     model = Sequential()
#     model.add(Dense(6, input_dim=5, activation="relu"))#hidden layer
#     model.add(Dense(10, activation="relu"))#hidden layer
#     model.add(Dense(1, activation='sigmoid'))#output layer

#     optimizer = RMSprop(0.001)
#     model.compile(loss='mse', optimizer=optimizer, metrics=['mse', 'mae', 'mape'])
#     return model

def build_model(num_features, num_classes):
    model = Sequential()
    model.add(Dense(256, input_shape=(num_features,), activation='relu'))
    model.add(Dense(256, activation='relu'))
    model.add(Dense(128, activation='relu'))
    model.add(Dense(64, activation='relu'))
    model.add(Dense(num_classes, activation='softmax'))
    model.compile(loss='categorical_crossentropy', # Cross-entropy
                optimizer='adam', metrics=['accuracy'])
    return model

def build_model2(num_features, num_classes):
    model = Sequential()
    model.add(Dense(5, input_shape=(num_features,), activation='relu'))
    model.add(Dense(8, activation='relu'))
    model.add(Dense(num_classes, activation='softmax'))
    model.compile(loss='categorical_crossentropy', # Cross-entropy
                optimizer='adam', metrics=['accuracy'])
    return model

## train

In [9]:
def prepare_df(df, y_column):
    temp_df = df.copy()
    temp_df.sample(frac=1)
    
    y = to_categorical(
        temp_df[y_column].values)
    temp_df.drop(y_column, axis=1, inplace=True)
    features = list(temp_df.columns)
    
    x = temp_df.values
    scaler = StandardScaler()
    scaler.fit(x)
    x = scaler.transform(x)
    
    return x,y,features

x1, y1, features1 = prepare_df(df1, "vers")
x_train1, x_test1, y_train1, y_test1 = train_test_split(x1, y1, test_size=0.20)
norm_train1, norm_test1 = pd.DataFrame(x_train1, columns=features1), pd.DataFrame(x_test1, columns=features1)
# save_df(norm_train1, "norm_train1.csv")
# save_df(norm_train1, "norm_test1.csv")

x2, y2, features2 = prepare_df(df2, "vers")
x_train2, x_test2, y_train2, y_test2 = train_test_split(x2, y2, test_size=0.20)
norm_train2, norm_test2 = pd.DataFrame(x_train2, columns=features2), pd.DataFrame(x_test2, columns=features2)
# save_df(norm_train2, "norm_train2.csv")
# save_df(norm_train2, "norm_test2.csv")

In [70]:
model1 = build_model(5,10)
history1 = model1.fit(x_train1, y_train1, epochs=20, validation_split=0.2, batch_size=128, verbose=2)

Train on 1664000 samples, validate on 416000 samples
Epoch 1/20
1664000/1664000 - 76s - loss: 0.6213 - accuracy: 0.7483 - val_loss: 0.6395 - val_accuracy: 0.7383
Epoch 2/20
1664000/1664000 - 81s - loss: 0.5014 - accuracy: 0.7811 - val_loss: 0.4923 - val_accuracy: 0.7760
Epoch 3/20
1664000/1664000 - 56s - loss: 0.4657 - accuracy: 0.7869 - val_loss: 0.4389 - val_accuracy: 0.7946
Epoch 4/20
1664000/1664000 - 78s - loss: 0.4515 - accuracy: 0.7911 - val_loss: 0.4959 - val_accuracy: 0.7607
Epoch 5/20
1664000/1664000 - 76s - loss: 0.4392 - accuracy: 0.7943 - val_loss: 0.4010 - val_accuracy: 0.8158
Epoch 6/20
1664000/1664000 - 74s - loss: 0.4309 - accuracy: 0.7963 - val_loss: 0.4362 - val_accuracy: 0.7992
Epoch 7/20
1664000/1664000 - 46s - loss: 0.4251 - accuracy: 0.7978 - val_loss: 0.4102 - val_accuracy: 0.8054
Epoch 8/20
1664000/1664000 - 42s - loss: 0.4181 - accuracy: 0.8003 - val_loss: 0.4799 - val_accuracy: 0.7789
Epoch 9/20
1664000/1664000 - 42s - loss: 0.4165 - accuracy: 0.8001 - val_lo

In [65]:
print(model1.evaluate(x_test1, y_test1))

[0.33066846861655896, 0.8231481]


In [54]:
a = model1.predict_classes(x_test1[:100])
b = y_test1[:100]

for idx, val in enumerate(a):
    if b[idx][val] == 0:
        print(x_test1[idx], a[idx],b[idx])

[-0.82474231 -0.15676661 -0.16958935  0.26888026  0.16286073] 0 [0. 0. 0. 0. 0. 0. 1. 0. 0. 0.]
[-0.82474231 -0.15676661 -0.17087547 -1.01119816 -0.72546721] 3 [1. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
[-0.82474231 -0.15676661 -0.17151853 -0.26696652  0.16286073] 9 [0. 0. 0. 0. 0. 1. 0. 0. 0. 0.]
[-0.82474231 -0.00581849 -0.17023241 -1.34716558 -0.9031328 ] 7 [0. 0. 0. 0. 1. 0. 0. 0. 0. 0.]
[-0.82474231 -0.15676661 -0.17151853 -0.16064772 -0.37013603] 9 [1. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
[-0.82474231 -0.15676661 -0.16637405  0.73030388 -0.72546721] 1 [0. 0. 0. 0. 0. 0. 1. 0. 0. 0.]
[-0.82474231 -0.15676661 -0.16958935 -0.05645529 -0.54780162] 9 [1. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
[ 1.79943776  0.92896303 -0.15544203 -0.47960413 -0.37013603] 0 [0. 0. 0. 0. 0. 1. 0. 0. 0. 0.]
[ 0.29990629 -0.33495956 -0.13679329  0.51979264 -0.28130324] 0 [0. 0. 0. 0. 0. 1. 0. 0. 0. 0.]
[ 1.17463298 -0.4728105  -0.12393209  0.35180893 -0.8143    ] 7 [0. 0. 0. 0. 1. 0. 0. 0. 0. 0.]
[-0.82474231 -0.15676661 -0.16958935 -0.

In [20]:
model1_2 = build_model2(5,10)
history1_2 = model1_2.fit(x_train1, y_train1, epochs=10, validation_split=0.2, batch_size=128, verbose=2)

Train on 1664000 samples, validate on 416000 samples
Epoch 1/10
1664000/1664000 - 22s - loss: 1.4032 - accuracy: 0.4843 - val_loss: 1.1149 - val_accuracy: 0.6191
Epoch 2/10
1664000/1664000 - 23s - loss: 1.0531 - accuracy: 0.6301 - val_loss: 1.0188 - val_accuracy: 0.6388
Epoch 3/10
1664000/1664000 - 22s - loss: 0.9960 - accuracy: 0.6466 - val_loss: 0.9791 - val_accuracy: 0.6665
Epoch 4/10
1664000/1664000 - 24s - loss: 0.9657 - accuracy: 0.6612 - val_loss: 0.9565 - val_accuracy: 0.6560
Epoch 5/10
1664000/1664000 - 23s - loss: 0.9470 - accuracy: 0.6649 - val_loss: 0.9427 - val_accuracy: 0.6671
Epoch 6/10
1664000/1664000 - 21s - loss: 0.9319 - accuracy: 0.6680 - val_loss: 0.9262 - val_accuracy: 0.6682
Epoch 7/10
1664000/1664000 - 21s - loss: 0.9187 - accuracy: 0.6702 - val_loss: 0.9206 - val_accuracy: 0.6623
Epoch 8/10
1664000/1664000 - 26s - loss: 0.9093 - accuracy: 0.6707 - val_loss: 0.9207 - val_accuracy: 0.6503
Epoch 9/10
1664000/1664000 - 26s - loss: 0.9021 - accuracy: 0.6712 - val_lo

In [10]:
model2 = build_model(4,7)
history2 = model2.fit(x_train2, y_train2, epochs=15, validation_split=0.2, batch_size=128, verbose=2)

Train on 322560 samples, validate on 80640 samples
Epoch 1/15
322560/322560 - 6s - loss: 0.6668 - accuracy: 0.7958 - val_loss: 0.2820 - val_accuracy: 0.9435
Epoch 2/15
322560/322560 - 5s - loss: 0.2228 - accuracy: 0.9446 - val_loss: 0.1930 - val_accuracy: 0.9606
Epoch 3/15
322560/322560 - 6s - loss: 0.1733 - accuracy: 0.9515 - val_loss: 0.1662 - val_accuracy: 0.9506
Epoch 4/15
322560/322560 - 4s - loss: 0.1575 - accuracy: 0.9544 - val_loss: 0.1537 - val_accuracy: 0.9590
Epoch 5/15
322560/322560 - 5s - loss: 0.1497 - accuracy: 0.9569 - val_loss: 0.1468 - val_accuracy: 0.9596
Epoch 6/15
322560/322560 - 5s - loss: 0.1461 - accuracy: 0.9578 - val_loss: 0.1554 - val_accuracy: 0.9533
Epoch 7/15
322560/322560 - 6s - loss: 0.1433 - accuracy: 0.9589 - val_loss: 0.1580 - val_accuracy: 0.9399
Epoch 8/15
322560/322560 - 6s - loss: 0.1409 - accuracy: 0.9599 - val_loss: 0.1489 - val_accuracy: 0.9548
Epoch 9/15
322560/322560 - 5s - loss: 0.1395 - accuracy: 0.9605 - val_loss: 0.1379 - val_accuracy: 0.

In [22]:
model2_2 = build_model2(4,7)
history2_2 = model2_2.fit(x_train2, y_train2, epochs=15, validation_split=0.2, batch_size=128, verbose=2)

Train on 322560 samples, validate on 80640 samples
Epoch 1/15
322560/322560 - 6s - loss: 1.0903 - accuracy: 0.6064 - val_loss: 0.7503 - val_accuracy: 0.6977
Epoch 2/15
322560/322560 - 5s - loss: 0.5726 - accuracy: 0.7817 - val_loss: 0.4501 - val_accuracy: 0.8514
Epoch 3/15
322560/322560 - 5s - loss: 0.3828 - accuracy: 0.8857 - val_loss: 0.3344 - val_accuracy: 0.9168
Epoch 4/15
322560/322560 - 6s - loss: 0.2958 - accuracy: 0.9191 - val_loss: 0.2660 - val_accuracy: 0.9184
Epoch 5/15
322560/322560 - 5s - loss: 0.2371 - accuracy: 0.9383 - val_loss: 0.2188 - val_accuracy: 0.9437
Epoch 6/15
322560/322560 - 7s - loss: 0.2004 - accuracy: 0.9490 - val_loss: 0.1893 - val_accuracy: 0.9528
Epoch 7/15
322560/322560 - 4s - loss: 0.1775 - accuracy: 0.9530 - val_loss: 0.1709 - val_accuracy: 0.9529
Epoch 8/15
322560/322560 - 5s - loss: 0.1637 - accuracy: 0.9544 - val_loss: 0.1612 - val_accuracy: 0.9564
Epoch 9/15
322560/322560 - 4s - loss: 0.1559 - accuracy: 0.9554 - val_loss: 0.1546 - val_accuracy: 0.

## test

In [23]:
print(model1.evaluate(x_test1, y_test1))
print(model1_2.evaluate(x_test1, y_test1))
# [0.09895923781607832, 0.85723215]

[0.5146434716848227, 0.7934327]
[0.8896572437011279, 0.6934904]


In [24]:
print(model2.evaluate(x_test2, y_test2))
print(model2_2.evaluate(x_test2, y_test2))

[0.13872719309100556, 0.966875]
[0.14134339284477015, 0.95987105]


In [25]:
save_model(model1, "model1.h5")
save_model(model2, "model2.h5")
save_model(model1_2, "model1_2.h5")
save_model(model2_2, "model2_@.h5")

model successfully saved | file_location: /Users/test/Documents/GitHub/xai-senior-design/models/model1.h5
model successfully saved | file_location: /Users/test/Documents/GitHub/xai-senior-design/models/model2.h5
model successfully saved | file_location: /Users/test/Documents/GitHub/xai-senior-design/models/model1_2.h5
model successfully saved | file_location: /Users/test/Documents/GitHub/xai-senior-design/models/model2_@.h5
