In [1]:
import numpy as np
import pandas as pd
from keras.models import Sequential
from keras.layers import Conv1D, MaxPooling1D, Flatten, Dense, Dropout
from sklearn.metrics import confusion_matrix, classification_report

In [2]:
def DNN(path_train,path_test, epoch_num):
    Train = pd.read_csv(path_train)
    Test = pd.read_csv(path_test)
    
    X_train = Train.iloc[:,:-1]
    Y_train = Train.iloc[:,-1]

    X_test = Test.iloc[:,:-1]
    Y_test = Test.iloc[:,-1]

    #binarize the target
    Y_train_binary = Y_train.apply(lambda x: 1 if x > 0 else 0)
    Y_test_binary = Y_test.apply(lambda x: 1 if x > 0 else 0)

    # CNN
    model = Sequential()
    model.add(Dense(64, input_shape=(22,), activation='relu'))
    model.add(Dense(128, activation='relu'))
    model.add(Dense(256, activation='relu'))
    model.add(Dense(2, activation='sigmoid'))

    model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    
    model.fit(X_train, Y_train_binary, epochs=epoch_num, batch_size=32)
    Y_pred = model.predict(X_test).argmax(axis=1)
    
    cm = confusion_matrix(Y_pred, Y_test_binary)
    print(cm)
    print(classification_report(Y_test_binary, Y_pred,digits = 4))
    
    mismatch = [i for i, (a,b) in enumerate(zip(Y_pred, Y_test_binary)) if a != b]
    model.save(str(path_train[-13:][:3]) +'_dnn.h5')
    print(mismatch)

# Cleveland

In [3]:
path_train = '../cle_train.csv'
path_test = '../cle_test.csv'
DNN(path_train,path_test,19)

Epoch 1/19
Epoch 2/19
Epoch 3/19
Epoch 4/19
Epoch 5/19
Epoch 6/19
Epoch 7/19
Epoch 8/19
Epoch 9/19
Epoch 10/19
Epoch 11/19
Epoch 12/19
Epoch 13/19
Epoch 14/19
Epoch 15/19
Epoch 16/19
Epoch 17/19
Epoch 18/19
Epoch 19/19
[[41  6]
 [12 34]]
              precision    recall  f1-score   support

           0     0.8723    0.7736    0.8200        53
           1     0.7391    0.8500    0.7907        40

    accuracy                         0.8065        93
   macro avg     0.8057    0.8118    0.8053        93
weighted avg     0.8150    0.8065    0.8074        93

[3, 4, 5, 14, 24, 33, 36, 55, 59, 61, 62, 64, 70, 77, 79, 80, 88, 91]


# Virginia

In [4]:
path_train = '../vir_train.csv'
path_test = '../vir_test.csv'
DNN(path_train,path_test, 15)

Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15
[[ 5  3]
 [15 39]]
              precision    recall  f1-score   support

           0     0.6250    0.2500    0.3571        20
           1     0.7222    0.9286    0.8125        42

    accuracy                         0.7097        62
   macro avg     0.6736    0.5893    0.5848        62
weighted avg     0.6909    0.7097    0.6656        62

[0, 4, 7, 10, 13, 14, 15, 19, 24, 27, 33, 35, 37, 41, 44, 45, 55, 59]


# Hungary

In [5]:
path_train = '../hun_train.csv'
path_test = '../hun_test.csv'
DNN(path_train,path_test, 20)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
[[52  5]
 [ 2 35]]
              precision    recall  f1-score   support

           0     0.9123    0.9630    0.9369        54
           1     0.9459    0.8750    0.9091        40

    accuracy                         0.9255        94
   macro avg     0.9291    0.9190    0.9230        94
weighted avg     0.9266    0.9255    0.9251        94

[4, 39, 44, 66, 68, 86, 90]


# Switzerland

In [6]:
path_train = '../swi_train.csv'
path_test = '../swi_test.csv'
DNN(path_train,path_test, 20)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
[[ 1  2]
 [ 1 37]]
              precision    recall  f1-score   support

           0     0.3333    0.5000    0.4000         2
           1     0.9737    0.9487    0.9610        39

    accuracy                         0.9268        41
   macro avg     0.6535    0.7244    0.6805        41
weighted avg     0.9424    0.9268    0.9337        41

[5, 14, 24]


# Combined

In [7]:
path_train = '../com_train.csv'
path_test = '../com_test.csv'
DNN(path_train,path_test, 20)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
[[101  14]
 [ 21 140]]
              precision    recall  f1-score   support

           0     0.8783    0.8279    0.8523       122
           1     0.8696    0.9091    0.8889       154

    accuracy                         0.8732       276
   macro avg     0.8739    0.8685    0.8706       276
weighted avg     0.8734    0.8732    0.8727       276

[11, 14, 20, 29, 33, 42, 52, 59, 76, 80, 88, 90, 91, 104, 105, 113, 121, 127, 130, 135, 148, 158, 164, 169, 172, 184, 190, 198, 214, 217, 234, 248, 264, 274, 275]
