In [9]:
import numpy as np
import pandas as pd
from keras.models import Sequential
from keras.layers import Conv1D, MaxPooling1D, Flatten, Dense, Dropout
from sklearn.metrics import confusion_matrix, classification_report

In [10]:
def RNN(path_train,path_test, epoch_num):
    Train = pd.read_csv(path_train)
    Test = pd.read_csv(path_test)
    
    X_train = Train.iloc[:,:-1]
    Y_train = Train.iloc[:,-1]

    X_test = Test.iloc[:,:-1]
    Y_test = Test.iloc[:,-1]

    #binarize the target
    Y_train_binary = Y_train.apply(lambda x: 1 if x > 0 else 0)
    Y_test_binary = Y_test.apply(lambda x: 1 if x > 0 else 0)

    # CNN
    model = Sequential()
    model.add(Dense(64, input_shape=(22,), activation='relu'))
    model.add(Dense(128, activation='relu'))
    model.add(Dense(256, activation='relu'))
    model.add(Dense(2, activation='sigmoid'))

    model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    
    model.fit(X_train, Y_train_binary, epochs=epoch_num, batch_size=32)
    Y_pred = model.predict(X_test).argmax(axis=1)
    
    cm = confusion_matrix(Y_pred, Y_test_binary)
    print(cm)
    print(classification_report(Y_test_binary, Y_pred,digits = 4))
    
    mismatch = [i for i, (a,b) in enumerate(zip(Y_pred, Y_test_binary)) if a != b]
    model.save(str(path_train[-13:][:3]) +'_dnn.h5')
    print(mismatch)

# Cleveland

In [11]:
path_train = '../traintestdata/cle_train.csv'
path_test = '../traintestdata/cle_test.csv'
RNN(path_train,path_test,19)

Epoch 1/19
Epoch 2/19
Epoch 3/19
Epoch 4/19
Epoch 5/19
Epoch 6/19
Epoch 7/19
Epoch 8/19
Epoch 9/19
Epoch 10/19
Epoch 11/19
Epoch 12/19
Epoch 13/19
Epoch 14/19
Epoch 15/19
Epoch 16/19
Epoch 17/19
Epoch 18/19
Epoch 19/19
[[39  4]
 [12 36]]
              precision    recall  f1-score   support

           0     0.9070    0.7647    0.8298        51
           1     0.7500    0.9000    0.8182        40

    accuracy                         0.8242        91
   macro avg     0.8285    0.8324    0.8240        91
weighted avg     0.8380    0.8242    0.8247        91

[0, 19, 24, 28, 30, 31, 32, 38, 53, 60, 64, 67, 69, 70, 77, 78]


# Virginia

In [12]:
path_train = '../traintestdata/vir_train.csv'
path_test = '../traintestdata/vir_test.csv'
RNN(path_train,path_test, 15)

Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15
[[ 2  3]
 [11 44]]
              precision    recall  f1-score   support

           0     0.4000    0.1538    0.2222        13
           1     0.8000    0.9362    0.8627        47

    accuracy                         0.7667        60
   macro avg     0.6000    0.5450    0.5425        60
weighted avg     0.7133    0.7667    0.7240        60

[3, 6, 14, 17, 19, 28, 30, 32, 34, 48, 49, 52, 57, 58]


# Hungary

In [13]:
path_train = '../traintestdata/hun_train.csv'
path_test = '../traintestdata/hun_test.csv'
RNN(path_train,path_test, 20)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
[[53  7]
 [ 4 25]]
              precision    recall  f1-score   support

           0     0.8833    0.9298    0.9060        57
           1     0.8621    0.7812    0.8197        32

    accuracy                         0.8764        89
   macro avg     0.8727    0.8555    0.8628        89
weighted avg     0.8757    0.8764    0.8749        89

[8, 12, 14, 23, 29, 30, 42, 46, 54, 59, 86]


# Switzerland

In [14]:
path_train = '../traintestdata/swi_train.csv'
path_test = '../traintestdata/swi_test.csv'
RNN(path_train,path_test, 20)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
[[ 1  2]
 [ 1 33]]
              precision    recall  f1-score   support

           0     0.3333    0.5000    0.4000         2
           1     0.9706    0.9429    0.9565        35

    accuracy                         0.9189        37
   macro avg     0.6520    0.7214    0.6783        37
weighted avg     0.9361    0.9189    0.9264        37

[15, 20, 27]


# Europe

In [7]:
path_train = '../traintestdata/euro_train.csv'
path_test = '../traintestdata/euro_test.csv'
RNN(path_train,path_test, 20)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
[[55  8]
 [ 4 59]]
              precision    recall  f1-score   support

           0     0.8730    0.9322    0.9016        59
           1     0.9365    0.8806    0.9077        67

    accuracy                         0.9048       126
   macro avg     0.9048    0.9064    0.9047       126
weighted avg     0.9068    0.9048    0.9049       126

[8, 12, 14, 23, 29, 42, 54, 59, 101, 104, 109, 116]


# Combined

In [8]:
path_train = '../traintestdata/com_train.csv'
path_test = '../traintestdata/com_test.csv'
RNN(path_train,path_test, 20)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
[[103  27]
 [ 19 127]]
              precision    recall  f1-score   support

           0     0.7923    0.8443    0.8175       122
           1     0.8699    0.8247    0.8467       154

    accuracy                         0.8333       276
   macro avg     0.8311    0.8345    0.8321       276
weighted avg     0.8356    0.8333    0.8338       276

[11, 28, 29, 33, 42, 47, 52, 59, 66, 70, 76, 80, 88, 90, 91, 101, 104, 105, 111, 116, 121, 130, 135, 158, 164, 168, 169, 171, 184, 190, 198, 200, 201, 208, 210, 213, 214, 217, 228, 234, 247, 259, 264, 273, 274, 275]
