In [1]:
import numpy as np
import pandas as pd
from keras.models import Sequential
from keras.layers import Conv1D, MaxPooling1D, Flatten, Dense, Dropout
from sklearn.metrics import confusion_matrix, classification_report

In [4]:
def RNN(path_train,path_test, epoch_num):
    Train = pd.read_csv(path_train)
    Test = pd.read_csv(path_test)
    
    X_train = Train.iloc[:,:-1]
    Y_train = Train.iloc[:,-1]

    X_test = Test.iloc[:,:-1]
    Y_test = Test.iloc[:,-1]

    #binarize the target
    Y_train_binary = Y_train.apply(lambda x: 1 if x > 0 else 0)
    Y_test_binary = Y_test.apply(lambda x: 1 if x > 0 else 0)

    # CNN
    model = Sequential()
    model.add(Dense(64, input_shape=(22,), activation='relu'))
    model.add(Dense(128, activation='relu'))
    model.add(Dense(256, activation='relu'))
    model.add(Dense(2, activation='sigmoid'))

    model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    
    model.fit(X_train, Y_train_binary, epochs=epoch_num, batch_size=32)
    Y_pred = model.predict(X_test).argmax(axis=1)
    
    cm = confusion_matrix(Y_pred, Y_test_binary)
    print(cm)
    print(classification_report(Y_test_binary, Y_pred))
    
    mismatch = [i for i, (a,b) in enumerate(zip(Y_pred, Y_test_binary)) if a != b]
    print(mismatch)

# Cleveland

In [5]:
path_train = '../traintestdata/cle_train.csv'
path_test = '../traintestdata/cle_test.csv'
RNN(path_train,path_test,19)

Epoch 1/19
Epoch 2/19
Epoch 3/19
Epoch 4/19
Epoch 5/19
Epoch 6/19
Epoch 7/19
Epoch 8/19
Epoch 9/19
Epoch 10/19
Epoch 11/19
Epoch 12/19
Epoch 13/19
Epoch 14/19
Epoch 15/19
Epoch 16/19
Epoch 17/19
Epoch 18/19
Epoch 19/19
[[39  5]
 [12 35]]
              precision    recall  f1-score   support

           0       0.89      0.76      0.82        51
           1       0.74      0.88      0.80        40

    accuracy                           0.81        91
   macro avg       0.82      0.82      0.81        91
weighted avg       0.82      0.81      0.81        91

[0, 19, 24, 28, 30, 31, 32, 38, 53, 60, 64, 67, 69, 70, 77, 78, 89]


# Virginia

In [6]:
path_train = '../traintestdata/vir_train.csv'
path_test = '../traintestdata/vir_test.csv'
RNN(path_train,path_test, 15)

Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15
[[ 4  4]
 [ 9 43]]
              precision    recall  f1-score   support

           0       0.50      0.31      0.38        13
           1       0.83      0.91      0.87        47

    accuracy                           0.78        60
   macro avg       0.66      0.61      0.62        60
weighted avg       0.76      0.78      0.76        60

[1, 3, 6, 14, 17, 28, 32, 34, 48, 49, 52, 57, 58]


# Hungary

In [7]:
path_train = '../traintestdata/hun_train.csv'
path_test = '../traintestdata/hun_test.csv'
RNN(path_train,path_test, 20)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
[[53  4]
 [ 4 28]]
              precision    recall  f1-score   support

           0       0.93      0.93      0.93        57
           1       0.88      0.88      0.88        32

    accuracy                           0.91        89
   macro avg       0.90      0.90      0.90        89
weighted avg       0.91      0.91      0.91        89

[8, 12, 14, 23, 29, 42, 46, 54]


# Switzerland

In [8]:
path_train = '../traintestdata/swi_train.csv'
path_test = '../traintestdata/swi_test.csv'
RNN(path_train,path_test, 20)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
[[ 1  2]
 [ 1 33]]
              precision    recall  f1-score   support

           0       0.33      0.50      0.40         2
           1       0.97      0.94      0.96        35

    accuracy                           0.92        37
   macro avg       0.65      0.72      0.68        37
weighted avg       0.94      0.92      0.93        37

[15, 20, 27]


# Europe

In [9]:
path_train = '../traintestdata/euro_train.csv'
path_test = '../traintestdata/euro_test.csv'
RNN(path_train,path_test, 20)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
[[52  6]
 [ 7 61]]
              precision    recall  f1-score   support

           0       0.90      0.88      0.89        59
           1       0.90      0.91      0.90        67

    accuracy                           0.90       126
   macro avg       0.90      0.90      0.90       126
weighted avg       0.90      0.90      0.90       126

[1, 3, 8, 14, 23, 29, 42, 46, 54, 86, 104, 109, 116]


# Combined

In [10]:
path_train = '../traintestdata/com_train.csv'
path_test = '../traintestdata/com_test.csv'
RNN(path_train,path_test, 20)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
[[106  38]
 [ 16 116]]
              precision    recall  f1-score   support

           0       0.74      0.87      0.80       122
           1       0.88      0.75      0.81       154

    accuracy                           0.80       276
   macro avg       0.81      0.81      0.80       276
weighted avg       0.82      0.80      0.80       276

[7, 11, 20, 28, 29, 34, 37, 42, 47, 51, 52, 59, 66, 68, 76, 80, 84, 88, 90, 95, 104, 111, 121, 125, 130, 134, 135, 151, 152, 158, 164, 169, 171, 184, 190, 193, 195, 198, 201, 208, 213, 214, 217, 218, 221, 222, 228, 234, 247, 248, 254, 264, 274, 275]
