In [1]:
import numpy as np
import pandas as pd
from keras.models import Sequential
from keras.layers import Conv1D, MaxPooling1D, Flatten, Dense, Dropout
from sklearn.metrics import confusion_matrix, classification_report

In [2]:
def CNN(path_train,path_test, epoch_num):
    Train = pd.read_csv(path_train)
    Test = pd.read_csv(path_test)
    
    X_train = Train.iloc[:,:-1]
    Y_train = Train.iloc[:,-1]

    X_test = Test.iloc[:,:-1]
    Y_test = Test.iloc[:,-1]

    #binarize the target
    Y_train_binary = Y_train.apply(lambda x: 1 if x > 0 else 0)
    Y_test_binary = Y_test.apply(lambda x: 1 if x > 0 else 0)

    # CNN
    model = Sequential()
    model.add(Conv1D(filters=256, kernel_size=3, activation='relu', input_shape=(22,1)))
    model.add(Conv1D(filters=256, kernel_size=3, activation='relu'))
    model.add(MaxPooling1D(pool_size=2))

    model.add(Conv1D(filters=512, kernel_size=3, activation='relu'))
    model.add(MaxPooling1D(pool_size=2))

    model.add(Flatten())
    model.add(Dense(512, activation='relu'))
    model.add(Dense(2, activation='sigmoid'))

    model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    
    model.fit(X_train, Y_train_binary, epochs=epoch_num, batch_size=32)
    Y_pred = model.predict(X_test).argmax(axis=1)
    
    cm = confusion_matrix(Y_pred, Y_test_binary)
    print(cm)
    print(classification_report(Y_test_binary, Y_pred, digits=4))
    
    mismatch = [i for i, (a,b) in enumerate(zip(Y_pred, Y_test_binary)) if a != b]
    print(mismatch)

# Cleveland

In [3]:
path_train = '../cle_train.csv'
path_test = '../cle_test.csv'
CNN(path_train,path_test,19)

Epoch 1/19
Epoch 2/19
Epoch 3/19
Epoch 4/19
Epoch 5/19
Epoch 6/19
Epoch 7/19
Epoch 8/19
Epoch 9/19
Epoch 10/19
Epoch 11/19
Epoch 12/19
Epoch 13/19
Epoch 14/19
Epoch 15/19
Epoch 16/19
Epoch 17/19
Epoch 18/19
Epoch 19/19
[[40  4]
 [13 36]]
              precision    recall  f1-score   support

           0     0.9091    0.7547    0.8247        53
           1     0.7347    0.9000    0.8090        40

    accuracy                         0.8172        93
   macro avg     0.8219    0.8274    0.8169        93
weighted avg     0.8341    0.8172    0.8180        93

[3, 4, 5, 14, 24, 33, 34, 36, 40, 55, 62, 64, 67, 75, 77, 83, 91]


# Virginia

In [4]:
path_train = '../vir_train.csv'
path_test = '../vir_test.csv'
CNN(path_train,path_test, 15)

Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15
[[ 7 11]
 [13 31]]
              precision    recall  f1-score   support

           0     0.3889    0.3500    0.3684        20
           1     0.7045    0.7381    0.7209        42

    accuracy                         0.6129        62
   macro avg     0.5467    0.5440    0.5447        62
weighted avg     0.6027    0.6129    0.6072        62

[0, 4, 7, 10, 13, 14, 15, 17, 19, 20, 24, 27, 32, 33, 41, 44, 45, 48, 51, 54, 55, 57, 59, 60]


# Hungary

In [5]:
path_train = '../hun_train.csv'
path_test = '../hun_test.csv'
CNN(path_train,path_test, 20)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
[[51 14]
 [ 3 26]]
              precision    recall  f1-score   support

           0     0.7846    0.9444    0.8571        54
           1     0.8966    0.6500    0.7536        40

    accuracy                         0.8191        94
   macro avg     0.8406    0.7972    0.8054        94
weighted avg     0.8322    0.8191    0.8131        94

[3, 4, 24, 31, 33, 37, 39, 44, 59, 61, 65, 66, 68, 75, 76, 86, 90]


# Switzerland

In [6]:
path_train = '../swi_train.csv'
path_test = '../swi_test.csv'
CNN(path_train,path_test, 20)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
[[ 0  0]
 [ 2 39]]
              precision    recall  f1-score   support

           0     0.0000    0.0000    0.0000         2
           1     0.9512    1.0000    0.9750        39

    accuracy                         0.9512        41
   macro avg     0.4756    0.5000    0.4875        41
weighted avg     0.9048    0.9512    0.9274        41

[24, 31]


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


# Combined

In [7]:
path_train = '../com_train.csv'
path_test = '../com_test.csv'
CNN(path_train,path_test, 20)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
[[ 99  24]
 [ 23 130]]
              precision    recall  f1-score   support

           0     0.8049    0.8115    0.8082       122
           1     0.8497    0.8442    0.8469       154

    accuracy                         0.8297       276
   macro avg     0.8273    0.8278    0.8275       276
weighted avg     0.8299    0.8297    0.8298       276

[11, 28, 29, 38, 42, 52, 59, 63, 76, 79, 83, 88, 91, 94, 104, 109, 116, 121, 122, 127, 130, 135, 146, 148, 155, 158, 162, 164, 167, 169, 173, 184, 190, 195, 198, 213, 214, 217, 221, 222, 225, 234, 247, 259, 264, 274, 275]
