In [1]:
import numpy as np
import pandas as pd
from keras.models import Sequential
from keras.layers import Conv1D, MaxPooling1D, Flatten, Dense, Dropout
from sklearn.metrics import confusion_matrix, classification_report

In [2]:
def CNN(path_train,path_test,name,epoch_num):
    #Split the data
    Train = pd.read_csv(path_train)
    Test = pd.read_csv(path_test)
    
    X_train = Train.iloc[:,:-1]
    Y_train = Train.iloc[:,-1]

    X_test = Test.iloc[:,:-1]
    Y_test = Test.iloc[:,-1]

    #binarize the target
    Y_train_binary = Y_train.apply(lambda x: 1 if x > 0 else 0)
    Y_test_binary = Y_test.apply(lambda x: 1 if x > 0 else 0)

    # define the model architecture
    model = Sequential()
    model.add(Conv1D(filters=256, kernel_size=3, activation='relu', input_shape=(22,1)))
    model.add(MaxPooling1D(pool_size=2))

    model.add(Conv1D(filters=512, kernel_size=3, activation='relu'))
    model.add(MaxPooling1D(pool_size=2))

    model.add(Flatten())
    model.add(Dense(16, activation='relu'))
    model.add(Dense(2, activation='sigmoid'))

    model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    
    model.fit(X_train, Y_train_binary, epochs=epoch_num, batch_size=32)
    Y_pred = model.predict(X_test).argmax(axis=1)
    
    cm = confusion_matrix(Y_pred, Y_test_binary)
    print(cm)
    print(classification_report(Y_test_binary, Y_pred))

    model_name = f'{name}_cnn.h5'
    model.save('../Models/CNN_only/' + model_name)
    
    mismatch = [i for i, (a,b) in enumerate(zip(Y_pred, Y_test_binary)) if a != b]
    print(mismatch)
            
    return model

# Cleveland

In [3]:
path_train = '../TrainTestData/cle_train.csv'
path_test = '../TrainTestData/cle_test.csv'
model = CNN(path_train,path_test,'cle',20)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
[[40  7]
 [11 33]]
              precision    recall  f1-score   support

           0       0.85      0.78      0.82        51
           1       0.75      0.82      0.79        40

    accuracy                           0.80        91
   macro avg       0.80      0.80      0.80        91
weighted avg       0.81      0.80      0.80        91

[0, 19, 21, 24, 28, 30, 32, 38, 53, 60, 64, 67, 69, 70, 73, 77, 78, 89]


# Virginia

In [4]:
path_train = '../traintestdata/vir_train.csv'
path_test = '../traintestdata/vir_test.csv'
CNN(path_train,path_test,'vir',10)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
[[ 2  0]
 [11 47]]
              precision    recall  f1-score   support

           0       1.00      0.15      0.27        13
           1       0.81      1.00      0.90        47

    accuracy                           0.82        60
   macro avg       0.91      0.58      0.58        60
weighted avg       0.85      0.82      0.76        60

[3, 6, 14, 22, 30, 32, 34, 48, 49, 57, 58]


<keras.engine.sequential.Sequential at 0x1ff7933a370>

# Hungary

In [5]:
path_train = '../traintestdata/hun_train.csv'
path_test = '../traintestdata/hun_test.csv'
CNN(path_train,path_test,'hun',20)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
[[53  6]
 [ 4 26]]
              precision    recall  f1-score   support

           0       0.90      0.93      0.91        57
           1       0.87      0.81      0.84        32

    accuracy                           0.89        89
   macro avg       0.88      0.87      0.88        89
weighted avg       0.89      0.89      0.89        89

[8, 12, 14, 23, 29, 42, 46, 54, 64, 86]


<keras.engine.sequential.Sequential at 0x1ff793239a0>

# Switzerland

In [6]:
path_train = '../traintestdata/swi_train.csv'
path_test = '../traintestdata/swi_test.csv'
CNN(path_train,path_test,'swi',10)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
[[ 0  0]
 [ 2 35]]
              precision    recall  f1-score   support

           0       0.00      0.00      0.00         2
           1       0.95      1.00      0.97        35

    accuracy                           0.95        37
   macro avg       0.47      0.50      0.49        37
weighted avg       0.89      0.95      0.92        37

[20, 25]


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


<keras.engine.sequential.Sequential at 0x1ff51cab610>

# Europe

In [7]:
path_train = '../traintestdata/euro_train.csv'
path_test = '../traintestdata/euro_test.csv'
CNN(path_train,path_test,'eur',20)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
[[52  6]
 [ 7 61]]
              precision    recall  f1-score   support

           0       0.90      0.88      0.89        59
           1       0.90      0.91      0.90        67

    accuracy                           0.90       126
   macro avg       0.90      0.90      0.90       126
weighted avg       0.90      0.90      0.90       126

[12, 14, 22, 23, 29, 33, 42, 46, 54, 64, 104, 109, 114]


<keras.engine.sequential.Sequential at 0x1ff7946f310>

# Combined

In [8]:
path_train = '../traintestdata/com_train.csv'
path_test = '../traintestdata/com_test.csv'
CNN(path_train,path_test,'com',20)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
[[102  20]
 [ 20 134]]
              precision    recall  f1-score   support

           0       0.84      0.84      0.84       122
           1       0.87      0.87      0.87       154

    accuracy                           0.86       276
   macro avg       0.85      0.85      0.85       276
weighted avg       0.86      0.86      0.86       276

[8, 11, 28, 42, 52, 59, 76, 79, 83, 88, 91, 94, 104, 121, 125, 127, 130, 135, 146, 148, 155, 158, 164, 169, 172, 182, 184, 190, 195, 198, 201, 213, 214, 217, 234, 242, 256, 259, 264, 274]


<keras.engine.sequential.Sequential at 0x1ff7c11c610>

In [9]:
path_train = '../traintestdata/euro_train.csv'
path_test = '../traintestdata/euro_test.csv'

In [10]:
Train = pd.read_csv(path_train)
Test = pd.read_csv(path_test)
    
X_train = Train.iloc[:,:-1]
Y_train = Train.iloc[:,-1]

X_test = Test.iloc[:,:-1]
Y_test = Test.iloc[:,-1]

#binarize the target
Y_train_binary = Y_train.apply(lambda x: 1 if x > 0 else 0)
Y_test_binary = Y_test.apply(lambda x: 1 if x > 0 else 0)

data = pd.concat([X_train,X_test])
labels = pd.concat([Y_train_binary,Y_test_binary])

data = data.reset_index()
data = data.drop(['index'],axis = 1)

labels = labels.reset_index()
labels = labels.iloc[:,-1]

labels

0      0
1      1
2      0
3      0
4      0
      ..
412    1
413    1
414    1
415    1
416    1
Name: num, Length: 417, dtype: int64

In [11]:
Y_pred = model.predict(data).argmax(axis=1)
    
cm = confusion_matrix(Y_pred, labels)
print(cm)
print(classification_report(labels, Y_pred))

[[118  18]
 [ 78 203]]
              precision    recall  f1-score   support

           0       0.87      0.60      0.71       196
           1       0.72      0.92      0.81       221

    accuracy                           0.77       417
   macro avg       0.80      0.76      0.76       417
weighted avg       0.79      0.77      0.76       417

