In [1]:
import numpy as np
import pandas as pd
from keras.models import Sequential
from keras.layers import Conv1D, MaxPooling1D, Flatten, Dense, Dropout
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.model_selection import train_test_split
import tensorflow as tf

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from kerastuner.tuners import RandomSearch

Using TensorFlow backend


  from kerastuner.tuners import RandomSearch


In [2]:
cle = pd.read_csv('cle_metadata_dnn.csv')
vir = pd.read_csv('vir_metadata_dnn.csv')
hun = pd.read_csv('hun_metadata_dnn.csv')
swi = pd.read_csv('swi_metadata_dnn.csv')

In [3]:
cle_train,cle_test = train_test_split(cle,test_size=0.33, random_state=42)
vir_train,vir_test = train_test_split(vir,test_size=0.33, random_state=42)
hun_train,hun_test = train_test_split(hun,test_size=0.33, random_state=42)
swi_train,swi_test = train_test_split(swi,test_size=0.33, random_state=42)

In [4]:
Train = pd.concat([cle_train,vir_train,hun_train])
Test = pd.concat([cle_test,vir_test,hun_test,swi_test,swi_train])

In [5]:
X_train = Train.iloc[:,:-1]
X_test = Test.iloc[:,:-1]

y_train = Train.iloc[:,-1]
y_test = Test.iloc[:,-1]

Y_train_binary = y_train.apply(lambda x: 1 if x > 0 else 0)
Y_test_binary = y_test.apply(lambda x: 1 if x > 0 else 0)

# CNN

In [26]:
# Define your CNN model function
def build_model(hp):
    model = keras.Sequential()
    
    # Define the hyperparameter search space
    hp_filters = hp.Int('num_filters', min_value=32, max_value=128, step=32)
    hp_kernel_size = hp.Int('kernel_size', min_value=3, max_value=5)
    hp_units = hp.Int('units', min_value=32, max_value=128, step=32)
    hp_dropout = hp.Float('dropout', min_value=0.2, max_value=0.5, step=0.1)
    
    # Add convolutional layers
    model.add(layers.Conv1D(hp_filters, hp_kernel_size, activation='relu', input_shape=(64,1)))
    model.add(layers.Conv1D(hp_filters, hp_kernel_size, activation='relu'))
    model.add(layers.MaxPooling1D(pool_size=2))
              
    model.add(layers.Conv1D(hp_filters, hp_kernel_size, activation='relu'))
    model.add(layers.MaxPooling1D(pool_size=2))
    
    # Add fully connected layers
    model.add(layers.Flatten())
    model.add(layers.Dense(hp_units, activation='relu'))
    model.add(layers.Dense(2, activation='sigmoid'))
    
    # Compile the model
    model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    
    return model

# Create a RandomSearch tuner
tuner = RandomSearch(
    build_model,
    objective='val_accuracy',  # Hyperparameter optimization goal
    max_trials=100,             # Number of trials (random combinations of hyperparameters)
    directory='random_search', # Directory to save results
    project_name='cnn_tuning'  # Name of the tuning project
)


# Perform the hyperparameter search
tuner.search(X_train, Y_train_binary, epochs=20, validation_split=0.2)

# Get the best hyperparameters
best_hps = tuner.get_best_hyperparameters(num_trials=1)[0]

# Build the final model with the best hyperparameters
best_model = tuner.hypermodel.build(best_hps)

#display the best model
best_model.summary()

Trial 100 Complete [00h 00m 03s]
val_accuracy: 0.822429895401001

Best val_accuracy So Far: 0.9065420627593994
Total elapsed time: 00h 05m 04s
Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv1d_3 (Conv1D)           (None, 60, 96)            576       
                                                                 
 conv1d_4 (Conv1D)           (None, 56, 96)            46176     
                                                                 
 max_pooling1d_2 (MaxPooling  (None, 28, 96)           0         
 1D)                                                             
                                                                 
 conv1d_5 (Conv1D)           (None, 24, 96)            46176     
                                                                 
 max_pooling1d_3 (MaxPooling  (None, 12, 96)           0         
 1D)                                       

In [23]:
callback = tf.keras.callbacks.EarlyStopping(monitor='loss', patience=5)

best_model.fit(X_train, Y_train_binary, epochs=1000, batch_size=32,callbacks=[callback])
Y_pred = best_model.predict(X_test).argmax(axis=1)
    
cm = confusion_matrix(Y_pred, Y_test_binary)
print(cm)
print(classification_report(Y_test_binary, Y_pred, digits=4))

Epoch 1/1000
Epoch 2/1000
Epoch 3/1000
Epoch 4/1000
Epoch 5/1000
Epoch 6/1000
Epoch 7/1000
Epoch 8/1000
Epoch 9/1000
Epoch 10/1000
Epoch 11/1000
Epoch 12/1000
Epoch 13/1000
Epoch 14/1000
Epoch 15/1000
Epoch 16/1000
Epoch 17/1000
Epoch 18/1000
Epoch 19/1000
Epoch 20/1000
Epoch 21/1000
Epoch 22/1000
Epoch 23/1000
Epoch 24/1000
Epoch 25/1000
Epoch 26/1000
Epoch 27/1000
Epoch 28/1000
Epoch 29/1000
Epoch 30/1000
Epoch 31/1000
Epoch 32/1000
Epoch 33/1000
Epoch 34/1000
Epoch 35/1000
Epoch 36/1000
Epoch 37/1000
Epoch 38/1000
Epoch 39/1000
Epoch 40/1000
Epoch 41/1000
Epoch 42/1000
Epoch 43/1000
Epoch 44/1000
Epoch 45/1000
Epoch 46/1000
Epoch 47/1000
Epoch 48/1000
Epoch 49/1000
Epoch 50/1000
Epoch 51/1000
Epoch 52/1000
Epoch 53/1000
Epoch 54/1000
Epoch 55/1000
Epoch 56/1000
Epoch 57/1000
Epoch 58/1000
Epoch 59/1000
Epoch 60/1000
Epoch 61/1000
Epoch 62/1000
Epoch 63/1000
Epoch 64/1000
Epoch 65/1000
Epoch 66/1000
Epoch 67/1000
Epoch 68/1000
Epoch 69/1000
Epoch 70/1000
Epoch 71/1000
Epoch 72/1000
E

Epoch 83/1000
Epoch 84/1000
Epoch 85/1000
Epoch 86/1000
Epoch 87/1000
Epoch 88/1000
Epoch 89/1000
Epoch 90/1000
Epoch 91/1000
Epoch 92/1000
Epoch 93/1000
Epoch 94/1000
Epoch 95/1000
Epoch 96/1000
Epoch 97/1000
Epoch 98/1000
Epoch 99/1000
Epoch 100/1000
Epoch 101/1000
Epoch 102/1000
Epoch 103/1000
Epoch 104/1000
Epoch 105/1000
Epoch 106/1000
Epoch 107/1000
Epoch 108/1000
Epoch 109/1000
Epoch 110/1000
Epoch 111/1000
Epoch 112/1000
Epoch 113/1000
Epoch 114/1000
Epoch 115/1000
Epoch 116/1000
Epoch 117/1000
Epoch 118/1000
Epoch 119/1000
Epoch 120/1000
Epoch 121/1000
Epoch 122/1000
[[109 125]
 [ 26 127]]
              precision    recall  f1-score   support

           0     0.4658    0.8074    0.5908       135
           1     0.8301    0.5040    0.6272       252

    accuracy                         0.6098       387
   macro avg     0.6479    0.6557    0.6090       387
weighted avg     0.7030    0.6098    0.6145       387

