In [2]:
import numpy as np
import pandas as pd
from keras.models import Sequential
from keras.layers import Conv1D, MaxPooling1D, Flatten, Dense, Dropout
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.model_selection import train_test_split
import tensorflow as tf

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from kerastuner.tuners import RandomSearch

Using TensorFlow backend


  from kerastuner.tuners import RandomSearch


In [3]:
cle = pd.read_csv('cle_metadata_dnn.csv')
vir = pd.read_csv('vir_metadata_dnn.csv')
hun = pd.read_csv('hun_metadata_dnn.csv')
swi = pd.read_csv('swi_metadata_dnn.csv')

In [4]:
cle_train,cle_test = train_test_split(cle,test_size=0.33, random_state=42)
vir_train,vir_test = train_test_split(vir,test_size=0.33, random_state=42)
hun_train,hun_test = train_test_split(hun,test_size=0.33, random_state=42)
swi_train,swi_test = train_test_split(swi,test_size=0.33, random_state=42)

In [5]:
Train = pd.concat([cle_train,vir_train,hun_train])
Test = pd.concat([cle_test,vir_test,hun_test,swi_test,swi_train])

In [6]:
X_train = Train.iloc[:,:-1]
X_test = Test.iloc[:,:-1]

y_train = Train.iloc[:,-1]
y_test = Test.iloc[:,-1]

Y_train_binary = y_train.apply(lambda x: 1 if x > 0 else 0)
Y_test_binary = y_test.apply(lambda x: 1 if x > 0 else 0)

# CNN

In [12]:
callback = tf.keras.callbacks.EarlyStopping(monitor='loss', patience=5)

model = Sequential()

model.add(Conv1D(filters=256, kernel_size=4, activation='relu', input_shape=(64,1)))
model.add(Conv1D(filters=64, kernel_size=4, activation='relu'))
model.add(MaxPooling1D(pool_size=2))

model.add(Conv1D(filters=48, kernel_size=4, activation='relu'))
model.add(MaxPooling1D(pool_size=2))

model.add(Flatten())
model.add(Dense(128, activation='relu'))
model.add(Dense(2, activation='sigmoid'))

model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

best_model.fit(X_train, Y_train_binary, epochs=1000, batch_size=32,callbacks=[callback])

Y_pred = best_model.predict(X_test).argmax(axis=1)
    
cm = confusion_matrix(Y_pred, Y_test_binary)
print(cm)
print(classification_report(Y_test_binary, Y_pred, digits=4))

Epoch 1/1000
Epoch 2/1000
Epoch 3/1000
Epoch 4/1000
Epoch 5/1000
Epoch 6/1000
Epoch 7/1000
Epoch 8/1000
[[135 252]
 [  0   0]]
              precision    recall  f1-score   support

           0     0.3488    1.0000    0.5172       135
           1     0.0000    0.0000    0.0000       252

    accuracy                         0.3488       387
   macro avg     0.1744    0.5000    0.2586       387
weighted avg     0.1217    0.3488    0.1804       387



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


# Hyperparameter Tuning

In [15]:
# Define your CNN model function
def build_model(hp):
    model = keras.Sequential()
    
    # Define the hyperparameter search space
    hp_filters_1 = hp.Int('num_filters', min_value=32, max_value=256, step=32)
    hp_filters_2 = hp.Int('num_filters', min_value=32, max_value=256, step=32)
    hp_filters_3 = hp.Int('num_filters', min_value=32, max_value=256, step=32)
    hp_kernel_size = hp.Int('kernel_size', min_value=3, max_value=5)
    hp_units = hp.Int('units', min_value=32, max_value=128, step=32)
    hp_dropout = hp.Float('dropout', min_value=0.2, max_value=0.5, step=0.1)
    
    # Add convolutional layers
    model.add(layers.Conv1D(hp_filters_1, hp_kernel_size, activation='relu', input_shape=(64,1)))
    model.add(layers.Conv1D(hp_filters_2, hp_kernel_size, activation='relu'))
    model.add(layers.MaxPooling1D(pool_size=2))
    model.add(Dropout(hp_dropout))
              
    model.add(layers.Conv1D(hp_filters_3, hp_kernel_size, activation='relu'))
    model.add(layers.MaxPooling1D(pool_size=2))
    model.add(Dropout(hp_dropout))
    
    # Add fully connected layers
    model.add(layers.Flatten())
    model.add(layers.Dense(hp_units, activation='relu'))
    model.add(layers.Dense(2, activation='sigmoid'))
    
    # Compile the model
    model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    
    return model

# Create a RandomSearch tuner
tuner = RandomSearch(
    build_model,
    objective='val_accuracy',  # Hyperparameter optimization goal
    max_trials=50,             # Number of trials (random combinations of hyperparameters)
    directory='random_search', # Directory to save results
    project_name='cnn_tuning'  # Name of the tuning project
)


# Perform the hyperparameter search
tuner.search(X_train, Y_train_binary, epochs=20, validation_split=0.2)

# Get the best hyperparameters
best_hps = tuner.get_best_hyperparameters(num_trials=1)[0]

# Build the final model with the best hyperparameters
best_model = tuner.hypermodel.build(best_hps)

#display the best model
best_hps = tuner.get_best_hyperparameters(num_trials=1)[0]

# Print the best hyperparameters
print("Best Hyperparameters:")
print(f"Number of Filters: {best_hps.get('num_filters')}")
print(f"Kernel Size: {best_hps.get('kernel_size')}")
print(f"Number of Units: {best_hps.get('units')}")
print(f"Dropout Rate: {best_hps.get('dropout')}")

Trial 50 Complete [00h 00m 04s]
val_accuracy: 0.663551390171051

Best val_accuracy So Far: 0.84112149477005
Total elapsed time: 00h 04m 15s
Best Hyperparameters:
Number of Filters: 192
Kernel Size: 5
Number of Units: 64
Dropout Rate: 0.2


In [8]:
callback = tf.keras.callbacks.EarlyStopping(monitor='loss', patience=5)

best_model.fit(X_train, Y_train_binary, epochs=1000, batch_size=32,callbacks=[callback])
Y_pred = best_model.predict(X_test).argmax(axis=1)
    
cm = confusion_matrix(Y_pred, Y_test_binary)
print(cm)
print(classification_report(Y_test_binary, Y_pred, digits=4))

Epoch 1/1000
Epoch 2/1000
Epoch 3/1000
Epoch 4/1000
Epoch 5/1000
Epoch 6/1000
Epoch 7/1000
Epoch 8/1000
Epoch 9/1000
Epoch 10/1000
Epoch 11/1000
Epoch 12/1000
Epoch 13/1000
Epoch 14/1000
Epoch 15/1000
Epoch 16/1000
Epoch 17/1000
Epoch 18/1000
Epoch 19/1000
[[135 252]
 [  0   0]]
              precision    recall  f1-score   support

           0     0.3488    1.0000    0.5172       135
           1     0.0000    0.0000    0.0000       252

    accuracy                         0.3488       387
   macro avg     0.1744    0.5000    0.2586       387
weighted avg     0.1217    0.3488    0.1804       387



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
