In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from keras_tuner.tuners import RandomSearch
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score
from keras_tuner import HyperModel, Objective
from sklearn import preprocessing

# Veri yükleme
feature_list =[  'dstport',
  'dstport_class',
  'http.chat',
  'http.notification',
  'http.request.method',
  'ip.flags.df',
  'ip.len',
  'ip.proto',
  'ip.ttl',
  'srcport',
  'srcport_class',
  'tcp.ack',
  'tcp.analysis.ack_rtt',
  'tcp.analysis.bytes_in_flight',
  'tcp.analysis.initial_rtt',
  'tcp.analysis.push_bytes_sent',
  'tcp.completeness',
  'tcp.dstport',
  'tcp.flags',
  'tcp.flags.push',
  'tcp.flags.str',
  'tcp.flags.syn',
  'tcp.hdr_len',
  'tcp.len',
  'tcp.nxtseq',
  'tcp.srcport',
  'tcp.stream',
  'tcp.time_delta',
  'tcp.time_relative',
  'tcp.window_size',
  'tcp.window_size_scalefactor',
  'tcp.window_size_value',
  'tls.record.length',
  'udp.checksum.status',
  'udp.dstport',
  'udp.srcport',
  'udp.time_delta',
  'udp.time_relative',"Label"]
file_list={"HPO":['./small/AD-S1.csv','./small/AD-S2.csv']}

j="HPO"


# CNN

In [2]:
df=pd.read_csv(file_list[j][0],usecols=feature_list) 
X_train = df.iloc[:,0:-1]
df['Label'] = df['Label'].astype('category')
y_train=df['Label'].cat.codes  

df=pd.read_csv(file_list[j][1],usecols=feature_list) 
X_test = df.iloc[:,0:-1]
df['Label'] = df['Label'].astype('category')
y_test=df['Label'].cat.codes  



min_max_scaler = preprocessing.MinMaxScaler()
X_train = min_max_scaler.fit_transform(X_train)
X_test = min_max_scaler.fit_transform(X_test)
# Veriyi CNN modeline uygun hale getirme
X_train = np.reshape(X_train, (X_train.shape[0], X_train.shape[1], 1))
X_test = np.reshape(X_test, (X_test.shape[0], X_test.shape[1], 1))

# CNN modeli oluşturma
def build_model(hp):
    model = keras.Sequential()
    model.add(layers.Conv1D(filters=hp.Int('filters', min_value=32, max_value=128, step=32),
                             kernel_size=hp.Int('kernel_size', min_value=3, max_value=5),
                             activation='relu',
                             input_shape=(X_train.shape[1], X_train.shape[2])))
    model.add(layers.MaxPooling1D(pool_size=2))
    model.add(layers.Flatten())
    
    for i in range(hp.Int('num_layers', 1, 3)):
        model.add(layers.Dense(units=hp.Int(f'dense_{i}_units', min_value=32, max_value=128, step=32),
                               activation='relu'))
        model.add(layers.Dropout(rate=hp.Float(f'dropout_{i}', min_value=0.0, max_value=0.5, step=0.1)))
    
    model.add(layers.Dense(21, activation='softmax'))  # Çok sınıflı bir problem için 3 sınıf var
    
    model.compile(optimizer=keras.optimizers.Adam(hp.Choice('learning_rate', values=[1e-2, 1e-3, 1e-4])),
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])  # F1Score'u kullanarak ölçüm yapalım
    
    return model



# Hiperparametre araması yapma
tuner = RandomSearch(
    build_model,
    objective='val_accuracy',#Objective("val_f1_score", direction="max"),
    max_trials=25,
    directory='CNNmy_dir',
    project_name='cnn_hyperparameter_tuning')

tuner.search(X_train, y_train,
             epochs=20,
             validation_data=(X_test, y_test))

# En iyi modeli seçme ve değerlendirme
best_model = tuner.get_best_models(num_models=1)[0]

y_pred = best_model.predict(X_test)
y_pred_classes = np.argmax(y_pred, axis=1)
f1 = f1_score(y_test, y_pred_classes, average='macro')  # Weighted F1 score'u kullanıyoruz
print("Weighted F1 Score:", f1)

best_model.summary()
best_hyperparameters = tuner.get_best_hyperparameters(num_trials=1)[0]
print("En iyi modelin hiperparametreleri:")


Trial 25 Complete [00h 02m 03s]
val_accuracy: 0.7481626868247986

Best val_accuracy So Far: 0.8028907179832458
Total elapsed time: 01h 14m 12s


  trackable.load_own_variables(weights_store.get(inner_path))


[1m638/638[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step
Weighted F1 Score: 0.7942040611262546


En iyi modelin hiperparametreleri:


# LSTM

In [3]:
df=pd.read_csv(file_list[j][0],usecols=feature_list) 
X_train = df.iloc[:,0:-1]
df['Label'] = df['Label'].astype('category')
y_train=df['Label'].cat.codes  


df=pd.read_csv(file_list[j][1],usecols=feature_list) 
X_test = df.iloc[:,0:-1]
df['Label'] = df['Label'].astype('category')
y_test=df['Label'].cat.codes  


X_train = min_max_scaler.fit_transform(X_train)
X_test = min_max_scaler.fit_transform(X_test)

# Veriyi LSTM modeline uygun hale getirme
X_train = np.reshape(X_train, (X_train.shape[0],  X_train.shape[1],1))
X_test = np.reshape(X_test, (X_test.shape[0],  X_test.shape[1],1))

In [4]:
def build_model(hp):
    model = keras.Sequential()
    model.add(layers.LSTM(units=hp.Int('units', min_value=32, max_value=128, step=32),
                          return_sequences=True,
                          input_shape=(X_train.shape[1], X_train.shape[2])))
    
    model.add(layers.Dropout(rate=hp.Float('dropout_1', min_value=0.0, max_value=0.5, step=0.1)))
    
    for i in range(hp.Int('num_layers', 1, 3)):
        model.add(layers.LSTM(units=hp.Int(f'lstm_{i}_units', min_value=32, max_value=128, step=32),
                              return_sequences=True))
        model.add(layers.Dropout(rate=hp.Float(f'dropout_{i+2}', min_value=0.0, max_value=0.5, step=0.1)))
    
    model.add(layers.LSTM(units=hp.Int('units_last', min_value=32, max_value=128, step=32)))
    model.add(layers.Dropout(rate=hp.Float('dropout_last', min_value=0.0, max_value=0.5, step=0.1)))
    
    model.add(layers.Dense(21, activation='softmax'))  # Çok sınıflı bir problem için 3 sınıf var
    
    model.compile(optimizer=keras.optimizers.Adam(hp.Choice('learning_rate', values=[1e-2, 1e-3, 1e-4])),
                  loss='sparse_categorical_crossentropy',
                  metrics=["accuracy"])  # F1Score'u kullanarak ölçüm yapalım
    
    return model


tuner = RandomSearch(
    build_model,
    objective=Objective("val_accuracy", direction="max"),
    max_trials=25,
    directory='LSTMmy_dir',
    project_name='lstm_hyperparameter_tuning')

tuner.search(X_train, y_train,
             epochs=20,
             validation_data=(X_test, y_test))

best_model = tuner.get_best_models(num_models=1)[0]

# En iyi modelin değerlendirilmesi
y_pred = best_model.predict(X_test)
y_pred_classes = np.argmax(y_pred, axis=1)
f1 = f1_score(y_test, y_pred_classes, average='macro')  # Weighted F1 score'u kullanıyoruz
print("Weighted F1 Score:", f1)

best_model.summary()
print("En iyi modelin hiperparametreleri:")
best_hyperparameters = tuner.get_best_hyperparameters(num_trials=1)[0]
print(best_hyperparameters.values)

Trial 25 Complete [00h 25m 46s]
val_accuracy: 0.7944635152816772

Best val_accuracy So Far: 0.7944635152816772
Total elapsed time: 11h 31m 50s


  trackable.load_own_variables(weights_store.get(inner_path))


[1m638/638[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m23s[0m 36ms/step
Weighted F1 Score: 0.7829057981566172


En iyi modelin hiperparametreleri:
{'units': 64, 'dropout_1': 0.1, 'num_layers': 1, 'lstm_0_units': 64, 'dropout_2': 0.30000000000000004, 'units_last': 128, 'dropout_last': 0.4, 'learning_rate': 0.01, 'lstm_1_units': 96, 'dropout_3': 0.0, 'lstm_2_units': 32, 'dropout_4': 0.4}


# MLP

In [5]:
df=pd.read_csv(file_list[j][0],usecols=feature_list) 
X_train = df.iloc[:,0:-1]
df['Label'] = df['Label'].astype('category')
y_train=df['Label'].cat.codes  

df=pd.read_csv(file_list[j][1],usecols=feature_list) 
X_test = df.iloc[:,0:-1]
df['Label'] = df['Label'].astype('category')
y_test=df['Label'].cat.codes  

# Veriyi ANN modeline uygun hale getirme

X_train = min_max_scaler.fit_transform(X_train)
X_test = min_max_scaler.fit_transform(X_test)


In [6]:
# ANN modeli oluşturma
def build_model(hp):
    model = keras.Sequential()
    model.add(layers.Dense(units=hp.Int('units_input', min_value=32, max_value=128, step=32),
                           activation='relu',
                           input_shape=(X_train.shape[1],)))
    
    for i in range(hp.Int('num_layers', 1, 3)):
        model.add(layers.Dense(units=hp.Int(f'units_{i}', min_value=32, max_value=128, step=32),
                               activation='relu'))
        model.add(layers.Dropout(rate=hp.Float(f'dropout_{i}', min_value=0.0, max_value=0.5, step=0.1)))
    
    model.add(layers.Dense(21, activation='softmax'))  # Çok sınıflı bir problem için 3 sınıf var
    
    model.compile(optimizer=keras.optimizers.Adam(hp.Choice('learning_rate', values=[1e-2, 1e-3, 1e-4])),
                  loss='sparse_categorical_crossentropy',
                  metrics=["accuracy"])  # F1Score'u kullanarak ölçüm yapalım
    
    return model



# Hiperparametre araması yapma
tuner = RandomSearch(
    build_model,
    objective=Objective("val_accuracy", direction="max"),
    max_trials=25,
    directory='ANNmy_dir',
    project_name='ann_hyperparameter_tuning')

tuner.search(X_train, y_train,
             epochs=20,
             validation_data=(X_test, y_test))

# En iyi modeli seçme ve değerlendirme
best_model = tuner.get_best_models(num_models=1)[0]

y_pred = best_model.predict(X_test)
y_pred_classes = np.argmax(y_pred, axis=1)
f1 = f1_score(y_test, y_pred_classes, average='macro')  # Weighted F1 score'u kullanıyoruz
print("Weighted F1 Score:", f1)

best_model.summary()
best_hyperparameters = tuner.get_best_hyperparameters(num_trials=1)[0]
print("En iyi modelin hiperparametreleri:")
print(best_hyperparameters.values)

Trial 25 Complete [00h 01m 37s]
val_accuracy: 0.6356688141822815

Best val_accuracy So Far: 0.8384615182876587
Total elapsed time: 00h 39m 40s
[1m  1/638[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m1:00[0m 94ms/step

  trackable.load_own_variables(weights_store.get(inner_path))


[1m638/638[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step
Weighted F1 Score: 0.8257436809728217


En iyi modelin hiperparametreleri:
{'units_input': 128, 'num_layers': 1, 'units_0': 96, 'dropout_0': 0.0, 'learning_rate': 0.01, 'units_1': 96, 'dropout_1': 0.4, 'units_2': 96, 'dropout_2': 0.2}
