In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from keras_tuner.tuners import RandomSearch
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score
from keras_tuner import HyperModel, Objective


In [2]:
feature_list =['dstport',
  'dstport',
  'dstport_class',
  'http.chat',
  'http.notification',
  'http.request.method',
  'ip.flags.df',
  'ip.len',
  'ip.proto',
  'ip.ttl',
  'srcport',
  'srcport_class',
  'tcp.ack',
  'tcp.analysis.ack_rtt',
  'tcp.analysis.bytes_in_flight',
  'tcp.analysis.initial_rtt',
  'tcp.analysis.push_bytes_sent',
  'tcp.completeness',
  'tcp.dstport',
  'tcp.flags',
  'tcp.flags.push',
  'tcp.flags.str',
  'tcp.flags.syn',
  'tcp.hdr_len',
  'tcp.len',
  'tcp.nxtseq',
  'tcp.srcport',
  'tcp.stream',
  'tcp.time_delta',
  'tcp.time_relative',
  'tcp.window_size',
  'tcp.window_size_scalefactor',
  'tcp.window_size_value',
  'tls.record.length',
  'udp.checksum.status',
  'udp.dstport',
  'udp.srcport',
  'udp.time_delta',
  'udp.time_relative',"Label"]
file_list={"HPO":['./small/AD-S1.csv','./small/AD-S2.csv']}

j="HPO"

df=pd.read_csv(file_list[j][0],usecols=feature_list) 
X_train = df.iloc[:,0:-1]
df['Label'] = df['Label'].astype('category')
y_train=df['Label'].cat.codes  


df=pd.read_csv(file_list[j][1],usecols=feature_list) 
X_test = df.iloc[:,0:-1]
df['Label'] = df['Label'].astype('category')
y_test=df['Label'].cat.codes  


# Veriyi LSTM modeline uygun hale getirme
X_train = np.reshape(X_train.values, (X_train.shape[0],  X_train.shape[1],1))
X_test = np.reshape(X_test.values, (X_test.shape[0],  X_test.shape[1],1))

In [3]:
X_train.shape

(20262, 38, 1)

In [4]:
def build_model(hp):
    model = keras.Sequential()
    model.add(layers.LSTM(units=hp.Int('units', min_value=32, max_value=128, step=32),
                          return_sequences=True,
                          input_shape=(X_train.shape[1], X_train.shape[2])))
    
    model.add(layers.Dropout(rate=hp.Float('dropout_1', min_value=0.0, max_value=0.5, step=0.1)))
    
    for i in range(hp.Int('num_layers', 1, 3)):
        model.add(layers.LSTM(units=hp.Int(f'lstm_{i}_units', min_value=32, max_value=128, step=32),
                              return_sequences=True))
        model.add(layers.Dropout(rate=hp.Float(f'dropout_{i+2}', min_value=0.0, max_value=0.5, step=0.1)))
    
    model.add(layers.LSTM(units=hp.Int('units_last', min_value=32, max_value=128, step=32)))
    model.add(layers.Dropout(rate=hp.Float('dropout_last', min_value=0.0, max_value=0.5, step=0.1)))
    
    model.add(layers.Dense(21, activation='softmax'))  # Çok sınıflı bir problem için 3 sınıf var
    
    model.compile(optimizer=keras.optimizers.Adam(hp.Choice('learning_rate', values=[1e-2, 1e-3, 1e-4])),
                  loss='sparse_categorical_crossentropy',
                  metrics=[F1Score()])  # F1Score'u kullanarak ölçüm yapalım
    
    return model

class F1Score(keras.metrics.Metric):
    def __init__(self, name='f1_score', **kwargs):
        super(F1Score, self).__init__(name=name, **kwargs)
        self.true_positives = self.add_weight(name='tp', initializer='zeros')
        self.false_positives = self.add_weight(name='fp', initializer='zeros')
        self.false_negatives = self.add_weight(name='fn', initializer='zeros')

    def update_state(self, y_true, y_pred, sample_weight=None):
        y_pred = tf.argmax(y_pred, axis=1)
        y_true = tf.cast(y_true, tf.int64)
        y_pred = tf.cast(y_pred, tf.int64)
        
        # True positives
        tp = tf.reduce_sum(tf.cast(tf.logical_and(tf.equal(y_true, 1), tf.equal(y_pred, 1)), tf.float32))
        # False positives
        fp = tf.reduce_sum(tf.cast(tf.logical_and(tf.equal(y_true, 0), tf.equal(y_pred, 1)), tf.float32))
        # False negatives
        fn = tf.reduce_sum(tf.cast(tf.logical_and(tf.equal(y_true, 1), tf.equal(y_pred, 0)), tf.float32))

        self.true_positives.assign_add(tp)
        self.false_positives.assign_add(fp)
        self.false_negatives.assign_add(fn)

    def result(self):
        precision = self.true_positives / (self.true_positives + self.false_positives + keras.backend.epsilon())
        recall = self.true_positives / (self.true_positives + self.false_negatives + keras.backend.epsilon())
        f1 = 2 * precision * recall / (precision + recall + keras.backend.epsilon())
        return f1

    def reset_state(self):
        self.true_positives.assign(0)
        self.false_positives.assign(0)
        self.false_negatives.assign(0)

tuner = RandomSearch(
    build_model,
    objective=Objective("val_f1_score", direction="max"),
    max_trials=5,
    directory='LSTMmy_dir',
    project_name='lstm_hyperparameter_tuning')

tuner.search(X_train, y_train,
             epochs=10,
             validation_data=(X_test, y_test))

best_model = tuner.get_best_models(num_models=1)[0]

# En iyi modelin değerlendirilmesi
y_pred = best_model.predict(X_test)
y_pred_classes = np.argmax(y_pred, axis=1)
f1 = f1_score(y_test, y_pred_classes, average='macro')  # Weighted F1 score'u kullanıyoruz
print("Weighted F1 Score:", f1)

best_model.summary()
print("En iyi modelin hiperparametreleri:")
best_hyperparameters = tuner.get_best_hyperparameters(num_trials=1)[0]
print(best_hyperparameters.values)

Trial 5 Complete [00h 16m 37s]
val_f1_score: 0.9957537055015564

Best val_f1_score So Far: 0.9961860775947571
Total elapsed time: 01h 20m 52s


  trackable.load_own_variables(weights_store.get(inner_path))


[1m638/638[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m30s[0m 43ms/step
Weighted F1 Score: 0.37569995152105623


En iyi modelin hiperparametreleri:
{'units': 32, 'dropout_1': 0.30000000000000004, 'num_layers': 3, 'lstm_0_units': 96, 'dropout_2': 0.30000000000000004, 'units_last': 128, 'dropout_last': 0.30000000000000004, 'learning_rate': 0.0001, 'lstm_1_units': 32, 'dropout_3': 0.0, 'lstm_2_units': 32, 'dropout_4': 0.0}
