In [1]:
import pandas as pd
from io import StringIO
import io
from keras.models import Sequential
from keras.layers import LSTM, Dense
from keras.callbacks import EarlyStopping
from keras.metrics import FalseNegatives, FalsePositives
from keras.optimizers import Adam
import numpy as np
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score
import pickle

In [2]:
def predict_classes(model, x, batch_size=32, verbose=1):
    '''Generate class predictions for the input samples
    batch by batch.
    # Arguments
        x: input data, as a Numpy array or list of Numpy arrays
            (if the model has multiple inputs).
        batch_size: integer.
        verbose: verbosity mode, 0 or 1.
    # Returns
        A numpy array of class predictions.
    '''
    proba = model.predict(x, batch_size=batch_size, verbose=verbose)
    if proba.shape[-1] > 1:
        return proba.argmax(axis=-1)
    else:
        return (proba > 0.5).astype('int32')

In [3]:
base_path = "/content/drive/MyDrive/Okul/İTÜ/Bilgisayar Mühendisliği Yüksek Lisans/Yüksek Lisans/YL Tez/Development/Bot-Iot"
X_train = np.genfromtxt(f"{base_path}/Sampled/30-sampled-x-train.csv", delimiter=',')
y_train = np.genfromtxt(f"{base_path}/Sampled/30-sampled-y-train.csv", delimiter=',')
X_test = np.genfromtxt(f"{base_path}/Sampled/30-sampled-x-test.csv", delimiter=',')
y_test = np.genfromtxt(f"{base_path}/Sampled/30-sampled-y-test.csv", delimiter=',')

In [5]:
y_train = y_train.astype("int64")
y_test = y_test.astype("int64")

X_arr = np.asarray(X_train).astype(np.float32)
X_arr = np.resize(X_train,(X_train.shape[0],1,X_train.shape[1]))

y_arr = np.asarray(y_train).astype(np.float32)

In [12]:
# Model 
model_1 = Sequential()
model_1.add(LSTM(100, activation='tanh', return_sequences=True, input_shape=(1, X_train.shape[1])))
model_1.add(LSTM(49, activation='tanh'))
model_1.add(Dense(1, activation='sigmoid'))
history=model_1.compile(optimizer="adam", loss='binary_crossentropy', metrics=['FalseNegatives'])

callback = EarlyStopping(monitor='false_negatives', patience=5)
model_1.fit(X_arr,y_arr,batch_size=10, epochs=250, callbacks=[callback])
pickle.dump(model_1, open(f"{base_path}/Model/LSTM-model.pkl","wb"))

Epoch 1/250
Epoch 2/250
Epoch 3/250
Epoch 4/250
Epoch 5/250
Epoch 6/250
Epoch 7/250
Epoch 8/250
Epoch 9/250
Epoch 10/250
Epoch 11/250




In [13]:
model_1 = pickle.load(open(f"{base_path}/Model/LSTM-model.pkl", 'rb'))
X_t = np.asarray(X_test).astype(np.float32)
X_t = np.resize(X_t,(X_t.shape[0],1,X_t.shape[1]))
y_pred = predict_classes(model_1,X_t)
print("Model 1")
cm = confusion_matrix(y_test, y_pred)
print(cm)
print(classification_report(y_test, y_pred))
print(f"Accuracy: {accuracy_score(y_test, y_pred)}")
print(f"False Positive rate: {1 - (cm[0][1] / len(y_test))}")
print(f"False Negative rate: {1 - (cm[1][0] / len(y_test))}")

Model 1
[[2723    5]
 [   5 1174]]
              precision    recall  f1-score   support

           0       1.00      1.00      1.00      2728
           1       1.00      1.00      1.00      1179

    accuracy                           1.00      3907
   macro avg       1.00      1.00      1.00      3907
weighted avg       1.00      1.00      1.00      3907

Accuracy: 0.9974404914256463
False Positive rate: 0.9987202457128231
False Negative rate: 0.9987202457128231


In [14]:
np.savetxt(f"{base_path}/Model/lstm-y-pred.csv", y_pred, delimiter=",")