In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.impute import SimpleImputer
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import StandardScaler
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Dropout
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

In [None]:
# Read the combined capture export, then replace the textual 'Protocol'
# column with the integer codes produced by a LabelEncoder.
data = pd.read_csv('combined_data.csv')
le = LabelEncoder()
label = le.fit_transform(data['Protocol'])
data = data.assign(Protocol=label)

In [None]:
# Remove the column sitting at position 0, whatever it is called.
# NOTE(review): this is positional and reassigns `data`, so re-running the
# cell removes one more column each time.
data = data.drop(columns=data.columns[:1])

In [None]:
# Summary statistics (count, mean, std, min, quartiles, max) of the frame's columns.
data.describe()

Unnamed: 0,Time,Protocol,Length,Target
count,317239.0,317239.0,317239.0,317239.0
mean,67.4588,14.254222,492.194393,0.94565
std,191.6876,3.108322,952.918346,0.226708
min,3.61e-07,0.0,42.0,0.0
25%,0.5406614,14.0,66.0,1.0
50%,1.879724,14.0,74.0,1.0
75%,52.3809,17.0,1152.0,1.0
max,969.9905,17.0,44954.0,1.0


In [None]:
# Keep only the rows that have no missing value in any column.
data = data.dropna(axis=0, how='any')


In [None]:
# Pairwise correlation (pandas default method) between the numeric columns.
# `data` still carries text columns at this point, so the numeric-only
# behaviour is requested explicitly: relying on the implicit default emits a
# deprecation warning on pandas 1.5 and raises on pandas >= 2.0.
corr_matrix = data.corr(numeric_only=True)
corr_matrix

  corr_matrix = data.corr()


Unnamed: 0,Time,Protocol,Length,Target
Time,1.0,-0.167834,-0.047586,-0.95612
Protocol,-0.167834,1.0,-0.156231,0.104349
Length,-0.047586,-0.156231,1.0,0.016479
Target,-0.95612,0.104349,0.016479,1.0


In [None]:
# Candidate predictors and the label column used by the cells below.
X = data.loc[:, ['Time', 'Protocol', 'Length']]
y = data.loc[:, 'Target']

In [None]:
# Fit a 100-tree random forest on the candidate features and rank the
# feature positions from most to least important.
rf = RandomForestRegressor(random_state=42, n_estimators=100).fit(X, y)
importances = rf.feature_importances_
# Stable descending sort of (position, importance) pairs; keep the positions only.
indices = [
    position
    for position, _ in sorted(enumerate(importances), key=lambda pair: pair[1], reverse=True)
]

In [None]:
# Narrow the predictors to 'Time' and 'Length' and standardise both columns.
# NOTE(review): the scaler is fitted on every row, before the train/test
# split performed further down in this notebook.
X = data.loc[:, ['Time', 'Length']]
scaler = StandardScaler()
scaler.fit(X)
X_scaled = scaler.transform(X)
X_scaled


array([[-0.33721991, -0.45984536],
       [-0.33719915, -0.45984536],
       [-0.33694828, -0.45984536],
       ...,
       [-0.33453992,  1.07229252],
       [-0.3345398 ,  1.07229252],
       [-0.33453965,  1.07229252]])

In [None]:
# Rebuild X from every column of `data` except the last one, then overwrite
# its first column with the sign-flipped standardised 'Time' values.
# NOTE(review): judging by the output recorded for this cell, the result
# still contains address and info strings (an object array) and the scaled
# 'Length' from X_scaled is not used -- confirm this is the intended matrix.
all_but_last = data.iloc[:, :-1]
X = all_but_last.values
X[:, 0] = -X_scaled[:, 0]
X


array([[0.3372199095369218, '192.168.112.128', '49.44.194.34', 14, 54,
        '40492  >  80 [ACK] Seq=1 Ack=1 Win=64008 Len=0'],
       [0.3371991473364919, '192.168.112.128', '49.44.194.34', 14, 54,
        '40510  >  80 [ACK] Seq=1 Ack=1 Win=63936 Len=0'],
       [0.33694828393219456, '192.168.112.128', '152.195.38.76', 14, 54,
        '49232  >  80 [ACK] Seq=1 Ack=1 Win=63812 Len=0'],
       ...,
       [0.3345399221130892, '192.168.112.128', '192.168.112.129', 14,
        1514,
        '49876  >  80 [ACK] Seq=12450561 Ack=1 Win=501 Len=1448 TSval=1204965619 TSecr=777797 [TCP segment of a reassembled PDU]'],
       [0.3345397970135399, '192.168.112.128', '192.168.112.129', 14,
        1514,
        '52492  >  80 [ACK] Seq=10219313 Ack=1 Win=501 Len=1448 TSval=1204965619 TSecr=777797 [TCP segment of a reassembled PDU]'],
       [0.3345396510049417, '192.168.112.128', '192.168.112.129', 14,
        1514,
        '52456  >  80 [PSH, ACK] Seq=10889281 Ack=1 Win=501 Len=1448 TSval=12049

In [None]:
# Display the 'Target' label series selected earlier.
y


0         0
1         0
2         0
3         0
4         0
         ..
317234    1
317235    1
317236    1
317237    1
317238    1
Name: Target, Length: 317239, dtype: int64

In [None]:
# 80/20 split with a fixed seed; each of the four parts is converted to a
# NumPy array straight away.
X_train, X_test, y_train, y_test = (
    np.asarray(part)
    for part in train_test_split(X, y, train_size=0.8, random_state=42)
)

In [None]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

In [None]:
# Small recurrent classifier: each input is a 28-step sequence of 28 values,
# fed through a 64-unit GRU, batch normalisation and a 10-way linear head
# (the Dense layer has no activation, so it outputs raw logits).
model = keras.Sequential(
    [
        keras.Input(shape=(28, 28)),
        layers.GRU(64),
        layers.BatchNormalization(),
        layers.Dense(10),
    ]
)
# summary() prints the table itself and returns None, so wrapping it in
# print() only appended a stray "None" line to the output.
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 gru (GRU)                   (None, 64)                18048     
                                                                 
 batch_normalization (BatchN  (None, 64)               256       
 ormalization)                                                   
                                                                 
 dense (Dense)               (None, 10)                650       
                                                                 
Total params: 18,954
Trainable params: 18,826
Non-trainable params: 128
_________________________________________________________________
None


In [None]:
# Load MNIST, rescale the pixel values by 1/255, and carve the test split
# into a validation part (everything but the last 10 samples) and a 10-sample
# hold-out.
# NOTE(review): these assignments replace the X_train / X_test / y_train /
# y_test produced by the earlier train_test_split cell.
mnist = keras.datasets.mnist
(X_train, y_train), (X_test, y_test) = mnist.load_data()
X_train = X_train / 255.0
X_test = X_test / 255.0
X_validate, X_test = X_test[:-10], X_test[-10:]
y_validate, y_test = y_test[:-10], y_test[-10:]

In [None]:
# Integer class labels + raw logits from the model, hence the sparse
# categorical cross-entropy with from_logits=True; plain SGD optimiser.
model.compile(
    optimizer="sgd",
    loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    metrics=["accuracy"],
)

In [None]:
# First training run: 10 epochs, mini-batches of 64, validated after each
# epoch on the validation split.
model.fit(
    x=X_train,
    y=y_train,
    batch_size=64,
    epochs=10,
    validation_data=(X_validate, y_validate),
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x28cb635d240>

In [None]:
# Compare predicted and true labels for the first 10 hold-out samples.
# The samples are scored in one batched predict() call instead of one call
# per sample; predict() is meant for batches and is slow when invoked
# repeatedly inside a Python loop.
N_PREVIEW = 10
logits = model.predict(X_test[:N_PREVIEW])
predicted = tf.argmax(logits, axis=1).numpy()
for i in range(len(predicted)):
    # One-element slice keeps the original "[7] 7" print format.
    print(predicted[i:i + 1], y_test[i])

[7] 7
[8] 8
[9] 9
[0] 0
[1] 1
[2] 2
[3] 3
[4] 4
[5] 5
[6] 6


In [None]:
# Second training run on the same `model` object (it is not rebuilt in
# between): 25 more epochs with mini-batches of 32.
model.fit(
    x=X_train,
    y=y_train,
    batch_size=32,
    epochs=25,
    validation_data=(X_validate, y_validate),
)

Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25


<keras.callbacks.History at 0x28cb9126710>