In [58]:
import pandas as pd 
import numpy as np 
import matplotlib.pyplot as plt 
import seaborn
from sklearn.preprocessing import StandardScaler,MinMaxScaler
from sklearn.model_selection import train_test_split
import torch
import torch.nn as nn
from torch.utils.data import Dataset,DataLoader
import torch.optim as opt
from sklearn.metrics import accuracy_score


In [2]:
columns= [
        'duration', 'protocol_type', 'service', 'flag', 'src_bytes', 'dst_bytes', 'land',
        'wrong_fragment', 'urgent', 'hot', 'num_failed_logins', 'logged_in', 'num_compromised',
        'root_shell', 'su_attempted', 'num_root', 'num_file_creations', 'num_shells',
        'num_access_files', 'num_outbound_cmds', 'is_host_login', 'is_guest_login', 'count',
        'srv_count', 'serror_rate', 'srv_serror_rate', 'rerror_rate', 'srv_rerror_rate',
        'same_srv_rate', 'diff_srv_rate', 'srv_diff_host_rate', 'dst_host_count',
        'dst_host_srv_count', 'dst_host_same_srv_rate', 'dst_host_diff_srv_rate',
        'dst_host_same_src_port_rate', 'dst_host_srv_diff_host_rate', 'dst_host_serror_rate',
        'dst_host_srv_serror_rate', 'dst_host_rerror_rate', 'dst_host_srv_rerror_rate',
        'attack_label', 'difficulty'
    ]

In [61]:
train=pd.read_csv('data/KDDTrain+.txt',names=columns)
test=pd.read_csv('data/KDDTest+.txt',names=columns)

In [49]:
train.shape

(125973, 43)

In [None]:
#convert categorical features to numerical  for the train dataset
train['protocol_type']=train['protocol_type'].astype('category').cat.codes
train['service']=train['service'].astype('category').cat.codes
train['flag']=train['flag'].astype('category').cat.codes

In [63]:
#convert categorical features to numerical for the test dataset
test['protocol_type']=test['protocol_type'].astype('category').cat.codes
test['service']=test['service'].astype('category').cat.codes
test['flag']=test['flag'].astype('category').cat.codes

In [46]:
X=train.drop('attack_label',axis=1)
X=X.values
y=train['attack_label'].apply(lambda x:0 if x=="normal" else 1)
y=y.values

In [64]:
x=test.drop('attack_label',axis=1)
x=x.values
Y=test['attack_label'].apply(lambda x:0 if x=="normal" else 1)
Y=Y.values

In [47]:
X_train,X_test,Y_train,Y_test=train_test_split(X,y,test_size=0.2,random_state=42)

In [48]:
X_train.shape

(100778, 42)

In [30]:
class NSLKDD(Dataset):
    def __init__(self,data,lables):
        self.data=torch.tensor(data,dtype=torch.float32)
        self.lables=torch.tensor(lables,dtype=torch.long)
    def __len__(self):
        return len(self.data)
    def __getitem__(self, index):
         return self.data[index], self.lables[index]
    

In [36]:
train_dataset=NSLKDD(X_train,Y_train)
test_dataset=NSLKDD(X_test,Y_test)

In [65]:
val_dataset=NSLKDD(x,Y)

In [37]:
batch_size=32
train_loader=DataLoader(train_dataset,batch_size=batch_size,shuffle=True)
test_loader=DataLoader(test_dataset,batch_size=batch_size,shuffle=False)

In [66]:
val_loader=DataLoader(val_dataset,batch_size=batch_size,shuffle=False)

In [57]:
#batch shape
test_batch_shape=next(iter(test_loader))[0].shape
print(test_batch_shape)


torch.Size([32, 42])


In [56]:
#creating the LSTM model 
input_dim=42
hidden_dim=128
output_dim=2

lstm=nn.LSTM(input_size=input_dim,hidden_size=hidden_dim,num_layers=1,batch_first=True)
fc=nn.Linear(hidden_dim,output_dim)

def init_hidden(x):
    h0=torch.zeros(1,x.size(0),hidden_dim)
    c0=torch.zeros(1,x.size(0),hidden_dim)
    return h0,c0

def forward(x):
    h0,c0=init_hidden(x)
    out,_=lstm(x,(h0,c0))
    out=out[:,-1,:]
    out=fc(out)
    return out

cri=nn.CrossEntropyLoss()
opt=opt.Adam(list(lstm.parameters())+list(fc.parameters()),lr=0.001)

for epoch in range(10):
    for batch in train_loader:
        inputs,lables=batch
        inputs=inputs.view(-1,1,input_dim)
        opt.zero_grad()
        outputs=forward(inputs)
        loss=cri(outputs,lables)
        loss.backward()
        opt.step()

    print(f"epoch {epoch +1} loss : {loss.item()}")




epoch 1 loss : 0.4198460578918457
epoch 2 loss : 0.02098611183464527
epoch 3 loss : 0.009137967601418495
epoch 4 loss : 0.0850154384970665
epoch 5 loss : 0.001560243428684771
epoch 6 loss : 0.0006209042621776462
epoch 7 loss : 0.007306045852601528
epoch 8 loss : 0.0011268535163253546
epoch 9 loss : 0.02387847565114498
epoch 10 loss : 0.0006822795839980245


In [59]:
model_output=[]

with torch.no_grad():
    for  batch in test_loader:
        inputs,lables=batch
        inputs=inputs.view(-1,1,input_dim)
        outputs=forward(inputs)
        model_output.extend(torch.argmax(outputs,dim=1))




print(f"accuracy {accuracy_score(Y_test,model_output)}")


accuracy 0.9864258781504267


In [68]:
model_output_val=[
    
]

with torch.no_grad():
    for batch in val_loader:
        inputs,lables=batch
        inputs=inputs.view(-1,1,input_dim)
        outputs=forward(inputs)
        model_output_val.extend(torch.argmax(outputs,dim=1))
print(f"accuracy {accuracy_score(Y,model_output_val)}")

accuracy 0.7978619588360539
