In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.preprocessing import MinMaxScaler
from torch.utils.data import Dataset,DataLoader

In [2]:
columns= [
        'duration', 'protocol_type', 'service', 'flag', 'src_bytes', 'dst_bytes', 'land',
        'wrong_fragment', 'urgent', 'hot', 'num_failed_logins', 'logged_in', 'num_compromised',
        'root_shell', 'su_attempted', 'num_root', 'num_file_creations', 'num_shells',
        'num_access_files', 'num_outbound_cmds', 'is_host_login', 'is_guest_login', 'count',
        'srv_count', 'serror_rate', 'srv_serror_rate', 'rerror_rate', 'srv_rerror_rate',
        'same_srv_rate', 'diff_srv_rate', 'srv_diff_host_rate', 'dst_host_count',
        'dst_host_srv_count', 'dst_host_same_srv_rate', 'dst_host_diff_srv_rate',
        'dst_host_same_src_port_rate', 'dst_host_srv_diff_host_rate', 'dst_host_serror_rate',
        'dst_host_srv_serror_rate', 'dst_host_rerror_rate', 'dst_host_srv_rerror_rate',
        'attack_label', 'difficulty'
    ]

In [37]:
train=pd.read_csv('data/KDDTrain+.txt',names=columns)

In [12]:
train.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 125973 entries, 0 to 125972
Data columns (total 43 columns):
 #   Column                       Non-Null Count   Dtype  
---  ------                       --------------   -----  
 0   duration                     125973 non-null  int64  
 1   protocol_type                125973 non-null  object 
 2   service                      125973 non-null  object 
 3   flag                         125973 non-null  object 
 4   src_bytes                    125973 non-null  int64  
 5   dst_bytes                    125973 non-null  int64  
 6   land                         125973 non-null  int64  
 7   wrong_fragment               125973 non-null  int64  
 8   urgent                       125973 non-null  int64  
 9   hot                          125973 non-null  int64  
 10  num_failed_logins            125973 non-null  int64  
 11  logged_in                    125973 non-null  int64  
 12  num_compromised              125973 non-null  int64  
 13 

In [38]:
train['protocol_type']=train['protocol_type'].astype('category').cat.codes
train['service']=train['service'].astype('category').cat.codes
train['flag']=train['flag'].astype('category').cat.codes

In [39]:
X=train.drop('attack_label',axis=1)
X=X.values
y=train['attack_label'].apply(lambda x:0 if x=="normal" else 1)
y=y.values

In [50]:

#categorical_features = ['protocol_type', 'service', 'flag']
#df_encoded = pd.get_dummies(df, columns=categorical_features)

#df_processed = df_encoded.drop('attack_label', axis=1)
scaler = MinMaxScaler()
df_normalized = scaler.fit_transform(X)


In [41]:
class NSLKDD(Dataset):
    def __init__(self,data,lables):
        self.data=torch.tensor(data,dtype=torch.float32)
        self.lables=torch.tensor(lables,dtype=torch.long)
    def __len__(self):
        return len(self.data)
    def __getitem__(self, index):
         return self.data[index], self.lables[index]
    

In [51]:
dataset=NSLKDD(df_normalized,y)
data_loader=DataLoader(dataset,batch_size=32,shuffle=True)


In [48]:
train_batch=next(iter(data_loader))[0]
print(train_batch.shape)

torch.Size([32, 42])


In [43]:

model=nn.Sequential(
    nn.Conv1d(1,10,kernel_size=3),
    nn.BatchNorm1d(10),
    nn.ReLU(),
    nn.MaxPool1d(2,2),
    nn.Flatten(),
    nn.Linear(200,128),
    nn.ReLU(),
    nn.Linear(128,2)
)

In [52]:
crit=nn.CrossEntropyLoss()
opt=optim.Adam(model.parameters(),lr=0.01)

for epoch in range(20):
    for batch_idx,(data,lable) in enumerate(data_loader):
        
        opt.zero_grad()
        data=data.unsqueeze(1)
        outputs=model(data)
        loss=crit(outputs,lable)
        loss.backward()
        opt.step()
    print(f"epoch :{epoch+1}, loss={loss.item()}")

epoch :1, loss=0.0046921721659600735
epoch :2, loss=0.0004011231940239668
epoch :3, loss=0.07911551743745804
epoch :4, loss=0.016105834394693375
epoch :5, loss=4.983932740287855e-06
epoch :6, loss=0.01872785948216915
epoch :7, loss=3.0028663786652032e-06
epoch :8, loss=0.0009316937648691237
epoch :9, loss=0.0003788192698266357
epoch :10, loss=0.030357426032423973
epoch :11, loss=0.0009485709597356617
epoch :12, loss=0.002627538051456213
epoch :13, loss=5.2336647058837116e-05
epoch :14, loss=1.3101166587148327e-05
epoch :15, loss=0.008454369381070137
epoch :16, loss=6.623101944569498e-05
epoch :17, loss=3.431428922340274e-05
epoch :18, loss=2.668010381512431e-07
epoch :19, loss=1.1561968676687684e-05
epoch :20, loss=0.0008123325533233583


In [53]:
test=pd.read_csv('data/KDDTest+.txt',names=columns)

In [54]:
test['protocol_type']=test['protocol_type'].astype('category').cat.codes
test['service']=test['service'].astype('category').cat.codes
test['flag']=test['flag'].astype('category').cat.codes

In [55]:
X_test=test.drop('attack_label',axis=1)
X_test=X_test.values
y_test=test['attack_label'].apply(lambda x:0 if x=="normal" else 1)
y_test=y_test.values

In [56]:

test_normalized = scaler.transform(X_test)

In [57]:
test_dataset=NSLKDD(test_normalized,y_test)
test_loader=DataLoader(test_dataset,batch_size=32,shuffle=False)

In [58]:

all_probs = torch.tensor([])
model.eval()

with torch.no_grad():
    for idx,(data,lable) in enumerate(test_loader):
        output=model(data.unsqueeze(1))
        probs=torch.nn.functional.softmax(output,dim=1)

        all_probs=torch.cat((all_probs,probs),dim=0)

prediction=torch.argmax(all_probs,dim=1)

In [59]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Get true labels from your test dataset
true_labels = y_test

# Calculate metrics
accuracy = accuracy_score(true_labels, prediction.numpy())
precision = precision_score(true_labels, prediction.numpy())
recall = recall_score(true_labels, prediction.numpy())
f1 = f1_score(true_labels, prediction.numpy())

print(f"Accuracy: {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1-score: {f1:.4f}")

Accuracy: 0.8387
Precision: 0.9665
Recall: 0.7424
F1-score: 0.8398
