In [1]:
import joblib
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from sklearn.metrics import accuracy_score

# Ask PyTorch's CUDA caching allocator to release cached blocks it is not using.
# NOTE(review): on a fresh kernel nothing has been allocated on the GPU yet, so this
# presumably only matters when the notebook is re-run without a restart — confirm.
torch.cuda.empty_cache()

In [2]:
# Run on the GPU when PyTorch can see one; otherwise stay on the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

In [3]:
# Load the pickled model object, place it on the selected device and switch it to
# inference mode.
# map_location=device remaps tensors that were saved from a GPU, so the load also
# succeeds on a CPU-only machine (the `device` fallback above is otherwise useless).
# SECURITY: torch.load unpickles the file, which can execute arbitrary code —
# only load checkpoints that come from a trusted source.
# NOTE(review): recent PyTorch releases default to weights_only=True, which rejects
# fully pickled modules; if loading fails there, pass weights_only=False explicitly
# — confirm against the installed version.
model = torch.load("anomaly_detection.pth", map_location=device)
model = model.to(device)
# eval() puts layers such as dropout / batch-norm into their inference behaviour.
model = model.eval()

In [4]:
# Read the evaluation records; the column pandas named "Unnamed: 0" becomes the index.
TEST_CSV = "test_data.csv"
df = pd.read_csv(TEST_CSV, index_col="Unnamed: 0")

In [5]:
# Preview the first five rows to sanity-check the columns that were read.
df.head(n=5)

Unnamed: 0,index,duration,protocol_type,service,flag,src_bytes,dst_bytes,land,wrong_fragment,urgent,...,dst_host_srv_count,dst_host_same_srv_rate,dst_host_diff_srv_rate,dst_host_same_src_port_rate,dst_host_srv_diff_host_rate,dst_host_serror_rate,dst_host_srv_serror_rate,dst_host_rerror_rate,dst_host_srv_rerror_rate,label
0,0,0,udp,private,SF,105,146,0,0,0,...,254,1.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,normal.
1,3,0,udp,private,SF,105,146,0,0,0,...,254,1.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,snmpgetattack.
2,4,0,udp,private,SF,105,146,0,0,0,...,254,1.0,0.01,0.01,0.0,0.0,0.0,0.0,0.0,snmpgetattack.
3,5,0,udp,private,SF,105,146,0,0,0,...,255,1.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,snmpgetattack.
4,6,0,udp,domain_u,SF,29,0,0,0,0,...,3,0.3,0.3,0.3,0.0,0.0,0.0,0.0,0.0,normal.


In [6]:
# Feature columns fed to the preprocessing pipeline (order is preserved).
columns = [
    "protocol_type",
    "logged_in",
    "count",
    "srv_count",
    "srv_diff_host_rate",
    "dst_host_count",
    "dst_host_same_src_port_rate",
]

In [7]:
# Keep every row but only the selected feature columns.
testing_data = df.loc[:, columns]

In [8]:
# Restore the persisted preprocessing pipeline (it is only used via .transform below).
# SECURITY: joblib.load unpickles the file — only load artifacts from a trusted source.
PIPELINE_PATH = "pipeline_updated.pkl"
pipeline = joblib.load(PIPELINE_PATH)

In [9]:
# Preprocess the selected features, then hand them to the model's device as float32.
input_array = pipeline.transform(testing_data)
input_data = torch.tensor(input_array, dtype=torch.float32).to(device)

In [10]:
# Inference only: no_grad() stops autograd from recording a graph for the whole
# test set, which would otherwise keep intermediate activations alive in memory
# and attach a grad_fn to every value derived from `preds`.
with torch.no_grad():
    preds = model(input_data)

In [11]:
# Display the raw model output; row i is the model's output for row i of input_data.
preds

tensor([[-0.7696, -0.3496,  0.7132,  ...,  0.0165,  0.1593,  0.4487],
        [-0.7641, -0.3321,  0.7169,  ...,  0.0146,  0.1591,  0.4481],
        [-0.7642, -0.3317,  0.7173,  ...,  0.0145,  0.1592,  0.4482],
        ...,
        [-0.7593, -0.3145,  0.7212,  ...,  0.0138,  0.1602,  0.4471],
        [-0.6937, -0.2876, -1.2278,  ...,  0.1090,  0.2225,  0.4720],
        [-0.6980, -0.3052, -1.1674,  ...,  0.1072,  0.2147,  0.4715]],
       device='cuda:0', grad_fn=<AddmmBackward0>)

In [12]:
# Mean squared error, averaged over all elements (the default reduction, spelled out).
criterion = nn.MSELoss(reduction="mean")

In [13]:
# Collect the positional indices of rows whose reconstruction error exceeds 0.5.
# Every output row is compared with its own input row (row i against row i).
# The per-row mean of squared differences equals nn.MSELoss() applied to one row,
# and computing it for all rows at once avoids one loss call (and one device sync)
# per row. no_grad() keeps autograd out of this pure evaluation step.
# NOTE(review): the next cell rebinds `attacks` to a ground-truth DataFrame, so this
# list is discarded unless it is kept under another name; the 0.5 cut-off also
# differs from the 0.1 used later for `predictions` — confirm which is intended.
with torch.no_grad():
    row_mse = nn.functional.mse_loss(preds, input_data, reduction="none").mean(dim=1)
attacks = torch.nonzero(row_mse > 0.5).flatten().tolist()

In [14]:
# Ground-truth attack rows: every record whose label is anything other than "normal.".
# NOTE(review): this rebinds `attacks`, replacing the list built in the previous cell.
is_attack = df["label"].ne("normal.")
attacks = df.loc[is_attack]

In [15]:
# Display the rows whose label is not "normal.".
attacks

Unnamed: 0,index,duration,protocol_type,service,flag,src_bytes,dst_bytes,land,wrong_fragment,urgent,...,dst_host_srv_count,dst_host_same_srv_rate,dst_host_diff_srv_rate,dst_host_same_src_port_rate,dst_host_srv_diff_host_rate,dst_host_serror_rate,dst_host_srv_serror_rate,dst_host_rerror_rate,dst_host_srv_rerror_rate,label
1,3,0,udp,private,SF,105,146,0,0,0,...,254,1.00,0.01,0.00,0.0,0.0,0.0,0.0,0.0,snmpgetattack.
2,4,0,udp,private,SF,105,146,0,0,0,...,254,1.00,0.01,0.01,0.0,0.0,0.0,0.0,0.0,snmpgetattack.
3,5,0,udp,private,SF,105,146,0,0,0,...,255,1.00,0.00,0.01,0.0,0.0,0.0,0.0,0.0,snmpgetattack.
9,13,0,udp,private,SF,105,146,0,0,0,...,252,0.99,0.01,0.00,0.0,0.0,0.0,0.0,0.0,snmpgetattack.
14,19,0,udp,private,SF,105,146,0,0,0,...,254,1.00,0.01,0.00,0.0,0.0,0.0,0.0,0.0,snmpgetattack.
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
77112,309594,0,udp,private,SF,105,105,0,0,0,...,253,0.99,0.01,0.00,0.0,0.0,0.0,0.0,0.0,snmpgetattack.
77113,309595,0,udp,private,SF,105,105,0,0,0,...,253,0.99,0.01,0.00,0.0,0.0,0.0,0.0,0.0,snmpgetattack.
77145,309742,0,udp,private,SF,105,147,0,0,0,...,255,1.00,0.00,0.00,0.0,0.0,0.0,0.0,0.0,snmpgetattack.
77178,309995,0,udp,private,SF,105,147,0,0,0,...,254,1.00,0.01,0.01,0.0,0.0,0.0,0.0,0.0,snmpgetattack.


In [16]:
# Walk one known attack row through preprocessing and the model to inspect its
# reconstruction error by hand.
# iloc with a list indexer returns a one-row DataFrame directly, avoiding the
# round-trip through an object-dtype row Series.
point_orig = df.iloc[[77112]]
label = point_orig.label
point = point_orig[columns]
point_tr = pipeline.transform(point)
point_inp = torch.tensor(point_tr, dtype=torch.float32).to(device=device)
# Inference only, so skip autograd bookkeeping for the forward pass and the loss.
with torch.no_grad():
    prediction = model(point_inp)
    error = criterion(prediction, point_inp)

print(f"Original datapoint: {point}")
print(f"Pipeline output: {point_tr}")
print(f"Input to the model: {point_inp}")
print(f"Output of the model: {prediction}")
print(f"Label is: {label}")
print(f"Error: {error}")

Original datapoint:       protocol_type  logged_in  count  srv_count  srv_diff_host_rate  \
77112           udp          0      2          1                 0.0   

       dst_host_count  dst_host_same_src_port_rate  
77112             255                          0.0  
Pipeline output: [[-0.72157753 -0.39072118  0.74117485  0.          0.          1.
   0.          0.          0.        ]]
Input to the model: tensor([[-0.7216, -0.3907,  0.7412,  0.0000,  0.0000,  1.0000,  0.0000,  0.0000,
          0.0000]], device='cuda:0')
Output of the model: tensor([[-0.7671, -0.3482,  0.7115,  0.0601,  0.0249,  0.8234,  0.0162,  0.1583,
          0.4475]], device='cuda:0', grad_fn=<AddmmBackward0>)
Label is: 77112    snmpgetattack.
Name: label, dtype: object
Error: 0.029529519379138947


In [17]:
# Classify every row: 1 (anomaly) when its reconstruction MSE is at least 0.1, else 0.
# The per-row mean of squared differences equals nn.MSELoss() applied to one row;
# computing it for the whole batch replaces one loss call (and one device sync) per
# row, and no_grad() keeps autograd out of this evaluation-only step.
with torch.no_grad():
    reconstruction_errors = nn.functional.mse_loss(
        preds, input_data, reduction="none"
    ).mean(dim=1)
# .int().tolist() yields plain Python 0/1 integers, one per row, in row order.
predictions = (reconstruction_errors >= 0.1).int().tolist()

In [18]:
# Pair each row's original label with its 0/1 prediction, keeping df's index.
results_df = df[["label"]].assign(prediction=predictions)

In [19]:
# Binarise the ground truth: 0 for "normal.", 1 for any other label.
# The values are derived from the untouched df["label"] column rather than from
# results_df["label"] itself, so re-running this cell is idempotent; mapping the
# already-converted 0/1 column again would turn every row into 1.
results_df["label"] = (df["label"] != "normal.").astype("int64")

In [20]:
# Fraction of rows whose thresholded prediction matches the binarised label.
# (accuracy_score is imported with the other dependencies in the first cell.)
# NOTE(review): accuracy alone can hide poor attack detection when the classes are
# imbalanced — consider also reporting precision / recall.
accuracy_score(y_true=results_df["label"], y_pred=results_df["prediction"])

0.6352615440348811