In [21]:
import pandas as pd
import numpy as np

import torch
import torch.nn as nn
import joblib

# Ask PyTorch's CUDA caching allocator to release any unused cached GPU memory
# (useful when re-running the notebook in a kernel that already used the GPU).
torch.cuda.empty_cache()

In [22]:
# Run on the GPU when one is visible to PyTorch, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

In [23]:
# Load the serialized model object and put it in inference mode on `device`.
# map_location remaps the stored tensors onto `device` while loading, so a
# checkpoint saved on a GPU can still be opened on a CPU-only machine.
# NOTE(review): torch.load unpickles the file, which can execute arbitrary code;
# only load checkpoints from a trusted source. Recent PyTorch releases may also
# require weights_only=False to load a whole pickled module - confirm for your version.
model = torch.load("anomaly_detection_updated.pth", map_location=device)
model = model.to(device)
# eval() switches layers such as dropout/batch-norm to inference behaviour.
model = model.eval()

In [24]:
# Read the test set; the CSV's "Unnamed: 0" column becomes the row index.
df = pd.read_csv(
    filepath_or_buffer="test_data.csv",
    index_col="Unnamed: 0",
)

In [25]:
# Preview the first five rows of the loaded test set.
df.head(n=5)

Unnamed: 0,index,duration,protocol_type,service,flag,src_bytes,dst_bytes,land,wrong_fragment,urgent,...,dst_host_srv_count,dst_host_same_srv_rate,dst_host_diff_srv_rate,dst_host_same_src_port_rate,dst_host_srv_diff_host_rate,dst_host_serror_rate,dst_host_srv_serror_rate,dst_host_rerror_rate,dst_host_srv_rerror_rate,label
0,0,0,udp,private,SF,105,146,0,0,0,...,254,1.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,normal.
1,3,0,udp,private,SF,105,146,0,0,0,...,254,1.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,snmpgetattack.
2,4,0,udp,private,SF,105,146,0,0,0,...,254,1.0,0.01,0.01,0.0,0.0,0.0,0.0,0.0,snmpgetattack.
3,5,0,udp,private,SF,105,146,0,0,0,...,255,1.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,snmpgetattack.
4,6,0,udp,domain_u,SF,29,0,0,0,0,...,3,0.3,0.3,0.3,0.0,0.0,0.0,0.0,0.0,normal.


In [26]:
# Feature columns selected from the test set and passed to the preprocessing pipeline.
columns = [
    "protocol_type",
    "logged_in",
    "count",
    "srv_count",
    "srv_diff_host_rate",
    "dst_host_count",
    "dst_host_same_src_port_rate",
]

In [27]:
# Keep every row but only the selected feature columns.
testing_data = df.loc[:, columns]

In [28]:
# Load the serialized preprocessing object whose .transform() is applied to the
# selected feature columns (presumably a fitted scikit-learn pipeline - confirm).
# NOTE(review): joblib.load unpickles the file, which can execute arbitrary code;
# only load artifacts from a trusted source.
pipeline = joblib.load("pipeline_updated.pkl")

In [29]:
# Preprocess the selected features, then build a float32 tensor on `device`.
input_array = pipeline.transform(testing_data)
input_data = torch.tensor(input_array, dtype=torch.float32).to(device=device)

In [30]:
# Inference only: run the forward pass without autograd so no computation graph
# is built or kept alive for the whole test set (saves GPU memory, and the
# resulting tensor carries no grad_fn).
with torch.no_grad():
    preds = model(input_data)

In [31]:
# Display the model outputs for a quick visual check.
preds

tensor([[-0.1354, -0.2093,  1.1158,  ...,  0.1079,  0.1649,  0.4508],
        [-0.1081, -0.1880,  1.1313,  ...,  0.0932,  0.1698,  0.4562],
        [-0.1085, -0.1881,  1.1313,  ...,  0.0934,  0.1697,  0.4561],
        ...,
        [-0.0809, -0.1669,  1.1466,  ...,  0.0791,  0.1747,  0.4615],
        [-0.4852, -0.4221, -0.6064,  ...,  0.1432,  0.2276,  0.5705],
        [-0.4842, -0.4289, -0.5535,  ...,  0.1448,  0.2260,  0.5628]],
       device='cuda:0', grad_fn=<AddmmBackward0>)

In [32]:
# Mean-squared-error loss (averaged over all elements), used to compare the
# model's output with the input it was given.
criterion = nn.MSELoss(reduction="mean")

In [33]:
# Collect the positional indices of rows whose reconstruction error exceeds 0.5.
# Fix: each prediction is compared with its OWN input row; the previous loop
# compared every prediction with row 1 (`input_data[1]`).
# The per-row MSE is computed in one vectorised call instead of a Python loop:
# reduction="none" keeps the element-wise squared errors, and mean(dim=1)
# averages them per row, matching nn.MSELoss() applied to a single row.
with torch.no_grad():
    row_errors = nn.functional.mse_loss(preds, input_data, reduction="none").mean(dim=1)
attacks = torch.nonzero(row_errors > 0.5).flatten().tolist()

In [34]:
# Ground-truth attack rows: every record whose label is not "normal.".
# Note that this rebinds `attacks`, replacing whatever it held before.
is_attack = df["label"].ne("normal.")
attacks = df[is_attack]

In [35]:
# Display the rows currently bound to `attacks`.
attacks

Unnamed: 0,index,duration,protocol_type,service,flag,src_bytes,dst_bytes,land,wrong_fragment,urgent,...,dst_host_srv_count,dst_host_same_srv_rate,dst_host_diff_srv_rate,dst_host_same_src_port_rate,dst_host_srv_diff_host_rate,dst_host_serror_rate,dst_host_srv_serror_rate,dst_host_rerror_rate,dst_host_srv_rerror_rate,label
1,3,0,udp,private,SF,105,146,0,0,0,...,254,1.00,0.01,0.00,0.0,0.0,0.0,0.0,0.0,snmpgetattack.
2,4,0,udp,private,SF,105,146,0,0,0,...,254,1.00,0.01,0.01,0.0,0.0,0.0,0.0,0.0,snmpgetattack.
3,5,0,udp,private,SF,105,146,0,0,0,...,255,1.00,0.00,0.01,0.0,0.0,0.0,0.0,0.0,snmpgetattack.
9,13,0,udp,private,SF,105,146,0,0,0,...,252,0.99,0.01,0.00,0.0,0.0,0.0,0.0,0.0,snmpgetattack.
14,19,0,udp,private,SF,105,146,0,0,0,...,254,1.00,0.01,0.00,0.0,0.0,0.0,0.0,0.0,snmpgetattack.
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
77112,309594,0,udp,private,SF,105,105,0,0,0,...,253,0.99,0.01,0.00,0.0,0.0,0.0,0.0,0.0,snmpgetattack.
77113,309595,0,udp,private,SF,105,105,0,0,0,...,253,0.99,0.01,0.00,0.0,0.0,0.0,0.0,0.0,snmpgetattack.
77145,309742,0,udp,private,SF,105,147,0,0,0,...,255,1.00,0.00,0.00,0.0,0.0,0.0,0.0,0.0,snmpgetattack.
77178,309995,0,udp,private,SF,105,147,0,0,0,...,254,1.00,0.01,0.01,0.0,0.0,0.0,0.0,0.0,snmpgetattack.


In [36]:
# Walk a single record (positional row 77112) through the whole inference path
# and print every intermediate value.
# Selecting with a list (`iloc[[...]]`) returns a one-row DataFrame directly,
# instead of round-tripping the row through a mixed-type Series.
point_orig = df.iloc[[77112]]
label = point_orig.label
point = point_orig[columns]
point_tr = pipeline.transform(point)
point_inp = torch.tensor(point_tr, dtype=torch.float32).to(device=device)
# Inference only: no autograd graph is needed for the forward pass or the error.
with torch.no_grad():
    prediction = model(point_inp)
    error = criterion(prediction, point_inp)

print(f"Original datapoint: {point}")
print(f"Pipeline output: {point_tr}")
print(f"Input to the model: {point_inp}")
print(f"Output of the model: {prediction}")
print(f"Label is: {label}")
print(f"Error: {error}")

Original datapoint:       protocol_type  logged_in  count  srv_count  srv_diff_host_rate  \
77112           udp          0      2          1                 0.0   

       dst_host_count  dst_host_same_src_port_rate  
77112             255                          0.0  
Pipeline output: [[-0.36923169 -0.47853855  1.12474122  0.          0.          1.
   0.          0.          0.        ]]
Input to the model: tensor([[-0.3692, -0.4785,  1.1247,  0.0000,  0.0000,  1.0000,  0.0000,  0.0000,
          0.0000]], device='cuda:0')
Output of the model: tensor([[-0.1202, -0.2012,  1.1219,  0.1181,  0.2035,  0.6578,  0.1008,  0.1683,
          0.4533]], device='cuda:0', grad_fn=<AddmmBackward0>)
Label is: 77112    snmpgetattack.
Name: label, dtype: object
Error: 0.06171189621090889


In [37]:
# Binary prediction per row: 1 when the reconstruction error is >= 0.1, else 0.
# The per-row MSE is computed in one vectorised call rather than a Python loop
# over every row (each iteration launched GPU work and forced a host sync).
# reduction="none" keeps element-wise squared errors; mean(dim=1) averages them
# per row, matching nn.MSELoss() applied to a single row.
# This also no longer overwrites the `error` variable set by the single-point cell.
with torch.no_grad():
    row_errors = nn.functional.mse_loss(preds, input_data, reduction="none").mean(dim=1)
predictions = (row_errors >= 0.1).int().tolist()

In [38]:
# Pair each record's ground-truth label with the model's 0/1 prediction.
results_df = df[["label"]].assign(prediction=predictions)

In [39]:
# Binary ground truth: 0 for "normal." traffic, 1 for any other (attack) label.
# Derived from df["label"] rather than from results_df's own column, so
# re-running this cell is idempotent; the previous in-place apply turned every
# value into 1 on a second run because 0/1 never equal "normal.".
results_df["label"] = (df["label"] != "normal.").astype(int).to_numpy()

In [41]:
from sklearn.metrics import accuracy_score, precision_score, recall_score
# Fraction of records whose predicted class matches the ground-truth class.
accuracy_score(
    y_true=results_df["label"],
    y_pred=results_df["prediction"],
)

0.8965209403423425

In [46]:
# Precision for the attack class (label 1): of all records flagged as attacks,
# the share that truly are attacks.
precision_score(
    y_true=results_df["label"],
    y_pred=results_df["prediction"],
    pos_label=1,
)

0.9200723043068217