In [23]:
import pandas as pd
import numpy as np

from sklearn.preprocessing import StandardScaler, LabelEncoder

import torch
import torch.nn as nn
import torch.optim as optim
torch.cuda.empty_cache()

In [2]:
device = ("cuda" if torch.cuda.is_available() else "cpu")

In [3]:
class Autoencoder(nn.Module):
    def __init__(self, *args, **kwargs) -> None:
        super(Autoencoder, self).__init__()

        self.encoder = nn.Sequential(
            nn.Linear(7, 32),
            nn.ReLU(),
            nn.Linear(32, 16),
            nn.ReLU(),
            nn.Linear(16, 7),
            nn.ReLU()
        )

        self.decoder = nn.Sequential(
            nn.Linear(7, 16), 
            nn.ReLU(),
            nn.Linear(16, 32),
            nn.ReLU(),
            nn.Linear(32, 7)
        )
    
    def forward(self, x):

        encoded = self.encoder(x)
        decoded = self.decoder(encoded)

        return decoded     

In [4]:
model = torch.load("autoencoder_anomaly_detection.pth")
model = model.to(device)
model = model.eval()

In [10]:
df = pd.read_csv("test_data.csv", index_col="Unnamed: 0")

In [11]:
df.head()

Unnamed: 0,duration,protocol_type,service,flag,src_bytes,dst_bytes,land,wrong_fragment,urgent,hot,...,dst_host_srv_count,dst_host_same_srv_rate,dst_host_diff_srv_rate,dst_host_same_src_port_rate,dst_host_srv_diff_host_rate,dst_host_serror_rate,dst_host_srv_serror_rate,dst_host_rerror_rate,dst_host_srv_rerror_rate,label
0,0,udp,private,SF,105,146,0,0,0,0,...,254,1.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,normal.
1,0,udp,private,SF,105,146,0,0,0,0,...,254,1.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,normal.
2,0,udp,private,SF,105,146,0,0,0,0,...,254,1.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,normal.
3,0,udp,private,SF,105,146,0,0,0,0,...,254,1.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,snmpgetattack.
4,0,udp,private,SF,105,146,0,0,0,0,...,254,1.0,0.01,0.01,0.0,0.0,0.0,0.0,0.0,snmpgetattack.


In [12]:
columns = ["protocol_type", "logged_in", "count", "srv_count", "srv_diff_host_rate", "dst_host_count", "dst_host_same_src_port_rate"]

In [13]:
testing_data = df[columns]

In [16]:
le = LabelEncoder()
testing_data["protocol_type"] = le.fit_transform(testing_data["protocol_type"].values)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  testing_data["protocol_type"] = le.fit_transform(testing_data["protocol_type"].values)


In [20]:
ss = StandardScaler()
testing_data["count"] = ss.fit_transform(testing_data["count"].values.reshape(-1, 1))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  testing_data["count"] = ss.fit_transform(testing_data["count"].values.reshape(-1, 1))


In [22]:
testing_data["dst_host_count"] = ss.fit_transform(testing_data["dst_host_count"].values.reshape(-1, 1))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  testing_data["dst_host_count"] = ss.fit_transform(testing_data["dst_host_count"].values.reshape(-1, 1))


In [26]:
testing_array = np.array(testing_data)
testing_tensor = torch.tensor(testing_array, dtype=torch.float32)
testing_tensor = testing_tensor.to(device=device)

In [27]:
predictions = model(testing_tensor)

In [28]:
predictions

tensor([[ 1.1978,  0.4597, -1.2616,  ...,  0.0932,  0.3157,  0.1252],
        [ 1.1978,  0.4597, -1.2616,  ...,  0.0932,  0.3157,  0.1252],
        [ 1.1978,  0.4597, -1.2616,  ...,  0.0932,  0.3157,  0.1252],
        ...,
        [ 1.0213,  0.4105, -1.0963,  ...,  0.0955,  0.2437,  0.0981],
        [ 0.8116,  0.3413, -0.8475,  ...,  0.0991,  0.1337,  0.0861],
        [ 1.0213,  0.4105, -1.0963,  ...,  0.0955,  0.2437,  0.0981]],
       device='cuda:0', grad_fn=<AddmmBackward0>)

In [29]:
criterion = nn.MSELoss()

In [38]:
testing_tensor[50]

tensor([ 1.0000,  1.0000, -1.1838,  9.0000,  0.0000,  0.3237,  0.0000],
       device='cuda:0')

In [39]:
criterion(predictions[50], testing_tensor[50])

tensor(13.6082, device='cuda:0', grad_fn=<MseLossBackward0>)