In [50]:
import numpy as np
import pandas as pd
import torch
import random

from tqdm import tqdm
from torch import nn, optim
from torch.optim import lr_scheduler
from torch.utils.data import TensorDataset, DataLoader
from sklearn.metrics import accuracy_score, confusion_matrix, f1_score, precision_score, recall_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler

In [51]:
batch_size = 1
learning_rate = 1e-3
n_epochs = 5

random_seed = int(random.random() * 100)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [52]:
df = pd.read_csv('log_file_features.csv').to_numpy()
x = df[:, 1:]
y = df[:, 0]

x = StandardScaler().fit_transform(x)
y = LabelEncoder().fit_transform(y)

x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=random_seed)

x_train = torch.from_numpy(x_train.astype(np.float32)).to(device)
x_test = torch.from_numpy(x_test.astype(np.float32)).to(device)
y_train = torch.from_numpy(y_train.astype(np.int64)).to(device)
y_test = torch.from_numpy(y_test.astype(np.int64))

train_ds = TensorDataset(x_train, y_train)
train_dl = DataLoader(dataset=train_ds, batch_size=batch_size, shuffle=True)

In [53]:
class Net(nn.Module):
    def __init__(self):
        super().__init__()        
        self.flatten = nn.Flatten()
        self.network = nn.Sequential(
            nn.Linear(x_train.shape[1], 128),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(128, 32),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(32, 8),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(8, 2),
            nn.Softmax(dim=1)
        )
        
    def forward(self, x):
        x = self.flatten(x)
        pred = self.network(x)
        
        return pred

In [54]:
def train_loop(n_epochs, model, loss_fn, optimizer, scheduler):
    model.train()
    for epoch in tqdm(range(n_epochs)):
        for xb, yb in train_dl:
            optimizer.zero_grad()
            pred = model(xb)
            loss = loss_fn(pred, yb)
            loss.backward()
            optimizer.step()
            scheduler.step()
        
        if epoch % (n_epochs / 10) == 0:
            print('loss:', loss.item())
            print('lr:', scheduler.get_last_lr())

In [55]:
model = Net().to(device)

optimizer = optim.AdamW(model.parameters(), lr=learning_rate)
scheduler = lr_scheduler.OneCycleLR(optimizer=optimizer, max_lr=learning_rate, epochs=n_epochs, steps_per_epoch=len(train_dl))
#scheduler = lr_scheduler.ExponentialLR(optimizer=optimizer, gamma=1)
loss_fn = nn.CrossEntropyLoss()

train_loop(n_epochs, model, loss_fn, optimizer, scheduler)

 40%|████      | 2/5 [00:00<00:00,  5.20it/s]

0 loss = 1.1196366548538208
lr: [0.0007651482338684975]
1 loss = 0.3477362394332886
lr: [0.0009487472011614595]


 80%|████████  | 4/5 [00:00<00:00,  6.04it/s]

2 loss = 0.31326183676719666
lr: [0.0006073864740750776]
3 loss = 0.3132617473602295
lr: [0.00018516304776608939]


100%|██████████| 5/5 [00:00<00:00,  5.88it/s]

4 loss = 0.31327053904533386
lr: [1.9774033457957812e-08]





In [56]:
model.eval()
with torch.no_grad():
    y_pred = model(x_test).argmax(dim=1).cpu()
    print('Accuracy: %.2f' % accuracy_score(y_test, y_pred))
    print('Precision: %.3f' % precision_score(y_true=y_test, y_pred=y_pred, average='weighted'))
    print('Recall: %.3f' % recall_score(y_true=y_test, y_pred=y_pred, average='weighted'))
    print('F1-measure: %.3f' % f1_score(y_true=y_test, y_pred=y_pred, average='weighted'))
    print('Confusion Matrix:')
    print(confusion_matrix(y_true=y_test, y_pred=y_pred))

Accuracy: 0.97
Precision: 0.969
Recall: 0.966
F1-measure: 0.966
Confusion Matrix:
[[20  1]
 [ 0  8]]
