# Importing libraries

In [None]:
from __future__ import print_function, division

import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
import torch.backends.cudnn as cudnn
import numpy as np
import torchvision
from torchvision import datasets, models, transforms
import matplotlib.pyplot as plt
import time
import os
import copy

# Turn on cuDNN benchmark mode (torch.backends.cudnn.benchmark) for this session.
cudnn.benchmark = True
plt.ion()   # switch matplotlib to interactive mode

# Utils

In [None]:
def imshow(inp, title=None):
    """Draw an image tensor with matplotlib, axes hidden.

    Args:
        inp: tensor that is converted with ``.numpy()`` and whose axes are
            reordered with ``(1, 2, 0)`` before plotting.
        title: optional title; nothing is set when it is ``None``.
    """
    # Reorder axes (0, 1, 2) -> (1, 2, 0) so the first axis becomes the last.
    pixels = np.transpose(inp.numpy(), (1, 2, 0))
    plt.axis('off')
    plt.imshow(pixels)
    if title is not None:
        plt.title(title)
    # Short pause so the plot gets a chance to refresh.
    plt.pause(0.001)

# def imshow(inp, title=None):
#     """Imshow for Tensor."""
#     inp = inp.numpy().transpose((1, 2, 0))
#     mean = np.array([0.485, 0.456, 0.406])
#     std = np.array([0.229, 0.224, 0.225])
#     inp = std * inp + mean
#     inp = np.clip(inp, 0, 1)
#     plt.imshow(inp)
#     if title is not None:
#         plt.title(title)
#     plt.pause(0.001)  # pause a bit so that plots are updated

In [None]:
def visualize_model(model, num_images=6):
    """Plot images from the TEST loader titled with the model's predictions.

    Batches are read from ``dataloaders[TEST]``, moved to the global
    ``device`` and passed through ``model`` in eval mode with gradients
    disabled. Images are laid out on a two-column grid. Plotting stops after
    ``num_images`` images or when the loader is exhausted, whichever comes
    first. The model's previous train/eval mode is always restored, even if
    an exception is raised while plotting.

    Args:
        model: network to evaluate.
        num_images: maximum number of images to draw. Values <= 0 draw nothing.
    """
    if num_images <= 0:
        return

    was_training = model.training
    model.eval()
    images_so_far = 0
    # Ceiling division: an odd num_images (including 1) still needs a row
    # for its last image; floor division would leave it without a slot.
    num_rows = (num_images + 1) // 2
    plt.figure()

    try:
        with torch.no_grad():
            for inputs, _ in dataloaders[TEST]:
                inputs = inputs.to(device)

                outputs = model(inputs)
                _, preds = torch.max(outputs, 1)

                # Copy the batch back to the CPU once instead of once per image.
                cpu_inputs = inputs.cpu()

                for j in range(cpu_inputs.size()[0]):
                    images_so_far += 1
                    ax = plt.subplot(num_rows, 2, images_so_far)
                    ax.axis('off')
                    ax.set_title(f'predicted: {class_names[int(preds[j])]}')
                    imshow(cpu_inputs[j])

                    if images_so_far == num_images:
                        return
    finally:
        # Put the model back into whatever mode the caller had it in.
        model.train(mode=was_training)

In [None]:
def train_model(model, criterion, optimizer, scheduler, num_epochs=25):
    """Train ``model`` and return it loaded with its best-scoring weights.

    Each epoch runs a training pass over ``dataloaders[TRAIN]`` followed by
    an evaluation pass over ``dataloaders[TEST]``. The scheduler is stepped
    once per epoch, after the training pass. A deep copy of the weights is
    kept whenever the evaluation accuracy strictly improves, and that copy
    is loaded back into the model before returning.

    Note: the "best" weights are selected on the TEST split, so the reported
    best accuracy is chosen on the same data it is measured on.

    Args:
        model: network to train; expected to already live on ``device``.
        criterion: loss callable invoked as ``criterion(outputs, labels)``.
        optimizer: optimizer whose ``zero_grad``/``step`` are called per batch.
        scheduler: learning-rate scheduler stepped once per epoch.
        num_epochs: number of epochs to run.

    Returns:
        The same ``model`` object, with the best recorded weights loaded.
    """
    since = time.time()

    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0

    for epoch in range(num_epochs):
        print(f'Epoch {epoch}/{num_epochs - 1}')
        print('-' * 10)

        # Each epoch has a training phase followed by an evaluation phase.
        for phase in [TRAIN, TEST]:
            is_train = phase == TRAIN
            if is_train:
                model.train()  # Set model to training mode
            else:
                model.eval()   # Set model to evaluate mode

            running_loss = 0.0
            # Kept as a plain Python int so the accuracy maths below also
            # works when the loader yields no batches.
            running_corrects = 0

            for inputs, labels in dataloaders[phase]:
                inputs = inputs.to(device)
                labels = labels.to(device)

                # zero the parameter gradients
                optimizer.zero_grad()

                # Track gradient history only during the training phase.
                with torch.set_grad_enabled(is_train):
                    outputs = model(inputs)
                    _, preds = torch.max(outputs, 1)
                    loss = criterion(outputs, labels)

                    # backward + optimize only if in training phase
                    if is_train:
                        loss.backward()
                        optimizer.step()

                # The loss is multiplied by the batch size so that dividing
                # by the dataset size below yields a per-sample average.
                running_loss += loss.item() * inputs.size(0)
                running_corrects += (preds == labels).sum().item()

            if is_train:
                scheduler.step()

            epoch_loss = running_loss / dataset_sizes[phase]
            epoch_acc = running_corrects / dataset_sizes[phase]

            print(f'{phase} Loss: {epoch_loss:.4f} Acc: {epoch_acc:.4f}')

            # Remember the weights of the best evaluation epoch so far.
            if phase == TEST and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())

        print()

    time_elapsed = time.time() - since
    print(f'Training complete in {time_elapsed // 60:.0f}m {time_elapsed % 60:.0f}s')
    # ".4f" (four decimals); the previous "4f" meant width 4 with 6 decimals.
    print(f'Best val Acc: {best_acc:.4f}')

    # load best model weights
    model.load_state_dict(best_model_wts)
    return model

# Connect to Google Drive (if needed)

In [None]:
# Attach the contents of your Google Drive to the Google Colab runtime
from google.colab import drive
# This will prompt for authorization.
drive.mount('/content/drive')

Mounted at /content/drive


# NSL-KDD dataset

## Downloading data

In [None]:
%%time
# Unpack the training-image archive from the mounted Drive into the current working directory.
# NOTE(review): the -v flag prints every extracted path; consider dropping it to keep the cell output short.
!tar -xzvf "/content/drive/MyDrive/Master/MW/datasets/NSL-KDD/images/NSL_KDD_image_train.tar.gz"

[1;30;43mВыходные данные были обрезаны до нескольких последних строк (5000).[0m
content/NSL_KDD_image_train/normal/81923.jpg
content/NSL_KDD_image_train/normal/122453.jpg
content/NSL_KDD_image_train/normal/83962.jpg
content/NSL_KDD_image_train/normal/54609.jpg
content/NSL_KDD_image_train/normal/49944.jpg
content/NSL_KDD_image_train/normal/87053.jpg
content/NSL_KDD_image_train/normal/8004.jpg
content/NSL_KDD_image_train/normal/105.jpg
content/NSL_KDD_image_train/normal/100667.jpg
content/NSL_KDD_image_train/normal/99119.jpg
content/NSL_KDD_image_train/normal/103539.jpg
content/NSL_KDD_image_train/normal/116458.jpg
content/NSL_KDD_image_train/normal/79508.jpg
content/NSL_KDD_image_train/normal/70123.jpg
content/NSL_KDD_image_train/normal/82329.jpg
content/NSL_KDD_image_train/normal/71792.jpg
content/NSL_KDD_image_train/normal/17658.jpg
content/NSL_KDD_image_train/normal/22060.jpg
content/NSL_KDD_image_train/normal/14407.jpg
content/NSL_KDD_image_train/normal/34152.jpg
content/NSL_KDD_i

In [None]:
%%time
# Unpack the test-image archive from the mounted Drive into the current working directory.
# NOTE(review): the -v flag prints every extracted path; consider dropping it to keep the cell output short.
!tar -xzvf "/content/drive/MyDrive/Master/MW/datasets/NSL-KDD/images/NSL_KDD_image_test.tar.gz"

[1;30;43mВыходные данные были обрезаны до нескольких последних строк (5000).[0m
content/NSL_KDD_image_test/normal/8577.jpg
content/NSL_KDD_image_test/normal/17661.jpg
content/NSL_KDD_image_test/normal/14437.jpg
content/NSL_KDD_image_test/normal/16644.jpg
content/NSL_KDD_image_test/normal/699.jpg
content/NSL_KDD_image_test/normal/2664.jpg
content/NSL_KDD_image_test/normal/17191.jpg
content/NSL_KDD_image_test/normal/14659.jpg
content/NSL_KDD_image_test/normal/5923.jpg
content/NSL_KDD_image_test/normal/17362.jpg
content/NSL_KDD_image_test/normal/14129.jpg
content/NSL_KDD_image_test/normal/673.jpg
content/NSL_KDD_image_test/normal/438.jpg
content/NSL_KDD_image_test/normal/9136.jpg
content/NSL_KDD_image_test/normal/5116.jpg
content/NSL_KDD_image_test/normal/1472.jpg
content/NSL_KDD_image_test/normal/21780.jpg
content/NSL_KDD_image_test/normal/13169.jpg
content/NSL_KDD_image_test/normal/18458.jpg
content/NSL_KDD_image_test/normal/22479.jpg
content/NSL_KDD_image_test/normal/6586.jpg
content

## Defining dataloaders

In [None]:
# Root folder and per-split sub-folder names of the extracted image sets.
data_dir = 'content'
TRAIN = 'NSL_KDD_image_train'
TEST = 'NSL_KDD_image_test'

# Build transform, dataset, loader and size for each split in one pass.
# Both splits get the same preprocessing, each with its own Compose instance.
data_transforms = {}
image_datasets = {}
dataloaders = {}
dataset_sizes = {}
for x in [TRAIN, TEST]:
    data_transforms[x] = transforms.Compose([
        transforms.Resize(224),
        transforms.ToTensor(),
    ])
    image_datasets[x] = datasets.ImageFolder(os.path.join(data_dir, x),
                                             data_transforms[x])
    dataloaders[x] = torch.utils.data.DataLoader(image_datasets[x],
                                                 batch_size=256,
                                                 shuffle=True,
                                                 num_workers=2)
    dataset_sizes[x] = len(image_datasets[x])

# Class labels come from the sub-folder names of the training set.
class_names = image_datasets[TRAIN].classes

# Use the first CUDA device when one is available, otherwise the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

for x in [TRAIN, TEST]:
    print("Loaded {} images under {}".format(dataset_sizes[x], x))

print("Classes: ")
print(class_names)

Loaded 125973 images under NSL_KDD_image_train
Loaded 22544 images under NSL_KDD_image_test
Classes: 
['attack', 'normal']


In [None]:
# Pull a single mini-batch from the training loader and unpack it
# into the image tensor and the matching class indices.
first_train_batch = next(iter(dataloaders[TRAIN]))
inputs, classes = first_train_batch

# Make a grid from batch
# out = torchvision.utils.make_grid(inputs)

# imshow(out, title=[class_names[x] for x in classes])

## ResNet18

### Creating model

In [None]:
# Load the pretrained ResNet-18 and freeze every existing parameter so that
# only the replacement head created below receives gradients.
model_resnet18 = torchvision.models.resnet18(pretrained=True)
for param in model_resnet18.parameters():
    param.requires_grad = False

# Parameters of newly constructed modules have requires_grad=True by default.
# The head is sized from class_names rather than a hard-coded 2, matching
# how the VGG16_bn head is built.
num_features_resnet18 = model_resnet18.fc.in_features
model_resnet18.fc = nn.Linear(num_features_resnet18, len(class_names))

model_resnet18 = model_resnet18.to(device)
# Alias kept so any code that refers to the old `model_conv` name still works.
model_conv = model_resnet18

### Defining criterion, optimizer and scheduler

In [None]:
# Classification loss used for the ResNet-18 run.
criterion_resnet18 = nn.CrossEntropyLoss()

# Only the parameters of the replaced final layer (model_resnet18.fc)
# are handed to the optimizer.
optimizer_resnet18 = optim.SGD(
    model_resnet18.fc.parameters(),
    lr=1e-3,
    momentum=0.9,
)

# Multiply the learning rate by 0.1 every 7 epochs.
exp_lr_scheduler_resnet18 = lr_scheduler.StepLR(
    optimizer_resnet18,
    step_size=7,
    gamma=0.1,
)

### Training and evaluating model

In [None]:
%%time
model_resnet18 = train_model(model_resnet18, criterion_resnet18, optimizer_resnet18,
                             exp_lr_scheduler_resnet18, num_epochs=2)

Epoch 0/1
----------
NSL_KDD_image_train Loss: 0.1233 Acc: 0.9559
NSL_KDD_image_test Loss: 0.6767 Acc: 0.7932

Epoch 1/1
----------
NSL_KDD_image_train Loss: 0.0965 Acc: 0.9664
NSL_KDD_image_test Loss: 0.9434 Acc: 0.7649

Training complete in 19m 11s
Best val Acc: 0.793160
CPU times: user 17min 3s, sys: 1min 15s, total: 18min 18s
Wall time: 19min 10s


## VGG16_bn

### Creating model

In [None]:
# Load the pretrained VGG16 (batch-norm variant) and freeze all of its
# existing parameters.
model_vgg16_bn = torchvision.models.vgg16_bn(pretrained=True)
for param in model_vgg16_bn.parameters():
    param.requires_grad_(False)

# Rebuild the classifier: keep every layer except the last and append a new
# Linear layer with one output per class. Newly constructed modules have
# requires_grad=True by default.
num_features_vgg16_bn = model_vgg16_bn.classifier[6].in_features
features_vgg16_bn = list(model_vgg16_bn.classifier)[:-1]
features_vgg16_bn.append(nn.Linear(num_features_vgg16_bn, len(class_names)))
model_vgg16_bn.classifier = nn.Sequential(*features_vgg16_bn)

model_vgg16_bn = model_vgg16_bn.to(device)

### Defining criterion, optimizer and scheduler

In [None]:
criterion_vgg16_bn = nn.CrossEntropyLoss()

# Hand the optimizer only the parameters that are still trainable (those with
# requires_grad=True), so that it matches the intent that only the new final
# layer is optimized, as in the ResNet-18 setup. Previously every parameter,
# frozen ones included, was passed in.
trainable_params_vgg16_bn = [p for p in model_vgg16_bn.parameters() if p.requires_grad]
optimizer_vgg16_bn = optim.SGD(trainable_params_vgg16_bn, lr=0.001, momentum=0.9)

# Decay LR by a factor of 0.1 every 7 epochs
exp_lr_scheduler_vgg16_bn = lr_scheduler.StepLR(optimizer_vgg16_bn, step_size=7, gamma=0.1)

### Training and evaluating model

In [None]:
%%time
model_vgg16 = train_model(model_vgg16_bn, criterion_vgg16_bn, optimizer_vgg16_bn,
                          exp_lr_scheduler_vgg16_bn, num_epochs=2)

Epoch 0/1
----------
NSL_KDD_image_train Loss: 0.1691 Acc: 0.9378
