# Model Training & Evaluation

In [3]:
from config import *
from tqdm.notebook import tqdm
import utils
from torch.nn.utils.rnn import pad_sequence
from train import fit_model

# Configure the compute device via the project helper. The captured output
# below ("Using device = cpu") presumably originates from this call.
utils.set_device()
# Seed the random number generators via the project helper so that splits,
# shuffling and weight initialisation are repeatable.
# NOTE(review): which libraries get seeded is not visible here - confirm in utils.set_seed.
utils.set_seed()

Using device = cpu


## Datasets and Dataloaders

In [4]:
from data.dataset import LeakAnomalyDetectionDataset
from torch.utils.data import DataLoader, Subset, random_split, ConcatDataset

# Build the single dataset object that every split below is carved from.
# NORMAL_DATA / ANOMALOUS_DATA are presumably supplied by `from config import *`.
leaks_dataset = LeakAnomalyDetectionDataset(
    normal_data_dir=NORMAL_DATA,
    anomalous_data_dir=ANOMALOUS_DATA,
)

In [None]:
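# NOTE: disabled draft of a padding collate function (sorts a batch by sequence
# length, then pads). It is not passed to any DataLoader in this notebook.
# def collate_fn(batch):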
#     # Extract sequences, labels, and lengths
#     sequences, labels, lengths = zip(*batch)
#     lengths = torch.tensor(lengths)
    
#     # Sort by lengths in descending order
#     lengths, sort_idx = lengths.sort(descending=True)
#     sequences = [sequences[i] for i in sort_idx]
#     labels = torch.tensor([labels[i] for i in sort_idx])
    
#     # Pad sequences
#     padded_sequences = pad_sequence(sequences, batch_first=True)
    
#     return padded_sequences, labels, lengths

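# NOTE: disabled debug helper; `model` and `train_loader` are only defined in
# later cells, and `summary` is not imported in the visible cells.
# print(summary(model, input_size=train_loader.dataset[0][0].shape))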

### Classification Datasets

In [5]:
# Split the full dataset (normal + anomalous samples) into train / validation /
# test subsets for the supervised classifiers.
n_samples = len(leaks_dataset)

train_size = int(TRAIN_SIZE * n_samples)
val_size = int(VAL_SIZE * n_samples)
# Whatever is left after the two truncated fractions goes to the test split,
# so the three sizes always add up to the dataset length.
test_size = n_samples - train_size - val_size

split_lengths = [train_size, val_size, test_size]
train_set, val_set, test_set = random_split(leaks_dataset, split_lengths)

### Autoencoder Datasets

In [38]:
# Autoencoder splits: train and validate on normal samples only, then test on
# held-out normal samples plus every anomalous sample.
normal_dataset = Subset(leaks_dataset, leaks_dataset.normal_indices)
anomalous_dataset = Subset(leaks_dataset, leaks_dataset.anomalous_indices)

# These names are deliberately prefixed with `ae_`. Previously this cell rebound
# `train_set` / `val_set` / `test_set` (and the `*_size` variables), so a
# top-to-bottom run silently replaced the classification splits and the
# classifier was trained on normal-only data. To train an autoencoder, build
# DataLoaders from the `ae_*` sets instead.
ae_train_size = int(TRAIN_SIZE * len(normal_dataset))
ae_val_size = int(VAL_SIZE * len(normal_dataset))
ae_test_size = len(normal_dataset) - ae_train_size - ae_val_size

# Train/val/test split of the normal data only.
ae_train_set, ae_val_set, ae_normal_test_set = random_split(
    normal_dataset, [ae_train_size, ae_val_size, ae_test_size]
)
# Test set = all anomalous samples followed by the held-out normal samples.
ae_test_set = ConcatDataset([anomalous_dataset, ae_normal_test_set])

### DataLoaders

In [6]:
# Only the training loader is shuffled. Validation and test order does not
# need to be random, and keeping it fixed makes per-batch evaluation
# repeatable and avoids drawing from the RNG on every validation pass.
train_loader = DataLoader(train_set, batch_size=BATCH_SIZE, shuffle=True)
val_loader = DataLoader(val_set, batch_size=BATCH_SIZE, shuffle=False)
test_loader = DataLoader(test_set, batch_size=BATCH_SIZE, shuffle=False)

## Models

In [7]:
import torch
from models.classifiers  import RNNClassfier, CNNRNNClassifier
from models.autoencoders import HiddenStateRepeatedAutoencoder, ScheduledSamplingAutoencoder

In [8]:
# The input width of every model is the dataset's feature count.
INPUT_DIM = leaks_dataset.num_features

# --- Supervised classifiers ---
rnn_classification_model = RNNClassfier(
    input_dim=INPUT_DIM,
    hidden_dim=4,
)
cnn_rnn_classification_model = CNNRNNClassifier(
    input_dim=INPUT_DIM,
    hidden_dim=8,
    cnn_filters=3,
)

# --- Autoencoders ---
hidden_state_repeated_autoencoder_model = HiddenStateRepeatedAutoencoder(
    input_dim=INPUT_DIM,
    hidden_dim=16,
)
scheduled_sampling_autoencoder_model = ScheduledSamplingAutoencoder(
    input_dim=INPUT_DIM,
    hidden_dim=16,
)

## Training

In [9]:
# Select which of the models instantiated above is trained and evaluated by
# the cells below; swap the right-hand side to try a different one.
model = rnn_classification_model

In [10]:
# Train the selected model, passing the training and validation loaders.
# NOTE(review): the hyperparameters shown in the output (learning_rate,
# batch_size, epoch count) are not passed here - presumably they come from
# config / train.py; confirm there.
fit_model(model, train_loader, val_loader)

{'learning_rate': 0.1, 'batch_size': 32}


Epoch 1/30:   0%|          | 0/2 [00:00<?, ? batches/s]

Epoch 2/30:   0%|          | 0/2 [00:00<?, ? batches/s]

Epoch 3/30:   0%|          | 0/2 [00:00<?, ? batches/s]

Epoch 4/30:   0%|          | 0/2 [00:00<?, ? batches/s]

Epoch 5/30:   0%|          | 0/2 [00:00<?, ? batches/s]

Epoch 6/30:   0%|          | 0/2 [00:00<?, ? batches/s]

Epoch 7/30:   0%|          | 0/2 [00:00<?, ? batches/s]

Epoch 8/30:   0%|          | 0/2 [00:00<?, ? batches/s]

Epoch 9/30:   0%|          | 0/2 [00:00<?, ? batches/s]

Epoch 10/30:   0%|          | 0/2 [00:00<?, ? batches/s]

Epoch 11/30:   0%|          | 0/2 [00:00<?, ? batches/s]

Epoch 12/30:   0%|          | 0/2 [00:00<?, ? batches/s]

Epoch 13/30:   0%|          | 0/2 [00:00<?, ? batches/s]

Epoch 14/30:   0%|          | 0/2 [00:00<?, ? batches/s]

Epoch 15/30:   0%|          | 0/2 [00:00<?, ? batches/s]

Epoch 16/30:   0%|          | 0/2 [00:00<?, ? batches/s]

Epoch 17/30:   0%|          | 0/2 [00:00<?, ? batches/s]

Epoch 18/30:   0%|          | 0/2 [00:00<?, ? batches/s]

Epoch 19/30:   0%|          | 0/2 [00:00<?, ? batches/s]

Epoch 20/30:   0%|          | 0/2 [00:00<?, ? batches/s]

Epoch 21/30:   0%|          | 0/2 [00:00<?, ? batches/s]

Epoch 22/30:   0%|          | 0/2 [00:00<?, ? batches/s]

Epoch 23/30:   0%|          | 0/2 [00:00<?, ? batches/s]

Epoch 24/30:   0%|          | 0/2 [00:00<?, ? batches/s]

Epoch 25/30:   0%|          | 0/2 [00:00<?, ? batches/s]

Epoch 26/30:   0%|          | 0/2 [00:00<?, ? batches/s]

Epoch 27/30:   0%|          | 0/2 [00:00<?, ? batches/s]

Epoch 28/30:   0%|          | 0/2 [00:00<?, ? batches/s]

Epoch 29/30:   0%|          | 0/2 [00:00<?, ? batches/s]

Epoch 30/30:   0%|          | 0/2 [00:00<?, ? batches/s]

## Evaluation

In [11]:
from sklearn.metrics import accuracy_score, det_curve
import matplotlib.pyplot as plt

### False Alarm Rate

In [12]:
# Score the whole test set first, then compute a single DET curve.
# Computing det_curve per batch (as before) yields one unrelated curve per
# batch and raises ValueError whenever a batch contains only one class.

# Switch to inference mode; this matters if the model contains layers such as
# dropout or batch-norm that behave differently during training.
model.eval()

batch_scores, batch_labels = [], []
with torch.no_grad():
    for x_batch, y_batch in test_loader:
        # Call the module itself rather than .forward() so registered hooks run.
        # Outputs are flattened to 1-D to match the label vector
        # (assumes one score per sample - TODO confirm for other models).
        batch_scores.append(model(x_batch).reshape(-1))
        batch_labels.append(y_batch.reshape(-1))

# Concatenate on the CPU so the tensors can be handed to scikit-learn.
y_scores = torch.cat(batch_scores).cpu()
y_true = torch.cat(batch_labels).cpu()
print(y_scores, y_true)

# False-positive rate (false alarms) and false-negative rate per threshold.
fpr, fnr, thresholds = det_curve(y_true.numpy(), y_scores.numpy())
print(fpr, fnr, thresholds)

tensor([0.9272, 0.0114, 0.9272, 0.0157, 0.0159, 0.9270, 0.9272, 0.9272, 0.0140]) tensor([1., 0., 1., 0., 0., 0., 1., 1., 0.])
[0.] [0.] [0.92716193]
