## Model Fitting on Sequential Features

In [1]:
## LOAD DATA FROM PICKLE
import pickle

# Location of the pickled feature tensor ('X') and label vector ('y').
path = '../Data/HK-CarHackingDataset/'
filename1 = f'{path}X'
filename2 = f'{path}y'

# NOTE(review): pickle.load can execute arbitrary code embedded in the file;
# only load pickles that were produced by this project.
# NOTE(review): the arrays are bound to X_back / y_back, while later cells
# read X / y -- presumably assigned in a cell not shown here; confirm.
with open(filename1, 'rb') as f:
    X_back = pickle.load(f)
print(X_back.shape)

with open(filename2, 'rb') as f:
    y_back = pickle.load(f)
print(y_back.shape)

(199274, 100, 2)
(199274,)


In [None]:
# Stage 1: stratified 60 / 40 split into train and "other".
# n_splits = 1, so the splitter yields exactly one (train, rest) index pair.
sss = StratifiedShuffleSplit(n_splits = 1, train_size = 0.60)
train_index, rest_index = next(sss.split(X, y))
x_train, y_train = X[train_index, :, :], y[train_index]
x_other, y_other = X[rest_index, :, :], y[rest_index]

# Stage 2: stratified 50 / 50 split of "other" into validation and test
# (i.e. 0.2 / 0.2 of the full data set).
sss2 = StratifiedShuffleSplit(n_splits = 1, train_size = 0.50)
val_index, test_index = next(sss2.split(x_other, y_other))
x_val, y_val = x_other[val_index, :, :], y_other[val_index]
x_test, y_test = x_other[test_index, :, :], y_other[test_index]

# Report the resulting proportions and absolute sizes of each partition.
trn = len(y_train)
val = len(y_val)
tst = len(y_test)
tot = trn + val + tst

print(f'Train: {round(trn/tot, 2)} ({trn})',
      f'Val: {round(val/tot, 2)} ({val})',
      f'Test: {round(tst/tot, 2)} ({tst})',
      f'\nTotal = {tot}', sep = '\n')

# Sum of the labels per partition (the count of positives if labels are 0/1).
print('Anomalies per group:', sum(y_train), sum(y_val), sum(y_test))

## Build Data Loaders

In [None]:
BATCH_SIZE = 256

################################
## Create Tensor Datasets 
################################
train_data = TensorDataset(torch.from_numpy(x_train), torch.from_numpy(y_train))
val_data = TensorDataset(torch.from_numpy(x_val), torch.from_numpy(y_val))
test_data = TensorDataset(torch.from_numpy(x_test), torch.from_numpy(y_test))


################################
## Dataset Iterators:
################################
# Page-locked host memory only helps (and is only available) with CUDA;
# requesting it on a CPU-only machine just triggers a warning.
pin = torch.cuda.is_available()

# Training batches are reshuffled every epoch and the ragged last batch is
# dropped so that every optimisation step sees a full batch.
train_loader = DataLoader(train_data, shuffle=True, batch_size=BATCH_SIZE, pin_memory=pin, drop_last=True)

# Evaluation loaders must cover EVERY sample: with drop_last=True up to
# BATCH_SIZE - 1 validation/test samples were silently excluded from the
# reported metrics. Shuffling is pointless for evaluation, so it is disabled.
val_loader = DataLoader(val_data, shuffle=False, batch_size=BATCH_SIZE, pin_memory=pin, drop_last=False)
test_loader = DataLoader(test_data, shuffle=False, batch_size=BATCH_SIZE, pin_memory=pin, drop_last=False)

# Sanity check: print the shape of the first batch of each loader.
print('Training Dimensions\n')
for data_cat in [train_loader, val_loader, test_loader]:
    for batch in data_cat:
        print(f'Sequence matrix size: {batch[0].size()}')
        print(f'Target vector size: {batch[1].size()}\n')
        break

## Multivariate LSTM Network


In [None]:
################################
## DEFINE ACCURACY FUNCTION
################################
def compute_binary_accuracy(model, data_loader, device, print_output=False):
    """Evaluate a binary classifier on every batch of ``data_loader``.

    The model is switched to eval mode (and left in eval mode), predictions
    are obtained by thresholding the model output at 0.5, and accuracy,
    precision and recall are computed over all batches at once.

    Args:
        model: callable module returning one probability per input sequence.
        data_loader: iterable yielding ``(inputs, labels)`` batches.
        device: torch device the batches are moved to before the forward pass.
        print_output: when True, print the confusion matrix and the metrics
            and return ``None`` instead of the metric tuple.

    Returns:
        ``(accuracy, precision, recall)`` rounded to 4 decimals, or ``None``
        when ``print_output`` is True.
    """
    model.eval()
    pred_chunks, label_chunks = [], []

    with torch.no_grad():
        for inpt, labs in data_loader:
            # Honour the `device` argument (previously the global DEVICE was
            # used and the parameter was silently ignored).
            inpt, labs = inpt.float().to(device), labs.float().to(device)
            label_chunks.append(labs.reshape(-1))

            probs = model(inpt)
            # Cast the boolean mask to float so that labels and predictions
            # share a dtype and no implicit type promotion is needed.
            pred_chunks.append((probs >= 0.5).reshape(-1).float())

    # Concatenate once (instead of re-allocating a growing tensor on every
    # batch) and move to CPU for the scikit-learn metric functions.
    if label_chunks:
        all_labs = torch.cat(label_chunks).cpu().numpy()
        all_preds = torch.cat(pred_chunks).cpu().numpy()
    else:
        all_labs = torch.empty(0).numpy()
        all_preds = torch.empty(0).numpy()

    accuracy = round(accuracy_score(all_labs, all_preds), 4)
    precision = round(precision_score(all_labs, all_preds, zero_division = 0), 4) # TP / (TP + FP)
    recall = round(recall_score(all_labs, all_preds, zero_division = 0), 4) # TP / (TP + FN)

    if print_output:
        print(confusion_matrix(all_labs, all_preds), end = '\n\n')
        print('Accuracy:', accuracy) 
        print('Precision:', precision) 
        print('Recall:', recall)
        return None
    
    return accuracy, precision, recall

################################
## MODEL
################################
class Anomaly_Detector(nn.Module):
    """(Bi)directional LSTM followed by a linear layer and a sigmoid.

    The final hidden state of the last LSTM layer (forward and backward
    states concatenated when bidirectional) is mapped to one probability
    per input sequence.

    Args:
        n_features: number of features per time step (LSTM input size).
        seq_length: sequence length; stored but not used by the layers.
        vocab_size: stored but not used (no embedding layer is built).
        batch_size: stored but not used by the forward pass.
        n_layer: number of stacked LSTM layers.
        bidirectinoal: whether the LSTM is bidirectional (parameter name
            kept as-is for backward compatibility with existing callers).
        drop_prob: dropout probability, applied between LSTM layers and to
            the final hidden state before the linear layer.
    """

    def __init__(self, n_features, seq_length, vocab_size, batch_size, n_layer = 1, bidirectinoal = False, drop_prob = 0):
        super().__init__()
        bidirectional = bool(bidirectinoal)

        # Params
        self.n_features = n_features
        self.seq_len = seq_length
        self.vocab = vocab_size
        # Respect the requested depth; this used to be hard-coded to 2,
        # silently ignoring the `n_layer` argument.
        self.n_layers = n_layer
        self.hidden_dim = 64
        self.bidir = 2 if bidirectional else 1  # number of directions
        self.drop = drop_prob
        self.batch_size = batch_size

        # Layers
        self.lstm = nn.LSTM(input_size = self.n_features, hidden_size = self.hidden_dim,
                            num_layers = self.n_layers, batch_first = True,
                            dropout = drop_prob, bidirectional = bidirectional)

        # The linear layer sees hidden_dim features per direction.
        self.linear = nn.Linear(self.hidden_dim * self.bidir, 1)
        self.sigmoid = nn.Sigmoid()
        self.dropout = nn.Dropout(drop_prob)

    def forward(self, sequences):
        """Return one probability per sequence as a 1-D tensor.

        Args:
            sequences: float tensor laid out as
                [batch_size, seq_len, n_features] (the LSTM is batch_first).

        Returns:
            Tensor of shape [batch_size] with values in [0, 1].
        """
        # Follow the device the module's weights live on instead of relying
        # on a global DEVICE constant defined elsewhere.
        sequences = sequences.to(self.linear.weight.device)

        # hidden = [num_layers * num_directions, batch_size, hidden_dim]
        _, (hidden, _) = self.lstm(sequences)

        if self.bidir == 2:
            # Last layer: concatenate final forward and backward states.
            hidden = torch.cat((hidden[-2, :, :], hidden[-1, :, :]), dim = 1)
        else:
            # Keep only the last layer's final hidden state.
            hidden = hidden[-1, :, :]

        hidden = self.dropout(hidden)
        return self.sigmoid(self.linear(hidden)).reshape(-1)


## Initialization

In [None]:
################################
## ARCHITECTURAL PARAMETERS
################################
# --- Network shape -----------------------------------------------------
N_FEATURES = 2            # per-step features: time differences and ID sequences
SEQ_LEN = 100             # time steps per sequence
VOCAB_SIZE = n_events     # number of unique IDs
N_LAYERS = 2              # stacked LSTM layers
BIDIRECTIONAL = True      # run the LSTM in both directions
DROPOUT = 0

# --- Optimisation ------------------------------------------------------
EPOCHS = 50
LEARNING_RATE = 1e-5
CLIP = 5                  # max gradient norm used for clipping

# --- Hardware ----------------------------------------------------------
# Use the first GPU when CUDA is available, otherwise fall back to the CPU.
DEVICE = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
torch.backends.cudnn.deterministic = True

# --- Model, loss and optimizer -----------------------------------------
model = Anomaly_Detector(
    N_FEATURES,
    SEQ_LEN,
    VOCAB_SIZE,
    BATCH_SIZE,
    N_LAYERS,
    BIDIRECTIONAL,
    DROPOUT,
).to(DEVICE)

# The network already ends in a sigmoid, hence plain BCE on probabilities.
criterion = nn.BCELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)


## Training

In [None]:
################################
## DEFINE TRAINING + TESTING FUNCTION
################################
def training_function():
    """Train the global ``model`` for ``EPOCHS`` epochs and report metrics.

    Relies on module-level state defined in earlier cells: ``model``,
    ``optimizer``, ``criterion``, ``train_loader``, ``val_loader``,
    ``test_loader``, ``EPOCHS``, ``CLIP`` and ``DEVICE``.

    After every epoch, accuracy / precision / recall are printed for the
    training and validation loaders; after the last epoch the same metrics
    are printed for the test loader. Returns ``None``.
    """
    print('\nSTARTING TRAINING\n')
    start_time = time.time()
    for epoch in range(EPOCHS):
        model.train()
        print(f'Starting Epoch: {epoch+1}')
        for batch_idx, (inpt, labs) in enumerate(train_loader):
            # BCELoss needs float inputs and float targets on the same device.
            inpt, labs = inpt.float().to(DEVICE), labs.float().to(DEVICE)

            optimizer.zero_grad()
            probs = model(inpt)  # sigmoid outputs, one per sequence
            cost = criterion(probs, labs)
            cost.backward()

            # Clip the gradient norm to guard against exploding gradients.
            nn.utils.clip_grad_norm_(model.parameters(), CLIP)
            optimizer.step()

            # Log every 100th batch.
            if batch_idx % 100 == 0:
                print (f'Batch {batch_idx:03d}/{len(train_loader):03d} | '
                       f'BCE Cost: {cost.item():.4f}')

        # compute_binary_accuracy switches to eval mode and disables
        # gradients itself, so no extra wrapper is needed here.
        a, p, r = compute_binary_accuracy(model, train_loader, DEVICE)
        a2, p2, r2 = compute_binary_accuracy(model, val_loader, DEVICE)
        # The metrics are fractions in [0, 1]; the '%' format spec scales
        # them by 100 (previously e.g. 0.981 was printed as "0.981%").
        print(f'Epoch Completed: '
              f'\n\twith training: {a:.1%} - {p:.1%} - {r:.1%}'
              f'\n\tand validation: {a2:.1%} - {p2:.1%} - {r2:.1%}')

        print(f'\tTime elapsed: {(time.time() - start_time)/60:.2f} min\n')

    ## Final Test Accuracy
    print(f'Total Training Time: {(time.time() - start_time)/60:.2f} min')
    print('----------------------------------------------')
    print('Final Test Accuracy/Precision/Recall : ', compute_binary_accuracy(model, test_loader, DEVICE))
    print('----------------------------------------------')

training_function()

## Saving Models

*Performance metrics refer only to results from the test set.*

**Features** | **model1** | **model2** | **model3**
---         | ---       | ---       | ---
seq_len     | 100       | 100       | 100
learn_rate  | 1e-5      | **1e-4**  | **1e-5**
epochs      | 50        | **25**    | **50**
batch       | 256       | 256       | 256
layers      | 2         | 2         | 2
bidir       | True      | True      | True
dropout     | 0.5       | **0**     | **0**
hidden      | 64        | 64        | 64
accuracy    | 0.981     | 0.9872    | 0.9936
precision   | 0.993     | 0.9993    | 0.9965
recall      | 0.9731    | 0.9609    | 0.9835
train_time  | 15 min    | 8 min     | 16 min




In [None]:
path = '../Model/CarHacking_models/'

# Keep the file name in its own variable: assigning it to `model` would
# overwrite the trained network with a string.
model_name = 'model3.pt'

# Save
# Persist the trained parameters (state_dict) of the `model` instance.
# Saving `Anomaly_Detector` itself would only pickle a reference to the
# class and none of the learned weights.
torch.save(model.state_dict(), path + model_name)

# Load
# To restore a model: rebuild the architecture with the same constructor
# arguments, then load the saved parameters into it.
name = 'name here'
#model = Anomaly_Detector(N_FEATURES, SEQ_LEN, VOCAB_SIZE, BATCH_SIZE, N_LAYERS, BIDIRECTIONAL, DROPOUT)
#model.load_state_dict(torch.load(path + name, map_location=DEVICE))
#model.eval()