# Defending the model

In this section we will look at defences for the ML model(s), starting with retraining the model on adversarial samples.

In [1]:
from art.attacks.evasion import DecisionTreeAttack, HopSkipJump
from art.estimators.classification import SklearnClassifier
from art.estimators.classification.scikitlearn import ScikitlearnDecisionTreeClassifier

from models import Model
from utils import compare_data, parse_df_for_pcap_validity, get_training_data, get_testing_data
import numpy as np
import pandas as pd

## Defence: Training with adversarial samples

In [2]:
# Generate white-box adversarial samples.
# Load the saved decision-tree model and run it over the attack capture,
# asking for the feature rows, labels and predictions back.
attack_data_pcap = "datasets/AdversaryPingFlood.pcap"
model = Model(None, save_model_name="time_model_dt")
target_attack_x, target_attack_y, preds = model.test(
    attack_data_pcap,
    malicious=1,
    return_x_y_preds=True,
    verbose=False,
)

# Keep only the rows the model predicted as class 1; these are the samples
# the attacks will perturb. The index is computed once and reused for x and y.
predicted_malicious = np.where(preds == 1)
target_attack_x = target_attack_x[predicted_malicious]
target_attack_y = target_attack_y[predicted_malicious]

# The same rows as a labelled dataframe, to make retraining the model easier.
target_attack_df = pd.DataFrame(target_attack_x, columns=model.features).assign(malicious=1)

# White-box attack: wrap the underlying classifier for ART and build the
# decision-tree attack on top of it.
art_classifier = ScikitlearnDecisionTreeClassifier(model=model.get_classifier())
dt_attack = DecisionTreeAttack(classifier=art_classifier)

# Adversarial samples, plus the result of the project's pcap-validity helper.
# NOTE(review): presumably the helper keeps/repairs samples that still map to
# valid packets -- confirm against utils.parse_df_for_pcap_validity.
white_box_adversarial = dt_attack.generate(x=target_attack_x)
valid_white_box_adversarial = parse_df_for_pcap_validity(
    white_box_adversarial,
    original_data=target_attack_x,
    columns=model.features,
)

save_model_path exists, loading model and config....
DecisionTreeClassifier()
['time_delta', 'IP__ttl', 'Ethernet__type_2048.0', 'Ethernet__type_2054.0', 'Ethernet__type_0.0', 'Ethernet__type_34525.0', 'Ethernet__type_32821.0', 'IP__proto_6.0', 'IP__proto_17.0', 'IP__proto_0.0', 'IP__proto_1.0', 'IP__proto_2.0']
Opening datasets/AdversaryPingFlood.pcap ...
done parsing datasets/AdversaryPingFlood.pcap


Decision tree attack:   0%|          | 0/1886 [00:00<?, ?it/s]

In [3]:
# Generate black-box adversarial samples with HopSkipJump.
# Only the first N_BLACK_BOX_SAMPLES rows are attacked to keep the run short;
# set N_BLACK_BOX_SAMPLES = None to attack every row (about 2k samples, takes longer).
N_BLACK_BOX_SAMPLES = 100

art_classifier = SklearnClassifier(model=model.get_classifier())
hsj_attack = HopSkipJump(classifier=art_classifier)

# Slice into a new variable rather than overwriting target_attack_x, so the
# full array stays the same length as target_attack_df and
# white_box_adversarial, and this cell can be re-run with a different sample
# count without re-running the white-box cell first.
black_box_target_x = target_attack_x[:N_BLACK_BOX_SAMPLES]

# y is all zeros (class 0), one entry per attacked sample.
# NOTE(review): HopSkipJump is built with default arguments here; whether y is
# used as a target label depends on its `targeted` setting -- confirm.
black_box_adversarial = hsj_attack.generate(
    x=black_box_target_x,
    y=np.zeros(len(black_box_target_x)),
)
valid_black_box_adversarial = parse_df_for_pcap_validity(
    black_box_adversarial,
    original_data=black_box_target_x,
    columns=model.features,
)

HopSkipJump:   0%|          | 0/100 [00:00<?, ?it/s]

In [7]:
# Baseline: evaluate the current model on the white-box adversarial samples.
# NOTE(review): model.train below is called on this same `model` object; if it
# updates the model in place, re-running this cell makes this "before" result
# reflect an already-retrained model -- restart the kernel and run all cells
# in order to get a true baseline.
model.test(white_box_adversarial)

# retrain model with white box samples
model.train(white_box_adversarial, continue_training=True)

# check classification on the same adversarial samples again, after retraining
model.test(white_box_adversarial)

-----
Testing acc: 1.00, f1: 1.00, tpr: 1.00, tnr 0.00
[[   0.    0.]
 [   0. 1886.]]
-----
not saving model as model already exists
-----
Training acc: 1.00, f1: 1.00, tpr: 1.00, tnr 0.00
[[   0.    0.]
 [   0. 1886.]]
-----
-----
Testing acc: 1.00, f1: 1.00, tpr: 1.00, tnr 0.00
[[   0.    0.]
 [   0. 1886.]]
-----


In [8]:
# Check classification accuracy on the test set returned by get_testing_data().
# Run after the retraining cell, this shows how the retrained model performs
# on the full test set rather than only on the adversarial samples.
test_set = get_testing_data()
model.test(test_set)

1    418874
0    148467
Name: malicious, dtype: int64
pingFloodDDoS          194436
tcpSYNFloodDDoS        182094
clean                  148467
modbusQueryFlooding     42344
Name: attack_type, dtype: int64
-----
Testing acc: 0.74, f1: 0.85, tpr: 1.00, tnr 0.00
[[     0 148467]
 [     0 418874]]
-----


### Part 3 exercises

In [None]:
# Try retraining models with the white box / black box / valid / invalid packets and see how it impacts 
# classification accuracy
#
# You may want to combine the adversarial samples with the original training set to avoid catastrophic forgetting
# To combine datasets use pd.concat(<list>)
# 
# e.g.
# training_set = get_training_data()
# combined_training_set = pd.concat([target_attack_df, training_set])

### ---------- End of Part 3 ----------