# Defending the model

In this section we will look at defences for the ML model(s).

In [2]:
import numpy as np
import pandas as pd
from art.attacks.evasion import DecisionTreeAttack, HopSkipJump
from art.estimators.classification import SklearnClassifier
from art.estimators.classification.scikitlearn import ScikitlearnDecisionTreeClassifier

from models import Model
from utils import compare_data, parse_df_for_pcap_validity, get_testing_set, get_training_set

## Defence: Training with adversarial samples

In [None]:
# --- White-box adversarial sample generation ---
# Load the saved model and run it over the attack capture, asking for the
# feature matrix, labels and predictions back.
attack_data_pcap = "datasets/AdversaryPingFlood.pcap"
model = Model(None, save_model_name="time_model_dt")
target_attack_x, target_attack_y, preds = model.test(
    attack_data_pcap,
    malicious=1,
    return_x_y_preds=True,
    verbose=False,
)

# Keep only the rows the model predicted as class 1.
detected_idx = np.where(preds == 1)
target_attack_x = target_attack_x[detected_idx]
target_attack_y = target_attack_y[detected_idx]

# Labelled DataFrame of those rows, to make retraining the model easier.
target_attack_df = pd.DataFrame(target_attack_x, columns=model.features)
target_attack_df["malicious"] = 1

# White-box attack: wrap the trained classifier for ART and craft
# adversarial samples with DecisionTreeAttack.
art_classifier = ScikitlearnDecisionTreeClassifier(model=model.get_classifier())
dt_attack = DecisionTreeAttack(classifier=art_classifier)
white_box_adversarial = dt_attack.generate(x=target_attack_x)

# Pass the crafted samples through the project's pcap-validity helper,
# giving it the unperturbed rows for reference.
valid_white_box_adversarial = parse_df_for_pcap_validity(
    white_box_adversarial,
    original_data=target_attack_x,
    columns=model.features,
)

In [None]:
# Generate black-box adversarial samples with HopSkipJump.
# Depends on `model` and `target_attack_x` from the white-box cell above.
#
# SklearnClassifier and HopSkipJump are the names actually imported at the top of
# the notebook; the previous ScikitlearnClassifier / HopSkipJumpAttack were never
# imported and raised NameError.
art_classifier = SklearnClassifier(model=model.get_classifier())
hsj_attack = HopSkipJump(classifier=art_classifier)

# adversarial samples
# NOTE(review): y is all zeros (class 0) but HopSkipJump is untargeted unless
# targeted=True is passed — confirm whether a targeted attack was intended.
black_box_adversarial = hsj_attack.generate(x=target_attack_x, y=np.zeros(len(target_attack_x)))
x_test_adv = black_box_adversarial  # original variable name kept as an alias

# Validate the black-box samples (the white-box samples were passed here by mistake).
valid_black_box_adversarial = parse_df_for_pcap_validity(black_box_adversarial, original_data=target_attack_x, columns=model.features)

In [8]:
# Retrain the model on the white-box adversarial samples (requires the cells above to have run).
# NOTE(review): white_box_adversarial is the raw output of dt_attack.generate, not the labelled
# target_attack_df built alongside it — confirm which form Model.train expects.
model.train(white_box_adversarial, retrain=True)

# Re-run the retrained model on the same adversarial samples to see how they are now classified.
model.test(white_box_adversarial)

NameError: name 'model' is not defined

In [None]:
# Check classification accuracy on the test set.
# get_testing_set is imported from the project's utils module; `model` comes from the cells above.
test_set = get_testing_set()
model.test(test_set)

### Part 3 exercises

In [6]:
# Try retraining models with the white box / black box / valid / invalid packets and see how it impacts 
# classification accuracy
#
# You may want to combine the adversarial samples with the original training set to avoid catastrophic forgetting
# To combine datasets use pd.concat(<list>)
# 
# e.g.
# training_set = get_training_set()
# pd.concat([target_attack_df, training_set])

### ---------- End of Part 3 ----------