In [1]:
import h5py
import numpy as np
import tensorflow as tf
import pickle
from tqdm import tqdm
from sklearn.preprocessing import OneHotEncoder
from tensorflow.keras.models import clone_model
from sklearn.metrics import precision_score, recall_score, fbeta_score

import pathlib
import sys
import os


In [2]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
attack_vector='cache' #network
attack_vector= 'network'
machine_setting='dynamic_browser_version' # static_browser_version

root_path=f'/content/drive/MyDrive/Colab Notebooks/concept-drift-online/'
data_path=f'{root_path}/data/{machine_setting}/{attack_vector}'

In [4]:
os.getcwd()

'/content'

In [5]:
sys.path.append(str(root_path)) # Add the directory to the path
from utils import get_data_paths_ordered,prepare_hdf5_data,test_on_hdf5,predict_on_hdf5,OpenWorldLabelEncoder

In [6]:

import tensorflow as tf
print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))


Num GPUs Available:  0


In [7]:
ordered_paths=get_data_paths_ordered(data_path)

In [8]:
model_path=f'{data_path}/models/cnn_lstm_cache.keras'
le_dir=f'{data_path}/models/label_encoder_100.pkl'
model0 = tf.keras.models.load_model(model_path)
with open(le_dir, 'rb') as f:
    le = pickle.load(f)

In [9]:
batch=256
max_epochs=20

In [None]:

n_incremental_samples_per_website=10

last_model=clone_model(model0)
last_model.set_weights(model0.get_weights())
last_model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=1e-5),loss='categorical_crossentropy',  metrics=['accuracy'])
results={}
for path in tqdm(ordered_paths):
    curr_date_i=path.stem
    train_path_i = (path/'train.h5').as_posix()
    test_path_i = (path/'test.h5').as_posix()
    train_i=prepare_hdf5_data(train_path_i,le,batch_size=batch,n_samples=n_incremental_samples_per_website,sample_validation=-1,verbose=0)['data']
    last_model.fit(train_i,epochs=max_epochs,verbose=0)
    pred_i=predict_on_hdf5(test_path_i,last_model,le,batch_size=32)
    with h5py.File(test_path_i, 'r') as hdf:
      y_testi = le.transform((hdf[f'site_name'][:]).reshape(-1,1)).toarray()
    acci=(y_testi.argmax(axis=1)==pred_i.argmax(axis=1)).mean()
    results.update({curr_date_i:acci})
    print(f'accuracy: {acci}')

  0%|          | 0/7 [00:00<?, ?it/s]

[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 424ms/step


 14%|█▍        | 1/7 [25:38<2:33:50, 1538.47s/it]

accuracy: 0.917
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 463ms/step


 29%|██▊       | 2/7 [51:15<2:08:07, 1537.59s/it]

accuracy: 0.87


In [15]:
results={}
non_sensitive_label=69
n_incremental_samples_per_website=20
owle = OpenWorldLabelEncoder(le,non_sensitive_label+1)

ow_model_path=f'{data_path}/models/cnn_lstm_ow.keras'
ow_model = tf.keras.models.load_model(ow_model_path)
last_model=clone_model(ow_model)
last_model.set_weights(ow_model.get_weights())
last_model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=1e-5),loss='categorical_crossentropy',  metrics=['accuracy'])
for path in tqdm(ordered_paths):
    curr_date_i=path.stem
    train_path_i = (path/'train.h5').as_posix()
    test_path_i = (path/'test.h5').as_posix()
    train_i=prepare_hdf5_data(train_path_i,owle,batch_size=batch,n_samples=n_incremental_samples_per_website,sample_validation=-1,verbose=0)['data']
    last_model=clone_model(last_model)
    last_model.set_weights(last_model.get_weights())
    last_model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=1e-5),loss='categorical_crossentropy',  metrics=['accuracy'])
    last_model.fit(train_i,epochs=max_epochs,verbose=0)
    test_datasets_i=prepare_hdf5_data(test_path_i,owle,batch_size=batch,sample_validation=-1)['data']
    all_predictions = []
    all_true_labels = []
    for features, labels in test_datasets_i:
        batch_predictions = last_model.predict_on_batch(features)
        all_predictions.append(batch_predictions)
        all_true_labels.append(labels.numpy())
    pred_i = np.concatenate(all_predictions, axis=0)
    y_test_aligned = np.concatenate(all_true_labels, axis=0)
    acci=(pred_i.argmax(axis=1)==y_test_aligned.argmax(axis=1)).mean()
    y_true_indices = y_test_aligned.argmax(axis=1)
    y_pred_indices = pred_i.argmax(axis=1)
    precision = precision_score(y_true_indices, y_pred_indices, labels=[non_sensitive_label], average='weighted')
    recall = recall_score(y_true_indices, y_pred_indices, labels=[non_sensitive_label], average='weighted')
    f2score = fbeta_score(y_true_indices, y_pred_indices, beta=2, labels=[non_sensitive_label], average='weighted')

    results.update({curr_date_i: {'accuracy': acci, 'precision': precision, 'recall': recall, 'f2score': f2score}})
    print(f'Date: {curr_date_i}, Accuracy: {acci}, Precision (Non-sensitive): {precision}, Recall (Non-sensitive): {recall}, F2-score (Non-sensitive): {f2score}')


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
 14%|█▍        | 1/7 [00:57<05:42, 57.06s/it]

Date: 2021_08_27, Accuracy: 0.036, Precision (Non-sensitive): 0.0, Recall (Non-sensitive): 0.0, F2-score (Non-sensitive): 0.0


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
 29%|██▊       | 2/7 [01:35<03:51, 46.30s/it]

Date: 2021_09_05, Accuracy: 0.059, Precision (Non-sensitive): 0.0, Recall (Non-sensitive): 0.0, F2-score (Non-sensitive): 0.0


 43%|████▎     | 3/7 [02:18<02:58, 44.53s/it]

Date: 2021_09_12, Accuracy: 0.078, Precision (Non-sensitive): 0.5, Recall (Non-sensitive): 0.02903225806451613, F2-score (Non-sensitive): 0.03577106518282989


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
 57%|█████▋    | 4/7 [02:56<02:06, 42.01s/it]

Date: 2021_10_24, Accuracy: 0.049, Precision (Non-sensitive): 0.0, Recall (Non-sensitive): 0.0, F2-score (Non-sensitive): 0.0


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
 71%|███████▏  | 5/7 [03:34<01:21, 40.76s/it]

Date: 2021_11_28, Accuracy: 0.041, Precision (Non-sensitive): 0.0, Recall (Non-sensitive): 0.0, F2-score (Non-sensitive): 0.0


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
 86%|████████▌ | 6/7 [04:16<00:41, 41.01s/it]

Date: 2022_01_23, Accuracy: 0.045, Precision (Non-sensitive): 0.0, Recall (Non-sensitive): 0.0, F2-score (Non-sensitive): 0.0


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
100%|██████████| 7/7 [04:52<00:00, 41.84s/it]

Date: 2022_02_21, Accuracy: 0.067, Precision (Non-sensitive): 0.0, Recall (Non-sensitive): 0.0, F2-score (Non-sensitive): 0.0



