In [1]:
import h5py
import numpy as np
import tensorflow as tf
import pickle
from sklearn.preprocessing import OneHotEncoder
from tensorflow.keras.models import clone_model
from sklearn.neighbors import KNeighborsClassifier
import pathlib
import sys
import os
from tqdm import tqdm

In [2]:
# Display the installed TensorFlow version so the run environment is recorded with the results.
tf.__version__

'2.18.0'

In [3]:
# Colab-only: mount Google Drive at /content/drive; the project paths used below live under it.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [4]:
# Experiment selectors; both are interpolated into the data directory path below.
attack_vector='cache' # other value noted by the author: network
machine_setting='dynamic_browser_version' # other value noted by the author: static_browser_version

# Project root on the mounted Drive. Plain string literal: it has no placeholders,
# so the f-prefix was unnecessary. The trailing slash is kept so the value is unchanged.
root_path='/content/drive/MyDrive/Colab Notebooks/concept-drift-online/'
# Strip the root's trailing slash before joining; interpolating root_path directly
# produced '...concept-drift-online//data/...' with a doubled separator.
data_path=f"{root_path.rstrip('/')}/data/{machine_setting}/{attack_vector}"

In [5]:
# Show the kernel's current working directory (a sanity check; the project itself is addressed via root_path).
os.getcwd()

'/content'

In [6]:
# Make the project directory importable so the local helper modules below resolve.
# The membership check keeps this cell idempotent: re-running it no longer appends
# a duplicate entry to sys.path each time.
if str(root_path) not in sys.path:
    sys.path.append(str(root_path))
from utils import train_on_hdf5,test_on_hdf5,prepare_hdf5_data,get_data_paths_ordered
from nn_utils import triplet_cnn_lstm,TripletSemiHardLoss,L2NormalizationLayer

In [8]:
# Load the pickled label encoder stored next to the models; it is passed to
# prepare_hdf5_data in the evaluation loop.
# NOTE(review): pickle.load can execute arbitrary code — only load this file from a trusted source.
le_dir=f'{data_path}/models/label_encoder_100.pkl'
with open(le_dir, 'rb') as f:
    le = pickle.load(f)


# Location of the saved triplet base model (an .h5 file under the models directory).
triplet_model_path=f'{data_path}/models/triplet_base.h5'

# Load the model for inference only (compile=False). The project-defined loss and
# layer classes are supplied through custom_objects so deserialization can resolve them.
# NOTE(review): safe_mode=False turns off Keras' safe-deserialization guard — confirm the file is trusted.
base_model = tf.keras.models.load_model(
    triplet_model_path,
    custom_objects={'TripletSemiHardLoss': TripletSemiHardLoss, 'L2NormalizationLayer': L2NormalizationLayer},
    safe_mode=False,
    compile=False
)

# Directories iterated by the evaluation loop; each is expected to hold train.h5 and test.h5.
# Presumably sorted chronologically (going by the helper's name) — TODO confirm in utils.
ordered_paths=get_data_paths_ordered(data_path)


In [13]:
# Evaluation settings for the per-directory k-NN check on the frozen base model.
batch=256

n_knn_samples_per_website=20
k_neighbors=1


def _embed_dataset(model, dataset):
    """Embed every batch of `dataset` with `model` and collect integer labels.

    Args:
        model: object exposing `predict(batch, verbose=0)` that returns an array
            of embeddings for the batch (here: the loaded base model).
        dataset: iterable yielding `(sequences, labels)` batches; labels are
            reduced with argmax over axis 1 (assumes one row of class scores /
            one-hot values per sample — TODO confirm against prepare_hdf5_data).

    Returns:
        Tuple `(embeddings, labels)` of numpy arrays concatenated over all
        batches along axis 0; labels are int32 class indices.

    Raises:
        ValueError: if the dataset yields no batches.
    """
    embeddings, labels = [], []
    for sequences, encoded_labels in dataset:
        embeddings.append(model.predict(sequences, verbose=0))
        labels.append(tf.cast(tf.argmax(encoded_labels, axis=1), tf.int32).numpy())
    if not embeddings:
        # np.concatenate would fail with a less specific message on an empty list.
        raise ValueError('dataset yielded no batches; nothing to embed')
    return np.concatenate(embeddings, axis=0), np.concatenate(labels, axis=0)


# Maps each directory stem to the k-NN accuracy measured on that directory's test split.
results={}
for path in tqdm(ordered_paths):
    curr_date_i=path.stem
    train_path_i = (path/'train.h5').as_posix()
    test_path_i = (path/'test.h5').as_posix()

    # Reference set: a limited number of samples per class from this directory's train
    # split, embedded and used to fit the k-NN classifier.
    train_datasets_i=prepare_hdf5_data(train_path_i,le,batch_size=batch,n_samples=n_knn_samples_per_website,sample_validation=-1,verbose=0)['data']
    train_embeddings_i, train_labels_i = _embed_dataset(base_model, train_datasets_i)
    knn = KNeighborsClassifier(n_neighbors=k_neighbors)
    knn.fit(train_embeddings_i, train_labels_i)

    # Query set: the same directory's test split (n_samples=-1 presumably means "all" — TODO confirm).
    test_datasets_i = prepare_hdf5_data(test_path_i, label_encoder=le, batch_size=batch, n_samples=-1)
    test_embeddings_i, test_labels_i = _embed_dataset(base_model, test_datasets_i['data'])
    pred_i = knn.predict(test_embeddings_i)

    # Accuracy = fraction of test samples whose predicted label matches the true label.
    acci=(test_labels_i==pred_i).mean()
    results[curr_date_i]=acci
    print(f'accuracy: {acci}')

 14%|█▍        | 1/7 [00:08<00:53,  8.87s/it]

accuracy: 0.72


 29%|██▊       | 2/7 [00:18<00:47,  9.53s/it]

accuracy: 0.7688888888888888


 43%|████▎     | 3/7 [00:27<00:35,  8.92s/it]

accuracy: 0.7966666666666666


 57%|█████▋    | 4/7 [00:36<00:27,  9.01s/it]

accuracy: 0.6822222222222222


 71%|███████▏  | 5/7 [00:45<00:18,  9.16s/it]

accuracy: 0.7477777777777778


 86%|████████▌ | 6/7 [00:54<00:08,  8.90s/it]

accuracy: 0.78


100%|██████████| 7/7 [01:04<00:00,  9.25s/it]

accuracy: 0.7855555555555556



