In [18]:
import h5py
import numpy as np
import tensorflow as tf
import pickle
from sklearn.preprocessing import OneHotEncoder
from tensorflow.keras.models import clone_model
import pathlib
import sys
import os
from tqdm import tqdm

In [2]:
# Show the TensorFlow version this notebook is running against.
tf.__version__

'2.18.0'

In [3]:
# Mount Google Drive at /content/drive (Colab-only API); root_path below points
# into this mount, so this cell has to run before any data or model is read.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [7]:
# Experiment selectors; together they pick the data sub-directory used below.
attack_vector = 'cache'  # alternative value: 'network'
machine_setting = 'dynamic_browser_version'  # alternative value: 'static_browser_version'

# Project root on the mounted Drive. The trailing slash is kept so that any code
# concatenating onto root_path keeps working.
root_path = '/content/drive/MyDrive/Colab Notebooks/concept-drift-online/'

# Strip the trailing separator before joining; otherwise every derived path
# contains a doubled slash ('.../concept-drift-online//data/...').
data_path = f"{root_path.rstrip('/')}/data/{machine_setting}/{attack_vector}"

# Location of the first snapshot's training file, relative to data_path.
model_0_train_path = '2021_08_27/train.h5'

In [8]:
# Load the previously saved baseline model from the models directory under data_path.
# NOTE(review): the file name hard-codes "cache" while attack_vector is configurable;
# confirm whether it should follow attack_vector when that is set to 'network'.
baseline_model_path=f'{data_path}/models/cnn_lstm_cache.keras'
baseline_model = tf.keras.models.load_model(baseline_model_path)


In [17]:
# Make the project root importable, then pull in the project's helper modules.
# Presumably utils.py and nn_utils.py live directly under root_path; verify if the imports fail.
sys.path.append(str(root_path)) # Add the directory to the path
from utils import train_on_hdf5,test_on_hdf5,prepare_hdf5_data,get_data_paths_ordered
from nn_utils import triplet_cnn_lstm,TripletSemiHardLoss,L2NormalizationLayer

In [21]:
# Files of the first snapshot (2021_08_27) and the pickled label encoder.
hdf5_train_path0 = f'{data_path}/2021_08_27/train.h5'
hdf5_test_path0 = f'{data_path}/2021_08_27/test.h5'
le_dir = f'{data_path}/models/label_encoder_100.pkl'

# NOTE: pickle.load can execute arbitrary code; only load encoder files you trust.
with open(le_dir, 'rb') as le_file:
    le = pickle.load(le_file)

batch = 256


def _onehot_to_index(x, y):
    """Keep the inputs and replace y by its int32 argmax along axis 1.

    Presumably y arrives one-hot encoded from prepare_hdf5_data -- TODO confirm.
    """
    return x, tf.cast(tf.argmax(y, axis=1), tf.int32)


# n_samples=-1 is forwarded as-is; presumably it means "use every sample" -- TODO confirm.
datasets = prepare_hdf5_data(
    hdf5_train_path0,
    label_encoder=le,
    batch_size=batch,
    n_samples=-1,
)
train = datasets['data'].map(_onehot_to_index)
val = datasets['val'].map(_onehot_to_index)

In [22]:
# Turn off Keras traceback filtering so errors show the full, unfiltered stack trace.
tf.keras.config.disable_traceback_filtering()

In [36]:
# Build the triplet embedding network via the project helper.
# The first positional argument is presumably the input sequence length -- TODO confirm.
base_model = triplet_cnn_lstm(
    15000,
    output_size=128,
    filters=256,
    strides=3,
    pool_size=4,
    units=32,
    dropout=0.8,
    lr=0.001,
)


In [37]:
# Warm-start the triplet model: copy the weights of every baseline layer except
# the last one into the layer at the same index of base_model.
# Assumes both models share layer order and weight shapes up to that point;
# set_weights raises if they do not.
for layer_idx, baseline_layer in enumerate(baseline_model.layers[:-1]):
    base_model.layers[layer_idx].set_weights(baseline_layer.get_weights())

In [39]:
# Train the triplet model for up to max_epochs epochs. Training stops early once
# val_loss has not improved for 5 epochs, and the best weights are restored.
max_epochs = 20
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss', patience=5, restore_best_weights=True
)
base_model.fit(
    train,
    validation_data=val,
    epochs=max_epochs,
    callbacks=[early_stopping],
    verbose=1,
)

Epoch 1/20
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 392ms/step - loss: 0.7262 - val_loss: 5.6885e-04
Epoch 2/20
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 210ms/step - loss: 0.0303 - val_loss: 3.4192e-04
Epoch 3/20
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 209ms/step - loss: 0.0230 - val_loss: 1.7419e-04
Epoch 4/20
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 205ms/step - loss: 0.0192 - val_loss: 1.2057e-04
Epoch 5/20
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 211ms/step - loss: 0.0166 - val_loss: 9.0958e-05
Epoch 6/20
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 210ms/step - loss: 0.0143 - val_loss: 8.0312e-05
Epoch 7/20
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 209ms/step - loss: 0.0129 - val_loss: 5.9592e-05
Epoch 8/20
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 207ms/step - loss: 0.0115 - val_loss: 5.5659e-05
Epoch 

<keras.src.callbacks.history.History at 0x7df57a2e4fd0>

In [40]:
# Persist the trained triplet model next to the other models under data_path.
# NOTE(review): the baseline model above is stored as .keras while this one uses the
# .h5 extension; recent Keras releases treat HDF5 as a legacy format -- confirm the
# format is intentional and that loading restores the custom layer/loss correctly.
triplet_model_path=f'{data_path}/models/triplet_base.h5'
base_model.save(triplet_model_path)




In [None]:
from sklearn.neighbors import KNeighborsClassifier


def _labels_to_index(x, y):
    """Keep the inputs and replace y by its int32 argmax along axis 1."""
    return x, tf.cast(tf.argmax(y, axis=1), tf.int32)


def _embed_dataset(model, dataset):
    """Embed every batch of `dataset` with `model`.

    Args:
        model: Keras model that maps a batch of sequences to embeddings.
        dataset: iterable yielding (sequences, labels) batches, labels as tensors.

    Returns:
        (embeddings, labels): two NumPy arrays concatenated over all batches
        along axis 0, in iteration order.

    Raises:
        ValueError: from np.concatenate if the dataset yields no batches.
    """
    embeddings, labels = [], []
    for sequences, batch_labels in dataset:
        # Each element is already exactly one batch, so run it in a single
        # forward pass. Model.predict would re-split it into its own default
        # batches and set up a new prediction loop on every call.
        embeddings.append(np.asarray(model.predict_on_batch(sequences)))
        labels.append(batch_labels.numpy())
    return np.concatenate(embeddings, axis=0), np.concatenate(labels, axis=0)


ordered_paths = get_data_paths_ordered(data_path)

batch = 256

n_knn_samples_per_website = 10  # training samples per class used to fit the KNN
k_neighbors = 1
results = {}  # snapshot directory stem -> KNN accuracy on that snapshot's test split

# For every snapshot: fit a KNN on embeddings of a small labelled sample of its
# train split, then score it on embeddings of the full test split.
for path in tqdm(ordered_paths):
    curr_date_i = path.stem
    train_path_i = (path / 'train.h5').as_posix()
    test_path_i = (path / 'test.h5').as_posix()

    train_i = prepare_hdf5_data(
        train_path_i,
        le,
        batch_size=batch,
        n_samples=n_knn_samples_per_website,
        sample_validation=-1,
        verbose=0,
    )['data'].map(_labels_to_index)
    train_embeddings_i, train_labels_i = _embed_dataset(base_model, train_i)

    knn = KNeighborsClassifier(n_neighbors=k_neighbors)
    knn.fit(train_embeddings_i, train_labels_i)

    test_i = prepare_hdf5_data(
        test_path_i,
        label_encoder=le,
        batch_size=batch,
        n_samples=-1,
    )['data'].map(_labels_to_index)
    test_embeddings_i, test_labels_i = _embed_dataset(base_model, test_i)

    pred_i = knn.predict(test_embeddings_i)
    acci = (test_labels_i == pred_i).mean()
    results[curr_date_i] = acci
    print(f'accuracy: {acci}')


 14%|█▍        | 1/7 [00:30<03:03, 30.60s/it]

accuracy: 0.6855555555555556


 29%|██▊       | 2/7 [00:38<01:25, 17.16s/it]

accuracy: 0.7266666666666667


 43%|████▎     | 3/7 [00:44<00:48, 12.17s/it]

accuracy: 0.7444444444444445


 57%|█████▋    | 4/7 [00:52<00:31, 10.37s/it]

accuracy: 0.6433333333333333


 71%|███████▏  | 5/7 [00:58<00:18,  9.05s/it]

accuracy: 0.6744444444444444


 86%|████████▌ | 6/7 [01:08<00:09,  9.32s/it]

accuracy: 0.7577777777777778


100%|██████████| 7/7 [01:22<00:00, 11.76s/it]

accuracy: 0.7566666666666667



