In [None]:
import h5py
import numpy as np
import tensorflow as tf
import pickle
from tqdm import tqdm
from sklearn.preprocessing import OneHotEncoder
from tensorflow.keras.models import clone_model
from sklearn.metrics import precision_score, recall_score, fbeta_score

import pathlib
import sys
import os


In [None]:
# Choose the project root.
# On Google Colab: mount Google Drive and use the project folder stored there.
# Anywhere else: use the parent of the current working directory.
# Only a missing `google.colab` package triggers the local fallback; a failure
# while mounting Drive is allowed to surface instead of being silently replaced
# by a local path that would not exist on Colab.
try:
    from google.colab import drive
except ImportError:
    # Not on Colab. Note: this branch yields a pathlib.Path, the Colab branch a str.
    root_path = pathlib.Path('../').resolve()
else:
    drive.mount('/content/drive')
    root_path='/content/drive/MyDrive/Colab Notebooks/concept-drift-online'

In [None]:
# Experiment selection. The author's note lists 'cache' as the other attack vector.
attack_vector = 'network'
machine_setting = 'static_browser_version'

# Resulting layout: <root_path>/data/<machine_setting>/<attack_vector>
data_path = '/'.join([str(root_path), 'data', machine_setting, attack_vector])

In [None]:
# Display the current working directory; outside Colab the project root is
# resolved as '../' relative to it, so this is a quick sanity check.
os.getcwd()

In [None]:
# Make the project root importable so that `utils` can be found
# (presumably utils lives directly under root_path — confirm).
# The membership check keeps the cell idempotent: re-running it does not add
# the same entry to sys.path again.
if str(root_path) not in sys.path:
    sys.path.append(str(root_path))
from utils import (
    prepare_hdf5_data,
    get_data_paths_ordered,
    load_hdf5_data,
    create_subset_encoder,
)  # the OpenWorldLabelEncoder import is currently disabled

In [None]:

# Report how many GPU devices TensorFlow detects.
# `tf` is already imported in the notebook's first (imports) cell, so it is not
# imported a second time here.
print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))


In [None]:
# Collection of per-run data folders found under data_path, in the order chosen by
# the utils helper (presumably chronological — confirm against utils).
# The training loop below uses each entry like a pathlib path (`.stem`, `/`).
ordered_paths=get_data_paths_ordered(data_path)

In [None]:
# Load the saved starting model and the pickled encoder from <data_path>/models.
model_path=f'{data_path}/models/cnn_lstm.keras'
le_dir=f'{data_path}/models/label_encoder_100.pkl'  # despite the name, this is a file path
model0 = tf.keras.models.load_model(model_path)
# NOTE(review): pickle.load can execute arbitrary code while deserializing;
# only open encoder files that come from a trusted source.
with open(le_dir, 'rb') as f:
    # `le` is later handed to load_hdf5_data (presumably a fitted label encoder — confirm).
    le = pickle.load(f)

In [None]:
# Shared training settings: batch size passed to prepare_hdf5_data and the number
# of epochs used for every fit call.
batch, max_epochs = 256, 20

In [None]:

# Incremental fine-tuning: visit the folders in `ordered_paths` one by one,
# fine-tune the model on that folder's train split, then score it on the same
# folder's test split. Accuracies are collected in `results`, keyed by folder name.

# Passed to load_hdf5_data as n_samples for the train split
# (presumably the number of training samples kept per website — confirm in utils).
n_incremental_samples_per_website=10


def _collect_predictions(model, dataset, source='dataset'):
    """Run `model` over every (features, labels) batch of `dataset`.

    Args:
        model: object exposing `predict_on_batch(features)`.
        dataset: iterable of `(features, labels)` batches; `labels` must expose `.numpy()`.
        source: human-readable name of the data, used only in the error message.

    Returns:
        `(predictions, labels)`: the per-batch outputs concatenated along axis 0.

    Raises:
        ValueError: if `dataset` yields no batches, so there is nothing to score.
    """
    batch_predictions = []
    batch_labels = []
    for features, labels in dataset:
        batch_predictions.append(model.predict_on_batch(features))
        batch_labels.append(labels.numpy())
    if not batch_predictions:
        # np.concatenate([]) would otherwise fail with an opaque
        # "need at least one array to concatenate" message.
        raise ValueError(f'{source} yielded no batches; cannot compute accuracy')
    return np.concatenate(batch_predictions, axis=0), np.concatenate(batch_labels, axis=0)


# Train a copy so that `model0` keeps the weights it was loaded with.
last_model=clone_model(model0)
last_model.set_weights(model0.get_weights())
last_model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=1e-5),loss='categorical_crossentropy',  metrics=['accuracy'])
results={}
for path in tqdm(ordered_paths):
    curr_date_i=path.stem  # folder name, used as the key in `results`
    train_path_i = (path/'train.h5').as_posix()
    test_path_i = (path/'test.h5').as_posix()

    # Fine-tune on this folder's (sub-sampled) train split; the same model object
    # is carried over from one folder to the next.
    # sample_validation=-1 presumably disables the validation split — confirm in utils.
    X_train_i, y_train_i = load_hdf5_data(train_path_i,le,n_samples=n_incremental_samples_per_website)
    train_i = prepare_hdf5_data(X_train_i, y_train_i, batch_size=batch, sample_validation=-1,shuffle=True)['data']
    last_model.fit(train_i,epochs=max_epochs,verbose=0)

    # Score on the test split of the same folder (no n_samples argument, unshuffled).
    X_test_i, y_test_i = load_hdf5_data(test_path_i,le)
    test_datasets_i = prepare_hdf5_data(X_test_i, y_test_i, batch_size=batch, sample_validation=-1,shuffle=False)['data']
    pred_i, y_test_aligned = _collect_predictions(last_model, test_datasets_i, source=test_path_i)

    # Accuracy = share of rows whose arg-max prediction equals the arg-max of the label row.
    acci=(pred_i.argmax(axis=1)==y_test_aligned.argmax(axis=1)).mean()
    results[curr_date_i] = {'accuracy': acci}
    print(f'accuracy: {acci}')