In [20]:
import h5py
import numpy as np
import tensorflow as tf
import pathlib
import pickle
from sklearn.preprocessing import OneHotEncoder
import sys
from tqdm import tqdm
import os

In [3]:


import tensorflow as tf
# Report how many GPU devices TensorFlow can see on this machine.
gpu_devices = tf.config.list_physical_devices('GPU')
print("Num GPUs Available: ", len(gpu_devices))


Num GPUs Available:  0


In [14]:

# Resolve the project root: the Google Drive folder when running on Colab,
# otherwise the parent of the current working directory.
try:
    from google.colab import drive
except ImportError:
    # Not running on Colab: use the local checkout.
    root_path = pathlib.Path('../').resolve()
else:
    # Only a missing `google.colab` module triggers the local fallback; an
    # error raised by the mount itself is no longer swallowed.
    drive.mount('/content/drive')
    root_path = '/content/drive/MyDrive/Colab Notebooks/concept-drift-online'

In [16]:
# Experiment selection; the trailing comments list the alternative values.
attack_vector = 'network'  # or 'cache'
machine_setting = 'static_browser_version'  # or 'dynamic_browser_version'

# Feature count and max_val both depend on the attack vector.
if attack_vector == 'cache':
    n_dataset_features, max_val = 15000, 8
else:
    n_dataset_features, max_val = 3000, 8192

# Data directory under the project root, and the training file relative to it.
data_path = f'{root_path}/data/{machine_setting}/{attack_vector}'
model_0_train_path = '2021_08_27/train.h5'


In [18]:
# Make the project root importable so the local `utils` module resolves.
# The membership check keeps the cell idempotent: re-running it does not
# append the same entry to sys.path again.
if str(root_path) not in sys.path:
    sys.path.append(str(root_path))
from utils import cnn_lstm,small_cnn_lstm,prepare_hdf5_data,load_hdf5_data,create_subset_encoder

In [29]:
# Location of the training HDF5 file, rendered with forward slashes.
hdf5_train_path = pathlib.Path(data_path, model_0_train_path).as_posix()

# Keyword arguments forwarded to the model builder when attack_vector == 'cache'.
nn_config_cache = dict(
    features_per_layer=256,
    strides=3,
    pool_size=4,
    units=32,
    dropout=0.2,
)

# Keyword arguments forwarded to the model builder for every other attack vector.
nn_config_network = dict(
    features_per_layer=256,
    pool_size=3,
    strides=3,
    units=128,
    dropout=0.8,
    kernel_size=32,
)

num_of_classes = 100

# Pick the hyper-parameter set and the builder function together.
if attack_vector == 'cache':
    network_config, model_func = nn_config_cache, cnn_lstm
else:
    network_config, model_func = nn_config_network, small_cnn_lstm

new_model = model_func(
    n_dataset_features=n_dataset_features,
    num_of_classes=num_of_classes,
    **network_config,
)
# One-hot encoder over the site names stored in the training file.
le = OneHotEncoder()

with h5py.File(hdf5_train_path, 'r') as hdf:
    # Fit directly on the single-column array. No pre-sorting is needed: with
    # the default categories='auto' the encoder determines its categories from
    # the unique values of the column, independent of row order.
    le.fit(hdf['site_name'][:].reshape(-1, 1))

# Persist the fitted encoder in the models directory (created on first use).
# Note: despite its name, le_dir is the path of the pickle file itself.
os.makedirs(f'{data_path}/models', exist_ok=True)
le_dir = f'{data_path}/models/label_encoder_100.pkl'
with open(le_dir, 'wb') as f:
    pickle.dump(le, f)


In [None]:
# Batch size used when preparing datasets, and the epoch cap passed to fit().
batch, max_epochs = 256, 100

In [35]:
# Step 1: read features and labels from the training file. The encoder is
# handed to the loader (presumably to encode the labels - confirm in utils);
# n_samples=-1 presumably means "no sub-sampling" - TODO confirm.
X_data, y_data = load_hdf5_data(hdf5_train_path, le, n_samples=-1)

# Step 2: turn the loaded arrays into the dict of datasets used for training
# (its 'data' and 'val' entries are consumed by fit()).
datasets = prepare_hdf5_data(
    X_data,
    y_data,
    batch_size=batch,
    sample_validation=0.1,
    verbose=0,
    max_val=max_val,
)

In [36]:
# Stop when val_loss has not improved for 5 epochs and restore the weights of
# the best epoch.
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss', patience=5, restore_best_weights=True
)

verbose = 1
# Train on the 'data' split, validating on 'val', for at most max_epochs epochs.
new_model.fit(
    datasets['data'],
    validation_data=datasets['val'],
    epochs=max_epochs,
    callbacks=[early_stopping],
    verbose=verbose,
)


Epoch 1/3
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 527ms/step - accuracy: 0.9571 - loss: 0.1465 - val_accuracy: 0.9562 - val_loss: 0.1668
Epoch 2/3
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 540ms/step - accuracy: 0.9592 - loss: 0.1341 - val_accuracy: 0.9562 - val_loss: 0.1644
Epoch 3/3
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 545ms/step - accuracy: 0.9625 - loss: 0.1251 - val_accuracy: 0.9630 - val_loss: 0.1583


<keras.src.callbacks.history.History at 0x2330119aad0>

In [37]:
# Save the trained 100-class model as a `.keras` file in the models directory.
model_path=f'{data_path}/models/cnn_lstm.keras'
new_model.save(model_path)

In [38]:
# Display the architecture summary of the 100-class model.
new_model.summary()

In [39]:
# The last n_open_world_labels label columns are set aside; the remaining
# num_of_ow_classes columns are the ones the second model is trained on.
n_open_world_labels = 30
num_of_ow_classes = num_of_classes - n_open_world_labels

# Reload the full training split.
X_data, y_data = load_hdf5_data(hdf5_train_path, le, n_samples=-1)

# cw_mask is True for rows with no non-zero entry in the set-aside columns.
open_world_hits = y_data[:, -n_open_world_labels:].any(axis=1)
cw_mask = open_world_hits == 0

# Encoder derived from `le` for the reduced label set (not used by the cells
# visible in this notebook).
owle = create_subset_encoder(le, num_of_ow_classes)

In [40]:
# Datasets for the reduced-label model: only the rows selected by cw_mask and
# only the first num_of_ow_classes label columns.
# max_val is passed explicitly so this subset is prepared with the same
# setting as the 100-class datasets, instead of relying on the helper's default.
datasets_cw_part = prepare_hdf5_data(
    X_data[cw_mask],
    y_data[cw_mask][:, :num_of_ow_classes],
    batch_size=batch,
    sample_validation=0.1,
    verbose=0,
    max_val=max_val,
)

In [41]:
# Build a fresh, untrained network with the same builder and hyper-parameters
# as the 100-class model, but with num_of_classes set to num_of_ow_classes.
ow_model = model_func(
    num_of_classes=num_of_ow_classes,
    n_dataset_features=n_dataset_features,
    **network_config,
)

In [42]:
# Display the architecture summary of the reduced-label model before training.
ow_model.summary()

In [43]:
# Fresh early-stopping callback for this run: stop when val_loss has not
# improved for 5 epochs and restore the weights of the best epoch.
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss', patience=5, restore_best_weights=True
)

verbose = 1
# Train the reduced-label model on the closed-world datasets.
ow_model.fit(
    datasets_cw_part['data'],
    validation_data=datasets_cw_part['val'],
    epochs=max_epochs,
    callbacks=[early_stopping],
    verbose=verbose,
)

Epoch 1/3
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 536ms/step - accuracy: 0.5893 - loss: 1.6400 - val_accuracy: 0.8670 - val_loss: 0.4590
Epoch 2/3
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 534ms/step - accuracy: 0.9022 - loss: 0.3643 - val_accuracy: 0.9439 - val_loss: 0.2773
Epoch 3/3
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 522ms/step - accuracy: 0.9303 - loss: 0.2414 - val_accuracy: 0.9327 - val_loss: 0.2561


<keras.src.callbacks.history.History at 0x23273dd69e0>

In [44]:
# Save the trained reduced-label model as a `.keras` file in the models directory.
ow_model_path=f'{data_path}/models/cnn_lstm_ow.keras'
ow_model.save(ow_model_path)

In [45]:
# Display the architecture summary of the reduced-label model again, after training and saving.
ow_model.summary()

In [46]:
# Echo the path the reduced-label model was saved to as the cell output.
ow_model_path

'C:\\Users\\tolik\\repos\\concept-drift/data/static_browser_version/network/models/cnn_lstm_ow.keras'