In [None]:
# Mount Google Drive into the Colab runtime at /content/drive; the tuner
# directory used further down lives under this mount point.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
pip install keras_tuner

In [None]:
import numpy as np
import pandas as pd
import os
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error
from sklearn.model_selection import TimeSeriesSplit
from keras.models import Sequential
from keras.layers import Conv1D, Dense, Flatten, Dropout, Input, MaxPooling1D
from keras.callbacks import EarlyStopping
from keras import utils
import keras_tuner as kt

# Fix the global random seed once, up front, so repeated runs are comparable.
utils.set_random_seed(42)

In [None]:
# Define Parameters
# LOOKBACK: number of past time steps fed to the model per input window.
LOOKBACK = 24
# HORIZON: number of future time steps predicted per window (size of the output layer).
HORIZON = 24
# N_SPLITS: default number of cross-validation folds for time_series_split.
N_SPLITS = 4
# BATCH_SIZE: batch size passed to the tuner's search.
BATCH_SIZE = 32
# EPOCHS: not referenced by the cells visible here (the tuner uses its own max_epochs)
# — presumably used by a later training cell; TODO confirm or remove.
EPOCHS = 10
# Shared scaler instance; the tuning loop re-fits it on each fold's training data.
scaler = StandardScaler()

# Helper functions

In [None]:
# Time series split function (Expanding Window)
def time_series_split(df, n_splits=N_SPLITS, test_size=0.2):
    """Split a frame chronologically into CV folds plus a hold-out test set.

    Rows are ordered by ``timestamp``. The trailing ``test_size`` fraction is
    held out as the test set; the remaining rows are partitioned into
    ``n_splits`` (train, validation) pairs with ``TimeSeriesSplit``.

    Args:
        df: DataFrame containing a ``timestamp`` column.
        n_splits: Number of folds handed to ``TimeSeriesSplit``.
        test_size: Fraction of rows, taken from the end, reserved for testing.

    Returns:
        ``(splits, test_df)`` where ``splits`` is a list of
        ``(train_df, val_df)`` tuples and ``test_df`` is the hold-out frame.
    """
    ordered = df.sort_values('timestamp')
    cutoff = int(len(ordered) * (1 - test_size))
    train_val_df, test_df = ordered.iloc[:cutoff], ordered.iloc[cutoff:]

    folds = []
    splitter = TimeSeriesSplit(n_splits=n_splits)
    for train_idx, val_idx in splitter.split(train_val_df):
        folds.append((train_val_df.iloc[train_idx], train_val_df.iloc[val_idx]))
    return folds, test_df

In [None]:
# Sequence creation for univariate time series
def create_sequences(df, lookback=LOOKBACK, horizon=HORIZON):
    """Build sliding-window input/target sequences per cell (univariate).

    Windows never cross cell boundaries: each cell's rows are windowed on
    their own, in the row order in which they appear in ``df``.

    Args:
        df: DataFrame with ``cell``, ``minRSSI`` and ``anomaly`` columns.
        lookback: Number of past rows in each input window.
        horizon: Number of following rows in each target window.

    Returns:
        Four numpy arrays ``(X, y, anomaly, cell_id)``:
        ``X`` holds the ``minRSSI`` lookback windows (each ``(lookback, 1)``),
        ``y`` the ``minRSSI`` horizon windows, ``anomaly`` the matching
        ``anomaly`` values and ``cell_id`` the matching ``cell`` values
        (each of length ``horizon``). If no window fits, all four arrays
        are empty.
    """
    X, y, anomaly, cell_id = [], [], [], []

    # Loop through each unique cell in the dataset
    for cell in df['cell'].unique():
        # Filter the dataframe for the current cell only
        cell_df = df[df['cell'] == cell]
        n_rows = len(cell_df)

        # Not enough rows for even one (lookback + horizon) window
        if n_rows < lookback + horizon:
            continue

        # Extract the column arrays once per cell; slicing them in the loop
        # avoids a pandas .iloc + column lookup for every single window.
        feature_values = cell_df[['minRSSI']].values  # 2-D: one feature column
        target_values = cell_df['minRSSI'].values
        anomaly_values = cell_df['anomaly'].values
        cell_values = cell_df['cell'].values

        # Generate sequences within this cell's data
        for i in range(lookback, n_rows - horizon + 1):
            X.append(feature_values[i - lookback:i])
            y.append(target_values[i:i + horizon])
            anomaly.append(anomaly_values[i:i + horizon])
            cell_id.append(cell_values[i:i + horizon])

    # Convert lists to numpy arrays for model input
    return np.array(X), np.array(y), np.array(anomaly), np.array(cell_id)

In [None]:
def tune_hyperparameters_hyperband(X_train, y_train, param_grid, max_epochs=30, overwrite=True):
    """Tune a 1D-CNN with Keras Tuner's Hyperband and return the best result.

    Args:
        X_train: Training inputs; the model's input layer expects windows of
            shape ``(LOOKBACK, 1)``.
        y_train: Training targets; the output layer has ``HORIZON`` units.
        param_grid: Unused. The search space is defined inside
            ``build_tunable_1dcnn``; the parameter is kept so existing calls
            keep working.
        max_epochs: ``max_epochs`` setting of the Hyperband tuner.
        overwrite: When True (default), any tuner state already stored in the
            tuning directory is discarded so the search really runs on the
            data passed in. Without this, every call silently reloaded the
            results of an earlier search ("Reloading Tuner from ...") and
            returned the same stale hyperparameters for every split. Pass
            False to resume or reuse a previous search on purpose.

    Returns:
        ``(best_model, best_params)``: the best model found by the tuner and
        a dict with the values of its hyperparameters.
    """
    # Define model building function for Keras Tuner
    def build_tunable_1dcnn(hp):
        model = Sequential()
        model.add(Input(shape=(LOOKBACK, 1)))

        # Add Conv1D layers based on tunable parameters.
        # 'num_filters' and 'filter_size' are shared by all conv layers;
        # pooling and dropout are decided per layer.
        # NOTE(review): the conv/pooling layers use their default padding, so
        # deep stacks with large kernels or pooling may shrink the sequence
        # below the kernel size and make some trials fail at build time —
        # confirm this is acceptable or constrain the search space.
        for i in range(hp.Int('num_conv_layers', min_value=1, max_value=4)):
            model.add(Conv1D(
                filters=hp.Choice('num_filters', values=[16, 32, 64, 128]),
                kernel_size=hp.Choice('filter_size', values=[1, 3, 5, 7]),
                activation='relu'
            ))

            # Optional pooling layer
            if hp.Boolean(f'add_pooling_{i}'):
                pool_size = hp.Choice(f'pool_size_{i}', values=[2, 3])
                model.add(MaxPooling1D(pool_size=pool_size))

            # Optional dropout layer
            if hp.Boolean(f'use_dropout_{i}'):
                dropout_rate = hp.Float(f'dropout_rate_{i}', min_value=0.2, max_value=0.3, step=0.1)
                model.add(Dropout(dropout_rate))

        model.add(Flatten())
        model.add(Dense(HORIZON))

        model.compile(optimizer='adam',
                      loss='mse')
        return model

    # Hyperband tuner instance
    tuner = kt.Hyperband(
        hypermodel=build_tunable_1dcnn,
        objective='val_loss',
        max_epochs=max_epochs,
        factor=3,
        directory='/content/drive/MyDrive/Thesis/Thesis/cnn/univar_tuning',
        project_name='1dcnn_tuning',
        overwrite=overwrite
    )

    # Fit Hyperband tuner to training data; 'val_loss' is measured on the
    # validation_split hold-out taken from the training arrays.
    tuner.search(X_train, y_train, epochs=max_epochs, batch_size=BATCH_SIZE, validation_split=0.2, verbose=1)

    # Get best model and hyperparameters
    best_model = tuner.get_best_models(num_models=1)[0]
    best_params = tuner.get_best_hyperparameters(num_trials=1)[0].values

    return best_model, best_params

# Prepare data

In [None]:
# Input and output folders are configurable through environment variables.
imp_folder = os.getenv("DATA_PATH", "./default_data_path/")
exp_folder = os.getenv("MODEL_PATH", "./default_model_path/")

# os.path.join works whether or not DATA_PATH ends with a path separator
# (plain string concatenation produced a wrong path without one).
df = pd.read_csv(os.path.join(imp_folder, 'cell_undersampled_1.csv'))
# Keep only the columns used by the univariate pipeline.
df = df[['timestamp', 'cell', 'minRSSI', 'anomaly']]

print(df.shape)
df.head()

In [None]:
# Use the shared N_SPLITS constant (instead of a literal) so the number of
# folds always matches what the tuning loop reports.
splits, test_set = time_series_split(df, N_SPLITS)

for i, (train, val) in enumerate(splits):
    print(f"Split {i + 1}:")
    print(f"  Train set shape: {train.shape}")
    print(f"  Validation set shape: {val.shape}")

print(f"Test set shape: {test_set.shape}")

Split 1:
  Train set shape: (151460, 4)
  Validation set shape: (151459, 4)
Split 2:
  Train set shape: (302919, 4)
  Validation set shape: (151459, 4)
Split 3:
  Train set shape: (454378, 4)
  Validation set shape: (151459, 4)
Split 4:
  Train set shape: (605837, 4)
  Validation set shape: (151459, 4)
Test set shape: (189324, 4)


# Run the tuning

In [None]:
all_best_params = []

# Create the export folder up front so a missing directory is detected
# before the tuning starts rather than at the final save.
if exp_folder:
    os.makedirs(exp_folder, exist_ok=True)

for i, (train_set, val_set) in enumerate(splits):
    print(f"Processing split {i + 1}/{len(splits)}")

    # Scale copies, not the frames stored in `splits`: writing into the
    # stored frames meant that re-running this cell scaled already-scaled
    # data a second time (and could trigger SettingWithCopyWarning).
    train_set = train_set.copy()
    val_set = val_set.copy()

    # Preprocess the data (fit scaler on train, transform on train and val)
    train_set['minRSSI'] = scaler.fit_transform(train_set[['minRSSI']])
    val_set['minRSSI'] = scaler.transform(val_set[['minRSSI']])

    # Create sequences for train and validation sets
    X_train, y_train, _, _ = create_sequences(train_set)
    X_val, y_val, _, _ = create_sequences(val_set)

    # Reshape X for 1D CNN (samples, timesteps, features)
    X_train = X_train.reshape((X_train.shape[0], X_train.shape[1], 1))
    X_val = X_val.reshape((X_val.shape[0], X_val.shape[1], 1))

    # Tune and train the model
    best_model, best_params = tune_hyperparameters_hyperband(X_train, y_train, param_grid=None)
    all_best_params.append(best_params)

    # Evaluate the model on validation set (metrics are in scaled units)
    y_val_pred = best_model.predict(X_val)
    val_rmse = np.sqrt(mean_squared_error(y_val, y_val_pred))
    val_mae = mean_absolute_error(y_val, y_val_pred)

    print(f"Split {i + 1} Results:")
    print(f"  Best parameters: {best_params}")
    print(f"  Validation RMSE: {val_rmse}")
    print(f"  Validation MAE: {val_mae}")

    # Save the best model for the last split only
    if i == len(splits) - 1:
        best_model.save(os.path.join(exp_folder, 'best_1dcnn_model.h5'))
        print("Saved the best model from the last split.")

print()
print("All best hyperparameters across splits:", all_best_params)

Processing split 1/4
Reloading Tuner from /content/drive/MyDrive/Thesis/Thesis/cnn/univar_tuning/1dcnn_tuning/tuner0.json


  saveable.load_own_variables(weights_store.get(inner_path))


[1m4321/4321[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 1ms/step
Split 1 Results:
  Best parameters: {'num_conv_layers': 3, 'num_filters': 64, 'filter_size': 5, 'use_dropout': False, 'dropout_rate': 0.2, 'tuner/epochs': 2, 'tuner/initial_epoch': 0, 'tuner/bracket': 3, 'tuner/round': 0, 'add_pooling_0': False, 'use_dropout_0': False, 'add_pooling_1': False, 'use_dropout_1': False, 'add_pooling_2': False, 'use_dropout_2': False}
  Validation RMSE: 0.6324001606953417
  Validation MAE: 0.3928407542427677
Processing split 2/4
Reloading Tuner from /content/drive/MyDrive/Thesis/Thesis/cnn/univar_tuning/1dcnn_tuning/tuner0.json
[1m   1/4321[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m9:10[0m 127ms/step

  saveable.load_own_variables(weights_store.get(inner_path))


[1m4321/4321[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 1ms/step
Split 2 Results:
  Best parameters: {'num_conv_layers': 3, 'num_filters': 64, 'filter_size': 5, 'use_dropout': False, 'dropout_rate': 0.2, 'tuner/epochs': 2, 'tuner/initial_epoch': 0, 'tuner/bracket': 3, 'tuner/round': 0, 'add_pooling_0': False, 'use_dropout_0': False, 'add_pooling_1': False, 'use_dropout_1': False, 'add_pooling_2': False, 'use_dropout_2': False}
  Validation RMSE: 0.4020272192084934
  Validation MAE: 0.23952492974052864
Processing split 3/4
Reloading Tuner from /content/drive/MyDrive/Thesis/Thesis/cnn/univar_tuning/1dcnn_tuning/tuner0.json


  saveable.load_own_variables(weights_store.get(inner_path))


[1m4321/4321[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 1ms/step
Split 3 Results:
  Best parameters: {'num_conv_layers': 3, 'num_filters': 64, 'filter_size': 5, 'use_dropout': False, 'dropout_rate': 0.2, 'tuner/epochs': 2, 'tuner/initial_epoch': 0, 'tuner/bracket': 3, 'tuner/round': 0, 'add_pooling_0': False, 'use_dropout_0': False, 'add_pooling_1': False, 'use_dropout_1': False, 'add_pooling_2': False, 'use_dropout_2': False}
  Validation RMSE: 0.5591703237498701
  Validation MAE: 0.3439769742082885
Processing split 4/4
Reloading Tuner from /content/drive/MyDrive/Thesis/Thesis/cnn/univar_tuning/1dcnn_tuning/tuner0.json


  saveable.load_own_variables(weights_store.get(inner_path))


[1m4321/4321[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 2ms/step




Split 4 Results:
  Best parameters: {'num_conv_layers': 3, 'num_filters': 64, 'filter_size': 5, 'use_dropout': False, 'dropout_rate': 0.2, 'tuner/epochs': 2, 'tuner/initial_epoch': 0, 'tuner/bracket': 3, 'tuner/round': 0, 'add_pooling_0': False, 'use_dropout_0': False, 'add_pooling_1': False, 'use_dropout_1': False, 'add_pooling_2': False, 'use_dropout_2': False}
  Validation RMSE: 0.9752770029700313
  Validation MAE: 0.5863544068307599
Saved the best model from the last split.

All best hyperparameters across splits: [{'num_conv_layers': 3, 'num_filters': 64, 'filter_size': 5, 'use_dropout': False, 'dropout_rate': 0.2, 'tuner/epochs': 2, 'tuner/initial_epoch': 0, 'tuner/bracket': 3, 'tuner/round': 0, 'add_pooling_0': False, 'use_dropout_0': False, 'add_pooling_1': False, 'use_dropout_1': False, 'add_pooling_2': False, 'use_dropout_2': False}, {'num_conv_layers': 3, 'num_filters': 64, 'filter_size': 5, 'use_dropout': False, 'dropout_rate': 0.2, 'tuner/epochs': 2, 'tuner/initial_epoch':