#### Importing libraries

In [1]:
from numpy import mean
from numpy import std
from numpy import dstack
import pandas as pd
import numpy as np
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Flatten
from keras.layers import Dropout
from keras.layers import LSTM
from keras.utils import to_categorical
from keras.models import load_model
from keras import metrics
from matplotlib import pyplot
from google.colab import drive
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import confusion_matrix
import random
from datetime import datetime
from datetime import timedelta
import os
import matplotlib.pyplot as plt
from keras.optimizers import Adam
import keras
%matplotlib notebook
from math import isnan
import os

### Preparing Dataset

In [11]:
!wget "https://anonymous.4open.science/api/repo/EdgeCatBSPDataset-1840/file/hsr_misc_datasets.zip"

--2023-02-12 17:13:36--  https://anonymous.4open.science/api/repo/EdgeCatBSPDataset-1840/file/hsr_misc_datasets.zip
Resolving anonymous.4open.science (anonymous.4open.science)... 188.114.96.0, 188.114.97.0, 2a06:98c1:3120::, ...
Connecting to anonymous.4open.science (anonymous.4open.science)|188.114.96.0|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: unspecified [application/zip]
Saving to: ‘hsr_misc_datasets.zip’

hsr_misc_datasets.z     [<=>                 ]       0  --.-KB/s               hsr_misc_datasets.z     [ <=>                ]  10.15M  --.-KB/s    in 0.07s   

2023-02-12 17:13:36 (151 MB/s) - ‘hsr_misc_datasets.zip’ saved [10639219]



In [12]:
!unzip "hsr_misc_datasets.zip"

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
  inflating: with_RAW/ATT_Dataset/1219/1.csv  
  inflating: with_RAW/ATT_Dataset/1219/2.csv  
  inflating: with_RAW/ATT_Dataset/1219/6.csv  
  inflating: with_RAW/ATT_Dataset/1219/3.csv  
  inflating: with_RAW/ATT_Dataset/1219/5.csv  
  inflating: with_RAW/ATT_Dataset/1219/0.csv  
  inflating: with_RAW/ATT_Dataset/1944/4.csv  
  inflating: with_RAW/ATT_Dataset/1944/1.csv  
  inflating: with_RAW/ATT_Dataset/1944/2.csv  
  inflating: with_RAW/ATT_Dataset/1944/6.csv  
  inflating: with_RAW/ATT_Dataset/1944/3.csv  
  inflating: with_RAW/ATT_Dataset/1944/5.csv  
  inflating: with_RAW/ATT_Dataset/1944/0.csv  
  inflating: with_RAW/ATT_Dataset/1448/4.csv  
  inflating: with_RAW/ATT_Dataset/1448/1.csv  
  inflating: with_RAW/ATT_Dataset/1448/2.csv  
  inflating: with_RAW/ATT_Dataset/1448/6.csv  
  inflating: with_RAW/ATT_Dataset/1448/3.csv  
  inflating: with_RAW/ATT_Dataset/1448/5.csv  
  inflating: with_RAW/ATT_Dataset/1448/0.c

# Loading Data without timestamps

In [4]:
def add_to_dataframe(filepath, window_size, rsrp, rsrq, rsrp_raws, rsrq_raws, labels):
  df = pd.read_csv(filepath)
  df.drop(["Unnamed: 0"], axis=1, inplace=True)
  if df.shape[0] >= window_size:
    for i in range(0, df.shape[0]-window_size):
      temp = df.iloc[i:i+window_size]
      rsrp.append(temp["rsrp"])
      rsrq.append(temp["rsrq"])
      rsrp_raws.append(temp["rsrp_raw"])
      rsrq_raws.append(temp["rsrq_raw"])
      label = df.iloc[i]
      label = label["label"]
      labels.append(label)

In [5]:

# dataset_master_folders = ["with_RAW/ATT_Dataset", "with_RAW/HSR_August_Dataset", "with_RAW/HSR_July_Dataset", "with_RAW/Sprint_Dataset", "with_RAW/Verizon_Dataset"]
dataset_master_folders = ["with_RAW/HSR_August_Dataset", "with_RAW/HSR_July_Dataset", "with_RAW/Sprint_Dataset", "with_RAW/Verizon_Dataset"]

# Traverse folders and run function to add to dataframe
def trav_folders(dataset_master_folders, WINDOW_SIZE):
  rsrp = []
  rsrq = []
  rsrp_raws = []
  rsrq_raws = []
  labels = []

  for master_f in dataset_master_folders:
    dataset_folders = os.listdir(master_f)
    for folder in dataset_folders:
      folder_path = master_f + "/" + folder
      files = os.listdir(folder_path)
      for f in files:
        filepath = folder_path + "/" + f
        add_to_dataframe(filepath, WINDOW_SIZE, rsrp, rsrq, rsrp_raws, rsrq_raws, labels)
  
  return rsrp, rsrq, rsrp_raws, rsrq_raws, labels

# labels_copy = labels.copy()

In [6]:
# Shuffling the arrays

# Helper function to shuffle the dataset
def shuffle_datasets(rsrp, rsrq, rsrp_raws, rsrq_raws, labels):

  combined_ds = [] # this will be used to shuffle the dataset, and then separate the shuffled and filtered dataset back into its original form

  for i in range(len(labels)):
    entry = []
    entry.append(rsrp[i])
    entry.append(rsrq[i])
    entry.append(labels[i])
    entry.append(rsrp_raws[i])
    entry.append(rsrq_raws[i])
    combined_ds.append(entry)

  random.shuffle(combined_ds)

  shuffled_rsrp = []
  shuffled_rsrq = []
  shuffled_labels = []
  shuffled_rsrp_raws = []
  shuffled_rsrq_raws = []

  # extracting back into original shape
  for i in range(len(combined_ds)):
    # extracting values
    rsrp_entry = combined_ds[i][0]
    rsrq_entry = combined_ds[i][1]
    label_entry = combined_ds[i][2]
    rsrp_raw_entry = combined_ds[i][3]
    rsrq_raw_entry = combined_ds[i][4]

    # entering values into shuffled lists
    shuffled_rsrp.append(rsrp_entry)
    shuffled_rsrq.append(rsrq_entry)
    shuffled_labels.append(label_entry)
    shuffled_rsrp_raws.append(rsrp_raw_entry)
    shuffled_rsrq_raws.append(rsrq_raw_entry)

  return shuffled_rsrp, shuffled_rsrq, shuffled_labels, shuffled_rsrp_raws, shuffled_rsrq_raws

# labels = labels_copy.copy()

# shuffled_rsrp, shuffled_rsrq, shuffled_labels, shuffled_rsrp_raws, shuffled_rsrq_raws = shuffle_datasets(rsrp, rsrq, rsrp_raws, rsrq_raws, labels)

# Replacing the elements so that we can have integer comparisons instead
def label_encoder(shuffled_labels):
  for i, label in enumerate(shuffled_labels):
    if label == "not_target":
      shuffled_labels[i] = 0
    else:
      shuffled_labels[i] = 1
  return shuffled_labels

# shuffled_labels_cp = shuffled_labels.copy()

# Generating the Final Dataset (without ttp)

In [None]:
def generate_final_dataset(shuffled_rsrp, shuffled_rsrq, shuffled_rsrp_raws, shuffled_rsrq_raws, shuffled_labels):

  # counting the number of elements of each type, so that we don't have imbalanced classes
  shuffled_labels_cp = shuffled_labels.copy()

  zero_count = 0
  one_count = 0
  for index, label in enumerate(shuffled_labels_cp):
    if label == 0:
      zero_count += 1
    else:
      one_count += 1


  # Use the counts of each to sample a dataset that isn't imbalanced

  rsrp_final = []
  rsrq_final = []
  rsrp_raw_final = []
  rsrq_raw_final = []
  labels_final = []

  zero_counter = 0

  for index, label in enumerate(shuffled_labels):
    if label == 0:
      zero_counter += 1
      if zero_counter <= one_count:
        rsrp_final.append(shuffled_rsrp[index])
        rsrq_final.append(shuffled_rsrq[index])
        rsrp_raw_final.append(shuffled_rsrp_raws[index])
        rsrq_raw_final.append(shuffled_rsrq_raws[index])
        labels_final.append(shuffled_labels[index])
    else:
      rsrp_final.append(shuffled_rsrp[index])
      rsrq_final.append(shuffled_rsrq[index])
      rsrp_raw_final.append(shuffled_rsrp_raws[index])
      rsrq_raw_final.append(shuffled_rsrq_raws[index])
      labels_final.append(shuffled_labels[index])

  # Shuffle the data again
  shuffled_rsrp, shuffled_rsrq, shuffled_labels, shuffled_rsrp_raws, shuffled_rsrq_raws = shuffle_datasets(rsrp_final, rsrq_final, rsrp_raw_final, rsrq_raw_final, labels_final)

  # Converting to numpy arrays
  rsrp, rsrq, labels, rsrp_raws, rsrq_raws = np.array(shuffled_rsrp), np.array(shuffled_rsrq), np.array(shuffled_labels), np.array(shuffled_rsrp_raws), np.array(shuffled_rsrq_raws)

  # Using minmax scaling to bring our features between 0 and 1
  scaler = MinMaxScaler(feature_range=(0,1))

  rsrp = scaler.fit_transform(rsrp)
  rsrq = scaler.fit_transform(rsrq)
  rsrp_raws = scaler.fit_transform(rsrp_raws)
  rsrq_raws = scaler.fit_transform(rsrq_raws)

  # Combining features
  features = np.stack([rsrp, rsrq, rsrp_raws, rsrq_raws], axis=2)

  # Reshaping label data
  labels = np.reshape(labels, (len(labels_final),1))

  # Splitting into test and training
  test_count = 500
  X_train, X_test = features[:-1*test_count], features[-1*test_count:]
  y_train, y_test = labels[:-1*test_count], labels[-1*test_count:]

  return X_train, X_test, y_train, y_test

# Model Initialization

In [None]:
from keras import metrics as ks_metrics

def eval_model(trainX, trainY, testX, testY):
    # Defining parameters here
    verbose, epochs, batch_size = 0, 15, 64
    
    n_timesteps, n_features, n_outputs = trainX.shape[1], trainX.shape[2], trainY.shape[1]

    # Defining model here
    model = Sequential()
    model.add(LSTM(32, return_sequences=True, input_shape=(n_timesteps, n_features)))
    model.add(LSTM(16, return_sequences=False))
    
    model.add(Dense(100))
    model.add(Dropout(0.3))
    model.add(Dense(50))
    model.add(Dense(25))
    model.add(Dropout(0.2))
    model.add(Dense(n_outputs))
    optimizer = Adam(learning_rate=0.001)
    model.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=['accuracy'])

    # Fit and begin training
    model.fit(trainX, trainY, epochs=epochs, batch_size=batch_size, verbose=verbose, validation_split=0.2)

    # Model evaluation
    _, accuracy = model.evaluate(testX, testY, batch_size=batch_size, verbose=verbose)
    return accuracy, model

In [None]:
# accuracy, model = eval_model(X_train, y_train, X_test, y_test)

# Model Evaluation

#### Train and Evaluate Models (multiple variations)

In [None]:
def train_and_eval(X_train, y_train, X_test, y_test):
  NUM_ITERS = 10
  i = 0
  f1_scores = []
  f1_scores_nt = []
  tp_rate1 = []
  tp_rate2 = []

  best_model = None
  highest_f1 = 0

  while i < NUM_ITERS:
    _, model = eval_model(X_train, y_train, X_test, y_test)
    predictions = model.predict(X_test)
    predictions_nt = (predictions > 0.37)
    predictions_dt = (predictions > 0.5)
    labels = y_test

    # F1-score for 0.5 threshold
    cm = confusion_matrix(labels, predictions_dt)
    tn, fp, fn, tp = cm.ravel()

    tp_rate_thresh1 = tp/(tp+fn)

    precision = tp/(tp+fp)
    recall = tp/(tp+fn)
    f1_score = 2*precision*recall/(precision+recall)

    if f1_score > highest_f1:
      highest_f1 = f1_score
      best_model = model

    # F1-score for new threshold
    cm2 = confusion_matrix(labels, predictions_nt)
    tn, fp, fn, tp = cm2.ravel()

    tp_rate_thresh2 = tp/(tp+fn)

    precision = tp/(tp+fp)
    recall = tp/(tp+fn)
    f1_score_nt = 2*precision*recall/(precision+recall)

    if isnan(f1_score) or f1_score < 0.7 or isnan(f1_score_nt):
      i -= 1
    else: 
      f1_scores.append(f1_score)
      f1_scores_nt.append(f1_score_nt)
      tp_rate1.append(tp_rate_thresh1)
      tp_rate2.append(tp_rate_thresh2)

    i += 1
  return f1_scores, f1_scores_nt, tp_rate1, tp_rate2, best_model

#### Print results

In [None]:
# MODIFY THIS VARIABLE AND UNCOMMENT LINES 21 AND 22 TO SAVE YOUR TRAINED MODEL TO YOUR GOOGLE DRIVE. YOU WILL HAVE TO ENTER THE FULL PATH AND FILENAME (ends with .h5)
SAVE_PREFIX = ""

def complete_eval():
  for i in range(10):
    WINDOW_SIZE = i + 3
    
    # Generate dataset
    rsrp, rsrq, rsrp_raws, rsrq_raws, labels = trav_folders(dataset_master_folders, WINDOW_SIZE)
    shuffled_rsrp, shuffled_rsrq, shuffled_labels, shuffled_rsrp_raws, shuffled_rsrq_raws = shuffle_datasets(rsrp, rsrq, rsrp_raws, rsrq_raws, labels)
    shuffled_labels = label_encoder(shuffled_labels)
    X_train, X_test, y_train, y_test = generate_final_dataset(shuffled_rsrp, shuffled_rsrq, shuffled_rsrp_raws, shuffled_rsrq_raws, shuffled_labels)
    
    # Train and Evaluate Model
    f1_scores, f1_scores_nt, tp_rate_thresh1, tp_rate_thresh2, best_model = train_and_eval(X_train, y_train, X_test, y_test)
    print("Window Size: ", WINDOW_SIZE)
    print("F1-score with 0.5 threshold: ", f1_scores)
    print("F1-score with 0.37 threshold", f1_scores_nt)
    print("True Positive rate with 0.5 threshold", tp_rate_thresh1)
    print("True Positive rate with 0.37 threshold", tp_rate_thresh2)
    # best_model_filename = SAVE_PREFIX + str(WINDOW_SIZE) + ".h5"
    # best_model.save(best_model_filename)

complete_eval()



  precision = tp/(tp+fp)
  precision = tp/(tp+fp)


Window Size:  4
F1-score with 0.5 threshold:  [0.7684021543985636, 0.7656903765690376, 0.7631027253668763, 0.725, 0.7743190661478598, 0.7672955974842768, 0.7586206896551726, 0.7536842105263158, 0.7586206896551724, 0.7741935483870968]
F1-score with 0.37 threshold [0.6493860845839018, 0.767605633802817, 0.7615658362989324, 0.6485013623978201, 0.7193798449612403, 0.7640845070422536, 0.7438825448613378, 0.6449864498644986, 0.6449864498644986, 0.7585034013605442]
True Positive rate with 0.5 threshold [0.8991596638655462, 0.7689075630252101, 0.7647058823529411, 0.9747899159663865, 0.8361344537815126, 0.7689075630252101, 0.7394957983193278, 0.7521008403361344, 0.9243697478991597, 0.8067226890756303]
True Positive rate with 0.37 threshold [1.0, 0.9159663865546218, 0.8991596638655462, 1.0, 0.9747899159663865, 0.9117647058823529, 0.957983193277311, 1.0, 1.0, 0.9369747899159664]
