In [None]:
import numpy as np
from datetime import datetime
from time import process_time
from tensorflow.keras.models import Model, load_model, Sequential
from sklearn.metrics import confusion_matrix, precision_recall_fscore_support

from helpers import pre_processing_wlan_utils as preprocess_utils
from helpers import classifier_wlan_spectral_utils as classifier_utils
from helpers import tr_models as tr_models

In [None]:
#Choose the classification task to evaluate. This value selects the matching labels of the loaded dataset.
#Available tasks: 'phy' (L1 technology identification), 'frames' (L2 frame characterization),
#'app-type' (L7 app characterization) and 'app' (L7 app identification).
#The results of the paper cover the last three tasks.
task = 'app'

#Fetch the entries for this task from the label_index, num_classes and labels_string tables of preprocess_utils.
label = preprocess_utils.label_index[task]
num_classes = preprocess_utils.num_classes[task]
labels_string = preprocess_utils.labels_string[task]

#Echo the selected configuration.
for caption, value in (
    ("Label id: ", label),
    ("Num classes in that label: ", num_classes),
    ("Labels: ", labels_string),
):
    print(caption, value)

In [None]:
#This dataset does not contain the unknown label class (L2 frames that were generated with an unknown app), so let's remove that label.
#The values are re-derived from preprocess_utils on every run instead of being decremented in place,
#so re-running this cell cannot drop additional classes.
if task in ('app', 'app-type'):
    num_classes = preprocess_utils.num_classes[task] - 1
    #Presumably the unknown class is the last entry of the label list -- TODO confirm in preprocess_utils.
    labels_string = preprocess_utils.labels_string[task][0:num_classes]
    print("Label id: ", label)
    print("Num classes in that label: ", num_classes)
    print("Labels: ", labels_string)

In [None]:
#Length of the input sequences (packets are padded/truncated to this length later on)
seq_length = 3000

#Type of padding handed to the pad/truncate helper
padding = 'post'

#Path to the dataset folder. All the files of the dataset can be downloaded from https://zenodo.org/record/5208201
dataset_folder = '../../dataset/waveforms/'

#Name of the dataset file. Here we use one of the balanced datasets (filename_balanced.mat)
dataset_filename = f'waveforms_2G_n_SNR_{task}_balanced.mat'

In [None]:
#Select the pre-configured model to load: 'CNN' or 'GRU' (RNN).
#The hyperparameters are chosen from the combination of model type and task.
#The model type is also used to pre-process the shape of the input data, which differs between the CNN and the GRU.
model_type = 'CNN'

In [None]:
#Use the CNN-based pre-trained model created on 26-08-2021, which solves the 'app' task on input sequences of 3K IQ samples.
#To test another model, read the time prefix, the task and the input length off its filename and set them accordingly.
#For this model the filename prefix is: 260821100426_TC_Spectrum_model_CNN_input_length_3000_num_classes_7_task_app
prefix_time_pretrained_model = '260821100426'
prefix_filenames = (
    f'{prefix_time_pretrained_model}_TC_Spectrum_model_{model_type}'
    f'_input_length_{seq_length}_num_classes_{num_classes}_task_{task}'
)

In [None]:
#Build the path of the saved classifier (.h5) from the filename prefix and load it with Keras' load_model.
trained_model_filename = f'notebook_results/pre_trained_models/{prefix_filenames}_classifier.h5'
pre_trained_model = load_model(trained_model_filename)

In [None]:
#Get X and Y without padding/truncation or scaling. The function get_raw_xy_spectrum returns the L1 packets (IQ samples) and all the labels associated with them.
Xraw, Yraw = classifier_utils.get_raw_xy_spectrum(dataset_folder,dataset_filename)

In [None]:
#Pad/truncate the L1 packets to seq_length; scaling is switched off (scale=False).
print("Padding/Truncating sequence to a length of ", seq_length)
X = classifier_utils.pad_or_trunc_x_and_scale(Xraw, seq_length, padding, scale=False)

In [None]:
#Select the labels of the classification task and generate the one-hot labels.
#NOTE(review): the message below says 'one-shot' although the helper is get_one_hot_labels; the printed text is left untouched here.
print("Generate one-shot labels")
Y = classifier_utils.get_one_hot_labels(Yraw, num_classes, label)

In [None]:
#Seed for the pseudo-random generator used when splitting the dataset
seed = 42
print("Performing data splitting")
#Split X and Y into training, validation and test subsets using the seed above
X_train, X_val, X_test, Y_train, Y_val, Y_test = classifier_utils.get_xy_4_training(X,Y,seed)
#Show the shape of each input subset
print(X_train.shape,X_val.shape, X_test.shape)

In [None]:
#Adapt the shape of the dataset to match the model type.
#NOTE(review): the message mentions training, but no training happens in the cells visible here (a pre-trained model is evaluated).
print('Starting data preparation and training for model ', model_type)
#NOTE(review): the three splits are overwritten in place; presumably re-running this cell would reshape already-reshaped data -- confirm reshape_for_model is safe to re-run.
X_train, X_val, X_test = classifier_utils.reshape_for_model(model_type, X_train, X_val, X_test)
#Show the shapes after reshaping
print(X_train.shape,X_val.shape, X_test.shape)

In [None]:
#Evaluate the pre-trained model on the training split.
print("Evaluation Training set")
loss_acc_train = pre_trained_model.evaluate(X_train, Y_train)
#The result is indexed as [loss, accuracy]; index 1 is presumably the accuracy metric the model was compiled with -- TODO confirm.
print("Training Loss:", loss_acc_train[0])
print("Training Accuracy:", loss_acc_train[1])

In [None]:
#Evaluate the pre-trained model on the validation split.
print("Evaluation Validating set")
loss_acc_val = pre_trained_model.evaluate(X_val, Y_val)
#The result is indexed as [loss, accuracy]; index 1 is presumably the accuracy metric the model was compiled with -- TODO confirm.
print("Validation Loss:", loss_acc_val[0])
print("Validation Accuracy:", loss_acc_val[1])

In [None]:
#Evaluate the pre-trained model on the test split.
print("Evaluation Testing set")
loss_acc_test = pre_trained_model.evaluate(X_test, Y_test)
#Report the test-set results (loss_acc_test), not the training-set ones.
#The result is indexed as [loss, accuracy]; index 1 is presumably the accuracy metric the model was compiled with -- TODO confirm.
print("Test Loss:", loss_acc_test[0])
print("Test Accuracy:", loss_acc_test[1])

In [None]:
print('Computing confusion matrix')
#Turn the one-hot ground truth and the model outputs into class indices (argmax along axis 1).
Y_true = np.argmax(Y_test, axis=1)
Y_pred = np.argmax(pre_trained_model.predict(X_test), axis=1)
#Raw counts, plus a version normalized over the true labels.
cm = confusion_matrix(Y_true, Y_pred)
cm_norm = confusion_matrix(Y_true, Y_pred, normalize='true')

In [None]:
#Show the confusion matrix normalized over the true labels (cm_norm).
print('Confusion Matrix nomarlized', cm_norm)

In [None]:
#Show the confusion matrix with raw, non-normalized counts (cm).
print('Confusion Matrix no nomarlized', cm)

In [None]:
print('Computing precision, recall, and fscore')
#'macro' average: calculate the metrics for each label and take their unweighted mean. This does not take label imbalance into account.
prf_macro = precision_recall_fscore_support(Y_true, Y_pred, average='macro')
#The result is indexed as precision [0], recall [1] and fscore [2].
print("Precision:",prf_macro[0])
print("Recall:", prf_macro[1])
print("FScore:", prf_macro[2])
#Test accuracy taken from the earlier test-set evaluation (loss_acc_test).
print("Test Accuracy:", loss_acc_test[1])

In [None]:
#'micro' average: calculate the metrics globally by counting the total true positives, false negatives and false positives.
prf_micro = precision_recall_fscore_support(Y_true, Y_pred, average='micro')
#The result is indexed as precision [0], recall [1] and fscore [2].
print("Precision:",prf_micro[0])
print("Recall:", prf_micro[1])
print("FScore:", prf_micro[2])
#Test accuracy taken from the earlier test-set evaluation (loss_acc_test).
print("Test Accuracy:", loss_acc_test[1])

In [None]:
#We also defined a function to compute the confusion matrix and store it in a PDF file.
#Here it is called with the current directory as cm_dir, the model filename prefix as filename_prefix, and a two-decimal format string for precision.
classifier_utils.compute_and_save_conf_matrix(pre_trained_model, X_test, Y_test, labels_string, cm_dir = './', filename_prefix = prefix_filenames, precision = "{:0.2f}")

In [None]:
#Time a full prediction pass over the training split and derive the per-sample cost.
print('Computing prediction time on Training dataset')
#NOTE(review): process_time() counts the CPU time of the process (sleep excluded), not wall-clock time;
#confirm this is the intended measure, e.g. when inference is multi-threaded or runs on a GPU.
start = process_time()
pre_trained_model.predict(X_train)
end = process_time()
print('The prediction time (in secs) was: ', end-start)
#Average over the number of samples in X_train
print('The prediction time (in secs) per sample was: ', (end-start)/len(X_train))