In [None]:
import numpy as np
from datetime import datetime

from helpers import pre_processing_wlan_utils as preprocess_utils
from helpers import classifier_wlan_spectral_utils as classifier_utils
from helpers import tr_models as tr_models

In [None]:
# Pick the classification task to solve. The task name selects the matching
# label index, number of classes and class names for the loaded dataset.
# Available tasks:
#   'phy'      - L1 technology identification
#   'frames'   - L2 frame characterization
#   'app-type' - L7 app characterization
#   'app'      - L7 app identification
# The results in the paper cover the last three tasks.
task = 'app-type'

# Task-specific metadata looked up in the pre-processing helpers.
label = preprocess_utils.label_index[task]
num_classes = preprocess_utils.num_classes[task]
labels_string = preprocess_utils.labels_string[task]

# Echo the selection so the reader can confirm what will be trained on.
for caption, value in (
    ("Label id: ", label),
    ("Num classes in that label: ", num_classes),
    ("Labels: ", labels_string),
):
    print(caption, value)

In [None]:
# This dataset has no samples for the "unknown" class (L2 frames generated by an
# unknown app), so that class is dropped for the two app-level tasks.
# The reduced values are re-derived from preprocess_utils on every run instead
# of decrementing num_classes in place, so the cell can be re-executed without
# removing one more class each time.
if task in ('app', 'app-type'):
    num_classes = preprocess_utils.num_classes[task] - 1
    # Keep the first num_classes names, i.e. drop the last entry.
    # NOTE(review): assumes "unknown" is the last label in labels_string - confirm.
    labels_string = preprocess_utils.labels_string[task][0:num_classes]
    print("Label id: ", label)
    print("Num classes in that label: ", num_classes)
    print("Labels: ", labels_string)

In [None]:
# Length every sequence is padded or truncated to.
seq_length = 3000

# Padding type handed to the pad/truncate helper.
padding = 'post'

# Folder holding the dataset files. All files of the dataset can be downloaded
# from https://zenodo.org/record/5208201
dataset_folder = '../../dataset/waveforms/'

# Dataset file to load. Here we use one of the balanced datasets
# (filename_balanced.mat); the file name depends on the selected task.
dataset_filename = 'waveforms_2G_n_SNR_{}_balanced.mat'.format(task)

In [None]:
# Load X and Y with no padding/truncation and no scaling applied.
# get_raw_xy_spectrum returns the L1 packets (IQ samples) together with all the
# labels associated with them; the task-specific label is selected later.
Xraw, Yraw = classifier_utils.get_raw_xy_spectrum(dataset_folder,dataset_filename)

In [None]:
# Pad or truncate every L1 packet to the configured length (scaling disabled).
print("Padding/Truncating sequence to a length of ", str(seq_length))
X = classifier_utils.pad_or_trunc_x_and_scale(
    Xraw,
    seq_length,
    padding,
    scale=False,
)

In [None]:
# Select the labels of the chosen classification task and generate the one-hot
# labels from them.
# The progress message previously said "one-shot"; the operation performed here
# is one-hot encoding (see get_one_hot_labels), so the message now says so.
print("Generate one-hot labels")
Y = classifier_utils.get_one_hot_labels(Yraw, num_classes, label)

In [None]:
# Fixed seed for the pseudo-random generator used when splitting the dataset.
seed = 42

print("Performing data splitting")
(
    X_train, X_val, X_test,
    Y_train, Y_val, Y_test,
) = classifier_utils.get_xy_4_training(X, Y, seed)

# Shapes of the training, validation and test inputs.
print(X_train.shape, X_val.shape, X_test.shape)

In [None]:
# Select the pre-configured model to load: either CNN or RNN.
# The hyperparameters are chosen from the combination of model type and task.
# The model type also drives how the input data is reshaped, because the CNN
# and the RNN expect different input shapes.
model_type = "CNN"

In [None]:
# Adapt the shape of the three input splits to what the selected model expects.
# NOTE: X_train, X_val and X_test are overwritten here, so re-running this cell
# feeds the already reshaped arrays back into reshape_for_model; re-run the
# data-splitting cell first when in doubt.
print('Starting data preparation and training for model ', model_type)
reshaped_splits = classifier_utils.reshape_for_model(
    model_type, X_train, X_val, X_test
)
X_train, X_val, X_test = reshaped_splits
print(X_train.shape, X_val.shape, X_test.shape)

In [None]:
# Build a prefix used to name output files such as the saved model (hd5) and
# the confusion matrix (pdf). It starts with the current time formatted as
# DDMMYYHHMMSS, followed by the model type, input length, class count and task.
now = datetime.now()
datenow = now.strftime('%d%m%y%H%M%S')
prefix_time = str(datenow)
prefix_filenames = (
    '{}_TC_Spectrum_model_{}_input_length_{}_num_classes_{}_task_{}'
).format(prefix_time, model_type, seq_length, num_classes, task)

In [None]:
# Create the model and train it.
# Expected accuracies for a CNN with an input size of 3K samples:
#   task 'frames' ~99%, task 'app-type' ~97%, task 'app' ~90.44%
result, model = classifier_utils.create_and_train_model_tc_spectrum(
    model_type,
    task,
    seq_length,
    num_classes,
    prefix_filenames,
    X_train,
    Y_train,
    X_val,
    Y_val,
    X_test,
    Y_test,
)

In [None]:
# Report loss (index 0) and accuracy (index 1) for each data split, in the
# order training, validation, test.
metric_rows = (
    ("Model loss in training: ", 'Training', 0),
    ("Model accuracy in training: ", 'Training', 1),
    ("Model loss in validation: ", 'Validation', 0),
    ("Model accuracy in validation: ", 'Validation', 1),
    ("Model loss in test: ", 'Test', 0),
    ("Model accuracy in test: ", 'Test', 1),
)
for caption, split_key, metric_idx in metric_rows:
    print(caption, result[split_key][metric_idx])

In [None]:
# Report the inference timing stored in the result dictionary for the training
# and test datasets: total prediction time, number of samples used and the
# average inference time per sample.
timing_reports = (
    ("The inference time on training dataset is", 'prediction_time_training'),
    ("The inference time on test dataset is", 'prediction_time_testing'),
)
for headline, timing_key in timing_reports:
    timing = result[timing_key]
    print(
        headline,
        timing['time_pred'],
        '. It was computed using',
        timing['n_samples'],
        'samples with an average inference time per sample of',
        timing['t_sample'],
    )

In [None]:
# The dictionary returned by the training step holds more results/metrics than
# the ones printed so far, such as the confusion matrix, precision, recall,
# fscore and support. List every key that can be accessed.
result_keys = result.keys()
print(result_keys)

In [None]:
# Export the results dictionary in JSON format. The file-name prefix built
# earlier is passed along so the output can be matched to this run.
classifier_utils.save_results_to_json(prefix_filenames, result)

In [None]:
# Compute the confusion matrix on the test split and store it in a pdf file.
# The output goes to the current directory and is named with the run's prefix.
classifier_utils.compute_and_save_conf_matrix(
    model,
    X_test,
    Y_test,
    labels_string,
    cm_dir='./',
    filename_prefix=prefix_filenames,
)