In [1]:
import numpy as np
import importlib
from datetime import datetime
import gc

from helpers import pre_processing_wlan_utils as preprocess_utils
from helpers import classifier_wlan_spectral_utils as classifier_utils
from helpers import tr_models as tr_models

1 Physical GPUs, 1 Logical GPUs


In [2]:
# If any helper module was edited, reload it here instead of restarting the kernel.
for helper_module in (tr_models, classifier_utils):
    importlib.reload(helper_module)
# Kept as the cell's last expression so the notebook still echoes the reloaded module.
importlib.reload(preprocess_utils)

1 Physical GPUs, 1 Logical GPUs


<module 'helpers.pre_processing_wlan_utils' from 'C:\\Users\\migue\\development\\traffic-recognition-2020\\source_code\\python\\helpers\\pre_processing_wlan_utils.py'>

In [3]:
# Relative path of the folder holding the waveform dataset
dataset_folder = '../../dataset/waveforms/'
# Name of the .mat file read by the loading cells
dataset_filename = (
    'waveforms_SNR_16042020_2G_n_unknown_unknown_v2WLAN_CLASS_wlan-frame_balanced.mat'
)

In [4]:
# Load the spectrum data as a list of numpy arrays, one entry per sample.
# The list cannot be converted to a single ndarray because the samples differ in length.
# Each raw IQ example is 1-D (first the I samples, then the Q samples), so the
# length of a spectrum packet is len(example)/2.
# Each element is in column format and is int16.
X_L1 = classifier_utils.get_raw_x(
    dataset_folder,
    dataset_filename,
    data='spectrum',
)

  0%|                                                                             | 95/225468 [00:00<03:57, 949.78it/s]

Getting X for spectrum data


100%|████████████████████████████████████████████████████████████████████████| 225468/225468 [03:41<00:00, 1019.83it/s]


In [5]:
# Sanity check: the first 12 values of the first spectrum sample must equal the
# reference values recorded for this dataset file (update them if the file changes).
expected_l1_head = np.array(
    [-1567, -1311, -102, -1007, -810, 1356, -1695, -7725, 13438, -15732, -743, 9578]
).reshape(-1, 1)  # samples are stored in column format
print(X_L1[0][0:12])
# Fail loudly instead of relying on a visual comparison of the printed values.
np.testing.assert_array_equal(X_L1[0][0:12], expected_l1_head)

[[ -1567]
 [ -1311]
 [  -102]
 [ -1007]
 [  -810]
 [  1356]
 [ -1695]
 [ -7725]
 [ 13438]
 [-15732]
 [  -743]
 [  9578]]


In [6]:
# Load the L2 data as a list of numpy arrays, one entry per sample.
# The list cannot be converted to a single ndarray because the samples differ in length.
# Each element is in column format and is float32.
X_L2 = classifier_utils.get_raw_x(
    dataset_folder,
    dataset_filename,
    data='L2',
)

  0%|                                                                                       | 0/225468 [00:00<?, ?it/s]

Getting X for L2 data


100%|████████████████████████████████████████████████████████████████████████| 225468/225468 [01:23<00:00, 2699.60it/s]


In [7]:
# Sanity check: the first 18 values of the first L2 sample must equal the
# reference values recorded for this dataset file (update them if the file changes).
expected_l2_head = np.array(
    [128, 0, 0, 0, 255, 255, 255, 255, 255, 255, 226, 185, 229, 34, 192, 14, 226, 185]
).reshape(-1, 1)  # samples are stored in column format
print(X_L2[0][0:18])
# Fail loudly instead of relying on a visual comparison of the printed values.
np.testing.assert_array_equal(X_L2[0][0:18], expected_l2_head)

[[128.]
 [  0.]
 [  0.]
 [  0.]
 [255.]
 [255.]
 [255.]
 [255.]
 [255.]
 [255.]
 [226.]
 [185.]
 [229.]
 [ 34.]
 [192.]
 [ 14.]
 [226.]
 [185.]]


In [8]:
# Load the labels and arrange them with 5 columns per row.
# Each element is in row format and is uint8.
Y = np.array(
    classifier_utils.get_raw_y(dataset_folder, dataset_filename)
).reshape((-1, 5))

100%|████████████████████████████████████████████████████████████████████████| 225468/225468 [01:14<00:00, 3026.14it/s]


In [9]:
# Unlike the X lists, Y can be converted directly to a full numpy array,
# because every label list has the same number of elements (5).
Y_as_np = np.array(Y).reshape((-1, 5))

In [10]:
# Number of samples loaded for the spectrum data, the L2 data and the labels.
print(len(X_L1))
print(len(X_L2))
print(len(Y))
# The three collections are indexed together below, so they must have the same length.
assert len(X_L1) == len(X_L2) == len(Y), "X_L1, X_L2 and Y must have the same number of samples"

225468
225468
225468


In [11]:
# Look up the label column, class count and class names registered for the "phy" task
task = "phy"
label_id_l1 = preprocess_utils.label_index[task]
num_classes_l1 = preprocess_utils.num_classes[task]
labels_string_l1 = preprocess_utils.labels_string[task]

# Column of the label matrix that belongs to this task
Y_L1 = Y_as_np[:, label_id_l1]

for caption, value in (
    ("Label id: ", label_id_l1),
    ("Num classes in that label: ", num_classes_l1),
    ("Labels: ", labels_string_l1),
):
    print(caption, value)

Label id:  1
Num classes in that label:  3
Labels:  ['b', 'g', 'n']


In [12]:
# Look up the label column, class count and class names registered for the "frames" task
task = "frames"
label_id_l2 = preprocess_utils.label_index[task]
num_classes_l2 = preprocess_utils.num_classes[task]
labels_string_l2 = preprocess_utils.labels_string[task]

# Column of the label matrix that belongs to this task
Y_L2 = Y_as_np[:, label_id_l2]

for caption, value in (
    ("Label id: ", label_id_l2),
    ("Num classes in that label: ", num_classes_l2),
    ("Labels: ", labels_string_l2),
):
    print(caption, value)

Label id:  0
Num classes in that label:  3
Labels:  ['Mgmt', 'Ctrl', 'Data']


In [13]:
# Check the number of samples per L2 label (Mgmt=0, Ctrl=1, Data=2).
# minlength keeps one bin per class even if the highest class id has no samples.
np.bincount(Y_L2, minlength=num_classes_l2)

array([75156, 75156, 75156], dtype=int64)

In [14]:
# Check the number of samples per L1 label (b=0, g=1, n=2).
# minlength keeps one bin per class even if the highest class id has no samples.
np.bincount(Y_L1, minlength=num_classes_l1)

array([79007, 74197, 72264], dtype=int64)

In [15]:
# Indexes of the samples whose L1 label is 0 (802.11b)
b_idx = np.nonzero(Y_L1 == 0)[0]
print(b_idx.size)

79007


In [16]:
# Frame labels of the samples that were transmitted using 802.11b
Y_L2_b = Y_L2[b_idx]

In [17]:
# Check the frame-type distribution among 802.11b samples -> most Mgmt frames were sent using 802.11b!
# minlength keeps one bin per frame class even if the highest class id is absent from this subset.
np.bincount(Y_L2_b, minlength=num_classes_l2)

array([74662,  1642,  2703], dtype=int64)