## For Deriving Samples
Use this notebook to derive samples from the raw IQ recordings and save them as `.npy` files for later use by the models.

In [13]:
# Import the project helper modules, then reload each of them so that edits
# made to their source files are picked up every time this cell is re-run
# (a plain `import` would keep using the copy already cached by the kernel).
import importlib

import helper_files.preprocessing as preprocFuncts
import helper_files.util as util
import parameters as MyParams

# Reload order: preprocessing helpers, utilities, then the parameter file.
for _module_to_refresh in (preprocFuncts, util, MyParams):
    importlib.reload(_module_to_refresh)

# Configuration for the sample derivation below.
#
# Leave USE_PARAMS_FILE = True to take every setting from parameters.py.
# Set it to False to try the hand-picked overrides in the `else` branch
# without editing parameters.py.
USE_PARAMS_FILE = True

# --- Settings read from parameters.py in both modes --------------------------
NUM_FEATURES = MyParams.NUM_FEATURES
FEATURES_TO_USE = MyParams.FEATURES_TO_USE
MAX_FILES = MyParams.MAX_FILES  # if not using the saved numpy data, this is the max files to intake

# These three were previously assigned only in the parameters.py branch, so
# switching to the override branch left them undefined and the loading cells
# failed with NameError. They are now always defined.
TRAINING_DATA_DIR = MyParams.training_data_dir
EVAL_DATA_DIR = MyParams.eval_data_dir
SAVE_METRICS_TO_FILE = MyParams.SAVE_METRICS_TO_FILE

# Forwarded to preprocessFiles(saved_data=...) by the loading cells.
USE_SAVED_DATA = False

if USE_PARAMS_FILE:
    # DERIVATION PARAMETERS
    WINDOW_LEN = MyParams.WINDOW_LEN  # measured in IQ samples
    OVERLAP = MyParams.OVERLAP

    # DATA PARAMETERS
    NUM_TRAINING_FILES = MyParams.NUM_TRAINING_FILES  # how many files in the saved numpy data for training
    NUM_EVALUATION_FILES = MyParams.NUM_EVALUATION_FILES  # how many files in the saved numpy data for evaluation
    TRAINING_DATASET = MyParams.TRAINING_DATASET
    EVAL_DATASET = MyParams.EVAL_DATASET
else:
    # DERIVATION PARAMETERS
    WINDOW_LEN = 4096  # measured in IQ samples
    OVERLAP = 0.10

    # DATA PARAMETERS
    NUM_TRAINING_FILES = 28  # how many files in the saved numpy data for training
    NUM_EVALUATION_FILES = 7  # how many files in the saved numpy data for evaluation
    # NOTE(review): the dataset names are hard-coded here while the data
    # directories above still come from parameters.py -- confirm that both
    # describe the same dataset before deriving samples in this mode.
    TRAINING_DATASET = "dji_20mhz_comms"
    EVAL_DATASET = "dji_20mhz_comms"

### Load Training

In [14]:
# Derive the training samples and labels from the recordings found in
# TRAINING_DATA_DIR.
print("Pulling from directory: ", TRAINING_DATA_DIR)

# The postfix is assembled from named pieces rather than one long f-string:
#  * the original reused double quotes inside a replacement field, which is a
#    SyntaxError on Python versions before 3.12;
#  * the overlap percentage is rounded before conversion, because a bare
#    int() truncates float error (0.29 * 100 == 28.999... would yield "028").
overlap_tag = "0" + str(int(round(OVERLAP * 100)))
training_dataset_tag = "_" + TRAINING_DATASET if TRAINING_DATASET != "" else ""
training_postfix = (
    f"train_{NUM_FEATURES}ftrs_{NUM_TRAINING_FILES}files_"
    f"{WINDOW_LEN}win_{overlap_tag}over{training_dataset_tag}"
)

training_derived_samples, training_labels = preprocFuncts.preprocessFiles(
    TRAINING_DATA_DIR,
    postfix=training_postfix,
    features_to_use=FEATURES_TO_USE,
    window_len=WINDOW_LEN,
    overlap=OVERLAP,
    saved_data=USE_SAVED_DATA,
    max_files=MAX_FILES,
)

# Stop the notebook here if nothing was derived, so later cells do not run
# on an empty training set.
if training_derived_samples.size == 0:
    raise UserWarning("Array is empty")

Pulling from directory:  /home/uav-cyberlab-rfml/RFML/test-dataset/test_dji_20mhz_comms_training
Converted 62,500,000 IQ samples to numpy array
Converted 62,500,000 IQ samples to numpy array
Converted 62,500,000 IQ samples to numpy array
Converted 62,500,000 IQ samples to numpy array
Converted 62,500,000 IQ samples to numpy array
Converted 62,500,000 IQ samples to numpy array
Converted 62,500,000 IQ samples to numpy array
Converted 62,500,000 IQ samples to numpy array
Converted 62,500,000 IQ samples to numpy array
Converted 62,500,000 IQ samples to numpy array
Converted 62,500,000 IQ samples to numpy array
Converted 62,500,000 IQ samples to numpy array
Converted 62,500,000 IQ samples to numpy array
Converted 62,500,000 IQ samples to numpy array
Converted 62,500,000 IQ samples to numpy array
Converted 62,500,000 IQ samples to numpy array
Converted 62,500,000 IQ samples to numpy array
Converted 62,500,000 IQ samples to numpy array
Converted 62,500,000 IQ samples to numpy array
Converted 

In [15]:
# Summarise the derived training set. util.display prints the number of labels
# and samples, their shapes, one example sample and the per-label counts.
print("\n==BEFORE BALANCING========")
util.display(training_derived_samples, training_labels)

# Balancing is currently disabled. Uncomment the lines below to run
# preprocFuncts.balanceByMedian (presumably evens out the label counts --
# confirm in helper_files/preprocessing) and print the summary again.
# Note that this reassigns the training arrays, so re-running the cell would
# balance already-balanced data.
# training_derived_samples, training_labels = preprocFuncts.balanceByMedian(training_derived_samples, training_labels, unlabeled_downsampling=70_000)

# print("\n==AFTER BALANCING========")
# util.display(training_derived_samples, training_labels)



Number of labels: 854,448
Number of samples: 854,448

Shape of labels: (854448,)
Shape of samples: (854448, 5)

One sample: 
[7.5714419e+05 7.5725062e+05 1.0863329e+03 2.5407424e+13 2.5502678e+01]

The counts:
12,394   "Bluetooth"
158,150  "Ocusync_2.0_mini_4k_DL"
98,162   "Ocusync_2.0_mini_4k_UL"
23,134   "WIFI"
562,608  "background_noise"


### Load Evaluation

In [16]:
# Derive the evaluation samples and labels from the recordings found in
# EVAL_DATA_DIR.
print("Pulling from directory: ", EVAL_DATA_DIR)

# The postfix is assembled from named pieces rather than one long f-string:
#  * the original reused double quotes inside a replacement field, which is a
#    SyntaxError on Python versions before 3.12;
#  * the overlap percentage is rounded before conversion, because a bare
#    int() truncates float error (0.29 * 100 == 28.999... would yield "028").
# The postfix ends up in the saved file names (X_<postfix>.npy / y_<postfix>.npy).
overlap_tag = "0" + str(int(round(OVERLAP * 100)))
eval_dataset_tag = "_" + EVAL_DATASET if EVAL_DATASET != "" else ""
eval_postfix = (
    f"eval_{NUM_FEATURES}ftrs_{NUM_EVALUATION_FILES}files_"
    f"{WINDOW_LEN}win_{overlap_tag}over{eval_dataset_tag}"
)

test_derived_samples, test_labels = preprocFuncts.preprocessFiles(
    EVAL_DATA_DIR,
    postfix=eval_postfix,
    features_to_use=FEATURES_TO_USE,
    window_len=WINDOW_LEN,
    overlap=OVERLAP,
    saved_data=USE_SAVED_DATA,
    max_files=MAX_FILES,
)

# Guard the evaluation array itself. The original re-checked
# training_derived_samples here, so an empty evaluation set went unnoticed.
if test_derived_samples.size == 0:
    raise UserWarning("Array is empty")

Pulling from directory:  /home/uav-cyberlab-rfml/RFML/test-dataset/test_dji_20mhz_comms_eval
Converted 62,500,000 IQ samples to numpy array
Converted 62,500,000 IQ samples to numpy array
Converted 62,500,000 IQ samples to numpy array
Converted 62,500,000 IQ samples to numpy array
Converted 62,500,000 IQ samples to numpy array
Converted 62,500,000 IQ samples to numpy array
Converted 62,500,000 IQ samples to numpy array
Converted 7 files
Deriving samples now!
Samples saved to: ./saved-data/X_eval_5ftrs_6files_4096win_050over_dji_20mhz_comms.npy
Labels saved to: ./saved-data/y_eval_5ftrs_6files_4096win_050over_dji_20mhz_comms.npy


In [17]:
# Summarise the derived evaluation set. util.display prints the number of
# labels and samples, their shapes, one example sample and the per-label counts.
print("\n==BEFORE BALANCING========")
util.display(test_derived_samples, test_labels)

# Balancing is currently disabled. Uncomment the lines below to run
# preprocFuncts.balanceByMedian (presumably evens out the label counts --
# confirm in helper_files/preprocessing) and print the summary again.
# The final call now pairs the test samples with test_labels; it previously
# passed training_labels by mistake.
# test_derived_samples, test_labels = preprocFuncts.balanceByMedian(test_derived_samples, test_labels, unlabeled_downsampling=70_000)

# print("\n==AFTER BALANCING========")
# util.display(test_derived_samples, test_labels)



Number of labels: 213,612
Number of samples: 213,612

Shape of labels: (213612,)
Shape of samples: (213612, 5)

One sample: 
[3.9581367e+03 3.8779756e+03 7.8478233e+01 1.3160687e+11 1.1243217e+03]

The counts:
4,906    "Bluetooth"
7,159    "Burst"
19,160   "Ocusync_2.0_mini_4k_DL"
20,965   "Ocusync_2.0_mini_4k_UL"
5,865    "WIFI"
155,557  "background_noise"
