# Preprocessing of RADIOML 2018 Dataset
    Adjusts the data to shape (None, 2, X, 1), where X is the number of samples per frame (1024 or 128).

In [None]:
import h5py
import numpy as np
import os
import tensorflow as tf

In [None]:
# RadioML modulation class names.
# Position matters: chunk_data() and reshape() index this list by class number
# (0-23) to build the per-class file names.
classes = [
    '32PSK', '16APSK', '32QAM', 'FM',
    'GMSK', '32APSK', 'OQPSK', '8ASK',
    'BPSK', '8PSK', 'AM-SSB-SC', '4ASK',
    '16PSK', '64APSK', '128QAM', '128APSK',
    'AM-DSB-SC', 'AM-SSB-WC', '64QAM', 'QPSK',
    '256QAM', 'AM-DSB-WC', 'OOK', '16QAM',
]

In [None]:
# Load the full RadioML HDF5 file into memory.
filename = '../Data/2018.01/GOLD_XYZ_OSC.0001_1024.hdf5'
with h5py.File(filename, "r") as f:
    print("Keys: %s" % f.keys())

    # Resolve the top-level keys once; the first three are used positionally.
    group_keys = list(f.keys())
    a_group_key = group_keys[0]
    b_group_key = group_keys[1]
    c_group_key = group_keys[2]

    # Materialise each dataset as a Python list of rows while the file is open.
    data_x = list(f[a_group_key])
    label_y = list(f[b_group_key])
    z_data = list(f[c_group_key])

In [None]:
def chunk_data(data, labels, k_start, classes, path=None, per_class=106496):
    '''
    Writes the rows of one class of the RADIO ML dataset to its own HDF5 file.

    The dataset is treated as consecutive, equally sized runs of rows, one run
    per entry in `classes`. Run number `k_start` is sliced out and stored in
    the datasets "D" (data rows) and "Label" (matching label rows).


    Args:
        data: full RFML dataset; must support slicing and len()
        labels: full RFML dataset "Y" values, aligned row-for-row with data
        k_start: int index into classes (0-23 for the 24 RadioML classes)
            selecting which run of rows to extract
        classes: list containing each class name, in dataset order
        path: directory for the output file; '../Data/RFML_Data' when None
        per_class: number of consecutive rows that belong to each class

    Returns:
        The filename of the HDF5 file holding the chunked data.

    Raises:
        IndexError: if k_start is not a valid non-negative index into
            classes, or if labels has fewer rows than the selected data.

    '''

    # A negative index would silently pair the last class name with an
    # unrelated (or empty) slice of rows, so reject it explicitly.
    if not 0 <= k_start < len(classes):
        raise IndexError(
            'k_start must be in range 0-%d, got %r' % (len(classes) - 1, k_start))

    c_name = classes[k_start]
    row_start = k_start * per_class
    row_end = row_start + per_class

    # Slice the run directly instead of walking every preceding row.
    Dt = data[row_start:row_end]
    label = labels[row_start:row_end]
    if len(label) < len(Dt):
        raise IndexError(
            'labels has %d rows for %d data rows' % (len(label), len(Dt)))

    if path is None:
        filename = '../Data/RFML_Data/' + c_name + '_1024' + '.hdf5'
    else:
        filename = path + '/' + c_name + '_1024' + '.hdf5'

    with h5py.File(filename, 'a') as hf:
        for ds_name, values in (("D", Dt), ("Label", label)):
            # Append mode keeps anything else stored in the file, but
            # create_dataset refuses to overwrite an existing name; drop the
            # stale dataset so the function can be re-run.
            if ds_name in hf:
                del hf[ds_name]
            hf.create_dataset(ds_name, data=values)

    return filename

In [None]:
# Write the chunk file for class index 23, the last entry in `classes`.
fname = chunk_data(data=data_x, labels=label_y, k_start=23, classes=classes)

In [None]:
# Read back the chunk file for class index 1 to check what was written.
fname = '../Data/RFML_Data/' + classes[1] + '_1024.hdf5'
with h5py.File(fname, "r") as f:
    print("Keys: %s" % f.keys())

    # Resolve the key list once; the first two keys are used positionally.
    chunk_keys = list(f.keys())
    a_group_key = chunk_keys[0]
    b_group_key = chunk_keys[1]

    # Pull both datasets into lists of rows before the file is closed.
    D_data = list(f[a_group_key])
    L_data = list(f[b_group_key])

In [None]:
# Display the first element of the first row read from the chunk file
# (presumably one I/Q pair, given how reshape() indexes rows - TODO confirm).
D_data[0][0]

In [None]:
def reshape(classes, c_num, samples=1024, path=None, end_path=None):
    '''
    Converts a sliced per-class dataset into the shape expected by the model.

    Reads '<class>_1024.hdf5', keeps the first `samples` entries of every
    row, and rearranges the two channels so that the result has shape
    (rows, 2, samples, 1): index 0 on axis 1 holds channel 0 (I) and index 1
    holds channel 1 (Q). The reshaped data and the labels are written with
    np.save to '<prefix>_D' and '<prefix>_L'.


    Args:
        classes: ordered list of classes
        c_num: number of the class to reshape (value from 0-23)
        samples: sample size to restructure the data to; must be positive and
            no larger than the stored row length
        path: directory of the input files; '../Data/RFML_Data' when None
        end_path: directory for the output files;
            '../Data/RFML_Data/RS_<samples>' when None

    Returns:
        The generic filename (path prefix, without the '_D' / '_L' suffix)
        for the location of the reshaped data.

    Raises:
        ValueError: if samples is smaller than 1.
        IndexError: if c_num is out of range for classes, if the stored rows
            hold fewer than `samples` entries, or if there are fewer label
            rows than data rows.

    '''

    # samples <= 0 would otherwise "succeed" and save empty arrays.
    if samples < 1:
        raise ValueError('samples must be a positive integer, got %r' % (samples,))

    c_name = classes[c_num]

    if path is None:
        fname = '../Data/RFML_Data/' + c_name + '_1024.hdf5'
    else:
        fname = path + '/' + c_name + '_1024.hdf5'

    with h5py.File(fname, "r") as f:
        keys = list(f.keys())
        # Read each dataset in one call rather than row by row.
        data = f[keys[0]][...]
        labels = f[keys[1]][...]

    n_rows = len(data)
    if n_rows and data.shape[1] < samples:
        raise IndexError(
            'rows hold %d samples, cannot take %d' % (data.shape[1], samples))
    if len(labels) < n_rows:
        raise IndexError(
            'found %d label rows for %d data rows' % (len(labels), n_rows))

    # (rows, samples, 2) -> (rows, 2, samples, 1). This is the same layout as
    # concatenating the I and Q blocks side by side and reshaping, but the row
    # count comes from the file instead of a hard-coded constant and no
    # per-sample Python loop is needed.
    iq = data[:, :samples, :2]
    data_new = np.ascontiguousarray(np.transpose(iq, (0, 2, 1)))[..., np.newaxis]
    label = labels[:n_rows]

    if end_path is None:
        filename = '../Data/RFML_Data/RS_' + str(samples) + '/' + c_name + '_' + str(samples)
    else:
        filename = end_path + '/' + c_name + '_' + str(samples)

    # np.save does not create missing directories (e.g. RS_128 on first use).
    out_dir = os.path.dirname(filename)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    np.save(filename + '_D', data_new)
    np.save(filename + '_L', label)

    return filename

In [None]:
# Reshape class index 0 on its own first.
file = reshape(classes, c_num=0, samples=1024)

In [None]:
# Reshape the remaining classes, indices 1 through 23.
for i in range(1, 24):
    file = reshape(classes, c_num=i, samples=1024)

In [None]:
# Count the entries in the 1024-sample output directory; reshape() saves two
# arrays per class there (the '_D' data and the '_L' labels).
l = os.listdir('../Data/RFML_Data/RS_1024')
len(l)