# Modulation Classification Data Preprocessing

In [1]:
import warnings
# Silence every Python warning for the rest of the session (library deprecation
# notices and pandas diagnostics included). Comment this out while debugging.
warnings.filterwarnings('ignore')

In [46]:
import tensorflow as tf
from tensorflow.keras.layers import Activation, Input, Conv1D, ReLU, BatchNormalization, Add, AveragePooling1D, Flatten, Dense, MaxPooling1D
# from keras.layers.core import Activation, Flatten, Dense, Dropout
#import keras
from tensorflow.keras.layers import Conv2D, Dense, BatchNormalization, Activation, MaxPool2D, GlobalAveragePooling2D, AveragePooling2D, Add, Input, Flatten
import time
import numpy as np
import matplotlib.pyplot as plt
import scipy.io as spio
from scipy import stats as st
import komm
from mat4py import loadmat
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
import glob
import os
import pickle

## Data Collection and Modulation Types

In [3]:
# Modulation schemes in the dataset; file names look like "frame<type>NNN.mat".
types = [
    'BPSK', 'QPSK', '8PSK', '16QAM', '64QAM',
    'PAM4', 'GFSK', 'CPFSK', 'B-FM', 'DSB-AM', 'SSB-AM',
]

# Integer class code -> modulation name, numbered in sorted-name order
# (0: '16QAM', 1: '64QAM', 2: '8PSK', 3: 'B-FM', ..., 10: 'SSB-AM').
label_dict = dict(enumerate(sorted(types)))

# Number of classes.
labels = len(label_dict)

# Directory that data_collection() scans for .mat files; set before collecting.
file_path = ""

In [4]:
def data_collection(types=None,file_path=None):
    """Load every ``*.mat`` file in a directory into a DataFrame, one row per file.

    Each file is read with ``scipy.io.loadmat`` and labelled with the modulation
    type whose ``"frame" + type`` prefix starts the file name
    (e.g. ``frame16QAM001.mat`` -> ``16QAM``).

    Parameters
    ----------
    types : iterable of str, optional
        Candidate modulation names. ``None`` is treated as an empty list.
    file_path : str, optional
        Directory to scan. ``None`` or ``""`` means the current working
        directory. The process working directory is left unchanged.

    Returns
    -------
    pandas.DataFrame
        Columns ``Data`` (the dict returned by ``loadmat``), ``FileName``
        (base name of the file) and ``Type`` (matched modulation name, or a
        missing value when no candidate prefix matches). Rows follow sorted
        file-name order; an empty directory yields an empty frame with the
        same three columns.
    """
    directory = file_path if file_path else os.curdir
    candidates = [] if types is None else list(types)
    # Longest names first, so a type that is a prefix of another cannot shadow it.
    candidates.sort(key=len, reverse=True)

    # glob.escape keeps special characters in the directory name literal.
    pattern = os.path.join(glob.escape(directory), '*.mat')

    records = []
    for path in sorted(glob.glob(pattern)):  # sorted -> deterministic row order
        fname = os.path.basename(path)
        # Resolve the label per file; an unmatched file gets None instead of
        # inheriting the previous file's label.
        data_type = next(
            (k for k in candidates if fname.startswith("frame" + k)), None
        )
        records.append((spio.loadmat(path), fname, data_type))

    # Build the frame once; appending with df.loc[i] re-allocates on every row.
    return pd.DataFrame(records, columns=['Data', 'FileName', 'Type'])

In [5]:
# # Save dataframe (uncomment to rebuild the pickle from the raw .mat files)
# df = data_collection(types, file_path)
# os.chdir(file_path)
# df.to_pickle('Modulation_Data.pkl')

In [6]:
# Load the raw per-file table that the commented-out collection cell writes.
# pd.read_pickle unpickles arbitrary objects: only load files you created yourself.
df = pd.read_pickle('Modulation_Data.pkl')
df.head(5)

Unnamed: 0,Data,FileName,Type
0,"{'__header__': b'MATLAB 5.0 MAT-file, Platform...",frame16QAM001.mat,16QAM
1,"{'__header__': b'MATLAB 5.0 MAT-file, Platform...",frame16QAM002.mat,16QAM
2,"{'__header__': b'MATLAB 5.0 MAT-file, Platform...",frame16QAM003.mat,16QAM
3,"{'__header__': b'MATLAB 5.0 MAT-file, Platform...",frame16QAM004.mat,16QAM
4,"{'__header__': b'MATLAB 5.0 MAT-file, Platform...",frame16QAM005.mat,16QAM


In [7]:
# Inspect the 'Data' entry of the row labelled 0 (the dict returned by loadmat).
df['Data'][0]

{'__header__': b'MATLAB 5.0 MAT-file, Platform: PCWIN64, Created on: Thu May 27 14:24:47 2021',
 '__version__': '1.0',
 '__globals__': [],
 'frame': array([[-0.73433293-0.99518379j],
        [-0.85662654-0.62145629j],
        [-0.90833505-0.33555202j],
        ...,
        [ 1.38139364+0.27414485j],
        [ 1.55690698+0.27962191j],
        [ 1.67416038+0.47210394j]]),
 'None': MatlabOpaque([(b'label', b'MCOS', b'categorical', array([[3707764736],
        [         2],
        [         1],
        [         1],
        [         1],
        [         1]], dtype=uint32))],
              dtype=[('s0', 'O'), ('s1', 'O'), ('s2', 'O'), ('arr', 'O')]),
 '__function_workspace__': array([[ 0,  1, 73, ...,  0,  0,  0]], dtype=uint8)}

In [8]:
# The complex samples are stored under the 'frame' key of that dict.
df['Data'][0]['frame']

array([[-0.73433293-0.99518379j],
       [-0.85662654-0.62145629j],
       [-0.90833505-0.33555202j],
       ...,
       [ 1.38139364+0.27414485j],
       [ 1.55690698+0.27962191j],
       [ 1.67416038+0.47210394j]])

In [9]:
# Number of rows (one per .mat file) in the raw table.
print('Length: ', len(df))

Length:  110000


## Preprocessing Data

In [10]:
# Work on a copy of the raw table without the file-name column.
df_temp = df.copy().drop(columns='FileName')

In [18]:
# Keep only the complex 'frame' array from each loadmat dict.
# One whole-column assignment replaces the per-row chained assignment
# (df_temp['Data'][i] = ...), which pandas does not guarantee writes through to
# df_temp and which is a no-op under copy-on-write.
df_temp['Data'] = df['Data'].map(lambda mat: mat['frame'])

In [11]:
# NOTE(review): unique() lists values in order of first appearance, whereas
# LabelEncoder numbers classes in sorted order; the two agree here only because
# the rows are already sorted by type name — confirm before relying on it.
df_temp['Type'].unique() # Distinct type names, in order of first appearance

array(['16QAM', '64QAM', '8PSK', 'B-FM', 'BPSK', 'CPFSK', 'DSB-AM',
       'GFSK', 'PAM4', 'QPSK', 'SSB-AM'], dtype=object)

In [20]:
# Replace the string class names in 'Type' with integer codes.
# NOTE(review): re-running this cell would refit on the already-encoded integers,
# so `le` would presumably no longer map codes back to names — confirm.
le = LabelEncoder()
df_temp['Type'] = le.fit_transform(df_temp['Type'])# Encode the type as integers
df_temp.head()

Unnamed: 0,Data,Type
0,"[[(-0.734332932559768-0.9951837912122908j)], [...",0
1,"[[(0.40661975118360605+1.1226219212238733j)], ...",0
2,"[[(0.262637238214493+0.4105866430957086j)], [(...",0
3,"[[(0.6068912560223054-0.7565850679552969j)], [...",0
4,[[(-0.052609182806138355+0.03357776168282335j)...,0


In [12]:
# Number of distinct values in 'Type' (overrides the hard-coded `labels` set earlier).
labels = len(df_temp['Type'].unique())
df_temp['Type'].unique() # Show the distinct values of the Type column

array(['16QAM', '64QAM', '8PSK', 'B-FM', 'BPSK', 'CPFSK', 'DSB-AM',
       'GFSK', 'PAM4', 'QPSK', 'SSB-AM'], dtype=object)

In [13]:
# Store processed data
# df_temp.to_pickle('Modulation_Data_Processed.pkl')

In [10]:
# Reload the frame-only table written by the commented-out to_pickle cell.
# NOTE: this rebinds `df`, replacing the raw table loaded earlier.
df = pd.read_pickle('Modulation_Data_Processed.pkl')
df.head(5)

Unnamed: 0,Data,Type
0,"[[(-0.734332932559768-0.9951837912122908j)], [...",0
1,"[[(0.40661975118360605+1.1226219212238733j)], ...",0
2,"[[(0.262637238214493+0.4105866430957086j)], [(...",0
3,"[[(0.6068912560223054-0.7565850679552969j)], [...",0
4,[[(-0.052609182806138355+0.03357776168282335j)...,0


In [15]:
# Shape of one stored frame.
df['Data'][0].shape

(1024, 1)

In [16]:
# First complex sample of the first frame.
(df['Data'][0][0][0])

(-0.734332932559768-0.9951837912122908j)

In [26]:
# Reorder data: flatten each (N, 1) frame into a 1-D array of complex samples.
# df_temp is rebuilt from df in one step, rather than writing into a frame left
# over from an earlier stage through chained assignment (df_temp['Data'][i] = ...),
# which pandas does not guarantee writes through.
frame_len = 1024  # samples kept per frame (same bound as the original loop)
df_temp = df.assign(
    Data=df['Data'].map(lambda frame: np.asarray(frame)[:frame_len, 0].copy())
)

In [17]:
# df_temp.to_pickle('Modulation_Data_Processed_1.pkl')

In [11]:
# Completely reordered data: each 'Data' entry is a flat array of complex samples.
# NOTE: this rebinds `df` again.
df = pd.read_pickle('Modulation_Data_Processed_1.pkl')
df.head(5)

Unnamed: 0,Data,Type
0,"[(-0.734332932559768-0.9951837912122908j), (-0...",0
1,"[(0.40661975118360605+1.1226219212238733j), (0...",0
2,"[(0.262637238214493+0.4105866430957086j), (0.0...",0
3,"[(0.6068912560223054-0.7565850679552969j), (0....",0
4,"[(-0.052609182806138355+0.03357776168282335j),...",0


In [11]:
# Count missing values per column.
df.isna().sum()

Data    0
Type    0
dtype: int64

In [12]:
# Empty table for the real and imaginary components of each signal, plus its
# class code in 'Target'.
df_new = pd.DataFrame(columns=['Real', 'Imag', 'Target'])

In [13]:
# Split every complex frame into float32 real and imaginary arrays.
# The table is built once from whole columns: growing it with df_new.loc[i]
# re-allocates on every row (quadratic over ~110k rows), and df['Type'][i]
# looked rows up by label inside a positional loop.
frame_len = 1024  # samples per frame (same bound as the original loop)
frames = [np.asarray(val)[:frame_len] for val in df['Data']]

df_new = pd.DataFrame({
    'Real': pd.Series([np.real(f).astype('float32') for f in frames], dtype=object),
    'Imag': pd.Series([np.imag(f).astype('float32') for f in frames], dtype=object),
    'Target': df['Type'].to_numpy(),
})

# df_new['Target'] = df['Type']

In [24]:
# Preview the split table. Only a cell's last expression is rendered, so the
# earlier df_new.head() line was discarded and has been dropped.
df_new

Unnamed: 0,Real,Imag,Target
0,"[-0.7343329, -0.8566265, -0.90833503, -0.96382...","[-0.99518377, -0.62145627, -0.335552, -0.12873...",0
1,"[0.40661976, 0.3049861, 0.096985176, -0.221202...","[1.1226219, 1.2349485, 1.3255705, 1.1825333, 1...",0
2,"[0.26263723, 0.06726868, -0.1527621, -0.391072...","[0.41058666, 0.28404233, 0.15469582, -0.013441...",0
3,"[0.6068913, 0.4884971, 0.0708011, -0.25839245,...","[-0.75658506, -0.79838735, -0.73175997, -0.686...",0
4,"[-0.052609183, 0.20891644, 0.09371947, 0.24367...","[0.033577763, 0.043213084, -0.09868994, -0.232...",0
...,...,...,...
109995,"[-0.010861694, 0.090149954, 0.15294395, 0.3859...","[-0.61658335, -0.28057382, 0.3828093, 0.309508...",10
109996,"[-0.09629692, 0.2551987, 1.1387882, -0.536693,...","[-0.72275114, -0.07147301, 1.0694059, 0.858091...",10
109997,"[-0.44696736, -0.5747405, 1.6014634, -0.013654...","[-1.0415156, -0.2493575, 0.20104489, -0.486046...",10
109998,"[-0.31324294, 0.57157695, -0.057869017, -1.352...","[0.32837597, 0.045492236, 1.112148, 0.76629114...",10


In [15]:
# Persist the table of real/imaginary arrays and targets.
df_new.to_pickle('MODREC.pkl')

In [12]:
# Reload the real/imaginary table; NOTE: rebinds `df`.
df = pd.read_pickle('MODREC.pkl')

In [18]:
# Confirm the row count and preview the reloaded table.
print(len(df))
df.head()

110000


Unnamed: 0,Real,Imag,Target
0,"[-0.7343329, -0.8566265, -0.90833503, -0.96382...","[-0.99518377, -0.62145627, -0.335552, -0.12873...",0
1,"[0.40661976, 0.3049861, 0.096985176, -0.221202...","[1.1226219, 1.2349485, 1.3255705, 1.1825333, 1...",0
2,"[0.26263723, 0.06726868, -0.1527621, -0.391072...","[0.41058666, 0.28404233, 0.15469582, -0.013441...",0
3,"[0.6068913, 0.4884971, 0.0708011, -0.25839245,...","[-0.75658506, -0.79838735, -0.73175997, -0.686...",0
4,"[-0.052609183, 0.20891644, 0.09371947, 0.24367...","[0.033577763, 0.043213084, -0.09868994, -0.232...",0


In [24]:
def preproc(df):
    """Join each row's real and imaginary arrays into a single array.

    Parameters
    ----------
    df : pandas.DataFrame
        Must have columns ``Real`` and ``Imag`` (one array per row) and
        ``Target``. Rows are read in positional order, so any index works.

    Returns
    -------
    pandas.DataFrame
        Columns ``Data`` and ``Target`` on a fresh 0..n-1 index. Each ``Data``
        entry is ``np.concatenate((Real, Imag))`` for that row: the real values
        followed by the imaginary values (2048 values for 1024-sample inputs).
    """
    # Build all rows first and create the frame once; appending with
    # df_temp.loc[i] re-allocates the frame on every row.
    joined = [
        np.concatenate((real, imag))
        for real, imag in zip(df['Real'], df['Imag'])
    ]
    return pd.DataFrame({
        'Data': pd.Series(joined, dtype=object),
        'Target': df['Target'].to_numpy(),
    })

In [25]:
# Build the concatenated table from the real/imaginary table and preview it.
df_temp = preproc(df)
df_temp.head()

Unnamed: 0,Data,Target
0,"[-0.7343329, -0.8566265, -0.90833503, -0.96382...",0
1,"[0.40661976, 0.3049861, 0.096985176, -0.221202...",0
2,"[0.26263723, 0.06726868, -0.1527621, -0.391072...",0
3,"[0.6068913, 0.4884971, 0.0708011, -0.25839245,...",0
4,"[-0.052609183, 0.20891644, 0.09371947, 0.24367...",0


In [26]:
# Persist the concatenated table.
df_temp.to_pickle('MODREC_2048.pkl')

In [28]:
# Reload the concatenated table; NOTE: rebinds `df`.
df = pd.read_pickle('MODREC_2048.pkl')
df.head()

Unnamed: 0,Data,Target
0,"[-0.7343329, -0.8566265, -0.90833503, -0.96382...",0
1,"[0.40661976, 0.3049861, 0.096985176, -0.221202...",0
2,"[0.26263723, 0.06726868, -0.1527621, -0.391072...",0
3,"[0.6068913, 0.4884971, 0.0708011, -0.25839245,...",0
4,"[-0.052609183, 0.20891644, 0.09371947, 0.24367...",0


In [38]:
# Shape of one concatenated row.
df['Data'][0].shape

(2048,)

In [53]:
# Pull the per-row arrays and targets out of the table as plain lists, in row
# order, so they can be reshaped. Reading each column directly avoids ~110k
# label-based lookups (df['Data'][i]) and does not depend on the index being
# 0..n-1.
us_data = list(df['Data'])
us_target = list(df['Target'])

In [41]:
# Preview only the first few arrays; rendering the whole list floods the output.
us_data[:5]

[array([-0.7343329 , -0.8566265 , -0.90833503, ...,  0.27414486,
         0.2796219 ,  0.47210395], dtype=float32),
 array([ 0.40661976,  0.3049861 ,  0.09698518, ..., -0.02216188,
         0.038523  , -0.11552447], dtype=float32),
 array([ 0.26263723,  0.06726868, -0.1527621 , ..., -0.5229387 ,
        -0.43406186, -0.30139205], dtype=float32),
 array([ 0.6068913 ,  0.4884971 ,  0.0708011 , ..., -0.666639  ,
        -0.33007205, -0.17698418], dtype=float32),
 array([-0.05260918,  0.20891644,  0.09371947, ...,  0.21145618,
        -0.12783438,  0.04888685], dtype=float32),
 array([ 0.01408334, -0.17306486, -0.31739196, ...,  0.7824191 ,
         0.9409861 ,  1.0647267 ], dtype=float32),
 array([-0.11533459, -0.21225347, -0.4043859 , ...,  0.34309444,
         0.07098954, -0.11134837], dtype=float32),
 array([-0.8643953 , -0.9149377 ,  0.21377146, ..., -0.8238865 ,
        -0.9057283 , -0.76148874], dtype=float32),
 array([-0.15626621, -0.15541181, -0.19257174, ..., -0.32510132,
       

In [43]:
# Reshape every 2048-value row to (2, 1024, 1): row 0 holds the first 1024 values
# (the real part) and row 1 the last 1024 (the imaginary part), matching the
# order in which preproc() concatenates them.
test_d = tf.reshape(us_data, (len(df), 2, 1024,1))

In [45]:
# Check one reshaped example.
test_d[0]

<tf.Tensor: shape=(2, 1024, 1), dtype=float32, numpy=
array([[[-0.7343329 ],
        [-0.8566265 ],
        [-0.90833503],
        ...,
        [ 1.3813937 ],
        [ 1.5569069 ],
        [ 1.6741604 ]],

       [[-0.99518377],
        [-0.62145627],
        [-0.335552  ],
        ...,
        [ 0.27414486],
        [ 0.2796219 ],
        [ 0.47210395]]], dtype=float32)>

In [48]:
# Save the reshaped inputs; the file is read back as 'PCS_MDRC_1024.npy'.
np.save('PCS_MDRC_1024', test_d)

In [50]:
# Verify the saved shape. mmap_mode='r' maps the file instead of reading the
# whole array into memory just to look at its shape.
np.load('PCS_MDRC_1024.npy', mmap_mode='r').shape

(110000, 2, 1024, 1)

In [56]:
# Save the class codes, in the same row order as the saved inputs.
np.save('PCS_MDRC_TARGET', us_target)

In [57]:
# Read the saved targets back as a sanity check.
np.load('PCS_MDRC_TARGET.npy')

array([ 0,  0,  0, ..., 10, 10, 10])