In [None]:
#Block used to upload kaggle.json, for access to Kaggle API
#Colab-only: google.colab is not importable outside a Colab runtime.
from google.colab import files
#Prompts for a file from the local machine. The setup cell below copies
#./kaggle.json, so the uploaded file has to carry exactly that name.
uploaded = files.upload()

In [None]:
#block for installing kaggle, creation of kaggle dir, and copying the json file over to current dir
! pip install kaggle
! mkdir ~/.kaggle
! cp kaggle.json ~/.kaggle/
! chmod 600 ~/.kaggle/kaggle.json

In [None]:
#Block for downloading dataset
#Fetches the pinxau1000/radioml2018 dataset with the Kaggle CLI, using the
#credentials copied to ~/.kaggle/kaggle.json by the previous cell.
#The next cell expects the archive as ./radioml2018.zip.
! kaggle datasets download pinxau1000/radioml2018

In [None]:
#Block for unziping downloaded file
! unzip radioml2018.zip

In [5]:
#code block for loading in DeepSig data from hdf5 file
import tensorflow as tf
import h5py
import numpy as np
import json
import pandas as pd
import math

# Open the original dataset read-only.
hdf5_file = h5py.File("/content/GOLD_XYZ_OSC.0001_1024.hdf5",  'r')
# Load the modulation classes. You can also copy and paste the content of classes-fixed.txt.
# The context manager closes the JSON file as soon as it has been parsed.
with open("/content/classes-fixed.json", 'r') as classes_file:
    modulation_classes = json.load(classes_file)
# Show the top-level names stored in the hdf5 file. A bare expression in the
# middle of a cell is not displayed, so print it explicitly.
print(list(hdf5_file.keys()))

# Number of rows, taken from the source file instead of being hard-coded.
n_rows = hdf5_file['X'].shape[0]

# Open (or create) the output file that will receive the shuffled copy.
f_shuffle = h5py.File("./DeepSig_XYZ_OSC.0001_1024_Shuffled.hdf5", "a")
# Create the three output datasets. require_dataset returns the existing dataset
# when this cell is re-run against an already-created file (create_dataset would
# raise because the name exists) and raises TypeError if its shape or dtype is
# incompatible with what is requested here.
x_shuf = f_shuffle.require_dataset("X", shape=(n_rows, 1024, 2), dtype='f')
y_shuf = f_shuffle.require_dataset("Y", shape=(n_rows, 24), dtype='i')
z_shuf = f_shuffle.require_dataset("Z", shape=(n_rows, 1), dtype='i')

# Row indexes marking the quarters of the dataset.
# Used to take a quarter of the original file at a time, shuffle it, and store
# it in the new hdf5 file, so the full dataset never has to be held in memory.
qrt_one = n_rows // 4
qrt_two = 2 * qrt_one
qrt_three = 3 * qrt_one

In [6]:
# Copy the dataset into the new hdf5 file one quarter at a time, shuffling each
# quarter in memory with sklearn's shuffle function. One helper plus a loop
# replaces four copy-pasted read / shuffle / write cell groups.
from sklearn.utils import shuffle


def shuffle_rows_into(src, dst, start, stop, random_state=0):
    """Shuffle rows [start:stop] of src's X, Y and Z and write them to dst.

    The three datasets are permuted together, so row i of X, Y and Z still
    belongs to the same example after shuffling. The shuffled rows are written
    to the same [start:stop] range of the datasets of the same name in dst.

    NOTE(review): rows are permuted only inside [start:stop], never across
    ranges, and a fixed random_state gives equal-length ranges the same
    permutation. If the source file is ordered (e.g. by class), the output is
    only shuffled locally -- confirm this is what downstream code expects.

    Args:
        src: open h5py file (or mapping) holding the source "X", "Y", "Z".
        dst: open, writable h5py file holding the target "X", "Y", "Z".
        start: first row of the range (inclusive).
        stop: end of the range (exclusive); None means "to the end".
        random_state: seed passed to sklearn.utils.shuffle.

    Returns:
        The number of rows that were shuffled and written.
    """
    names = ("X", "Y", "Z")
    # Slicing an h5py dataset loads that range into a numpy array.
    loaded = [src[name][start:stop] for name in names]
    shuffled = shuffle(*loaded, random_state=random_state)
    # Drop the unshuffled copies before writing to keep peak memory down.
    del loaded
    for name, rows in zip(names, shuffled):
        dst[name][start:stop] = rows
    return len(shuffled[0])


# Same four ranges the notebook processed by hand; the last one runs to the end.
quarter_ranges = [
    (0, qrt_one),
    (qrt_one, qrt_two),
    (qrt_two, qrt_three),
    (qrt_three, None),
]
for quarter_number, (start, stop) in enumerate(quarter_ranges, start=1):
    rows_written = shuffle_rows_into(hdf5_file, f_shuffle, start, stop, random_state=0)
    # Short progress line instead of dumping the arrays themselves.
    print(f"quarter {quarter_number}: shuffled and wrote {rows_written} rows")

In [18]:
# Release both HDF5 handles: the source file first, then the shuffled output.
for open_handle in (hdf5_file, f_shuffle):
    open_handle.close()

In [23]:
#manually moved new hdf5 file and other dataset description files(license, classes, etc.)
#into one folder and zip it
#Prerequisite: /content/DeepSig_shuffled must already exist and contain those files;
#no cell in this notebook creates or fills that folder.
#-r recurses into the folder; the archive is written to /content/DS_shuffled.zip.
!zip -r /content/DS_shuffled.zip /content/DeepSig_shuffled

  adding: content/DeepSig_shuffled/ (stored 0%)
  adding: content/DeepSig_shuffled/datasets.desktop (deflated 19%)
  adding: content/DeepSig_shuffled/DeepSig_XYZ_OSC.0001_1024_Shuffled.hdf5 (deflated 8%)
  adding: content/DeepSig_shuffled/classes-fixed.json (deflated 54%)
  adding: content/DeepSig_shuffled/LICENSE.TXT (deflated 69%)
  adding: content/DeepSig_shuffled/classes.txt (deflated 55%)
  adding: content/DeepSig_shuffled/classes-fixed.txt (deflated 76%)


In [34]:
#mounted google drive
#move the zipped dataset file to google drive
! mv ./DS_shuffled.zip ./drive/MyDrive 

In [35]:
#flush buffer and unmount google drive
#Run this after the move to Drive above and before ending the session.
from google.colab import drive
drive.flush_and_unmount()