### Create spectrograms from raw data

Note: This file has already served its purpose. The spectrograms, labels and subjects arrays have already been created and saved in the `datasets/` folder.

In [9]:
import os
import scipy.signal as signal
import numpy as np
import matplotlib.pyplot as plt
from  scipy.signal import ShortTimeFFT
import scipy.signal.windows as wins

cwd = os.getcwd()
base_folder = "Dataset_848"

# Collect every immediate sub-directory of the dataset root.
subfolders = []
for entry in os.scandir(base_folder):
    if entry.is_dir():
        subfolders.append(entry.path)

# Gather the paths of all regular files found directly inside those
# sub-directories (no recursion), in the order os.scandir yields them.
filenames = []
for folder in subfolders:
    filenames += [entry.path for entry in os.scandir(folder) if entry.is_file()]

Create subjects array

In [11]:
# Build the array of subject ids, one entry per recording in `filenames`.
# The id is the integer between the 'P' and the 'A' in the file's base name.
subjects = []
for filename in filenames:
    # Accept both '\\' and '/' separators so the basename is found on any OS,
    # and strip the extension whatever its length.
    base_name = filename.replace('\\', '/').rsplit('/', 1)[-1]
    name_of_file = os.path.splitext(base_name)[0]
    subject_id = int(name_of_file.split('P')[1].split('A')[0])
    subjects.append(subject_id)
subjects = np.array(subjects)
np.save('datasets/subjects_array.npy', subjects)

[ 1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24
 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48
 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72]


Create spectrograms and labels array

In [1]:
# Parameters
t_sweep = 0.001      # duration of one sweep (presumably seconds - TODO confirm)
n_rangebins = 128    # number of samples per sweep
bandwidth = 4e8      # only referenced by the commented-out range-axis snippet below
fs = int(n_rangebins / t_sweep)

winlength = 128
overlap = 0.5
nfft = 2048
win = wins.hann(winlength)
hop = int(winlength * (1 - overlap))

spectrograms = []
labels = []

b, a = signal.butter(4, 0.0075, 'high')  # filter coefficients for MTI filter

# The STFT object depends only on the parameters above, so build it once
# instead of once per file.
# NOTE(review): `prof` below holds one value per sweep, while `fs` is the
# within-sweep rate n_rangebins / t_sweep. The frequency extent (f_lo, f_hi)
# derived from this object is presumably scaled by a factor of n_rangebins -
# confirm before using it as a plot axis.
stft = ShortTimeFFT(win, hop, mfft=nfft, fs=fs, scale_to='magnitude', fft_mode='centered')


def _to_complex(token):
    """Parse one text token such as '1.5+2i' into a Python complex.

    np.loadtxt hands converters ``bytes`` or ``str`` depending on the NumPy
    version and the ``encoding`` argument, so both are accepted.
    """
    if isinstance(token, bytes):
        token = token.decode()
    return complex(token.replace('i', 'j'))


for filename in filenames:  # iterate over the whole dataset
    # The activity label is the integer that precedes 'P' in the file's base name.
    # Both '\\' and '/' separators are accepted so this works on any OS.
    base_name = filename.replace('\\', '/').rsplit('/', 1)[-1]
    name_of_file = os.path.splitext(base_name)[0]
    activity = int(name_of_file.split('P')[0])

    # The first 4 rows are skipped (presumably a header - TODO confirm);
    # the remaining rows are complex samples written with an 'i' imaginary unit.
    with open(filename, 'r') as file:
        data = np.loadtxt(file, delimiter=' ', skiprows=4, dtype=np.complex128, converters=_to_complex)

    data_numsamples = len(data)
    data_time = data_numsamples * t_sweep / n_rangebins
    # Integer arithmetic: going through floats and truncating with int() can
    # lose one sweep to rounding and make the reshape below fail.
    n_sweeps = data_numsamples // n_rangebins

    # reshape data from vector to matrix of size rangebins x sweeps
    data = data.reshape(n_rangebins, n_sweeps, order='F')

    # perform fft on each sweep to convert beat frequency to range returns
    data_fft = np.fft.fftshift(np.fft.fft(data, axis=0), axes=0)
    # keep only the second half of the shifted spectrum
    # (original note: "keep only the bottom half since fft is symmetric")
    data_fft = data_fft[n_rangebins // 2:, :]
    range_profile = signal.filtfilt(b, a, data_fft, axis=1)  # MTI filter
    prof = np.sum(range_profile, axis=0)  # sum all range returns to get the profile

    Sxx = stft.spectrogram(prof, axis=0)  # compute the spectrogram
    # Only the frequency extent is kept; the time extent is discarded.
    nul, nul, f_lo, f_hi = stft.extent(int(fs * n_sweeps))

    # Append label and spectrogram together so the two lists stay aligned
    # even if a file fails to load part-way through the dataset.
    labels.append(activity)
    spectrograms.append(Sxx)

    # range and time axes (kept for reference, not executed):
    # freq_axis=np.linspace(0,n_rangebins,n_rangebins)*fs/(2*n_rangebins)
    # range_axis=freq_axis*3e8*t_sweep/(2*bandwidth)
    # time_axis=np.arange(0, data_time, t_sweep)

    # check progress every 50 samples (kept for reference, not executed):
    # if len(labels)%50==0:
    #     print(len(labels))

## Resampling spectrograms to match the 80 time bins of the 5-second activities
Then normalize and convert to log scale.

In [17]:
from scipy.signal import resample

# Pair every spectrogram with its label, then bring all spectrograms to
# exactly 80 time bins (axis 1). Entries that already have 80 bins are
# passed through untouched; the label always travels with its spectrogram.
dataset = list(zip(spectrograms, labels))
dataset_resampled = []
for pair in dataset:
    spectrogram, label = pair
    if spectrogram.shape[1] == 80:
        dataset_resampled.append(pair)
        continue
    dataset_resampled.append((resample(spectrogram, 80, axis=1), label))

In [23]:
# Split the (spectrogram, label) pairs back into two parallel sequences and
# stack them into arrays.
spec_resampled, labels = zip(*dataset_resampled)

spec_resampled_array = np.array(spec_resampled)
labels_array = np.array(labels)

print(spec_resampled_array.shape)
print(labels_array.shape)

# save the resampled data
np.save('datasets/spec_resampled_array.npy', spec_resampled_array)
np.save('datasets/labels_array.npy', labels_array)

num_samples, num_freqs, num_frames = spec_resampled_array.shape
scaled_specs = np.zeros((num_samples, num_freqs, num_frames))

# Convert to a log scale relative to the largest magnitude in the whole
# dataset. The peak is computed once, and each 2-D spectrogram is scaled on
# its own: assigning the conversion of the full 3-D array to a single slice
# raises a broadcast error as soon as there is more than one sample.
global_peak = np.max(abs(spec_resampled_array))
for i in range(num_samples):
    spec = spec_resampled_array[i, :, :]
    scaled_specs[i, :, :] = 20 * np.log10(abs(spec) / global_peak)

np.save('datasets/scaled_spec_resampled_array.npy', scaled_specs)

(1754, 2048, 80)
(1754,)


In [None]:
# Sanity check: scaled_specs must equal the log-scale conversion of the
# resampled spectrograms normalised by their overall maximum magnitude.
expected_scaled = 20 * np.log10(np.abs(spec_resampled_array) / np.abs(spec_resampled_array).max())
(scaled_specs == expected_scaled).all()