# GT2021-60283 Demo I - Load and Transform Data

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import glob

In [2]:
# Path of the HDF5 file holding the low-frequency table.
LF_file = "data/NTNU_LF.h5"
# HDF5 files matching "data/*kW.h5". glob returns matches in arbitrary
# (filesystem-dependent) order, so sort them to make the load order -- and
# therefore the row order of the concatenated table -- reproducible.
HF_files = sorted(glob.glob("data/*kW.h5"))

In [3]:
# Load the low-frequency table and keep a copy of its index in a regular
# column so the original row identity survives later filtering.
with pd.HDFStore(LF_file, 'r') as store:
    LF_df = store.get("LF0000")
LF_df['orig_idx'] = LF_df.index

if not HF_files:
    # Fail here with a clear message instead of a NameError on HF_df later.
    raise FileNotFoundError("HF_files is empty: no high-frequency HDF5 "
                            "files to load")

# Read every high-frequency file, tag its rows with a key derived from the
# file name, and concatenate once at the end (DataFrame.append was removed in
# pandas 2.0 and re-copies the accumulated frame on every call).
HF_frames = []
for HF_file in HF_files:
    # Key = bare file name up to its first dot. Backslashes are normalised
    # to forward slashes first so the directory part is stripped for both
    # Windows and POSIX style paths.
    key = HF_file.replace('\\', '/').rsplit('/', 1)[-1].split('.')[0]
    with pd.HDFStore(HF_file, 'r') as store:
        temp_HF = store.get("T0000")
    temp_HF['key'] = key
    HF_frames.append(temp_HF)
HF_df = pd.concat(HF_frames)

In [6]:
from scipy.signal import welch
def slice_signal(signal, time_window=4000, stepsize=100, rolling=True,
                 pad_front=True):
    """Split a 1-D signal into a 2-D array of fixed-length windows.

    Parameters
    ----------
    signal : array-like
        Input samples; flattened to one dimension before slicing.
    time_window : int
        Number of samples in each window (number of columns returned).
    stepsize : int
        Offset, in samples, between the starts of consecutive rolling
        windows. Only used when ``rolling`` is True.
    rolling : bool
        If True, return overlapping windows taken every ``stepsize`` samples
        from a zero-padded copy of the signal. If False, return
        non-overlapping consecutive windows and drop the trailing samples
        that do not fill a whole window.
    pad_front : bool
        Rolling mode only. If True, ``time_window`` zeros are prepended, so
        row ``k`` holds the ``time_window`` samples that precede sample
        ``k * stepsize`` (row 0 is all zeros). If False, the zeros are
        appended, so row ``k`` starts at sample ``k * stepsize``.

    Returns
    -------
    numpy.ndarray
        Rolling mode: float array of shape
        ``(ceil(len(signal) / stepsize), time_window)``.
        Non-rolling mode: array of shape
        ``(len(signal) // time_window, time_window)`` with the dtype of the
        input.

    Raises
    ------
    ValueError
        If ``time_window`` is smaller than 1, or if ``stepsize`` is smaller
        than 1 in rolling mode.
    """
    if time_window < 1:
        raise ValueError("time_window must be a positive integer")

    sig = np.array(signal).reshape(-1,)
    n_samples = sig.shape[0]

    if not rolling:
        # Keep only as many samples as fill whole windows, then fold them.
        n_windows = n_samples // time_window
        return sig[:n_windows * time_window].reshape(n_windows, time_window)

    if stepsize < 1:
        raise ValueError("stepsize must be a positive integer")

    pad_width = (time_window, 0) if pad_front else (0, time_window)
    padded_sig = np.pad(sig, pad_width, 'constant')

    # One window per start position 0, stepsize, 2*stepsize, ... < n_samples.
    # The padding adds exactly time_window samples, so every slice below is
    # full length and no shape-mismatch fallback is required.
    starts = range(0, n_samples, stepsize)
    new_sig = np.zeros((len(starts), time_window))
    for row, start in enumerate(starts):
        new_sig[row, :] = padded_sig[start:start + time_window]

    return new_sig
def gen_welch(HF_df, col, LF_df, sample_length=8192, fs=50000, stepsize=200):
    """Build a table of Welch spectrum features for one signal column.

    The column ``col`` of ``HF_df`` is cut into rolling windows with
    ``slice_signal`` (one window every ``stepsize`` samples), a Welch
    spectrum is computed for each window, and the lower half of the returned
    frequency bins is stored as one feature column per bin.

    Parameters
    ----------
    HF_df : pandas.DataFrame
        Table providing the signal column ``col`` and a ``'TIME'`` column.
    col : str
        Name of the signal column; also used as prefix of the feature names
        (``"<col>_welch_<frequency>"``).
    LF_df : pandas.DataFrame
        Table providing an ``'orig_idx'`` column with exactly one entry per
        window; it becomes the index of the result.
    sample_length : int
        Number of samples per window.
    fs : float
        Sampling frequency passed to ``scipy.signal.welch``.
    stepsize : int
        Offset in samples between consecutive windows; the ``'TIME'`` column
        is sub-sampled with the same step so rows stay aligned.

    Returns
    -------
    pandas.DataFrame
        Columns ``'TIME'``, the Welch feature columns and ``'orig_idx'``,
        indexed by ``'orig_idx'``.

    Raises
    ------
    ValueError
        If the number of ``'orig_idx'`` values in ``LF_df`` differs from the
        number of windows.
    """
    # Single definition of the Welch settings, shared by every call below.
    welch_kwargs = dict(fs=fs, nperseg=512, noverlap=80, detrend='linear',
                        scaling='spectrum', return_onesided=True)

    raw_sig = slice_signal(HF_df[col].values, time_window=sample_length,
                           stepsize=stepsize)

    # The frequency grid depends only on the window length and the settings,
    # not on the data, so take it from an all-zero window. This also works
    # when the input has no rows at all.
    f, _ = welch(np.zeros(sample_length), **welch_kwargs)
    n_keep = len(f) // 2  # keep only the lower half of the frequency bins
    welch_labels = ["{}_welch_{}".format(col, freq) for freq in f[:n_keep]]

    welch_data = np.zeros((raw_sig.shape[0], n_keep))
    for i, temp_sig in enumerate(raw_sig):
        _, wel = welch(temp_sig, **welch_kwargs)
        welch_data[i, :] = np.abs(wel)[:n_keep]

    times = HF_df['TIME'].values[::stepsize]
    orig_idx = LF_df['orig_idx'].values
    if len(orig_idx) != len(times):
        raise ValueError(
            "LF_df provides {} 'orig_idx' values but the signal yields {} "
            "windows".format(len(orig_idx), len(times)))

    welch_df = pd.DataFrame(welch_data, columns=welch_labels)
    welch_df.insert(0, 'TIME', times)
    welch_df['orig_idx'] = orig_idx
    welch_df = welch_df.set_index('orig_idx', drop=False)

    return welch_df

In [7]:
# For every distinct key in the high-frequency table, compute the Welch
# feature table of 'upper_mic_theta_000' from the matching rows of both tables
# and store it under "T0000" in its own HDF5 file.
for key in HF_df['key'].unique():
    hf_mask = HF_df['key'] == key
    lf_mask = LF_df['key'] == key
    temp_df = HF_df.loc[hf_mask]
    temp_welch = gen_welch(temp_df, 'upper_mic_theta_000', LF_df.loc[lf_mask])
    out_path = 'data/{}_welch.h5'.format(key)
    with pd.HDFStore(out_path) as store:
        store.put("T0000", temp_welch)