In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
metadata = pd.read_csv("train_final.csv", index_col=0)

In [3]:
metadata

Unnamed: 0,eeg_id,offset_bins,eeg_sub_id,eeg_label_offset_seconds,spectrogram_id,spectrogram_sub_id,spectrogram_label_offset_seconds,label_id,patient_id,expert_consensus,...,lpd_vote,gpd_vote,lrda_vote,grda_vote,other_vote,eeg_50sec_nan_row_count,eeg_10sec_nan_row_count,spectrogram_600sec_nan_row_count,spectrogram_10sec_nan_row_count,total_votes
0,568657,0,0,0.0,789577333,0,0.0,1825637311,20654,Other,...,0.000000,0.25,0.000000,0.166667,0.583333,0,0,0,0,12
1,582999,0,0,0.0,1552638400,0,0.0,1722186807,20230,LPD,...,0.857143,0.00,0.071429,0.000000,0.071429,0,0,0,0,14
2,582999,2,6,20.0,1552638400,6,20.0,2663298457,20230,LPD,...,0.857143,0.00,0.071429,0.000000,0.071429,0,0,0,0,14
3,642382,0,0,0.0,14960202,12,1008.0,3254468733,5955,Other,...,0.000000,0.00,0.000000,0.000000,1.000000,0,0,0,0,1
4,642382,2,1,24.0,14960202,13,1032.0,2552357208,5955,Other,...,0.000000,0.00,0.000000,0.000000,1.000000,0,0,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
31234,4293144208,0,0,0.0,1224582295,1,234.0,3884397953,64445,Other,...,0.000000,0.00,0.000000,0.000000,1.000000,0,0,0,0,3
31235,4293306306,0,0,0.0,819682076,3,168.0,1974235411,37409,GPD,...,0.066667,0.40,0.000000,0.133333,0.400000,0,0,0,0,15
31236,4293354003,0,0,0.0,1188113564,0,0.0,447244163,16610,GRDA,...,0.000000,0.00,0.000000,0.500000,0.500000,0,0,0,0,2
31237,4293843368,0,0,0.0,1549502620,0,0.0,1618953053,15065,GRDA,...,0.000000,0.00,0.000000,0.500000,0.500000,0,0,0,0,2


In [4]:
def get_relative_signals(data):
    LL_data = pd.DataFrame()
    RL_data = pd.DataFrame()
    LP_data = pd.DataFrame()
    RP_data = pd.DataFrame()
    C_data = pd.DataFrame()

    LL_data['Fp1 - F7'] = data['Fp1'] - data['F7']
    LL_data['F7 - T3'] = data['F7'] - data['T3']
    LL_data['T3 - T5'] = data['T3'] - data['T5']
    LL_data['T5 - O1'] = data['T5'] - data['O1']

    RL_data['Fp2 - F8'] = data['Fp2'] - data['F8']
    RL_data['F8 - T4'] = data['F8'] - data['T4']
    RL_data['T4 - T6'] = data['T4'] - data['T6']
    RL_data['T6 - O2'] = data['T6'] - data['O2']

    LP_data['Fp1 - F3'] = data['Fp1'] - data['F3']
    LP_data['F3 - C3'] = data['F3'] - data['C3']
    LP_data['C3 - P3'] = data['C3'] - data['P3']
    LP_data['P3 - O1'] = data['P3'] - data['O1']

    RP_data['Fp2 - F4'] = data['Fp2'] - data['F4']
    RP_data['F4 - C4'] = data['F4'] - data['C4']
    RP_data['C4 - P4'] = data['C4'] - data['P4']
    RP_data['P4 - O2'] = data['P4'] - data['O2']

    C_data['Fz - Cz'] = data['Fz'] - data['Cz']
    C_data['Cz - Pz'] = data['Cz'] - data['Pz']
    
    return(LL_data, RL_data, LP_data, RP_data, C_data, pd.DataFrame(data['EKG']))

In [5]:
def eeg_plot(LL_data, RL_data, LP_data, RP_data, C_data):
    fig, axs = plt.subplots(5, figsize=(24,12))
    
    axs[0].plot(LL_data['Fp1 - F7'], label='Fp1 - F7')
    axs[0].plot(LL_data['F7 - T3'], label='F7 - T3')
    axs[0].plot(LL_data['T3 - T5'], label='T3 - T5')
    axs[0].plot(LL_data['T5 - O1'], label='T5 - O1')
    axs[0].legend(loc='upper right')
    axs[0].set_title('LL')

    axs[1].plot(RL_data['Fp2 - F8'], label='Fp2 - F8')
    axs[1].plot(RL_data['F8 - T4'], label='F8 - T4')
    axs[1].plot(RL_data['T4 - T6'], label='T4 - T6')
    axs[1].plot(RL_data['T6 - O2'], label='T6 - O2')
    axs[1].legend(loc='upper right')
    axs[1].set_title('RL')

    axs[2].plot(LP_data['Fp1 - F3'], label='Fp1 - F3')
    axs[2].plot(LP_data['F3 - C3'], label='F3 - C3')
    axs[2].plot(LP_data['C3 - P3'], label='C3 - P3')
    axs[2].plot(LP_data['P3 - O1'], label='P3 - O1')
    axs[2].legend(loc='upper right')
    axs[2].set_title('LP')

    axs[3].plot(RP_data['Fp2 - F4'], label='Fp2 - F4')
    axs[3].plot(RP_data['F4 - C4'], label='F4 - C4')
    axs[3].plot(RP_data['C4 - P4'], label='C4 - P4')
    axs[3].plot(RP_data['P4 - O2'], label='P4 - O2')
    axs[3].legend(loc='upper right')
    axs[3].set_title('RP')

    axs[4].plot(C_data['Fz - Cz'], label='Fz - Cz')
    axs[4].plot(C_data['Cz - Pz'], label='Cz - Pz')
    axs[4].legend(loc='upper right')
    axs[4].set_title('C')    
    
    plt.tight_layout()
    plt.show()

In [6]:
# Define EEG bands
eeg_bands = {'Delta': (0, 4),
             'Theta': (4, 8),
             'Alpha': (8, 12),
             'Beta': (12, 30),
             'Gamma': (30, 45)}

In [7]:
def get_frequencies(data):
    # Get real amplitudes of FFT (only in postive frequencies)
    fft_vals = np.absolute(np.fft.rfft(data.to_numpy(), axis=0))
    
    # Get frequencies for amplitudes in Hz
    fft_freq = np.fft.rfftfreq(len(data), 1.0/512)
    
    # Take the mean of the fft amplitude for each EEG band
    eeg_band_fft = dict()
    for band in eeg_bands:  
        freq_ix = np.where((fft_freq >= eeg_bands[band][0]) & 
                           (fft_freq <= eeg_bands[band][1]))[0]
        eeg_band_fft[band] = np.mean(fft_vals[freq_ix])

    return eeg_band_fft

In [8]:
def plot_frequencies(eeg_band_fft):
    df = pd.DataFrame(columns=['band', 'val'])
    df['band'] = eeg_bands.keys()
    df['val'] = [eeg_band_fft[band] for band in eeg_bands]
    ax = df.plot.bar(x='band', y='val', legend=False)
    ax.set_xlabel("EEG band")
    ax.set_ylabel("Mean band Amplitude")

In [9]:
data_list = ["LL_data", "RL_data", "LP_data", "RP_data", "C_data", "EKG_data"]

In [10]:
def get_freq_data(eeg_id, offset):
    eeg_data = pd.read_parquet(f'train_eegs/{eeg_id}.parquet', engine='fastparquet')

    if(~eeg_data.ffill().isna().any().any()):
        eeg_data.ffill(inplace=True)
    else:
        eeg_data.bfill(inplace=True)
    
    mid = (int(offset) + 25) * 200
    data = eeg_data.iloc[mid - 1000 : mid + 1000, :]

    LL_data, RL_data, LP_data, RP_data, C_data, EKG_data = get_relative_signals(data)
    
    freq_df = pd.DataFrame({"band": ["Delta", "Theta", "Alpha", "Beta", "Gamma"]})
    i = 0
    for d in [LL_data, RL_data, LP_data, RP_data, C_data, EKG_data]:
        eeg_band_fft = get_frequencies(d.mean(axis=1))
        freq_df[data_list[i]] = pd.DataFrame({data_list[i]: list(eeg_band_fft.values())})
        i += 1
    freq_df.set_index("band", inplace=True)
    freq_df = freq_df.unstack().to_frame().T
    freq_df.columns = freq_df.columns.map('{0[0]}_{0[1]}'.format)
    
    data50 = eeg_data.iloc[mid - 5000 : mid + 5000, :]

    LL_data, RL_data, LP_data, RP_data, C_data, EKG_data = get_relative_signals(data50)
    
    freq_df50 = pd.DataFrame({"band": ["Delta", "Theta", "Alpha", "Beta", "Gamma"]})
    i = 0
    for d in [LL_data, RL_data, LP_data, RP_data, C_data, EKG_data]:
        eeg_band_fft = get_frequencies(d.mean(axis=1))
        freq_df50[data_list[i]] = pd.DataFrame({data_list[i]: list(eeg_band_fft.values())})
        i += 1
    freq_df50.set_index("band", inplace=True)
    freq_df50 = freq_df50.unstack().to_frame().T
    freq_df50.columns = freq_df50.columns.map('{0[0]}_{0[1]}'.format)
    
    return(freq_df, freq_df50)

In [11]:
df_temp = pd.DataFrame()

for index, row in metadata.iterrows():
    freq_df, freq_df50 = get_freq_data(row['eeg_id'], row['eeg_label_offset_seconds'])

    new_names = {}
    for c in freq_df50.columns:
        new_names[c] = c+'50'
    freq_df50.rename(columns=new_names, inplace=True)

    temp = pd.concat( [ pd.DataFrame(row[['eeg_id', 'eeg_label_offset_seconds']]).T.reset_index(drop=True), freq_df, freq_df50], axis=1  )
    df_temp = pd.concat([df_temp, temp], axis=0)
    

In [12]:
df_temp.head(10)

Unnamed: 0,eeg_id,eeg_label_offset_seconds,LL_data_Delta,LL_data_Theta,LL_data_Alpha,LL_data_Beta,LL_data_Gamma,RL_data_Delta,RL_data_Theta,RL_data_Alpha,...,C_data_Delta50,C_data_Theta50,C_data_Alpha50,C_data_Beta50,C_data_Gamma50,EKG_data_Delta50,EKG_data_Theta50,EKG_data_Alpha50,EKG_data_Beta50,EKG_data_Gamma50
0,568657,0.0,8010.578059,2971.468897,1924.555911,985.575931,238.797026,8430.884711,3147.872343,1494.102237,...,25508.158589,11281.712701,7832.951854,4711.258393,2080.350816,1053045.0,277137.86646,461404.903785,274709.784096,618072.060417
0,582999,0.0,5333.503738,854.648812,389.599933,212.736487,109.88013,2823.464721,1215.050483,577.812989,...,8209.92164,3038.644196,1187.551197,644.29016,287.484984,2273.423,730.315887,535.56828,270.153198,150.479892
0,582999,20.0,7679.9574,842.794335,510.446204,299.402825,152.557449,4163.973685,579.421425,335.374285,...,7604.129933,2920.542221,1118.270078,610.328315,284.182693,2255.297,720.363933,492.873816,270.916435,145.381178
0,642382,0.0,2252.19017,380.869613,357.070358,227.594601,77.020779,2958.965543,482.515635,462.919713,...,6693.550589,1927.547525,1994.998592,1109.63523,354.583427,16240.88,3588.627931,1809.419995,3035.71478,3558.494688
0,642382,24.0,1694.726835,752.171597,473.751305,245.594889,60.258457,2056.015141,726.681767,518.93989,...,6549.241663,2392.034494,2434.627375,1116.550666,355.3607,16646.14,3632.473414,1932.224674,2888.141855,3218.003455
0,751790,0.0,3517.297695,2763.785048,1805.869995,528.528897,165.162628,1832.116619,2434.664818,2208.447382,...,7841.657157,9595.54107,7260.865761,2239.346245,496.971983,30767.17,26383.297496,14275.02574,6031.351614,2445.204966
0,778705,0.0,1554.10471,818.83485,599.3489,303.726635,164.400528,4263.1303,1660.8754,995.2673,...,19329.622475,4353.772604,2130.774465,971.817392,334.783911,6048.307,3677.966327,5189.821974,4926.852928,4588.447859
0,1629671,0.0,1564.665171,422.964776,230.556254,117.004272,60.114413,3191.657871,680.416119,249.141145,...,6239.910507,1177.773475,609.65691,333.416992,208.982057,63664.99,13156.441747,13810.508201,11233.425307,7801.956313
0,1629671,46.0,1684.387595,365.579645,153.160242,106.896991,66.036336,2378.3177,522.054962,299.940436,...,5189.112713,1177.915177,573.053899,433.45325,261.010234,14415.21,9089.431458,11656.927998,10558.066274,7873.850893
0,1629671,68.0,1175.953622,187.618345,123.854238,110.501845,65.430579,2706.048739,330.309223,174.171621,...,5341.957796,1481.696939,613.376411,442.392987,243.728422,14662.79,8859.414523,11727.97736,10726.211917,7726.936226


In [14]:
merged = metadata.merge(df_temp, on=['eeg_id', 'eeg_label_offset_seconds'], how='left') 

In [17]:
merged.to_csv('train_final_with_freq_bands.csv')

In [None]:
#plot_frequencies(eeg_band_fft)