<a href="https://colab.research.google.com/github/thesalmonification/DSCI400_Revamp/blob/master/BDF_Full_Converter.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Imports

In [0]:
import numpy as np
import pandas as pd
import os
import matplotlib.pyplot as plt
import glob
import xml.etree.ElementTree as ET
import mne

Collecting mne
[?25l  Downloading https://files.pythonhosted.org/packages/a1/7c/ad1b52a3fdd4be8f55e183f1eff7d76f48cd1bee83c5630f9c26770e032e/mne-0.19.2-py3-none-any.whl (6.4MB)
[K     |████████████████████████████████| 6.4MB 2.8MB/s 
Installing collected packages: mne
Successfully installed mne-0.19.2


Connect to Shared Drive

In [0]:
# This notebook was written on Google Colab, so the shared drive has to be
# mounted before any of the session files can be reached.
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

# Legacy setup from when the notebook ran on a local machine (kept for reference):
#os.chdir('/media/duncan/DATA/hci_db_raw/Sessions')
#subfolders = os.listdir()

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


List all sessions to parse

In [0]:
# List every entry of the Sessions directory, which holds the raw BDF
# recordings of the experiment (one entry per session).
# os.listdir returns names in arbitrary order, so sort them to make the
# processing order -- and anything derived from it -- reproducible across runs.
subfolders = sorted(os.listdir('/content/drive/Shared drives/DSCI400/Sessions'))

Parse the EEG waveforms and save them to HDF5 files

In [0]:
#List of EEG channels used in this study
#['Fp1', 'AF3', 'F3', 'F7', 'FC5', 'FC1', 'C3', 'T7', 'CP5', 'CP1', 'P3', 'P7', 'PO3', 'O1', 'Oz', 'Pz', 'Fp2', 'AF4', 'Fz', 'F4', 'F8', 'FC6', 'FC2', 'Cz', 'C4', 'T8', 'CP6', 'CP2', 'P4', 'P8', 'PO4', 'O2']

# Locations on the mounted shared drive.
SESSIONS_DIR = '/content/drive/Shared drives/DSCI400/Sessions'
LABEL_STORE = '/content/drive/Shared drives/DSCI400/Label_Data.h5'
WAVEFORM_STORE = '/content/drive/Shared drives/DSCI400/Waveform_Data.h5'

# Attributes copied from the root element of session.xml; any that are
# missing from a given file are simply left out of that session's labels.
LABEL_ATTRIBUTES = ('feltEmo', 'feltArsl', 'feltVlnc', 'feltCtrl', 'feltPred', 'mediaFile')

# Only the first 32 channel names of each recording are kept (the EEG
# channels listed above).
N_EEG_CHANNELS = 32

# Samples whose magnitude is below this value are treated as zero before
# trimming (added 2/18 to drop the portions where the EEG is "shut off").
EEG_OFF_THRESHOLD = 3e-8


def _trim_to_common_window(channel_rows, threshold=EEG_OFF_THRESHOLD):
    """Zero sub-threshold samples and trim every channel to one shared window.

    Each channel is processed as before: samples with ``abs(x) < threshold``
    are set to 0 and the leading/trailing zeros define that channel's active
    window.  Instead of trimming every channel on its own (which produces
    rows of different lengths as soon as two channels disagree, and then
    fails when they are stacked), the windows are intersected so that all
    channels keep exactly the same sample range and stay aligned in time.
    When all channels agree on the window, the result is identical to
    trimming each channel separately.

    Parameters
    ----------
    channel_rows : sequence of 1-D arrays, all of the same length
        One row of samples per channel.
    threshold : float
        Magnitude below which a sample is replaced by 0.

    Returns
    -------
    numpy.ndarray of shape (n_channels, n_kept)
        ``n_kept`` is 0 when a channel has no sample at or above the
        threshold, or when the per-channel windows do not overlap.
    """
    if len(channel_rows) == 0:
        return np.empty((0, 0))

    # np.array copies, so the caller's data is never modified in place.
    stacked = np.array(channel_rows, dtype=float)
    stacked[np.abs(stacked) < threshold] = 0

    start, stop = 0, stacked.shape[1]
    for row in stacked:
        active = np.flatnonzero(row)
        if active.size == 0:
            return stacked[:, :0]
        start = max(start, active[0])
        stop = min(stop, active[-1] + 1)

    # max() guards against disjoint windows (stop < start) -> empty result.
    return stacked[:, start:max(start, stop)]


for subfolder in subfolders:
    print(subfolder)

    # Work with absolute paths instead of os.chdir(): no hidden dependence
    # on the current working directory, and stray files in the Sessions
    # directory are skipped instead of crashing the loop.
    session_dir = os.path.join(SESSIONS_DIR, subfolder)
    if not os.path.isdir(session_dir):
        continue

    # glob.escape keeps unusual characters in the folder name from being
    # read as wildcards; sorted() makes the choice of bdf_files[0] stable.
    bdf_files = sorted(glob.glob(os.path.join(glob.escape(session_dir), '*.bdf')))
    xml_path = os.path.join(session_dir, 'session.xml')
    if (len(bdf_files) == 0) or not os.path.isfile(xml_path):
        continue #case where no .bdf/.xml file saved so useless to keep

    #Parse the .xml file and get the emotional attributes
    root_dict = ET.parse(xml_path).getroot().attrib
    emotion_dict = {k: root_dict[k] for k in LABEL_ATTRIBUTES if k in root_dict}
    label_df = pd.DataFrame.from_dict(emotion_dict, orient='index', columns=['Value'])
    label_df.index.name = 'Label'

    print(label_df)

    label_df.to_hdf(LABEL_STORE, key='t' + subfolder + '_labels', complevel=4, format='table')

    #Open the raw data of the (single) recording of this session
    raw = mne.io.read_raw_bdf(bdf_files[0], preload=True)
    chs = raw.info['ch_names'][:N_EEG_CHANNELS]

    # raw[ch] returns (data, times); data has one row, hence the [0][0].
    data_array = _trim_to_common_window([raw[ch][0][0] for ch in chs])

    if data_array.shape[1] == 0:
        # Nothing left after trimming: there is no waveform to store.
        print('No samples left after trimming, waveform not saved for ' + subfolder)
        print('-----------------------------------')
        continue

    # One row per sample, one column per channel (same layout as before).
    df = pd.DataFrame(data_array.T, columns=chs)

    print(df.head())

    df.to_hdf(WAVEFORM_STORE, key='t' + subfolder, complevel=4, format='table')
    print('-----------------------------------')

Build key files to make parsing the HDF5 files easier

In [0]:
# Save Waveform_Data_Keys.csv and Label_Data_Keys.csv, which list the keys
# used inside Waveform_Data.h5 and Label_Data.h5 so those stores can be
# parsed with ease later.
#
# NOTE(review): a key is generated for every entry of `subfolders`, including
# sessions the conversion loop skipped for lack of a .bdf/.xml file -- confirm
# that downstream readers tolerate keys that are absent from the stores.

# Key naming mirrors the conversion loop: 't<session>' for waveforms and
# 't<session>_labels' for labels.
waveform_keys = ['t' + folder for folder in subfolders]
label_keys = ['t' + folder + '_labels' for folder in subfolders]

# Fixed: the DataFrame constructor lives on the pandas module (`pd`), not on
# the `df` instance left over from the previous cell, and the loop variable
# was misspelled `susbfolders`.
waveform_df = pd.DataFrame(waveform_keys)
label_df = pd.DataFrame(label_keys)

waveform_df.to_csv('/content/drive/Shared drives/DSCI400/Waveform_Data_Keys.csv')
label_df.to_csv('/content/drive/Shared drives/DSCI400/Label_Data_Keys.csv')