In [78]:
import numpy as np
import pandas as pd
import peakutils

from peakutils.plot import plot as pplot
import matplotlib.pyplot as plt
from pylab import savefig

import re
import time
import os

In [79]:
# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# Ask the inline backend for high-resolution ('retina') figure output.
%config InlineBackend.figure_format = 'retina'

In [80]:
# Selects the filename parser used by the blink-counting cell:
# 'BaGELS' -> getBaGELSSubjectNumber; any other value -> getSubjectNumber.
project = 'BaGELS' # or 'other' for uCLIMB, etc. 

# Folder that is walked (via os.walk) for .txt recordings.
dataFolder = 'data/'

In [81]:
def getSubjectNumber(filename):
    '''
    Extract the 5-digit subject number from a recording filename.

    Filenames look like 'OpenBCI-RAW-28211_SBR_Pre1.txt'. Separators are
    inconsistent, so the name is parsed only by looking for the first run
    of five consecutive digits.

    Parameters
    ----------
    filename : str
        Name of the recording file.

    Returns
    -------
    str
        The first five consecutive digits found in filename.

    Raises
    ------
    IndexError
        If filename contains no run of five consecutive digits.
    '''
    # Raw string: a backslash-d in a plain string literal is an invalid escape sequence.
    match = re.search(r'\d{5}', filename)
    if match is None:
        # Same exception type as indexing an empty findall() result, but the
        # message now says which file could not be parsed.
        raise IndexError('No 5-digit subject number found in filename: ' + repr(filename))

    return match.group(0)

In [82]:
def getBaGELSSubjectNumber(filename):
    '''
    Extract the 4-digit BaGELS subject number from a recording filename.

    Filenames look like 'OpenBCI-RAW-BaGELS_sEBR_9001.txt'. Separators are
    inconsistent, so the name is parsed only by looking for the first run
    of four consecutive digits.

    Parameters
    ----------
    filename : str
        Name of the recording file.

    Returns
    -------
    str
        The first four consecutive digits found in filename.

    Raises
    ------
    IndexError
        If filename contains no run of four consecutive digits.
    '''
    # Raw string: a backslash-d in a plain string literal is an invalid escape sequence.
    match = re.search(r'\d{4}', filename)
    if match is None:
        # Same exception type as indexing an empty findall() result, but the
        # message now says which file could not be parsed.
        raise IndexError('No 4-digit subject number found in filename: ' + repr(filename))

    return match.group(0)

In [83]:
def tsDenoise(df):
    '''
    Denoise both recording channels: remove drift, trim the edges, and zero selected FFT bins.

    Each channel is processed independently:
      1. subtract its own 15-second rolling mean (removes slow signal drift),
      2. drop the first and last 30 seconds of samples,
      3. FFT, zero bin 0 and every bin from index 2000 upward, inverse FFT.

    Parameters
    ----------
    df : pandas.DataFrame
        Raw recording with integer column labels. The maximum of column 0 is
        used as the sampling rate (samples per second); columns 1 and 2 are
        the two signal channels.

    Returns
    -------
    pandas.DataFrame
        Column 0 holds processed channel one (from df[1]) and column 1 holds
        processed channel two (from df[2]). Values are complex, as returned
        by np.fft.ifft; rows are the samples that remain after trimming.

    Raises
    ------
    ValueError
        If the recording is too short to keep any samples after trimming
        30 seconds from each end.
    '''
    # Cast to a plain int so the value is usable as a rolling window and slice bound
    # even if column 0 was parsed as float.
    samplingRate = int(np.max(df[0]))
    trimSamples = samplingRate * 30   # samples discarded at each end (30 seconds)
    driftWindow = samplingRate * 15   # rolling-mean width (15 seconds)
    upperBoundary = len(df[1]) - trimSamples

    if upperBoundary <= trimSamples:
        raise ValueError('Recording is too short: no samples remain after trimming '
                         '30 seconds from each end.')

    def _cleanChannel(channel):
        '''Detrend, trim and FFT-filter one channel; returns a complex ndarray.'''
        # Each channel is detrended with its OWN rolling mean.
        stationary = channel - channel.rolling(window=driftWindow).mean()

        # The leading NaNs produced by the rolling window fall inside the trimmed region.
        relevant = stationary.iloc[trimSamples:upperBoundary]

        spectrum = np.fft.fft(np.asarray(relevant))  # time series -> frequency space
        spectrum[0:1] = 0     # drop the DC component
        # NOTE(review): for long recordings the bins from 2000 upward also include the
        # negative-frequency half of the spectrum, so the inverse transform has a
        # non-zero imaginary part. The frequency that bin 2000 maps to depends on the
        # recording length. Kept unchanged so downstream results are preserved.
        spectrum[2000:] = 0
        return np.fft.ifft(spectrum)  # back to the time domain

    channelOneTwo_ifft = pd.DataFrame({0: _cleanChannel(df[1])})
    channelOneTwo_ifft[1] = _cleanChannel(df[2])

    return channelOneTwo_ifft

In [84]:
%%capture
# Counting blinks for all .txt files in data directory. 
# Creates .csv with subject number and blinks for each channel, saved in root directory. 
subjectNumberList = []
channelOneBlinkList = []
channelTwoBlinkList = []
for folderName, subfolders, filenames in os.walk(dataFolder):

    for file in filenames:
        if file.endswith('.py'):
            pass
        elif file.endswith('.ipynb'):
            pass
        elif file.endswith('.txt'):
            raw = pd.read_table(dataFolder + file, sep = ',', skiprows=6, header=None)

            if project == 'BaGELS':
                subjectNo = getBaGELSSubjectNumber(file)
                subjectNumberList.append(subjectNo)
            else:
                subjectNo = getSubjectNumber(file)
                subjectNumberList.append(subjectNo)
            
            processed = tsDenoise(raw)
            
            channelOnePeaksIndexes = peakutils.indexes(processed[0], min_dist=25, thres=0.75)
            channelTwoPeaksIndexes = peakutils.indexes(processed[1], min_dist=25, thres=0.75)
            
            channelOneBlinks = len(channelOnePeaksIndexes)
            channelTwoBlinks = len(channelTwoPeaksIndexes)
            channelOneBlinkList.append(channelOneBlinks)
            channelTwoBlinkList.append(channelTwoBlinks)
            
            
            # Plotting timeseries
        
            if not os.path.exists('./tsPlots'):
                os.makedirs('./tsPlots')
            
            x = np.arange(len(processed[0]))
            y = processed[0]
            plt.figure(figsize=(30,10))
            pplot(x, y, channelOnePeaksIndexes)
            savefig('./tsPlots/' + subjectNo + '_channelOne.png')
            plt.close()

            x = np.arange(len(processed[1]))
            y = processed[1]            
            plt.figure(figsize=(30,10))
            pplot(x, y, channelTwoPeaksIndexes)
            savefig('./tsPlots/' + subjectNo + '_channelTwo.png')
            plt.close()            


currentDateTime = time.strftime("%m.%d.%Y_%H.%M%p")
filename = './getBlinks_output_' + currentDateTime + '.csv'
blinkOutput = open(filename, 'w')

blinkOutput.write('SubjectNo' + ',' + 'channelOneBlinks' + ',' + 'channelTwoBlinks' + '\n')
zipped = zip(subjectNumberList, channelOneBlinkList, channelTwoBlinkList)
for i, j, k in zipped:
    blinkOutput.write(str(i) + ',' + str(j) + ',' + str(k) + '\n')
blinkOutput.close()

print('sEBR count finished. Please check root folder for output.')