In [1]:
import numpy as np
import pandas as pd
import peakutils

from peakutils.plot import plot as pplot
import matplotlib.pyplot as plt
from pylab import savefig

import re
import time
import os

In [2]:
# Project label; used as the prefix of the output .csv filename written by the blink-counting cell.
project = 'BaGELS'

# Folder that is walked for OpenBCI '.txt' recordings (relative to the notebook's working directory).
dataFolder = 'data/'

In [3]:
def getSubjectNumber(filename):
    '''
    Extract the subject number from a recording's filename.

    Parameters
    ----------
    filename : str, e.g. 'OpenBCI-RAW-28211_SBR_Pre1.txt'.

    Returns
    -------
    str holding the first run of digits in `filename`. Runs longer than six digits are cut to
    their first six digits.

    Raises
    ------
    IndexError if `filename` contains no digits at all.
    '''
    digitRunPattern = '''
    # filename will be something like 'OpenBCI-RAW-28211_SBR_Pre1.txt'
    # Regex looks for a 1-to-6 digit string in the filename, as used by CCDL.
    ([0-9]{1,6})
    '''
    digitRuns = re.compile(digitRunPattern, re.VERBOSE).findall(filename)

    # Only the first match is the subject number; later digit runs (e.g. the '1' in 'Pre1') are ignored.
    return digitRuns[0]

In [4]:
def _cleanChannel(channel, period, highFreqCutoffBin, smoothingWindow):
    '''
    Difference, FFT-filter, and smooth a single channel.

    Returns a float Series with a fresh 0-based index that is `period` samples shorter than `channel`.
    '''
    # (1) Differencing. [period:] drops the leading NaN(s) created by shift().
    differenced = (channel - channel.shift(periods=period))[period:]

    # (2) FFT denoising: zero the DC bin and every bin from highFreqCutoffBin upward.
    spectrum = np.fft.fft(differenced.values)
    spectrum[0:1] = 0
    spectrum[highFreqCutoffBin:] = 0 # ToDo: Refine high frequency cutoff
    # NOTE(review): the cut above also zeroes the upper half of the FFT array (the negative-frequency
    # bins), so the inverse transform is complex rather than purely real. Only the real part is kept,
    # explicitly, so the smoothing step below never has to deal with a complex dtype.
    filtered = np.fft.ifft(spectrum).real

    # (3) Smoothing. The first smoothingWindow-1 values of a rolling mean are NaN.
    return pd.Series(filtered).rolling(window=smoothingWindow).mean()


def tsClean(df, seconds=30, highFreqCutoffBin=2500, smoothingWindow=25):
    '''
    Function to clean data by (1) making timeseries stationary, (2) removing low & high frequency signal components,
    (3) smoothing, and (4) discarding the first and last X seconds of the time series.

    Parameters
    ----------
    df : Pandas DataFrame containing OpenBCI Ganglion or Cyton data where column 0 is the sample number, column 1
         is Channel One, and column 2 is Channel Two. Other column data is discarded.

    seconds : Length of time series data to discard at the beginning and end of recording. 30 seconds by default.

    highFreqCutoffBin : Index of the first FFT bin that is zeroed; every bin from here upward is removed.
         2500 by default. This is a bin index, not a frequency in Hz.

    smoothingWindow : Number of samples in the rolling-mean window. 25 by default.

    Returns
    -------
    DataFrame with stationary, denoised, smoothed, and truncated data. Column 0 holds Channel One and
    column 1 holds Channel Two. Values are real floats; the index keeps the positions of the rows that
    survived truncation. The result is empty when the recording is shorter than the discarded margins.
    '''

    # NOTE(review): the largest sample index is used as a stand-in for the sampling rate -- confirm this
    # matches the board's actual rate.
    samplingRate = np.max(df[0])
    # int() keeps the slice bounds integral even if the sample-index column was parsed as float.
    lowerBoundary = int(samplingRate*seconds) # lower cutoff for sEBR recording
    upperBoundary = len(df[1]) - lowerBoundary # upper cutoff for sEBR recording

    period = 1
    channelOne = _cleanChannel(df[1], period, highFreqCutoffBin, smoothingWindow)
    channelTwo = _cleanChannel(df[2], period, highFreqCutoffBin, smoothingWindow)

    # Assemble a fresh two-column frame instead of assigning a column onto a slice of another frame.
    cleaned = pd.concat([channelOne, channelTwo], axis=1, keys=[0, 1])

    # (4) Truncating
    return cleaned.iloc[lowerBoundary:upperBoundary]

In [5]:
%%capture
# Counting blinks for all .txt files in data directory (subfolders included).
# Creates .csv with subject number and blinks for each channel, saved in root directory.

def _savePeakPlot(signal, peakIndexes, outputPath):
    '''Plot one cleaned channel with its detected peaks marked and save the figure to outputPath.'''
    plt.figure(figsize=(100,15), dpi = 300)
    pplot(np.arange(len(signal)), signal, peakIndexes)
    plt.savefig(outputPath)
    plt.close() # one figure is created per channel per file; close it so figures do not pile up


# Make sure the plot folder exists once, up front, rather than re-checking for every file.
if not os.path.exists('./tsPlots'):
    os.makedirs('./tsPlots')

subjectNumberList = []
channelOneBlinkList = []
channelTwoBlinkList = []
for folderName, subfolders, filenames in os.walk(dataFolder):

    for file in filenames:
        if not file.endswith('.txt'):
            continue

        # os.walk descends into subfolders, so the path must be built from the folder currently
        # being visited; prefixing dataFolder only works for files at the top level.
        raw = pd.read_table(os.path.join(folderName, file), sep = ',', skiprows=6, header=None)

        subjectNo = getSubjectNumber(file)
        processed = tsClean(raw)

        # .values yields the underlying ndarray (Series.as_matrix is deprecated/removed in later pandas).
        channelOne = processed[0].values
        channelTwo = processed[1].values

        c1_peakIndexes = peakutils.indexes(channelOne, min_dist=15, thres=0.60)
        c2_peakIndexes = peakutils.indexes(channelTwo, min_dist=15, thres=0.60)

        # Append all three together so the lists always stay the same length.
        subjectNumberList.append(subjectNo)
        channelOneBlinkList.append(len(c1_peakIndexes))
        channelTwoBlinkList.append(len(c2_peakIndexes))

        # Plotting timeseries
        _savePeakPlot(channelOne, c1_peakIndexes, './tsPlots/' + subjectNo + '_channelOne.png')
        _savePeakPlot(channelTwo, c2_peakIndexes, './tsPlots/' + subjectNo + '_channelTwo.png')


currentDateTime = time.strftime("%m.%d.%Y_%H.%M%p")
filename = project + '_blinkDetect_v4.1_output_' + currentDateTime + '.csv'

# The context manager closes the file even if a write fails part-way through.
with open(filename, 'w') as blinkOutput:
    blinkOutput.write('SubjectNo' + ',' + 'channelOneBlinks' + ',' + 'channelTwoBlinks' + '\n')
    for subjectEntry, c1_count, c2_count in zip(subjectNumberList, channelOneBlinkList, channelTwoBlinkList):
        blinkOutput.write(str(subjectEntry) + ',' + str(c1_count) + ',' + str(c2_count) + '\n')

In [None]:
# Completion notice for the user; the output it refers to is the .csv written by the blink-counting cell.
print('sEBR count finished. Please check root folder for output.')