### Importing Libraries

In [56]:
import os
import datetime
import pandas as pd
from scipy import signal

### Setting Constants

In [57]:
# Base folder of the dataset, relative to the notebook's working directory.
# Every input path below is built by appending a "/subject_X/..." suffix to it.
ROOT_DIRECTORY = "../DATASETS/bci_competition_3/p300"

In [58]:
# Output folder for the generated CSV files; one sub-folder per entry of SUBFILES is created inside it.
SEGMENTED_FILES = ROOT_DIRECTORY+"/segmented_files"

- These variables describe how the signals were recorded, so they are dataset-dependent.

In [59]:
# Number of consecutive rows of the signal file that are grouped into one output file.
NUMBER_CHANNELS = 64
# Stride, in samples, between the starts of two consecutive extracted segments/values.
INTER_STIMULI_SIZE = 42
# Number of characters expected per subject (used when validating the generated files).
NUMBER_CHARACTERS = 85
NUMBER_TRIALS = 15
NUMBER_ROWS_COLUMNS = 12
# Segments extracted per signal row.
NUMBER_STIMULI = NUMBER_TRIALS * NUMBER_ROWS_COLUMNS # 15 * 12 = 180
# Sampling rate handed to the band-pass filter as `fs` (presumably in Hz -- confirm against the dataset description).
SIGNAL_FREQUENCY = 240

- These variables control how the signals will be processed.

In [60]:
# Offset, in samples, added to every segment start position.
SEGMENT_BEGIN = 0
# Length, in samples, of each extracted segment (before decimation).
SEGMENT_SIZE = 160
# Band-pass edges handed to chebyshev1_filter; same unit as SIGNAL_FREQUENCY.
LOWCUT = 0.1
HIGHCUT = 10.0
# Order passed to signal.cheby1.
FILTER_ORDER = 5
# Down-sampling factor passed to signal.decimate for every segment.
DECIMATION_FACTOR = 12

- This variable indicates whether the train or the test dataset is being processed.

In [61]:
# True  -> process the "Test" files; no stimulus-type file is read or written.
# False -> process the "Train" files, including the stimulus-type files.
is_test_file = True

In [63]:
# Suffix of the per-character signal files; the character index is prepended when saving.
SIGNAL_FILENAME = '_char_filtered_signals.csv'
# Names of the per-subject files holding the extracted stimulus codes and stimulus types.
CODE_FILENAME = '_codes.csv'
STIMULU_FILENAME = '_stimuli.csv'

### Defining  Variables to Segment Signal Files

In [66]:
# Select the per-subject input files for the split chosen by is_test_file.
# Entries at the same position of every list belong to the same subject,
# because the processing cell walks the four lists together with zip().
if is_test_file:
    SUBFILES = ['test_A','test_B']

    SIGNAL_FILES = ["/subject_A/Subject_A_Test_Signal.txt","/subject_B/Subject_B_Test_Signal.txt"]
    STIMULUS_CODE_FILES = ["/subject_A/Subject_A_Test_StimulusCode.txt","/subject_B/Subject_B_Test_StimulusCode.txt"]
    # No stimulus-type files are listed for the test split; the empty
    # placeholders keep this list the same length as the others.
    STIMULUS_TYPE_FILES = ["",""]
else:
    SUBFILES = ['train_A','train_B']

    SIGNAL_FILES = ["/subject_A/Subject_A_Train_Signal.txt","/subject_B/Subject_B_Train_Signal.txt"]
    STIMULUS_TYPE_FILES = ["/subject_A/Subject_A_Train_StimulusType.txt","/subject_B/Subject_B_Train_StimulusType.txt"]
    STIMULUS_CODE_FILES = ["/subject_A/Subject_A_Train_StimulusCode.txt","/subject_B/Subject_B_Train_StimulusCode.txt"]

### Defining Functions

In [67]:
def chebyshev1_filter(signals,lowcut, highcut, order,fs=240, decimation_factor=None):
    """Decimate every signal and band-pass the result with a Chebyshev type I filter.

    Parameters
    ----------
    signals : sequence of 1-D array-likes
        Signals to process; each one is handled independently.
    lowcut, highcut : float
        Band-pass edges, in the same unit as ``fs``.
    order : int
        Order passed to ``scipy.signal.cheby1``.
    fs : float, optional
        Sampling rate used to normalise ``lowcut``/``highcut``.
    decimation_factor : int, optional
        Down-sampling factor passed to ``scipy.signal.decimate``. When omitted,
        the notebook-level ``DECIMATION_FACTOR`` is used, which is what the
        function always did before this parameter existed.

    Returns
    -------
    list
        One filtered, decimated array per input signal, in input order.
    """
    if decimation_factor is None:
        decimation_factor = DECIMATION_FACTOR

    # cheby1 expects digital band edges as fractions of the Nyquist rate (fs / 2).
    lowcut = 2 * lowcut / fs
    highcut = 2 * highcut /fs

    # Second argument is the maximum pass-band ripple, in dB.
    b, a = signal.cheby1(order, 0.1,[lowcut, highcut],'bandpass')

    # NOTE(review): the coefficients above are normalised against `fs`, but they are
    # applied to the signal *after* decimation, whose effective rate is
    # fs / decimation_factor. Confirm this ordering is intended; it is kept
    # unchanged here so previously generated files stay reproducible.
    return [
        signal.lfilter(b, a, signal.decimate(one_signal, decimation_factor))
        for one_signal in signals
    ]

In [68]:
def extract_values(file_values, step=None, max_values=None):
    """Sample every ``step``-th value from each column of ``file_values``.

    Parameters
    ----------
    file_values : mapping-like
        Iterating it must yield keys, and ``file_values[key]`` must be an
        integer-indexable sequence (a ``pandas.DataFrame`` read with
        ``header=None`` or a dict of lists both qualify).
    step : int, optional
        Distance between two sampled positions. Defaults to the notebook-level
        ``INTER_STIMULI_SIZE``.
    max_values : int, optional
        Maximum number of values kept per column. Defaults to the notebook-level
        ``NUMBER_STIMULI`` (this was previously the hard-coded literal 180).

    Returns
    -------
    list of list
        One list per key, in iteration order, holding the sampled values.
    """
    if step is None:
        step = INTER_STIMULI_SIZE
    if max_values is None:
        max_values = NUMBER_STIMULI

    all_extracted_values = []
    for i_char in file_values:
        column = file_values[i_char]
        # Positions 0, step, 2*step, ... truncated to at most max_values entries.
        positions = range(0, len(column), step)[:max_values]
        all_extracted_values.append([column[i_stimuli] for i_stimuli in positions])
    return all_extracted_values

### Loading, Selecting Values and Saving Files

In [71]:
# Segment, filter and save the signals of every subject, then save the
# extracted stimulus codes (and, for training data, the stimulus types).
begin_time = datetime.datetime.now()

# exist_ok=True keeps the cell re-runnable when the output tree already exists.
os.makedirs(SEGMENTED_FILES, exist_ok=True)

for sigs,stims,codes,subfilename in zip(SIGNAL_FILES,STIMULUS_TYPE_FILES,STIMULUS_CODE_FILES,SUBFILES):
    # sep=r"\s+" splits on runs of whitespace; it is the documented equivalent of
    # delim_whitespace=True, which recent pandas releases deprecate.
    sigs_file = pd.read_csv(ROOT_DIRECTORY+sigs, sep=r"\s+", header=None)
    codes_file = pd.read_csv(ROOT_DIRECTORY+codes, sep=r"\s+", header=None)

    # Stimulus-type files are only listed for the training split.
    if(not is_test_file):
        stims_file = pd.read_csv(ROOT_DIRECTORY+stims, sep=r"\s+", header=None)

    # Create the per-subject output folder once, before the character loop, so the
    # code/stimulus files written after the loop never rely on a variable that is
    # only assigned inside it.
    folder_to_save = SEGMENTED_FILES+'/'+subfilename
    os.makedirs(folder_to_save, exist_ok=True)

    # Every consecutive group of NUMBER_CHANNELS rows becomes one output file.
    count_char = 0
    for temp_char in range(0,sigs_file.shape[0],NUMBER_CHANNELS):

        temp_signals = sigs_file.iloc[temp_char:(temp_char+NUMBER_CHANNELS)]

        # Segmenting signals: for each row, take NUMBER_STIMULI windows of
        # SEGMENT_SIZE samples whose starts are INTER_STIMULI_SIZE samples apart.
        segments = []
        for temp_index,temp_signal in temp_signals.iterrows():
            for i_begin in range(0,len(temp_signal),INTER_STIMULI_SIZE):
                segments.append(temp_signal[i_begin+SEGMENT_BEGIN:i_begin+SEGMENT_BEGIN+SEGMENT_SIZE])
                # Stop once the last expected stimulus of this row has been taken.
                if(i_begin >= INTER_STIMULI_SIZE*(NUMBER_STIMULI-1)):
                    break

        # Filtering segmented signals
        sigs_seg_filtered = chebyshev1_filter(segments,LOWCUT, HIGHCUT, FILTER_ORDER,SIGNAL_FREQUENCY)

        pd.DataFrame(sigs_seg_filtered).to_csv(folder_to_save+'/'+str(count_char)+SIGNAL_FILENAME,index=False)
        count_char = count_char + 1


    # Extracting codes values
    codes_extracted = extract_values(codes_file)
    pd.DataFrame(codes_extracted).to_csv(folder_to_save+'/'+CODE_FILENAME,index=False)

    # Extracting stimuli values
    if(not is_test_file):
        stimuli_extracted = extract_values(stims_file)
        pd.DataFrame(stimuli_extracted).to_csv(folder_to_save+'/'+STIMULU_FILENAME,index=False)


end_time = datetime.datetime.now()
print('Begin:')
print(begin_time)
print(end_time-begin_time)

Begin:
2019-06-14 03:58:40.480985
0:38:23.201190


### Testing the Created Train Files

#### In this test, we will check:
1. All files were created
2. For each file:
    1. Number of rows created
    1. Number of values in each row
    

In [79]:
from os import listdir
from os.path import isfile, join
from math import ceil

In [52]:
# Expected shape of every per-character signal CSV: one row per (signal row, stimulus)
# segment, and one column per sample expected after decimation.
SEGMENTED_SIGNAL_SHAPE = (NUMBER_CHANNELS*NUMBER_STIMULI,ceil(SEGMENT_SIZE/DECIMATION_FACTOR))
# Expected shape of the code/stimulus CSVs.
# NOTE(review): NUMBER_CHARACTERS is a single constant for both splits -- confirm it
# also matches the test recordings before validating test output.
STIMULI_CODE_SHAPE = (NUMBER_CHARACTERS,NUMBER_STIMULI)
# One signal file per character plus the codes file; the stimuli file is only
# written when processing training data, so the test split has one file fewer.
NUMBER_OF_FILES = NUMBER_CHARACTERS + (1 if is_test_file else 2)

In [53]:
# Validate the generated output: file count per subject folder, then the shape of each CSV.
for subfile in SUBFILES:
    subfile_dir = SEGMENTED_FILES+'/'+subfile
    onlyfiles = [f for f in listdir(subfile_dir) if isfile(join(subfile_dir, f))]
    assert len(onlyfiles) == NUMBER_OF_FILES, (
        'The number of file on directory '+SEGMENTED_FILES+'/'+subfile+' it\'s different than expected.'
        + 'The expected is '+str(NUMBER_OF_FILES)+', it was found '+str(len(onlyfiles))+'.'
    )

    for each_file in onlyfiles:
        # Signal files carry 'filtered' in their name; everything else is a code/stimulus file.
        if 'filtered' in each_file:
            expected_shape = SEGMENTED_SIGNAL_SHAPE
        else:
            expected_shape = STIMULI_CODE_SHAPE

        temp_file = pd.read_csv(subfile_dir+'/'+each_file)
        assert temp_file.shape == expected_shape, (
            'The shape on '+each_file+' isn\'t correct. The expected is '
            + str(expected_shape)+' and it was found '+str(temp_file.shape)+'.'
        )
            
            

In [80]:
# Display the expected number of samples per segment after decimation
# (the second entry of SEGMENTED_SIGNAL_SHAPE).
ceil(SEGMENT_SIZE/DECIMATION_FACTOR)

14