In [1]:
# imports
import sys
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os
import neurokit2 as nk
#import biobss from local to run without installing
sys.path.append("../")
import biobss
import neurokit2
import flirt




In [2]:
# Load the EDA sample data together with its metadata
data, info = biobss.utils.load_sample_data(data_type='EDA')
sample_data = np.asarray(data['EDA'])
sampling_rate = info['sampling_rate']
L = info['signal_length']

# Time axis in seconds, used only for plotting (the pipeline does not need it).
# It is built from integer sample indices so it always has exactly
# len(sample_data) entries; np.arange with a fractional step can come out
# one element too long or too short because of floating-point rounding.
timestamp = np.arange(len(sample_data)) / sampling_rate

In [3]:
# Named signals handed to the plotting helper in the following cells
signals = dict(Raw=sample_data)

In [None]:
# Plot the raw EDA signal against the time axis using the default plotting method
biobss.edatools.plot_eda(
    signals=signals,
    sampling_rate=sampling_rate,
    timestamps=timestamp,
    timestamp_resolution='s',
    show_peaks=False,
    figsize=(16, 3),
)

In [None]:
# Plot the same raw EDA signal again, this time requesting the 'plotly' method
biobss.edatools.plot_eda(
    signals=signals,
    sampling_rate=sampling_rate,
    timestamps=timestamp,
    timestamp_resolution='s',
    method='plotly',
    show_peaks=False,
)

In [None]:
# Plot the raw signal on an explicitly created 16x3 inch Matplotlib figure/axes
# (explicit figure creation instead of relying on pyplot's implicit current figure)
fig, ax = plt.subplots(figsize=(16, 3))
biobss.plottools.create_signal_plot(sample_data, ax, timestamp, 's', 'EDA Signal', 'EDA', None)

# <font color='Green'> Adding processes to the Pipeline </font>

#### <font color='red'> Adding a process </font>

Any process can be added to the pipeline if its input is a signal and its output is a signal or a collection of signals. A process is added by passing the method to the Bio_Process constructor, which takes the following arguments:

- process_method: The method to be added to the pipeline
- inplace: If True, result of the process will modify the input signal. If False, the result of the process will be returned as a new signal and added as a new channel.
- prefix: The prefix to be added to the output channel name. If inplace is True, prefix will be ignored.
  - If inplace is False, the output channel name will be prefix + input channel name.
  - <code> (Default: None) (if inplace = False  and prefix = None, prefix will be set to 'processed') </code>
- return_index: If return_index is set, only the element of the result at that index is used as the result. If return_index is None, the whole result is used. (Default: None)
- argmap: A dictionary that maps argument names. For example, Bio_Channel already has a sampling_rate attribute. If the process method expects the sampling rate under a different name, that name can be mapped to the Bio_Channel attribute. <code> EXAMPLE : argmap = {'sampling_rate': 'fs'} or argmap = {'sampling_rate': 'sample_rate'} </code>
- **kwargs: Keyword arguments to be passed to the process method






Pipeline processes the given input sequentially. The input is passed from one process to the next.

In [4]:
# Pipeline step: EDA decomposition.
# The step returns a dictionary with the keys EDA_Tonic and EDA_Phasic.
decompose = biobss.pipeline.Bio_Process(
    process_method=biobss.edatools.eda_decompose,
    method="highpass",
)


In [5]:
# Pipeline step: filtering with neurokit2's eda_clean; returns a filtered signal
# for every input signal.
# argmap names the keyword under which the channel's sampling rate is passed.
clean = biobss.pipeline.Bio_Process(
    process_method=neurokit2.eda_clean,
    argmap={"sampling_rate": "sampling_rate"},
)

In [6]:
# Pipeline step: normalization; returns a normalized signal for every input signal
normalize = biobss.pipeline.Bio_Process(process_method=biobss.preprocess.normalize_signal)

In [7]:
# Pipeline step: resampling; every input signal is resampled to 350 Hz
resample = biobss.pipeline.Bio_Process(
    process_method=biobss.preprocess.resample_signal_object,
    target_sample_rate=350,
)

In [8]:
# Feature extraction step: signal features.
# input_signals maps a feature prefix (key) to the channel it is computed from (value),
# e.g. a feature of EDA_Tonic is reported with the prefix "Tonic".
# NOTE(review): in the recorded run, pipeline.extract_features() fails with
# "TypeError: get_eda_features() got an unexpected keyword argument 'prefix'",
# so flirt.get_eda_features does not appear to be call-compatible with Feature.
# Confirm and substitute a feature function that accepts `prefix`.
signal_features = biobss.pipeline.Feature(
    name="signal_features",
    function=flirt.get_eda_features,
    input_signals={'Raw': 'EDA_Raw', 'Tonic': 'EDA_Tonic', 'Phasic': 'EDA_Phasic'},
)


In [None]:
# Feature extraction step: statistical features.
# With this prefix mapping, e.g. the mean of EDA_Tonic is reported as Tonic_mean.
stat_features = biobss.pipeline.Feature(
    name="stat_features",
    function=biobss.edatools.get_stat_features,
    input_signals={'Raw': 'EDA_Raw', 'Tonic': 'EDA_Tonic', 'Phasic': 'EDA_Phasic'},
)

In [9]:
# Create the BIOBSS pipeline.
#   windowed_process=True -> the signal is processed window by window
#   window_size=60        -> each window covers 60 seconds of data
#   step_size=20          -> consecutive windows are shifted by 20 seconds
pipeline = biobss.pipeline.Bio_Pipeline(
    windowed_process=True,
    window_size=60,
    step_size=20,
)

In [10]:
# The pipeline input is wrapped in a Bio_Channel object.
# Required parameters: name, signal, sampling_rate.
# A timestamp can optionally be provided; in that case its resolution must be
# given as well. Possible resolutions are: 'min', 's', 'ms', 'us'.
# If no timestamp is provided, one is created from sampling_rate (default resolution 's').
# The sampling rate is taken from the loaded sample metadata instead of a
# hard-coded literal, so the channel always matches the data that was loaded.
bio_channel = biobss.pipeline.Bio_Channel(signal=sample_data, name="EDA_Raw", sampling_rate=sampling_rate)

# The channel is named EDA_Raw; no timestamp is passed, so it is derived from sampling_rate.

# Ending the cell with the object displays a summary of the channel properties
bio_channel

EDA_Raw (700Hz) ((5920.0,)s) (1 windows) ((4144000,)) ((4144000,)) (float64)[5.40046692 5.40885925 5.40161133 ... 7.26280212 7.26966858 7.2681427 ]

In [11]:
# The channel attribute gives access to the sample array held by the Bio_Channel object
bio_channel.channel

array([5.40046692, 5.40885925, 5.40161133, ..., 7.26280212, 7.26966858,
       7.2681427 ])

In [12]:
# The pipeline input can be set from an array, a DataFrame, a pandas Series, a list,
# a Bio_Channel or a Bio_Data object.

# Option 1: set the input from a plain array. Name and sampling rate must be given;
# the rate is taken from the sample metadata rather than a hard-coded literal.
pipeline.set_input(sample_data, sampling_rate=sampling_rate, name='EDA_Raw')

# Option 2: set the input from a Bio_Channel object. Name and sampling rate are
# not required here because the Bio_Channel already carries them.
pipeline.set_input(bio_channel)

# Display the resulting pipeline input
pipeline.input

Signal object with 1 channel(s)
EDA_Raw (700Hz) ((5920.0,)s) (1 windows) ((4144000,))

In [13]:
# Steps are appended to the pipeline one after another. Order matters: the output
# of each step is the input of the next, and steps run in the order they were added.
for step in (clean, normalize, decompose, resample):
    pipeline.preprocess_queue.add_process(step)

# Currently all the steps are added to the preprocess_queue
# After the preprocess_queue is processed, data will be segmented into windows (if windowed_process=True)
# For running process in windows, the process_queue is used

## This structure will change in the future

In [14]:
# Register the feature extraction steps with the pipeline.
# Only signal_features is registered; the stat_features registration below is left disabled.
pipeline.add_feature_step(signal_features)
#pipeline.add_feature_step(stat_features)


In [15]:
# Representation of the pipeline: shows the configured process queues and window settings
pipeline

Bio_Pipeline:
	Preprocessors: Process list:
	1: eda_clean
	2: normalize_signal
	3: eda_decompose
	4: resample_signal_object

	Processors: Process list:

	Postprocessors: Process list:

	Window Size(Seconds): 60
	Step Size: 20

In [16]:
# Run the pipeline; this processes the input data through the steps added to the pipeline
pipeline.run_pipeline()

In [17]:
# Representation of the pipeline data after running
pipeline.data

# Data is cleaned
# Data is normalized
# Data is decomposed into tonic and phasic components; these components are added to the pipeline data as EDA_Tonic and EDA_Phasic channels
# Lastly, data is resampled to 350Hz (all channels are resampled to 350Hz) (target_sample_rate=350)
# Data is segmented into windows of 60 seconds with a step size of 20 seconds

Signal object with 3 channel(s)
EDA_Raw (350Hz) (60.0s) (294 windows) ((294, 21000))
EDA_Tonic (350Hz) (60.0s) (294 windows) ((294, 21000))
EDA_Phasic (350Hz) (60.0s) (294 windows) ((294, 21000))

In [18]:
# Run the registered feature steps on the EDA_Raw, EDA_Tonic and EDA_Phasic channels.
# The prefix supplied in input_signals is added to the feature name.
# NOTE(review): only signal_features is registered at this point (the stat_features
# registration is commented out), and in the recorded run this call fails with
# "TypeError: get_eda_features() got an unexpected keyword argument 'prefix'".
# Confirm that the registered feature function accepts a `prefix` keyword.
pipeline.extract_features()

TypeError: get_eda_features() got an unexpected keyword argument 'prefix'

In [None]:
# Representation of the pipeline features after extraction
# Each row is the feature vector for one window
# The index is the timestamp of the window (this can be selected as start, end or center of the window)
pipeline.features