In [None]:
!pip install wget
!pip install PyGithub
!pip install openpyxl
#!pip install pandas --upgrade

# Import libraries

In [None]:
import os
from os import path

import glob
import pandas as pd
import numpy as np
import wget
from github import Github
import shutil

from sklearn.preprocessing import StandardScaler
from tensorflow.keras.utils import to_categorical
from sklearn.preprocessing import LabelEncoder

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Activation, Flatten
from tensorflow.keras.optimizers import Adam
from sklearn import metrics

import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

from tensorflow.keras.callbacks import ModelCheckpoint
from datetime import datetime 

import tensorflow as tf

# signal processing
from scipy import signal
from scipy.ndimage import label as sci_label
from scipy.stats import zscore
from scipy.interpolate import interp1d
from scipy.integrate import trapz

# misc
import warnings

 ### Set and clear output directories ###

In [None]:
# remove all existing csv files
!rm *.csv
!rm *.xlsx


if path.exists('/kaggle/working/Raw Data') == True:
  shutil.rmtree("/kaggle/working/Raw Data")

if path.exists('/kaggle/working/Raw Data/Multimodal/ECG/') == False:
  os.mkdir('Raw Data')
  os.mkdir('Raw Data/Multimodal')
  os.mkdir('Raw Data/Multimodal/ECG')
  


if path.exists('/kaggle/working/Raw Data/Single Modal/ECG') == False:
  os.mkdir('Raw Data/Single Modal')
  os.mkdir('Raw Data/Single Modal/ECG')

!pwd
!ls

# Retrieve datasets

Read the xlsx files, convert them to csv, and load them into DataFrames for processing.

In [None]:
# The stimulus sheet is written out as CSV in the working directory and then
# re-read, so the notebook works from that CSV copy.
stimulus_desc_file = pd.read_excel(
    '../input/young-adults-affective-data-ecg-and-gsr-signals/ECG_GSR_Emotions/Stimulus_Description.xlsx'
)
stimulus_desc_file.to_csv('Stimulus_Description.csv', index=False, header=True)
stimulus_desc = pd.read_csv('Stimulus_Description.csv')
#stimulus_desc.head()

In [None]:
# Multimodal self-annotations: exported to CSV, re-read, tagged with 'M' and
# given the column names the rest of the notebook expects.
self_annotation_multimodal_file = pd.read_excel(
    '../input/young-adults-affective-data-ecg-and-gsr-signals/ECG_GSR_Emotions/Self-Annotation Labels/Self-annotation Multimodal_Use.xlsx'
)
self_annotation_multimodal_file.to_csv('Self-annotation Multimodal_Use.csv', index=False, header=True)
self_annotation_multimodal = (
    pd.read_csv('Self-annotation Multimodal_Use.csv')
    .assign(annotation='M')
    .rename(columns={'V_Label': 'Valence', 'A_Label': 'Arousal', 'Four_Labels': 'Four_Label'})
)
#self_annotation_multimodal.head()

In [None]:
# Single-modal self-annotations: exported to CSV, re-read, tagged with 'S' and
# renamed so the key columns match the multimodal sheet.
self_annotation_singlemodal_file = pd.read_excel(
    '../input/young-adults-affective-data-ecg-and-gsr-signals/ECG_GSR_Emotions/Self-Annotation Labels/Self-annotation Single Modal_Use.xlsx'
)
self_annotation_singlemodal_file.to_csv('Self-annotation Single Modal_Use.csv', index=False, header=True)
self_annotation_singlemodal = (
    pd.read_csv('Self-annotation Single Modal_Use.csv')
    .assign(annotation='S')
    .rename(columns={'Male': 'Gender', 'Session Id': 'Session ID', 'Video Id': 'Video ID'})
)
#self_annotation_singlemodal.head()

In [None]:
# Stack both annotation tables; the 'annotation' column ('M' / 'S') tells the
# rows apart afterwards. The original row labels are kept (no re-indexing).
self_annotation_frames = [self_annotation_multimodal, self_annotation_singlemodal]
merged_dataframe = pd.concat(self_annotation_frames, axis=0)

# Independent copy of the merged table.
table_frame = merged_dataframe.copy(deep=True)

In [None]:
# Column layout of the frame built later: the signal first, followed by every
# annotation column in its existing order.
cols = ['Raw Data', *merged_dataframe.columns]

# Data processing

Processed data: Collected ECG signals are converted into numerical series data values.

In [None]:
arr_shape = 1000  # number of leading samples kept from every recording
raw_data_arr = []


def form_data(data_array = None, annotation = '', data_path = ''):
    """Append one row per (recording, matching annotation) to ``data_array``.

    Every file in ``data_path`` whose name contains
    ``ECGdata_s<session>p<participant>v<video>`` is loaded, truncated to
    ``arr_shape`` samples and joined with the rows of ``merged_dataframe``
    that carry the same session / participant / video ids and the requested
    ``annotation`` tag. The target emotion is looked up in ``stimulus_desc``.

    Parameters
    ----------
    data_array : list, optional
        List to extend in place. A fresh list is created when omitted.
    annotation : str
        Value of the ``annotation`` column to match ('M' or 'S'). An empty
        string falls back to 'M', which is what the previous implementation
        always used regardless of this argument.
    data_path : str
        Directory holding the comma-separated ECG recordings.

    Returns
    -------
    list
        ``data_array`` with the new rows appended; each row is
        ``[signal, <annotation fields...>, target emotion]``.
    """
    import re  # only needed here; kept local so the cell is self-contained

    if data_array is None:
        # A mutable default would be shared between calls and keep growing.
        data_array = []

    modal = annotation or 'M'
    name_pattern = re.compile(r'ECGdata_s(\d+)p(\d+)v(\d+)', re.IGNORECASE)
    annotation_fields = (
        'Participant Id', 'Session ID', 'Video ID',
        'Name', 'Age', 'Gender', 'Valence level',
        'Arousal level', 'Dominance level', 'Happy',
        'Sad', 'Fear', 'Anger', 'Neutral',
        'Disgust', 'Surprised', 'Familiarity Score',
        'Emotion', 'Valence', 'Arousal', 'Four_Label',
        'annotation',
    )

    # Sorted so the resulting row order does not depend on the file system.
    for filename in sorted(os.listdir(data_path)):
        match = name_pattern.search(filename)
        if match is None:
            # Not an ECG recording (or an unexpected name): nothing to parse.
            continue
        session_id, participant_id, video_id = (int(part) for part in match.groups())

        data_row = merged_dataframe.loc[(merged_dataframe['Session ID'] == session_id) &
                                        (merged_dataframe['Participant Id'] == participant_id) &
                                        (merged_dataframe['Video ID'] == video_id) &
                                        (merged_dataframe['annotation'] == modal)]
        if data_row.empty:
            # No annotation for this recording, so it cannot be labelled.
            continue

        stim_row = stimulus_desc.loc[(stimulus_desc['Session ID'] == session_id) &
                                     (stimulus_desc['Video ID'] == video_id)]
        target_emotion = stim_row['Target Emotion'].iat[0]

        data = np.loadtxt(os.path.join(data_path, filename), delimiter=',')
        data = data[0:arr_shape]

        for _, row in data_row.iterrows():
            data_array.append([data, *(row[field] for field in annotation_fields), target_emotion])
    return data_array

    
# Collect the multimodal recordings first, then the single-modal ones, into the same list.
raw_data_arr =  form_data(data_array = raw_data_arr, annotation = 'M', data_path = "../input/young-adults-affective-data-ecg-and-gsr-signals/ECG_GSR_Emotions/Raw Data/Multimodal/ECG/")
raw_data_arr =  form_data(data_array = raw_data_arr, annotation = 'S', data_path = "../input/young-adults-affective-data-ecg-and-gsr-signals/ECG_GSR_Emotions/Raw Data/Single Modal/ECG/")

# `cols` is created in an earlier cell; the guard keeps a re-run of this cell
# from adding the name twice, which would make the DataFrame constructor fail.
if 'Target Emotion' not in cols:
    cols.append('Target Emotion')

raw_dataframe = pd.DataFrame(raw_data_arr, columns = cols)
raw_dataframe = raw_dataframe.rename(columns = {'Participant Id':'Participant ID', 'annotation':'Modal', 'Four_Label':'Four label'})
# A missing familiarity score is recorded as 'Never watched'; every other gap becomes ''.
raw_dataframe['Familiarity Score'] = raw_dataframe['Familiarity Score'].fillna('Never watched')
raw_dataframe = raw_dataframe.replace(np.nan, '', regex=True)
#raw_dataframe.head()

# Data visualization

Using Matplotlib, we can visualize the signals recorded for each emotion as waveforms.

In [None]:
# Separate copy of raw_dataframe for the plotting / modelling cells to modify.
plot_frame = raw_dataframe.copy()
#plot_frame.head()

In [None]:
# Built from raw_dataframe rather than from plot_frame itself: dropping from
# plot_frame in place made this cell raise KeyError when it was run a second time.
plot_frame = raw_dataframe.drop(
    columns=['Participant ID', 'Session ID', 'Familiarity Score', 'Age', 'Gender', 'Name']
)


def _rows_for(emotion, target):
    """Rows whose self-reported 'Emotion' and stimulus 'Target Emotion' match the given labels."""
    return plot_frame.loc[(plot_frame['Emotion'] == emotion) & (plot_frame['Target Emotion'] == target)]


sad_data = _rows_for('Sad', 'sad')
fear_data = _rows_for('Fear', 'fear')
happy_data = _rows_for('Happy', 'happy')
anger_data = _rows_for('Anger', 'anger')
neutral_data = _rows_for('Neutral', 'neutral')
# 'Mixed' has no stimulus of its own; it is paired with the neutral stimuli.
mixed_data = _rows_for('Mixed', 'neutral')
disgust_data = _rows_for('Disgust', 'disgust')
# NOTE(review): the annotation column is called 'Surprised' while the label
# matched here is 'Surprise' - confirm against the values in the data.
surprised_data = _rows_for('Surprise', 'surprise')

In [None]:
def plot_signals(data_arr, title = ''):
    """Overlay every signal in ``data_arr['Raw Data']`` on one figure.

    Parameters
    ----------
    data_arr : pandas.DataFrame
        Frame with a 'Raw Data' column holding one sequence of samples per row.
    title : str
        Title shown above the plot.
    """
    # A dedicated figure/axes pair; calling plt.clf() before plt.figure()
    # used to leave an extra, empty figure behind on every call.
    fig, ax = plt.subplots(figsize=(12, 4))

    for y in data_arr['Raw Data']:
        ax.plot(y)

    ax.set_title(title)
    ax.set_xlabel('Sample index')
    ax.set_ylabel('Amplitude')
    # Layout is computed after the title and labels exist so they are not clipped.
    fig.tight_layout()
    plt.show()
    plt.close(fig)

**SAD**

In [None]:
plot_signals(sad_data, title='ECG Signals: SAD')

**FEAR**

In [None]:
plot_signals(fear_data, title='ECG Signals: FEAR')

**HAPPY**: Signals are scattered across different levels

In [None]:
plot_signals(happy_data, title='ECG Signals: HAPPY')

**ANGER**: Signals are scattered across different levels

In [None]:
plot_signals(anger_data, title='ECG Signals: ANGER')

**NEUTRAL**: Signals above the 0 region are scattered. Some are in the -15 to -20 area

In [None]:
plot_signals(neutral_data, title='ECG Signals: NEUTRAL')

**MIXED**: Most signals reside at the zero level

In [None]:
plot_signals(mixed_data, title='ECG Signals: MIXED')

**DISGUST** : Signals are on 0-5 region and a lot on -20 level

In [None]:
plot_signals(disgust_data, title='ECG Signals: DISGUST')

**SURPRISED** : signals are all similar and residing near zero region

In [None]:
plot_signals(surprised_data, title='ECG Signals: SURPRISED')

**Findings:** No recognizable pattern can be identified with certainty from the visual representation of the data

# Model creation and training

In [None]:
#train_d_frame = plot_frame.copy().drop(['Video ID', 'Valence level', 'Arousal level',
#                                        'Dominance level', 'Happy', 'Sad', 'Fear',
#                                        'Anger', 'Neutral', 'Disgust', 'Surprised',
#                                        'Valence', 'Arousal', 'Four label', 'Modal'
#                                       ], axis = 1)
# Training frame: the per-emotion score columns and bookkeeping columns are
# removed, and the signal / label columns get the names used from here on.
train_d_frame = (
    plot_frame
    .drop(columns=['Video ID', 'Happy', 'Sad', 'Fear',
                   'Anger', 'Neutral', 'Disgust', 'Surprised', 'Four label', 'Modal'])
    .rename(columns={'Raw Data': 'feature', 'Emotion': 'emotion'})
)
#train_d_frame.head()


In [None]:
#x = np.array(train_d_frame['feature'].tolist())
#y = np.array(train_d_frame['emotion'].tolist())

In [None]:
#sc = StandardScaler()
#x = sc.fit_transform(x)

In [None]:
#labelencoder = LabelEncoder()
#y = to_categorical(labelencoder.fit_transform(y))

Split the dataset into training and testing dataset

In [None]:
#x_train, x_test, y_train, y_test = train_test_split(x, y, test_size = 0.2, random_state = 0)

In [None]:
#num_labels = y.shape[1]

Create model

In [None]:
#model = Sequential()

###first layer
#model.add(Dense(1, input_shape = (arr_shape,)))
#model.add(Activation('relu'))

###second layer
#model.add(Dense(2))
#model.add(Activation('relu'))

###third layer
#model.add(Dense(4))
#model.add(Activation('relu'))

###fourth layer
#model.add(Dense(8))
#model.add(Activation('relu'))

###final layer
#model.add(Dense(num_labels))
#model.add(Activation('softmax'))

In [None]:
#model.summary()

In [None]:
#model.compile(loss = 'categorical_crossentropy', metrics = ['accuracy'], optimizer = 'adam')

In [None]:
#num_epochs = 10
#num_batch_size = 2

#checkpointer = ModelCheckpoint(filepath = './Raw Data/ecg_emotion_recognizer.hdf5', 
#                               verbose = 1, save_best_only = True)
#start = datetime.now()

#model.fit(x_train, y_train, batch_size = num_batch_size, epochs = num_epochs,
#          validation_data = (x_test, y_test), callbacks = [checkpointer], verbose = 1)

#duration = datetime.now() - start
#print("Training completed in time: ", duration)

# Model accuracy

In [None]:
#test_accuracy = model.evaluate(x_test, y_test, verbose = 0)
#print(test_accuracy)

# Initial Findings

**Initial Findings**: Model displayed low accuracy. Need to look for possible improvements. Will try feature extraction on raw data signals.

In [None]:
from __future__ import division
#from matplotlib import pyplot as plt
import scipy.io as spio
#import numpy as np
import statistics
from scipy.stats import kurtosis
from scipy.stats import skew
import sys
#sys.path.append("/home/chandan/python-workspace/")
#import BOCPD as ocpd #import bocpd from another file
import cProfile
from functools import partial

In [None]:
####################### FEATURE DEFINITIONS ###################################

def detect_peaks(ecg_signal, threshold=0.3, qrs_filter=None):
    '''
    Peak detection using cross-correlation with a QRS template and a threshold.

    Parameters
    ----------
    ecg_signal : pandas.Series or array-like
        Signal samples. Anything that is not a Series is wrapped in one with
        a default 0..n-1 index, so plain lists and numpy arrays are accepted.
    threshold : float
        Minimum normalised similarity for a sample to count as part of a peak.
    qrs_filter : array-like, optional
        Template to correlate with; defaults to one period of a sine wave
        sampled at 15 points.

    Returns
    -------
    (index, similarity)
        ``index`` holds the index labels of ``ecg_signal`` at which the
        similarity exceeds ``threshold`` (sample positions for a default
        index). ``similarity`` is the cross-correlation scaled so that its
        maximum is 1. A flat or empty signal has no peaks: an empty index
        and an all-zero similarity are returned.
    '''
    if not isinstance(ecg_signal, pd.Series):
        ecg_signal = pd.Series(np.asarray(ecg_signal, dtype=float))

    if qrs_filter is None:
        # create default qrs filter, which is just a part of the sine function
        t = np.linspace(1.5 * np.pi, 3.5 * np.pi, 15)
        qrs_filter = np.sin(t)

    # A signal without variance cannot be normalised (division by zero).
    spread = ecg_signal.std() if len(ecg_signal) else 0.0
    if not np.isfinite(spread) or spread == 0:
        return ecg_signal.iloc[0:0].index, np.zeros(len(ecg_signal))

    # normalize data
    ecg_signal = (ecg_signal - ecg_signal.mean()) / spread

    # calculate cross correlation
    similarity = np.correlate(ecg_signal, qrs_filter, mode="same")
    similarity = similarity / np.max(similarity)

    # return the index labels of the samples above the threshold
    return ecg_signal[similarity > threshold].index, similarity

def group_peaks(p, threshold=5):
    '''
    Collapse runs of nearby peak samples into one position per run.

    The peak detection finds several samples for each QRS complex. Samples
    whose distance to the next sample is below ``threshold`` belong to the
    same run, and the median position of each run is returned.

    Parameters
    ----------
    p : array-like
        Sorted sample positions, e.g. the index returned by ``detect_peaks``.
    threshold : int
        Maximum gap (exclusive) between neighbouring samples of one run.

    Returns
    -------
    numpy.ndarray
        One median position per run. A sample with no neighbour within
        ``threshold`` does not form a run and is not reported.
    '''
    p = np.asarray(p)
    output = np.empty(0)
    if p.size < 2:
        return output

    # label runs of consecutive gaps that are smaller than the threshold
    peak_groups, num_groups = sci_label(np.diff(p) < threshold)

    # Labels are 1..num_groups; iterating over them directly also covers the
    # case where every gap is small and no background label 0 is present.
    for group_id in range(1, num_groups + 1):
        members = np.flatnonzero(peak_groups == group_id)
        # gap j links samples j and j+1, so a run of gaps spans one more sample
        peak_group = p[members[0]:members[-1] + 2]
        output = np.append(output, np.median(peak_group))
    return output

"""TIME DOMAIN"""
#independent function to calculate RMSSD
def calc_rmssd(list):
    """Root mean square of the successive differences of ``list``."""
    successive = np.diff(list)
    mean_square = np.mean(successive ** 2)
    return np.sqrt(mean_square)
    
    
 #independent function to calculate AVRR   
def calc_avrr(list):
    """Arithmetic mean of ``list`` (sum of the values divided by their count)."""
    total = sum(list)
    count = len(list)
    return total / count

 #independent function to calculate SDRR   
def calc_sdrr(list):
    """Sample standard deviation of ``list`` as computed by ``statistics.stdev``."""
    sample_sd = statistics.stdev(list)
    return sample_sd

 #independent function to calculate SKEW   
def calc_skew(list):
    """Skewness of ``list`` via ``scipy.stats.skew`` with its default settings."""
    skewness = skew(list)
    return skewness

 #independent function to calculate KURT   
def calc_kurt(list):
    """Kurtosis of ``list`` via ``scipy.stats.kurtosis`` with its default settings."""
    excess = kurtosis(list)
    return excess

def calc_NNx(list):
    """Count successive RR-interval changes larger than 50.

    Beats are located with ``detect_peaks`` (threshold 0.3) and
    ``group_peaks``; the RR intervals are the gaps between those beats.
    The intervals are expressed in index units of ``list``.
    """
    peak_samples, _similarity = detect_peaks(list, threshold=0.3)
    # one position per beat (hopefully)
    beat_positions = group_peaks(peak_samples)
    rr = np.diff(beat_positions)
    large_changes = np.abs(np.diff(rr)) > 50
    return np.sum(large_changes) * 1
    
def calc_pNNx(list):
    """Percentage of RR intervals whose change to the next one exceeds 50.

    Beats are located with ``detect_peaks`` (threshold 0.3) and
    ``group_peaks``. The count of large changes is divided by the number
    of RR intervals and scaled to percent.
    """
    peak_samples, _similarity = detect_peaks(list, threshold=0.3)
    # one position per beat (hopefully), then the gaps between beats
    rr = np.diff(group_peaks(peak_samples))
    large_changes = (np.abs(np.diff(rr)) > 50) * 1
    return 100 * np.sum(large_changes) / len(rr)
    
"""NON LINEAR DOMAIN"""
 #independent function to calculate SD1
def calc_SD1(list):
    """SD1: square root of half the sample variance of the successive differences."""
    successive = np.diff(list)
    half_variance = np.std(successive, ddof=1) ** 2 * 0.5
    return np.sqrt(half_variance)
 #independent function to calculate SD2
def calc_SD2(list):
    """SD2: sqrt(2 * var(values) - 0.5 * var(successive differences)), sample variances."""
    successive = np.diff(list)
    value_term = 2 * np.std(list, ddof=1) ** 2
    difference_term = 0.5 * np.std(successive, ddof=1) ** 2
    return np.sqrt(value_term - difference_term)
    
 #independent function to calculate SD1/SD2
def calc_SD1overSD2(list):
    """Ratio SD1 / SD2 of the Poincare descriptors of ``list``.

    SD1 is derived from the sample variance of the successive differences,
    SD2 from the sample variance of the values themselves. The previous
    implementation returned the inverse (SD2 / SD1), contradicting the
    function name.
    """
    diff_nn_intervals = np.diff(list)
    sd1 = np.sqrt(np.std(diff_nn_intervals, ddof=1) ** 2 * 0.5)
    sd2 = np.sqrt(2 * np.std(list, ddof=1) ** 2
                  - 0.5 * np.std(diff_nn_intervals, ddof=1) ** 2)
    return sd1 / sd2
    
    
 #independent function to calculate CSI
def calc_CSI(list):
    """Cardiac Sympathetic Index: L / T.

    L is the longitudinal extent of the Poincare cloud (4 * SD2) and T the
    transverse extent (4 * SD1). The two were previously swapped, which
    yielded the inverse of the index.
    """
    diff_nn_intervals = np.diff(list)
    sd1 = np.sqrt(np.std(diff_nn_intervals, ddof=1) ** 2 * 0.5)
    sd2 = np.sqrt(2 * np.std(list, ddof=1) ** 2
                  - 0.5 * np.std(diff_nn_intervals, ddof=1) ** 2)
    T = 4 * sd1  # transverse (short) axis
    L = 4 * sd2  # longitudinal (long) axis
    return L / T
       
 #independent function to calculate CVI
def calc_CVI(list):
    """Cardiac Vagal Index: log10 of the product (4 * SD1) * (4 * SD2)."""
    successive = np.diff(list)
    sd1 = np.sqrt(np.std(successive, ddof=1) ** 2 * 0.5)
    sd2 = np.sqrt(2 * np.std(list, ddof=1) ** 2
                  - 0.5 * np.std(successive, ddof=1) ** 2)
    scaled_product = (4 * sd1) * (4 * sd2)
    return np.log10(scaled_product)
 
 #independent function to calculate modified CVI
def calc_modifiedCVI(list):
    """Modified cardiac index: L ** 2 / T.

    L is the longitudinal extent of the Poincare cloud (4 * SD2) and T the
    transverse extent (4 * SD1). The two were previously swapped.
    """
    diff_nn_intervals = np.diff(list)
    sd1 = np.sqrt(np.std(diff_nn_intervals, ddof=1) ** 2 * 0.5)
    sd2 = np.sqrt(2 * np.std(list, ddof=1) ** 2
                  - 0.5 * np.std(diff_nn_intervals, ddof=1) ** 2)
    T = 4 * sd1  # transverse (short) axis
    L = 4 * sd2  # longitudinal (long) axis
    return L ** 2 / T


def calc_meanrr(list):
    """Mean gap between successive beats found by ``detect_peaks`` / ``group_peaks``."""
    peak_samples, _similarity = detect_peaks(list, threshold=0.3)
    # one position per beat (hopefully); RR intervals are the gaps between them
    rr = np.diff(group_peaks(peak_samples))
    return np.mean(rr)

def calc_medianrr(list):
    """Median gap between successive beats found by ``detect_peaks`` / ``group_peaks``."""
    peak_samples, _similarity = detect_peaks(list, threshold=0.3)
    # one position per beat (hopefully); RR intervals are the gaps between them
    rr = np.diff(group_peaks(peak_samples))
    return np.median(rr)

    
def calc_hr(list):
    """Mean of ``60000 / rr`` over the RR intervals of ``list``.

    NOTE(review): this is beats per minute only if one index step of the
    signal equals one millisecond - confirm the sampling rate.
    """
    peak_samples, _similarity = detect_peaks(list, threshold=0.3)
    # one position per beat (hopefully); RR intervals are the gaps between them
    rr = np.diff(group_peaks(peak_samples))
    return np.mean(60000 / rr)

    

In [None]:
extract_frame = train_d_frame.copy()

# One list per feature, each with one value per recording.
# NOTE(review): rmssd, avrr, sdrr, skew, kurt and the SD1/SD2 family are
# applied to the signal samples themselves, while the peak-based features work
# on RR intervals derived from them - confirm this is intended.
signals = extract_frame['feature']

rmssd = [calc_rmssd(sig) for sig in signals]
avrr = [calc_avrr(sig) for sig in signals]
sdrr = [calc_sdrr(sig) for sig in signals]
skew_ = [calc_skew(sig) for sig in signals]
kurt = [calc_kurt(sig) for sig in signals]
SD1 = [calc_SD1(sig) for sig in signals]
SD2 = [calc_SD2(sig) for sig in signals]
CSI = [calc_CSI(sig) for sig in signals]
CVI = [calc_CVI(sig) for sig in signals]
modifiedCVI = [calc_modifiedCVI(sig) for sig in signals]
SD1overSD2 = [calc_SD1overSD2(sig) for sig in signals]

# The peak-based helpers are handed a pandas Series.
NNx = [calc_NNx(pd.Series(sig)) for sig in signals]
pNNx = [calc_pNNx(pd.Series(sig)) for sig in signals]
meanrr = [calc_meanrr(pd.Series(sig)) for sig in signals]
medianrr = [calc_medianrr(pd.Series(sig)) for sig in signals]
hr = [calc_hr(pd.Series(sig)) for sig in signals]

sdrr_rmssd = [sd / rms for sd, rms in zip(sdrr, rmssd)]

# Insertion order fixes the column order of the frame.
feature_columns = {
    'meanrr': meanrr,
    'medianrr': medianrr,
    'sdrr': sdrr,
    'rmssd': rmssd,
    'sdrr_rmssd': sdrr_rmssd,
    'hr': hr,
    'NNx': NNx,
    'pNNx': pNNx,
    'sd1': SD1,
    'sd2': SD2,
    'avrr': avrr,
    'skew': skew_,
    'kurt': kurt,
    'csi': CSI,
    'cvi': CVI,
    'modifiedcvi': modifiedCVI,
}
for column_name, values in feature_columns.items():
    extract_frame[column_name] = values

extract_frame = extract_frame.drop(columns=['Valence level', 'Arousal level', 'Dominance level'])
# Snapshot that still holds the signal, the target emotion and every feature.
extract_frame2 = extract_frame.copy()
extract_frame = extract_frame.drop(columns=['feature', 'Target Emotion',
                                            'Valence', 'Arousal',
                                            'avrr', 'csi', 'cvi', 'modifiedcvi'])
extract_frame.head()

In [None]:
from sklearn import preprocessing
import seaborn as sns

In [None]:
#extract_frame = extract_frame2.drop(['feature'], axis = 1)
#extract_frame = extract_frame2.drop(['Target Emotion'], axis = 1)
# Replace the emotion names by integer class ids; `le` stays fitted so the ids
# can be mapped back to names later.
le = preprocessing.LabelEncoder()
extract_frame['emotion'] = le.fit_transform(extract_frame['emotion'])

In [None]:
# Pairwise correlation of the encoded label and all features, drawn as a heatmap.
corr = extract_frame.corr()
fig, ax = plt.subplots(figsize=(12, 10))
sns.heatmap(corr, annot=False, cmap=plt.cm.Reds, ax=ax)
plt.show()

In [None]:
def feature_selection(correlation, threshold):
    """Keep the columns whose correlation with the first column exceeds ``threshold``.

    Parameters
    ----------
    correlation : pandas.DataFrame
        Data frame to select from (this is what the notebook passes in); its
        first column is treated as the target. The correlation matrix is
        computed from this frame instead of being read from globals.
    threshold : float
        Minimum (signed) correlation with the target for a column to be kept.
        Negatively correlated columns are therefore discarded.

    Returns
    -------
    pandas.DataFrame
        Copy of the selected columns with their original dtypes. The target
        column correlates perfectly with itself and is kept for any
        threshold below 1.
    """
    frame = correlation
    target_correlation = frame.corr().iloc[:, 0]
    # Selected by name, so the result does not rely on the correlation matrix
    # and the frame having their columns at the same positions.
    selected = [name for name, value in target_correlation.items() if value > threshold]
    return frame.loc[:, selected].copy()

In [None]:
#extract_frame.head()

In [None]:
CORRELATION_THRESHOLD = 0.1

# Keep the label plus the features selected by the correlation threshold.
extract_frame = feature_selection(extract_frame, CORRELATION_THRESHOLD)

y = extract_frame['emotion']
x = extract_frame.drop(columns=['emotion'])
# 80 / 20 split with a fixed seed.
train_x, test_x, train_y, test_y = train_test_split(
    x, y, test_size=0.2, train_size=0.8, random_state=0
)

extract_frame.head()

In [None]:
#fig, axarr = plt.subplots(2, 2, figsize=(15, 9))
#processed_frame['rmssd'][:70].plot(ax=axarr[0][0])
#processed_frame['sdrr'][:70].plot(ax=axarr[1][0])
#processed_frame['skew'][:70].plot(ax=axarr[0][1])
#processed_frame['kurt'][:70].plot(ax=axarr[1][1])
#axarr[0][0].set_title("rmssd")
#axarr[0][1].set_title("sdrr")
#axarr[1][0].set_title("skew")
#axarr[1][1].set_title("kurt")
#plt.subplots_adjust(hspace=.3)

In [None]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import classification_report
from sklearn.ensemble import RandomForestClassifier


# Both estimators are randomised; the seed is fixed (same value as the
# train/test split) so a re-run reproduces the reported scores.
dt_model = DecisionTreeClassifier(criterion="entropy", max_depth=12, random_state=0)
dt_model = dt_model.fit(train_x, train_y)
dt_pred_y = dt_model.predict(test_x)

rf_model = RandomForestClassifier(random_state=0)
rf_model.fit(train_x, train_y)
rfpred_y = rf_model.predict(test_x)

**Decision Tree**

In [None]:
# Text report comparing the decision-tree predictions with the held-out labels.
print(classification_report(test_y,dt_pred_y))

**Random Forest**

In [None]:
# Text report comparing the random-forest predictions with the held-out labels.
print(classification_report(test_y,rfpred_y))

In [None]:
def get_plot_ranges(start=10, end=20, n=5):
    '''
    Yield consecutive (from, to) ranges that cover start..end.

    The step is floor((end - start) / n), but at least 1.
    - if end-start is divisible by n, n ranges are produced
    - otherwise extra, possibly shorter, ranges follow; the last one ends at `end`

    # Example:
    >> list(get_plot_ranges(0, 9, 3))
    >> [(0, 3), (3, 6), (6, 9)]
    >> list(get_plot_ranges(0, 10, 3))
    >> [(0, 3), (3, 6), (6, 9), (9, 10)]
    '''
    if n < 1:
        raise ValueError("n must be at least 1")
    distance = end - start
    if distance <= 0:
        # nothing to cover
        return
    # A step of 0 (distance < n) would make np.arange fail.
    step = max(1, int(np.floor(distance / n)))
    for i in np.arange(start, end, step):
        yield (int(i), int(np.minimum(end, step + i)))

In [None]:
#sampfrom = 0
#sampto = 5000
#nr_plots = 1

#for start, stop in get_plot_ranges(sampfrom, sampto, nr_plots):
    # get slice data of ECG data
    #print(start)
    #print(stop)
#    cond_slice = (extract_frame.index >= start) & (extract_frame.index < stop) 
#    ecg_slice = extract_frame.heartrate[cond_slice]

    # detect peaks
#    peaks, similarity = detect_peaks(ecg_slice, threshold=0.3)
    
    # plot similarity
#    plt.figure(figsize=(20, 15))

#    plt.subplot(211)
#    plt.title("ECG signal with found peaks")
#    plt.plot(ecg_slice.index, ecg_slice, label="ECG", color="#51A6D8", linewidth=1)
#    plt.plot(peaks, np.repeat(600, peaks.shape[0]), label="peaks", color="orange", marker="o", linestyle="None")
#    plt.legend(loc="upper right")
#    plt.xlabel("Time (milliseconds)")
#    plt.ylabel("Amplitude (arbitrary unit)")
    
 #   plt.subplot(212)
 #   plt.title('Similarity with QRS template')
 #   plt.plot(ecg_slice.index, similarity, label="Similarity with QRS filter", color="olive", linewidth=1)
 #   plt.legend(loc="upper right")
 #   plt.xlabel("Time (milliseconds)")
 #   plt.ylabel("Similarity (normalized)")

In [None]:
# detect peaks
#peaks, similarity = detect_peaks(df.heartrate, threshold=0.3)

# group peaks
#grouped_peaks = group_peaks(peaks)

# plot peaks
#plt.figure(figsize=(20, 7))
#plt.title("Group similar peaks together")
#plt.plot(df.index, df.heartrate, label="ECG", color="#51A6D8", linewidth=2)
#plt.plot(peaks, np.repeat(600, peaks.shape[0]),label="samples above threshold (found peaks)", color="orange", marker="o", linestyle="None")
#plt.plot(grouped_peaks, np.repeat(620, grouped_peaks.shape[0]), label="median of found peaks", color="k", marker="v", linestyle="None")
#plt.legend(loc="upper right")
#plt.xlabel("Time (ms)")
#plt.ylabel("Amplitude (arbitrary unit)")
#plt.gca().set_xlim(0, 200)
#plt.show()

In [None]:
# detect peaks
#peaks, similarity = detect_peaks(df.heartrate, threshold=0.3)

# group peaks so we get a single peak per beat (hopefully)
#grouped_peaks = group_peaks(peaks)

# RR-intervals are the differences between successive peaks
#rr = np.diff(grouped_peaks)

# plot RR-intervals
#plt.figure(figsize=(20, 7))
#plt.title("RR-intervals")
#plt.xlabel("Time (ms)")
#plt.ylabel("RR-interval (ms)")

#plt.plot(np.cumsum(rr), rr, label="RR-interval", color="#A651D8")
#plt.show()

In [None]:
#plt.figure(figsize=(20, 7))
#plt.title("Distribution of RR-intervals")

#with warnings.catch_warnings():
#    warnings.simplefilter("ignore") # ignore FutureWarning 
#    sns.kdeplot(rr, label="rr-intervals", color="#A651D8", shade=True)

#outlier_low = np.mean(rr) - 2 * np.std(rr)
#outlier_high = np.mean(rr) + 2 * np.std(rr)

#plt.axvline(x=outlier_low)
#plt.axvline(x=outlier_high, label="outlier boundary")
#plt.text(outlier_low - 370, 0.004, "outliers low (< mean - 2 sigma)")
#plt.text(outlier_high + 20, 0.004, "outliers high (> mean + 2 sigma)")

#plt.xlabel("RR-interval (ms)")
#plt.ylabel("Density")

#plt.legend()
#plt.show()

In [None]:
#plt.figure(figsize=(20, 7))

#rr_corrected = rr.copy()
#rr_corrected[np.abs(zscore(rr)) > 2] = np.median(rr)

#plt.title("RR-intervals")
#plt.xlabel("Time (ms)")
#plt.ylabel("RR-interval (ms)")

#plt.plot(rr, color="red", label="RR-intervals")
#plt.plot(rr_corrected, color="green",  label="RR-intervals after correction")
#plt.legend()
#plt.show()