# Peak Detection: HRV plotting and clean snippets extraction
This notebook is made for detecting peaks of the snippets extracted in module 2. Then, it plots the HRV graphs from the snippets with the function found in "4.0-HRV_Extraction_Function.ipynb" and, manually selected, the cleaned snippets are saved into another folder inside the database with another json file.

Checkout list:
- Going through the database and calculating the HRV in the time domain.
- Plotting the HRV graphs in the time domain.
- Manually selection of the clean HRV graphs.
- Stored them into another folder and flag a new json file to detect them a posteriori.


In [2]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import json
import pyarrow.feather as feather
from tqdm import tqdm

from misc.render import *
from misc.synthetic_noise import *
from misc.noise_filtering import *
from misc.peakdetection import *
from misc.hrv_extraction import *

In [3]:
#root = 'G:\My Drive\Molecool\Databases\Database1'
database_root = '/Volumes/GoogleDrive/.shortcut-targets-by-id/1sZgDE1M3o-bDINfAQYEXl_vqsKs03WxD/Molecool/Databases/Database1'

#cwd = os.getcwd()
#database_root = os.path.join(cwd,"database") # For Matthieu

In [4]:
def write_peaks(path,rate):
    """Function to open snippet files, get peaks, and write them to a new file"""
    df = pd.read_feather(os.path.join(path,'MSNIP.ftr'))
    df_peaks = filtering_det(df['ecg_0'],rate)
    feather.write_feather(df_peaks, os.path.join(path,'PEAKS.ftr'))

In [5]:
def write_meta(path):
    """Function that creates a new meta file for peak files"""
    data = {'Error Flag': False, 'Error Type': 'No error'}
    with open(os.path.join(path,'PeakMeta.json'), "w") as outfile:
        json.dump(data, outfile)
        outfile.close()

In [6]:
def write_meta_clean(path,length):
    """Function that creates a new meta file for cleaned HRV snippets"""
    data = {'Error Flag': False, 'Length': length, 'Error Type': 'No error'}
    with open(os.path.join(path,'CleanPeakMeta.json'), "w") as outfile:
        json.dump(data, outfile)
        outfile.close()

In [7]:
def find_rate(path):
    """Function which check the meta json file to see the sampling frequency of the original signal"""
    with open(path + '/Meta.json') as json_file:
        metaData = json.load(json_file)
        json_file.close() # Not needed  when you use context
        return int(metaData['Sampling rate'])

In [9]:
def write_hrv(path,rate):
    """Function which goes directly to the manually chosen snippets and saves them into another folder, flagging the json file"""
    if path == root + "/S0250/holter/Snippet001":
        df = pd.read_feather(path_snip + '/PEAKS.ftr')   #inputting the ecg feather files in a data frame
        df_hrv = RR_calculator(df)
        df_clean = pd.DataFrame()
        x_values = []
        rr_values = []
        for index,value in enumerate(df_hrv['x_values']):
            if value >= 45000 and value <= 220000:
                x_values.append(value)
                rr_values.append(df_hrv['R-R Interval Final'][index])
        df_clean['R-R Interval Final'] = rr_values
        df_clean['x_values'] = x_values
        df_clean = _outlier_removal(df_clean)
        rr = df_clean['R-R Interval Final'].to_list()
        list_end = cos_correction(rr)
        df_clean['R-R Interval Final'] = list_end
        feather.write_feather(df_clean, (path_snip + '/clean_snippet.ftr'))
        read_meta_clean(path,len(df_clean['R-R Interval Final']))
    if path == root + "/S0273/holter/Snippet000":
        df = pd.read_feather(path_snip + '/PEAKS.ftr')   #inputting the ecg feather files in a data frame
        df_hrv = RR_calculator(df)
        df_clean = pd.DataFrame()
        x_values = []
        rr_values = []
        for index,value in enumerate(df_hrv['x_values']):
            if value >= 50000 and value <= 300000:
                x_values.append(value)
                rr_values.append(df_hrv['R-R Interval Final'][index])
        df_clean['R-R Interval Final'] = rr_values
        df_clean['x_values'] = x_values
        df_clean = _outlier_removal(df_clean)
        rr = df_clean['R-R Interval Final'].to_list()
        list_end = cos_correction(rr)
        df_clean['R-R Interval Final'] = list_end
        feather.write_feather(df_clean, (path_snip + '/clean_snippet.ftr'))
        read_meta_clean(path,len(df_clean['R-R Interval Final']))
    if path == root + "/S0273/holter/Snippet002":
        df = pd.read_feather(path_snip + '/PEAKS.ftr')   #inputting the ecg feather files in a data frame
        df_hrv = RR_calculator(df)
        df_clean = pd.DataFrame()
        x_values = []
        rr_values = []
        for index,value in enumerate(df_hrv['x_values']):
            if value >= 0 and value <= 250000:
                x_values.append(value)
                rr_values.append(df_hrv['R-R Interval Final'][index])
        df_clean['R-R Interval Final'] = rr_values
        df_clean['x_values'] = x_values
        df_clean = _outlier_removal(df_clean)
        rr = df_clean['R-R Interval Final'].to_list()
        list_end = cos_correction(rr)
        df_clean['R-R Interval Final'] = list_end
        feather.write_feather(df_clean, (path_snip + '/clean_snippet.ftr'))
        read_meta_clean(path,len(df_clean['R-R Interval Final']))
    if path == root + "/S0282/holter/Snippet002":
        df = pd.read_feather(path_snip + '/PEAKS.ftr')   #inputting the ecg feather files in a data frame
        df_hrv = RR_calculator(df)
        df_clean = pd.DataFrame()
        x_values = []
        rr_values = []
        for index,value in enumerate(df_hrv['x_values']):
            if value >= 0 and value <= 100500:
                x_values.append(value)
                rr_values.append(df_hrv['R-R Interval Final'][index])
        df_clean['R-R Interval Final'] = rr_values
        df_clean['x_values'] = x_values
        df_clean = _outlier_removal(df_clean)
        rr = df_clean['R-R Interval Final'].to_list()
        list_end = cos_correction(rr)
        df_clean['R-R Interval Final'] = list_end
        feather.write_feather(df_clean, (path_snip + '/clean_snippet.ftr'))
        read_meta_clean(path,len(df_clean['R-R Interval Final']))
    if path == root + "/S0283/holter/Snippet002":
        df = pd.read_feather(path_snip + '/PEAKS.ftr')   #inputting the ecg feather files in a data frame
        df_hrv = RR_calculator(df)
        df_clean = pd.DataFrame()
        x_values = []
        rr_values = []
        for index,value in enumerate(df_hrv['x_values']):
            if value >= 50000 and value <= 200000:
                x_values.append(value)
                rr_values.append(df_hrv['R-R Interval Final'][index])
        df_clean['R-R Interval Final'] = rr_values
        df_clean['x_values'] = x_values
        df_clean = _outlier_removal(df_clean)
        rr = df_clean['R-R Interval Final'].to_list()
        list_end = cos_correction(rr)
        df_clean['R-R Interval Final'] = list_end
        feather.write_feather(df_clean, (path_snip + '/clean_snippet.ftr'))
        read_meta_clean(path,len(df_clean['R-R Interval Final']))
    if path == root + "/S0288/holter/Snippet001":
        df = pd.read_feather(path_snip + '/PEAKS.ftr')   #inputting the ecg feather files in a data frame
        df_hrv = RR_calculator(df)
        df_clean = pd.DataFrame()
        x_values = []
        rr_values = []
        for index,value in enumerate(df_hrv['x_values']):
            if value >= 20000 and value <= 300000:
                x_values.append(value)
                rr_values.append(df_hrv['R-R Interval Final'][index])
        df_clean['R-R Interval Final'] = rr_values
        df_clean['x_values'] = x_values
        df_clean = _outlier_removal(df_clean)
        rr = df_clean['R-R Interval Final'].to_list()
        list_end = cos_correction(rr)
        df_clean['R-R Interval Final'] = list_end
        feather.write_feather(df_clean, (path_snip + '/clean_snippet.ftr'))
        read_meta_clean(path,len(df_clean['R-R Interval Final']))
    if path == root + "/S0300/holter/Snippet001":
        df = pd.read_feather(path_snip + '/PEAKS.ftr')   #inputting the ecg feather files in a data frame
        df_hrv = RR_calculator(df)
        df_clean = pd.DataFrame()
        x_values = []
        rr_values = []
        for index,value in enumerate(df_hrv['x_values']):
            if value >= 100000 and value <= 200400:
                x_values.append(value)
                rr_values.append(df_hrv['R-R Interval Final'][index])
        df_clean['R-R Interval Final'] = rr_values
        df_clean['x_values'] = x_values
        df_clean = _outlier_removal(df_clean)
        rr = df_clean['R-R Interval Final'].to_list()
        list_end = cos_correction(rr)
        df_clean['R-R Interval Final'] = list_end
        feather.write_feather(df_clean, (path_snip + '/clean_snippet.ftr'))
        read_meta_clean(path,len(df_clean['R-R Interval Final']))
    if path == root + "/S0310/holter/Snippet000":
        df = pd.read_feather(path_snip + '/PEAKS.ftr')   #inputting the ecg feather files in a data frame
        df_hrv = RR_calculator(df)
        df_clean = pd.DataFrame()
        x_values = []
        rr_values = []
        for index,value in enumerate(df_hrv['x_values']):
            if value >= 100000 and value <= 300000:
                x_values.append(value)
                rr_values.append(df_hrv['R-R Interval Final'][index])
        df_clean['R-R Interval Final'] = rr_values
        df_clean['x_values'] = x_values
        df_clean = _outlier_removal(df_clean)
        rr = df_clean['R-R Interval Final'].to_list()
        list_end = cos_correction(rr)
        df_clean['R-R Interval Final'] = list_end
        feather.write_feather(df_clean, (path_snip + '/clean_snippet.ftr'))
        read_meta_clean(path,len(df_clean['R-R Interval Final']))
    if path == root + "/S0317/holter/Snippet001":
        df = pd.read_feather(path_snip + '/PEAKS.ftr')   #inputting the ecg feather files in a data frame
        df_hrv = RR_calculator(df)
        df_clean = pd.DataFrame()
        x_values = []
        rr_values = []
        for index,value in enumerate(df_hrv['x_values']):
            if value >= 50000 and value <= 300000:
                x_values.append(value)
                rr_values.append(df_hrv['R-R Interval Final'][index])
        df_clean['R-R Interval Final'] = rr_values
        df_clean['x_values'] = x_values
        df_clean = _outlier_removal(df_clean)
        rr = df_clean['R-R Interval Final'].to_list()
        list_end = cos_correction(rr)
        df_clean['R-R Interval Final'] = list_end
        feather.write_feather(df_clean, (path_snip + '/clean_snippet.ftr'))
        read_meta_clean(path,len(df_clean['R-R Interval Final']))
    if path == root + "/S0318/holter/Snippet002":
        df = pd.read_feather(path_snip + '/PEAKS.ftr')   #inputting the ecg feather files in a data frame
        df_hrv = RR_calculator(df)
        df_clean = pd.DataFrame()
        x_values = []
        rr_values = []
        for index,value in enumerate(df_hrv['x_values']):
            if value >= 0 and value <= 250000:
                x_values.append(value)
                rr_values.append(df_hrv['R-R Interval Final'][index])
        df_clean['R-R Interval Final'] = rr_values
        df_clean['x_values'] = x_values
        df_clean = _outlier_removal(df_clean)
        rr = df_clean['R-R Interval Final'].to_list()
        list_end = cos_correction(rr)
        df_clean['R-R Interval Final'] = list_end
        feather.write_feather(df_clean, (path_snip + '/clean_snippet.ftr'))
        read_meta_clean(path,len(df_clean['R-R Interval Final']))
    if path == root + "/S0366/holter/Snippet002":
        df = pd.read_feather(path_snip + '/PEAKS.ftr')   #inputting the ecg feather files in a data frame
        df_hrv = RR_calculator(df)
        df_clean = pd.DataFrame()
        x_values = []
        rr_values = []
        for index,value in enumerate(df_hrv['x_values']):
            if value >= 100000 and value <= 300000:
                x_values.append(value)
                rr_values.append(df_hrv['R-R Interval Final'][index])
        df_clean['R-R Interval Final'] = rr_values
        df_clean['x_values'] = x_values
        df_clean = _outlier_removal(df_clean)
        rr = df_clean['R-R Interval Final'].to_list()
        list_end = cos_correction(rr)
        df_clean['R-R Interval Final'] = list_end
        feather.write_feather(df_clean, (path_snip + '/clean_snippet.ftr'))
        read_meta_clean(path,len(df_clean['R-R Interval Final']))
    if path == root + "/S0372/holter/Snippet000":
        df = pd.read_feather(path_snip + '/PEAKS.ftr')   #inputting the ecg feather files in a data frame
        df_hrv = RR_calculator(df)
        df_clean = pd.DataFrame()
        x_values = []
        rr_values = []
        for index,value in enumerate(df_hrv['x_values']):
            if value >= 0 and value <= 250000:
                x_values.append(value)
                rr_values.append(df_hrv['R-R Interval Final'][index])
        df_clean['R-R Interval Final'] = rr_values
        df_clean['x_values'] = x_values
        df_clean = _outlier_removal(df_clean)
        rr = df_clean['R-R Interval Final'].to_list()
        list_end = cos_correction(rr)
        df_clean['R-R Interval Final'] = list_end
        feather.write_feather(df_clean, (path_snip + '/clean_snippet.ftr'))
        read_meta_clean(path,len(df_clean['R-R Interval Final']))
    if path == root + "/S0381/holter/Snippet002":
        df = pd.read_feather(path_snip + '/PEAKS.ftr')   #inputting the ecg feather files in a data frame
        df_hrv = RR_calculator(df)
        df_clean = pd.DataFrame()
        x_values = []
        rr_values = []
        for index,value in enumerate(df_hrv['x_values']):
            if value >= 0 and value <= 2300000:
                x_values.append(value)
                rr_values.append(df_hrv['R-R Interval Final'][index])
        df_clean['R-R Interval Final'] = rr_values
        df_clean['x_values'] = x_values
        df_clean = _outlier_removal(df_clean)
        rr = df_clean['R-R Interval Final'].to_list()
        list_end = cos_correction(rr)
        df_clean['R-R Interval Final'] = list_end
        feather.write_feather(df_clean, (path_snip + '/clean_snippet.ftr'))
        read_meta_clean(path,len(df_clean['R-R Interval Final']))
    if path == root + "/S0398/holter/Snippet001":
        df = pd.read_feather(path_snip + '/PEAKS.ftr')   #inputting the ecg feather files in a data frame
        df_hrv = RR_calculator(df)
        df_clean = pd.DataFrame()
        x_values = []
        rr_values = []
        for index,value in enumerate(df_hrv['x_values']):
            if value >= 90000 and value <= 200000:
                x_values.append(value)
                rr_values.append(df_hrv['R-R Interval Final'][index])
        df_clean['R-R Interval Final'] = rr_values
        df_clean['x_values'] = x_values
        df_clean = _outlier_removal(df_clean)
        rr = df_clean['R-R Interval Final'].to_list()
        list_end = cos_correction(rr)
        df_clean['R-R Interval Final'] = list_end
        feather.write_feather(df_clean, (path_snip + '/clean_snippet.ftr'))
        read_meta_clean(path,len(df_clean['R-R Interval Final']))
    if path == root + "/S0416/holter/Snippet002":
        df = pd.read_feather(path_snip + '/PEAKS.ftr')   #inputting the ecg feather files in a data frame
        df_hrv = RR_calculator(df)
        df_clean = pd.DataFrame()
        x_values = []
        rr_values = []
        for index,value in enumerate(df_hrv['x_values']):
            if value >= 100000 and value <= 300000:
                x_values.append(value)
                rr_values.append(df_hrv['R-R Interval Final'][index])
        df_clean['R-R Interval Final'] = rr_values
        df_clean['x_values'] = x_values
        df_clean = _outlier_removal(df_clean)
        rr = df_clean['R-R Interval Final'].to_list()
        list_end = cos_correction(rr)
        df_clean['R-R Interval Final'] = list_end
        feather.write_feather(df_clean, (path_snip + '/clean_snippet.ftr'))
        read_meta_clean(path,len(df_clean['R-R Interval Final']))
    if path == root + "/S0424/holter/Snippet000":
        df = pd.read_feather(path_snip + '/PEAKS.ftr')   #inputting the ecg feather files in a data frame
        df_hrv = RR_calculator(df)
        df_clean = pd.DataFrame()
        x_values = []
        rr_values = []
        for index,value in enumerate(df_hrv['x_values']):
            if value >= 100000 and value <= 300000:
                x_values.append(value)
                rr_values.append(df_hrv['R-R Interval Final'][index])
        df_clean['R-R Interval Final'] = rr_values
        df_clean['x_values'] = x_values
        df_clean = _outlier_removal(df_clean)
        rr = df_clean['R-R Interval Final'].to_list()
        list_end = cos_correction(rr)
        df_clean['R-R Interval Final'] = list_end
        feather.write_feather(df_clean, (path_snip + '/clean_snippet.ftr'))
        read_meta_clean(path,len(df_clean['R-R Interval Final']))
    if path == root + "/S0435/holter/Snippet000":
        df = pd.read_feather(path_snip + '/PEAKS.ftr')   #inputting the ecg feather files in a data frame
        df_hrv = RR_calculator(df)
        df_clean = pd.DataFrame()
        x_values = []
        rr_values = []
        for index,value in enumerate(df_hrv['x_values']):
            if value >= 0 and value <= 250000:
                x_values.append(value)
                rr_values.append(df_hrv['R-R Interval Final'][index])
        df_clean['R-R Interval Final'] = rr_values
        df_clean['x_values'] = x_values
        df_clean = _outlier_removal(df_clean)
        rr = df_clean['R-R Interval Final'].to_list()
        list_end = cos_correction(rr)
        df_clean['R-R Interval Final'] = list_end
        feather.write_feather(df_clean, (path_snip + '/clean_snippet.ftr'))
        read_meta_clean(path,len(df_clean['R-R Interval Final']))
    if path == root + "/S0435/holter/Snippet004":
        df = pd.read_feather(path_snip + '/PEAKS.ftr')   #inputting the ecg feather files in a data frame
        df_hrv = RR_calculator(df)
        df_clean = pd.DataFrame()
        x_values = []
        rr_values = []
        for index,value in enumerate(df_hrv['x_values']):
            if value >= 25000 and value <= 200000:
                x_values.append(value)
                rr_values.append(df_hrv['R-R Interval Final'][index])
        df_clean['R-R Interval Final'] = rr_values
        df_clean['x_values'] = x_values
        df_clean = _outlier_removal(df_clean)
        rr = df_clean['R-R Interval Final'].to_list()
        list_end = cos_correction(rr)
        df_clean['R-R Interval Final'] = list_end
        feather.write_feather(df_clean, (path_snip + '/clean_snippet.ftr'))
        read_meta_clean(path,len(df_clean['R-R Interval Final']))
    if path == root + "/S0273/holter/Snippet001":
        df = pd.read_feather(path_snip + '/PEAKS.ftr')   #inputting the ecg feather files in a data frame
        df_hrv = RR_calculator(df)
        df_clean = pd.DataFrame()
        df_clean['R-R Interval Final'] = df_hrv['R-R Interval Final']
        df_clean['x_values'] = df_hrv['x_values']
        df_clean = _outlier_removal(df_clean)
        rr = df_clean['R-R Interval Final'].to_list()
        list_end = cos_correction(rr)
        df_clean['R-R Interval Final'] = list_end
        feather.write_feather(df_clean, (path_snip + '/clean_snippet.ftr'))
        read_meta_clean(path,len(df_clean['R-R Interval Final']))
    if path == root + "/S0317/holter/Snippet000":
        df = pd.read_feather(path_snip + '/PEAKS.ftr')   #inputting the ecg feather files in a data frame
        df_hrv = RR_calculator(df)
        df_clean = pd.DataFrame()
        df_clean['R-R Interval Final'] = df_hrv['R-R Interval Final']
        df_clean['x_values'] = df_hrv['x_values']
        df_clean = _outlier_removal(df_clean)
        rr = df_clean['R-R Interval Final'].to_list()
        list_end = cos_correction(rr)
        df_clean['R-R Interval Final'] = list_end
        feather.write_feather(df_clean, (path_snip + '/clean_snippet.ftr'))
        read_meta_clean(path,len(df_clean['R-R Interval Final']))
    if path == root + "/S0326/holter/Snippet000":
        df = pd.read_feather(path_snip + '/PEAKS.ftr')   #inputting the ecg feather files in a data frame
        df_hrv = RR_calculator(df)
        df_clean = pd.DataFrame()
        df_clean['R-R Interval Final'] = df_hrv['R-R Interval Final']
        df_clean['x_values'] = df_hrv['x_values']
        df_clean = _outlier_removal(df_clean)
        rr = df_clean['R-R Interval Final'].to_list()
        list_end = cos_correction(rr)
        df_clean['R-R Interval Final'] = list_end
        feather.write_feather(df_clean, (path_snip + '/clean_snippet.ftr'))
        read_meta_clean(path,len(df_clean['R-R Interval Final']))
    if path == root + "/S0368/holter/Snippet001":
        df = pd.read_feather(path_snip + '/PEAKS.ftr')   #inputting the ecg feather files in a data frame
        df_hrv = RR_calculator(df)
        df_clean = pd.DataFrame()
        df_clean['R-R Interval Final'] = df_hrv['R-R Interval Final']
        df_clean['x_values'] = df_hrv['x_values']
        df_clean = _outlier_removal(df_clean)
        rr = df_clean['R-R Interval Final'].to_list()
        list_end = cos_correction(rr)
        df_clean['R-R Interval Final'] = list_end
        feather.write_feather(df_clean, (path_snip + '/clean_snippet.ftr'))
        read_meta_clean(path,len(df_clean['R-R Interval Final']))
    if path == root + "/S0390/holter/Snippet002":
        df = pd.read_feather(path_snip + '/PEAKS.ftr')   #inputting the ecg feather files in a data frame
        df_hrv = RR_calculator(df)
        df_clean = pd.DataFrame()
        df_clean['R-R Interval Final'] = df_hrv['R-R Interval Final']
        df_clean['x_values'] = df_hrv['x_values']
        df_clean = _outlier_removal(df_clean)
        rr = df_clean['R-R Interval Final'].to_list()
        list_end = cos_correction(rr)
        df_clean['R-R Interval Final'] = list_end
        feather.write_feather(df_clean, (path_snip + '/clean_snippet.ftr'))
        read_meta_clean(path,len(df_clean['R-R Interval Final']))
    if path == root + "/S0409/holter/Snippet000":
        df = pd.read_feather(path_snip + '/PEAKS.ftr')   #inputting the ecg feather files in a data frame
        df_hrv = RR_calculator(df)
        df_clean = pd.DataFrame()
        df_clean['R-R Interval Final'] = df_hrv['R-R Interval Final']
        df_clean['x_values'] = df_hrv['x_values']
        df_clean = _outlier_removal(df_clean)
        rr = df_clean['R-R Interval Final'].to_list()
        list_end = cos_correction(rr)
        df_clean['R-R Interval Final'] = list_end
        feather.write_feather(df_clean, (path_snip + '/clean_snippet.ftr'))
        read_meta_clean(path,len(df_clean['R-R Interval Final']))
    if path == root + "/S0420/holter/Snippet004":
        df = pd.read_feather(path_snip + '/PEAKS.ftr')   #inputting the ecg feather files in a data frame
        df_hrv = RR_calculator(df)
        df_clean = pd.DataFrame()
        df_clean['R-R Interval Final'] = df_hrv['R-R Interval Final']
        df_clean['x_values'] = df_hrv['x_values']
        df_clean = _outlier_removal(df_clean)
        rr = df_clean['R-R Interval Final'].to_list()
        list_end = cos_correction(rr)
        df_clean['R-R Interval Final'] = list_end
        feather.write_feather(df_clean, (path_snip + '/clean_snippet.ftr'))
        read_meta_clean(path,len(df_clean['R-R Interval Final']))
    if path == root + "/S0423/holter/Snippet000":
        df = pd.read_feather(path_snip + '/PEAKS.ftr')   #inputting the ecg feather files in a data frame
        df_hrv = RR_calculator(df)
        df_clean = pd.DataFrame()
        df_clean['R-R Interval Final'] = df_hrv['R-R Interval Final']
        df_clean['x_values'] = df_hrv['x_values']
        df_clean = _outlier_removal(df_clean)
        rr = df_clean['R-R Interval Final'].to_list()
        list_end = cos_correction(rr)
        df_clean['R-R Interval Final'] = list_end
        feather.write_feather(df_clean, (path_snip + '/clean_snippet.ftr'))
        read_meta_clean(path,len(df_clean['R-R Interval Final']))
    if path == root + "/S0423/holter/Snippet002":
        df = pd.read_feather(path_snip + '/PEAKS.ftr')   #inputting the ecg feather files in a data frame
        df_hrv = RR_calculator(df)
        df_clean = pd.DataFrame()
        df_clean['R-R Interval Final'] = df_hrv['R-R Interval Final']
        df_clean['x_values'] = df_hrv['x_values']
        df_clean = _outlier_removal(df_clean)
        rr = df_clean['R-R Interval Final'].to_list()
        list_end = cos_correction(rr)
        df_clean['R-R Interval Final'] = list_end
        feather.write_feather(df_clean, (path_snip + '/clean_snippet.ftr'))
        read_meta_clean(path,len(df_clean['R-R Interval Final']))
    if path == root + "/S0423/holter/Snippet003":
        df = pd.read_feather(path_snip + '/PEAKS.ftr')   #inputting the ecg feather files in a data frame
        df_hrv = RR_calculator(df)
        df_clean = pd.DataFrame()
        df_clean['R-R Interval Final'] = df_hrv['R-R Interval Final']
        df_clean['x_values'] = df_hrv['x_values']
        df_clean = _outlier_removal(df_clean)
        rr = df_clean['R-R Interval Final'].to_list()
        list_end = cos_correction(rr)
        df_clean['R-R Interval Final'] = list_end
        feather.write_feather(df_clean, (path_snip + '/clean_snippet.ftr'))
        read_meta_clean(path,len(df_clean['R-R Interval Final']))
    if path == root + "/S0423/holter/Snippet004":
        df = pd.read_feather(path_snip + '/PEAKS.ftr')   #inputting the ecg feather files in a data frame
        df_hrv = RR_calculator(df)
        df_clean = pd.DataFrame()
        df_clean['R-R Interval Final'] = df_hrv['R-R Interval Final']
        df_clean['x_values'] = df_hrv['x_values']
        df_clean = _outlier_removal(df_clean)
        rr = df_clean['R-R Interval Final'].to_list()
        list_end = cos_correction(rr)
        df_clean['R-R Interval Final'] = list_end
        feather.write_feather(df_clean, (path_snip + '/clean_snippet.ftr'))
        read_meta_clean(path,len(df_clean['R-R Interval Final']))
    if path == root + "/S0426/holter/Snippet002":
        df = pd.read_feather(path_snip + '/PEAKS.ftr')   #inputting the ecg feather files in a data frame
        df_hrv = RR_calculator(df)
        df_clean = pd.DataFrame()
        df_clean['R-R Interval Final'] = df_hrv['R-R Interval Final']
        df_clean['x_values'] = df_hrv['x_values']
        df_clean = _outlier_removal(df_clean)
        rr = df_clean['R-R Interval Final'].to_list()
        list_end = cos_correction(rr)
        df_clean['R-R Interval Final'] = list_end
        feather.write_feather(df_clean, (path_snip + '/clean_snippet.ftr'))
        read_meta_clean(path,len(df_clean['R-R Interval Final']))
    if path == root + "/S0426/holter/Snippet003":
        df = pd.read_feather(path_snip + '/PEAKS.ftr')   #inputting the ecg feather files in a data frame
        df_hrv = RR_calculator(df)
        df_clean = pd.DataFrame()
        df_clean['R-R Interval Final'] = df_hrv['R-R Interval Final']
        df_clean['x_values'] = df_hrv['x_values']
        df_clean = _outlier_removal(df_clean)
        rr = df_clean['R-R Interval Final'].to_list()
        list_end = cos_correction(rr)
        df_clean['R-R Interval Final'] = list_end
        feather.write_feather(df_clean, (path_snip + '/clean_snippet.ftr'))
        read_meta_clean(path,len(df_clean['R-R Interval Final']))
    if path == root + "/S0426/holter/Snippet004":
        df = pd.read_feather(path_snip + '/PEAKS.ftr')   #inputting the ecg feather files in a data frame
        df_hrv = RR_calculator(df)
        df_clean = pd.DataFrame()
        df_clean['R-R Interval Final'] = df_hrv['R-R Interval Final']
        df_clean['x_values'] = df_hrv['x_values']
        df_clean = _outlier_removal(df_clean)
        rr = df_clean['R-R Interval Final'].to_list()
        list_end = cos_correction(rr)
        df_clean['R-R Interval Final'] = list_end
        feather.write_feather(df_clean, (path_snip + '/clean_snippet.ftr'))
        read_meta_clean(path,len(df_clean['R-R Interval Final']))
    if path == root + "/S0427/holter/Snippet001":
        df = pd.read_feather(path_snip + '/PEAKS.ftr')   #inputting the ecg feather files in a data frame
        df_hrv = RR_calculator(df)
        df_clean = pd.DataFrame()
        df_clean['R-R Interval Final'] = df_hrv['R-R Interval Final']
        df_clean['x_values'] = df_hrv['x_values']
        df_clean = _outlier_removal(df_clean)
        rr = df_clean['R-R Interval Final'].to_list()
        list_end = cos_correction(rr)
        df_clean['R-R Interval Final'] = list_end
        feather.write_feather(df_clean, (path_snip + '/clean_snippet.ftr'))
        read_meta_clean(path,len(df_clean['R-R Interval Final']))
    if path == root + "/S0427/holter/Snippet002":
        df = pd.read_feather(path_snip + '/PEAKS.ftr')   #inputting the ecg feather files in a data frame
        df_hrv = RR_calculator(df)
        df_clean = pd.DataFrame()
        df_clean['R-R Interval Final'] = df_hrv['R-R Interval Final']
        df_clean['x_values'] = df_hrv['x_values']
        df_clean = _outlier_removal(df_clean)
        rr = df_clean['R-R Interval Final'].to_list()
        list_end = cos_correction(rr)
        df_clean['R-R Interval Final'] = list_end
        feather.write_feather(df_clean, (path_snip + '/clean_snippet.ftr'))
        read_meta_clean(path,len(df_clean['R-R Interval Final']))
    if path == root + "/S0427/holter/Snippet004":
        df = pd.read_feather(path_snip + '/PEAKS.ftr')   #inputting the ecg feather files in a data frame
        df_hrv = RR_calculator(df)
        df_clean = pd.DataFrame()
        df_clean['R-R Interval Final'] = df_hrv['R-R Interval Final']
        df_clean['x_values'] = df_hrv['x_values']
        df_clean = _outlier_removal(df_clean)
        rr = df_clean['R-R Interval Final'].to_list()
        list_end = cos_correction(rr)
        df_clean['R-R Interval Final'] = list_end
        feather.write_feather(df_clean, (path_snip + '/clean_snippet.ftr'))
        read_meta_clean(path,len(df_clean['R-R Interval Final']))
    if path == root + "/S0430/holter/Snippet003":
        df = pd.read_feather(path_snip + '/PEAKS.ftr')   #inputting the ecg feather files in a data frame
        df_hrv = RR_calculator(df)
        df_clean = pd.DataFrame()
        df_clean['R-R Interval Final'] = df_hrv['R-R Interval Final']
        df_clean['x_values'] = df_hrv['x_values']
        df_clean = _outlier_removal(df_clean)
        rr = df_clean['R-R Interval Final'].to_list()
        list_end = cos_correction(rr)
        df_clean['R-R Interval Final'] = list_end
        feather.write_feather(df_clean, (path_snip + '/clean_snippet.ftr'))
        read_meta_clean(path,len(df_clean['R-R Interval Final']))
    if path == root + "/S0434/holter/Snippet001":
        df = pd.read_feather(path_snip + '/PEAKS.ftr')   #inputting the ecg feather files in a data frame
        df_hrv = RR_calculator(df)
        df_clean = pd.DataFrame()
        df_clean['R-R Interval Final'] = df_hrv['R-R Interval Final']
        df_clean['x_values'] = df_hrv['x_values']
        df_clean = _outlier_removal(df_clean)
        rr = df_clean['R-R Interval Final'].to_list()
        list_end = cos_correction(rr)
        df_clean['R-R Interval Final'] = list_end
        feather.write_feather(df_clean, (path_snip + '/clean_snippet.ftr'))
        read_meta_clean(path,len(df_clean['R-R Interval Final']))
    if path == root + "/S0441/holter/Snippet002":
        df = pd.read_feather(path_snip + '/PEAKS.ftr')   #inputting the ecg feather files in a data frame
        df_hrv = RR_calculator(df)
        df_clean = pd.DataFrame()
        df_clean['R-R Interval Final'] = df_hrv['R-R Interval Final']
        df_clean['x_values'] = df_hrv['x_values']
        df_clean = _outlier_removal(df_clean)
        rr = df_clean['R-R Interval Final'].to_list()
        list_end = cos_correction(rr)
        df_clean['R-R Interval Final'] = list_end
        feather.write_feather(df_clean, (path_snip + '/clean_snippet.ftr'))
        read_meta_clean(path,len(df_clean['R-R Interval Final']))

In [10]:
def write_hrv_std(path,rate):
    """Function that reads the peak file and saves the HRV snippet applying the cos correction function from module 3 to it"""
    df = pd.read_feather(path_snip + '/PEAKS.ftr')   #inputting the ecg feather files in a data frame
    df_hrv = RR_calculator(df)
    df_clean = pd.DataFrame()
    df_clean['R-R Interval Final'] = cos_correction(df_hrv['R-R Interval Final'])
    df_clean['x_values'] = df_hrv['x_values']
    return df_clean

In [11]:
raw_patients = sorted(os.listdir(database_root))

patients = [patient for patient in raw_patients if not patient.endswith(('.json', 'ini','\r'))]
patients = [patient for patient in raw_patients if os.path.isdir(os.path.join(database_root,patient))]
patients_with_holter = [patient for patient in patients if os.path.isdir(os.path.join(database_root,patient,'holter'))]

# Now we need to check the holter folder contains some snippets ---------------------------------------------
# Helper functions (can be offloaded to another file) -------------------------------------------------------


def is_valid_snippet(snippet):
    # We still need to check the expected files are in the folder
    MSNIP.ftr
    
    return True


def has_snippets(path):
    dirs = [a_dir for a_dir in sorted(os.listdir(path)) if os.path.isdir(os.path.join(path,a_dir))]
    snippet_dirs = [a_dir for a_dir in dirs if a_dir.startswith('Snippet')]

    return len(snippet_dirs)>0


def all_snippets(path):
    dirs = [a_dir for a_dir in sorted(os.listdir(path)) if os.path.isdir(os.path.join(path,a_dir))]
    # now we check they are snippet dirs
    snippet_dirs = [a_dir for a_dir in dirs if a_dir.startswith('Snippet')]
    # valid_snippet_dirs = [a_dir for a_dir in snippet_dirs if is_valid_snippet(os.path.join(path,a_dir))]
    return snippet_dirs


# -----------------------------------------------------------------------------------------------------------
    
patients_with_snippets = [patient for patient in patients_with_holter 
                          if has_snippets(os.path.join(database_root,patient,'holter'))]

print(patients_with_snippets)
# Now we have the patient folders with snippets and we know where they are !
# So the loop will be clean

['S0250', 'S0256', 'S0273', 'S0282', 'S0283', 'S0287', 'S0288', 'S0292', 'S0296', 'S0300', 'S0301', 'S0304', 'S0308', 'S0310', 'S0312', 'S0314', 'S0315', 'S0316', 'S0317', 'S0318', 'S0326', 'S0342', 'S0349', 'S0365', 'S0366', 'S0368', 'S0372', 'S0381', 'S0382', 'S0390', 'S0392', 'S0398', 'S0403', 'S0405', 'S0406', 'S0409', 'S0416', 'S0420', 'S0423', 'S0424', 'S0426', 'S0427', 'S0430', 'S0432', 'S0433', 'S0434', 'S0435', 'S0441']


In [12]:
for patient in tqdm(patients_with_snippets): #Looping throught the folders Sxxxx with snippets
    patient_path = os.path.join(database_root,patient)
    holter_path = os.path.join(patient_path,'holter')
    samplingRate = find_rate(holter_path)
    all_patient_snippets = all_snippets(holter_path)
    all_snippets_path = [os.path.join(holter_path,snippet) for snippet in all_patient_snippets]

    
    print(patient)
    for path_snippet in all_snippets_path:
        write_meta(path_snippet)
        try:
            write_peaks(path_snip, samplingRate)
            write_hrv(path_snip, samplingRate)
        except:
            print("Patient holter file not found, not present in the database.") 

  6%|██▊                                         | 3/48 [00:00<00:01, 23.65it/s]

S0250
Patient holter file not found, not present in the database.
Patient holter file not found, not present in the database.
Patient holter file not found, not present in the database.
S0256
Patient holter file not found, not present in the database.
S0273
Patient holter file not found, not present in the database.
Patient holter file not found, not present in the database.
Patient holter file not found, not present in the database.
Patient holter file not found, not present in the database.
Patient holter file not found, not present in the database.
S0282
Patient holter file not found, not present in the database.
Patient holter file not found, not present in the database.
Patient holter file not found, not present in the database.
S0283
Patient holter file not found, not present in the database.
Patient holter file not found, not present in the database.


 19%|████████▎                                   | 9/48 [00:00<00:01, 24.35it/s]

Patient holter file not found, not present in the database.
Patient holter file not found, not present in the database.
S0287
Patient holter file not found, not present in the database.
Patient holter file not found, not present in the database.
Patient holter file not found, not present in the database.
S0288
Patient holter file not found, not present in the database.
Patient holter file not found, not present in the database.
Patient holter file not found, not present in the database.
S0292
Patient holter file not found, not present in the database.
Patient holter file not found, not present in the database.
S0296
Patient holter file not found, not present in the database.
Patient holter file not found, not present in the database.
Patient holter file not found, not present in the database.
S0300
Patient holter file not found, not present in the database.
Patient holter file not found, not present in the database.


 25%|██████████▊                                | 12/48 [00:00<00:01, 21.33it/s]

Patient holter file not found, not present in the database.
S0301
Patient holter file not found, not present in the database.
Patient holter file not found, not present in the database.
Patient holter file not found, not present in the database.
S0304
Patient holter file not found, not present in the database.
S0308
Patient holter file not found, not present in the database.
S0310
Patient holter file not found, not present in the database.
Patient holter file not found, not present in the database.
Patient holter file not found, not present in the database.
Patient holter file not found, not present in the database.


 31%|█████████████▍                             | 15/48 [00:00<00:02, 15.69it/s]

Patient holter file not found, not present in the database.
S0312
Patient holter file not found, not present in the database.
S0314


 35%|███████████████▏                           | 17/48 [00:01<00:02, 14.22it/s]

Patient holter file not found, not present in the database.
Patient holter file not found, not present in the database.
S0315
Patient holter file not found, not present in the database.
S0316
Patient holter file not found, not present in the database.
Patient holter file not found, not present in the database.
Patient holter file not found, not present in the database.


 46%|███████████████████▋                       | 22/48 [00:01<00:01, 14.99it/s]

Patient holter file not found, not present in the database.
S0317
Patient holter file not found, not present in the database.
Patient holter file not found, not present in the database.
Patient holter file not found, not present in the database.
Patient holter file not found, not present in the database.
S0318
Patient holter file not found, not present in the database.
Patient holter file not found, not present in the database.
Patient holter file not found, not present in the database.
S0326
Patient holter file not found, not present in the database.
Patient holter file not found, not present in the database.
Patient holter file not found, not present in the database.
S0342
Patient holter file not found, not present in the database.
Patient holter file not found, not present in the database.
S0349
Patient holter file not found, not present in the database.
S0365


 54%|███████████████████████▎                   | 26/48 [00:01<00:01, 18.33it/s]

Patient holter file not found, not present in the database.
Patient holter file not found, not present in the database.
Patient holter file not found, not present in the database.
S0366
Patient holter file not found, not present in the database.
Patient holter file not found, not present in the database.
Patient holter file not found, not present in the database.
Patient holter file not found, not present in the database.
S0368
Patient holter file not found, not present in the database.
Patient holter file not found, not present in the database.
Patient holter file not found, not present in the database.
S0372
Patient holter file not found, not present in the database.
Patient holter file not found, not present in the database.
Patient holter file not found, not present in the database.
S0381
Patient holter file not found, not present in the database.
Patient holter file not found, not present in the database.


 67%|████████████████████████████▋              | 32/48 [00:01<00:00, 20.25it/s]

Patient holter file not found, not present in the database.
S0382
Patient holter file not found, not present in the database.
S0390
Patient holter file not found, not present in the database.
Patient holter file not found, not present in the database.
Patient holter file not found, not present in the database.
S0392
Patient holter file not found, not present in the database.
Patient holter file not found, not present in the database.
Patient holter file not found, not present in the database.
S0398
Patient holter file not found, not present in the database.
Patient holter file not found, not present in the database.
S0403
Patient holter file not found, not present in the database.
Patient holter file not found, not present in the database.


 73%|███████████████████████████████▎           | 35/48 [00:01<00:00, 21.65it/s]

Patient holter file not found, not present in the database.
Patient holter file not found, not present in the database.
Patient holter file not found, not present in the database.
S0405
Patient holter file not found, not present in the database.
Patient holter file not found, not present in the database.
Patient holter file not found, not present in the database.
S0406
Patient holter file not found, not present in the database.
Patient holter file not found, not present in the database.
Patient holter file not found, not present in the database.
S0409
Patient holter file not found, not present in the database.
S0416
Patient holter file not found, not present in the database.
Patient holter file not found, not present in the database.
Patient holter file not found, not present in the database.
Patient holter file not found, not present in the database.
Patient holter file not found, not present in the database.
S0420
Patient holter file not found, not present in the database.
Patient ho

 79%|██████████████████████████████████         | 38/48 [00:02<00:00, 21.24it/s]

Patient holter file not found, not present in the database.
Patient holter file not found, not present in the database.
S0423
Patient holter file not found, not present in the database.
Patient holter file not found, not present in the database.
Patient holter file not found, not present in the database.
Patient holter file not found, not present in the database.
Patient holter file not found, not present in the database.
S0424


 85%|████████████████████████████████████▋      | 41/48 [00:02<00:00, 13.60it/s]

Patient holter file not found, not present in the database.
Patient holter file not found, not present in the database.
S0426
Patient holter file not found, not present in the database.
Patient holter file not found, not present in the database.
Patient holter file not found, not present in the database.
Patient holter file not found, not present in the database.
Patient holter file not found, not present in the database.
S0427
Patient holter file not found, not present in the database.
Patient holter file not found, not present in the database.
Patient holter file not found, not present in the database.
Patient holter file not found, not present in the database.
Patient holter file not found, not present in the database.
S0430
Patient holter file not found, not present in the database.


 90%|██████████████████████████████████████▌    | 43/48 [00:02<00:00, 13.21it/s]

Patient holter file not found, not present in the database.
Patient holter file not found, not present in the database.
Patient holter file not found, not present in the database.
Patient holter file not found, not present in the database.
S0432
Patient holter file not found, not present in the database.
Patient holter file not found, not present in the database.
Patient holter file not found, not present in the database.
Patient holter file not found, not present in the database.
Patient holter file not found, not present in the database.
Patient holter file not found, not present in the database.
Patient holter file not found, not present in the database.
Patient holter file not found, not present in the database.
Patient holter file not found, not present in the database.
S0433


 98%|██████████████████████████████████████████ | 47/48 [00:02<00:00, 12.33it/s]

Patient holter file not found, not present in the database.
Patient holter file not found, not present in the database.
Patient holter file not found, not present in the database.
Patient holter file not found, not present in the database.
Patient holter file not found, not present in the database.
S0434
Patient holter file not found, not present in the database.
Patient holter file not found, not present in the database.
Patient holter file not found, not present in the database.
Patient holter file not found, not present in the database.
Patient holter file not found, not present in the database.
S0435
Patient holter file not found, not present in the database.
Patient holter file not found, not present in the database.
Patient holter file not found, not present in the database.
Patient holter file not found, not present in the database.
Patient holter file not found, not present in the database.


100%|███████████████████████████████████████████| 48/48 [00:03<00:00, 15.80it/s]

S0441
Patient holter file not found, not present in the database.
Patient holter file not found, not present in the database.
Patient holter file not found, not present in the database.
Patient holter file not found, not present in the database.
Patient holter file not found, not present in the database.



