In [3]:
import pandas as pd
import numpy as np
import pathlib as pl
import datetime as dt
from typing import Union, Tuple, Optional
from functools import reduce #needed to allow merging of multiple datasets      

grace = pl.Path(r'C:\Users\kevin.omalley\OneDrive - University of Limerick\Documents\GitHub\fino2py\all_data\from grace')
ailbhe = pl.Path(r'C:\Users\kevin.omalley\OneDrive - University of Limerick\Documents\GitHub\fino2py\all_data\working_data')
time_stamps = pl.Path(r'C:\Users\kevin.omalley\OneDrive - University of Limerick\Documents\GitHub\fino2py\all_data\Timesheets (1)-2.csv')

In [4]:
for i in ailbhe.glob('**/*.csv'):
    i.unlink()

In [5]:

# this is the full version of the function

def read_raw_finometer_data(folder_path: Union[str, pl.Path], interval: Optional[str] = None, save_csv: bool = False) -> Tuple[pd.DataFrame, str]:
    '''This function imports the raw finometer data and then calculates the average of each measure over the selected time period
    The default time period is 1 minute, but this can be changed by setting the interval parameter to a different value. 
    This function may not be needed in many cases, but it is useful to have, and a good place to start.
    
    Parameters
    ----------
    folder_path : pathlib.Path object or str 
        The path to the folder containing the .txt file
    interval : str, optional
        If provided, the function will resample the data to the given interval and return the resampled data.
    save_csv : bool, optional
        If True, the function will save the imported data as a .csv file in the same folder as the .txt file.
        The default is False.
    Raises
    ------
    TypeError:
        If folder_path is not a pathlib.Path object or a string
    ValueError:
        If folder_path does not exist or is not a directory
        If there is not exactly one .txt file in the folder

    Returns
    -------
    pandas.DataFrame:
        Dataframe with the raw finometer data resampled to the given interval

    ID : str
        The Participant ID of the participant whose data is being imported
    '''
    
    try:
        folder_path = pl.Path(folder_path)
    except TypeError:
        raise TypeError('folder_path must be a pathlib.Path object or a string')

    if not folder_path.exists():
        raise ValueError('folder_path does not exist')

    if folder_path.is_dir():
        files = [file for file in folder_path.glob('*.txt')]
        if len(files) != 1:
            raise ValueError(f'Expected one .txt file, but found {len(files)} in the folder')
        file = files[0]
    elif folder_path.is_file():
        file = folder_path

    ID = file.stem.split('_')[0]



    df = pd.read_csv(
        file,
        sep=';',
        header=0,
        skiprows=8,
        skipfooter=1,
        engine='python',
    )

    df = df.drop(df.columns[13], axis=1)

    df['Time (s)'] = pd.to_datetime(df['Time (s)'], format='%H:%M:%S.%f').dt.floor('ms')



    if interval:

        csv_path = folder_path / file.with_stem(f'imported {interval} data for {ID}').with_suffix('.csv')
        try:
            df_resampled = df.set_index(pd.to_datetime(df['Time (s)'], format='%H:%M:%S.%f')).resample(f'{interval}').mean()
            df_resampled.index = df_resampled.index.strftime('%H:%M:%S.%f').str[:-3]
        except ValueError:
            raise ValueError(f'{interval} is not a valid time period, valid time periods are: 1s, 1T, 1H, 1D, 1W, 1M, 1Q, 1A')
    else:
        csv_path = folder_path / file.with_stem(f'imported data for {ID}').with_suffix('.csv')
        df = df.set_index(pd.to_datetime(df['Time (s)'], format='%H:%M:%S.%f').dt.strftime('%H:%M:%S.%f').str[:-3])
        df = df.drop('Time (s)', axis=1)


    if save_csv:
        df.to_csv(csv_path, index=True)

    return (df_resampled, ID) if interval else (df, ID)

In [6]:
# this is the full version of the function

def convert_time(time):
    '''This function converts the time stamps in the timesheets to datetime objects suitable for the other functions in this module
    Parameters
    ----------
    time : str
        The time stamp in the timesheets
    Returns
    -------
    datetime.datetime
        The time stamp converted to a datetime object
    '''
    time = pd.to_datetime(time)
    time = time.strftime('%H:%M:%S.%f')[:-3]
    return time


In [7]:
# this is the full version of the function

def create_chunk(df, ID, tag, start, end):
    """
    Create a chunk of data from a dataframe between specified start and end times and return a new dataframe
    containing the mean values for each column in the chunk.
    
    Parameters:
    -----------
    df : pandas DataFrame
        The dataframe containing the data to extract a chunk from.
    ID : str
        The participant ID to include in the output dataframe.
    tag : str
        The tag to include in the column names of the output dataframe.
    start : str or None
        The start time of the chunk in the format 'HH:MM:SS' or 'HH:MM:SS.mmm'. If None, the chunk starts at the 
        beginning of the dataframe.
    end : str or None
        The end time of the chunk in the format 'HH:MM:SS' or 'HH:MM:SS.mmm'. If None, the chunk ends at the 
        end of the dataframe.
    
    Returns:
    --------
    pandas DataFrame
        A new dataframe containing the mean values for each column in the specified chunk of the input dataframe.
        The output dataframe has a row for the specified participant ID and columns with names that include the
        specified tag.
    """
    
    # Convert start and end times to datetime objects if they are specified
    if start:
        try:
            start = convert_time(start)
        except:
            print(f"Could not convert {start} to datetime object, it must be a string in the format 'HH:MM:SS' or 'HH:MM:SS.mmm'")
    if end:
        try:
            end = convert_time(end)
        except:
            print(f"Could not convert {end} to datetime object, it must be a string in the format 'HH:MM:SS' or 'HH:MM:SS.mmm'")

    # Extract the chunk of data and compute the mean values for each column
    if start and end:
        chunk = df.loc[start:end].mean().to_frame().T
    elif start:
        chunk = df.loc[start:].mean().to_frame().T
    elif end:
        chunk = df.loc[:end].mean().to_frame().T
    
    # Rename the columns with the specified tag and insert the participant ID as the first column
    chunk.columns = [f"{tag} {i}" for i in chunk.columns]
    chunk.insert(0, 'Participant ID', ID)

    return chunk


In [8]:
# this is the full version of the function

def import_protocol_times(times_file_path, add_seconds=False, flatten_seconds=False, save_csv=False):
    '''This function imports the protocol times from the .csv file and returns a dataframe with the protocol times for each participant
    
    file_path: pathlib.Path object
        The path to the .csv file containing the protocol times
    add_seconds: boolean (optional)
        If True, seconds will be added to the time values (if missing)
    save_csv: boolean (optional)
        If True, the imported data will be saved as a .csv file in the same folder as the .csv file, this is not always needed and should be used sparingly
    flatten_seconds: boolean (optional)
        If True, seconds will be set to 00 for all time values
    '''

    def add_seconds_to_time(time_str):
        '''This function adds seconds to the time string for in case the time string is missing seconds'''
        if len(time_str) == 5:
            time_str += ":00"
        return time_str

    def flatten_seconds(time_str):
        '''This function sets seconds to 00 for a given time string'''
        return time_str[:5] + ':00'

    if not isinstance(times_file_path, pl.Path):#check if folder_path is a pathlib.Path object
        raise TypeError('file_path must be a pathlib.Path object')
    elif not times_file_path.exists(): #  and if it exists
        raise ValueError('file_path does not exist')
    elif not times_file_path.is_file(): #  and is a file 
        raise ValueError('file_path is not a file')
    elif times_file_path.suffix != '.csv': #  and is a csv file
        raise ValueError('file_path is not an csv file')
    else:
        df = pd.read_csv(times_file_path, delimiter= ',')
        df.columns = [col.strip() for col in df.columns]
        cols_to_keep = ['Participant ID', 'Start of Baseline', 'End of Baseline', 'Start of Task 1', 'End of Task 1', 'Start of Recovery Period', 'End of Recovery Period']
        df = df[cols_to_keep].applymap(lambda x: str(x).strip('"') if isinstance(x, str) else x)
        
        if add_seconds and flatten_seconds:
            raise ValueError('Only one of add_seconds and flatten_seconds can be True')

        if add_seconds:
            try:
                df.iloc[:, 1:] = df.iloc[:, 1:].applymap(lambda x: add_seconds_to_time(x) if isinstance(x, str) else x)
                df.iloc[:, 1:] = df.iloc[:, 1:].applymap(lambda x: pd.to_datetime(x, format='%H:%M:%S', errors='coerce'))
            except:
                print('Could not add seconds to time, please check the time format')

        elif flatten_seconds:
            try:
                df.iloc[:, 1:] = df.iloc[:, 1:].applymap(lambda x: add_seconds_to_time(x) if isinstance(x, str) else x)
                df.iloc[:, 1:] = df.iloc[:, 1:].applymap(lambda x: flatten_seconds(x) if isinstance(x, str) else x)
                df.iloc[:, 1:] = df.iloc[:, 1:].applymap(lambda x: pd.to_datetime(x, format='%H:%M:%S', errors='coerce'))
            except:
                print('Could not set seconds to 00, please check the time format')
        
        if save_csv: #if you want to save the csv file (which may be useful if you want to use the data in other ways)
            try:
                df.to_csv(times_file_path.parent / f"cleaned times.csv", index=False)
                print(f"CSV saved for {times_file_path.stem}")
            except Exception as e:
                print(f"Could not save csv file, error: {e}")
            


        return df

In [8]:
y = import_protocol_times(
    time_stamps,
    flatten_seconds = True,
    save_csv=True
    )

CSV saved for Timesheets (1)-2


In [9]:
# testing version of the function 

def import_protocol_averages(frame, id, times=None, save_csv=None):
    '''A function that imports the averaged finometer files (which have already been processed from the raw data)
    to produce averages for each section of the experimental protocol.

    Parameters
    ----------
    frame : pandas.DataFrame 
        The DataFrame containing the averaged finometer data
    id : str
        The participant ID
    save_csv : bool, optional
        If True, the imported data will be saved as a .csv file in the same folder as the .csv file, 
        this is not always needed and should be used sparingly
    times : dict, optional
        A dictionary of tuples of times, with the keys being the names of the time periods.

    Returns
    -------
    pandas.DataFrame
        A DataFrame with the mean values of the given columns during each time period of the study.

    Raises
    ------
    TypeError
        If frame is not a pandas.DataFrame object
        If id is not a string
    ValueError
        If times is not provided as a dictionary with at least one key-value pair
        If there are not enough times provided for a given time period
        If there are too many times provided for a given time period
    '''

    # check if frame is a pandas.DataFrame object
    if not isinstance(frame, pd.DataFrame):
        raise TypeError('''
        frame must be a pandas.DataFrame object, produced by the read_raw_finometer_data function, 
        have you run the read_raw_finometer_data function on the data?''')

    if not isinstance(id, str):
        raise TypeError('id must be a string')

    if not times:
        raise ValueError("times must be a dictionary and at least one key-value pair must be provided.")
    
    # Create an empty list of dataframes, each representing a chunk of the protocol
    chunks = []
    
    for i in times.keys():
        if len(times[i]) < 2:
            raise ValueError(f"There are not enough times provided for the {i}.")
        elif len(times[i]) > 2:
            raise ValueError(f"There are too many times provided for the {i}.")
        elif len(times[i]) == 2:
            if times[i][0] < times[i][1]:
                chunks.append(create_chunk(frame, id, i, times[i][0], times[i][1]))



    data_merge = reduce(lambda left, right: pd.merge(left, right, on=["Participant ID"], how="outer"), chunks)
    data_merge.set_index('Participant ID', inplace=True)

    if save_csv:
        path = pl.Path(save_csv)
        data_merge.to_csv( path / f"{id} protocol_averages.csv")
        print(f"Saved {id} protocol averages.csv to {path.stem}")

    return data_merge


In [13]:
a, a_id = read_raw_finometer_data(
    pl.Path(r'C:\Users\kevin.omalley\OneDrive - University of Limerick\Documents\GitHub\fino2py\all_data\working_data\Data_fyp\Participant 1_2022-10-27_09.07.37\Participant 1_2022-10-27_09.07.37.txt'),
    '1T'
    )

a.head().to_clipboard()

  df_resampled = df.set_index(pd.to_datetime(df['Time (s)'], format='%H:%M:%S.%f')).resample(f'{interval}').mean()


In [14]:
a

Unnamed: 0_level_0,Systolic Pressure (mmHg),Diastolic Pressure (mmHg),Mean Pressure (mmHg),Heart rate (bpm),Stroke Volume (ml),Left Ventricular Ejection Time (ms),Pulse Interval (ms),Maximum Slope (mmHg/s),Cardiac Output (l/min),Total Peripheral Resistance Medical Unit (mmHg.min/l),Total Peripheral Resistance CGS (dyn.s/cm5)
Time (s),Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
09:07:00.000,74.481481,51.666667,62.0,93.555556,16.092593,135.925926,642.777778,478.407407,1.540741,1.172444,1563.185185
09:08:00.000,153.927835,110.402062,131.505155,97.649485,31.357732,272.216495,618.041237,1000.092784,3.058763,2.603629,3471.536082
09:09:00.000,154.236842,111.131579,130.960526,94.039474,30.802632,274.736842,679.013158,1006.657895,2.914474,2.913342,3884.5
09:10:00.000,139.913793,96.0,114.034483,88.793103,37.52069,315.431034,730.172414,980.0,3.243103,2.409086,3212.12069
09:11:00.000,152.783505,123.43299,138.030928,97.587629,18.68866,228.247423,616.649485,592.515464,1.808247,4.886897,6515.85567
09:12:00.000,179.590361,144.759036,160.662651,94.481928,14.636145,229.216867,673.855422,849.891566,1.377108,7.493012,9990.73494
09:13:00.000,177.969388,149.316327,162.408163,99.602041,13.976531,238.877551,605.663265,615.204082,1.388776,7.187663,9583.591837
09:14:00.000,192.783505,157.206186,173.618557,97.061856,15.276289,229.948454,619.329897,867.226804,1.480412,7.139887,9519.835052
09:15:00.000,199.295918,161.428571,178.632653,97.969388,14.811224,190.765306,613.163265,950.755102,1.45,7.456286,9941.704082
09:16:00.000,193.914894,159.691489,176.606383,97.840426,13.393617,202.234043,637.287234,822.244681,1.312766,9.067011,12089.361702


In [32]:
import warnings

warnings.filterwarnings('ignore')
dfs = []

for row in y.iloc[0:5,:].iterrows():
    id = row[1][0]
    times = {'baseline' : [row[1][1], row[1][2]], 'task' : [row[1][3], row[1][4]], 'recovery' : [row[1][5], row[1][6]]}


    for folder in ailbhe.glob('**'):
        if id == folder.stem.split('_')[0]:
            df, df_id = read_raw_finometer_data(folder)

            try:
                dfs.append(import_protocol_averages(df, df_id, times))
            except:
                print(f"Could not import protocol averages for {id}")

warnings.filterwarnings('default')








In [33]:
main = pd.concat(dfs, axis=1)

In [34]:
main

Unnamed: 0_level_0,baseline Systolic Pressure (mmHg),baseline Diastolic Pressure (mmHg),baseline Mean Pressure (mmHg),baseline Heart rate (bpm),baseline Stroke Volume (ml),baseline Left Ventricular Ejection Time (ms),baseline Pulse Interval (ms),baseline Maximum Slope (mmHg/s),baseline Cardiac Output (l/min),baseline Total Peripheral Resistance Medical Unit (mmHg.min/l),...,recovery Mean Pressure (mmHg),recovery Heart rate (bpm),recovery Stroke Volume (ml),recovery Left Ventricular Ejection Time (ms),recovery Pulse Interval (ms),recovery Maximum Slope (mmHg/s),recovery Cardiac Output (l/min),recovery Total Peripheral Resistance Medical Unit (mmHg.min/l),recovery Total Peripheral Resistance CGS (dyn.s/cm5),recovery Markers
Participant ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Participant 1,201.232086,158.522995,175.791444,94.136898,16.80738,240.326203,642.13369,1114.324064,1.577647,6.806619,...,,,,,,,,,,
Participant 2,,,,,,,,,,,...,,,,,,,,,,
Participant 3,,,,,,,,,,,...,,,,,,,,,,
Participant 4,,,,,,,,,,,...,,,,,,,,,,
Participant 5,,,,,,,,,,,...,88.36673,67.362949,34.604537,293.667297,907.759924,397.132325,2.31966,2.364981,3153.31569,
