# Finometer to Python example project

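This document is a working example of using `fino2py` to import raw Finometer recordings, read the protocol timesheet, and compute the average of each measure over each section of the experimental protocol (baseline, task and recovery).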


In [2]:
import pathlib as pl
from functools import reduce
from typing import Optional, Tuple, Union

import numpy as np
import pandas as pd

import fino2py as f2p  


# Root directory that is searched recursively for participant folders/files below.
# NOTE: absolute, machine-specific path — adjust before running elsewhere.
ailbhe = pl.Path(r'C:\Users\kevin.omalley\OneDrive - University of Limerick\Documents\GitHub\fino2py\all_data\working_data')

# Path to the protocol timesheet (.csv) that is passed to import_protocol_times.
time_stamps = pl.Path(r'C:\Users\kevin.omalley\OneDrive - University of Limerick\Documents\GitHub\fino2py\all_data\Timesheets (1)-2.csv')

ImportError: cannot import name 'reduce' from 'fino2py.src.dependencies' (c:\Users\kevin.omalley\Desktop\test_env\Lib\site-packages\fino2py\src\dependencies.py)

In [2]:
# WARNING: destructive — permanently deletes every .csv file found anywhere
# under the data directory (the '**/*.csv' pattern recurses into sub-folders).
for i in ailbhe.glob('**/*.csv'):
    i.unlink()

In [3]:

# this is the full version of the function

def read_raw_finometer_data(folder_path: Union[str, pl.Path], interval: Optional[str] = None, save_csv: bool = False) -> Tuple[pd.DataFrame, str]:
    '''Import a raw Finometer .txt export and optionally average it over fixed time intervals.

    Without ``interval`` the data are returned at their original resolution, indexed by
    the time of each row formatted as 'HH:MM:SS.mmm'. With ``interval`` the numeric
    columns are averaged within each resampling bin and the bin start times become the index.

    Parameters
    ----------
    folder_path : pathlib.Path object or str
        Either a folder containing exactly one .txt file, or the path to the .txt file itself.
    interval : str, optional
        A pandas resampling frequency (e.g. '1min'). If provided, the data are resampled to
        this interval and the per-interval means are returned. The default (None) returns
        the data without resampling.
    save_csv : bool, optional
        If True, the returned frame is also written as a .csv file in the same folder as
        the .txt file. The default is False.

    Raises
    ------
    TypeError:
        If folder_path is not a pathlib.Path object or a string
    ValueError:
        If folder_path does not exist, or is neither a directory nor a file
        If there is not exactly one .txt file in the folder
        If interval is not a valid resampling frequency

    Returns
    -------
    pandas.DataFrame:
        Dataframe with the finometer data (resampled to the given interval when one is supplied)

    ID : str
        The Participant ID, taken from the part of the file name before the first underscore
    '''

    try:
        folder_path = pl.Path(folder_path)
    except TypeError as err:
        raise TypeError('folder_path must be a pathlib.Path object or a string') from err

    if not folder_path.exists():
        raise ValueError('folder_path does not exist')

    if folder_path.is_dir():
        files = list(folder_path.glob('*.txt'))
        if len(files) != 1:
            raise ValueError(f'Expected one .txt file, but found {len(files)} in the folder')
        file = files[0]
    elif folder_path.is_file():
        file = folder_path
    else:
        # Previously fell through and failed later with an unbound `file` variable.
        raise ValueError('folder_path is neither a directory nor a file')

    ID = file.stem.split('_')[0]

    # The first 8 lines and the last line of the export are not part of the table.
    df = pd.read_csv(
        file,
        sep=';',
        header=0,
        skiprows=8,
        skipfooter=1,
        engine='python',
    )

    # Drop the 14th column (presumably an empty column caused by a trailing ';' — TODO confirm).
    df = df.drop(df.columns[13], axis=1)

    df['Time (s)'] = pd.to_datetime(df['Time (s)'], format='%H:%M:%S.%f').dt.floor('ms')

    if interval:
        # Build the output path from the .txt file's own location so that relative
        # paths do not get their directory prepended a second time.
        csv_path = file.with_name(f'imported {interval} data for {ID}.csv')
        try:
            # Using the time column as the index removes it from the averaged columns;
            # numeric_only keeps text columns from breaking the mean.
            result = df.set_index('Time (s)').resample(f'{interval}').mean(numeric_only=True)
        except ValueError as err:
            raise ValueError(f'{interval} is not a valid time period, valid time periods are: 1s, 1T, 1H, 1D, 1W, 1M, 1Q, 1A') from err
        # strftime gives microseconds; trimming three digits leaves milliseconds
        result.index = result.index.strftime('%H:%M:%S.%f').str[:-3]
    else:
        csv_path = file.with_name(f'imported data for {ID}.csv')
        labels = df['Time (s)'].dt.strftime('%H:%M:%S.%f').str[:-3]
        result = df.drop('Time (s)', axis=1).set_index(labels)

    if save_csv:
        # Save the frame that is actually returned (the resampled one when interval is set).
        result.to_csv(csv_path, index=True)

    return result, ID




In [4]:
# this is the full version of the function
def convert_time(time: str) -> str:
    '''
    This function converts the time stamps in the timesheets to datetime objects suitable for the other functions in this module.

    Parameters:
    -----------
    time : str
        The time stamp in the timesheets

    Returns:
    --------
    str:
        The time stamp converted to a string format of '%H:%M:%S.%f' with microseconds removed.
    '''

    try:
        time = pd.to_datetime(time)
    except Exception as e:
        raise ValueError(f"Failed to convert time {time} to datetime object. Error: {e}")

    time_str = time.strftime('%H:%M:%S.%f')[:-3]
    
    return time_str


In [5]:
# this is the full version of the function

def create_chunk(df, ID, tag, start, end):
    """
    Average every numeric column of a dataframe between two times and return the result as a one-row dataframe.

    Parameters:
    -----------
    df : pandas DataFrame
        The dataframe containing the data to extract a chunk from. Rows are selected with
        ``df.loc[start:end]`` using time labels produced by ``convert_time``, so the index
        is expected to hold times in that same text form.
    ID : str
        The participant ID to include in the output dataframe.
    tag : str
        The tag to prefix to each column name of the output dataframe.
    start : str or None
        The start time of the chunk in the format 'HH:MM:SS' or 'HH:MM:SS.mmm'. If None, the chunk starts at the
        beginning of the dataframe.
    end : str or None
        The end time of the chunk in the format 'HH:MM:SS' or 'HH:MM:SS.mmm'. If None, the chunk ends at the
        end of the dataframe.

    Returns:
    --------
    pandas DataFrame
        A single-row dataframe whose first column is 'Participant ID' and whose remaining
        columns are '<tag> <original column>' holding the mean of that column over the chunk.

    Raises:
    -------
    ValueError
        If start or end is given but cannot be converted to a time.
    """

    # Convert the bounds to the label format used for slicing. A bound that cannot be
    # converted is reported as an error instead of being used unconverted.
    if start:
        try:
            start = convert_time(start)
        except ValueError as err:
            raise ValueError(
                f"Could not convert {start} to datetime object, it must be a string in the format 'HH:MM:SS' or 'HH:MM:SS.mmm'"
            ) from err
    if end:
        try:
            end = convert_time(end)
        except ValueError as err:
            raise ValueError(
                f"Could not convert {end} to datetime object, it must be a string in the format 'HH:MM:SS' or 'HH:MM:SS.mmm'"
            ) from err

    # A missing bound becomes None, which leaves that side of the slice open; with
    # both bounds missing the whole dataframe is averaged.
    window = df.loc[(start or None):(end or None)]

    # numeric_only skips text columns, which cannot be averaged
    chunk = window.mean(numeric_only=True).to_frame().T

    # Prefix the columns with the tag and insert the participant ID as the first column
    chunk.columns = [f"{tag} {i}" for i in chunk.columns]
    chunk.insert(0, 'Participant ID', ID)

    return chunk


In [6]:
# this is the full version of the function

def import_protocol_times(times_file_path: pl.Path, add_seconds: bool = False, flatten_seconds: bool = False, save_csv: bool = False) -> pd.DataFrame:
    '''
    This function imports the protocol times from a .csv file and returns a cleaned pandas dataframe with the protocol times for each participant.

    Only the participant ID and the start/end columns for the baseline, task 1 and recovery
    periods are kept. When neither add_seconds nor flatten_seconds is set, the time values
    are returned as they appear in the file (with surrounding double quotes removed).

    Parameters
    ----------
    times_file_path : pathlib.Path
        The path to the .csv file containing the protocol times.
    add_seconds : bool, optional
        If True, ':00' is appended to 5-character time values ('HH:MM') and the time
        columns are converted to datetimes.
    flatten_seconds : bool, optional
        If True, seconds are set to 00 for all time values and the time columns are
        converted to datetimes.
    save_csv : bool, optional
        If True, the cleaned data is saved as "cleaned times.csv" in the same folder as the input file.

    Raises
    ------
    TypeError:
        If times_file_path is not a pathlib.Path object.
    ValueError:
        If times_file_path does not exist or is not a file.
        If times_file_path does not have a .csv extension.
        If add_seconds and flatten_seconds are both True.
        If the function is unable to add seconds to time values or set seconds to 00.

    Returns
    -------
    pandas.DataFrame
        A cleaned pandas dataframe with the protocol times for each participant.
    '''

    # NOTE: the helpers deliberately do not reuse the parameter names. A nested function
    # called `flatten_seconds` would replace the boolean argument, making it always truthy.
    def _with_seconds(time_str):
        '''Append ':00' when the time string is missing seconds (i.e. is 5 characters long).'''
        return time_str + ':00' if len(time_str) == 5 else time_str

    def _zero_seconds(time_str):
        '''Keep the first five characters of the time string and set the seconds to 00.'''
        return time_str[:5] + ':00'

    def _map_strings(series, func):
        '''Apply func to the string values of a column, leaving other values untouched.'''
        return series.map(lambda x: func(x) if isinstance(x, str) else x)

    if not isinstance(times_file_path, pl.Path):
        raise TypeError('file_path must be a pathlib.Path object')
    if not times_file_path.exists():
        raise ValueError('file_path does not exist')
    if not times_file_path.is_file():
        raise ValueError('file_path is not a file')
    if times_file_path.suffix != '.csv':
        raise ValueError('file_path is not an csv file')
    if add_seconds and flatten_seconds:
        raise ValueError('Only one of add_seconds and flatten_seconds can be True')

    df = pd.read_csv(times_file_path, delimiter=',')
    df.columns = [col.strip() for col in df.columns]
    cols_to_keep = ['Participant ID', 'Start of Baseline', 'End of Baseline', 'Start of Task 1', 'End of Task 1', 'Start of Recovery Period', 'End of Recovery Period']
    df = df[cols_to_keep].copy()
    for col in df.columns:
        df[col] = _map_strings(df[col], lambda x: str(x).strip('"'))

    # Every column after the participant ID holds a time value
    time_cols = df.columns[1:]

    if add_seconds:
        try:
            for col in time_cols:
                df[col] = _map_strings(df[col], _with_seconds)
                df[col] = pd.to_datetime(df[col], format='%H:%M:%S', errors='coerce')
        except (TypeError, ValueError) as err:
            raise ValueError('Could not add seconds to time, please check the time format') from err

    elif flatten_seconds:
        try:
            for col in time_cols:
                df[col] = _map_strings(df[col], _with_seconds)
                df[col] = _map_strings(df[col], _zero_seconds)
                df[col] = pd.to_datetime(df[col], format='%H:%M:%S', errors='coerce')
        except (TypeError, ValueError) as err:
            raise ValueError('Could not set seconds to 00, please check the time format') from err

    if save_csv:
        # Saving is best-effort: a failure to write is reported but does not discard the result.
        try:
            df.to_csv(times_file_path.parent / "cleaned times.csv", index=False)
            print(f"CSV saved for {times_file_path.stem}")
        except OSError as e:
            print(f"Could not save csv file, error: {e}")

    return df

In [7]:
# Load the protocol timesheet with the flatten_seconds option enabled and request
# that the cleaned table be written next to the source file as "cleaned times.csv".
y = import_protocol_times(
    time_stamps,
    flatten_seconds = True,
    save_csv=True
    )

CSV saved for Timesheets (1)-2


In [8]:
# testing version of the function 

def import_protocol_averages(frame, id, times=None, save_csv=None):
    '''Produce the average of each measure for each section of the experimental protocol.

    For every named time period in ``times`` a one-row chunk of column means is built with
    ``create_chunk``; the chunks are then merged on 'Participant ID' into a single row.

    Parameters
    ----------
    frame : pandas.DataFrame
        The DataFrame containing the finometer data (as returned by read_raw_finometer_data)
    id : str
        The participant ID
    times : dict
        A dictionary mapping the name of each time period to a pair (start, end).
        Periods whose start is not strictly before their end are skipped.
    save_csv : str or pathlib.Path, optional
        Directory in which to save the result as "<id> protocol_averages.csv".
        If None (the default) nothing is saved.

    Returns
    -------
    pandas.DataFrame
        A DataFrame indexed by 'Participant ID' with the mean values of the columns
        during each time period of the study.

    Raises
    ------
    TypeError
        If frame is not a pandas.DataFrame object
        If id is not a string
        If save_csv is True rather than a directory path
    ValueError
        If times is not provided as a dictionary with at least one key-value pair
        If there are not enough times provided for a given time period
        If there are too many times provided for a given time period
        If none of the time periods has a start before its end
    '''

    # check if frame is a pandas.DataFrame object
    if not isinstance(frame, pd.DataFrame):
        raise TypeError('''
        frame must be a pandas.DataFrame object, produced by the read_raw_finometer_data function, 
        have you run the read_raw_finometer_data function on the data?''')

    if not isinstance(id, str):
        raise TypeError('id must be a string')

    if not times:
        raise ValueError("times must be a dictionary and at least one key-value pair must be provided.")

    # save_csv is used as a directory path; True cannot be turned into one, so fail
    # before doing any work rather than after the averages have been computed.
    if save_csv is True:
        raise TypeError('save_csv must be the path of the directory to save into, not True')

    # One single-row dataframe per protocol section
    chunks = []
    for label, bounds in times.items():
        if len(bounds) < 2:
            raise ValueError(f"There are not enough times provided for the {label}.")
        if len(bounds) > 2:
            raise ValueError(f"There are too many times provided for the {label}.")
        start, end = bounds[0], bounds[1]
        if start < end:
            chunks.append(create_chunk(frame, id, label, start, end))

    # reduce() on an empty list raises an unhelpful TypeError, so report the real cause
    if not chunks:
        raise ValueError(f"None of the time periods provided for {id} has a start time before its end time.")

    data_merge = reduce(lambda left, right: pd.merge(left, right, on=["Participant ID"], how="outer"), chunks)
    data_merge.set_index('Participant ID', inplace=True)

    if save_csv:
        path = pl.Path(save_csv)
        out_file = path / f"{id} protocol_averages.csv"
        data_merge.to_csv(out_file)
        print(f"Saved {out_file.name} to {path.stem}")

    return data_merge


In [17]:
# Read one participant's raw export directly from its .txt file, passing '30T'
# as the resampling interval and requesting the CSV copy.
a, a_id = read_raw_finometer_data(
    pl.Path(r'C:\Users\kevin.omalley\OneDrive - University of Limerick\Documents\GitHub\fino2py\all_data\working_data\Data_fyp\Participant 1_2022-10-27_09.07.37\Participant 1_2022-10-27_09.07.37.txt'),
    '30T', save_csv=True)

# preview the first rows of the returned frame
a.head()

  df_resampled = df.set_index(pd.to_datetime(df['Time (s)'], format='%H:%M:%S.%f')).resample(f'{interval}').mean()


Unnamed: 0_level_0,Systolic Pressure (mmHg),Diastolic Pressure (mmHg),Mean Pressure (mmHg),Heart rate (bpm),Stroke Volume (ml),Left Ventricular Ejection Time (ms),Pulse Interval (ms),Maximum Slope (mmHg/s),Cardiac Output (l/min),Total Peripheral Resistance Medical Unit (mmHg.min/l),Total Peripheral Resistance CGS (dyn.s/cm5)
Time (s),Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
09:00:00.000,188.808395,148.729383,165.724444,94.400988,18.155506,241.716049,645.345679,1001.218765,1.705432,6.412228,8549.643457
09:30:00.000,218.152201,168.404403,188.114465,84.510692,16.236038,244.459119,717.893082,1269.32956,1.370566,8.619045,11492.054088


In [23]:
import warnings

dfs = []

# Silence warnings only while the batch runs; the context manager restores the
# previous warning filters afterwards instead of overwriting them.
with warnings.catch_warnings():
    warnings.simplefilter('ignore')

    # Process the first 10 rows of the protocol table
    for _, row in y.iloc[0:10, :].iterrows():
        # Columns are read by position: ID, then start/end of baseline, task and recovery
        participant_id = row.iloc[0]
        times = {
            'baseline': [row.iloc[1], row.iloc[2]],
            'task': [row.iloc[3], row.iloc[4]],
            'recovery': [row.iloc[5], row.iloc[6]],
        }

        for folder in ailbhe.glob('**'):
            # Only directories are wanted; newer Python versions may also yield files
            # for a pattern ending in '**', which would process a participant twice.
            if not folder.is_dir():
                continue
            # Folder names start with the participant ID followed by an underscore
            if participant_id != folder.stem.split('_')[0]:
                continue

            df, df_id = read_raw_finometer_data(folder)

            try:
                dfs.append(import_protocol_averages(df, df_id, times))
            except Exception as err:
                print(f"Could not import protocol averages for {participant_id}: {err}")


# One row per successfully processed participant
result_df = pd.concat(dfs, axis=0)



In [24]:
# notebook display of the combined per-participant averages
result_df

Unnamed: 0_level_0,baseline Systolic Pressure (mmHg),baseline Diastolic Pressure (mmHg),baseline Mean Pressure (mmHg),baseline Heart rate (bpm),baseline Stroke Volume (ml),baseline Left Ventricular Ejection Time (ms),baseline Pulse Interval (ms),baseline Maximum Slope (mmHg/s),baseline Cardiac Output (l/min),baseline Total Peripheral Resistance Medical Unit (mmHg.min/l),...,recovery Mean Pressure (mmHg),recovery Heart rate (bpm),recovery Stroke Volume (ml),recovery Left Ventricular Ejection Time (ms),recovery Pulse Interval (ms),recovery Maximum Slope (mmHg/s),recovery Cardiac Output (l/min),recovery Total Peripheral Resistance Medical Unit (mmHg.min/l),recovery Total Peripheral Resistance CGS (dyn.s/cm5),recovery Markers
Participant ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Participant 1,201.232086,158.522995,175.791444,94.136898,16.80738,240.326203,642.13369,1114.324064,1.577647,6.806619,...,184.15843,86.954942,16.146802,250.65407,696.59157,1256.867733,1.403779,8.178577,10904.774709,
Participant 2,174.761639,120.020484,142.213222,78.179702,28.776816,219.487896,777.891061,1413.418994,2.239106,3.876611,...,146.080065,77.25,30.081699,288.75,785.465686,1732.25817,2.30768,3.834824,5113.111111,
Participant 3,128.72125,83.05375,99.43,84.4075,54.571625,267.15,749.4875,1415.01125,4.56475,1.47407,...,101.772358,82.211382,60.71626,277.837398,779.414634,1567.793496,4.906992,2.008945,2678.6,
Participant 4,137.562137,92.348432,108.888502,86.644599,40.465389,267.909408,696.591173,1188.9036,3.498142,1.887842,...,119.266017,90.692201,35.554039,264.164345,668.697772,1408.271588,3.220474,2.268829,3025.084958,
Participant 5,93.990654,80.220561,85.323364,64.459813,18.105794,289.803738,1026.046729,239.471028,1.179252,6.109763,...,88.36673,67.362949,34.604537,293.667297,907.759924,397.132325,2.31966,2.364981,3153.31569,
Participant 6,106.697548,79.798365,85.679837,73.912807,31.27139,294.952316,816.975477,627.549046,2.305586,2.309154,...,83.861404,72.998246,32.886667,289.008772,841.140351,872.147368,2.390175,2.2387,2984.922807,
Participant 7,116.379518,62.829819,83.042922,98.686747,92.937801,267.074548,611.573795,1552.516566,9.143599,0.554955,...,91.721193,98.309671,89.174794,264.660494,617.217078,1940.726337,8.755041,0.652887,870.530864,
Participant 8,201.799232,144.032661,161.606148,109.024976,55.949952,285.806916,570.100865,1240.792507,6.113641,1.783168,...,172.054902,102.308497,46.503007,278.562092,610.196078,1206.768627,4.754902,2.515505,3353.996078,
Participant 9,125.233402,73.839212,91.713693,96.490664,48.927282,251.312241,622.702282,1395.44917,4.715975,1.170438,...,101.525974,96.567532,37.484805,259.12987,623.337662,1236.24026,3.616364,1.707231,2276.292208,
Participant 10,104.972603,71.390411,83.300228,73.28653,76.053196,295.684932,822.665525,614.374429,5.559018,0.90537,...,88.494526,69.306569,83.577007,301.925182,876.195255,723.797445,5.779927,0.938234,1250.994526,


In [15]:
# Export the combined averages to a CSV file (absolute, machine-specific path).
result_df.to_csv(r'C:\Users\kevin.omalley\OneDrive - University of Limerick\Documents\GitHub\fino2py\all_data\for_lisa.csv')

In [11]:
# This function doesn't work yet, it's just a placeholder for now

def process_finometer_data(data_folder: str, protocol_df: pd.DataFrame) -> pd.DataFrame:
    """
    Reads in raw Finometer data from every participant folder inside a data folder and returns a
    DataFrame containing the protocol-section averages for each participant.

    Parameters:
    data_folder (str): Path to the folder whose immediate sub-folders each hold one participant's raw
        Finometer data.
    protocol_df (pd.DataFrame): Protocol times with the columns 'baseline_start', 'baseline_end',
        'task_start', 'task_end', 'recovery_start' and 'recovery_end'. Only the first row is used,
        and its times are applied to every participant.
        NOTE(review): these column names differ from the ones returned by import_protocol_times
        ('Start of Baseline', ...) — confirm which schema is intended.

    Returns:
    result_df (pd.DataFrame): DataFrame containing the processed data, one row per participant.

    Raises:
    KeyError: If protocol_df lacks one of the expected columns.
    ValueError: If no participant folder could be processed.
    """

    # Start and end time of each protocol section, taken from the first protocol row
    times = {
        interval: [protocol_df[f'{interval}_start'].iloc[0], protocol_df[f'{interval}_end'].iloc[0]]
        for interval in ('baseline', 'task', 'recovery')
    }

    # Find all participant folders directly inside the specified data folder
    participant_folders = [f for f in pl.Path(data_folder).glob('*') if f.is_dir()]

    # Process each participant; a folder that cannot be read or averaged is reported
    # and skipped so that one bad folder does not abort the whole batch.
    dfs = []
    for folder in participant_folders:
        try:
            df, df_id = read_raw_finometer_data(folder)
            dfs.append(import_protocol_averages(df, df_id, times))
        except Exception as err:
            print(f"Could not import protocol averages for {folder.name}: {err}")

    if not dfs:
        raise ValueError(f"No participant data could be processed from {data_folder}")

    # Concatenate the processed data for all participants into a single DataFrame
    return pd.concat(dfs, axis=0)


In [12]:
# notebook display of the current contents of result_df
result_df

Unnamed: 0_level_0,baseline Systolic Pressure (mmHg),baseline Diastolic Pressure (mmHg),baseline Mean Pressure (mmHg),baseline Heart rate (bpm),baseline Stroke Volume (ml),baseline Left Ventricular Ejection Time (ms),baseline Pulse Interval (ms),baseline Maximum Slope (mmHg/s),baseline Cardiac Output (l/min),baseline Total Peripheral Resistance Medical Unit (mmHg.min/l),...,recovery Mean Pressure (mmHg),recovery Heart rate (bpm),recovery Stroke Volume (ml),recovery Left Ventricular Ejection Time (ms),recovery Pulse Interval (ms),recovery Maximum Slope (mmHg/s),recovery Cardiac Output (l/min),recovery Total Peripheral Resistance Medical Unit (mmHg.min/l),recovery Total Peripheral Resistance CGS (dyn.s/cm5),recovery Markers
Participant ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Participant 1,201.232086,158.522995,175.791444,94.136898,16.80738,240.326203,642.13369,1114.324064,1.577647,6.806619,...,184.15843,86.954942,16.146802,250.65407,696.59157,1256.867733,1.403779,8.178577,10904.774709,
Participant 2,174.761639,120.020484,142.213222,78.179702,28.776816,219.487896,777.891061,1413.418994,2.239106,3.876611,...,146.080065,77.25,30.081699,288.75,785.465686,1732.25817,2.30768,3.834824,5113.111111,
Participant 3,128.72125,83.05375,99.43,84.4075,54.571625,267.15,749.4875,1415.01125,4.56475,1.47407,...,101.772358,82.211382,60.71626,277.837398,779.414634,1567.793496,4.906992,2.008945,2678.6,
Participant 4,137.562137,92.348432,108.888502,86.644599,40.465389,267.909408,696.591173,1188.9036,3.498142,1.887842,...,119.266017,90.692201,35.554039,264.164345,668.697772,1408.271588,3.220474,2.268829,3025.084958,
Participant 5,93.990654,80.220561,85.323364,64.459813,18.105794,289.803738,1026.046729,239.471028,1.179252,6.109763,...,88.36673,67.362949,34.604537,293.667297,907.759924,397.132325,2.31966,2.364981,3153.31569,


In [20]:
# Print every column name, each followed by the total number of columns.
for i in result_df.columns:
    print(i, len(result_df.columns))

baseline Systolic Pressure (mmHg) 36
baseline Diastolic Pressure (mmHg) 36
baseline Mean Pressure (mmHg) 36
baseline Heart rate (bpm) 36
baseline Stroke Volume (ml) 36
baseline Left Ventricular Ejection Time (ms) 36
baseline Pulse Interval (ms) 36
baseline Maximum Slope (mmHg/s) 36
baseline Cardiac Output (l/min) 36
baseline Total Peripheral Resistance Medical Unit (mmHg.min/l) 36
baseline Total Peripheral Resistance CGS (dyn.s/cm5) 36
baseline Markers 36
task Systolic Pressure (mmHg) 36
task Diastolic Pressure (mmHg) 36
task Mean Pressure (mmHg) 36
task Heart rate (bpm) 36
task Stroke Volume (ml) 36
task Left Ventricular Ejection Time (ms) 36
task Pulse Interval (ms) 36
task Maximum Slope (mmHg/s) 36
task Cardiac Output (l/min) 36
task Total Peripheral Resistance Medical Unit (mmHg.min/l) 36
task Total Peripheral Resistance CGS (dyn.s/cm5) 36
task Markers 36
recovery Systolic Pressure (mmHg) 36
recovery Diastolic Pressure (mmHg) 36
recovery Mean Pressure (mmHg) 36
recovery Heart rate 