In [3]:
import pandas as pd
import numpy as np
import pathlib as pl
import datetime as dt
from functools import reduce #needed to allow merging of multiple datasets      

grace = pl.Path(r'C:\Users\kevin.omalley\OneDrive - University of Limerick\Documents\GitHub\fino2py\all_data\from grace')
ailbhe = pl.Path(r'C:\Users\kevin.omalley\OneDrive - University of Limerick\Documents\GitHub\fino2py\all_data\working_data')
time_stamps = pl.Path(r'C:\Users\kevin.omalley\OneDrive - University of Limerick\Documents\GitHub\fino2py\all_data\Timesheets (1)-2.csv')

In [4]:
for i in ailbhe.glob('**/*.csv'):
    i.unlink()

In [163]:

# this is the full version of the function

def read_raw_finometer_data(folder_path, interval=False, save_csv=False):
    '''This function imports the raw finometer data and then calculates the average of each measure over the selected time period
    The default time period is 1 minute, but this can be changed by setting the interval parameter to a different value. 
    This function may not be needed in many cases, but it is useful to have, and a good place to start.
    
    Parameters
    ----------
    folder_path : pathlib.Path object or str 
        The path to the folder containing the .txt file
    interval : str, optional
        If provided, the function will resample the data to the given interval and return the resampled data.
    save_csv : bool, optional
        If True, the function will save the imported data as a .csv file in the same folder as the .txt file.
        The default is False.
    Raises
    ------
    TypeError:
        If folder_path is not a pathlib.Path object or a string
    ValueError:
        If folder_path does not exist or is not a directory
        If there is not exactly one .txt file in the folder

    Returns
    -------
    pandas.DataFrame:
        Dataframe with the raw finometer data resampled to the given interval

    ID : str
        The Participant ID of the participant whose data is being imported
    '''
    
    try:
        folder_path = pl.Path(folder_path)
    except TypeError:
        raise TypeError('folder_path must be a pathlib.Path object or a string')

    if not folder_path.exists():
        raise ValueError('folder_path does not exist')

    if folder_path.is_dir():
        files = [file for file in folder_path.glob('*.txt')]
        if len(files) != 1:
            raise ValueError(f'Expected one .txt file, but found {len(files)} in the folder')
        file = files[0]
    elif folder_path.is_file():
        file = folder_path

    ID = file.stem.split('_')[0]



    df = pd.read_csv(
        file,
        sep=';',
        header=0,
        skiprows=8,
        skipfooter=1,
        engine='python',
    )

    df = df.drop(df.columns[13], axis=1)

    df['Time (s)'] = pd.to_datetime(df['Time (s)'], format='%H:%M:%S.%f').dt.floor('ms')



    if interval:

        csv_path = folder_path / file.with_stem(f'imported {interval} data for {ID}').with_suffix('.csv')
        try:
            
            df_resampled = df.set_index(pd.to_datetime(df['Time (s)'], format='%H:%M:%S.%f')).resample(f'{interval}').mean()
            df_resampled.index = df_resampled.index.strftime('%H:%M:%S.%f').str[:-3]
        except ValueError:
            raise ValueError(f'{interval} is not a valid time period, valid time periods are: 1s, 1T, 1H, 1D, 1W, 1M, 1Q, 1A')
    else:
        csv_path = folder_path / file.with_stem(f'imported data for {ID}').with_suffix('.csv')
        df = df.set_index(pd.to_datetime(df['Time (s)'], format='%H:%M:%S.%f').dt.strftime('%H:%M:%S.%f').str[:-3])
        df = df.drop('Time (s)', axis=1)


    if save_csv:
        df.to_csv(csv_path, index=True)

    return (df_resampled, ID) if interval else (df, ID)



In [210]:
def convert_time(time):
    '''This function converts the time stamps in the timesheets to datetime objects suitable for the other functions in this module
    Parameters
    ----------
    time : str
        The time stamp in the timesheets
    Returns
    -------
    datetime.datetime
        The time stamp converted to a datetime object
    '''
    time = pd.to_datetime(time)
    time = time.strftime('%H:%M:%S.%f')[:-3]
    return time


In [224]:
# this is the full version of the function


def create_chunk(df, ID, tag, start, end):
    """
    Create a chunk of data from a dataframe between specified start and end times and return a new dataframe
    containing the mean values for each column in the chunk.
    
    Parameters:
    -----------
    df : pandas DataFrame
        The dataframe containing the data to extract a chunk from.
    ID : str
        The participant ID to include in the output dataframe.
    tag : str
        The tag to include in the column names of the output dataframe.
    start : str or None
        The start time of the chunk in the format 'HH:MM:SS' or 'HH:MM:SS.mmm'. If None, the chunk starts at the 
        beginning of the dataframe.
    end : str or None
        The end time of the chunk in the format 'HH:MM:SS' or 'HH:MM:SS.mmm'. If None, the chunk ends at the 
        end of the dataframe.
    
    Returns:
    --------
    pandas DataFrame
        A new dataframe containing the mean values for each column in the specified chunk of the input dataframe.
        The output dataframe has a row for the specified participant ID and columns with names that include the
        specified tag.
    """
    
    # Convert start and end times to datetime objects if they are specified
    if start:
        try:
            start = convert_time(start)
        except:
            print(f"Could not convert {start} to datetime object, it must be a string in the format 'HH:MM:SS' or 'HH:MM:SS.mmm'")
    if end:
        try:
            end = convert_time(end)
        except:
            print(f"Could not convert {end} to datetime object, it must be a string in the format 'HH:MM:SS' or 'HH:MM:SS.mmm'")

    # Extract the chunk of data and compute the mean values for each column
    if start and end:
        chunk = df.loc[start:end].mean().to_frame().T
    elif start:
        chunk = df.loc[start:].mean().to_frame().T
    elif end:
        chunk = df.loc[:end].mean().to_frame().T
    
    # Rename the columns with the specified tag and insert the participant ID as the first column
    chunk.columns = [f"{tag} {i}" for i in chunk.columns]
    chunk.insert(0, 'Participant ID', ID)

    return chunk


In [7]:
# this is the full version of the function

def import_protocol_times(times_file_path, add_seconds=False, flatten_seconds=False, save_csv=False):
    '''This function imports the protocol times from the .csv file and returns a dataframe with the protocol times for each participant
    
    file_path: pathlib.Path object
        The path to the .csv file containing the protocol times
    add_seconds: boolean (optional)
        If True, seconds will be added to the time values (if missing)
    save_csv: boolean (optional)
        If True, the imported data will be saved as a .csv file in the same folder as the .csv file, this is not always needed and should be used sparingly
    flatten_seconds: boolean (optional)
        If True, seconds will be set to 00 for all time values
    '''

    def add_seconds_to_time(time_str):
        '''This function adds seconds to the time string for in case the time string is missing seconds'''
        if len(time_str) == 5:
            time_str += ":00"
        return time_str

    def flatten_seconds(time_str):
        '''This function sets seconds to 00 for a given time string'''
        return time_str[:5] + ':00'

    if not isinstance(times_file_path, pl.Path):#check if folder_path is a pathlib.Path object
        raise TypeError('file_path must be a pathlib.Path object')
    elif not times_file_path.exists(): #  and if it exists
        raise ValueError('file_path does not exist')
    elif not times_file_path.is_file(): #  and is a file 
        raise ValueError('file_path is not a file')
    elif times_file_path.suffix != '.csv': #  and is a csv file
        raise ValueError('file_path is not an csv file')
    else:
        df = pd.read_csv(times_file_path, delimiter= ',')
        df.columns = [col.strip() for col in df.columns]
        cols_to_keep = ['Participant ID', 'Start of Baseline', 'End of Baseline', 'Start of Task 1', 'End of Task 1', 'Start of Recovery Period', 'End of Recovery Period']
        df = df[cols_to_keep].applymap(lambda x: str(x).strip('"') if isinstance(x, str) else x)
        
        if add_seconds and flatten_seconds:
            raise ValueError('Only one of add_seconds and flatten_seconds can be True')

        if add_seconds:
            try:
                df.iloc[:, 1:] = df.iloc[:, 1:].applymap(lambda x: add_seconds_to_time(x) if isinstance(x, str) else x)
                df.iloc[:, 1:] = df.iloc[:, 1:].applymap(lambda x: pd.to_datetime(x, format='%H:%M:%S', errors='coerce'))
            except:
                print('Could not add seconds to time, please check the time format')

        elif flatten_seconds:
            try:
                df.iloc[:, 1:] = df.iloc[:, 1:].applymap(lambda x: add_seconds_to_time(x) if isinstance(x, str) else x)
                df.iloc[:, 1:] = df.iloc[:, 1:].applymap(lambda x: flatten_seconds(x) if isinstance(x, str) else x)
                df.iloc[:, 1:] = df.iloc[:, 1:].applymap(lambda x: pd.to_datetime(x, format='%H:%M:%S', errors='coerce'))
            except:
                print('Could not set seconds to 00, please check the time format')
        
        if save_csv: #if you want to save the csv file (which may be useful if you want to use the data in other ways)
            try:
                df.to_csv(times_file_path.parent / f"cleaned times.csv", index=False)
                print(f"CSV saved for {times_file_path.stem}")
            except Exception as e:
                print(f"Could not save csv file, error: {e}")
            


        return df

In [8]:
y = import_protocol_times(
    time_stamps,
    flatten_seconds = True,
    save_csv=True
    )

CSV saved for Timesheets (1)-2


In [227]:
# testing version of the function 

def import_protocol_averages(frame, id, times=None, save_csv=None):
    '''A function that imports the averaged finometer files (which have already been processed from the raw data)
    to produce averages for each section of the experimental protocol.

    Parameters
    ----------
    frame : pandas.DataFrame 
        The DataFrame containing the averaged finometer data
    id : str
        The participant ID
    save_csv : bool, optional
        If True, the imported data will be saved as a .csv file in the same folder as the .csv file, 
        this is not always needed and should be used sparingly
    times : dict, optional
        A dictionary of tuples of times, with the keys being the names of the time periods.

    Returns
    -------
    pandas.DataFrame
        A DataFrame with the mean values of the given columns during each time period of the study.

    Raises
    ------
    TypeError
        If frame is not a pandas.DataFrame object
        If id is not a string
    ValueError
        If times is not provided as a dictionary with at least one key-value pair
        If there are not enough times provided for a given time period
        If there are too many times provided for a given time period
    '''

    # check if frame is a pandas.DataFrame object
    if not isinstance(frame, pd.DataFrame):
        raise TypeError('''
        frame must be a pandas.DataFrame object, produced by the read_raw_finometer_data function, 
        have you run the read_raw_finometer_data function on the data?''')

    if not isinstance(id, str):
        raise TypeError('id must be a string')

    if not times:
        raise ValueError("times must be a dictionary and at least one key-value pair must be provided.")
    
    # Create an empty list of dataframes, each representing a chunk of the protocol
    chunks = []
    
    for i in times.keys():
        if len(times[i]) < 2:
            raise ValueError(f"There are not enough times provided for the {i}.")
        elif len(times[i]) > 2:
            raise ValueError(f"There are too many times provided for the {i}.")
        elif len(times[i]) == 2:
            if times[i][0] < times[i][1]:
                chunks.append(create_chunk(frame, i, times[i][0], times[i][1]))



    data_merge = reduce(lambda left, right: pd.merge(left, right, on=["Participant ID"], how="outer"), chunks)
    data_merge.set_index('Participant ID', inplace=True)

    if save_csv:
        data_merge.to_csv( pl.Path(save_csv) / f"{id} protocol_averages.csv")
        print(f"Saved {id} protocol averages.csv to {save_csv.stem}")

    return data_merge


In [228]:
a, a_id = read_raw_finometer_data(
    pl.Path(r'C:\Users\kevin.omalley\OneDrive - University of Limerick\Documents\GitHub\fino2py\all_data\working_data\Data_fyp\Participant 1_2022-10-27_09.07.37\Participant 1_2022-10-27_09.07.37.txt'),
    
    )

a.head().to_clipboard()

In [229]:
dict = {'Basline': ['09:17:00', '09:27:00'], 'Task': ['09:28:00', '09:35:00'], 'Recovery': ['09:35:00', '09:43:00']}

c = import_protocol_averages(a, a_id, times=dict, save_csv=r'C:\Users\kevin.omalley\OneDrive - University of Limerick\Documents\GitHub\fino2py\all_data\working_data\Data_fyp\Participant 1_2022-10-27_09.07.37')

              Systolic Pressure (mmHg)  Diastolic Pressure (mmHg)  \
Time (s)                                                            
09:07:43.090                         0                          0   
09:07:43.730                         0                          0   
09:07:44.400                         0                          0   
09:07:45.095                         0                          0   
09:07:45.800                         0                          0   
...                                ...                        ...   
09:48:58.068                       227                        203   
09:48:59.054                       214                        151   
09:48:59.816                       224                        153   
09:49:00.559                       195                        150   
09:49:01.261                       210                        175   

              Mean Pressure (mmHg)  Heart rate (bpm)  Stroke Volume (ml)  \
Time (s)                  

TypeError: reduce() of empty iterable with no initial value

In [None]:
def make_protocol_rows(data_path, timestamps_df):
    """
    Import data for participants whose timestamps are all non-null.
    
    Parameters:
    - data_path: pl.Path or str, the path to the folder containing the data files
    - timestamps_df: pandas DataFrame, containing the timestamps for each participant
    
    Returns:
    - None
    """
    data = pl.Path(data_path)
    for n in timestamps_df.index:
        if timestamps_df.iloc[n, 1:].isnull().values.any():
            print(f"Skipping participant {timestamps_df.loc[n, 'Participant ID']} due to missing timestamps")
            continue
        participant_id = timestamps_df.loc[n, 'Participant ID']
        file_name = f'imported data for {participant_id}.csv'
        file_paths = list(data.glob(f'**/{file_name}'))
        if not file_paths:
            print(f'no file found for {participant_id}')
        else:
            full_path = file_paths[0].resolve()
            print(f'File {full_path} found')

            start_bl = timestamps_df.loc[n, 'Start of Baseline']
            end_bl = timestamps_df.loc[n, 'End of Baseline']

            start_task = timestamps_df.loc[n, 'Start of Task 1']
            end_task = timestamps_df.loc[n, 'End of Task 1']

            start_rec = timestamps_df.loc[n, 'Start of Recovery Period']
            end_rec = timestamps_df.loc[n, 'End of Recovery Period']

            import_path = f'imported data for {participant_id}.csv'

            import_protocol_averages_feb_23(full_path,
                                            t1=start_bl.strftime('%H:%M:%S'),
                                            t2=end_bl.strftime('%H:%M:%S'),
                                            t3=start_task.strftime('%H:%M:%S') if not pd.isna(start_task) else None,
                                            t4=end_task.strftime('%H:%M:%S') if not pd.isna(end_task) else None,
                                            t5=start_rec.strftime('%H:%M:%S') if not pd.isna(start_rec) else None,
                                            t6=end_rec.strftime('%H:%M:%S') if not pd.isna(end_rec) else None,
                                            save_csv=True)


In [None]:
make_protocol_rows(home, y)

In [None]:
dfs = []

# Loop over each csv file in the data directory
for csv_file in home.glob('**/*protocol_averages.csv'):
    # Read in the csv file as a DataFrame and append to the list
    df = pd.read_csv(csv_file)
    dfs.append(df)

# Concatenate the list of DataFrames into a single DataFrame
merged = pd.concat(dfs, ignore_index=True)

# Print the merged DataFrame
merged