In [5]:
import pandas as pd
import numpy as np
import os
import re
from datetime import datetime, timedelta
from pathlib import Path
from APIcall_v2 import main_api_call

In [6]:
def create_emptydf(start_date,end_date):
    """
    Build a zero-filled daily frame covering an inclusive date range.

    Args:
        start_date (str): First day in 'yyyy-mm-dd' format. Any object whose
            str() has that format is accepted too.
        end_date (str): Last day (inclusive), same format as start_date.

    Returns:
        frame (df): One row per calendar day. 'Date' holds 'yyyy-mm-dd'
        strings, 'nr. sessions' is 0, and 'total km', 'km Z3-4',
        'km Z5-T1-T2' and 'hours alternative' are 0.0.

    Raises:
        ValueError: If either date does not match 'yyyy-mm-dd'.
    """
    day_format = '%Y-%m-%d'
    first_day = datetime.strptime(str(start_date), day_format)
    last_day = datetime.strptime(str(end_date), day_format)

    frame = pd.DataFrame({'Date': pd.date_range(first_day, last_day)})
    # Dates are stored as text so they can be compared with dates parsed
    # from activity file names.
    frame['Date'] = frame['Date'].dt.strftime(day_format)

    # The session counter starts as an int, every other measure as a float.
    initial_values = {
        'nr. sessions': 0,
        'total km': 0.00,
        'km Z3-4': 0.00,
        'km Z5-T1-T2': 0.00,
        'hours alternative': 0.00,
    }
    for column, initial in initial_values.items():
        frame[column] = initial
    return frame

In [13]:
def readfiles(file_path="../data/external"):
    '''
    Lists the csv files in a directory, split into runs and everything else

    Args:
        file_path (str): the relative path for the folder that
        contains all the activity files

    Returns:
        run_activities (list): sorted Paths whose name matches '*Running_*.csv'
        other_activities (list): sorted Paths of every other '*.csv' file
    '''
    folder = Path(file_path)

    run_activities = sorted(folder.glob('*Running_*.csv'))
    run_lookup = set(run_activities)
    # Filter the sorted listing instead of subtracting sets, so the order of
    # the result is the same on every call.
    other_activities = [
        csv_file for csv_file in sorted(folder.glob('*.csv'))
        if csv_file not in run_lookup
    ]

    return run_activities,other_activities

In [53]:
def readrun(file):
    """
    Load one activity csv into a DataFrame with pandas' default parsing.

    Args:
        file: Path of the csv file (anything pd.read_csv accepts).

    Returns:
        df: The parsed contents of the file.
    """
    return pd.read_csv(file)

In [54]:
def populatebydate(emptydf,run_activities,other_activities,Z3_min, Z5_min):
    """
    Fill the daily frame from activity csv files on disk.

    The activity date is the second '_'-separated field of the file name,
    written as dd-mm-YYYY. Files dated outside emptydf['Date'] are skipped.

    Args:
        emptydf (df): Daily frame with a 'Date' column of 'yyyy-mm-dd' strings
            plus the 'nr. sessions' and 'hours alternative' columns (and the
            km columns populateone writes). Updated in place.
        run_activities (list): Paths of running files. Each one adds 1 to
            'nr. sessions' and is passed to populateone.
        other_activities (list): Paths of all other files. The last value of
            their 'Time' column ('HH:MM:SS.f') is converted to hours and
            added to 'hours alternative'.
        Z3_min: Heart-rate threshold forwarded to populateone.
        Z5_min: Heart-rate threshold forwarded to populateone.

    Returns:
        df: emptydf itself, populated.

    Raises:
        IndexError: If a file name contains no '_'.
        ValueError: If the date field or the 'Time' value has another format.
    """
    def activity_date(file):
        # Parse the file *name* only, so an underscore in a parent directory
        # cannot shift the '_'-separated fields.
        stamp = Path(file).name.split('_')[1]
        return datetime.strptime(stamp, '%d-%m-%Y').strftime('%Y-%m-%d')

    # One pass per file with a set lookup replaces the dates x files loop.
    known_dates = set(emptydf['Date'])

    for file in run_activities:
        filedate = activity_date(file)
        if filedate not in known_dates:
            continue
        emptydf.loc[emptydf['Date'] == filedate,'nr. sessions'] += 1
        populateone(emptydf,str(file),Z3_min, Z5_min)

    for file in other_activities:
        filedate = activity_date(file)
        if filedate not in known_dates:
            continue
        temp_df = readrun(file)
        time_str = temp_df['Time'].iloc[-1]
        clock = datetime.strptime(time_str, '%H:%M:%S.%f')
        duration = timedelta(hours=clock.hour, minutes=clock.minute, seconds=clock.second, microseconds=clock.microsecond)
        hours_alternative = round(duration.total_seconds() / 3600, 2)

        day = emptydf['Date'] == filedate
        # Add to the day's total instead of overwriting it, so a second
        # non-running activity on the same day is not lost. Re-rounding keeps
        # the stored value at two decimals.
        emptydf.loc[day, 'hours alternative'] = (emptydf.loc[day, 'hours alternative'] + hours_alternative).round(2)

    return emptydf

In [55]:
def populateone(df_prepop,filename, Z3_min, Z5_min):
    """
    Adds the distances of one running activity file to its day in the frame.

    The Distance of the last row is added to 'total km' (presumably the
    export's summary row; confirm against the csv layout). For every earlier
    row, Distance is added to 'km Z3-4' when Z3_min <= Avg HR < Z5_min and
    to 'km Z5-T1-T2' when Avg HR >= Z5_min.

    Args:
        df_prepop (df): DataFrame to be populated (updated in place)
        filename (str): Path of the file to be read. Its name must look like
            '<name>_<dd-mm-YYYY>_...'.
        Z3_min (int): Lowest heart rate counted towards 'km Z3-4'
        Z5_min (int): Lowest heart rate counted towards 'km Z5-T1-T2'
    Returns:
        df_postpop (df): The same DataFrame object, populated

    Raises:
        IndexError: If the file name contains no '_'.
        ValueError: If the date field is not dd-mm-YYYY.
    """
    # Take the date from the file name only, so an underscore in a parent
    # directory cannot shift the '_'-separated fields.
    filedate = datetime.strptime(Path(filename).name.split('_')[1], '%d-%m-%Y').strftime('%Y-%m-%d')
    file_df = readrun(filename)

    if len(file_df) == 0:
        # An empty file has no row to read a total from.
        return df_prepop

    # Coerce both columns: placeholder text becomes NaN instead of raising
    # when it is compared with the thresholds.
    distance = pd.to_numeric(file_df['Distance'], errors='coerce')
    heart_rate = pd.to_numeric(file_df['Avg HR'], errors='coerce')
    day = df_prepop['Date'] == filedate

    total_km = distance.iloc[-1]
    if pd.notna(total_km):
        df_prepop.loc[day,'total km'] += total_km

    lap_km = distance.iloc[:-1]
    lap_hr = heart_rate.iloc[:-1]
    in_z3_z4 = ((lap_hr >= Z3_min) & (lap_hr < Z5_min)).to_numpy()
    in_z5_up = (lap_hr >= Z5_min).to_numpy()
    # Series.sum skips NaN, so a lap with an unparseable distance counts as
    # 0 km rather than turning the day's total into NaN.
    df_prepop.loc[day, 'km Z3-4'] += lap_km[in_z3_z4].sum()
    df_prepop.loc[day, 'km Z5-T1-T2'] += lap_km[in_z5_up].sum()

    return df_prepop
   

In [7]:
def convert_to_day_approach(df):
    """
    Reshape the daily frame so every row carries a 7-day window.

    Args:
        df (DataFrame): Daily frame with 'Date' plus the feature columns
            'nr. sessions', 'total km', 'km Z3-4', 'km Z5-T1-T2' and
            'hours alternative'.

    Returns:
        DataFrame: One row per date that has 6 preceding rows in df. The
        plain feature names hold the values of the row's own date, and
        '<feature>.<k>' (k = 1..6) holds the values from k rows earlier.
        'Date' is the last column. Rows containing any NaN are dropped,
        which removes the first 6 rows.
    """
    metrics = ['nr. sessions', 'total km', 'km Z3-4', 'km Z5-T1-T2', 'hours alternative']

    # Collect the shifted copies first; lag 0 is the day itself.
    lagged = {}
    for lag in range(7):
        for metric in metrics:
            lagged[f'{metric}.{lag}'] = df[metric].shift(lag)

    wide = pd.DataFrame(lagged)
    wide['Date'] = df['Date']
    wide = wide.dropna()

    # The lag-0 columns keep the plain feature name (the '.0' suffix is cut).
    same_day_names = {name: name[:-2] for name in wide.columns if name.endswith('.0')}
    return wide.rename(columns=same_day_names)


In [None]:
def read_df_memory(saved_activity_dictionary):
    '''
    Splits the in-memory activities into running and non-running ones

    Args:
        saved_activity_dictionary (list): list of dictionaries, one per
        activity, in the format {'filename': filename, 'df': data}

    Returns:
        run_activities (list), other_activities (list): two lists of
        {'filename': ..., 'df': ...} dictionaries. An activity is a run when
        its filename matches '*Running_*.csv'. Every filename is printed,
        runs first and the other activities after them.
    '''
    running_pattern = r".*Running_.*\.csv$"
    run_activities, other_activities = [], []

    for entry in saved_activity_dictionary:
        # Copy only the two keys that are used downstream.
        record = {'filename': entry["filename"], 'df': entry["df"]}
        if re.match(running_pattern, record['filename']):
            run_activities.append(record)
        else:
            other_activities.append(record)

    for record in run_activities + other_activities:
        print(record['filename'])

    return run_activities, other_activities

In [9]:
# Earlier hard-coded date range, kept for reference; the dates now come from main_api_call():
# date_start = '2024-03-03'
# date_end = '2024-09-15'
# Heart-rate thresholds used as the lower bounds of Z3 and Z5 when populating the frame.
Z3_min = 135
Z5_min = 173
# main_api_call (imported from APIcall_v2) supplies the date range and the activity records.
start_date, end_date, df_memory = main_api_call()

# Build the zero-filled per-day frame, then split the records into running / other activities.
empty = create_emptydf(start_date, end_date)
r,o = read_df_memory(df_memory)



Garmin Connect API - Activity Downloader
start date:  2025-03-09 start_date type:  <class 'datetime.date'>
end date:  2025-04-28 end_date type:  <class 'datetime.date'>
Login successful!
Activity data for 'Cardio_28-04-2025_18960861981.csv' loaded into DataFrame.
Activity data for 'County Cork Running_27-04-2025_18947604779.csv' loaded into DataFrame.
Activity data for 'County Cork Running_27-04-2025_18947316809.csv' loaded into DataFrame.
Activity data for 'Cardio_26-04-2025_18942358660.csv' loaded into DataFrame.
Activity data for 'County Cork Running_25-04-2025_18931510406.csv' loaded into DataFrame.
Activity data for 'Cardio_24-04-2025_18922114709.csv' loaded into DataFrame.
Activity data for 'Cardio_23-04-2025_18910651825.csv' loaded into DataFrame.
Activity data for 'Cardio_22-04-2025_18900970544.csv' loaded into DataFrame.
Activity data for 'County Cork Running_20-04-2025_18879960706.csv' loaded into DataFrame.
Activity data for 'Cardio_19-04-2025_18875456581.csv' loaded into Da

In [None]:
def populateone_memory(df_prepop, file_df, filedate, Z3_min, Z5_min):
    """
    Adds the distances of one running activity to its day in the frame.

    The Distance of the last row is added to 'total km' (presumably the
    export's summary row; confirm against the csv layout). For every earlier
    row, Distance is added to 'km Z3-4' when Z3_min <= Avg HR < Z5_min and
    to 'km Z5-T1-T2' when Avg HR >= Z5_min.

    Args:
        df_prepop (df): DataFrame to be populated (updated in place)
        file_df (df): DataFrame containing activity data with 'Distance' and
            'Avg HR' columns. It is not modified.
        filedate (str): Date of the activity, in the format used by
            df_prepop['Date']
        Z3_min (int): Minimum heart rate for Z3 zone
        Z5_min (int): Minimum heart rate for Z5 zone
    Returns:
        df_postpop (df): The same DataFrame object, populated
    """
    if len(file_df) == 0:
        # An empty activity has no row to read a total from.
        return df_prepop

    # Convert into local Series so the caller's activity frame stays as it
    # was. Placeholder text becomes NaN instead of raising when it is
    # compared with the thresholds.
    distance = pd.to_numeric(file_df['Distance'], errors='coerce')
    heart_rate = pd.to_numeric(file_df['Avg HR'], errors='coerce')
    day = df_prepop['Date'] == filedate

    total_km = distance.iloc[-1]
    if pd.notna(total_km):
        df_prepop.loc[day, 'total km'] += total_km

    lap_km = distance.iloc[:-1]
    lap_hr = heart_rate.iloc[:-1]
    in_z3_z4 = ((lap_hr >= Z3_min) & (lap_hr < Z5_min)).to_numpy()
    in_z5_up = (lap_hr >= Z5_min).to_numpy()
    # Series.sum skips NaN, so a lap with an unparseable distance counts as
    # 0 km rather than turning the day's total into NaN.
    df_prepop.loc[day, 'km Z3-4'] += lap_km[in_z3_z4].sum()
    df_prepop.loc[day, 'km Z5-T1-T2'] += lap_km[in_z5_up].sum()

    return df_prepop

In [11]:
def populatebydate_memory(emptydf, run_activities, other_activities, Z3_min, Z5_min):
    """
    Fill the daily frame from activities that are already loaded in memory.

    Every activity is a dict {'filename': str, 'df': DataFrame}. The activity
    date is the second '_'-separated field of the filename, written as
    dd-mm-YYYY. Activities dated outside emptydf['Date'] are skipped.

    Args:
        emptydf (df): Daily frame with a 'Date' column of 'yyyy-mm-dd' strings
            plus the 'nr. sessions' and 'hours alternative' columns (and the
            km columns populateone_memory writes). Updated in place.
        run_activities (list): Running activities. Each one adds 1 to
            'nr. sessions' and is passed to populateone_memory.
        other_activities (list): Non-running activities. The last value of
            their 'Time' column ('HH:MM:SS.f') is converted to hours and
            added to 'hours alternative'.
        Z3_min (int): Heart-rate threshold forwarded to populateone_memory.
        Z5_min (int): Heart-rate threshold forwarded to populateone_memory.

    Returns:
        df: emptydf itself, populated.

    Raises:
        IndexError: If a filename contains no '_'.
        ValueError: If the date field or the 'Time' value has another format.
    """
    def activity_date(activity):
        stamp = activity['filename'].split('_')[1]
        return datetime.strptime(stamp, '%d-%m-%Y').strftime('%Y-%m-%d')

    # One pass per activity with a set lookup replaces the dates x activities
    # loop, which re-parsed every filename for every date.
    known_dates = set(emptydf['Date'])

    for activity in run_activities:
        filedate = activity_date(activity)
        if filedate not in known_dates:
            continue
        emptydf.loc[emptydf['Date'] == filedate, 'nr. sessions'] += 1
        populateone_memory(emptydf, activity['df'], filedate, Z3_min, Z5_min)

    for activity in other_activities:
        filedate = activity_date(activity)
        if filedate not in known_dates:
            continue
        time_str = activity['df']['Time'].iloc[-1]
        clock = datetime.strptime(time_str, '%H:%M:%S.%f')
        duration = timedelta(hours=clock.hour, minutes=clock.minute, seconds=clock.second, microseconds=clock.microsecond)
        hours_alternative = round(duration.total_seconds() / 3600, 2)

        day = emptydf['Date'] == filedate
        # Add to the day's total instead of overwriting it, so a second
        # non-running activity on the same day is not lost. Re-rounding keeps
        # the stored value at two decimals.
        emptydf.loc[day, 'hours alternative'] = (emptydf.loc[day, 'hours alternative'] + hours_alternative).round(2)

    return emptydf

In [12]:
# Fill the per-day frame from the in-memory activities; `empty` is updated in place and returned.
df_full = populatebydate_memory(empty, r, o, Z3_min, Z5_min)
# Convert to day approach: each row carries its own day plus the 6 preceding days.
df_converted = convert_to_day_approach(df_full)
# Bare expression so the notebook displays the resulting frame.
df_converted

Unnamed: 0,nr. sessions,total km,km Z3-4,km Z5-T1-T2,hours alternative,nr. sessions.1,total km.1,km Z3-4.1,km Z5-T1-T2.1,hours alternative.1,...,total km.5,km Z3-4.5,km Z5-T1-T2.5,hours alternative.5,nr. sessions.6,total km.6,km Z3-4.6,km Z5-T1-T2.6,hours alternative.6,Date
6,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.73,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.65,2025-03-15
7,1,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,2.59,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2025-03-16
8,0,0.0,0.0,0.0,1.06,1.0,3.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.76,1.0,2.59,0.0,0.0,0.0,2025-03-17
9,1,3.59,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.06,...,0.0,0.0,0.0,0.4,0.0,0.0,0.0,0.0,0.76,2025-03-18
10,0,0.0,0.0,0.0,0.89,1.0,3.59,0.0,0.0,0.0,...,0.0,0.0,0.0,0.73,0.0,0.0,0.0,0.0,0.4,2025-03-19
11,1,3.85,1.85,0.0,0.0,0.0,0.0,0.0,0.0,0.89,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.73,2025-03-20
12,0,0.0,0.0,0.0,0.79,1.0,3.85,1.85,0.0,0.0,...,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2025-03-21
13,1,4.03,1.03,0.0,0.0,0.0,0.0,0.0,0.0,0.79,...,0.0,0.0,0.0,1.06,1.0,3.0,0.0,0.0,0.0,2025-03-22
14,1,3.5,2.5,0.0,0.0,1.0,4.03,1.03,0.0,0.0,...,3.59,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.06,2025-03-23
15,0,0.0,0.0,0.0,0.86,1.0,3.5,2.5,0.0,0.0,...,0.0,0.0,0.0,0.89,1.0,3.59,0.0,0.0,0.0,2025-03-24


# Deprecated function

In [None]:
def main_extract_transform(date_start, date_end, Z3_min = 135, Z5_min = 173):
    """
    Main function to extract and transform data from the csv files on disk.

    Prompts for the Z3 and Z5 heart-rate thresholds, builds the zero-filled
    daily frame for the date range, populates it from the files returned by
    readfiles() (its default folder), and converts it to the day approach.

    Args:
        date_start: First day of the range, as accepted by create_emptydf.
        date_end: Last day of the range, as accepted by create_emptydf.
        Z3_min (int): Overwritten by the interactive prompt below.
        Z5_min (int): Overwritten by the interactive prompt below.
        NOTE(review): confirm whether the prompts or the two threshold
        arguments should win; the prompts are kept as they were.

    Returns:
        DataFrame: The day-approach frame from convert_to_day_approach.
    """
    while True:
        # the z3_min and Z5 min need to be inputted by the user here
        Z3_min = input("Enter the minimum heart rate for your Z3 according to garmin: ")
        Z5_min = input("Enter the minimim heart rate for your Z5 according to garmin: ")
        # Ask again until both answers are whole numbers.
        try:
            Z3_min = int(Z3_min)
            Z5_min = int(Z5_min)
            break
        except ValueError:
            print("Please enter valid numbers for heart rate zone thresholds.")

    # Create an empty DataFrame for the specified date range
    empty = create_emptydf(date_start, date_end)

    # Read files and populate the DataFrame
    r, o = readfiles()
    df_full = populatebydate(empty, r, o, Z3_min, Z5_min)

    # Convert to day approach format.
    # The file paths are deliberately not run through populatebydate_memory:
    # it indexes each activity as a dict, so Path objects would fail there,
    # and the already populated frame would be counted a second time.
    dfday_user = convert_to_day_approach(df_full)

    return dfday_user



In [59]:
# NOTE(review): date_start / date_end are only assigned in commented-out lines of the
# setup cell visible in this notebook; confirm they are defined before re-running.
# The Z3_min / Z5_min arguments are overwritten by the prompts inside the function.
main_extract_transform(date_start, date_end, Z3_min, Z5_min)

Unnamed: 0,nr. sessions,total km,km Z3-4,km Z5-T1-T2,hours alternative,nr. sessions.1,total km.1,km Z3-4.1,km Z5-T1-T2.1,hours alternative.1,...,total km.5,km Z3-4.5,km Z5-T1-T2.5,hours alternative.5,nr. sessions.6,total km.6,km Z3-4.6,km Z5-T1-T2.6,hours alternative.6,Date
6,1,0.00,0.00,0.0,0.00,0.0,0.00,0.00,0.0,0.0,...,0.00,0.00,0.0,0.0,1.0,5.51,3.51,2.0,0.0,2024-03-09
7,1,6.05,5.05,0.0,0.00,1.0,0.00,0.00,0.0,0.0,...,13.20,11.19,0.0,0.0,0.0,0.00,0.00,0.0,0.0,2024-03-10
8,0,0.00,0.00,0.0,0.00,1.0,6.05,5.05,0.0,0.0,...,0.00,0.00,0.0,0.0,1.0,13.20,11.19,0.0,0.0,2024-03-11
9,1,5.04,3.04,0.0,0.00,0.0,0.00,0.00,0.0,0.0,...,7.27,6.27,0.0,0.0,0.0,0.00,0.00,0.0,0.0,2024-03-12
10,0,0.00,0.00,0.0,0.00,1.0,5.04,3.04,0.0,0.0,...,0.00,0.00,0.0,0.0,1.0,7.27,6.27,0.0,0.0,2024-03-13
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
192,0,0.00,0.00,0.0,0.00,0.0,0.00,0.00,0.0,0.0,...,0.00,0.00,0.0,0.0,0.0,0.00,0.00,0.0,0.0,2024-09-11
193,0,0.00,0.00,0.0,0.00,0.0,0.00,0.00,0.0,0.0,...,0.00,0.00,0.0,1.0,0.0,0.00,0.00,0.0,0.0,2024-09-12
194,0,0.00,0.00,0.0,0.00,0.0,0.00,0.00,0.0,0.0,...,5.27,2.27,0.0,0.0,0.0,0.00,0.00,0.0,1.0,2024-09-13
195,1,2.37,0.00,0.0,0.00,0.0,0.00,0.00,0.0,0.0,...,0.00,0.00,0.0,0.0,1.0,5.27,2.27,0.0,0.0,2024-09-14


# Function currently in use


In [15]:
def main_extract_transform_memory(date_start, date_end, df_memory, Z3_min = 135, Z5_min = 173):
    """
    Main function to extract and transform activities held in memory.

    Prompts for the Z3 and Z5 heart-rate thresholds, builds the zero-filled
    daily frame for the date range, populates it from df_memory, and
    converts it to the day approach.

    Args:
        date_start: First day of the range, as accepted by create_emptydf.
        date_end: Last day of the range, as accepted by create_emptydf.
        df_memory (list): Activity dictionaries in the format expected by
            read_df_memory ({'filename': ..., 'df': ...}).
        Z3_min (int): Overwritten by the interactive prompt below.
        Z5_min (int): Overwritten by the interactive prompt below.
        NOTE(review): confirm whether the prompts or the two threshold
        arguments should win; the prompts are kept as they were.

    Returns:
        DataFrame: The day-approach frame from convert_to_day_approach.
    """
    while True:
        # the z3_min and Z5 min need to be inputted by the user here
        Z3_min = input("Enter the minimum heart rate for your Z3 according to garmin: ")
        Z5_min = input("Enter the minimim heart rate for your Z5 according to garmin: ")
        # Ask again until both answers are whole numbers.
        try:
            Z3_min = int(Z3_min)
            Z5_min = int(Z5_min)
            break
        except ValueError:
            print("Please enter valid numbers for heart rate zone thresholds.")

    # Use the date range passed by the caller, not the notebook-level
    # start_date / end_date globals.
    empty = create_emptydf(date_start, date_end)
    r,o = read_df_memory(df_memory)

    df_full = populatebydate_memory(empty, r, o, Z3_min, Z5_min)

    # Convert to day approach format
    dfday_user = convert_to_day_approach(df_full)

    return dfday_user

In [16]:
# Fetch the date range and activity records (main_api_call comes from APIcall_v2), then
# build the day-approach frame; the call prompts for the Z3 / Z5 heart-rate thresholds.
start_date, end_date, df_memory = main_api_call()
main_extract_transform_memory(start_date, end_date, df_memory)

Garmin Connect API - Activity Downloader
start date:  2025-03-09 start_date type:  <class 'datetime.date'>
end date:  2025-04-28 end_date type:  <class 'datetime.date'>
Login successful!
Activity data for 'Cardio_28-04-2025_18960861981.csv' loaded into DataFrame.
Activity data for 'County Cork Running_27-04-2025_18947604779.csv' loaded into DataFrame.
Activity data for 'County Cork Running_27-04-2025_18947316809.csv' loaded into DataFrame.
Activity data for 'Cardio_26-04-2025_18942358660.csv' loaded into DataFrame.
Activity data for 'County Cork Running_25-04-2025_18931510406.csv' loaded into DataFrame.
Activity data for 'Cardio_24-04-2025_18922114709.csv' loaded into DataFrame.
Activity data for 'Cardio_23-04-2025_18910651825.csv' loaded into DataFrame.
Activity data for 'Cardio_22-04-2025_18900970544.csv' loaded into DataFrame.
Activity data for 'County Cork Running_20-04-2025_18879960706.csv' loaded into DataFrame.
Activity data for 'Cardio_19-04-2025_18875456581.csv' loaded into Da

Unnamed: 0,nr. sessions,total km,km Z3-4,km Z5-T1-T2,hours alternative,nr. sessions.1,total km.1,km Z3-4.1,km Z5-T1-T2.1,hours alternative.1,...,total km.5,km Z3-4.5,km Z5-T1-T2.5,hours alternative.5,nr. sessions.6,total km.6,km Z3-4.6,km Z5-T1-T2.6,hours alternative.6,Date
6,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.73,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.65,2025-03-15
7,1,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,2.59,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2025-03-16
8,0,0.0,0.0,0.0,1.06,1.0,3.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.76,1.0,2.59,0.0,0.0,0.0,2025-03-17
9,1,3.59,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.06,...,0.0,0.0,0.0,0.4,0.0,0.0,0.0,0.0,0.76,2025-03-18
10,0,0.0,0.0,0.0,0.89,1.0,3.59,0.0,0.0,0.0,...,0.0,0.0,0.0,0.73,0.0,0.0,0.0,0.0,0.4,2025-03-19
11,1,3.85,1.85,0.0,0.0,0.0,0.0,0.0,0.0,0.89,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.73,2025-03-20
12,0,0.0,0.0,0.0,0.79,1.0,3.85,1.85,0.0,0.0,...,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2025-03-21
13,1,4.03,1.03,0.0,0.0,0.0,0.0,0.0,0.0,0.79,...,0.0,0.0,0.0,1.06,1.0,3.0,0.0,0.0,0.0,2025-03-22
14,1,3.5,2.5,0.0,0.0,1.0,4.03,1.03,0.0,0.0,...,3.59,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.06,2025-03-23
15,0,0.0,0.0,0.0,0.86,1.0,3.5,2.5,0.0,0.0,...,0.0,0.0,0.0,0.89,1.0,3.59,0.0,0.0,0.0,2025-03-24
