In [35]:
import pandas as pd
import numpy as np
import os
from datetime import datetime
from pathlib import Path

In [36]:
Z3_min = 135
Z5_min = 173

In [79]:
def create_emptydf(start_date,end_date):
    """
    Creates empty DataFrame with date range
    Args:
        start_date (str): Start date in 'yyyy-mm-dd' format
        end_date (str): End date in 'yyyy-mm-dd' format
        
    Returns:
        empty (df): Eempty df ready for population
    """
    start = datetime.strptime(start_date, '%Y-%m-%d')
    end = datetime.strptime(end_date, '%Y-%m-%d')
    date_range = pd.date_range(start, end)

    df = pd.DataFrame({'Date': date_range})
    
    df['Date'] = df['Date'].dt.strftime('%Y-%m-%d')
    df['nr.sessions'] = 0
    df['total km'] = 0.0
    df['km Z3-4'] = 0.0
    df['km Z5-T1-T2'] = 0.0
    df['hours alternative'] = 0.0
    return df

In [80]:
empty = create_emptydf('2024-08-01','2024-09-30')
empty.head(10)

Unnamed: 0,Date,nr.sessions,total km,km Z3-4,km Z5-T1-T2,hours alternative
0,2024-08-01,0,0.0,0.0,0.0,0.0
1,2024-08-02,0,0.0,0.0,0.0,0.0
2,2024-08-03,0,0.0,0.0,0.0,0.0
3,2024-08-04,0,0.0,0.0,0.0,0.0
4,2024-08-05,0,0.0,0.0,0.0,0.0
5,2024-08-06,0,0.0,0.0,0.0,0.0
6,2024-08-07,0,0.0,0.0,0.0,0.0
7,2024-08-08,0,0.0,0.0,0.0,0.0
8,2024-08-09,0,0.0,0.0,0.0,0.0
9,2024-08-10,0,0.0,0.0,0.0,0.0


In [81]:
def readfiles(file_path="../data/external"):
    '''
    Creates lists of all csv files in directory

    Args: 
        file_path (str): the relative path for the folder that 
        contains all the activity files

    Returns:
        run_activities (list):
    '''
    fpath = Path(file_path)

    run_activities = list(fpath.glob(f'*Running_*.csv'))

    other_activities = list(fpath.glob(f'*[!Running]_*.csv'))

    return run_activities,other_activities

In [82]:
def readrun(file):
    df = pd.read_csv(file)
    return df

In [87]:
def populatebydate(emptydf,run_activities,other_activities):
    
    for i in emptydf['Date']:
        for file in run_activities:
            filedate =   datetime.strptime(str(file).split('_')[1], '%d-%m-%Y').strftime('%Y-%m-%d')
            if filedate == i:
                emptydf.loc[emptydf['Date'] == filedate,'nr.sessions'] += 1
                populateone(emptydf,str(file))


        for file in other_activities:
            filedate =  datetime.strptime(str(file).split('_')[1], '%d-%m-%Y').strftime('%Y-%m-%d')
            if filedate == i:
                temp_df= readrun(file)
                emptydf.loc[emptydf['Date'] == filedate,'hours alternative'] = temp_df['Time'].iloc[-1]

    df = emptydf
   
    return df

In [88]:
def populateone(df_prepop,filename):
    
    filedate =  datetime.strptime(filename.split('_')[1], '%d-%m-%Y').strftime('%Y-%m-%d')
    file_df = readrun(filename)
    df_prepop.loc[df_prepop['Date'] == filedate,'total km'] += file_df['Distance'].iloc[-1]
    # count rows where Avg HR between Z3_min and Z5_min
    # count rows where Avg HR > Z5_min
    df_postpop = df_prepop
    return df_postpop 
   

In [90]:
empty = create_emptydf('2024-08-01','2024-09-30')
r,o =readfiles()
df_full = populatebydate(empty,r,o)
df_full.head(32)


Unnamed: 0,Date,nr.sessions,total km,km Z3-4,km Z5-T1-T2,hours alternative
0,2024-08-01,1,6.38,0.0,0.0,00:40:33.363
1,2024-08-02,0,0.0,0.0,0.0,0.0
2,2024-08-03,1,6.18,0.0,0.0,00:36:16.562
3,2024-08-04,1,12.53,0.0,0.0,01:20:02.050
4,2024-08-05,0,0.0,0.0,0.0,0.0
5,2024-08-06,1,9.84,0.0,0.0,00:43:52.313
6,2024-08-07,0,0.0,0.0,0.0,0.0
7,2024-08-08,1,6.3,0.0,0.0,00:35:07.153
8,2024-08-09,0,0.0,0.0,0.0,01:29:02.776
9,2024-08-10,1,3.25,0.0,0.0,00:23:15.847
