In [30]:
import pandas as pd
import numpy as np
import datetime
from datetime import date
import random

from functions import mins_to_meters, meters_to_mins

from sklearn.linear_model import LinearRegression

# athlete level model

In [2]:
#create model for calculating athlete level
data = {'speed': [4.291573333333333, 5.109015873015872, 2.2351944444444443, 2.682233333333333], 
        'distance': [26.2, 3.1, 26.2, 3.1],'level': [10, 10, 1, 1]}
df_pace_lvl = pd.DataFrame(data)

#calculate slope and intercept -linear
X = df_pace_lvl[['speed', 'distance']]
y = df_pace_lvl['level']
level = LinearRegression().fit(X, y)
print('score: ', level.score(X, y))
print('coefficients: ', level.coef_)
print('intercept: ', level.intercept_)

score:  0.9932200383740991
coefficients:  [3.98780213 0.10914506]
intercept:  -10.373329770791926


# distance factor model

In [3]:
#modeling distance factor from athl_level (bound to 1-10)
data = {'level': [10, 1], 
        'dist_factor': [1, 0.5]}
df_distance_lvl = pd.DataFrame(data)
dist_model = np.poly1d(np.polyfit(df_distance_lvl['level'], df_distance_lvl['dist_factor'], 1))

# longest long run model (marathon only)

In [4]:
#modeling distance factor from athl_level (bound to 1-10)
data = {'level': [10, 1], 
        'max_lr': [23, 18]}
df_max_lr = pd.DataFrame(data)
lr_model = np.poly1d(np.polyfit(df_max_lr['level'], df_max_lr['max_lr'], 1))

In [5]:
lr_model(7.410528903816044)

21.561404946564476

# workout files

In [6]:
df_5k = pd.read_csv('workouts/5k.csv')
df_10k = pd.read_csv('workouts/10k.csv')
df_half = pd.read_csv('workouts/half_mar.csv')
df_full = pd.read_csv('workouts/full_mar.csv')

# main function for base calendar 

In [148]:
#function to date race date, number of weeks to train, and generate 
def get_calendar(race_year, race_month, race_day, weeks, pace_min, pace_sec, race_dist):
    
    #calcluate meters per second (speed) from goal pace
    speed = mins_to_meters(m=pace_min, s=pace_sec)
    
    race_date = date(race_year, race_month, race_day+1)
    
    cal = weeks*7
    
    date_list = [race_date - datetime.timedelta(days=x) for x in range(1,(cal+1))]
    
    date_list.reverse()
    
    days = []

    for x in date_list:
        days.append(x.weekday())
        
    data = {'date': date_list, 'day_code': days}
    
    df_training_cal = pd.DataFrame(data)
    
    #create validation table for weekday codes/desc to join to training calendar
    weekdays = {'day_code': range(0,7), 'day_desc': ['Mon', 'Tues', 'Wed', 'Thurs', 'Fri', 'Sat', 'Sun']}
    df_weekdays = pd.DataFrame(weekdays)
    df_weekdays
    
    df_training_cal = pd.merge(df_training_cal, df_weekdays, how='left', on='day_code')
    
    #find first monday and crop calendar down to start on first monday
    first_mon = df_training_cal[df_training_cal.day_code == 0].index[0]
    df_training_cal = df_training_cal.iloc[first_mon:]
    
    #create column for week count
    week = []
    count = 0
    for index, row in df_training_cal.iterrows():
        if row.day_code == 0:
            count += 1
            week.append(count)
        else:
            week.append(count)
    df_training_cal['week']=week
    
    #create column for training phase
    #2 week taper for blocks under 14 weeks, 3 week taper for blocks >= 14 weeks
    block = 0
    if weeks < 14:
        block += (df_training_cal.week.max() - 2)
    if weeks >= 14:
        block += (df_training_cal.week.max() - 3)
    base = np.ceil(block*0.4)
    peak = np.floor(block*0.6)

    phase= []

    for index, row in df_training_cal.iterrows():
        if row.week <= base:
            phase.append('base')
        elif row.week-base <= peak:
            phase.append('peak')
        else:
            phase.append('taper')
    df_training_cal['phase']=phase
    
    #calculate level and assign to level raw (used for pace calc)
    #if level raw is outside range(1,10), bound to nearest level and assign to dist_level (used for max distance calc)
    user_X = pd.DataFrame({'speed': [speed], 'distance': [race_dist]})
    level_raw = level.predict(user_X)[0]
    dist_level = []
    
    if level_raw < 1:
        dist_level.append(1)
    elif level_raw > 10:
        dist_level.append(10)
    else:
        dist_level.append(level_raw)
        
    #calculate paces (to be used for workouts)
    b1 = level.coef_[0]
    b2 = level.coef_[1]
    b0 = level.intercept_

    five_k = ((level_raw - b0) - (3.1*b2)) / b1
    ten_k = ((level_raw - b0) - (10.2*b2)) / b1
    hmp = ((level_raw - b0) - (13.1*b2)) / b1
    mp = ((level_raw - b0) - (26.2*b2)) / b1
    
    #weekly mileage peak
    mileage_max = 0
    if race_dist == 3.1:
        mileage_max += 45
    elif race_dist == 6.2:
        mileage_max += 50
    elif race_dist == 13.1:
        mileage_max += 60
    elif race_dist == 26.2:
        mileage_max += 75
        
    dist_factor = dist_model(dist_level[0]) 
    
    user_max = mileage_max*dist_factor
    #user_max = mileage_max-(level_final[0]*3)
    
    #weekly mileage
    base = len(df_training_cal.loc[df_training_cal.phase=='base'].week.unique())
    peak = len(df_training_cal.loc[df_training_cal.phase=='peak'].week.unique())
    taper = len(df_training_cal.loc[df_training_cal.phase=='taper'].week.unique())
    
    week_1 = round(user_max/3, 1)
    build = user_max-week_1
    weekly_miles = []
    
    week_num = []
    
    for index, row in df_training_cal.iterrows():
        if row.week not in week_num:
            week_num.append(row.week)
            
    weekly_miles = [week_1]
    miles = 0
    
    for i in range(1,base):
        miles += build/(base-1)
        weekly_miles.append(round(week_1+miles, 1))
    
    for i in range(1, peak+1):
        weekly_miles.append(round(user_max, 1))
    
    if taper == 2:
        weekly_miles.append(user_max*0.7)
        weekly_miles.append(user_max*0.4)
        
    if taper == 3:
        weekly_miles.append(round(user_max*0.85, 1))
        weekly_miles.append(round(user_max*0.65, 1))
        weekly_miles.append(round(user_max*0.3, 1))
    
    data = {'week': week_num, 'mileage': weekly_miles}
    df_mileage = pd.DataFrame(data)
    
    #add weekly mileage into df_training_cal
    weekly_mileage = []
    for index, row in df_training_cal.iterrows():
        weekly_mileage.append(df_mileage.loc[df_mileage.week == row.week].mileage.values[0])
    
    df_training_cal["weekly_mileage"] = weekly_mileage
    
    #add down week for training blocks >= 14 weeks
    if weeks >= 14:
        down_phase = []
        down_mileage = []
        
        for index, row in df_training_cal.iterrows():
            if row.week == (base + (peak-2)):
                down_phase.append('down')
                down_mileage.append(round(user_max*0.5, 1))
            else:
                down_phase.append(row.phase)
                down_mileage.append(row.weekly_mileage)
                
        df_training_cal['phase']=down_phase
        df_training_cal['weekly_mileage']=down_mileage
        
    ###LONG RUNS###
    
    #establish max long run distance
    max_lr = 0
    if (race_dist == 3.1) and (user_max/3 <= 10):
        max_lr += round(user_max/3, 1)
    elif (race_dist == 3.1) and (user_max/3 > 10):
        max_lr += 10
    elif (race_dist == 6.2) and (user_max/3 <= 12):
        max_lr += round(user_max/3, 1)
    elif (race_dist == 6.2) and (user_max/3 > 12):
        max_lr += 12
    elif (race_dist == 13.1) and (user_max/3 <= 18):
        max_lr += round(user_max/3, 1)
    elif (race_dist == 13.1) and (user_max/3 > 18):
        max_lr += 18
    #for full-marathon, use linear model
    elif (race_dist == 26.2):
        max_lr += lr_model(dist_level)[0]
    
    #max_lr = max_lr[0]
    lr_85pct = round(max_lr*0.85, 1)
    lr_90pct = round(max_lr*0.9, 1)
    lr_95pct = round(max_lr*0.95,1)
    base_lr = []
    lr1 = round(df_training_cal.loc[df_training_cal.week==1].weekly_mileage.unique()[0]/3, 1) #1/3 of first week total mileage,1)
    peak_lr = []
    down_lr = []
    postdown_lr = []
    taper_lr = []

    #base long runs
    base_step = (max_lr - lr1) / (base-1)
    base_lr_holder = lr1
    base_lr.append(lr1)

    for i in range(1,base):
        base_lr_holder+=base_step
        base_lr.append(round(base_lr_holder, 1))
    
    #if down week exists
    if weeks >= 14:
        #peak to down week long runs
        peak_lr.append(lr_85pct)
        down_week = df_training_cal.loc[df_training_cal.phase == 'down'].week.unique()[0]
        peak_weeks = len(df_training_cal.loc[(df_training_cal.phase == 'peak') & (df_training_cal.week < down_week)].week.unique())
        lr_diff = max_lr - lr_85pct
        step = lr_diff/(peak_weeks-1) 

        lr_holder = lr_85pct
        for i in range(1,peak_weeks):
            lr_holder+=step
            peak_lr.append(round(lr_holder, 1))
    
        #down week long run
        for index, row in df_training_cal.iterrows():
            if (row.day_code == 5) and (row.phase == 'down') and (row.weekly_mileage/3 < max_lr):
                down_lr.append(round(row.weekly_mileage/3, 1))

        #post down week peak long runs
        postdown_peak = len(df_training_cal.loc[(df_training_cal.phase == 'peak') & (df_training_cal.week > down_week)].week.unique())
        for i in range(1,postdown_peak+1):
            postdown_lr.append(lr_95pct)
    
    #if no down week
    else:
        peak_lr.append(lr_85pct)
        peak_weeks = len(df_training_cal.loc[(df_training_cal.day_code ==5) & (df_training_cal.phase == 'peak')])
        lr_diff = max_lr - lr_85pct
        step = lr_diff/(peak_weeks-1) 

        lr_holder = lr_85pct
        for i in range(1,peak_weeks):
            lr_holder+=step
            peak_lr.append(round(lr_holder, 1))
            
            
    #taper long runs
    for index, row in df_training_cal.iterrows():
        if (row.day_code == 5) and (row.phase == 'taper') and (row.weekly_mileage/3 < max_lr):
            taper_lr.append(round(row.weekly_mileage/3, 1))
        elif (row.day_code == 5) and (row.phase == 'taper') and (row.weekly_mileage/3 >= max_lr):
            taper_lr.append(lr_85pct)
            
    long_runs = base_lr + peak_lr + down_lr + postdown_lr + taper_lr
    
    #daily distance, run types, and run descriptions
    #runs per week
    runs_per_wk = []
    for index, row in df_training_cal.iterrows():    
        #if weekly mileage under 20, assign rest days to mon, fri, sun
        if (row.weekly_mileage < 19.99):
            runs_per_wk.append(4)
        #if weekly mileage btw 20 and 40, assign rest days to mon, fri
        elif (row.weekly_mileage >= 20) & (row.weekly_mileage < 39.99):
            runs_per_wk.append(5)
        #if weekly mileage btw 40 and 55, assign rest day to mon
        elif (row.weekly_mileage >= 40) & (row.weekly_mileage < 54.99):
            runs_per_wk.append(6)
        #plug in LR distance on saturdays
        else:
            runs_per_wk.append(7)
        
    df_training_cal['runs_per_week'] = runs_per_wk
    
    #dist, run type, run_desc
    distance = []
    run_type = []
    run_desc = []

    for index, row in df_training_cal.iterrows():    
        #if 4 runs per week, assign rest days to mon, fri, sun
        if (row.runs_per_week == 4) & (row.day_code in [0, 4, 6]):
            distance.append(0)
            run_type.append('rest')
            run_desc.append('This is a rest day. Prioritize relaxation and recovery.')
    
        #if 5 runs per week, assign rest days to mon, fri
        elif (row.runs_per_week == 5) & (row.day_code in [0, 4]):
            distance.append(0)
            run_type.append('rest')
            run_desc.append('This is a rest day. Prioritize relaxation and recovery.')
        #if 6 runs per week, assign rest day to mon
        elif (row.runs_per_week == 6) & (row.day_code == 0):
            distance.append(0)
            run_type.append('rest')
            run_desc.append('This is a rest day. Prioritize relaxation and recovery.')
        
        #plug in LR distance on saturdays
        elif row.day_code == 5:
            lr_index = row.week - 1
            distance.append(round(long_runs[lr_index], 1))
            run_type.append('long run')
            run_desc.append('This is the long run')
            
        else:
            lr_index = row.week - 1
            filler_dist = (row.weekly_mileage - long_runs[lr_index])/(row.runs_per_week - 1)
            distance.append(round(filler_dist, 1))     
            run_type.append('easy')
            run_desc.append('This run should be easy enough that you could carry on a conversation throughout the run.')
        
    df_training_cal['distance'] = distance
    df_training_cal['run_type'] = run_type
    df_training_cal['run_desc'] = run_desc
    
    return df_training_cal, level_raw, dist_level, user_max, df_mileage, \
            speed, five_k, ten_k, hmp, mp, long_runs, max_lr, peak_lr, lr_85pct, race_dist

In [149]:
result = get_calendar(race_year=2023, race_month=11, race_day=19, weeks=16, pace_min=7, pace_sec=10, race_dist=26.2)
print('raw level: ', result[1])
print('dist level: ', result[2])
print('mp: ', meters_to_mins(result[9]))
print('hmp: ', meters_to_mins(result[8]))
print('10k: ', meters_to_mins(result[7]))
print('5k: ', meters_to_mins(result[6]))
result[0].to_csv('test.csv', index=False)

raw level:  7.410528903816044
dist level:  [7.410528903816044]
mp:  (7, 10.020000000000001)
hmp:  (6, 32.400000000000006)
10k:  (6, 24.959999999999997)
5k:  (6, 7.86)


In [9]:
result[14]

26.2

In [10]:
print('all_lrs: ', result[10])
print('max_lr: ', result[11]) 
print('peak_lrs: ', result[12])
print('85pct_lr: ', result[13])

all_lrs:  [7.1, 10.0, 12.9, 15.8, 18.7, 21.6, 18.3, 19.4, 20.5, 21.6, 10.7, 20.5, 20.5, 18.2, 13.9, 6.4]
max_lr:  21.561404946564476
peak_lrs:  [18.3, 19.4, 20.5, 21.6]
85pct_lr:  18.3


In [11]:
result[0][['week', 'phase', 'weekly_mileage']].drop_duplicates()

Unnamed: 0,week,phase,weekly_mileage
0,1,base,21.4
7,2,base,30.0
14,3,base,38.5
21,4,base,47.1
28,5,base,55.6
35,6,base,64.2
42,7,peak,64.2
49,8,peak,64.2
56,9,peak,64.2
63,10,peak,64.2


In [14]:
#race_dist
result[14]

26.2

In [100]:
##WORKOUT ASSIGNMENT##
distance = result[0].distance.tolist()
run_type = result[0].run_type.tolist()
run_desc = result[0].run_desc.tolist()

for index, row in result[0].iterrows():
    #if marathoner and level > 5: base phase has weekly w/0 + easy LR
    if (result[14] == 26.2) & (result[1] >= 5) & (row.phase in ['base', 'taper']):
        #store indices of relevant workouts in i_workouts list
        i_workouts = df_full.loc[(df_full.phase == 'base') & (result[1] > df_full.dist_level_min) & (result[1] < df_full.dist_level_max)].index.tolist()
        if row.day_code == 2:
            #update wednesday run_type, desc, and dist with random w/o from w/o file data
            random.shuffle(i_workouts)
            i = random.choice(i_workouts)
            distance[index]=df_full.iloc[i]['distance']
            run_type[index]='workout'
            run_desc[index]=df_full.iloc[i]['name']
    elif (result[14] == 26.2) & (result[1] >= 5) & (row.phase == 'peak'):
        #store indices of relevant workouts in i_workouts list
        i_workouts = df_full.loc[(df_full.phase == 'speed') & (result[1] > df_full.dist_level_min) & (result[1] < df_full.dist_level_max)].index.tolist()
        if row.day_code == 2:
            #update wednesday run_type, desc, and dist with random w/o from w/o file data
            i = random.choice(i_workouts)
            distance[index]=df_full.iloc[i]['distance']
            run_type[index]='workout'
            run_desc[index]=df_full.iloc[i]['name']
        
result[0]['distance'] = distance
result[0]['run_type'] = run_type
result[0]['run_desc'] = run_desc
result[0].to_csv('test.csv', index=False)

In [34]:
df_full.loc[(df_full.phase == 'base') & (result[1] > df_full.dist_level_min) & (result[1] < df_full.dist_level_max)]

Unnamed: 0,workout_id,name,desc,distance,pace,phase,dist_level_min,dist_level_max
1,,12 x 400,1 mile warm up followd by 10 reps of 400m with...,6.75,5k,base,5,10.0
3,,6 x 800m,1 mile warm up followd by 6 reps of 800m with ...,5.75,10k down to 5k,base,5,10.0
5,,10 x 1 minute hill sprints,1.5 mile warm up followed by 10 reps of appro...,5.0,hmp down to 10k,base,5,10.0


In [55]:
df_full.iloc[5]['desc']

'1.5 mile warm up followed by 10  reps of approximately 60 second hill sprints. Finish with 1.5 mile cool down'

In [62]:
i_workouts = df_full.loc[(df_full.phase == 'base') & (result[1] > df_full.dist_level_min) & (result[1] < df_full.dist_level_max)].index.tolist()

# pick a random element from a list of strings.
workout = random.choice(i_workouts)
print(workout)

6


In [143]:
#using random selection will result in too many repeated workouts. boring. shuffle will work better
i_workouts = [1, 2, 3, 4, 5, 6]

big_list = []
prob = []
for i in range (0,1000):
    workout_list = []
    for i in range(0,5):
        workout_list.append(random.choice(i_workouts))
    big_list.append(workout_list)
    
for l in big_list:
    for i in range(len(l)-1):
        if l[i] == l[i+1]:
            prob.append(l)
        continue

In [146]:
x = [1, 2, 3, 4, 5]
random.shuffle(x)
x

[1, 5, 2, 3, 4]

In [102]:
#updating list...idea for updating weekly distances
x = ['a', 'b', 'c', 'd']
x[3] = 'new_value'
x

['a', 'b', 'c', 'new_value']