# Objective

Track my workout data over a period of time to spot trends and improvements, and, overall, to see pretty data about something I enjoy.

# How to get it organized:
### Yoga
- Track date and time of day, duration in minutes, style, workout level, body focus for each workout.
- The data can be retrieved from Down Dog app logs (which provide nearly all the data) + Google Fit app (which provides the date and time of day data).

### Running
- Track date and time of day, location, duration in minutes, total distance, average rhythm (pace, in minutes per km), time for every 500 meters, rhythm at every 500 meters, calories burned, time at first stop to walk.
- The data can be retrieved entirely from Runkeeper app logs.

# To do:
- Gather data manually, use only this month's workouts: *Up to date!*

# Possible additions:
Discord bot to add new workouts, display summary of workouts, plan workouts and set reminders.

**Imports**

In [1]:
import datetime
import pandas as pd
import numpy as np
import plotly.express as px

# Yoga

In [2]:
# Build the yoga log straight from Python rows. Passing the rows through
# np.array would coerce every value to a string (a NumPy array holds a single
# dtype), which silently turned TotalDuration into text.
df_yoga = pd.DataFrame(
    [
        ['2021-02-01T23:18Z', 30, 'Restaurative Yoga', 'Intermediate', 'Back strengthening'],
        ['2021-02-02T20:09Z', 30, 'Hatha Yoga', 'Intermediate', 'Back strengthening'],
        ['2021-02-03T22:37Z', 30, 'Full practice', 'Intermediate', 'Back strengthening'],
        ['2021-02-05T21:36Z', 30, 'Ashtanga', 'Intermediate', 'Traditional'],
    ],
    columns=['Time', 'TotalDuration', 'Style', 'DifficultyLevel', 'BodyFocus'],
)
# The trailing "Z" marks the timestamps as UTC, so the parsed column is tz-aware.
df_yoga['Time'] = pd.to_datetime(df_yoga['Time'])
df_yoga

Unnamed: 0,Time,TotalDuration,Style,DifficultyLevel,BodyFocus
0,2021-02-01 23:18:00+00:00,30,Restaurative Yoga,Intermediate,Back strengthening
1,2021-02-02 20:09:00+00:00,30,Hatha Yoga,Intermediate,Back strengthening
2,2021-02-03 22:37:00+00:00,30,Full practice,Intermediate,Back strengthening
3,2021-02-05 21:36:00+00:00,30,Ashtanga,Intermediate,Traditional


In [3]:
# Add the 2021-02-12 session. DataFrame.append was removed in pandas 2.0, so the
# new row is wrapped in a one-row frame and concatenated instead.
# The new Time value is still a raw string at this point; it is parsed by the
# later pd.to_datetime(..., utc=True) cell.
df_yoga = pd.concat(
    [
        df_yoga,
        pd.DataFrame(
            [['2021-02-12T23:19Z', 30, 'Full practice', 'Intermediate', 'Upper body strengthening']],
            columns=df_yoga.columns,
        ),
    ],
    ignore_index=True,
)
df_yoga

Unnamed: 0,Time,TotalDuration,Style,DifficultyLevel,BodyFocus
0,2021-02-01 23:18:00+00:00,30,Restaurative Yoga,Intermediate,Back strengthening
1,2021-02-02 20:09:00+00:00,30,Hatha Yoga,Intermediate,Back strengthening
2,2021-02-03 22:37:00+00:00,30,Full practice,Intermediate,Back strengthening
3,2021-02-05 21:36:00+00:00,30,Ashtanga,Intermediate,Traditional
4,2021-02-12T23:19Z,30,Full practice,Intermediate,Upper body strengthening


In [4]:
# Add the 2021-02-14 session. DataFrame.append was removed in pandas 2.0, so the
# new row is wrapped in a one-row frame and concatenated instead.
# The new Time value is still a raw string at this point; it is parsed by the
# later pd.to_datetime(..., utc=True) cell.
df_yoga = pd.concat(
    [
        df_yoga,
        pd.DataFrame(
            [['2021-02-14T21:30Z', 30, 'Full practice', 'Intermediate', 'Core strengthening']],
            columns=df_yoga.columns,
        ),
    ],
    ignore_index=True,
)
df_yoga

Unnamed: 0,Time,TotalDuration,Style,DifficultyLevel,BodyFocus
0,2021-02-01 23:18:00+00:00,30,Restaurative Yoga,Intermediate,Back strengthening
1,2021-02-02 20:09:00+00:00,30,Hatha Yoga,Intermediate,Back strengthening
2,2021-02-03 22:37:00+00:00,30,Full practice,Intermediate,Back strengthening
3,2021-02-05 21:36:00+00:00,30,Ashtanga,Intermediate,Traditional
4,2021-02-12T23:19Z,30,Full practice,Intermediate,Upper body strengthening
5,2021-02-14T21:30Z,30,Full practice,Intermediate,Core strengthening


In [5]:
# Re-parse the Time column so the rows appended as raw strings become
# timezone-aware UTC timestamps like the rest of the column.
yoga_times_utc = pd.to_datetime(df_yoga['Time'], utc=True)
df_yoga['Time'] = yoga_times_utc
df_yoga

Unnamed: 0,Time,TotalDuration,Style,DifficultyLevel,BodyFocus
0,2021-02-01 23:18:00+00:00,30,Restaurative Yoga,Intermediate,Back strengthening
1,2021-02-02 20:09:00+00:00,30,Hatha Yoga,Intermediate,Back strengthening
2,2021-02-03 22:37:00+00:00,30,Full practice,Intermediate,Back strengthening
3,2021-02-05 21:36:00+00:00,30,Ashtanga,Intermediate,Traditional
4,2021-02-12 23:19:00+00:00,30,Full practice,Intermediate,Upper body strengthening
5,2021-02-14 21:30:00+00:00,30,Full practice,Intermediate,Core strengthening


# Running

In [6]:
# Build the running log straight from Python rows. Passing the rows through
# np.array would coerce every value to a string (a NumPy array holds a single
# dtype), so all measurements would start out as text.
# TimeNth500m columns hold the cumulative time at each 500 m mark.
df_run = pd.DataFrame(
    [
        ['2021-02-04T19:54Z', 'Plaza Condell', 20.96, 2.53, 8.28, 3.4, 7.3, 12.05, 16.66, 20.85, 154],
        ['2021-02-06T18:27Z', 'Plaza Condell', 24.33, 2.87, 8.46, 3.36, 7.2, 12.12, 16.27, 21.12, 177],
    ],
    columns=['Time', 'Location', 'TotalDuration', 'TotalDistance', 'AverageRhythm',
             'Time1st500m', 'Time2nd500m', 'Time3rd500m', 'Time4th500m', 'Time5th500m',
             'Calories'],
)
df_run

Unnamed: 0,Time,Location,TotalDuration,TotalDistance,AverageRhythm,Time1st500m,Time2nd500m,Time3rd500m,Time4th500m,Time5th500m,Calories
0,2021-02-04T19:54Z,Plaza Condell,20.96,2.53,8.28,3.4,7.3,12.05,16.66,20.85,154
1,2021-02-06T18:27Z,Plaza Condell,24.33,2.87,8.46,3.36,7.2,12.12,16.27,21.12,177


In [7]:
# Cast every measurement column to float in a single astype call; Time and
# Location are left untouched.
df_run = df_run.astype(dict.fromkeys(
    [
        'Time1st500m', 'Time2nd500m', 'Time3rd500m', 'Time4th500m', 'Time5th500m',
        'TotalDuration', 'TotalDistance', 'AverageRhythm', 'Calories',
    ],
    float,
))

In [8]:
# Grow the running log run by run. DataFrame.append was removed in pandas 2.0,
# so new rows are wrapped in a frame with the current columns and concatenated.
# The table also widens along the way (id, 6th/7th/8th 500 m marks), which is
# why the rows added at different stages have different lengths.
# For runs that ended before a given mark, the cumulative time simply repeats
# the last recorded value.

# Run 3, entered with the original 11 columns.
df_run = pd.concat(
    [
        df_run,
        pd.DataFrame(
            [['2021-02-08T19:41Z', 'Plaza Condell', 26.8, 3.04, 8.49, 3.95, 7.9, 12.02, 17.33, 21.88, 180]],
            columns=df_run.columns,
        ),
    ],
    ignore_index=True,
)

# Add the 6th 500 m mark (before Calories) and a 1-based run id as first column.
df_run.insert(10, 'Time6th500m', [20.85, 21.12, 26.8])
df_run.insert(0, 'id', [1, 2, 3])

# Run 4, entered with id and the 6th mark.
df_run = pd.concat(
    [
        df_run,
        pd.DataFrame(
            [[4, '2021-02-10T20:21Z', 'Plaza Condell', 29.8, 3.6, 8.28, 3.48, 7.23, 11.78, 15.75, 20.15, 24.43, 219]],
            columns=df_run.columns,
        ),
    ],
    ignore_index=True,
)

# Add the 7th and 8th 500 m marks (before Calories).
df_run.insert(12, 'Time7th500m', [20.85, 21.12, 26.8, 28.93])
df_run.insert(13, 'Time8th500m', [20.85, 21.12, 26.8, 28.93])

# Runs 5-14 all use the full 15-column layout, so they are added in one concat.
df_run = pd.concat(
    [
        df_run,
        pd.DataFrame(
            [
                [5, '2021-02-12T20:39Z', 'Plaza Condell', 31.42, 3.7, 8.48, 3.68, 7.42, 11.43, 16.05, 20.4, 25.07, 29.73, 29.73, 220],
                [6, '2021-02-15T21:03Z', 'Plaza Condell', 23.98, 3.04, 7.9, 3.65, 7.38, 11.15, 15.2, 19.32, 23.55, 23.55, 23.55, 189],
                [7, '2021-02-17T20:12Z', 'Plaza Condell', 24.1, 3.01, 8.02, 3.45, 7.13, 11.25, 15.12, 19.92, 23.97, 23.97, 23.97, 190],
                [8, '2021-02-21T21:22Z', 'Plaza Condell', 26.3, 3.07, 8.57, 3.95, 7.93, 12.07, 16.8, 21.08, 25.6, 25.6, 25.6, 182],
                [9, '2021-02-23T21:20Z', 'Plaza Condell', 25.15, 3.01, 8.37, 3.48, 7.83, 12.2, 16.38, 20.6, 24.97, 24.97, 24.97, 181],
                [10, '2021-02-25T20:07Z', 'Plaza Condell', 34.02, 4.0, 8.5, 3.8, 7.65, 11.65, 15.82, 20.97, 25.33, 29.68, 33.98, 245],
                [11, '2021-02-28T22:32Z', 'Plaza Condell', 13.93, 1.61, 8.67, 3.7, 7.87, 12.85, 12.85, 12.85, 12.85, 12.85, 12.85, 99],
                [12, '2021-03-03T21:21Z', 'Plaza Condell', 27.05, 3.12, 8.68, 3.57, 7.47, 11.75, 16.03, 21.48, 25.93, 25.93, 25.93, 188],
                [13, '2021-03-05T8:30Z', 'Plaza Condell', 25.67, 3.17, 8.1, 3.57, 7.42, 11.22, 15.17, 20.3, 24.28, 24.28, 24.28, 200],
                [14, '2021-03-07T21:46Z', 'Plaza Condell', 16.42, 2.0, 8.23, 3.62, 7.47, 12.28, 16.37, 16.37, 16.37, 16.37, 16.37, 125],
            ],
            columns=df_run.columns,
        ),
    ],
    ignore_index=True,
)

In [9]:
# Add run 15. DataFrame.append was removed in pandas 2.0, so the new row is
# wrapped in a one-row frame and concatenated instead.
df_run = pd.concat(
    [
        df_run,
        pd.DataFrame(
            [[15, '2021-03-09T21:12Z', 'Plaza Condell', 36.27, 4.18, 8.68, 3.58, 7.8, 12.63, 16.7, 21.78, 25.87, 30.57, 34.62, 256]],
            columns=df_run.columns,
        ),
    ],
    ignore_index=True,
)

In [10]:
# Parse the raw ISO strings in Time into timezone-aware UTC timestamps.
run_times_utc = pd.to_datetime(df_run['Time'], utc=True)
df_run['Time'] = run_times_utc
df_run

Unnamed: 0,id,Time,Location,TotalDuration,TotalDistance,AverageRhythm,Time1st500m,Time2nd500m,Time3rd500m,Time4th500m,Time5th500m,Time6th500m,Time7th500m,Time8th500m,Calories
0,1,2021-02-04 19:54:00+00:00,Plaza Condell,20.96,2.53,8.28,3.4,7.3,12.05,16.66,20.85,20.85,20.85,20.85,154.0
1,2,2021-02-06 18:27:00+00:00,Plaza Condell,24.33,2.87,8.46,3.36,7.2,12.12,16.27,21.12,21.12,21.12,21.12,177.0
2,3,2021-02-08 19:41:00+00:00,Plaza Condell,26.8,3.04,8.49,3.95,7.9,12.02,17.33,21.88,26.8,26.8,26.8,180.0
3,4,2021-02-10 20:21:00+00:00,Plaza Condell,29.8,3.6,8.28,3.48,7.23,11.78,15.75,20.15,24.43,28.93,28.93,219.0
4,5,2021-02-12 20:39:00+00:00,Plaza Condell,31.42,3.7,8.48,3.68,7.42,11.43,16.05,20.4,25.07,29.73,29.73,220.0
5,6,2021-02-15 21:03:00+00:00,Plaza Condell,23.98,3.04,7.9,3.65,7.38,11.15,15.2,19.32,23.55,23.55,23.55,189.0
6,7,2021-02-17 20:12:00+00:00,Plaza Condell,24.1,3.01,8.02,3.45,7.13,11.25,15.12,19.92,23.97,23.97,23.97,190.0
7,8,2021-02-21 21:22:00+00:00,Plaza Condell,26.3,3.07,8.57,3.95,7.93,12.07,16.8,21.08,25.6,25.6,25.6,182.0
8,9,2021-02-23 21:20:00+00:00,Plaza Condell,25.15,3.01,8.37,3.48,7.83,12.2,16.38,20.6,24.97,24.97,24.97,181.0
9,10,2021-02-25 20:07:00+00:00,Plaza Condell,34.02,4.0,8.5,3.8,7.65,11.65,15.82,20.97,25.33,29.68,33.98,245.0


# Plots

### General trends
Each line is a different distance. Not very clear. May look better with more observations.

In [28]:
# One line per 500 m mark: cumulative time at that mark, plotted across run ids.
px.line(
    data_frame=df_run,
    x='id',
    y=[f'Time{mark}500m' for mark in ('1st', '2nd', '3rd', '4th', '5th', '6th', '7th', '8th')],
)

Comparing multiple runs with x= distance, y= time.

In [12]:
# Turn the cumulative 500 m marks into per-segment ("partial") times: the first
# segment is the first mark itself, every later one is the difference between
# two consecutive marks.
marks = ('1st', '2nd', '3rd', '4th', '5th', '6th', '7th', '8th')
df_run[f'PartialTime{marks[0]}500m'] = df_run[f'Time{marks[0]}500m']
for earlier, later in zip(marks, marks[1:]):
    df_run[f'PartialTime{later}500m'] = df_run[f'Time{later}500m'] - df_run[f'Time{earlier}500m']

In [13]:
def idmelt(x):
    """Return the partial times of the runs matching *x* in long format.

    *x* is a ``DataFrame.query`` expression (e.g. ``"id == 1"``) evaluated
    against the module-level ``df_run``. The result has the columns ``id``,
    ``distance`` (the name of the source PartialTime column) and a value
    column named after the query string *x* itself.
    """
    partial_columns = [
        f'PartialTime{mark}500m'
        for mark in ('1st', '2nd', '3rd', '4th', '5th', '6th', '7th', '8th')
    ]
    return df_run.query(x).melt(
        id_vars=['id'],
        value_vars=partial_columns,
        var_name='distance',
        value_name=x,
    )

In [30]:
# Build a wide table with one row per 500 m segment and one "id == N" column
# per run, for every run currently in df_run.
#
# Each run is melted separately via idmelt (its value column is named after the
# query string). The 'id' column is dropped before merging: carrying it through
# repeated merges made pandas suffix it to id_x/id_y again and again, producing
# duplicate column names, which raises MergeError on pandas >= 2.0.
melted_runs = [
    idmelt(f"id == {int(run_id)}").drop(columns='id')
    for run_id in df_run['id']
]

runids = melted_runs[0]
for df in melted_runs[1:]:
    runids = pd.merge(runids, df, on='distance')

# Relabel segments by the distance reached at their end. The result is assigned
# back to the column because an in-place replace on an attribute-accessed
# Series is not guaranteed to update the frame.
runids['distance'] = runids['distance'].replace({
    'PartialTime1st500m': '500m',
    'PartialTime2nd500m': '1000m',
    'PartialTime3rd500m': '1500m',
    'PartialTime4th500m': '2000m',
    'PartialTime5th500m': '2500m',
    'PartialTime6th500m': '3000m',
    'PartialTime7th500m': '3500m',
    'PartialTime8th500m': '4000m',
})
runids

Unnamed: 0,distance,id == 1,id == 2,id == 3,id == 4,id == 5,id == 6,id == 7,id == 8,id == 9,id == 10,id == 11,id == 12,id == 13,id == 14,id == 15
0,500m,3.4,3.36,3.95,3.48,3.68,3.65,3.45,3.95,3.48,3.8,3.7,3.57,3.57,3.62,3.58
1,1000m,3.9,3.84,3.95,3.75,3.74,3.73,3.68,3.98,4.35,3.85,4.17,3.9,3.85,3.85,4.22
2,1500m,4.75,4.92,4.12,4.55,4.01,3.77,4.12,4.14,4.37,4.0,4.98,4.28,3.8,4.81,4.83
3,2000m,4.61,4.15,5.31,3.97,4.62,4.05,3.87,4.73,4.18,4.17,0.0,4.28,3.95,4.09,4.07
4,2500m,4.19,4.85,4.55,4.4,4.35,4.12,4.8,4.28,4.22,5.15,0.0,5.45,5.13,0.0,5.08
5,3000m,0.0,0.0,4.92,4.28,4.67,4.23,4.05,4.52,4.37,4.36,0.0,4.45,3.98,0.0,4.09
6,3500m,0.0,0.0,0.0,4.5,4.66,0.0,0.0,0.0,0.0,4.35,0.0,0.0,0.0,0.0,4.7
7,4000m,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.3,0.0,0.0,0.0,0.0,4.05


In [32]:
# Compare the per-segment times of runs 10 and 15 along the distance axis.
px.line(data_frame=runids, x="distance", y=["id == 10", "id == 15"])

### Trends for each distance
Also a bit confusing. The shift is clear from 1st 500m to 4th 500m.

In [35]:
# Cumulative time at the 1st 500 m mark, across run ids.
px.line(data_frame=df_run, x="id", y="Time1st500m")

In [18]:
# Cumulative time at the 2nd 500 m mark, across run ids.
px.line(data_frame=df_run, x="id", y="Time2nd500m")

In [19]:
# Cumulative time at the 3rd 500 m mark, across run ids.
px.line(data_frame=df_run, x="id", y="Time3rd500m")

In [20]:
# Cumulative time at the 4th 500 m mark, across run ids.
px.line(data_frame=df_run, x="id", y="Time4th500m")

In [21]:
# Cumulative time at the 5th 500 m mark, across run ids.
px.line(data_frame=df_run, x="id", y="Time5th500m")

In [33]:
# Cumulative time at the 8th 500 m mark, across run ids.
px.line(data_frame=df_run, x="id", y="Time8th500m")

### Trends with partial times

Plot the partial time of each specific 500 m segment across runs: x = run id, y = time spent on that segment.

In [23]:
# One line per 500 m segment: time spent on that segment, plotted across run ids.
px.line(
    data_frame=df_run,
    x='id',
    y=[f'PartialTime{mark}500m' for mark in ('1st', '2nd', '3rd', '4th', '5th', '6th', '7th', '8th')],
)

In [36]:
# Time spent on the 1st 500 m segment, across run ids.
px.line(data_frame=df_run, x="id", y="PartialTime1st500m")

In [37]:
# Time spent on the 2nd 500 m segment, across run ids.
px.line(data_frame=df_run, x="id", y="PartialTime2nd500m")

In [38]:
# Time spent on the 3rd 500 m segment, across run ids.
px.line(data_frame=df_run, x="id", y="PartialTime3rd500m")

In [39]:
# Time spent on the 4th 500 m segment, across run ids.
px.line(data_frame=df_run, x="id", y="PartialTime4th500m")

In [40]:
# Time spent on the 5th 500 m segment, across run ids.
px.line(data_frame=df_run, x="id", y="PartialTime5th500m")

In [41]:
# Time spent on the 6th 500 m segment, across run ids.
px.line(data_frame=df_run, x="id", y="PartialTime6th500m")

In [42]:
# Time spent on the 7th 500 m segment, across run ids.
px.line(data_frame=df_run, x="id", y="PartialTime7th500m")

In [43]:
# Time spent on the 8th 500 m segment, across run ids.
px.line(data_frame=df_run, x="id", y="PartialTime8th500m")

In [24]:
# Summary statistics (count, mean, std, quartiles) for every numeric column.
# NOTE(review): the PartialTime* columns contain 0.0 for segments a run never
# reached, so their mean and quantiles are pulled toward zero.
df_run.describe()

Unnamed: 0,id,TotalDuration,TotalDistance,AverageRhythm,Time1st500m,Time2nd500m,Time3rd500m,Time4th500m,Time5th500m,Time6th500m,...,Time8th500m,Calories,PartialTime1st500m,PartialTime2nd500m,PartialTime3rd500m,PartialTime4th500m,PartialTime5th500m,PartialTime6th500m,PartialTime7th500m,PartialTime8th500m
count,15.0,15.0,15.0,15.0,15.0,15.0,15.0,15.0,15.0,15.0,...,15.0,15.0,15.0,15.0,15.0,15.0,15.0,15.0,15.0,15.0
mean,8.0,25.746667,3.063333,8.380667,3.616,7.533333,11.896667,15.9,19.938,23.132667,...,24.903333,187.0,3.616,3.917333,4.363333,4.003333,4.038,3.194667,1.214,0.556667
std,4.472136,5.907943,0.676321,0.242619,0.179595,0.274556,0.503668,1.05772,2.365231,3.886014,...,5.814921,40.802661,0.179595,0.192483,0.413464,1.171182,1.689705,2.007994,2.085171,1.469799
min,1.0,13.93,1.61,7.9,3.36,7.13,11.15,12.85,12.85,12.85,...,12.85,99.0,3.36,3.68,3.77,0.0,0.0,0.0,0.0,0.0
25%,4.5,24.04,2.94,8.255,3.48,7.34,11.54,15.475,20.035,22.335,...,22.335,178.5,3.48,3.795,4.065,4.01,4.205,1.99,0.0,0.0
50%,8.0,25.67,3.04,8.46,3.58,7.47,12.02,16.05,20.6,24.43,...,24.97,188.0,3.58,3.85,4.28,4.15,4.4,4.23,0.0,0.0
75%,11.5,28.425,3.385,8.535,3.69,7.815,12.16,16.52,21.1,25.465,...,27.865,209.5,3.69,3.965,4.78,4.445,4.965,4.41,2.175,0.0
max,15.0,36.27,4.18,8.68,3.95,7.93,12.85,17.33,21.88,26.8,...,34.62,256.0,3.95,4.35,4.98,5.31,5.45,4.92,4.7,4.3


## Trends for each run

In [25]:
def idtimes(x):
    """Return the partial times of the runs matching *x* in long format.

    *x* is a ``DataFrame.query`` expression (e.g. ``"id == 1"``) evaluated
    against the module-level ``df_run``. The result has the columns ``id``,
    ``distance`` (the name of the source PartialTime column) and
    ``partial_time`` (the value of that column).
    """
    partial_columns = [
        f'PartialTime{mark}500m'
        for mark in ('1st', '2nd', '3rd', '4th', '5th', '6th', '7th', '8th')
    ]
    return df_run.query(x).melt(
        id_vars=['id'],
        value_vars=partial_columns,
        var_name='distance',
        value_name='partial_time',
    )

In [26]:
# Per-segment times of run 1, segment by segment.
df_id = idtimes("id == 1")
px.line(data_frame=df_id, x="distance", y="partial_time")

In [27]:
# Per-segment times of run 14, segment by segment.
df_id = idtimes("id == 14")
px.line(data_frame=df_id, x="distance", y="partial_time")