In [1]:
#############
# LIBRARIES #
#############

import pandas as pd
import numpy as np
import datetime as dt


import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objects as go

In [2]:
# Development only: read the pre-processed activity export from the local data folder
local_csv_path = '../data/processed_data.csv'
processed_data = pd.read_csv(local_csv_path)
processed_data.head()

Unnamed: 0,name,distance,moving_time,elapsed_time,total_elevation_gain,type,sport_type,id,start_date_local,achievement_count,...,average_cadence,average_temp,max_watts,weighted_average_watts,year,month,day,weekday,hour,elev_gain_per_mile
0,Errands 🥶,7.9,0.6,1.1,170.6,Ride,Ride,8557237514,02-13-2023,0,...,,,,,2023,2,13,Monday,15,21.6
1,Two Bridges w/ TA,35.2,2.4,3.1,2467.2,Ride,Ride,8551377021,02-12-2023,0,...,65.2,18.0,460.0,157.0,2023,2,12,Sunday,9,70.1
2,Evening Workout,0.0,0.4,0.4,0.0,Workout,Workout,8546178598,02-11-2023,0,...,,,,,2023,2,11,Saturday,19,
3,Blustery 🌬️ Radler Willkommen 🍻,21.4,1.6,2.0,2040.7,Ride,Ride,8540290548,02-10-2023,19,...,64.4,12.0,758.0,171.0,2023,2,10,Friday,15,95.4
4,Zwift - Pacer Group Ride: Country to Coastal i...,18.1,0.9,0.9,721.8,VirtualRide,VirtualRide,8535368855,02-09-2023,0,...,82.2,,320.0,181.0,2023,2,9,Thursday,18,39.9


In [3]:
# Keep only cycling activities: rows whose 'type' is an outdoor or a virtual ride
ride_types = ['Ride', 'VirtualRide']
is_ride = processed_data['type'].isin(ride_types)
processed_data = processed_data[is_ride]
processed_data.head()

Unnamed: 0,name,distance,moving_time,elapsed_time,total_elevation_gain,type,sport_type,id,start_date_local,achievement_count,...,average_cadence,average_temp,max_watts,weighted_average_watts,year,month,day,weekday,hour,elev_gain_per_mile
0,Errands 🥶,7.9,0.6,1.1,170.6,Ride,Ride,8557237514,02-13-2023,0,...,,,,,2023,2,13,Monday,15,21.6
1,Two Bridges w/ TA,35.2,2.4,3.1,2467.2,Ride,Ride,8551377021,02-12-2023,0,...,65.2,18.0,460.0,157.0,2023,2,12,Sunday,9,70.1
3,Blustery 🌬️ Radler Willkommen 🍻,21.4,1.6,2.0,2040.7,Ride,Ride,8540290548,02-10-2023,19,...,64.4,12.0,758.0,171.0,2023,2,10,Friday,15,95.4
4,Zwift - Pacer Group Ride: Country to Coastal i...,18.1,0.9,0.9,721.8,VirtualRide,VirtualRide,8535368855,02-09-2023,0,...,82.2,,320.0,181.0,2023,2,9,Thursday,18,39.9
5,Afternoon Ride,24.2,1.4,1.6,334.6,Ride,Ride,8529587715,02-08-2023,20,...,77.4,19.0,666.0,174.0,2023,2,8,Wednesday,15,13.8


In [4]:
# Take a single snapshot of "now" so that today, this_month and this_year always
# agree (separate today() calls could straddle midnight at a month/year boundary).
today = dt.datetime.today()
months = ['January', 'February', 'March', 'April', 'May', 'June', 'July', 'August', 'September', 'October', 'November', 'December']
this_month = today.month
this_year = today.year

# NOTE(review): activity_type is not used by anything in this cell — presumably a
# placeholder for a future dropdown filter; confirm before relying on it.
activity_type = ['Ride','VirtualRide', 'Run'] # Select from dropdown

# Total distance and elevation gain per (year, month)
grouped_by_year_and_month = (
    processed_data
    .groupby(['year', 'month'])
    .agg({'distance': 'sum', 'total_elevation_gain': 'sum'})
    .reset_index()
)

# Not every month has activities: build the full (year x 12 months) grid and fill
# the gaps with 0 so that the running totals below have an entry for every month.
mux = pd.MultiIndex.from_product([grouped_by_year_and_month.year.unique(), range(1, 13)], names=['year', 'month'])
grouped_by_year_and_month = grouped_by_year_and_month.set_index(['year', 'month']).reindex(mux, fill_value=0).reset_index()

# Running totals that restart at the beginning of every year
grouped_by_year_and_month['Cumulative Distance'] = grouped_by_year_and_month.groupby(['year'])['distance'].cumsum()
grouped_by_year_and_month['Cumulative Elevation'] = grouped_by_year_and_month.groupby(['year'])['total_elevation_gain'].cumsum()

# Replace the month number (1-12) with the month name
grouped_by_year_and_month['month'] = grouped_by_year_and_month['month'].apply(lambda x: months[x - 1])

# Months of the current year that come after the current month
months_left = months[this_month:]

# Rows of the current year that belong to those upcoming months
no_data_yet = grouped_by_year_and_month[
    (grouped_by_year_and_month.year == this_year)
    & grouped_by_year_and_month.month.isin(months_left)
]

# Drop the upcoming months by index label. The index is unique after reset_index(),
# so this removes exactly those rows and leaves the dtypes alone (no NaN masking,
# hence no dropna / cast back to int afterwards).
grouped_by_year_and_month = grouped_by_year_and_month.drop(index=no_data_yet.index)

grouped_by_year_and_month.head(25)

Unnamed: 0,year,month,distance,total_elevation_gain,Cumulative Distance,Cumulative Elevation
0,2012,January,0.0,0.0,0.0,0.0
1,2012,February,0.0,0.0,0.0,0.0
2,2012,March,0.0,0.0,0.0,0.0
3,2012,April,0.0,0.0,0.0,0.0
4,2012,May,20.3,522.9,20.3,522.9
5,2012,June,153.3,3008.2,173.6,3531.1
6,2012,July,172.4,4833.4,346.0,8364.5
7,2012,August,289.1,7093.1,635.1,15457.6
8,2012,September,235.4,4693.3,870.5,20150.9
9,2012,October,0.0,0.0,870.5,20150.9


In [6]:
# TODO: add a year-to-date column (this cell does not contain any code yet)

In [7]:
# Cumulative distance per month, drawn as one line per year.
fig = px.line(grouped_by_year_and_month, x='month', y='Cumulative Distance', color='year')
fig.update_layout(
    xaxis=dict(
        showline=True,
        showgrid=False,  # grid lines are hidden on both axes
        showticklabels=True,
        linecolor='rgb(204, 204, 204)',
        linewidth=1,
        ticks='outside',
        tickfont=dict(
            family='Arial',
            size=12,
            color='rgb(82, 82, 82)',
        ),
    ),
    yaxis=dict(
        showgrid=False,
        zeroline=False,
        showline=False,
        gridcolor='rgb(235, 236, 240)',
        showticklabels=True,
        autorange=True,
    ),
    autosize=True,
    hovermode="x unified",
    showlegend=False,
    plot_bgcolor='rgba(0,0,0,0)',
    xaxis_title='',
    yaxis_title='Distance (miles)',
    margin=dict(l=0, r=0, t=0, b=0),
)

# The legend is hidden, so label every line with its trace name (the year)
# right next to its last data point.
fig.for_each_trace(lambda trace: fig.add_annotation(
    x=trace.x[-1], y=trace.y[-1], text='  '+trace.name,
    font_color=trace.line.color,
    ax=10, ay=10, xanchor="left", showarrow=False))

# hovertemplate=None lets the unified hover box use plotly's default formatting
fig.update_traces(mode="markers+lines", hovertemplate=None)

fig.show()

In [None]:
###################
# GROUPING BY DAY #
###################

In [226]:
# Total distance and elevation gain per calendar day
daily_totals = (
    processed_data
    .groupby(['year', 'month', 'day'])
    .agg({'distance': 'sum', 'total_elevation_gain': 'sum'})
    .reset_index()
)

# pd.to_datetime assembles a datetime column directly from the year/month/day
# columns, so no second conversion is needed.
daily_totals['date'] = pd.to_datetime(daily_totals[['year', 'month', 'day']])

# Continuous calendar from 1 January of the first year with data to 31 December
# of the current year. Deriving the bounds (instead of fixed literal dates) keeps
# today's row available when the notebook is run in a later year.
now = dt.datetime.today()
first_year = int(daily_totals['year'].min())
full_calendar = pd.date_range(start=f'{first_year}-01-01', end=f'{now.year}-12-31')

# Days without any activity get 0 distance / elevation. The reindexed (unnamed)
# index comes back from reset_index() as a column called 'index'.
grouped_by_year_and_month = (
    daily_totals
    .set_index('date')
    .reindex(full_calendar, fill_value=0)
    .reset_index()
    .rename(columns={'index': 'date'})
)

# The fill value also zeroed year/month/day on the inserted rows: rebuild them from the date
grouped_by_year_and_month['year'] = grouped_by_year_and_month['date'].dt.year
grouped_by_year_and_month['month'] = grouped_by_year_and_month['date'].dt.month
grouped_by_year_and_month['day'] = grouped_by_year_and_month['date'].dt.day

# Running totals that restart at the beginning of every year
grouped_by_year_and_month['Cumulative Distance'] = grouped_by_year_and_month.groupby(['year'])['distance'].cumsum()
grouped_by_year_and_month['Cumulative Elevation'] = grouped_by_year_and_month.groupby(['year'])['total_elevation_gain'].cumsum()

# Day-of-year counter (1 = 1 January), valid because every calendar day has a row
grouped_by_year_and_month['day_counter'] = grouped_by_year_and_month.groupby(['year']).cumcount() + 1

# Remove all dates after today (dates are stamped at midnight, so today's row is kept)
grouped_by_year_and_month = grouped_by_year_and_month[grouped_by_year_and_month.date <= now]
grouped_by_year_and_month

Unnamed: 0,date,year,month,day,distance,total_elevation_gain,Cumulative Distance,Cumulative Elevation,day_counter
0,2012-01-01,2012,1,1,0.0,0.0,0.0,0.0,1
1,2012-01-02,2012,1,2,0.0,0.0,0.0,0.0,2
2,2012-01-03,2012,1,3,0.0,0.0,0.0,0.0,3
3,2012-01-04,2012,1,4,0.0,0.0,0.0,0.0,4
4,2012-01-05,2012,1,5,0.0,0.0,0.0,0.0,5
...,...,...,...,...,...,...,...,...,...
4058,2023-02-10,2023,2,10,21.4,2040.7,624.5,26653.3,41
4059,2023-02-11,2023,2,11,0.0,0.0,624.5,26653.3,42
4060,2023-02-12,2023,2,12,35.2,2467.2,659.7,29120.5,43
4061,2023-02-13,2023,2,13,7.9,170.6,667.6,29291.1,44


In [241]:
# Today's date as a 'YYYY-MM-DD' string, compared against the 'date' column below
today_date = dt.datetime.today().strftime('%Y-%m-%d')

# Select today's row once and read both values from that selection.
# .values[0] raises IndexError when the table has no row for today.
todays_rows = grouped_by_year_and_month[grouped_by_year_and_month['date'] == today_date]

# Day counter for today's date
today_counter = todays_rows['day_counter'].values[0]

# Cumulative distance for today's date
today_distance = todays_rows['Cumulative Distance'].values[0]
today_distance


667.6

In [232]:
# Display the current value of today_date.
# NOTE(review): the preceding cell assigns a '%Y-%m-%d' string, but the saved
# output of this cell is a Timestamp — the output looks stale; re-run to confirm.
today_date

Timestamp('2023-02-14 13:41:52.216481')

In [209]:
# Projection for 2023: extend the average daily distance ridden so far to the
# remaining days of the year.
projection_year = 2023
year_so_far = grouped_by_year_and_month[grouped_by_year_and_month.year == projection_year]
days_elapsed = year_so_far['day_counter'].max()
# Day-of-year of 31 December = number of days in the year (no hard-coded 365)
days_in_year = pd.Timestamp(year=projection_year, month=12, day=31).dayofyear

daily_distance_2023 = year_so_far['distance'].sum() / days_elapsed
on_pace_for_2023 = year_so_far['Cumulative Distance'].max() + daily_distance_2023 * (days_in_year - days_elapsed)

# Today's total distance. The 'date' column holds midnight timestamps, so compare
# against today truncated to midnight; comparing with the raw current time never
# matches a row and always sums to 0.
today_midnight = pd.Timestamp.today().normalize()
today_distance = grouped_by_year_and_month[grouped_by_year_and_month.date == today_midnight]['Cumulative Distance'].sum()
on_pace_for_2023

5414.977777777778

In [240]:
# Cumulative distance per day of the year, drawn as one line per year.
fig = px.line(grouped_by_year_and_month, x='day_counter', y='Cumulative Distance', color='year')
fig.update_layout(
    xaxis=dict(
        showline=True,
        showgrid=False,  # grid lines are hidden on both axes
        showticklabels=True,
        linecolor='rgb(204, 204, 204)',
        linewidth=1,
        ticks='outside',
        # Day-of-year of the 1st of each month in a non-leap year; one value per
        # label so that tickvals and ticktext have the same length.
        tickvals=[1, 32, 60, 91, 121, 152, 182, 213, 244, 274, 305, 335],
        ticktext=['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'],
        tickfont=dict(
            family='Arial',
            size=12,
            color='rgb(82, 82, 82)',
        ),
    ),
    yaxis=dict(
        showgrid=False,
        zeroline=False,
        showline=False,
        gridcolor='rgb(235, 236, 240)',
        showticklabels=True,
        autorange=True,
    ),
    autosize=True,
    hovermode="x unified",
    showlegend=False,
    plot_bgcolor='rgba(0,0,0,0)',
    xaxis_title='',
    yaxis_title='Distance (miles)',
    margin=dict(l=0, r=0, t=0, b=0),
)

# Annotate today's point with the projected distance for 2023.
# int() accepts both numpy and plain Python numbers.
fig.add_annotation(x=today_counter, y=int(today_distance), text=f'On pace for : {on_pace_for_2023}')

# The legend is hidden, so label every line with its trace name (the year)
# right next to its last data point.
fig.for_each_trace(lambda trace: fig.add_annotation(
    x=trace.x[-1], y=trace.y[-1], text='  '+trace.name,
    font_color=trace.line.color,
    ax=10, ay=10, xanchor="left", showarrow=False))

# hovertemplate=None lets the unified hover box use plotly's default formatting
fig.update_traces(mode="lines", hovertemplate=None)

fig.show()

In [197]:
# Preview the first five rows of grouped_by_year_and_month
grouped_by_year_and_month.head()

Unnamed: 0,date,year,month,day,distance,total_elevation_gain,Cumulative Distance,Cumulative Elevation,day_counter
0,2012-01-01,2012,1,1,0.0,0.0,0.0,0.0,1
1,2012-01-02,2012,1,2,0.0,0.0,0.0,0.0,2
2,2012-01-03,2012,1,3,0.0,0.0,0.0,0.0,3
3,2012-01-04,2012,1,4,0.0,0.0,0.0,0.0,4
4,2012-01-05,2012,1,5,0.0,0.0,0.0,0.0,5


5414.977777777778

In [165]:
# One row per calendar day (year, month, day) with summed distance and elevation gain
daily_sums = {'distance': 'sum', 'total_elevation_gain': 'sum'}
grouped_by_year_and_month = (
    processed_data
    .groupby(['year', 'month', 'day'])
    .agg(daily_sums)
    .reset_index()
)
grouped_by_year_and_month.head()

Unnamed: 0,year,month,day,distance,total_elevation_gain
0,2012,5,16,8.4,353.3
1,2012,5,26,11.9,169.6
2,2012,6,2,12.4,283.5
3,2012,6,3,34.8,534.1
4,2012,6,16,4.5,129.0


In [166]:
# Build the 'date' column from the year/month/day parts; pd.to_datetime already
# returns a datetime column here, so a single conversion is enough.
date_parts = grouped_by_year_and_month[['year', 'month', 'day']]
grouped_by_year_and_month['date'] = pd.to_datetime(date_parts)
grouped_by_year_and_month.head()

Unnamed: 0,year,month,day,distance,total_elevation_gain,date
0,2012,5,16,8.4,353.3,2012-05-16
1,2012,5,26,11.9,169.6,2012-05-26
2,2012,6,2,12.4,283.5,2012-06-02
3,2012,6,3,34.8,534.1,2012-06-03
4,2012,6,16,4.5,129.0,2012-06-16


In [170]:
# Continuous calendar from 1 January of the first year with data to 31 December
# of the current year. Deriving the bounds (instead of fixed literal dates) keeps
# today's row available when the notebook is run in a later year.
first_year = int(grouped_by_year_and_month['year'].min())
current_year = dt.datetime.today().year
full_calendar = pd.date_range(start=f'{first_year}-01-01', end=f'{current_year}-12-31')

# Days without any activity get 0 distance / elevation. The reindexed (unnamed)
# index comes back from reset_index() as a column called 'index'.
grouped_by_year_and_month = (
    grouped_by_year_and_month
    .set_index('date')
    .reindex(full_calendar, fill_value=0)
    .reset_index()
    .rename(columns={'index': 'date'})
)

# The fill value also zeroed year/month/day on the inserted rows: rebuild them from the date
grouped_by_year_and_month['year'] = grouped_by_year_and_month['date'].dt.year
grouped_by_year_and_month['month'] = grouped_by_year_and_month['date'].dt.month
grouped_by_year_and_month['day'] = grouped_by_year_and_month['date'].dt.day
grouped_by_year_and_month

Unnamed: 0,date,year,month,day,distance,total_elevation_gain
0,2012-01-01,2012,1,1,0.0,0.0
1,2012-01-02,2012,1,2,0.0,0.0
2,2012-01-03,2012,1,3,0.0,0.0
3,2012-01-04,2012,1,4,0.0,0.0
4,2012-01-05,2012,1,5,0.0,0.0
...,...,...,...,...,...,...
4378,2023-12-27,2023,12,27,0.0,0.0
4379,2023-12-28,2023,12,28,0.0,0.0
4380,2023-12-29,2023,12,29,0.0,0.0
4381,2023-12-30,2023,12,30,0.0,0.0


In [163]:
# Spot check: show the row(s) whose date is 2012-06-03
grouped_by_year_and_month[grouped_by_year_and_month.date == '2012-06-03']

Unnamed: 0,date,year,month,day,distance,total_elevation_gain,Cumulative Distance,Cumulative Elevation
154,2012-06-03,2012,6,3,34.8,534.1,67.5,1340.5


In [171]:
# Running totals that restart at the beginning of every year:
# (source column, name of the cumulative column to create)
running_total_columns = [
    ('distance', 'Cumulative Distance'),
    ('total_elevation_gain', 'Cumulative Elevation'),
]
for source_column, running_column in running_total_columns:
    grouped_by_year_and_month[running_column] = (
        grouped_by_year_and_month.groupby(['year'])[source_column].cumsum()
    )
grouped_by_year_and_month

Unnamed: 0,date,year,month,day,distance,total_elevation_gain,Cumulative Distance,Cumulative Elevation
0,2012-01-01,2012,1,1,0.0,0.0,0.0,0.0
1,2012-01-02,2012,1,2,0.0,0.0,0.0,0.0
2,2012-01-03,2012,1,3,0.0,0.0,0.0,0.0
3,2012-01-04,2012,1,4,0.0,0.0,0.0,0.0
4,2012-01-05,2012,1,5,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...
4378,2023-12-27,2023,12,27,0.0,0.0,667.6,29291.1
4379,2023-12-28,2023,12,28,0.0,0.0,667.6,29291.1
4380,2023-12-29,2023,12,29,0.0,0.0,667.6,29291.1
4381,2023-12-30,2023,12,30,0.0,0.0,667.6,29291.1


In [183]:
# Number the rows within each year, starting at 1 for the first row of the year
position_in_year = grouped_by_year_and_month.groupby(['year']).cumcount()
grouped_by_year_and_month['day_counter'] = position_in_year + 1
grouped_by_year_and_month



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



Unnamed: 0,date,year,month,day,distance,total_elevation_gain,Cumulative Distance,Cumulative Elevation,day_counter
0,2012-01-01,2012,1,1,0.0,0.0,0.0,0.0,1
1,2012-01-02,2012,1,2,0.0,0.0,0.0,0.0,2
2,2012-01-03,2012,1,3,0.0,0.0,0.0,0.0,3
3,2012-01-04,2012,1,4,0.0,0.0,0.0,0.0,4
4,2012-01-05,2012,1,5,0.0,0.0,0.0,0.0,5
...,...,...,...,...,...,...,...,...,...
4058,2023-02-10,2023,2,10,21.4,2040.7,624.5,26653.3,41
4059,2023-02-11,2023,2,11,0.0,0.0,624.5,26653.3,42
4060,2023-02-12,2023,2,12,35.2,2467.2,659.7,29120.5,43
4061,2023-02-13,2023,2,13,7.9,170.6,667.6,29291.1,44


In [180]:
# Remove all dates after today. The explicit .copy() makes the result an
# independent frame, so adding columns to it later (e.g. re-running the
# day-counter cell) does not raise SettingWithCopyWarning.
not_in_future = grouped_by_year_and_month['date'] <= dt.datetime.today()
grouped_by_year_and_month = grouped_by_year_and_month.loc[not_in_future].copy()

In [185]:
# Cumulative distance per day of the year, drawn as one line per year.
fig = px.line(grouped_by_year_and_month, x='day_counter', y='Cumulative Distance', color='year')
fig.update_layout(
    xaxis=dict(
        showline=True,
        showgrid=False,  # grid lines are hidden on both axes
        showticklabels=True,
        linecolor='rgb(204, 204, 204)',
        linewidth=1,
        ticks='outside',
        tickfont=dict(
            family='Arial',
            size=12,
            color='rgb(82, 82, 82)',
        ),
    ),
    yaxis=dict(
        showgrid=False,
        zeroline=False,
        showline=False,
        gridcolor='rgb(235, 236, 240)',
        showticklabels=True,
        autorange=True,
    ),
    autosize=True,
    hovermode="x unified",
    showlegend=False,
    plot_bgcolor='rgba(0,0,0,0)',
    xaxis_title='',
    yaxis_title='Distance (miles)',
    margin=dict(l=0, r=0, t=0, b=0),
)

# The legend is hidden, so label every line with its trace name (the year)
# right next to its last data point.
fig.for_each_trace(lambda trace: fig.add_annotation(
    x=trace.x[-1], y=trace.y[-1], text='  '+trace.name,
    font_color=trace.line.color,
    ax=10, ay=10, xanchor="left", showarrow=False))

# hovertemplate=None lets the unified hover box use plotly's default formatting
fig.update_traces(mode="lines", hovertemplate=None)

fig.show()