In [1]:
# Imports

# Standard library
import calendar
import datetime as dt
from math import radians

# Data curation
import numpy as np
import pandas as pd

# Plotting
from bokeh.plotting import figure 
from bokeh.io import output_notebook, show
from bokeh.models import LabelSet, ColumnDataSource, Label
from bokeh.models.tickers import FixedTicker
from bokeh.transform import cumsum
from bokeh.models.tools import HoverTool

output_notebook()

In [2]:
# Load the data
# The first CSV column is used as the row index and 'Date' is parsed into datetimes
df = pd.read_csv('rwc.csv', index_col=0, parse_dates=['Date'])
# Display the loaded dataframe as the cell output
df

Unnamed: 0,Date,Type,Distance_km,Hours,Minutes,Seconds,Time_h,Calories,ElevGain_m,AvgSpeed_km/h,Year,Month
0,2015-07-14,Walking,2.10,0,26,40,0.444444,89.0,28.0,4.725000,2015,7
1,2015-07-20,Cycling,21.18,1,8,13,1.136944,332.0,270.0,18.628879,2015,7
2,2015-07-25,Cycling,23.52,1,9,32,1.158889,390.0,327.0,20.295302,2015,7
3,2015-07-27,Running,6.94,0,42,4,0.701111,389.0,97.0,9.898574,2015,7
4,2015-07-29,Walking,1.73,0,20,36,0.343333,69.0,32.0,5.038835,2015,7
...,...,...,...,...,...,...,...,...,...,...,...,...
343,2021-04-05,Cycling,23.32,0,59,0,0.983333,439.0,398.0,23.715254,2021,4
344,2021-04-07,Running,5.40,0,28,11,0.469722,275.0,,11.496156,2021,4
345,2021-04-09,Cycling,22.80,0,45,0,0.750000,520.0,,30.400000,2021,4
346,2021-04-13,Running,10.48,0,53,41,0.894722,531.0,209.0,11.713133,2021,4


## Add data

In [42]:
def _ask_yes_no(question):
    """Show `question` and keep asking until the answer is 'y' or 'n'; return True for 'y'."""
    answer = input(question)
    while answer not in ['y', 'n']:
        print('That is not a valid option.')
        answer = input('Please write y or n: ')
    return answer == 'y'


def _ask_int(question, is_valid, complaint, value):
    """Return `value` if it passes `is_valid`; otherwise prompt for an integer until one does."""
    while not is_valid(value):
        try:
            value = int(input(question))
        except ValueError:
            # `value` keeps its previous (invalid) content, so the loop asks again
            print('Please write only numbers.')
            continue
        if not is_valid(value):
            print(complaint)
    return value


def _collect_activity(activity, year, month, day, distance, hours, minutes, seconds, cals, elev_gain):
    """Validate the given values, ask for the missing/invalid ones and return one dataframe row (a list)."""

    # Ensure the activity is either Running, Walking, or Cycling
    valid_activities = ['Running', 'Walking', 'Cycling']
    while activity not in valid_activities:
        activity = input('Which activity did you do? [Running, Walking, Cycling] \n')
        if activity not in valid_activities:
            print('That is not a valid activity.')

    #####################################################################################################
    # Date variables
    #####################################################################################################

    year = _ask_int('Please write the year when you performed the activity: ',
                    lambda value: 1000 <= value <= 9999, 'Please write only 4 digits.', year)
    month = _ask_int('Please write the month (number) when you performed the activity: ',
                     lambda value: 1 <= value <= 12, 'Please write a number from 1 to 12', month)

    # The last valid day depends on the month and, for February, on whether the year is a leap year
    last_day = calendar.monthrange(year, month)[1]
    day = _ask_int('Please write the day (number) when you performed the activity: ',
                   lambda value: 1 <= value <= last_day,
                   'Please write a number from 1 to '+str(last_day), day)

    date = dt.datetime(year, month, day)

    #####################################################################################################
    # Distance (km); it can have a decimal part and must be a finite number larger than 0
    #####################################################################################################

    while not (distance > 0 and np.isfinite(distance)):
        try:
            distance = float(input('How much was the distance (km) of the activity?\n'
                                   'Please separate the decimal part with a ".".\n'))
        except ValueError:
            print('Please write only numbers and a decimal point if needed.')
            continue
        if not (distance > 0 and np.isfinite(distance)):
            print('Please write a number larger than 0.')

    #####################################################################################################
    # Variables linked to the duration of the activity
    #####################################################################################################

    hours = _ask_int('Please write the amount of hours the activity took: ',
                     lambda value: value >= 0, 'Please write a non-negative number.', hours)
    minutes = _ask_int('Please write the amount of minutes the activity took (from 0 to 59): ',
                       lambda value: 0 <= value <= 59, 'Please write a number from 0 to 59.', minutes)
    seconds = _ask_int('Please write the amount of seconds the activity took (from 0 to 59): ',
                       lambda value: 0 <= value <= 59, 'Please write a number from 0 to 59.', seconds)

    # Duration of the activity in hours
    duration_h = hours+(minutes/60)+(seconds/3600)

    # Average speed (km/h); it is undefined (NaN) for a zero duration instead of raising ZeroDivisionError
    avg_speed = distance/duration_h if duration_h > 0 else np.nan

    #####################################################################################################
    # Optional variables
    #####################################################################################################

    # NaN never compares equal to anything (not even itself), so "missing" is detected through the
    # `value > 0` check, which is False for NaN and therefore triggers the prompt
    if _ask_yes_no('Do you want to add the number of calories you have burned? [y/n] '):
        cals = _ask_int('Please write the number of calories you have burned: ',
                        lambda value: value > 0, 'Please write a positive number.', cals)

    if _ask_yes_no('Do you want to add the number of meters you have climbed? [y/n] '):
        elev_gain = _ask_int('Please write the number of meters you have climbed: ',
                             lambda value: value > 0, 'Please write a positive number.', elev_gain)

    # Same order as the columns of the dataframe
    return [date, activity, distance, hours, minutes, seconds, duration_h, cals, elev_gain, avg_speed,
            year, month]


def create_new_row(activity=None, year=0, month=0, day=0, distance=0, hours=-1, minutes=-1,
                   seconds=-1, cals=np.nan, elev_gain=np.nan):
    
    """Interactively add one or more activities to the global dataframe `df`.

    Parameters (any value left as its default, or given with an invalid value, is asked interactively):
- activity: ['Running', 'Walking', 'Cycling'] - the activity performed by the user;
- year: integer with 4 digits, corresponding to the year when the activity was performed;
- month: integer from 1 to 12, corresponding to the month when the activity was performed;
- day: integer from 1 to the last day of the chosen month (leap years are taken into account);
- distance: positive float corresponding to the number of kms traveled during the activity;
- hours: non-negative integer corresponding to the number of hours it took to complete the activity;
- minutes: integer from 0 to 59 corresponding to the number of minutes it took to complete the activity,
not counting the number of hours;
- seconds: integer from 0 to 59 corresponding to the number of seconds it took to complete the activity,
not counting the number of hours and minutes;
- cals: number of calories burned during the activity (optional, NaN when not provided);
- elev_gain: number of meters climbed during the activity (optional, NaN when not provided).

    The function creates a new row with the information above (plus the duration in hours and the average
speed in km/h) and appends it to `df`. Then it asks whether the user wants to add more activities; every
additional activity is asked from scratch. When the user does not want to add more activities, it asks once
whether the dataframe should be saved to 'rwc.csv'.

    Returns the (modified in place) global dataframe `df`.
    """
    
    while True:
        new_row = _collect_activity(activity, year, month, day, distance, hours, minutes, seconds,
                                    cals, elev_gain)
        
        # Use the number of rows as the new label, skipping labels already taken so that an index with
        # gaps never gets one of its existing rows overwritten
        new_label = df.shape[0]
        while new_label in df.index:
            new_label += 1
        
        # Add the row to the dataframe
        df.loc[new_label] = new_row
        
        # Ask if there are more activities to be added
        if not _ask_yes_no('Do you want to add another activity? [y, n] '):
            break
        
        # The next activity starts from the default values, so everything is asked again
        activity, year, month, day, distance = None, 0, 0, 0, 0
        hours, minutes, seconds, cals, elev_gain = -1, -1, -1, np.nan, np.nan
    
    # Save the dataframe (asked only once, regardless of how many activities were added)
    if _ask_yes_no('Do you want to save the dataframe? [y, n] '):
        df.to_csv('rwc.csv')
    
    return df

## Plots

### Yearly Statistics

In [19]:
def yearly_statistics(activity, statistic):
    
    '''
The yearly_statistics function requires 2 arguments: activity, which must be one of the activities present in
the Type column of the global dataframe df (Walking, Running, Cycling); and statistic, which can be one of the
following strings: Counter, Distance, Time. An invalid value for either argument is asked again interactively.

This function produces and shows a bar chart, with one bar per year, based on the specific activity and
statistic in cause, highlighting the maximum value(s) in red and the minimum value(s) in green (all the other
bars are blue). It also allows for the user to hover the cursor over the bar to know more information about
that year's chosen activity. Nothing is returned.
    '''
    
    #####################################################################################################
    # Error handling of wrong parameter input
    #####################################################################################################
    
    # Lists to store the options for each parameter
    activity_options = list(df.Type.unique())
    statistic_options = ['Distance', 'Time', 'Counter']
    
    # Ensure only the current activities can be selected
    while activity not in activity_options:
        print('That is not a valid activity.')  # Warning message
        
        # Let the user choose another activity
        activity = input('Please choose one of the following activities [Running, Walking, Cycling]:\n')
        
    # Ensure only one of Distance, Time or Counter can be the selected statistic
    while statistic not in statistic_options:
        print('That is not a valid statistic.')  # Warning message
        
        # Let the user choose another statistic
        statistic = input('Please choose one of the following statistics [Distance, Time, Counter]:\n')
    
    #####################################################################################################
    # Data selection and curation
    #####################################################################################################
    
    # Limit the data to the chosen activity and group it by year
    by_year = df.loc[df.Type==activity].groupby('Year')
    
    # Sum the numeric columns only: Date and Type cannot be (meaningfully) summed and make recent pandas
    # versions raise an error
    activity_df = by_year.sum(numeric_only=True)
    
    # Round the decimal cases of the distance to 2 if the activity is not cycling, and to 0 if it is cycling
    activity_df['Distance_km'] = activity_df['Distance_km'].round(0 if activity == 'Cycling' else 2)
    
    # Average speed: mean (instead of sum) of the speeds of each year, always rounded to 2 decimal cases
    activity_df['avg_speed'] = by_year['AvgSpeed_km/h'].mean().round(2)
    
    # Number of activities per year
    activity_df['count'] = by_year['Date'].count()
    
    # Colors of the bars according to the chosen statistic: red is the biggest, green the smallest and blue
    # are the others. The column and its extremes do not change from year to year, so they are computed once
    statistic_column = {'Distance': 'Distance_km', 'Time': 'Time_h', 'Counter': 'count'}[statistic]
    to_check = activity_df[statistic_column]
    biggest, smallest = to_check.max(), to_check.min()
    
    color = []
    for value in to_check:
        if value == biggest:
            color.append('red')
        elif value == smallest:
            color.append('green')
        else:
            color.append('blue')
    
    # Time labels ("<hours>h <minutes>m"). The total is rounded to whole minutes BEFORE splitting it, so
    # that a value such as 3.9999 h becomes "4h 0m" and never "3h 60m"
    time_spent = []
    for total_hours in activity_df['Time_h']:
        hour, minutes = divmod(int(round(total_hours*60)), 60)
        time_spent.append(str(hour)+'h '+str(minutes)+'m')

    # Add the columns to the dataframe
    activity_df['color'] = color
    activity_df['time_spent'] = time_spent
    
    #####################################################################################################
    # Plotting
    #####################################################################################################
    
    # Set the source as the curated dataframe
    source = ColumnDataSource(activity_df)
    
    # Set the right counter name when the mouse hovers over the bars
    if activity == 'Running':
        counter_name = 'Number of runs'
    elif activity == 'Cycling':
        counter_name = 'Number of bike rides'
    else:
        counter_name = 'Number of walks'
    
    # Information when the mouse is hovered over the bars
    tooltips = [('Distance', "@Distance_km{0,0.00} km"), ('Time', "@time_spent"),
                ("Calories burned","@Calories{0,0}"), ("Cumulative Elevation Gain", "@ElevGain_m{0,0} m"),
                ("Average Speed", "@avg_speed{0.00} km/h"), (counter_name, "@count")]
    
    # Title, y-axis label, bar height column and data label column, all depending on the statistic
    if statistic == 'Time':
        title = 'Amount of Time Spent '+activity # Adapt the title based on the activity
        label = 'Hours'
        height_choice, label_choice = 'Time_h', 'time_spent'
    
    elif statistic == 'Distance':
        if activity == 'Walking':
            verb = 'Walked'
        elif activity == 'Cycling':
            verb = 'Cycled'
        else:
            verb = 'Run'
        
        title = 'Number of Kilometers '+verb+' per Year'
        label = 'Kilometers'
        label_choice = height_choice = 'Distance_km'
    
    else:
        title = counter_name+' per Year'
        label = counter_name
        label_choice = height_choice = 'count'
    
    # Instantiate the figure
    sports_fig = figure(title=title, x_axis_label='Year', y_axis_label = label, tooltips=tooltips,
                        plot_width=900, plot_height=500, tools='save')

    # Tweak the title
    sports_fig.title.align = 'center'
    sports_fig.title.text_font_size = "20px"

    # Remove unnecessary graph elements
    # Remove gridlines
    sports_fig.xgrid.grid_line_color, sports_fig.ygrid.grid_line_color = None, None

    # Remove x axis minor ticks
    sports_fig.xaxis.minor_tick_line_color = None
    
    # Remove outline line
    sports_fig.outline_line_color = None

    # Vertical bars
    sports_fig.vbar(x='Year', top=height_choice, width=0.9, source=source, color='color')

    # Get the labels
    labels = LabelSet(x='Year', y=height_choice, text=label_choice, level='glyph', text_align='center',
                      source=source, render_mode='canvas', y_offset=3)

    # Add the labels to the figure
    sports_fig.add_layout(labels)

    # Show the figure
    show(sports_fig)

In [20]:
# Neither argument is a valid option, so the function asks for both of them interactively
yearly_statistics(123, 123)

That is not a valid activity.
Please choose one of the following activities [Running, Walking, Cycling]:
Running
That is not a valid statistic.
Please choose one of the following statistics [Distance, Time, Counter]:
Distance


### Monthly Statistics

In [21]:
def monthly_statistics(activity=None, statistic=None, year=None):
    
    '''monthly_statistics accepts 3 arguments (any missing or invalid one is asked interactively):
- activity: ['Running', 'Walking', 'Cycling'], which corresponds to the existing activities at the moment
(the values of the Type column of the global dataframe df);
- statistic: ['Distance', 'Time', 'Counter'], the quantity represented by the height of the bars;
- year: one of the years in which the chosen activity was performed.

This function produces and shows a bar chart with one bar per month of the chosen year (months without any
activity have no bar), highlighting the maximum value(s) in red and the minimum value(s) in green (all the
other bars are blue). Hovering the cursor over a bar shows more information about that month. Nothing is
returned.
    '''
    
    #####################################################################################################
    # Error handling of wrong parameter input
    #####################################################################################################
    
    # Lists to store the options for each parameter
    activity_options = list(df.Type.unique())
    statistic_options = ['Distance', 'Time', 'Counter']
    
    # Ensure only the current activities can be selected
    while activity not in activity_options:
        print('That is not a valid activity.')  # Warning message
        
        # Let the user choose another activity
        activity = input('Please choose one of the following activities [Running, Walking, Cycling]:\n')
    
    # List containing the available years for the chosen activity
    year_options = df.loc[df.Type==activity].Year.unique().tolist()
    
    # Ensure only one of Distance, Time or Counter can be the selected statistic
    while statistic not in statistic_options:
        print('That is not a valid statistic.')  # Warning message
        
        # Let the user choose another statistic
        statistic = input('Please choose one of the following statistics [Distance, Time, Counter]:\n')
    
    # Ensure only the existing years for the chosen activity can be chosen. 
    while year not in year_options:
        print('That is not a valid year. Please choose one of the following years:')  # Warning message
        for available_year in year_options:
            print(str(available_year)+';', end=' ')
        
        # Let the user choose another year; text that is not a number keeps the loop going instead of
        # stopping the function with a ValueError
        try:
            year = int(input(''))
        except ValueError:
            print('Please write only numbers.')
        
    #####################################################################################################
    # Data selection and curation
    #####################################################################################################
    
    # Limit the data to be considered, based on the activity and the year, and group it by month
    by_month = df.loc[(df.Type==activity) & (df.Year==year)].groupby('Month')
    
    # Sum the numeric columns only: Date and Type cannot be (meaningfully) summed and make recent pandas
    # versions raise an error
    activity_df = by_month.sum(numeric_only=True)
    
    # Round the decimal cases of the distance to 2
    activity_df['Distance_km'] = activity_df['Distance_km'].round(2)
    
    # Average speed: mean (instead of sum) of the speeds of each month, rounded to 2 decimal cases
    activity_df['avg_speed'] = by_month['AvgSpeed_km/h'].mean().round(2)
    
    # Number of activities per month
    activity_df['count'] = by_month['Date'].count()
    
    # Colors of the bars according to the chosen statistic: red is the biggest, green the smallest and blue
    # are the others. The column and its extremes do not change from month to month, so they are computed
    # once (only the months with activities are compared)
    statistic_column = {'Distance': 'Distance_km', 'Time': 'Time_h', 'Counter': 'count'}[statistic]
    to_check = activity_df[statistic_column]
    biggest, smallest = to_check.max(), to_check.min()
    
    color = []
    for value in to_check:
        if value == biggest:
            color.append('red')
        elif value == smallest:
            color.append('green')
        else:
            color.append('blue')
    
    # Time labels ("<hours>h <minutes>m"). The total is rounded to whole minutes BEFORE splitting it, so
    # that a value such as 3.9999 h becomes "4h 0m" and never "3h 60m"
    time_spent = []
    for total_hours in activity_df['Time_h']:
        hour, minutes = divmod(int(round(total_hours*60)), 60)
        time_spent.append(str(hour)+'h '+str(minutes)+'m')

    # Add the columns to the dataframe
    activity_df['color'] = color
    activity_df['time_spent'] = time_spent
    
    # As there are some months in which some of the activities were not done and they should "appear" in the
    # graph, it is necessary to create rows for them: 0 in every numeric column and no color/time label.
    # The row is built from the actual columns so that it always has the right length
    no_activity_row = [None if column in ('color', 'time_spent') else 0 for column in activity_df.columns]
    
    for month in range(1,13): # Loop over the months
        # Add the no activity row only if there were no activities during the month
        if month not in activity_df.index:
            activity_df.loc[month] = no_activity_row
    
    # Sort the index of the dataframe
    activity_df = activity_df.sort_index()
    
    #####################################################################################################
    # Plotting
    #####################################################################################################
    
    # Set the source as the curated dataframe
    source = ColumnDataSource(activity_df)

    # Set the right counter name when the mouse hovers over the bars
    if activity == 'Running':
        counter_name = 'Number of runs'
    elif activity == 'Cycling':
        counter_name = 'Number of bike rides'
    else:
        counter_name = 'Number of walks'
    
    # Information when the mouse is hovered over the bars
    tooltips = [('Distance', "@Distance_km{0,0.00} km"), ('Time', "@time_spent"),
                ("Calories burned","@Calories{0,0}"), ("Cumulative Elevation Gain", "@ElevGain_m{0,0} m"),
                ("Average Speed", "@avg_speed{0.00} km/h"), (counter_name, "@count")]
    
    # Title, y-axis label, bar height column and data label column, all depending on the statistic
    if statistic == 'Time':
        title = 'Amount of Time Spent '+activity+' in '+str(year) # Adapt the title based on the activity
        label = 'Hours'
        height_choice, label_choice = 'Time_h', 'time_spent'
    
    elif statistic == 'Distance':
        if activity == 'Walking':
            verb = 'Walked'
        elif activity == 'Cycling':
            verb = 'Cycled'
        else:
            verb = 'Run'
        
        title = 'Number of Kilometers '+verb+' per Month in '+str(year)
        label = 'Kilometers'
        height_choice = label_choice = 'Distance_km'
    
    else:
        title = counter_name+' per Month in '+str(year)
        label = counter_name
        height_choice = label_choice = 'count'
    
    # Instantiate the figure
    sports_fig = figure(title=title, x_axis_label='Month', y_axis_label=label, tooltips=tooltips,
                        plot_width=900, plot_height=500, tools='save')

    # Tweak the title
    sports_fig.title.align = 'center'
    sports_fig.title.text_font_size = "20px"

    # Remove unnecessary graph elements
    # Remove gridlines
    sports_fig.xgrid.grid_line_color, sports_fig.ygrid.grid_line_color = None, None

    # Remove x axis minor ticks
    sports_fig.xaxis.minor_tick_line_color = None
    
    # Remove outline line
    sports_fig.outline_line_color = None
    
    # Change the x_ticks to the names of the months
    month_name = ["Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"]
    
    # Plain Python integers for the month numbers present in the index (1 to 12 at this point)
    month_numbers = [int(month) for month in activity_df.index]
    
    sports_fig.xaxis.ticker = FixedTicker(ticks=month_numbers) # Show all the months as x-ticks
    
    # Override the x-tick labels: month number -> month name
    sports_fig.xaxis.major_label_overrides = {month: month_name[month-1] for month in month_numbers}
    
    # Vertical bars
    sports_fig.vbar(x='Month', top=height_choice, width=0.9, source=source, color='color')
        
    # Get the labels - it is necessary to have a "new" source without NaN values, i.e., without the rows
    # of the months that had no activity
    new_source=ColumnDataSource(activity_df.dropna())
    
    labels = LabelSet(x='Month', y=height_choice, text=label_choice, level='glyph', text_align='center',
                      source=new_source, render_mode='canvas', y_offset=3)
    
    # Add the labels to the figure
    sports_fig.add_layout(labels)

    # Show the figure
    show(sports_fig)

In [22]:
# No arguments are given, so the defaults (None) fail validation and every value is asked interactively
monthly_statistics()

That is not a valid activity.
Please choose one of the following activities [Running, Walking, Cycling]:
Walking
That is not a valid statistic.
Please choose one of the following statistics [Distance, Time, Counter]:
Counter
That is not a valid year. Please choose one of the following years:
2015; 2016; 2017; 2018; 2019; 2020; 2021; 2021


### Time Spent Moving

In [3]:
def time_spent_moving_per_year(year=None):
    
    '''Show a pie chart of the time spent in each activity type during one year.

    The data comes from the notebook-level dataframe `df`. Every sector of the pie
    corresponds to one value of the `Type` column and its angle is proportional to the
    summed `Time_h` of that type. Hovering a sector shows the summed distance, time,
    calories and elevation gain, and the number of activities.

    Parameters
    ----------
    year : int, optional
        Year to plot. When it is missing or not present in `df.Year`, the available years
        are printed and the user is asked (through `input`) until a valid year is entered.

    Returns
    -------
    None
        The figure is rendered with `show`.
    '''
    
    # Plain Python integers make the membership test safe for any kind of `year` value
    available_years = [int(known_year) for known_year in df.Year.unique()]
    
    while year not in available_years:
        print('That is not a valid year. Please choose one of the following years:')  # Warning message
        for available_year in available_years:
            print(str(available_year)+';', end=' ')
        
        # Let the user choose another year; an answer which is not a number just triggers
        # the prompt again instead of raising a ValueError
        try:
            year = int(input(''))
        except ValueError:
            year = None
    
    # Select the data respective to that year and group it by type
    activities_by_type = df.loc[df.Year==year].groupby('Type')
    
    # Sum only the numeric columns (a column such as Date cannot be summed)
    year_df = activities_by_type.sum(numeric_only=True)
    
    # Create a column for the labels of the time and for the colors of the sectors (red is running, green
    # is walking and blue is every other type, i.e. cycling)
    sector_colors = {'Running': 'red', 'Walking': 'green'}
    time_spent, color = [], []
    for activity in year_df.index:
        # Work in whole seconds so that float noise (e.g. 0.7 h * 60 = 41.99...) does not
        # drop a minute; the seconds which do not complete a minute are discarded
        total_seconds = int(round(year_df.loc[activity, 'Time_h']*3600))
        hours, minutes = divmod(total_seconds//60, 60)
        time_spent.append(str(hours)+'h '+str(minutes)+'m') # Create the label and store it
        color.append(sector_colors.get(activity, 'blue'))
    
    year_df['time_spent'] = time_spent # Add the lists as columns
    year_df['sector_color'] = color 
    
    # Add a column for the number of activities
    year_df['counter'] = activities_by_type['Date'].count()
    
    # Sort the dataframe by the value of the time in h
    year_df = year_df.sort_values(by='Time_h')
    
    # Percentage of time spent in each activity
    year_df['time_percentage'] = year_df.Time_h/year_df.Time_h.sum()
    
    # Convert the percentages to radians (a share of the 360 degrees of the pie)
    year_df['graph_radians'] = [radians(share*360) for share in year_df['time_percentage']]
    
    # Set the source of the data (the index, Type, becomes a column of the source)
    source = ColumnDataSource(year_df)
    
    # Define the title
    title = 'Time Spent in Exercising Activities in '+str(year)
    
    # Set the tooltips
    tooltips = [('Distance', "@Distance_km{0,0.00} km"), ('Time', "@time_spent"),
                ("Calories burned","@Calories{0,0}"), ("Cumulative Elevation Gain", "@ElevGain_m{0,0} m"),
                ('Number of activities', "@counter")]
    
    # Instantiate the figure; the x range is wider on the right to leave room for the text
    time_pie = figure(plot_height=500, title=title, tools='hover, save', tooltips=tooltips,
                      x_range=(-0.65, 1.2))
    
    # Add the sectors: each one starts where the previous one ended
    time_pie.wedge(x=0, y=0, radius=0.6, start_angle=cumsum('graph_radians', include_zero=True),
                   end_angle=cumsum('graph_radians'), line_color='black', fill_color='sector_color',
                   legend_field='Type', source=source)
    
    # Tweak the title
    time_pie.title.align = 'center'
    time_pie.title.text_font_size = "20px"    

    # Hide the axes
    time_pie.axis.visible = False

    # Remove the gridlines
    time_pie.xgrid.grid_line_color, time_pie.ygrid.grid_line_color = None, None

    # Remove the outline
    time_pie.outline_line_color = None
    
    # Show the total amount of time spent (same hours/minutes split as above)
    all_seconds = int(round(year_df['Time_h'].sum()*3600))
    all_hours, all_minutes = divmod(all_seconds//60, 60)
    
    # Text of the overall time spent
    all_time = str(all_hours)+'h '+str(all_minutes)+'min'
    
    all_message_1 = Label(x=0.65, y=0.25, text='Total Amount of Time:')
    all_message_2 = Label(x=0.8, y=0.175, text=all_time)
    
    time_pie.add_layout(all_message_1)
    time_pie.add_layout(all_message_2)
    
    show(time_pie)

In [4]:
time_spent_moving_per_year()

That is not a valid year. Please choose one of the following years:
2015; 2016; 2017; 2018; 2019; 2020; 2021; 2020


### Comparisons

In [1]:
# Compare the same year for different sports holding variables constant

In [58]:
import numpy as np
import pandas as pd

from bokeh.models import ColumnDataSource, FactorRange, Title
from bokeh.plotting import figure

def two_activities_one_year_comparison(year=2019, statistic='Time', activity_1='Running',
                                       activity_2='Cycling'):
    
    """Plot a monthly bar chart comparing two activity types during one year.

    The data is read from 'rwc.csv'. For each of the two activities there is one bar per
    month (months without any activity get a bar of height 0), whose height is the chosen
    statistic. Hovering a bar shows the remaining statistics of that month.

    Parameters
    ----------
    year : int
        Year to consider (compared with the `Year` column).
    statistic : str
        'Time' (hours), 'Distance' (kilometers) or anything else for the number of
        activities.
    activity_1, activity_2 : str
        Values of the `Type` column to compare. The bars of `activity_1` are green and
        the ones of `activity_2` are red.

    Returns
    -------
    None
        The figure is rendered with `show`. If there is no data for the selection, a
        message is printed and nothing is plotted.
    """
    
    # Load the dataframe
    df = pd.read_csv('rwc.csv', index_col=0, parse_dates=['Date'])
        
    ##############################################################################################
    # Data selection and curation
    ##############################################################################################
    
    # Limit the data you will consider based on the chosen year and the activities. Even though there
    # are only 3 activities, it is better to select them directly, rather than by selecting based on the
    # negation of the remaining as more activities could be added in the future
    selected = df.loc[(df.Year==year) & df.Type.isin([activity_1, activity_2])]
    
    if selected.empty:
        print('There are no '+activity_1+' or '+activity_2+' activities in '+str(year)+'.')
        return
    
    # Group the data by activity and then by month
    pre_activity_df = selected.groupby(['Type', 'Month'])
    
    # Sum only the numeric columns (a column such as Date cannot be summed)
    activity_df = pre_activity_df.sum(numeric_only=True).drop('Year', axis=1)
    
    # Round the decimal cases of the distance to 2
    activity_df.Distance_km = activity_df.Distance_km.round(2)
    
    # Add the average speed column
    activity_df['avg_speed'] = pre_activity_df['AvgSpeed_km/h'].mean().round(2)
    
    # Add the counts column
    activity_df['count'] = pre_activity_df['Date'].count()
    
    # Color column: the first key of the index is the activity
    activity_df['color'] = ['green' if act == activity_1 else 'red' for act, _ in activity_df.index]
    
    # Time labels column
    time_spent = []
    for hours_spent in activity_df.Time_h:
        # Round to whole minutes first and only then split into hours and minutes, so that
        # 59.5+ minutes carry over to the next hour instead of being shown as "60m"
        hour, minutes = divmod(int(round(hours_spent*60)), 60)
        time_spent.append(str(hour)+'h '+str(minutes)+'m') # Create the label and store it
    
    activity_df['time_spent'] = time_spent
    
    # As there are some months in which some of the activities were not done and they should
    # "appear" in the graph, it is necessary to create rows for them
    # Values for a row without any activity: 0 for the statistics and no color nor time label
    no_activity_row = [None if col in ('color', 'time_spent') else 0 for col in activity_df.columns]
    
    # Activities which are actually present in the selection, in the order of the index
    activities = list(activity_df.index.get_level_values(0).unique())
    
    # Collect the rows (12 per activity) of the dataframe which will be used to create the plots
    rows, row_labels = [], []
    for act in activities:
        
        # Months in which this activity was done
        months_done = activity_df.loc[act].index
        
        for month in range(1,13): # Loop over the months
            row_labels.append(str(act)+', '+str(month)) # Create an index
            
            # If there were activities during the month, add the row of the activity_df
            if month in months_done:
                rows.append(activity_df.loc[(act, month), :].tolist())
            # Add the no activity row if there were no activities
            else:
                rows.append(no_activity_row)
    
    df_to_plot = pd.DataFrame(rows, index=row_labels, columns=activity_df.columns)
        
    # List with the name of the months
    month_name = ["Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"]
    
    # List which will be on the x-axis (same order as the rows of df_to_plot)
    x_axis = [(month, act) for act in activities for month in month_name]
    
    # Add the list to the dataframe
    df_to_plot['x-axis'] = x_axis
    
    #####################################################################################################
    # Plotting
    #####################################################################################################
    
    # Set the source as the curated dataframe
    source = ColumnDataSource(df_to_plot)
    
    # Information when the mouse is hovered over the bars
    tooltips = [('Distance', "@Distance_km{0,0.00} km"), ('Time', "@time_spent"),
                ("Calories burned","@Calories{0,0}"),
                ("Cumulative Elevation Gain", "@ElevGain_m{0,0} m"),
                ("Average Speed", "@avg_speed{0.00} km/h"), ("Number of activities", "@count")]
    
    # Set the title, the y-axis label and the column which defines the height of the bars
    # The beginning and ending of the title will not change regardless of the statistic
    title_beginning = 'Comparison of the '
    title_ending = ' between '+activity_1+' and '+activity_2
    if statistic == 'Time':
        title = title_beginning+'Time Spent in '+str(year)
        label = 'Hours' # Y-axis label
        height_choice = 'Time_h'
    
    elif statistic == 'Distance':
        title = title_beginning+'Distance Covered in '+str(year)+' per Month'
        label = 'Kilometers'
        height_choice = 'Distance_km'
    
    else:
        title = title_beginning+'Number of Activities in '+str(year)+' per Month'
        label = 'Number of Activities'
        height_choice = 'count'
    
    # Instantiate the figure
    comparison_fig = figure(y_axis_label=label, tooltips=tooltips,
                            plot_width=900, plot_height=500, tools='save',
                            x_range=FactorRange(*x_axis))
    
    # Add the ending of the title before so that it is below the other
    comparison_fig.add_layout(Title(text=title_ending, text_font_size='20px', align='center'),
                              'above')
    
    # Add the first line of the title
    comparison_fig.add_layout(Title(text=title, text_font_size='20px', align='center'), 'above')
    
    # Vertical bars
    comparison_fig.vbar(x='x-axis', top=height_choice, width=0.9, color='color', source=source)
    
    # Remove unnecessary graph elements
    # Remove gridlines
    comparison_fig.xgrid.grid_line_color, comparison_fig.ygrid.grid_line_color = None, None

    # Remove x axis minor ticks
    comparison_fig.xaxis.minor_tick_line_color = None
    
    # Remove outline line
    comparison_fig.outline_line_color = None
    
    # Start of the y range
    comparison_fig.y_range.start = 0
    
    # Range padding of the x-axis
    comparison_fig.x_range.range_padding = 0.1
    
    # Rotate the labels of the x-axis
    comparison_fig.xaxis.major_label_orientation = 1
    
    show(comparison_fig)

two_activities_one_year_comparison()

In [22]:
# NOTE(review): scratch cell - it only evaluates whether x=-1, y=-1, z=-2 satisfies the
# equality below and nothing else in the visible part of the notebook uses x, y or z.
# Consider removing it.
x, y, z =-1,-1,-2
5*x+y-2*z+2==-(5/2)*x-(1/2)*y+z-1


True

### Stacked time spent

- Years
- Months
- Comparison?

![alt text](stacked.png "Title")

In [48]:
def evolution_of_time_spent_exercising(magnitude='Absolute'):
    
    """Plot a stacked bar chart of the time spent per activity type in each year.

    The data is read from 'rwc.csv'. There is one bar per year and one layer per value of
    the `Type` column. Hovering a layer shows the statistics of that activity in that year.

    Parameters
    ----------
    magnitude : str
        'Absolute' stacks the hours spent (and writes the yearly total above each bar);
        'Relative' stacks the percentage of the yearly time spent in each activity.

    Returns
    -------
    None
        The figure is rendered with `show`.

    Raises
    ------
    ValueError
        If `magnitude` is neither 'Absolute' nor 'Relative'.
    """
    
    if magnitude not in ('Absolute', 'Relative'):
        raise ValueError("magnitude must be 'Absolute' or 'Relative', got "+repr(magnitude))
    
    # Load the dataframe
    df = pd.read_csv('rwc.csv', index_col=0, parse_dates=['Date'])
    
    # Drop the unnecessary columns for the analysis and group the data by Activity and then by Year
    pre_activity_df = df.drop(['Hours', 'Minutes', 'Seconds', 'Month'], axis=1).\
                 groupby(['Type', 'Year'])
    
    # Sum only the numeric columns (a column such as Date cannot be summed)
    activity_df = pre_activity_df.sum(numeric_only=True)
    
    # Round the decimal cases of the distance to 2
    activity_df.Distance_km = activity_df.Distance_km.round(2)
    
    # Add the average speed column
    activity_df['avg_speed'] = pre_activity_df['AvgSpeed_km/h'].mean().round(2)
    
    # Add the counts column
    activity_df['count'] = pre_activity_df['Date'].count()
    
    # Add the time labels
    time_spent = []
    for hours_spent in activity_df.Time_h:
        # Round to whole minutes first and only then split into hours and minutes, so that
        # 59.5+ minutes carry over to the next hour instead of being shown as "60m"
        hour, minutes = divmod(int(round(hours_spent*60)), 60)
        time_spent.append(str(hour)+'h '+str(minutes)+'m') # Create the label and store it
    
    activity_df['time_spent'] = time_spent # Add the column
    
    # Activities and years present in the data (both in ascending order)
    activities = list(activity_df.index.get_level_values(0).unique())
    years = sorted(int(yr) for yr in activity_df.index.get_level_values(1).unique())
    
    # (activity, year) combinations which have data; the remaining ones are filled below
    recorded = set(activity_df.index)
    
    # The dataframe to plot has one row per year. Its columns are the columns of the activity_df
    # with a suffix of "_<activity name>". A year in which an activity was not done gets 0 in
    # the statistics and an empty time label, rather than failing with a KeyError.
    df_to_plot_dict = {}
    for activity in activities: # Loop over the activities
        for col in activity_df.columns: # Loop over the columns
            filler = '0h 0m' if col == 'time_spent' else 0
            df_to_plot_dict[col+'_'+activity] = [
                activity_df.loc[(activity, yr), col] if (activity, yr) in recorded else filler
                for yr in years]
    
    df_to_plot = pd.DataFrame.from_dict(df_to_plot_dict) # Create the dataframe from the dictionary
    
    df_to_plot['year'] = years # Add the year to the dataframe
    
    # Create columns with the share (in %) of the time spent in each activity during the year
    yearly_time = df_to_plot[['Time_h_'+activity for activity in activities]].sum(axis=1)
    for activity in activities:
        df_to_plot['share_'+activity] = df_to_plot['Time_h_'+activity]/yearly_time*100
    
    # The layers of the stacked bars are read from columns named after the activities, so
    # copy the columns of the chosen magnitude under those names
    selection = 'Time_h_' if magnitude == 'Absolute' else 'share_'
    for activity in activities:
        df_to_plot[activity] = df_to_plot[selection+activity]
    
    # Colors of the segments (any activity without a color of its own is gray)
    activity_colors = {'Cycling': 'blue', 'Running': 'red', 'Walking': 'green'}
    colors = [activity_colors.get(activity, 'gray') for activity in activities]
    
    if magnitude == 'Absolute':
        
        # Create a column for the total amount of time, which is the height of the whole bar
        total_hours = df_to_plot[activities].sum(axis=1)
        df_to_plot['total_time_y'] = total_hours.round(2)
        
        # Create a column for the labels of the total amount of time (hours:minutes')
        total_time_label = []
        for t in total_hours:
            hours, minutes = divmod(int(round(t*60)), 60)
            # The minutes always have 2 digits, also when they are 0
            total_time_label.append(str(hours)+':'+str(minutes).zfill(2)+"'")
        
        df_to_plot['total_time_label'] = total_time_label
    
    #####################################################################################################
    # Plotting
    #####################################################################################################
    
    # Set the source as the curated dataframe
    source = ColumnDataSource(df_to_plot)
    
    # Title and y-axis label adaptation
    if magnitude == 'Absolute':
        title = "Evolution of the Time Spent per Activity per Year"
        y_axis_label = 'Hours'
        
    else:
        title = "Evolution of the Share of Time Spent per Activity per Year"
        y_axis_label = 'Percentage of Time Spent'
    
    # Instantiate the figure; the x range is wider on the right to leave room for the legend
    evo_fig = figure(plot_width=900, plot_height=500, title=title, tools="save",
                     y_axis_label=y_axis_label, x_axis_label='Year',
                     x_range=(min(years)-0.5, max(years)+1.5))
        
    # Assign a variable to the stacked vertical bars to customize the hovertools
    renderers = evo_fig.vbar_stack(activities, x='year', width=0.9, color=colors,
                                   source=source, line_color='black', legend_label=activities)
    
    # There is one renderer (layer of the bars) per activity, in the same order
    for activity, glyph in zip(activities, renderers):
        
        # Reference the columns of the activity by name; the braces keep the reference valid
        # whatever characters the activity name has
        def field(col, activity=activity):
            return '@{'+col+'_'+activity+'}'
        
        # Define the tooltips with the respective formats
        hover = HoverTool(tooltips=[('Distance', field('Distance_km')+"{0,0.00} km"),
                                    ('Time', field('time_spent')),
                                    ("Calories Burned", field('Calories')+"{0,0}"),
                                    ("Cumulative Elevation Gain", field('ElevGain_m')+"{0,0} m"),
                                    ("Average Speed", field('avg_speed')+"{0.00} km/h"),
                                    ("Number of Activities", field('count')),
                                    ('Percentage of Time Spent', field('share')+"{0.00}%")],
                          renderers=[glyph])
        
        evo_fig.add_tools(hover) # Add the customized hovertool to the figure
    
    # Tweak the title
    evo_fig.title.align = 'center'
    evo_fig.title.text_font_size = "20px"
    
    # The labels with the total time only exist (and make sense) for the absolute magnitude
    if magnitude == 'Absolute':
        labels = LabelSet(x='year', y='total_time_y', text='total_time_label', level='glyph',
                          text_align='center', source=source, render_mode='canvas', y_offset=3)
        
        # Add the labels to the figure
        evo_fig.add_layout(labels)

    # Remove the gridlines
    evo_fig.xgrid.grid_line_color, evo_fig.ygrid.grid_line_color = None, None

    evo_fig.outline_line_color = None # Remove the outline
    
    evo_fig.legend.location = 'center_right' # Define the location of the legend
    
    evo_fig.y_range.start = 0 # Start of the y range
    
    evo_fig.xaxis.minor_tick_line_color = None  # Turn off x-axis minor ticks
    
    # Customize the x-ticks: one per year
    evo_fig.xaxis.ticker = FixedTicker(ticks=years)
    
    show(evo_fig)
    
evolution_of_time_spent_exercising()

4
25
24
50
28
54
6


### Last years evolution

- 3 years, 5 years,...

![alt text](mycyclinglog.png "Title")

In [26]:
import streamlit as st

import numpy as np
import pandas as pd

from general_functions import define_counter_name

from bokeh.models import ColumnDataSource, FactorRange, Title
from bokeh.plotting import figure

def last_3_years_performance(activity='Running', statistic='Distance'):
    
    """Plot a monthly bar chart of one activity for the most recent 3 years of the data.

    The data is read from 'rwc.csv'. Only the rows of the chosen activity whose year is
    within the last 3 years of the whole file are considered. There is one bar per month
    and year (months without any activity get a bar of height 0) and each year has its
    own color: blue for the earliest, green for the latest and red for any other.

    Parameters
    ----------
    activity : str
        Value of the `Type` column to plot. The default stands in for the select box of
        the Streamlit version of this function.
    statistic : str
        'Distance' (kilometers), 'Time' (hours) or anything else for the number of
        activities.

    Returns
    -------
    None
        The figure is rendered with `show`. If there is no data for the selection, a
        message is printed and nothing is plotted.
    """
    
    # Load the dataframe
    df = pd.read_csv('rwc.csv', index_col=0, parse_dates=['Date'])
        
    ##############################################################################################
    # Data selection and curation
    ##############################################################################################
    
    # Limit the data you will consider based on the activity and the most recent 3 years
    recent = df.drop(['Hours', 'Minutes', 'Seconds'], axis=1).\
             loc[((df.Type==activity) & (df.Year>max(df.Year)-3))]
    
    if recent.empty:
        print('There are no '+activity+' activities in the last 3 years.')
        return
    
    # Group the data by year and then by month
    pre_activity_df = recent.groupby(['Year', 'Month'])
    
    # Sum only the numeric columns (a column such as Date cannot be summed)
    activity_df = pre_activity_df.sum(numeric_only=True)
    
    # Round the decimal cases of the distance to 2
    activity_df.Distance_km = activity_df.Distance_km.round(2)
    
    # Add the average speed column
    activity_df['avg_speed'] = pre_activity_df['AvgSpeed_km/h'].mean().round(2)
    
    # Add the counts column
    activity_df['count'] = pre_activity_df['Date'].count()
    
    # Years which actually have data for the activity, in ascending order
    years_to_consider = sorted(int(yr) for yr in activity_df.index.get_level_values(0).unique())
    first_year, last_year = years_to_consider[0], years_to_consider[-1]
    
    # Color and time labels columns
    color, time_spent = [], []
    
    # Loop through the index, which has 2 keys, but consider only the year's part for the colors
    for year, month in activity_df.index:
        
        # Set one color to each year
        if year == first_year:
            color.append('blue')
        elif year == last_year:
            color.append('green')
        else:
            color.append('red')
            
        # Create the time labels: round to whole minutes first and only then split into hours
        # and minutes, so that 59.5+ minutes carry over to the next hour instead of being "60m"
        hour, minutes = divmod(int(round(activity_df.loc[(year, month), 'Time_h']*60)), 60)
        time_spent.append(str(hour)+'h '+str(minutes)+'m') # Create the label and store it

    # Add the columns to the dataframe
    activity_df['color'] = color
    activity_df['time_spent'] = time_spent
    
    # As there are some months in which some of the activities were not done and they should
    # "appear" in the graph, it is necessary to create rows for them
    # Values for a row without any activity: 0 for the statistics and no color nor time label
    no_activity_row = [None if col in ('color', 'time_spent') else 0 for col in activity_df.columns]
    
    # Collect the rows (12 per year) of the dataframe which will be used to create the plots
    rows, row_labels = [], []
    for year in years_to_consider:
        
        # Months of this year in which the activity was done
        months_done = activity_df.loc[year].index
        
        for month in range(1,13): # Loop over the months
            row_labels.append(str(year)+', '+str(month)) # Create an index
            
            # If there were activities during the month, add the row of the activity_df
            if month in months_done:
                rows.append(activity_df.loc[(year, month), :].tolist())
            # Add the no activity row if there were no activities
            else:
                rows.append(no_activity_row)
    
    df_to_plot = pd.DataFrame(rows, index=row_labels, columns=activity_df.columns)
    
    # List with the name of the months
    month_name = ["Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"]
    
    # List which will be on the x-axis (same order as the rows of df_to_plot)
    x_axis = [(month, str(year)) for year in years_to_consider for month in month_name]
    
    # Add the list to the dataframe
    df_to_plot['x-axis'] = x_axis
    
    # Year of each row: every year which has data is repeated for its 12 months. This does
    # not assume that there are exactly 3 consecutive years.
    df_to_plot['years'] = np.repeat(np.array(years_to_consider, dtype=float), 12)
    
    #####################################################################################################
    # Plotting
    #####################################################################################################
    
    # Set the source as the curated dataframe
    source = ColumnDataSource(df_to_plot)
    
    # Name of the counter of the activity, used in the tooltips, title and y-axis label
    # NOTE(review): define_counter_name comes from general_functions, which is not visible here
    counter_name = define_counter_name(activity)
    
    # Information when the mouse is hovered over the bars
    tooltips = [('Distance', "@Distance_km{0,0.00} km"), ('Time', "@time_spent"),
                ("Calories burned","@Calories{0,0}"),
                ("Cumulative Elevation Gain", "@ElevGain_m{0,0} m"),
                ("Average Speed", "@avg_speed{0.00} km/h"), (counter_name, "@count")]
    
    # Set the title, the y-axis label and the column which defines the height of the bars
    # The beginning and ending of the title will not change regardless of the activity
    title_beginning = 'Evolution of the '
    title_ending = 'During the Last 3 Years'
    if statistic == 'Time':
        title = title_beginning+'Time Spent '+activity
        label = 'Hours' # Y-axis label
        height_choice = 'Time_h'
    
    # If the chosen statistic is Distance
    elif statistic == 'Distance':
        # Past participle which matches the activity
        verb = {'Walking': 'Walked', 'Cycling': 'Cycled'}.get(activity, 'Run')
        
        title = title_beginning+'Number of Kilometers '+verb+' per Month'
        label = 'Kilometers'
        height_choice = 'Distance_km'
    
    else:
        title = title_beginning+counter_name+' per Month'
        label = counter_name
        height_choice = 'count'
    
    # Instantiate the figure
    evolution_fig = figure(y_axis_label=label, tooltips=tooltips,
                            plot_width=900, plot_height=500, tools='save',
                            x_range=FactorRange(*x_axis))
    
    # Add the ending of the title before so that it is below the other
    evolution_fig.add_layout(Title(text=title_ending, text_font_size='20px', align='center'),
                              'above')
    
    # Add the first line of the title
    evolution_fig.add_layout(Title(text=title, text_font_size='20px', align='center'), 'above')
    
    # Vertical bars
    evolution_fig.vbar(x='x-axis', top=height_choice, width=0.9, color='color', source=source)
    
    # Remove unnecessary graph elements
    # Remove gridlines
    evolution_fig.xgrid.grid_line_color, evolution_fig.ygrid.grid_line_color = None, None

    # Remove x axis minor ticks
    evolution_fig.xaxis.minor_tick_line_color = None
    
    # Remove outline line
    evolution_fig.outline_line_color = None
    
    # Start of the y range
    evolution_fig.y_range.start = 0
    
    # Range padding of the x-axis
    evolution_fig.x_range.range_padding = 0.1
    
    # Rotate the labels of the x-axis
    evolution_fig.xaxis.major_label_orientation = 1
    
    # Show the figure
    show(evolution_fig)

In [27]:
last_3_years_performance()