In [7]:
import pandas as pd
import json
import os
import datetime
import plotly.express as px
import plotly.io as pio
import plotly.graph_objects as go

In [8]:
# Directory of the Garmin Connect export that holds the metrics JSON files.
dir_path = 'garmin-data/garmin-data-dump-2024-05-10/DI_CONNECT/DI-Connect-Metrics/'

# One DataFrame per RunRacePredictions*.json file, in concatenation order.
df_list = []

# os.listdir() returns names in arbitrary order. The de-duplication below keeps
# the LAST row per calendarDate, so the files are visited in sorted order to make
# the surviving row the same on every machine and every run.
for filename in sorted(os.listdir(dir_path)):
    if filename.endswith('.json') and filename.startswith('RunRacePredictions'):
        filepath = os.path.join(dir_path, filename)
        # JSON is UTF-8 by specification; do not rely on the platform default.
        with open(filepath, 'r', encoding='utf-8') as f:
            data = json.load(f)
        df = pd.DataFrame(data)
        df_list.append(df)

# The hand-maintained amendments file is appended after every export file, so
# its rows win whenever it shares a calendarDate with exported data.
with open('garmin-data/RunRacePredictions_amendments.json', 'r', encoding='utf-8') as f:
    data = json.load(f)
df = pd.DataFrame(data)
df_list.append(df)

# Combine everything, then:
#   1. keep only the last-seen row for each calendarDate,
#   2. order the rows by calendarDate,
#   3. drop the userProfilePK, timestamp and deviceId columns.
# The race-time columns are left as raw seconds and calendarDate is left as
# loaded; the plotting cell converts the seconds when it builds the traces.
collective_df = (
    pd.concat(df_list)
    .drop_duplicates(subset='calendarDate', keep='last')
    .sort_values('calendarDate')
    .drop(columns=['userProfilePK', 'timestamp', 'deviceId'])
)

In [9]:
# Persist the cleaned predictions table as CSV, leaving out the row index.
csv_path = 'garmin-data/RunRacePredictions.csv'
collective_df.to_csv(csv_path, index=False)

In [10]:
# Title suffix shared by the initial title and by every button title, computed
# once so that all of them carry the same generation timestamp.
plot_title = (
    "Race Time Predictions - Generated on "
    + datetime.datetime.now().strftime("%d-%m-%Y %H:%M:%S")
    + " - Data cut off at 2024-05-10"
)
# Marker colour ramp: values near cmin map to green, values near cmax to red.
color = ["green", "green", "yellow", "red", "red"]


def _format_hhmmss(total_seconds):
    """Render a whole number of seconds as a zero-padded ``HH:MM:SS`` string."""
    minutes, seconds = divmod(int(total_seconds), 60)
    hours, minutes = divmod(minutes, 60)
    return "{:02d}:{:02d}:{:02d}".format(hours, minutes, seconds)


def _build_marker(values, colorscale, cmin, cmax, tickvals):
    """Build the marker settings for one race-distance trace.

    Parameters:
        values: per-point race times in seconds, used as the marker colours.
        colorscale: list of colours spread between ``cmin`` and ``cmax``.
        cmin, cmax: race times (seconds) mapped to the two ends of the scale.
        tickvals: race times (seconds) at which the colour bar shows a tick.

    Returns:
        A dict suitable for the ``marker`` argument of ``go.Scatter``. The
        colour bar starts hidden (``showscale=False``); the buttons switch it
        on when a single race is selected.
    """
    return dict(
        colorscale=colorscale,
        color=values,
        showscale=False,
        cmin=cmin,
        cmax=cmax,
        colorbar=dict(
            thickness=5,
            tickvals=tickvals,
            # Labels are derived from tickvals so they can never drift apart
            # (the hand-typed list previously contained the invalid "00:60:00").
            ticktext=[_format_hhmmss(value) for value in tickvals],
            title="Color Scale",
            tickformat="%H:%M:%S",
            tickmode="array",
            outlinewidth=0,
        ),
    )


# One entry per trace: (column, trace name / button label, cmin, cmax, tickvals).
# All numbers are race times in seconds. The order here is the trace order that
# the button visibility masks below rely on.
race_specs = [
    ("raceTime5K", "5K", 875, 1825,
     [900, 1050, 1200, 1350, 1500, 1650, 1800]),
    ("raceTime10K", "10K", 1750, 3650,
     [1800, 2100, 2400, 2700, 3000, 3300, 3600]),
    ("raceTimeHalf", "Half Marathon", 3495, 9105,
     [3600, 4500, 5400, 6300, 7200, 8100, 9000]),
    ("raceTimeMarathon", "Marathon", 6990, 18210,
     [7200, 9000, 10800, 12600, 14400, 16200, 18000]),
]

# create a plotly figure
fig = go.Figure()

# Add one trace per race distance: predicted time vs. calendarDate.
for column, trace_name, cmin, cmax, tickvals in race_specs:
    marker = _build_marker(collective_df[column], color, cmin, cmax, tickvals)
    fig.add_trace(
        go.Scatter(
            x=collective_df["calendarDate"],
            # Seconds are converted to datetimes so the y-axis can be
            # formatted as a clock time with tickformat="%H:%M:%S".
            y=pd.to_datetime(collective_df[column], unit="s"),
            name=trace_name,
            line=dict(color="white"),
            marker=marker,
        )
    )

# Initial title matches the "All" button, since every trace starts visible.
fig.update_layout(title_text="All " + plot_title)

# X-axis: title, date tick format and crosshair spikes.
fig.update_xaxes(
    title_text="Date",
    tickformat="%d-%m-%Y",
    showspikes=True,
    spikemode="across",
)

# Y-axis: title, time tick format, crosshair spikes, and a reversed range so
# the fastest times are at the top.
fig.update_yaxes(
    title_text="Race Time Prediction (hh:mm:SS)",
    tickformat="%H:%M:%S",
    autorange="reversed",
    showspikes=True,
    spikemode="across",
)

# Anchor the colour bar of the shared layout coloraxis at the top.
# NOTE(review): the traces above use per-trace marker colour bars rather than a
# shared coloraxis, so this setting appears to have no visible effect - confirm
# before removing.
fig.update_layout(coloraxis_colorbar=dict(yanchor="top", y=1))

# Draw every trace as markers joined by lines, with the default hover text.
fig.update_traces(mode="markers+lines", hovertemplate=None)

# Buttons to select which trace is shown. "All" shows every trace with the
# colour bars hidden; each race button shows only that trace and its colour bar.
trace_names = [spec[1] for spec in race_specs]
buttons = [
    dict(
        label="All",
        method="update",
        args=[
            {"visible": [True] * len(trace_names), "marker.showscale": False},
            {"title": "All " + plot_title},
        ],
    )
]
for position, trace_name in enumerate(trace_names):
    buttons.append(
        dict(
            label=trace_name,
            method="update",
            args=[
                {
                    "visible": [index == position for index in range(len(trace_names))],
                    "marker.showscale": True,
                },
                {"title": trace_name + " " + plot_title},
            ],
        )
    )

fig.update_layout(
    updatemenus=[
        dict(
            buttons=buttons,
            bgcolor="Grey",
            font=dict(color="Black"),
            type="buttons",
            direction="left",
            showactive=True,
            xanchor="left",
            y=1.03,
            yanchor="top",
        )
    ]
)

# update the layout theme
fig.update_layout(template="plotly_dark", showlegend=False, title_x=0.5)

# Render inline and also save a standalone HTML copy.
fig.show()
fig.write_html("garmin-data/RunRacePredictions.html")