In [1]:
import seaborn as sns
import json
from datetime import datetime, timedelta
import pandas as pd
import matplotlib.pyplot as plt
import folium
import numpy as np
import plotly.express as px 
# Select seaborn's "darkgrid" style for the figures drawn after this point.
sns.set_style("darkgrid")

# Sleep Data

In [None]:
# Load the exported sleep records (the file holds a list of dicts, one per record).
with open('data/sleep.json', 'r') as f:
    data = json.load(f)

# Build the frame in a single call. Appending one Series per record copied the
# whole frame on every iteration, and DataFrame.append no longer exists in
# pandas 2.0.
sleep_df = pd.DataFrame(data)

# Fields missing from a record become 0 (this also applies to nested/non-numeric
# columns, so later cells must tolerate a 0 where they expect a dict or a date).
sleep_df = sleep_df.fillna(0)
sleep_df['calendarDate'] = pd.to_datetime(sleep_df['calendarDate'])
sleep_df['weekday'] = sleep_df['calendarDate'].dt.day_name()
def is_weekend(day):
    """Classify a weekday name.

    Returns 'Weekend' for 'Saturday' or 'Sunday' and 'Regular day' for
    any other value.
    """
    return 'Weekend' if day in ('Saturday', 'Sunday') else 'Regular day'

# Tag every record as 'Weekend' or 'Regular day' based on its weekday name.
sleep_df['day_type'] = sleep_df['weekday'].map(is_weekend)
sleep_df

In [None]:
# Parse the GMT start/end stamps, then derive clock times and the duration in hours.
sleep_df["start_date"] = pd.to_datetime(sleep_df["sleepStartTimestampGMT"])
sleep_df["end_date"] = pd.to_datetime(sleep_df["sleepEndTimestampGMT"])
sleep_df["start_time"] = sleep_df["start_date"].dt.time
sleep_df["end_time"] = sleep_df["end_date"].dt.time
sleep_df['duration'] = (sleep_df['end_date'] - sleep_df['start_date']).dt.total_seconds() / 3600

# Explicit copy: the columns below are overwritten, and writing into a slice of
# sleep_df triggers SettingWithCopyWarning (and may silently not stick).
df = sleep_df[["start_time", "end_time", "duration", "day_type"]].copy()
df["Day"] = df.index

# Every record is re-anchored onto the same reference day so the timeline bars
# line up by clock time instead of by calendar date.
today = datetime.now().date()
# Starts before this cutoff are treated as "after midnight", i.e. on the next day.
cutoff = datetime.strptime('14:00:00', '%H:%M:%S').time()

anchored_starts = []
anchored_ends = []
for start_time, end_time in zip(df["start_time"], df["end_time"]):
    start_date = today + timedelta(days=1) if start_time < cutoff else today
    # An end clock time earlier than the start means the sleep crossed midnight.
    end_date = start_date + timedelta(days=1) if end_time < start_time else start_date
    anchored_starts.append(datetime.combine(start_date, start_time))
    anchored_ends.append(datetime.combine(end_date, end_time))

df["start_time"] = anchored_starts
df["end_time"] = anchored_ends

fig = px.timeline(df, x_start="start_time", x_end="end_time", y="Day", color="day_type", width=800, height=1000)
fig.update_yaxes(autorange="reversed") # otherwise tasks are listed from the bottom up
fig.show()
df_sleep_dur = df.copy()


In [None]:
# start_time/end_time already hold full datetimes anchored to a common reference
# day, so a plain to_datetime is enough; infer_datetime_format is deprecated in
# pandas 2.0 and has no effect on values that are already datetimes.
df_sleep_dur['timestamp_start'] = pd.to_datetime(df_sleep_dur['start_time'])
df_sleep_dur['timestamp_end'] = pd.to_datetime(df_sleep_dur['end_time'])

# Averaging the anchored timestamps and keeping only the clock part gives the
# typical bed and wake-up times.
print(f"Average start sleeping time: {df_sleep_dur['timestamp_start'].mean().time()}")
print(f"Average wake up time: {df_sleep_dur['timestamp_end'].mean().time()}")

In [None]:
sleep_df['week_number'] = sleep_df['calendarDate'].dt.isocalendar().week
# Records without a score block were turned into 0 by the earlier fillna(0);
# indexing those would raise, so anything that is not a dict (or lacks the key)
# yields NaN instead.
sleep_df['overallScore'] = sleep_df['sleepScores'].apply(
    lambda scores: scores.get('overallScore', np.nan) if isinstance(scores, dict) else np.nan
)

# Copy before adding/replacing columns so sleep_df itself is not touched.
heat_df = sleep_df[["week_number", "overallScore", "weekday"]].copy()
# Convert the weekday column to a categorical variable with a specific order
weekday_order = ["Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday"]
heat_df["weekday"] = pd.Categorical(heat_df["weekday"], categories=weekday_order)

# Pivot into a 2D matrix: one row per ISO week, one column per weekday.
# Keyword arguments are required by pandas >= 2.0.
heat_data = heat_df.pivot(index="week_number", columns="weekday", values="overallScore")

# Order the rows chronologically (weeks of the old year before weeks of the new
# year) using the dates themselves rather than a hard-coded row split.
week_order = sleep_df.sort_values("calendarDate")["week_number"].drop_duplicates().tolist()
pivoted_data = heat_data.reindex(week_order)

# Create the heatmap
fig, ax = plt.subplots(figsize=(10, 8))
sns.heatmap(pivoted_data, cmap="YlGnBu", annot=True, fmt=".0f", cbar=False, ax=ax)

# Set the axis labels and title
ax.set_ylabel("Week nr")
ax.set_xlabel("Weekday")
ax.set_title("Overall Score Heatmap")

plt.show()

In [None]:
# Copy before replacing the weekday column so sleep_df itself is not touched.
heat_df = sleep_df[["week_number", "duration", "weekday"]].copy()
# Convert the weekday column to a categorical variable with a specific order
weekday_order = ["Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday"]
heat_df["weekday"] = pd.Categorical(heat_df["weekday"], categories=weekday_order)

# Pivot into a 2D matrix: one row per ISO week, one column per weekday.
# Keyword arguments are required by pandas >= 2.0.
heat_data = heat_df.pivot(index="week_number", columns="weekday", values="duration")

# Order the rows chronologically (weeks of the old year before weeks of the new
# year) using the dates themselves rather than a hard-coded row split.
week_order = sleep_df.sort_values("calendarDate")["week_number"].drop_duplicates().tolist()
pivoted_data = heat_data.reindex(week_order)

# Create the heatmap
fig, ax = plt.subplots(figsize=(10, 8))
sns.heatmap(pivoted_data, cmap="YlGnBu", annot=True, fmt=".0f", cbar=False, ax=ax)

# Set the axis labels and title
ax.set_ylabel("Week nr")
ax.set_xlabel("Weekday")
ax.set_title("Sleeping Duration Heatmap")

plt.show()

In [None]:
def _draw_colored_series(axis, column, ylabel, color):
    """Plot one sleep_df column against calendarDate, tinting its y-axis to match."""
    axis.set_ylabel(ylabel, color=color)
    axis.plot(sleep_df['calendarDate'], sleep_df[column], color=color)
    axis.tick_params(axis='y', labelcolor=color)


fig, ax1 = plt.subplots(figsize=(10, 6))
ax1.set_xlabel('Date')

# Left y-axis: overall sleep score.
_draw_colored_series(ax1, 'overallScore', 'Overall Score', 'tab:red')

# Right y-axis (shares the date axis): sleep duration.
ax2 = ax1.twinx()
_draw_colored_series(ax2, 'duration', 'Duration (hours)', 'tab:blue')

plt.title('Sleep Data')
plt.show()

In [None]:
# Express each sleep-stage column in hours instead of seconds.
seconds_to_hours = {
    'deepSleepSeconds': 'deepSleepHours',
    'lightSleepSeconds': 'lightSleepHours',
    'remSleepSeconds': 'remSleepHours',
    'awakeSleepSeconds': 'awakeSleepHours',
}
for seconds_column, hours_column in seconds_to_hours.items():
    sleep_df[hours_column] = sleep_df[seconds_column] / 3600

# Total sleep counts deep + light + REM; time awake is left out.
sleep_df['totSleepHours'] = sleep_df['deepSleepHours'] + sleep_df['lightSleepHours'] + sleep_df['remSleepHours']
sleep_df[['deepSleepHours', 'lightSleepHours', 'remSleepHours', 'awakeSleepHours', "totSleepHours"]]

In [None]:
# Draw every sleep-stage series (in hours) against the calendar date on one axes.
stage_columns = ['deepSleepHours', 'lightSleepHours', 'remSleepHours', 'awakeSleepHours', "totSleepHours"]
fig, ax = plt.subplots(figsize=(10, 6))
sleep_df.plot(x='calendarDate', y=stage_columns, ax=ax)

# Switches the global seaborn theme; it runs after the lines are drawn and
# before the labels are set, and stays in effect for later figures.
sns.set_theme()

# Title and axis labels
ax.set(title='Sleep Data', xlabel='Date', ylabel='Hours')

plt.show()

In [None]:
# Mean hours per sleep stage, rounded to two decimals.
avg_light = round(sleep_df['lightSleepHours'].mean(), 2)
avg_deep = round(sleep_df['deepSleepHours'].mean(), 2)
avg_rem = round(sleep_df['remSleepHours'].mean(), 2)

# Mean and extremes of the total sleep time.
total_hours = sleep_df['totSleepHours']
avg_total = round(total_hours.mean(), 2)
min_total = round(total_hours.min(), 2)
max_total = round(total_hours.max(), 2)

print(f"Avg light sleep: {avg_light} hours")
print(f"Avg deep sleep: {avg_deep} hours")
print(f"Avg rem sleep: {avg_rem} hours")
print(f"Avg total sleep: {avg_total} hours")

print(f"Min total sleep: {min_total} hours")
print(f"Max total sleep: {max_total} hours")



In [None]:
# Average every numeric sleep metric per weekday.
# - numeric_only=True: sleep_df also carries datetime, time, dict and string
#   columns, which pandas >= 2.0 refuses to average.
# - reindex: groupby sorts the weekday names alphabetically; put the rows in
#   calendar order so positional tick labels (Monday..Sunday) match the data.
weekday_order = ["Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday"]
sleep_weekday = (
    sleep_df.groupby('weekday')
    .mean(numeric_only=True)
    .reindex(weekday_order)
)
sleep_weekday

In [None]:
# Stacked bar chart of the average sleep stages per weekday, with the awake
# time drawn in black on top of the stack.
weekday_order = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']
stage_columns = ['deepSleepHours', 'lightSleepHours', 'remSleepHours']

# Put the rows in calendar order first. A groupby on weekday names yields them
# alphabetically, so relabelling the ticks Monday..Sunday without reordering
# would attach the wrong day to every bar.
sleep_by_day = sleep_weekday.reindex(weekday_order)

fig, ax = plt.subplots(figsize=(10, 6))
sleep_by_day[stage_columns].plot(kind='bar', stacked=True, ax=ax)
sleep_by_day['awakeSleepHours'].plot(
    kind='bar',
    color='black',
    bottom=sleep_by_day[stage_columns].sum(axis=1),
    ax=ax,
)

# Set the labels and title
ax.set_xticks(range(len(weekday_order)))
ax.set_xticklabels(weekday_order)
ax.set_xlabel('Weekday')
ax.set_ylabel('Sleep Duration (Hours)')
ax.set_title('Average Sleep Duration by Weekday')

# Show the plot
plt.show()

# Activities Data

In [13]:
# Read the activity export, shorten one activity label and parse the
# start/end stamps as timezone-aware UTC datetimes.
df_act = pd.read_csv('data/activities.csv')
df_act['Activity Type'] = df_act['Activity Type'].str.replace('Resort Skiing/Snowboarding','Resort Skiing')
for time_column in ('Start Time', 'End Time'):
    df_act[time_column] = pd.to_datetime(df_act[time_column], utc=True)

# Weekday name of the (UTC) start time.
df_act['weekday'] = df_act['Start Time'].dt.day_name()


In [None]:
# Show the column labels of the activities frame.
df_act.columns

In [None]:
# Make weekday an ordered categorical so that sorting follows the calendar week.
weekday_labels = ['Monday','Tuesday','Wednesday','Thursday','Friday','Saturday', 'Sunday']
df_act["weekday"] = pd.Categorical(df_act["weekday"], categories=weekday_labels, ordered=True)

# Count activities per weekday and list them Monday..Sunday.
weekday_counts = df_act["weekday"].value_counts().sort_index()

count_ax = weekday_counts.plot.bar(figsize=(10, 6))
count_ax.set_title("Number of Activities by Weekday")
count_ax.set_xlabel("Weekday")
count_ax.set_ylabel("Count")
plt.show()

In [None]:
df_act_heat = df_act.sort_values(by='Start Time')
# Snap both ends to midnight. date_range steps in whole days from the start
# stamp, so with raw timestamps the final day was dropped whenever the last
# end time fell earlier in the day than the first start time.
start_date = df_act_heat["Start Time"].iloc[0].normalize()
end_date = df_act_heat["End Time"].iloc[-1].normalize()
df_act_heat["date"] = df_act_heat["Start Time"].dt.date
df_act_heat = df_act_heat.set_index("date")

# One row per calendar day in the covered span, with its ISO week and weekday.
date_index = pd.date_range(start=start_date, end=end_date)
df_date = pd.DataFrame({'date': date_index})
df_date['week_number'] = df_date['date'].dt.isocalendar().week
df_date['weekday'] = df_date['date'].dt.day_name()
df_date["date"] = df_date["date"].dt.date
df_date = df_date.set_index("date")

# Left-join the activities onto the calendar so days without an activity stay
# in the frame (as NaN); the calendar's weekday column replaces the activity one.
df_act_heat = df_act_heat.drop(["weekday"], axis=1)
df_act_heat = df_date.join(df_act_heat)

# Copy before replacing the weekday column so df_act_heat itself is not touched.
heat_df = df_act_heat[["week_number", "Max. Heart Rate (bpm)", "weekday"]].copy()
# Convert the weekday column to a categorical variable with a specific order
weekday_order = ["Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday"]
heat_df["weekday"] = pd.Categorical(heat_df["weekday"], categories=weekday_order)

# A date appears once per activity; pivot needs unique (week, weekday) pairs,
# so only the first row of each date is kept.
# NOTE(review): that shows the first activity of the day, not the daily maximum — confirm intent.
heat_df = heat_df[~heat_df.index.duplicated()]

# Keyword arguments are required by pandas >= 2.0.
heat_data = heat_df.pivot(index="week_number", columns="weekday", values="Max. Heart Rate (bpm)")

# Rows in chronological order (heat_df is already sorted by date), instead of
# a hard-coded row split.
week_order = heat_df["week_number"].drop_duplicates().tolist()
pivoted_data = heat_data.reindex(week_order)

fig, ax = plt.subplots(figsize=(10, 6))
sns.heatmap(pivoted_data, cmap="YlGnBu", annot=True, fmt=".0f", cbar=False, ax=ax)
ax.set_ylabel("Week nr")
ax.set_xlabel("Weekday")
ax.set_title("Max. Heart Rate (bpm)")

plt.show()

In [None]:
# Copy before replacing the weekday column so df_act_heat itself is not touched.
heat_df = df_act_heat[["week_number", "Calories", "weekday"]].copy()
# Convert the weekday column to a categorical variable with a specific order
weekday_order = ["Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday"]
heat_df["weekday"] = pd.Categorical(heat_df["weekday"], categories=weekday_order)

# A date appears once per activity; pivot needs unique (week, weekday) pairs,
# so only the first row of each date is kept.
# NOTE(review): that shows the first activity of the day, not the daily total — confirm intent.
heat_df = heat_df[~heat_df.index.duplicated()]

# Keyword arguments are required by pandas >= 2.0.
heat_data = heat_df.pivot(index="week_number", columns="weekday", values="Calories")

# Rows in order of first appearance (df_act_heat is indexed by date in
# calendar order), instead of a hard-coded row split.
week_order = heat_df["week_number"].drop_duplicates().tolist()
pivoted_data = heat_data.reindex(week_order)

fig, ax = plt.subplots(figsize=(10, 6))
sns.heatmap(pivoted_data, cmap="YlGnBu", annot=True, fmt=".0f", cbar=False, ax=ax)

ax.set_ylabel("Week nr")
ax.set_xlabel("Weekday")
ax.set_title("Calories")

plt.show()

In [18]:
# Parse the h:m:s strings as datetimes, then collapse them to whole minutes
# (hours * 60 + minutes; the seconds part is not counted).
elapsed = pd.to_datetime(df_act["Elapsed Duration (h:m:s)"], format='%H:%M:%S')
df_act['Elapsed Duration (h:m:s)'] = elapsed
df_act['MinutesDuration'] = elapsed.dt.hour * 60 + elapsed.dt.minute

In [None]:
# Span between the earliest and the latest activity. Using min/max keeps the
# result positive regardless of whether the CSV lists rows oldest- or
# newest-first (first-row minus last-row only worked for one of the two).
start = df_act["Start Time"].min()
end = df_act["Start Time"].max()
total_days = (end - start).days
print(total_days)
total_weeks = total_days / 7
print(total_weeks)
total_activities = len(df_act)
print(total_activities)
# Activities per week; a span shorter than one day has no meaningful rate.
print(total_activities / total_weeks if total_weeks else float("nan"))

In [None]:
# Number of activities per type, most frequent first.
activities = df_act["Activity Type"].value_counts()

# Install a custom colour cycle (this changes the global seaborn palette).
colors = ['#5DA5DA', '#FAA43A', '#60BD68', '#F17CB0', '#B2912F', '#B276B2', '#DECF3F', '#F15854']
sns.set_palette(sns.color_palette(colors))

# Pie chart with the share of each activity type.
fig, pie_ax = plt.subplots(figsize=(8, 8))
pie_ax.pie(activities.values, labels=activities.index, autopct='%1.1f%%', textprops={'fontsize': 10})
pie_ax.set_title('Activity Types')

plt.show()

In [None]:
# Total minutes spent on each activity type.
df_act_time = df_act.groupby("Activity Type")["MinutesDuration"].sum()

duration_ax = plt.gca()
duration_ax.bar(df_act_time.index, df_act_time.values)
duration_ax.set_xlabel('Activity Type')
duration_ax.set_ylabel('Total Duration (Minutes)')
duration_ax.set_title('Total Duration of Each Activity')
plt.xticks(rotation=90)
plt.show()

In [None]:
# Two side-by-side panels: average (left) and total (right) duration per activity type.
fig, (ax1, ax2) = plt.subplots(ncols=2, figsize=(10,5))
plt.subplots_adjust(wspace=0.5)

duration_panels = [
    (ax1, np.mean, 'Average Duration (Minutes)', 'Average Duration (Minutes) of Each Activity'),
    (ax2, np.sum, 'Total Duration (Minutes)', 'Total Duration (Minutes) of Each Activity'),
]
for panel_ax, estimator, ylabel, title in duration_panels:
    sns.barplot(x="Activity Type", y="MinutesDuration", data=df_act, estimator=estimator, ax=panel_ax)
    panel_ax.set_xlabel('Activity Type')
    panel_ax.set_ylabel(ylabel)
    panel_ax.set_title(title)
    # Rotate the category labels so long activity names stay readable.
    panel_ax.set_xticklabels(labels=panel_ax.get_xticklabels(), rotation=90)

# Display the plot
plt.tight_layout()
plt.show()


In [None]:
# Two side-by-side panels: average (left) and total (right) distance per activity type.
fig, (ax1, ax2) = plt.subplots(ncols=2, figsize=(10,5))
plt.subplots_adjust(wspace=0.5)

distance_panels = [
    (ax1, np.mean, 'Average Distance (km)', 'Average Distance (km) of Each Activity'),
    (ax2, np.sum, 'Total Distance (km)', 'Total Distance (km) of Each Activity'),
]
for panel_ax, estimator, ylabel, title in distance_panels:
    sns.barplot(x="Activity Type", y="Distance (km)", data=df_act, estimator=estimator, ax=panel_ax)
    panel_ax.set_xlabel('Activity Type')
    panel_ax.set_ylabel(ylabel)
    panel_ax.set_title(title)
    # Rotate the category labels so long activity names stay readable.
    panel_ax.set_xticklabels(labels=panel_ax.get_xticklabels(), rotation=90)

# Display the plot
plt.tight_layout()
plt.show()



In [None]:
# Keep only the two skiing activity types.
ski_types = ["Backcountry Skiing", "Resort Skiing"]
df_bcs = df_act[df_act["Activity Type"].isin(ski_types)]

# Duration against distance, coloured by skiing type.
ski_ax = sns.scatterplot(data=df_bcs, x="Distance (km)", y="MinutesDuration", hue="Activity Type")
ski_ax.set_title("Scatter Plot of Duration vs Distance")
plt.show()

In [None]:
# Elevation gain against distance for backcountry skiing only.
backcountry = df_act[df_act["Activity Type"] == "Backcountry Skiing"]
sns.scatterplot(data=backcountry, x="Distance (km)", y="Elevation Gain (m)")
plt.show()

In [None]:
# Discard activities that have no elevation gain recorded.
df_bcs = df_bcs[df_bcs['Elevation Gain (m)'].notna()]

# One bar per activity (x is the row index), coloured by location.
# NOTE(review): df_bcs as filtered in the earlier cell also contains
# "Resort Skiing" rows, while the title mentions backcountry only — confirm.
ax = sns.barplot(data=df_bcs, x=df_bcs.index, y='Elevation Gain (m)', hue='Location Name')
ax.set_title("Elevation Gain by Location for Backcountry Skiing Activities")
# Park the legend to the right of the axes so it does not cover the bars.
ax.legend(bbox_to_anchor=(1, 0.5), loc='center left')

plt.show()

In [None]:
# Average and maximum heart rate against duration, side by side, sharing one
# legend placed below both panels.
fig, axs = plt.subplots(1, 2, figsize=(12, 6))

# Left panel: drawn WITH its legend so the hue handles/labels can be collected
# from this very axes. (Reading them from a leftover `ax1` of an earlier cell,
# with both panels drawn legend-less, produced an empty or unrelated legend.)
sns.scatterplot(data=df_act, y="Average Heart Rate (bpm)", x="MinutesDuration", hue="Activity Type", ax=axs[0])
axs[0].set_title("Average Heart Rate vs. Duration")
handles, labels = axs[0].get_legend_handles_labels()
panel_legend = axs[0].get_legend()
if panel_legend is not None:
    # The per-panel legend is replaced by the shared one below.
    panel_legend.remove()

# Right panel: same hue mapping, no legend of its own.
sns.scatterplot(data=df_act, y="Max. Heart Rate (bpm)", x="MinutesDuration", hue="Activity Type", ax=axs[1], legend=False)
axs[1].set_title("Max. Heart Rate vs. Duration")

# Common legend below the subplots; reserve room for it at the bottom.
fig.legend(handles, labels, loc='lower center', ncol=4)
fig.subplots_adjust(bottom=0.25)
plt.show()



In [None]:
# plt and sns come from the notebook's import cell; they are not re-imported here.

# Two panels: calorie distribution per activity type, and duration vs calories.
fig, axs = plt.subplots(1, 2, figsize=(12, 6))

# Left panel: box plot of calories for each activity type.
sns.boxplot(data=df_act, x='Activity Type', y='Calories', ax=axs[0])
axs[0].set_title('Calories by Activity Type')
axs[0].set_xlabel('Activity Type')
axs[0].set_ylabel('Calories')
axs[0].tick_params(axis='x', rotation=90)

# Right panel: scatter of training duration against calories, coloured by type.
sns.scatterplot(data=df_act, x="Calories", y="MinutesDuration", hue="Activity Type", ax=axs[1])
axs[1].set_title("Training duration vs Calories")
axs[1].set_xlabel('Calories')
axs[1].set_ylabel('Duration')
axs[1].legend(loc='upper right')

plt.show()


In [None]:
# Clock time and hour of each activity start. Start Time was parsed with
# utc=True, so these hours are UTC, not local time.
df_act["start_hour"] = df_act["Start Time"].dt.time
df_act['hour'] = df_act['Start Time'].dt.hour
df = df_act[["hour", "Activity Type"]]

# Rows: hour of day; columns: activity type; values: number of activities.
hourly_counts = df.groupby(['hour', 'Activity Type']).size().unstack()

# The size is passed to the plot call itself. Calling plt.figure(figsize=...)
# after plotting only opened a second, empty figure and left the chart at the
# default size.
hour_ax = hourly_counts.plot(kind='bar', stacked=True, figsize=(12, 5))
hour_ax.set_xlabel('Hour of Day')
hour_ax.set_ylabel('Number of Activities')
hour_ax.set_title('Activity Types by Hour of Day')
hour_ax.legend(title='Activity Type', loc='upper left')
plt.show()


In [None]:
# Read the training-load export into a DataFrame.
df = pd.read_json("data/training_load.json")

# The timestamp column holds epoch milliseconds; turn it into datetimes.
df['timestamp'] = pd.to_datetime(df['timestamp'], unit='ms')

# Acute training load over time.
load_ax = df.plot(x='timestamp', y=['dailyTrainingLoadAcute'], figsize=(12,4))
load_ax.set_xlabel('Date')
load_ax.set_ylabel('Training load')
load_ax.set_title('Acute training load timeline')
plt.show()



In [None]:
# A marker needs both coordinates. Filtering on longitude alone let rows with a
# missing latitude through, and folium rejects NaN locations.
coordinate_columns = ['Begin Latitude (°DD)', 'Begin Longitude (°DD)']
df_map = df_act.dropna(subset=coordinate_columns)

# World map centred on (0, 0) at a low zoom level.
m = folium.Map(location=[0, 0], zoom_start=2)

# One marker at the starting point of every activity with known coordinates.
for latitude, longitude in zip(df_map['Begin Latitude (°DD)'], df_map['Begin Longitude (°DD)']):
    folium.Marker(location=[latitude, longitude]).add_to(m)

# Display map
m