In [59]:
import numpy as np
import pandas as pd
import re
import os

import seaborn as sns
import matplotlib.pyplot as plt
import plotly.graph_objects as go

# Folder that holds the HabitDash export. Set the WHOOP_DATA_DIR environment
# variable to point the notebook at a different location; when it is unset the
# original local path is used, so existing setups keep working unchanged.
data_dir = os.environ.get("WHOOP_DATA_DIR", "/Users/philip_p/Documents/whoop/")

# Keep wide frames readable in cell output.
pd.set_option("display.max_columns", 10)
pd.set_option("display.width", 600)

def clean_input_whoop_data(input_data: pd.DataFrame) -> pd.DataFrame:
    """Clean a raw Whoop dataframe exported from HabitDash.com.

    The caller's frame is never modified: all changes are applied to a new
    frame, so the function can be re-run on the same input (e.g. when a
    notebook cell is executed twice) without failing on a missing column.

    Parameters
    ----------
    input_data
        Frame read from the csv titled
        ``YYYY-MM-DD Habit Dash (flat file).csv``. It must contain the
        columns 'date', 'field' and 'source'.

    Returns
    -------
    pd.DataFrame
        New frame with the 'source' column removed, every "whoop_" occurrence
        stripped from the 'field' names and 'date' parsed to datetime. For a
        HabitDash flat file the remaining columns are expected to be
        ['date', 'field', 'value'].

    Raises
    ------
    AssertionError
        If ``input_data`` has no 'date' column.
    KeyError
        If ``input_data`` has no 'field' or 'source' column.
    """
    assert 'date' in input_data.columns, "Input data does not have a valid 'date' column"

    print(f"Reading input data, {input_data['date'].nunique()} "
          f"days worth of data...")

    # ``drop`` without ``inplace`` returns a new frame, which keeps the
    # caller's data intact for the assignments below.
    cleaned = input_data.drop('source', axis=1)

    # rename the field column (literal replacement, not a regular expression)
    cleaned['field'] = cleaned['field'].str.replace("whoop_", "", regex=False)
    cleaned['date'] = pd.to_datetime(cleaned['date'])

    return cleaned

# --- READ WHOOP DATA
# os.listdir returns entries in arbitrary order, so sort the matches to make
# the choice deterministic. Assuming the date-prefixed naming of the export
# ("YYYY-MM-DD Habit Dash (flat file).csv"), the last match is the newest one.
flat_files = sorted(x for x in os.listdir(data_dir) if
                    re.search("flat file", x))
if not flat_files:
    raise FileNotFoundError(
        f"No 'flat file' export found in {data_dir!r}")
whoop_file = flat_files[-1]
habit_dash_df = pd.read_csv(os.path.join(data_dir, whoop_file))

cleaned_df = clean_input_whoop_data(input_data=habit_dash_df)

# cleaned_df['field'] held 31 distinct measures when this was written
# (original author's note).

# First token of every field name, e.g. 'recovery' for 'recovery_score'.
macro_fields = {x.split("_")[0] for x in cleaned_df['field'].unique()}

# recovery score plotting
recovery = cleaned_df.loc[cleaned_df['field'] == 'recovery_score'].copy()
# Bands: [0, 33] red, (33, 67] yellow, (67, 100] green. include_lowest keeps
# a score of exactly 0 in the red band instead of turning it into NaN.
recovery['colour'] = pd.cut(x=recovery['value'],
                            bins=[0, 33, 67, 100],
                            labels=['red', 'yellow', 'green'],
                            include_lowest=True)

Reading input data, 86 days worth of data...


In [60]:
import plotly.express as px
# Scatter of recovery score over time, coloured by recovery band.
# The colours are pinned per band with color_discrete_map: a plain colour
# sequence is handed out in the order the bands are first encountered in the
# data, so it would silently swap colours if the data started with another band.
fig = px.scatter(recovery, x='date', y='value',
                 color='colour',
                 color_discrete_map={'red': 'red',
                                     'yellow': '#FF9900',
                                     'green': 'green'})
fig.update_layout(
    title={
        'text': 'Recovery Scores History',
        'yanchor': 'top'
    },
    xaxis_title='Date',
    yaxis_title='Recovery Score'
)


In [61]:
import plotly.express as px
# Line chart of the recovery score over time.
fig_two = px.line(data_frame=recovery, x='date', y='value')
# update_layout returns the figure, so leaving it as the last expression
# renders the chart in the cell output.
fig_two.update_layout(
    title=dict(text='Recovery Scores History', yanchor='top'),
    xaxis={'title': {'text': 'Date'}},
    yaxis={'title': {'text': 'Recovery Score'}},
)


In [62]:
import plotly.figure_factory as ff

# Distribution plot of the recovery scores, using bins that are
# 2 score points wide.
fig_three = ff.create_distplot(
    hist_data=[recovery['value']],
    group_labels=['Recovery_Score'],
    bin_size=2,
)
fig_three.show()

In [64]:
import scipy.stats as sci_stats
# Summary statistics (count, min/max, mean, variance, skewness, kurtosis)
# of the recovery scores.
sci_stats.describe(a=recovery['value'])


DescribeResult(nobs=84, minmax=(23.0, 97.0), mean=62.035714285714285, variance=342.87822719449224, skewness=0.2914893897834395, kurtosis=-0.6489346775453706)

In [65]:
# Horizontal box plot of all recovery scores with the raw observations
# drawn next to the box.
bxplot_fig = go.Figure(data=[go.Box(x=recovery['value'],
                                    boxpoints='all',  # show all points in the plot
                                    jitter=0.3)])
# Give the figure a title and axis label so it can be read on its own.
bxplot_fig.update_layout(
    title={
        'text': 'Recovery Scores Distribution',
        'yanchor': 'top'
    },
    xaxis_title='Recovery Score'
)
bxplot_fig.show()

In [66]:
# Calendar month number (1-12) of every reading, used to group by month.
recovery['month'] = recovery['date'].dt.month

In [97]:
# filter only for the months which I have more than 15 observations for
num_observations = pd.pivot_table(recovery,
                                  index='month',
                                  values='value',
                                  aggfunc='count').reset_index()
valid_months = num_observations.loc[num_observations['value'] > 15, 'month'].values
recovery_df_to_analyse = recovery[recovery['month'].isin(valid_months)].copy()
# Abbreviated month name ('Mar', 'Apr', ...) for plotting. The pandas .dt
# accessor is used so this cell does not depend on `datetime` being imported
# by another cell, which failed with a NameError on a fresh kernel.
recovery_df_to_analyse['month_str'] = \
    recovery_df_to_analyse['date'].dt.strftime('%b')

In [98]:
# Sanity check: (rows, columns) left after keeping only the well-covered months.
recovery_df_to_analyse.shape

(72, 6)

In [99]:
# One box per month of recovery scores, with every observation drawn
# alongside it. jitter=0 lines the points up in a single column.
month_bxplot_fig = px.box(
    data_frame=recovery_df_to_analyse,
    x='month_str',
    y='value',
    points='all',
).update_traces(jitter=0)
month_bxplot_fig.show()

In [92]:
from datetime import datetime
# Abbreviated month name ('Mar', 'Apr', ...) of every recovery reading,
# computed with the vectorised pandas .dt accessor instead of a Python loop.
months = recovery['date'].dt.strftime('%b').tolist()
months

['Mar',
 'Mar',
 'Mar',
 'Mar',
 'Mar',
 'Mar',
 'Mar',
 'Mar',
 'Mar',
 'Mar',
 'Mar',
 'Mar',
 'Mar',
 'Mar',
 'Mar',
 'Mar',
 'Mar',
 'Apr',
 'Apr',
 'Apr',
 'Apr',
 'Apr',
 'Apr',
 'Apr',
 'Apr',
 'Apr',
 'Apr',
 'Apr',
 'Apr',
 'Apr',
 'Apr',
 'Apr',
 'Apr',
 'Apr',
 'Apr',
 'Apr',
 'Apr',
 'Apr',
 'Apr',
 'Apr',
 'Apr',
 'Apr',
 'Apr',
 'Apr',
 'Apr',
 'Apr',
 'Apr',
 'May',
 'May',
 'May',
 'May',
 'May',
 'May',
 'May',
 'May',
 'May',
 'May',
 'May',
 'May',
 'May',
 'May',
 'May',
 'May',
 'May',
 'May',
 'May',
 'May',
 'May',
 'May',
 'May',
 'May',
 'May',
 'Jun',
 'Jun',
 'Jun',
 'Jun',
 'Jun',
 'Jun',
 'Jun',
 'Jun',
 'Jun',
 'Jun',
 'Jun',
 'Jun']