In [None]:
from datetime import date, datetime, timedelta as td
import pytz
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

In [None]:
# functions to convert UTC to Pacific time zone and extract date/time elements
def convert_tz(x):
    """Return ``x`` as a datetime expressed in the US/Pacific time zone.

    A naive timestamp is interpreted as UTC. A timestamp that already carries
    a UTC offset is converted from that offset; relabelling it as UTC would
    shift the result by the size of the offset.
    """
    ts = pd.Timestamp(x)
    if ts.tzinfo is None:
        ts = ts.tz_localize('UTC')
    return ts.tz_convert('US/Pacific').to_pydatetime()


def get_year(x):
    """Pacific-time calendar year of ``x``."""
    return convert_tz(x).year


def get_month(x):
    """Pacific-time month of ``x`` as a 'YYYY-MM' string."""
    # Convert once and format from the single converted value.
    return '{0.year}-{0.month:02}'.format(convert_tz(x))


def get_date(x):
    """Pacific-time calendar date of ``x`` as a 'YYYY-MM-DD' string."""
    return '{0.year}-{0.month:02}-{0.day:02}'.format(convert_tz(x))


def get_day(x):
    """Pacific-time day of the month of ``x``."""
    return convert_tz(x).day


def get_hour(x):
    """Pacific-time hour (0-23) of ``x``."""
    return convert_tz(x).hour


def get_minute(x):
    """Pacific-time minute (0-59) of ``x``."""
    return convert_tz(x).minute


def get_day_of_week(x):
    """Pacific-time weekday of ``x`` (Monday = 0 ... Sunday = 6)."""
    return convert_tz(x).weekday()

In [None]:
# Load the step-count records from StepCount.csv (path is relative to the working directory).
# Later cells read its 'startDate', 'value' and 'sourceName' columns.
steps = pd.read_csv("StepCount.csv")

In [None]:
# Parse 'startDate' into timestamps, then add one Pacific-time column per
# calendar field by mapping the matching extractor over those timestamps.
steps['startDate'] = pd.to_datetime(steps['startDate'])
for column, extract in (
    ('year', get_year),
    ('month', get_month),
    ('date', get_date),
    ('day', get_day),
    ('hour', get_hour),
    ('dow', get_day_of_week),
):
    steps[column] = steps['startDate'].map(extract)

In [None]:
# Show the last 10 rows of steps as a quick check of the derived date/time columns.
steps.tail(10)

In [None]:
# Sum 'value' per 'date' string; the result has one row per date with the total in 'Steps'.
steps_by_date = (
    steps.groupby('date')['value']
    .sum()
    .rename('Steps')
    .reset_index()
)

In [None]:
# Show the last rows of the per-date totals.
steps_by_date.tail()

In [None]:
# Write steps_by_date to steps_per_day.csv in the working directory, without the row index.
steps_by_date.to_csv("steps_per_day.csv", index=False)

In [None]:
# Sum 'value' for every (year, sourceName) pair; totals land in the 'Steps' column.
steps_device_by_year = (
    steps.groupby(['year', 'sourceName'])['value']
    .sum()
    .rename('Steps')
    .reset_index()
)

In [None]:
# Display the totals currently held in steps_device_by_year.
steps_device_by_year

In [None]:
# Sum 'value' per year across all sources; totals land in the 'Steps' column.
# NOTE(review): this rebinds steps_device_by_year, discarding the (year, sourceName)
# breakdown assigned to the same name earlier. A distinct name may be intended; confirm.
steps_device_by_year = steps.groupby(['year'])['value'].sum().reset_index(name='Steps')

In [None]:
# Display the totals currently held in steps_device_by_year.
steps_device_by_year

In [None]:
# Smooth the per-date totals with a centered rolling mean over 10 rows, then plot it by date.
# The window counts rows, so it spans 10 calendar days only if no date is missing.
steps_by_date['RollingMeanSteps'] = steps_by_date['Steps'].rolling(10, center=True).mean()
steps_by_date.plot(
    x='date',
    y='RollingMeanSteps',
    title='Daily step counts rolling mean over 10 days',
    figsize=[10, 6],
)

In [None]:
# Turn the 'date' column into datetimes and derive the weekday number (Monday = 0).
steps_by_date['date'] = pd.to_datetime(steps_by_date['date'])
steps_by_date['dow'] = steps_by_date['date'].dt.dayofweek

In [None]:
# Bar chart of the mean daily step total for each weekday.
dow_labels = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']

# Reindex onto all seven weekday numbers (0 = Monday ... 6 = Sunday) so every bar
# sits under its own label even when a weekday has no rows in steps_by_date.
data = steps_by_date.groupby(['dow'])['Steps'].mean().reindex(range(len(dow_labels)))

n_groups = len(data)
index = np.arange(n_groups)

fig, ax = plt.subplots(figsize=[10, 6])
# Draw on the axes created above explicitly rather than relying on the current axes.
data.plot(kind='bar', ax=ax)
ax.yaxis.grid(True)

fig.suptitle('Average Steps by Day of the Week', fontsize=16)
ax.set_xticks(index)
ax.set_xticklabels(dow_labels, rotation=45)
ax.set_xlabel('Day of Week', fontsize=12, color='red')

In [None]:
# Sum 'value' per 'YYYY-MM' month string; totals land in the 'Steps' column.
total_steps_by_month = (
    steps.groupby('month')['value']
    .sum()
    .rename('Steps')
    .reset_index()
)

In [None]:
# Display the monthly step totals.
total_steps_by_month

In [None]:
# Line chart of total steps per month (a quick approach that could be tidied up).
dataset = total_steps_by_month
chart_title = 'Number of Steps per month'

# One tick position per row of the monthly table.
n_groups = len(dataset)
index = np.arange(n_groups)

ax = dataset.plot(
    kind='line',
    figsize=[20, 5],
    linewidth=4,
    alpha=1,
    color='#6684c1',
    marker='o',
    markersize=8,
    markeredgewidth=2,
    markeredgecolor='#6684c1',
    markerfacecolor='w',
)

ax.set_title(chart_title)
ax.set_ylabel('Step Count')
ax.xaxis.grid(True)
ax.yaxis.grid(True)

# Label every tick with that row's month string, rotated so the labels do not overlap.
ax.set_xticks(index)
plt.xticks(index, dataset.month, rotation=90)

plt.show()

In [None]:
# Sum 'value' per year; totals land in the 'Steps' column.
total_steps_by_years = (
    steps.groupby('year')['value']
    .sum()
    .rename('Steps')
    .reset_index()
)

In [None]:
# Display the yearly step totals.
total_steps_by_years

In [None]:
# Bar chart of total steps per year, with the final row's bar recolored.
dataset = total_steps_by_years

n_groups = len(dataset)
index = np.arange(n_groups)
bar_width = 0.4
opacity = 0.5

fig, ax = plt.subplots(figsize=[10, 6])
ax.yaxis.grid(True)

data = ax.bar(index, dataset.Steps, bar_width, alpha=opacity, color='c', label='Steps')

# The last bar belongs to the last row of the yearly table; draw it in red.
data[-1].set_color('r')

ax.set_ylabel('Steps')
ax.set_title('Total Steps Per Year')
ax.set_xticks(index)
ax.set_xticklabels(dataset.year, rotation=45)
ax.legend()

fig.tight_layout()
plt.show()

In [None]:
# Sum 'value' per Pacific-time hour of day; totals land in the 'Steps' column.
hour_steps = (
    steps.groupby('hour')['value']
    .sum()
    .rename('Steps')
    .reset_index()
)

In [None]:
# Line chart of total steps for each hour of the day, one marker per row of hour_steps.
ax = hour_steps.Steps.plot(kind='line', figsize=[10, 5], linewidth=4, alpha=1, marker='o', color='#6684c1',
                           markeredgecolor='#6684c1', markerfacecolor='w', markersize=8, markeredgewidth=2)

# Points are drawn at row positions 0..n-1. Build each tick label from the 'hour'
# value stored in that row, not from the row position, so labels stay correct
# when some hour of the day has no rows.
xlabels = hour_steps['hour'].map('{:02}:00'.format)
ax.set_xticks(range(len(xlabels)))
ax.set_xticklabels(xlabels, rotation=45, rotation_mode='anchor', ha='right')

ax.yaxis.grid(True)
ax.set_ylabel('Steps')
ax.set_xlabel('')
ax.set_title('Steps by hour of the day')

plt.show()

In [None]:
# Load the body-mass records from BodyMass.csv (path is relative to the working directory).
weight = pd.read_csv("BodyMass.csv")

In [None]:
# Display the body-mass records.
weight

In [None]:
# Load the resting heart-rate records from RestingHeartRate.csv and show how many rows were read.
resting = pd.read_csv("RestingHeartRate.csv")
len(resting)

In [None]:
# Parse 'startDate' into timestamps, then add the Pacific-time year, month and
# date columns by mapping the matching extractor over those timestamps.
resting['startDate'] = pd.to_datetime(resting['startDate'])
for column, extract in (
    ('year', get_year),
    ('month', get_month),
    ('date', get_date),
):
    resting[column] = resting['startDate'].map(extract)

In [None]:
# Plot the resting heart-rate values whose month string is '2022-01', by date.
resting.loc[resting['month'] == '2022-01'].plot(
    x='date',
    y='value',
    title='Resting HR',
    figsize=[10, 6],
)

In [None]:
# Plot the resting heart-rate values whose month string is '2021-06', by date.
resting.loc[resting['month'] == '2021-06'].plot(
    x='date',
    y='value',
    title='Resting HR',
    figsize=[10, 6],
)

In [None]:
# Load the heart-rate records from HeartRate.csv and show how many rows were read.
hr = pd.read_csv("HeartRate.csv")
len(hr)

In [None]:
# Parse 'startDate' into timestamps, then add the Pacific-time year, month, hour
# and date columns by mapping the matching extractor over those timestamps.
hr['startDate'] = pd.to_datetime(hr['startDate'])
for column, extract in (
    ('year', get_year),
    ('month', get_month),
    ('hour', get_hour),
    ('date', get_date),
):
    hr[column] = hr['startDate'].map(extract)

In [None]:
# Plot every heart-rate row whose 'date' string equals one chosen day.
test_date = '2023-09-18'
# TODO: Fix Axis for Hour of Day or Something Similar
# NOTE(review): the x axis uses 'startDate' as parsed, not the Pacific-time conversion
# that produced 'date'; confirm this is intended.
hr.loc[hr['date'] == test_date].plot(
    x='startDate',
    y='value',
    title='HR',
    figsize=[10, 6],
)

In [None]:
# Load the sleep-analysis records from SleepAnalysis.csv (path is relative to the working directory).
sleep_raw = pd.read_csv("SleepAnalysis.csv")

In [None]:
# Show the last rows of the raw sleep records.
sleep_raw.tail()

In [None]:
# Load the walking/running distance records from DistanceWalkingRunning.csv
# (path is relative to the working directory).
distance_walking_running = pd.read_csv("DistanceWalkingRunning.csv")

In [None]:
# Show the last rows of the walking/running distance records.
distance_walking_running.tail()

In [None]:
# Parse out date and time elements as Pacific time.
# Every field is derived from this frame's own 'startDate'. Deriving them from the
# heart-rate frame would attach unrelated timestamps, matched only by row index.
distance_walking_running['startDate'] = pd.to_datetime(distance_walking_running['startDate'])
distance_walking_running['year'] = distance_walking_running['startDate'].map(get_year)
distance_walking_running['month'] = distance_walking_running['startDate'].map(get_month)
distance_walking_running['hour'] = distance_walking_running['startDate'].map(get_hour)
distance_walking_running['date'] = distance_walking_running['startDate'].map(get_date)

In [None]:
# Plot every distance row whose 'date' value equals one chosen day.
test_date = '2023-09-17'
# TODO: Fix Axis for Hour of Day or Something Similar
distance_walking_running.loc[distance_walking_running['date'] == test_date].plot(
    x='startDate',
    y='value',
    title='distance_walking_running',
    figsize=[10, 6],
)