# Data Literacy
I chose to analyze the yoga workouts recorded by my Apple Watch. I exported the health data, read it into a pandas DataFrame, and summarized the key metrics.


In [1]:
import pandas as pd
import xml.etree.ElementTree
import datetime
import numpy as np 
# Location of the exported health data XML. The path is relative, so the file is
# looked up in the directory the notebook runs from.
path_to_exportxml = "export.xml"



In [2]:
# HK Record Schema 

# <Record type="HKQuantityTypeIdentifierHeartRate" sourceName="Neelesh’s Apple Watch" sourceVersion="5.0.1" device="&lt;&lt;HKDevice: 0x2827b98b0&gt;, name:Apple Watch, manufacturer:Apple, model:Watch, hardware:Watch4,4, software:5.0.1&gt;" unit="count/min" creationDate="2018-12-16 22:50:29 -0700" startDate="2018-12-16 22:50:24 -0700" endDate="2018-12-16 22:50:24 -0700" value="68">
#  <MetadataEntry key="HKMetadataKeyHeartRateMotionContext" value="0"/>
# </Record>

#<Workout workoutActivityType="HKWorkoutActivityTypeYoga" duration="30.15948544939359" durationUnit="min" totalDistance="0" totalDistanceUnit="mi" totalEnergyBurned="93.80830393469761" totalEnergyBurnedUnit="kcal" sourceName="Neelesh’s Apple Watch" sourceVersion="5.0.1" device="&lt;&lt;HKDevice: 0x282556350&gt;, name:Apple Watch, manufacturer:Apple, model:Watch, hardware:Watch4,4, software:5.0.1&gt;" creationDate="2018-12-24 22:06:28 -0700" startDate="2018-12-24 21:36:18 -0700" endDate="2018-12-24 22:06:28 -0700">
#  <MetadataEntry key="HKIndoorWorkout" value="0"/>
#</Workout>

# Identifier strings as they appear in the export: `type` on <Record> elements
# and `workoutActivityType` on <Workout> elements (see the schema samples above).
HeartRate = "HKQuantityTypeIdentifierHeartRate" 
Yoga = "HKWorkoutActivityTypeYoga"
Walking = "HKWorkoutActivityTypeWalking"
Meditation = "HKWorkoutActivityTypeMindAndBody"


In [3]:
#Adapted from Thomas Willey's https://gist.github.com/thomaswilley/5079f1106b1ddf2c71b6

# ElementTree path expressions: './/Tag' matches every <Tag> element at any
# depth below the element the search starts from.
find_rec_workout = './/Workout'
find_rec_record = './/Record'

def iter_records(healthdata, find_rec_name):
    """Yield one dict per element matching ``find_rec_name`` under ``healthdata``.

    Each dict starts as a copy of the attributes of ``healthdata`` itself and is
    then overlaid with the matched element's own attributes. Attributes whose
    name contains "date" (case-insensitive) are parsed into timezone-aware
    ``datetime.datetime`` objects; every other attribute keeps its raw string
    value, so numeric fields still need an explicit cast downstream.

    Parameters
    ----------
    healthdata : xml.etree.ElementTree.Element
        Element to search below (the export's root element).
    find_rec_name : str
        ElementTree path expression, e.g. ``'.//Workout'``.

    Yields
    ------
    dict
        Attribute name -> value for one matched element.

    Raises
    ------
    ValueError
        If a date attribute does not match ``'%Y-%m-%d %H:%M:%S %z'``.
    """
    root_attrs = healthdata.attrib
    for rec in healthdata.iterfind(find_rec_name):
        # Fresh dict per record so the root element's attributes are never mutated.
        rec_dict = dict(root_attrs)
        for k, v in rec.attrib.items():
            if 'date' in k.lower():
                rec_dict[k] = datetime.datetime.strptime(v, '%Y-%m-%d %H:%M:%S %z')
            else:
                # Without this `else` the raw string would overwrite the parsed date.
                rec_dict[k] = v
        yield rec_dict

# Parse the export file and keep its root element; both frames below are built
# from this same parsed tree.
e = xml.etree.ElementTree.parse(path_to_exportxml).getroot()
# One row per <Workout> element and one row per <Record> element, respectively.
df_workouts = pd.DataFrame(list(iter_records(e,find_rec_workout)))
df_records = pd.DataFrame(list(iter_records(e, find_rec_record)))


In [4]:
## I wanted a separate data frame for each activity and interesting metric,
## so I started with one for the heart rate data.
heart_rate_df = df_records[df_records['type'] == HeartRate]

# Written out as a CSV so the same data can be analyzed outside the notebook (e.g. in Tableau).
heart_rate_df.to_csv("heart_rate.csv")

# Preview of the columns of interest. This must stay the last expression in the
# cell: Jupyter only displays the value of a cell's final expression.
hr_cols = ['value', 'unit', 'creationDate', 'startDate', 'endDate']
heart_rate_df[hr_cols].head(10)
                           

In [5]:
## Then I created a data frame for each activity type.
## The identifiers reuse the constants defined alongside the schema notes;
## running has no constant there, so its identifier is spelled out.

yoga_df = df_workouts[df_workouts['workoutActivityType'] == Yoga]

running_df = df_workouts[df_workouts['workoutActivityType'] == 'HKWorkoutActivityTypeRunning']

walking_df = df_workouts[df_workouts['workoutActivityType'] == Walking]

meditation_df = df_workouts[df_workouts['workoutActivityType'] == Meditation]

# CSV copies of each frame for analysis outside the notebook.
yoga_df.to_csv("yoga_workout.csv")
running_df.to_csv("running_workout.csv")
walking_df.to_csv("walking_workout.csv")
meditation_df.to_csv("meditation_workout.csv")


In [56]:
# Utility functions.
# Find the row(s) whose duration equals the max (or min) and read the end date from that row's 'endDate' column.



def get_date_for_max_activity(yoga_df):
    """Return ``(date, duration)`` for the longest session in ``yoga_df``.

    Durations are compared numerically. The export stores them as strings, and
    comparing strings would rank "92.6" above "100.3".

    Parameters
    ----------
    yoga_df : pandas.DataFrame
        Workout rows with a ``duration`` column (numbers or numeric strings)
        and an ``endDate`` column.

    Returns
    -------
    tuple of (str, str)
        The end date of the longest session formatted by ``get_formatted_date``
        (first matching row if several tie), and the first five characters of
        that session's duration.

    Raises
    ------
    ValueError
        If ``yoga_df`` has no row with a usable duration, or a duration is not numeric.
    """
    durations = pd.to_numeric(yoga_df['duration'])
    max_row = yoga_df[durations == durations.max()]
    if max_row.empty:
        raise ValueError("no rows with a numeric duration")
    d = get_formatted_date(max_row['endDate'])
    # Short display form of the stored value, e.g. '92.60123' -> '92.60'.
    duration = str(max_row['duration'].iloc[0])[0:5]
    return d, duration

def get_date_for_min_activity(yoga_df):
    """Return ``(date, duration)`` for the shortest session in ``yoga_df``.

    Durations are compared numerically. The export stores them as strings, and
    comparing strings would rank "10.25" below "9.5".

    Parameters
    ----------
    yoga_df : pandas.DataFrame
        Workout rows with a ``duration`` column (numbers or numeric strings)
        and an ``endDate`` column.

    Returns
    -------
    tuple of (pandas.Series, str)
        The unformatted ``endDate`` value(s) of the row(s) with the smallest
        duration, and the first five characters of that duration.

    Raises
    ------
    ValueError
        If ``yoga_df`` has no row with a usable duration, or a duration is not numeric.
    """
    durations = pd.to_numeric(yoga_df['duration'])
    min_row = yoga_df[durations == durations.min()]
    if min_row.empty:
        raise ValueError("no rows with a numeric duration")
    d = min_row['endDate']
    # Short display form of the stored value, e.g. '1.2704567' -> '1.270'.
    duration = str(min_row['duration'].iloc[0])[0:5]
    return d, duration

def get_formatted_date(aDate):
    """Return ``aDate`` as an ``MM/DD/YYYY`` string.

    ``aDate`` may be a single date-like value or a pandas Series of them. The
    input is parsed with ``pd.to_datetime`` and only the first element is
    formatted and returned.
    """
    # Wrapping in a Series gives scalars and Series the same `.dt` code path.
    parsed = pd.Series(pd.to_datetime(aDate))
    return parsed.dt.strftime('%m/%d/%Y').iloc[0]

# Look up the longest yoga session; the helper hands back its date and a
# shortened duration string, and the date is run through get_formatted_date for printing.
date, duration = get_date_for_max_activity(yoga_df)
print(f"max activity duration : {duration} on date: {get_formatted_date(date)}")

#d = get_formatted_date(d)
#print(d)

max activity duration : 92.60 on date: 09/01/2019


In [76]:
# The data story: a printed narrative built from the yoga and meditation frames.

# Number of yoga rows plus the smallest and largest 'creationDate' values in the frame.
print(f"There are a total of {yoga_df['duration'].count()} records of me doing yoga as an activity between {get_formatted_date(yoga_df['creationDate'].min())} and the last day of activity before the export of data on {get_formatted_date(yoga_df['creationDate'].max())}.")

print("")

# 'duration' is cast to float before averaging. The trailing [0:] slice keeps
# the whole string, so the mean is printed at full precision.
print(f"On average, my yoga session lasted for {str(yoga_df['duration'].astype('float').mean())[0:]} minutes.")

# Longest session (rebinds the notebook-level `date` and `duration`).
date, duration = get_date_for_max_activity(yoga_df)
print(f"Wait, what was the longest session? Turns out, my longest yoga sitting was {duration} min on {get_formatted_date(date)}")

print("")
# Shortest session (rebinds `date` and `duration` again).
date, duration = get_date_for_min_activity(yoga_df)
print(f"The shortest? Well, shortest yoga sitting was {duration} mins on {get_formatted_date(date)}")

print("The shortest was likely me phat fingering my iPhone screen\n")


print("Hmm...wonder what I was thinking then?")

# Meditation summary: row count, mean duration and max duration, each cast to float first.
print(f"During the same time, I meditated a total of {meditation_df['duration'].count()} times with an average meditation duration of {meditation_df['duration'].astype('float').mean()}! \nThere were several times where I must have forgotten to turn off the meditation workout activity.\nHow do I know that, because I don't see myself sitting still for {meditation_df['duration'].astype('float').max()} minutes in one sitting! \nHAHAHA")

print("")

print(f"I still continue to do my yoga and monitor the times but this was a fun data exercise")




There are a total of 363 records of me doing yoga as an activity between 12/20/2018 and the last day of activity before the export of data on 04/03/2020.

On average, my yoga session lasted for 50.11466450496921 minutes.
Wait, what was the longest session? Turns out, my longest yoga sitting was 92.60 min on 09/01/2019

The shortest? Well, shortest yoga sitting was 1.270 mins on 01/26/2019
The shortest was likely me phat fingering my iPhone screen

Hmm...wonder what I was thinking then?
During the same time, I meditated a total of 280 times with an average meditation duration of 25.621953048074534! 
There were several times where I must have forgotten to turn off the meditation workout activity.
How do I know that, because I don't see myself sitting still for 1172.445536818107 minutes in one sitting! 
HAHAHA

I still continue to do my yoga and monitor the times but this was a fun data exercise
