In [1]:
#imports for parsing
import xml.etree.ElementTree as ET
from datetime import datetime
# Load the Apple Health export once; `root` is the element the later cells search.
export_tree = ET.parse('export.xml')
root = export_tree.getroot()

#imports for plotting
from matplotlib import pyplot as plt
import gif 

Parse the XML and look for 'Record' elements. Step counts are stored in records whose type is 'HKQuantityTypeIdentifierStepCount'.

Apple reports Apple Watch step count and iPhone step count separately. To prevent double counting, this code looks only for step counts reported by the iPhone.

In [2]:
# Parallel lists, one entry per matching record, in the order the records
# appear in the export:
#   date_list[k] -> 'YYYY-MM-DD' start date of the record
#   steps[k]     -> step count of that same record
date_list = []
steps = []

for record in root.findall('Record'):
    # Only step-count records are of interest.
    if record.get('type') != 'HKQuantityTypeIdentifierStepCount':
        continue

    # Element.get() returns None when the attribute is missing, and
    # `'iPhone' in None` raises TypeError. Defaulting to '' makes a record
    # without a 'device' attribute simply count as "not an iPhone record".
    if 'iPhone' not in record.get('device', ''):
        continue

    # First 10 characters of startDate are the date part (time is dropped).
    date_list.append(record.get('startDate')[0:10])
    steps.append(int(record.get('value')))

Combine multiple step count entries for the same day

In [3]:
# Every distinct day that has at least one record, in ascending order
# (ISO 'YYYY-MM-DD' strings sort chronologically).
unique_dates = sorted(set(date_list))

# Start every day at zero, then add each record's count to its day.
steps_by_day = dict.fromkeys(unique_dates, 0)
for day, count in zip(date_list, steps):
    steps_by_day[day] += count

Apply a 2-week moving average to smooth out sharp increases and decreases

In [4]:
# Days and their totals as index-aligned lists. steps_by_day was built from
# the sorted unique dates, so both lists are in chronological order.
key_list = list(steps_by_day.keys())
value_list = list(steps_by_day.values())

window_size = 14 #14 day moving average

# One average per full window of `window_size` consecutive entries.
# Entries are days that have records, so a window may span more than 14
# calendar days if some days are missing from the export.
# range() is empty when there are fewer entries than window_size.
moving_average = [
    sum(value_list[start:start + window_size]) / window_size
    for start in range(len(value_list) - window_size + 1)
]

# Pad the tail with the raw daily values that have no full window, so the
# series has exactly one point per day in key_list. Slicing (instead of
# indexing value_list[i - window_size + 1]) yields the same values for long
# inputs and avoids an IndexError when there are fewer than window_size - 1
# days of data.
moving_average.extend(value_list[len(moving_average):])

# NOTE: this rebinds `steps`, which until now held the per-record counts used
# by the per-day aggregation. Re-running that aggregation after this cell,
# without re-running the parsing cell first, would produce wrong totals.
steps = moving_average

Define annotations to be added on plot

In [5]:
# One row per annotation: (date string, label drawn on the plot, y position of the label)
annotations = [
    ('2016-12-26', "Winter '16 Co-op",    8500),
    ('2017-08-17', "Flew home",           1450),
    ('2017-04-09', "Summer '17 (2B)",     13000),
    ('2017-09-01', "Fall '17 Co-op",      11500),
    ('2018-01-18', "Winter '18 (3A)",     9000),
    ('2018-05-15', "Summer '18 Co-op",    12500),
    ('2018-09-15', "Fall '18 (3B)",       13500),
    ('2018-12-15', "Flew home",           3000),
    ('2019-02-01', "Winter '19 Co-op",    8000),
    ('2019-05-01', "Summer '19 (4A)",     13500),
    ('2019-08-18', "Flew home",           1450),
    ('2019-10-01', "Fall '19 Co-op (BC)", 12700),
    ('2020-02-28', "Quarantine begins",   11000),
]

# x position of each annotation: the index of its date in key_list.
# list.index raises ValueError if a date has no step data.
notable_dates = [key_list.index(day) for day, _, _ in annotations]

# event description for corresponding notable_dates
events = [label for _, label, _ in annotations]

# y position of tag
y_pos = [height for _, _, height in annotations]

Plot and save as gif

In [6]:
# Text styling shared by the title and the x tick label
FONT = dict(fontsize=16, ha='center')

# Use one background colour for the axes, the figure and the saved frames
bg = '#fcfeff'
plt.rcParams.update({
    "axes.facecolor": bg,
    "figure.facecolor": bg,
    "savefig.facecolor": bg,
})

# Number of days in the series; also the number of animation frames
num = len(key_list)

@gif.frame
def plot(date):
    """Draw the animation frame for day index ``date``.

    ``date`` is an integer position in the per-day series (``key_list`` and
    ``steps``), not a date string. The frame shows the series for all days
    before ``date``, a single x tick at ``date`` labelled with that day's
    month and year, and every annotation whose day index is below ``date``.
    """
    fig, ax = plt.subplots(figsize=(10, 6))
    ax.plot(list(range(date)), steps[:date], color='#000000')

    #set axis limits (fixed across frames so the line grows left to right)
    ax.set_xlim([0, num])
    ax.set_ylim([0, max(steps)])

    #axis ticks
    ax.set_xticks([date])
    ax.set_yticks([0, 1000, 5000, 10000, 15000])

    #tick labels
    # Look the day up in key_list (one entry per day, aligned with `steps`).
    # date_list has one entry per raw record, so indexing it with a day index
    # would label the tick with the wrong date.
    tick_day = datetime.strptime(key_list[date], '%Y-%m-%d')
    ax.set_xticklabels([tick_day.strftime("%b '%y")], **FONT)

    ax.set_title('Number of Steps Per Day', **FONT)

    #add annotations that the line has already passed
    style = dict(size=10, color='red', ha='center')
    for idx, d in enumerate(notable_dates):
        if d < date:
            ax.text(d, y_pos[idx], events[idx], **style)

# Render one frame per day index
frames = [plot(day_index) for day_index in range(num)]

# Repeat the final frame 400 times so the finished chart lingers ("pause" time)
frames.extend([frames[-1]] * 400)

gif.save(frames, 'dailySteps.gif', duration=25)