In [75]:
import xml.etree.ElementTree as ET
import pandas as pd
from datetime import datetime as dt
import pytz

# Parse the Apple Health export once and keep the XML root element;
# the later cells iterate over its children to pull out individual record types.
# NOTE(review): this is an absolute, machine-specific path — adjust before running elsewhere.
export_xml_path = '/Users/naoki/Lecture/ds_program_health/apple_health_export/export.xml'
tree = ET.parse(export_xml_path)
root = tree.getroot()

In [76]:
# Build df_walk: per-day totals of the "DistanceWalkingRunning" records,
# bucketed by calendar day in the Asia/Tokyo timezone.
datetime_list = []
walking = []

for child in root:
    data = child.attrib

    # Elements with no `type` attribute, or a different type, are not of interest.
    if data.get('type') != "HKQuantityTypeIdentifierDistanceWalkingRunning":
        continue

    # Read both fields before appending either one, so an incomplete record
    # can never leave the two lists with different lengths. Only a missing
    # attribute is tolerated; any other error is allowed to surface.
    try:
        start, value = data['startDate'], data['value']
    except KeyError:
        continue

    datetime_list.append(start)
    walking.append(value)

df = pd.DataFrame({
    'datetime': datetime_list,
    'walking_running': walking
})

df['walking_running'] = df['walking_running'].astype(float)

# Convert to Japan time so each record lands on the local calendar day.
df['datetime'] = pd.to_datetime(df['datetime'], utc=True)
df['datetime'] = df['datetime'].dt.tz_convert('Asia/Tokyo')

# Sum walking_running per calendar day.
df_grouped = df.groupby(df['datetime'].dt.date)['walking_running'].sum().reset_index()

# Keep only the analysis window (both ends inclusive).
# The grouped keys are plain `date` objects, so the bounds need no timezone.
start_date = dt(2023, 12, 14).date()
end_date = dt(2024, 1, 13).date()

df_walk = df_grouped[(df_grouped['datetime'] >= start_date) & (df_grouped['datetime'] <= end_date)]

In [77]:
# Show the per-day walking_running totals for the selected date range.
df_walk

Unnamed: 0,datetime,walking_running
260,2023-12-14,5.817344
261,2023-12-15,6.845698
262,2023-12-16,4.403902
263,2023-12-17,3.235018
264,2023-12-18,2.937305
265,2023-12-19,5.187411
266,2023-12-20,4.613
267,2023-12-21,4.093291
268,2023-12-22,7.618255
269,2023-12-23,2.845942


In [78]:
# Build df_energy: per-day totals of the "ActiveEnergyBurned" records,
# bucketed by calendar day in the Asia/Tokyo timezone.
datetime_list = []
energy = []

for child in root:
    data = child.attrib

    # Elements with no `type` attribute, or a different type, are not of interest.
    if data.get('type') != "HKQuantityTypeIdentifierActiveEnergyBurned":
        continue

    # Read both fields before appending either one, so an incomplete record
    # can never leave the two lists with different lengths. Only a missing
    # attribute is tolerated; any other error is allowed to surface.
    try:
        start, value = data['startDate'], data['value']
    except KeyError:
        continue

    datetime_list.append(start)
    energy.append(value)

df = pd.DataFrame({
    'datetime': datetime_list,
    'ActiveEnergy': energy
})

df['ActiveEnergy'] = df['ActiveEnergy'].astype(float)

# Convert to Japan time so each record lands on the local calendar day.
df['datetime'] = pd.to_datetime(df['datetime'], utc=True)
df['datetime'] = df['datetime'].dt.tz_convert('Asia/Tokyo')

# Sum ActiveEnergy per calendar day.
df_grouped = df.groupby(df['datetime'].dt.date)['ActiveEnergy'].sum().reset_index()

# Keep only the analysis window (both ends inclusive).
# The grouped keys are plain `date` objects, so the bounds need no timezone.
start_date = dt(2023, 12, 14).date()
end_date = dt(2024, 1, 13).date()

df_energy = df_grouped[(df_grouped['datetime'] >= start_date) & (df_grouped['datetime'] <= end_date)]

In [79]:
# Show the per-day ActiveEnergy totals for the selected date range.
df_energy

Unnamed: 0,datetime,ActiveEnergy
267,2023-12-14,224.383
268,2023-12-15,266.245
269,2023-12-16,156.181
270,2023-12-17,129.771
271,2023-12-18,139.848
272,2023-12-19,233.868
273,2023-12-20,174.916
274,2023-12-21,163.801
275,2023-12-22,306.996
276,2023-12-23,87.97


In [80]:
# Build df_stepup: per-day totals of the "FlightsClimbed" records,
# bucketed by calendar day in the Asia/Tokyo timezone.
datetime_list = []
step = []

for child in root:
    data = child.attrib

    # Elements with no `type` attribute, or a different type, are not of interest.
    if data.get('type') != "HKQuantityTypeIdentifierFlightsClimbed":
        continue

    # Read both fields before appending either one, so an incomplete record
    # can never leave the two lists with different lengths. Only a missing
    # attribute is tolerated; any other error is allowed to surface.
    try:
        start, value = data['startDate'], data['value']
    except KeyError:
        continue

    datetime_list.append(start)
    step.append(value)

df = pd.DataFrame({
    'datetime': datetime_list,
    'stepupCount': step
})

df['stepupCount'] = df['stepupCount'].astype(float)

# Convert to Japan time so each record lands on the local calendar day.
df['datetime'] = pd.to_datetime(df['datetime'], utc=True)
df['datetime'] = df['datetime'].dt.tz_convert('Asia/Tokyo')

# Sum stepupCount per calendar day.
df_grouped = df.groupby(df['datetime'].dt.date)['stepupCount'].sum().reset_index()

# Keep only the analysis window (both ends inclusive).
# The grouped keys are plain `date` objects, so the bounds need no timezone.
start_date = dt(2023, 12, 14).date()
end_date = dt(2024, 1, 13).date()

df_stepup = df_grouped[(df_grouped['datetime'] >= start_date) & (df_grouped['datetime'] <= end_date)]

In [81]:
# Show the per-day stepupCount totals for the selected date range.
df_stepup

Unnamed: 0,datetime,stepupCount
166,2023-12-14,14.0
167,2023-12-15,12.0
168,2023-12-16,6.0
169,2023-12-17,5.0
170,2023-12-18,8.0
171,2023-12-19,20.0
172,2023-12-20,9.0
173,2023-12-21,11.0
174,2023-12-22,17.0
175,2023-12-23,7.0


In [83]:
import pandas as pd
from datetime import datetime as dt
import pytz

# Build df_step: per-day totals of the "StepCount" records,
# bucketed by calendar day in the Asia/Tokyo timezone.
datetime_list = []
counts = []

for child in root:
    data = child.attrib

    # Elements with no `type` attribute, or a different type, are not of interest.
    if data.get('type') != 'HKQuantityTypeIdentifierStepCount':
        continue

    # Read both fields before appending either one, so an incomplete record
    # can never leave the two lists with different lengths. Only a missing
    # attribute is tolerated; any other error is allowed to surface.
    try:
        start, value = data['startDate'], data['value']
    except KeyError:
        continue

    datetime_list.append(start)
    counts.append(value)

df = pd.DataFrame({
    'datetime': datetime_list,
    'stepCount': counts
})

df['stepCount'] = df['stepCount'].astype(int)

# Convert to Japan time so each record lands on the local calendar day.
df['datetime'] = pd.to_datetime(df['datetime'], utc=True)
df['datetime'] = df['datetime'].dt.tz_convert('Asia/Tokyo')

# Sum stepCount per calendar day.
df_grouped = df.groupby(df['datetime'].dt.date)['stepCount'].sum().reset_index()

# Keep only the analysis window (both ends inclusive).
# The grouped keys are plain `date` objects, so the bounds need no timezone.
start_date = dt(2023, 12, 14).date()
end_date = dt(2024, 1, 13).date()

df_step = df_grouped[(df_grouped['datetime'] >= start_date) & (df_grouped['datetime'] <= end_date)]


In [84]:
# Show the per-day stepCount totals for the selected date range.
df_step

Unnamed: 0,datetime,stepCount
260,2023-12-14,8676
261,2023-12-15,9669
262,2023-12-16,6369
263,2023-12-17,4866
264,2023-12-18,3885
265,2023-12-19,7178
266,2023-12-20,6604
267,2023-12-21,6028
268,2023-12-22,10730
269,2023-12-23,4002


In [87]:
# Join the four daily tables on the calendar day. Outer joins keep a day even
# when one of the metrics has no row for it (the gap then shows up as NaN).
merged_df = (
    df_step
    .merge(df_stepup, on='datetime', how='outer')
    .merge(df_walk, on='datetime', how='outer')
    .merge(df_energy, on='datetime', how='outer')
)
merged_df

Unnamed: 0,datetime,stepCount,stepupCount,walking_running,ActiveEnergy
0,2023-12-14,8676,14.0,5.817344,224.383
1,2023-12-15,9669,12.0,6.845698,266.245
2,2023-12-16,6369,6.0,4.403902,156.181
3,2023-12-17,4866,5.0,3.235018,129.771
4,2023-12-18,3885,8.0,2.937305,139.848
5,2023-12-19,7178,20.0,5.187411,233.868
6,2023-12-20,6604,9.0,4.613,174.916
7,2023-12-21,6028,11.0,4.093291,163.801
8,2023-12-22,10730,17.0,7.618255,306.996
9,2023-12-23,4002,7.0,2.845942,87.97


In [88]:
# Write the merged daily table to health.csv (path is relative to the working directory),
# leaving out the DataFrame index column.
merged_df.to_csv('health.csv', index=False)