# Test code for converting an XML file exported from the Health app to JSON

## Import

In [3]:
import xml.etree.ElementTree as ET
import json
import os
import sys
import pandas as pd
from collections import defaultdict

# Put the directory one level above the working directory on the import
# path (presumably where the project-level `Update_Git` module lives).
current_path = os.getcwd()
parrent_path = os.path.abspath(os.path.join(current_path, os.pardir))
sys.path.append(parrent_path)

from Update_Git import git_add, git_commit, git_push


## Update Git Repo

In [8]:
# Publish the current state of this notebook through the `Update_Git` helpers.
# NOTE(review): the helpers are defined outside this file; presumably they
# wrap `git add`, `git commit -m` and `git push` — confirm.
# NOTE(review): "dahsbord" in the commit message looks like a typo for
# "dashboard"; left untouched here because it is a runtime string.
file_path = os.path.join(current_path, 'Read_XML.ipynb')  # notebook looked up in the working directory
git_add(file_path)
git_commit('Updated dahsbord: Read_XML.ipynb')  # argument is presumably the commit message
git_push('main')  # argument is presumably the target branch

''

## Read the XML file (takes about 1 minute)

In [4]:
# Load the Health export into memory as an ElementTree.
# The path is written as a raw string: in a normal string literal the
# sequences "\M", "\D", "\K", "\A" and "\e" are invalid escapes, which Python
# only tolerates with a warning and plans to reject as a syntax error.
data_path = os.path.abspath(r"E:\Maylab\Data\Komprimeret arkiv\Albi\eksport.xml")
tree = ET.parse(data_path)  # parses the whole file in one call
root = tree.getroot()  # top-level element; the cells below search its children

# Extract the required info from the data

## Clean the data

In [61]:
# Shared accumulator: each extraction cell below appends its row dictionaries
# to this list, and the final cell turns the list into a DataFrame.
data = []

### General data
 "Category": "User Info"

In [62]:
# Read the user's characteristics from the "Me" element, if the export has one
user_info = root.find("Me")
if user_info is not None:
    user_attribs = user_info.attrib

    # Every characteristic falls back to "N/A" when its attribute is absent
    date_of_birth = user_attribs.get("HKCharacteristicTypeIdentifierDateOfBirth", "N/A")
    biological_sex = user_attribs.get("HKCharacteristicTypeIdentifierBiologicalSex", "N/A")
    blood_type = user_attribs.get("HKCharacteristicTypeIdentifierBloodType", "N/A")
    skin_type = user_attribs.get("HKCharacteristicTypeIdentifierFitzpatrickSkinType", "N/A")
    medications_use = user_attribs.get("HKCharacteristicTypeIdentifierCardioFitnessMedicationsUse", "N/A")

    # Translate the raw sex identifier into a readable label;
    # any identifier not listed here is reported as "Unknown"
    gender = {
        "HKBiologicalSexFemale": "Female",
        "HKBiologicalSexMale": "Male",
    }.get(biological_sex, "Unknown")

    user_row = {
        "Category": "User Info",
        "Date of Birth": date_of_birth,
        "Biological Sex": gender,
        "Blood Type": blood_type,
        "Skin Type": skin_type,
        "Medications Use": medications_use,
    }
    data.append(user_row)

### Height and body mass
"Category": "Health Data"

In [63]:
# Collect every height and body-mass record as a "Health Data" row
wanted_types = ("HKQuantityTypeIdentifierHeight", "HKQuantityTypeIdentifierBodyMass")

# Output column label -> XML attribute it is read from (order = column order)
field_map = {
    "Source": "sourceName",
    "Value": "value",
    "Unit": "unit",
    "Creation Date": "creationDate",
    "Start Date": "startDate",
    "End Date": "endDate",
}

for record in root.findall("Record"):
    record_type = record.attrib.get("type", "")
    if record_type not in wanted_types:
        continue

    # Attributes missing from the record are reported as "N/A"
    row = {"Category": "Health Data", "Type": record_type}
    for label, attribute in field_map.items():
        row[label] = record.attrib.get(attribute, "N/A")
    data.append(row)

## Step Counts
"Category": "Daily Step Count"

In [64]:
# Sum the step counts per day for the first `num_days` distinct days that
# appear among the step-count records, then add one row per day to `data`.
daily_steps = defaultdict(int)
num_days = 7

for record in root.findall("Record"):
    record_type = record.attrib.get("type", "")
    if record_type != "HKQuantityTypeIdentifierStepCount":
        continue

    step_value = int(record.attrib.get("value", 0))  # a missing value counts as 0 steps
    start_date = record.attrib.get("startDate", "").split(" ")[0]  # text before the first space (the date part)
    if not start_date:
        continue  # without a start date the record cannot be assigned to a day

    # Stop only once a record for an additional day shows up. Stopping as
    # soon as the last wanted day is first seen would leave that day with a
    # single record instead of its full total.
    # NOTE(review): assumes the records of one day are adjacent in the
    # export — confirm against a real file.
    if start_date not in daily_steps and len(daily_steps) >= num_days:
        break

    daily_steps[start_date] += step_value

# Latest date first (plain string comparison on the date text)
sorted_steps = sorted(daily_steps.items(), key=lambda x: x[0], reverse=True)

# One output row per day
for date, total_steps in sorted_steps:
    data.append({
        "Category": "Daily Step Count",
        "Date": date,
        "Total Steps": total_steps
    })

## Heart Rate Variability
"Category": "HRV"

In [65]:
# Average the HRV (SDNN) samples per day for the first `num_days` distinct
# days that appear among the HRV records, then add one row per day to `data`.
daily_hrv = defaultdict(list)
num_days = 7

for record in root.findall("Record"):
    record_type = record.attrib.get("type", "")
    if record_type != "HKQuantityTypeIdentifierHeartRateVariabilitySDNN":
        continue

    value = float(record.attrib.get("value", 0))  # a missing value is treated as 0
    start_date = record.attrib.get("startDate", "").split(" ")[0]  # text before the first space (the date part)
    if not start_date:
        continue  # without a start date the sample cannot be assigned to a day

    # Stop only once a sample for an additional day shows up. Stopping as
    # soon as the last wanted day is first seen would make that day's
    # "average" a single sample.
    # NOTE(review): assumes the samples of one day are adjacent in the
    # export — confirm against a real file.
    if start_date not in daily_hrv and len(daily_hrv) >= num_days:
        break

    daily_hrv[start_date].append(value)

# Mean of each day's samples, rounded to two decimals
avg_hrv_per_day = {
    date: round(sum(hrv_values) / len(hrv_values), 2)
    for date, hrv_values in daily_hrv.items()
}

# Latest date first (plain string comparison on the date text)
sorted_hrv = sorted(avg_hrv_per_day.items(), key=lambda x: x[0], reverse=True)

# One output row per day
for date, avg_hrv in sorted_hrv:
    data.append({
        "Category": "HRV",
        "Date": date,
        "Average HRV (ms)": avg_hrv
    })

# Collect all data and create JSON & CSV

In [66]:
# Output files, written relative to the working directory
csv_path = "health_data.csv"
json_path = "health_data.json"

# Gather every collected row into a single table
df = pd.DataFrame(data)

# Export the table: CSV without the index column, JSON as a list of row objects
df.to_csv(csv_path, index=False)
df.to_json(json_path, orient="records", indent=2)