In [None]:
import numpy as np
import pandas as pd
from statistics import mean
import matplotlib as mpl
import matplotlib.pyplot as plt
import os

In [None]:
import xml.etree.ElementTree as ET
import re
from datetime import datetime

def _parse_tcx_time(text):
    """Parse a TCX UTC timestamp, with or without fractional seconds."""
    stamp = text.strip()
    for fmt in ('%Y-%m-%dT%H:%M:%S.%fZ', '%Y-%m-%dT%H:%M:%SZ'):
        try:
            return datetime.strptime(stamp, fmt)
        except ValueError:
            continue
    raise ValueError(f"unrecognised TCX timestamp: {text!r}")


def readtcx(path):
    """Read the heart-rate samples from a TCX file.

    Parameters
    ----------
    path : str
        Location of the ``.tcx`` file.

    Returns
    -------
    pandas.DataFrame
        One row per trackpoint that carries both a timestamp and a heart
        rate, with columns ``time`` (naive ``datetime``) and ``hr``
        (beats per minute as ``float``).

    Raises
    ------
    ValueError
        If a timestamp is in neither supported format or a heart-rate value
        is not numeric.
    """
    with open(path) as xml_file:
        xml_str = xml_file.read()

    # Drop the first default namespace declaration so elements can be looked
    # up by their bare tag names below.
    xml_str = re.sub(' xmlns="[^"]+"', '', xml_str, count=1)
    root = ET.fromstring(xml_str)

    heartrate_data = []
    for activity in root.findall('.//Activity'):
        for tracking_point in activity.findall('.//Trackpoint'):
            # Look the elements up by name instead of relying on child order.
            time_node = tracking_point.find('Time')
            value_node = tracking_point.find('HeartRateBpm/Value')
            if time_node is None or not (time_node.text or '').strip():
                continue
            # Trackpoints without a heart-rate reading are skipped rather
            # than aborting the whole import.
            if value_node is None or not (value_node.text or '').strip():
                continue
            heartrate_data.append(
                [_parse_tcx_time(time_node.text), float(value_node.text)]
            )

    return pd.DataFrame(heartrate_data, columns=['time', 'hr'])

In [None]:
# Load the heart-rate samples from the walking1 TCX export and show the
# resulting frame (columns: time, hr) as the cell output.
heartrate_data = readtcx("../data/Lucas/walking1/walking1.tcx")
heartrate_data

In [None]:
def time_diff(df):
    """Return the mean spacing between consecutive ``"Time (s)"`` values.

    The column is read by position, so the result does not depend on how the
    frame is indexed (e.g. after filtering or slicing).

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a ``"Time (s)"`` column.

    Returns
    -------
    float
        Mean of the differences between each row and the one before it.

    Raises
    ------
    statistics.StatisticsError
        If the frame has fewer than two rows, so there is no difference to
        average.
    """
    timestamps = df["Time (s)"].to_numpy()
    # np.diff yields value[i + 1] - value[i] for every adjacent pair.
    return mean(np.diff(timestamps).tolist())

def read_phyphox(parent_dir):
    """Load a phyphox export and merge its sensors into one frame.

    Reads ``Accelerometer.csv``, ``Gyroscope.csv``, ``Location.csv`` and
    ``meta/time.csv`` from ``parent_dir``. Accelerometer and gyroscope are
    truncated to the same number of rows and placed on a shared, evenly
    spaced time axis (the mean of both sensors' average sample spacing).
    Location rows are then merged in by time and gaps are filled by
    interpolation.

    Parameters
    ----------
    parent_dir : str
        Directory containing the phyphox CSV export.

    Returns
    -------
    pandas.DataFrame
        One row per sample with a ``time`` column (datetime, built from the
        relative time plus the ``START`` event's system time) and the renamed
        sensor columns. Rows that still contain missing values after
        interpolation are dropped.

    Raises
    ------
    IndexError
        If ``meta/time.csv`` has no ``START`` event.
    """
    acc = pd.read_csv(os.path.join(parent_dir, "Accelerometer.csv"))
    gyro = pd.read_csv(os.path.join(parent_dir, "Gyroscope.csv"))
    loc = pd.read_csv(os.path.join(parent_dir, "Location.csv"))

    # Work on explicit copies so the column assignments below never write
    # into a view of the frames returned by read_csv.
    data_len = min(len(acc), len(gyro))
    acc = acc.iloc[:data_len].copy()
    gyro = gyro.iloc[:data_len].copy()

    time_step = mean([time_diff(acc), time_diff(gyro)])

    # Put both sensors on the same evenly spaced time axis in one vectorised
    # assignment (row i gets time_step * i).
    shared_time = time_step * np.arange(data_len)
    acc["Time (s)"] = shared_time
    gyro["Time (s)"] = shared_time

    acc = acc.set_index("Time (s)")
    gyro = gyro.set_index("Time (s)")
    loc = loc.set_index("Time (s)")

    merged = acc.join(gyro, how="outer")
    # Location rows are interleaved by time; interpolation then fills the
    # sensor columns on location rows and vice versa.
    merged = pd.concat([merged, loc]).sort_index().interpolate()

    # Rename columns
    merged.index.names = ["time"]
    merged = merged.rename(columns={
        "Acceleration x (m/s^2)": "acceleration_x",
        "Acceleration y (m/s^2)": "acceleration_y",
        "Acceleration z (m/s^2)": "acceleration_z",
        "Gyroscope x (rad/s)": "gyroscope_x",
        "Gyroscope y (rad/s)": "gyroscope_y",
        "Gyroscope z (rad/s)": "gyroscope_z",
        "Latitude (°)": "latitude",
        "Longitude (°)": "longitude",
        "Height (m)": "height",
        "Velocity (m/s)": "velocity",
        "Direction (°)": "direction",
        "Horizontal Accuracy (m)": "h_accuracy",
        "Vertical Accuracy (m)": "v_accuracy",
    })
    merged = merged.dropna()

    time_df = pd.read_csv(os.path.join(parent_dir, "meta", "time.csv"))
    # Take the first START row by position; its row label is not
    # guaranteed to be 0.
    start_time = time_df.loc[time_df["event"] == "START", "system time"].iloc[0]

    merged = merged.reset_index()
    merged["time"] = pd.to_datetime(merged["time"] + start_time, unit="s")
    return merged

In [None]:
# Load the phyphox recording so this cell also works on a fresh kernel
# (previously `phyphox_data` was never assigned in the notebook).
# NOTE(review): the walking1 directory is assumed from the TCX path used
# above — confirm this is the recording that was meant to be inspected.
phyphox_data = read_phyphox("../data/Lucas/walking1/")
phyphox_data

In [None]:
def read_combined(path_pp, path_tcx):
    """Combine a phyphox recording with the heart rate from a TCX file.

    Each heart-rate measurement is applied to every phyphox sample whose
    timestamp lies in the half-open interval from that measurement up to
    (but excluding) the next one. This is done because the heart-rate
    measurements are much coarser in time than the phyphox samples.

    Parameters
    ----------
    path_pp : str
        Directory of the phyphox export (passed to ``read_phyphox``).
    path_tcx : str
        Path of the TCX file (passed to ``readtcx``).

    Returns
    -------
    pandas.DataFrame
        The phyphox frame with an additional numeric ``hr`` column. Samples
        outside the time span covered by heart-rate intervals are dropped.
    """
    pp = read_phyphox(path_pp)
    hr = readtcx(path_tcx)

    # The TCX values may arrive as text; convert them so `hr` stays a numeric
    # column instead of turning into a mixed object column.
    hr_values = pd.to_numeric(hr["hr"])
    hr_times = hr["time"]

    pp["hr"] = np.nan

    # Pair every measurement with its successor; the last measurement has no
    # closing timestamp and therefore defines no interval.
    intervals = zip(hr_times.iloc[:-1], hr_times.iloc[1:], hr_values.iloc[:-1])
    for interval_start, interval_end, bpm in intervals:
        in_interval = (pp["time"] >= interval_start) & (pp["time"] < interval_end)
        pp.loc[in_interval, "hr"] = bpm

    # remove data point without overlap in time
    return pp.dropna()

In [None]:
# Build the combined sensor + heart-rate frame for the walking2 recording and
# show it as the cell output. `df` is the frame the later cells plot and
# transform.
df = read_combined("../data/Lucas/walking2/", "../data/Lucas/walking2/walking2.tcx")
df

In [None]:
def plot_stacked_signals(data, title, panels):
    """Plot several columns of ``data`` against ``data["time"]`` in stacked panels.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame with a ``time`` column and every column named in ``panels``.
    title : str
        Figure title.
    panels : list of (str, str)
        ``(column, y-axis label)`` pairs, one per panel from top to bottom.

    Returns
    -------
    matplotlib.figure.Figure
        The created figure.
    """
    # squeeze=False keeps `axes` two-dimensional even for a single panel.
    fig, axes = plt.subplots(len(panels), sharex=True, squeeze=False)
    fig.suptitle(title)

    for ax, (column, label) in zip(axes[:, 0], panels):
        ax.plot(data["time"], data[column])
        ax.set_ylabel(label)

    fig.subplots_adjust(hspace=0.1)
    fig.align_labels()
    return fig


# One figure per signal group; each entry lists (column, y-axis label).
SIGNAL_FIGURES = {
    "Acceleration": [
        ("acceleration_x", "Acceleration x"),
        ("acceleration_y", "Acceleration y"),
        ("acceleration_z", "Acceleration z"),
    ],
    "Gyroscope": [
        ("gyroscope_x", "Gyroscope x"),
        ("gyroscope_y", "Gyroscope y"),
        ("gyroscope_z", "Gyroscope z"),
    ],
    "Position": [
        ("latitude", "Latitude"),
        ("longitude", "Longitude"),
        ("height", "Height"),
        ("velocity", "Velocity"),
    ],
    "Heart rate": [
        ("hr", "Heart rate"),
    ],
}

for figure_title, figure_panels in SIGNAL_FIGURES.items():
    plot_stacked_signals(df, figure_title, figure_panels)

In [None]:
import math
import copy
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.stats import norm
from sklearn.decomposition import PCA
from ML4QS.Python3Code.Chapter4.FrequencyAbstraction import FourierTransformation
import re

np.random.seed(0)

# Sample frequency (Hz) handed to the Fourier transformation.
# NOTE(review): confirm that 4 Hz matches the actual sampling rate of the
# recordings in `df`; the frequency labels are derived from this value.
fs = 4

periodic_predictor_cols = ['acceleration_x', 'acceleration_y', 'acceleration_z', 'gyroscope_x', 'gyroscope_y', 'gyroscope_z']

# Only the gyroscope columns (the last three) are transformed here; 40 is the
# window size passed to the transformation. A deep copy is passed in rather
# than `df` itself.
FreqAbs = FourierTransformation()
data_table = FreqAbs.abstract_frequency(copy.deepcopy(df), periodic_predictor_cols[3:], 40, fs)

# Get the frequencies from the columns: pick out names containing
# "freq_<number>_Hz" and capture the number directly, instead of slicing the
# string with hand-computed offsets.
frequency_pattern = re.compile(r'freq_(\d+\.\d+)_Hz')
frequencies = []
values = []
for col in data_table.columns:
    match = frequency_pattern.search(col)
    if match is not None:
        frequencies.append(float(match.group(1)))
        values.append(data_table.loc[data_table.index, col])

# Amplitudes per frequency, one marker per row of the table.
fig = plt.figure()
ax1 = fig.add_subplot(111)
ax1.plot(frequencies, values, 'b+')
ax1.set_xlabel('Frequency (Hz)')
ax1.set_ylabel('$a$')
plt.show()

In [None]:
# Summary frequency features of gyroscope_x, one wide figure each:
# 'gyroscope_x_max_freq', 'gyroscope_x_freq_weighted', 'gyroscope_x_pse'
# Each entry is (column, y-limits); None keeps matplotlib's automatic limits.
feature_plots = (
    ('gyroscope_x_max_freq', None),
    ('gyroscope_x_freq_weighted', [-3,10]),
    ('gyroscope_x_pse', None),
)

for feature_column, y_limits in feature_plots:
    plt.figure(figsize=(15,5))
    plt.plot(data_table['time'], data_table[feature_column])
    if y_limits is not None:
        plt.ylim(y_limits)
    plt.show()

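Note: calling `abstract_numerical` on `df` as-is raises "ValueError: window must be an integer", even though it mirrors the example code. The likely cause (to be confirmed) is that the example data is indexed by timestamp, whereas `df` keeps `time` as a regular column with an integer index, and pandas only accepts time-based rolling windows on a datetime-like index.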

In [None]:
from ML4QS.Python3Code.Chapter4.TemporalAbstraction import NumericalAbstraction
NumAbs = NumericalAbstraction()

# Sampling interval in milliseconds, taken from the first two rows by
# position (row labels may have gaps after dropna) and using the full
# duration rather than only its sub-second component.
milliseconds_per_instance = (df['time'].iloc[1] - df['time'].iloc[0]).total_seconds() * 1000
#window_sizes = [int(float(5000)/milliseconds_per_instance), int(float(0.5*60000)/milliseconds_per_instance), int(float(5*60000)/milliseconds_per_instance)]

# NOTE(review): "window must be an integer" is the error pandas raises when a
# time-offset rolling window is requested on a frame without a datetime-like
# index. Presumably abstract_numerical builds such a window, so the frame is
# handed over indexed by `time` — confirm against the ML4QS source.
# `hr` is cast to float so the rolling mean operates on numeric data.
hr_by_time = df.set_index('time').astype({'hr': float})
dataset = NumAbs.abstract_numerical(hr_by_time, ['hr'], 10, 'mean')

# Select by prefix: the generated column may carry a window-size suffix
# (assumption — verify the exact name in `dataset.columns`).
plt.plot(dataset.filter(like='hr_temp_mean'))
plt.show()