In [327]:
import pandas as pd
from Error_Compare import *
from Time_series_alignment_functions import *
import analysis.dtw as cdtw
import dtw as pdtw
import numpy as np
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [328]:
# Parameters
#activity_dir = "data/dataset/2b8f256d-7063-410c-b66d-3bdfb7d140c5"
#activity_dir = "data/dataset/fa99ac972245361cf4256225cae4a2c0b80ff333"
activity_dir = "data/dataset/c220e0e4-d30d-4ead-858a-1545b91bc362"
min_time = 60
max_time = np.inf
key_metrics = ["breathTime", "VT", "instBR"]
lb_error_window = 3 # lower bound error window
ra_window = 60 # rolling average error window
ra_threshold = 0.5

In [329]:
# Data loading
activity = os.path.split(activity_dir)[-1]
clean_dfs = load_data(activity_dir, has_uncleaned = False)
raw_chest = clean_dfs["raw_slow_df"][["time","c",]]
live_b3_df = clean_dfs["live_b3_df"][key_metrics]
pp_b3_df = clean_dfs["aws_b3_df"][key_metrics]
# Select subset of data based on min_time and max_time
raw_chest = raw_chest[(raw_chest["time"] > min_time) & (raw_chest["time"] < max_time)]
live_b3_df = live_b3_df[(live_b3_df["breathTime"] > min_time) & (live_b3_df["breathTime"] < max_time)]
pp_b3_df = pp_b3_df[(pp_b3_df["breathTime"] > min_time) & (pp_b3_df["breathTime"] < max_time)]
# Backfill pp nans if necessary
# if pp_b3_df.isnull().values.any():
#     print("pp_b3_df CONTAINS NaNs- backfilling")
#     # Get location of nans
#     nan_locs = np.argwhere(pp_b3_df.isnull().values)
#     nan_times = pp_b3_df["breathTime"][nan_locs[:,0]]
#     print("pp_b3_df Nan times: ", nan_times)
#     pp_b3_df = pp_b3_df.fillna(method = "bfill")



Pickled data found in activity data directory. Loading data...


In [330]:
# Method 1 Shift by an offset, and then take upperbound on the error
test_metrics = {}
pp_s_df = run_cleaning_process(clean_dfs["aws_b3_df"][key_metrics])
live_s_df = run_cleaning_process(clean_dfs["live_b3_df"][key_metrics])
# select only where index is between min_time and max_time
pp_s_df = pp_s_df.loc[(pp_s_df.index >= min_time) & (pp_s_df.index <= max_time)]
live_s_df = live_s_df.loc[(live_s_df.index >= min_time) & (live_s_df.index <= max_time)]
# shift all live indices by -6
live_s_df.index = live_s_df.index - 6
# get the time range where both pp and live are valid
min_s = max(min(pp_s_df.index), min(live_s_df.index))
max_s = min(max(pp_s_df.index), max(live_s_df.index))
# Apply the time range to the two dataframes
pp_s_df = pp_s_df.loc[(pp_s_df.index >= min_s) & (pp_s_df.index <= max_s)]
live_s_df = live_s_df.loc[(live_s_df.index >= min_s) & (live_s_df.index <= max_s)]

# Compute (percent) positive error for pp to live
pp_lb_error, pp_percent_error = compute_lower_bound_error(pp_s_df["VT"].to_numpy(), live_s_df["VT"].to_numpy(), lb_error_window)
# Compute the rolling average error for pp_percent_error
pp_ra_error = compute_RAT_error(pp_percent_error, ra_threshold, ra_window)
# Compute (percent) positive error for live to pp
live_lb_error, live_percent_error = compute_lower_bound_error(live_s_df["VT"].to_numpy(), pp_s_df["VT"].to_numpy(), window = lb_error_window)
# Compute the rolling average error for pp_percent_error
live_ra_error = compute_RAT_error(live_percent_error, ra_threshold, ra_window)

pp_error_summary = summarize_percent_error(pp_percent_error)
live_error_summary = summarize_percent_error(live_percent_error)
if True: # Plotting the error
    fig = make_subplots(rows = 4, cols = 1, shared_xaxes=True, vertical_spacing=0.05, subplot_titles=("Raw Chest Signal","Input Signals", "PP to Live LB Relative Percent Error", "Live to PP LB Relative Percent Error"))
    fig.update_layout(title_text="Method 1 Error Analysis (VT)")

    # Plot the raw chest signal
    fig.add_trace(go.Scatter(x=raw_chest["time"], y=raw_chest["c"], name="Raw Chest Signal", line = LINE_DICT["CHEST"]), row=1, col=1)
    fig.update_yaxes(title_text="Chest Signal", row=1, col=1)

    # Plot the VT Signals (interpolated and live shifted in this case)
    fig.add_trace(go.Scatter(x=live_s_df.index, y=live_s_df["VT"], name="Live (-6s)", line = LINE_DICT["LIVE"]), row=2, col=1)
    fig.add_trace(go.Scatter(x=pp_s_df.index, y=pp_s_df["VT"], name="PP (s)", line = LINE_DICT["PP"]), row=2, col=1)
    fig.update_yaxes(title_text="VT", row=2, col=1)

    # plot the error
    fig.add_trace(go.Bar(x=pp_s_df.index, y=pp_percent_error[:,0], name="PP to Live  LB Error", marker_color= COLOR_DICT["ERROR"]), row=3, col=1)
    fig.add_trace(go.Scatter(x=pp_s_df.index, y = pp_ra_error[:,0], name="PP to Live RA Error", line = LINE_DICT["ERROR"]), row=3, col=1)
    fig.update_yaxes(title_text="Percent Positive Error", row=3, col=1)
    fig.update_yaxes(range=[0, 2], row=3, col=1)

    # plot the error
    fig.add_trace(go.Bar(x = live_s_df.index, y=live_percent_error[:,0], name="Live to PP LB Error", marker_color= COLOR_DICT["ERROR"]), row=4, col=1)
    fig.add_trace(go.Scatter(x=live_s_df.index, y = live_ra_error[:,0], name="Live to PP RA Error", line = LINE_DICT["ERROR"]), row=4, col=1)
    fig.update_yaxes(title_text="Percent Positive Error", row=4, col=1)
    fig.update_yaxes(range=[0, 2], row=4, col=1)

    fig.update_xaxes(title_text="Time (s)", row=4, col=1)

    fig.show()


In [331]:
# Repeat the same process of instBR
# Compute (percent) positive error for pp to live
pp_lb_error, pp_percent_error = compute_lower_bound_error(pp_s_df["instBR"].to_numpy(), live_s_df["instBR"].to_numpy(), lb_error_window)
# Compute the rolling average error for pp_percent_error
pp_ra_error = compute_RAT_error(pp_percent_error, ra_threshold, ra_window)
# Compute (percent) positive error for live to pp
live_lb_error, live_percent_error = compute_lower_bound_error(live_s_df["instBR"].to_numpy(), pp_s_df["instBR"].to_numpy(), window = lb_error_window)
# Compute the rolling average error for pp_percent_error
live_ra_error = compute_RAT_error(live_percent_error, ra_threshold, ra_window)

pp_error_summary = summarize_lb_error(pp_percent_error)
live_error_summary = summarize_lb_error(live_percent_error)
if True: # Plotting the error
    fig = make_subplots(rows = 4, cols = 1, shared_xaxes=True, vertical_spacing=0.05, subplot_titles=("Raw Chest Signal","Input Signals", "PP to Live LB Relative Percent Error", "Live to PP LB Relative Percent Error"))
    fig.update_layout(title_text="Method 1 Error Analysis (instBR)")

    # Plot the raw chest signal
    fig.add_trace(go.Scatter(x=raw_chest["time"], y=raw_chest["c"], name="Raw Chest Signal", line = LINE_DICT["CHEST"]), row=1, col=1)
    fig.update_yaxes(title_text="Chest Signal", row=1, col=1)

    # Plot the VT Signals (interpolated and live shifted in this case)
    fig.add_trace(go.Scatter(x=live_s_df.index, y=live_s_df["instBR"], name="Live (-6s)", line = LINE_DICT["LIVE"]), row=2, col=1)
    fig.add_trace(go.Scatter(x=pp_s_df.index, y=pp_s_df["instBR"], name="PP (s)", line = LINE_DICT["PP"]), row=2, col=1)
    fig.update_yaxes(title_text="instBR", row=2, col=1)

    # plot the error
    fig.add_trace(go.Bar(x=pp_s_df.index, y=pp_percent_error[:,0], name="PP to Live  LB Error", marker_color= COLOR_DICT["ERROR"]), row=3, col=1)
    fig.add_trace(go.Scatter(x=pp_s_df.index, y = pp_ra_error[:,0], name="PP to Live RA Error", line = LINE_DICT["ERROR"]), row=3, col=1)
    fig.update_yaxes(title_text="Percent Positive Error", row=3, col=1)
    fig.update_yaxes(range=[0, 2], row=3, col=1)

    # plot the error
    fig.add_trace(go.Bar(x = live_s_df.index, y=live_percent_error[:,0], name="Live to PP LB Error", marker_color= COLOR_DICT["ERROR"]), row=4, col=1)
    fig.add_trace(go.Scatter(x=live_s_df.index, y = live_ra_error[:,0], name="Live to PP RA Error", line = LINE_DICT["ERROR"]), row=4, col=1)
    fig.update_yaxes(title_text="Percent Positive Error", row=4, col=1)
    fig.update_yaxes(range=[0, 2], row=4, col=1)

    fig.update_xaxes(title_text="Time (s)", row=4, col=1)

    fig.show()

In [332]:
if live_b3_df.isnull().values.any():
    print("live_b3_df CONTAINS NaNs- backfilling")
    # Get location of nans
    nan_locs = np.argwhere(live_b3_df.isnull().values)
    nan_times = live_b3_df["breathTime"][nan_locs[:,0]]
    print("live_b3_df Nan times: ", nan_times)

In [333]:
if pp_b3_df.isnull().values.any():
    print("pp_b3_df CONTAINS NaNs- backfilling")
    # Get location of nans
    nan_locs = np.argwhere(pp_b3_df.isnull().values)
    nan_times = pp_b3_df["breathTime"][nan_locs[:,0]]
    print("pp_b3_df Nan times: ", nan_times)
    #pp_b3_df = pp_b3_df.fillna(method = "bfill")

pp_b3_df CONTAINS NaNs- backfilling


KeyError: '[1] not in index'

In [None]:
# check if pp_s_df and live_s_df are the same length
print(len(pp_s_df))
print(len(live_s_df))
# Check if any indices repeat in pp_s_df
print(len(pp_s_df.index.unique()))
# Check if any indices repeat in live_s_df
print(len(live_s_df.index.unique()))
# find the different in indices between pp_s_df and live_s_df
print(len(set(pp_s_df.index.unique()) - set(live_s_df.index.unique())))
# get the max and min index of pp_s_df
print(f"pp max index {pp_s_df.index.max()}")
print(f"pp min index {pp_s_df.index.min()}")
# get the max and min index of live_s_df
print(f"live max index {live_s_df.index.max()}")
print(f"live min index {live_s_df.index.min()}")

