# Partial breaths

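Comparison of our methods of handling the partial breaths at the start and end of the frame (`zeroed`, `beyond_frame`, `excluded`, and the earlier `old_style`), based on the reference RR each one produces.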

In [73]:
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import numpy as np
from sklearn.linear_model import LinearRegression

In [74]:
# Notebook configuration.
# autoreload mode 2 re-imports modules before each cell runs, so edits to
# imported modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

# only display 2 decimals
pd.set_option('display.precision', 2)

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


## Read data

In [75]:
# Edge-handling methods under comparison. The order of this list fixes each
# method's position in `dfs`, which the later cells address by index.
edge_options = ['zeroed', 'beyond_frame', 'excluded', 'old_style']

# Feature CSV produced with each method, relative to ../data/results/
filenames = {
    'zeroed':      'nimr_Tr1_fe-zeroed_20250324-103013_features.csv',
    'beyond_frame':'nimr_Tr1_fe-beyond_frame_20250324-103104_features.csv',
    'excluded':    'nimr_Tr1_fe-excluded_20250324-104026_features.csv',
    'old_style':   'nimr breath marks before changing to mean/nimr_Tr1__20250306-174438_features.csv'
}

# Load one DataFrame per method, walking `edge_options` (not the dict) so that
# dfs[i] always belongs to edge_options[i], whatever order `filenames` is in.
dfs = [
    pd.read_csv(f"../data/results/{filenames[option]}", low_memory=False)
    for option in edge_options
]

In [76]:
# The RR reference column: current name, and the name used in the old-style file
rr_reference           = 'RR ref (mean)'
rr_reference_old_style = 'RR ref (median)'

# Make df_old_style compatible with the newer defs.
# The frame is located by name rather than by a hard-coded position, and is
# rebound to a renamed copy instead of being mutated in place, so re-running
# this cell is harmless (renaming a column that is no longer there is a no-op).
old_style_idx = edge_options.index('old_style')
dfs[old_style_idx] = dfs[old_style_idx].rename(
    columns={rr_reference_old_style: rr_reference}
)

In [77]:
# Line the methods up side by side: one column of reference RR per edge option,
# each re-indexed from 0 so rows pair up by position.
rr_by_method = {}
for option, frame in zip(edge_options, dfs):
    rr_by_method[option] = frame[rr_reference].reset_index(drop=True)
df_combined = pd.DataFrame(rr_by_method)

# Summary table: one row per method, with the mean and the three quartiles.
quartiles = df_combined.quantile([0.25, 0.5, 0.75]).T
df_stats = pd.DataFrame({
    'mean':   df_combined.mean(),
    '25%ile': quartiles[0.25],
    '50%ile': quartiles[0.5],
    '75%ile': quartiles[0.75]
})
df_stats

Unnamed: 0,mean,25%ile,50%ile,75%ile
zeroed,32.22,23.99,29.0,38.0
beyond_frame,32.12,23.82,29.02,37.92
excluded,32.8,24.17,29.72,38.09
old_style,33.95,24.49,29.67,39.46


In [91]:
# Scatter plots comparing these

# One panel per (x, y) pair of edge_option indices.
scatter_pairs = [(1, 0), (1, 2), (0, 2), (1, 3)]
old_style_idx = edge_options.index('old_style')

# Exactly one subplot column per comparison (no spare, empty panel).
fig = make_subplots(
    rows=1,
    cols=len(scatter_pairs),
    horizontal_spacing=0.04,
    shared_yaxes=True,
    shared_xaxes=True,
)

opacity = 0.75
marker_size = 3
marker_color = 'blue'

# Common upper limit for every axis: largest reference RR in any method, plus
# a little headroom. Taken per frame and NaN-aware, so missing values or
# frames of different lengths do not break the axis range.
max_all_data = np.nanmax([df[rr_reference].max() for df in dfs]) + 5

axis_layout = {}
for colnum, (x_idx, y_idx) in enumerate(scatter_pairs, start=1):
    # Comparisons that involve the old-style results are drawn in red.
    involves_old_style = old_style_idx in (x_idx, y_idx)

    fig.add_trace(
        go.Scatter(
            x=dfs[x_idx][rr_reference],
            y=dfs[y_idx][rr_reference],
            mode="markers",
            marker={
                "opacity": opacity,
                "color": 'red' if involves_old_style else marker_color,
                "size": marker_size,
            },
        ),
        row=1,
        col=colnum
    )

    # Line of equality
    fig.add_trace(
        go.Scatter(
            x=[0, max_all_data],
            y=[0, max_all_data],
            mode="lines",
            line={'color':'lightgray', 'width':.7}
        ),
        row=1,
        col=colnum
    )

    axis_layout[f"yaxis{colnum}"] = dict(
        title=edge_options[y_idx],
        range=[0, max_all_data],
        constrain='domain',
        dtick=10
    )
    axis_layout[f"xaxis{colnum}"] = dict(
        title=edge_options[x_idx],
        range=[0, max_all_data],
        scaleanchor=f"y{colnum}",    # Configure axes to have equal scale
        scaleratio=1,
        constrain='domain',
        dtick=10
    )

fig.update_layout(
    width=1600,
    height=500,
    title='Scatter comparisons of the methods',
    showlegend=False,
    **axis_layout,
)

fig.show()

In [None]:
# Bland-Altman plots comparing these three and comparing old_style to beyond_frame

comparisons = [[0, 1], [2, 1], [2, 0], [3, 1]]      # using edge_option index
numplots = len(comparisons)
old_style_idx = edge_options.index('old_style')

# Panel titles are derived from `comparisons`, so adding or removing a pair
# needs no other change.
fig = make_subplots(
    rows=1,
    cols=numplots,
    horizontal_spacing=0.02,
    shared_yaxes=True,
    shared_xaxes=True,
    subplot_titles=[
        f"{edge_options[first]} vs. {edge_options[second]}"
        for first, second in comparisons
    ]
)

opacity = 0.5
marker_size = 3

for colnum, (first, second) in enumerate(comparisons, start=1):
    rr_first = dfs[first][rr_reference]
    rr_second = dfs[second][rr_reference]
    diff = rr_first - rr_second
    mean = (rr_first + rr_second) / 2

    # Comparisons involving the old-style results are drawn in red and are
    # the only panels whose bias / LoA lines carry text labels.
    involves_old_style = old_style_idx in (first, second)
    marker_color = 'red' if involves_old_style else 'blue'
    labels = involves_old_style

    fig.add_trace(
        go.Scatter(
            x=mean,
            y=diff,
            mode="markers",
            marker={"color":marker_color, "opacity":opacity, "size":marker_size},
        ),
        row=1,
        col=colnum
    )
    fig.update_yaxes(dict(dtick=2), row=1, col=colnum)
    fig.update_xaxes(dict(title="Mean (bpm)"), row=1, col=colnum)

    # Mean bias and 95% limits of agreement (bias +/- 1.96 SD), ignoring NaNs.
    # ddof=1 gives the sample standard deviation of the differences, which is
    # the conventional choice for limits of agreement.
    bias = np.nanmean(diff)
    std_diff = np.nanstd(diff, axis=0, ddof=1)
    upper_loa = bias + 1.96 * std_diff
    lower_loa = bias - 1.96 * std_diff

    fig.add_hline(y=bias,      line={'width':0.8, 'color':'black', 'dash':'solid'}, label={'text':'bias' if labels else None, 'textposition':'start'}, row=1, col=colnum)
    fig.add_hline(y=upper_loa, line={'width':0.8, 'color':'black', 'dash':'dot'},   label={'text':'LoA'  if labels else None, 'textposition':'start'}, row=1, col=colnum)
    fig.add_hline(y=lower_loa, line={'width':0.8, 'color':'black', 'dash':'dot'},   label={'text':'LoA'  if labels else None, 'textposition':'start'}, row=1, col=colnum)

fig.update_layout(
    width=numplots * 300,
    height=400,
    title='Bland-Altman comparisons, with mean bias and 95% limits of agreement, of the methods',
    yaxis1_title='Difference (bpm)',
    showlegend=False,
)

fig.show()

In [80]:
# Histograms of their differences

comparisons = [[0, 1], [2, 1], [2, 0]]      # using edge_option index

# One standalone figure per comparison, showing the distribution of the
# row-wise difference in reference RR between the two methods.
for first, second in comparisons:
    diff = dfs[first][rr_reference] - dfs[second][rr_reference]

    fig = go.Figure(
        data=[go.Histogram(x=diff)]
    )

    fig.update_layout(
        width =500,
        height=500,
        title=f"{edge_options[first]} vs. {edge_options[second]}",
        xaxis_title='Difference (bpm)',
        yaxis_title='Count',
    )
    fig.show()
