In [8]:
import pandas as pd
import numpy as np
import plotly.express as px
import io
import openox as ox
from exclude_unclean import *
from session_functions import threesamples, recalculate_so2_range
from importlib import reload
import ipywidgets
import os
from tqdm.notebook import tqdm


In [7]:
# Load the internal blood-gas export.
abg = pd.read_csv('../OpenOxPHIData/output_internal_bloodgas.csv')
# Project helper from session_functions; its return value is discarded here, so it is
# presumably called for an in-place effect on `abg` -- TODO confirm against its source.
threesamples(abg)
# Narrow the frame to the date/time, sample number, so2 value and session id columns.
abg = abg[['date','time','sample','so2','session']]


# Count the number of valid (greater than zero, hence non-null) Nellcor SpO2 samples in each LabVIEW 2 Hz file


In [19]:
# Per-file count of positive Nellcor SpO2 readings in the raw (pre-cleaning)
# LabVIEW 2 Hz waveform exports, keyed by file name.
uncleancount = {}

unclean_dir = '../OpenOxPHIData/waveforms/labview_2hz'

# sorted() keeps the iteration order deterministic; os.listdir order is arbitrary.
for file in sorted(os.listdir(unclean_dir)):
    # os.listdir returns every directory entry (hidden files, sub-folders, ...);
    # only CSV exports should be parsed.
    if not file.lower().endswith('.csv'):
        continue
    df = pd.read_csv(os.path.join(unclean_dir, file))
    # The Nellcor SpO2 column is named differently depending on the export.
    if "Nellcor/SpO2" in df.columns:
        nellcorcol = "Nellcor/SpO2"
    else:
        nellcorcol = "Nellcor PM1000N-1/SpO2"
    # NaN > 0 evaluates to False, so missing readings are excluded from the count.
    uncleancount[file] = int((df[nellcorcol] > 0).sum())

In [20]:
# Per-file count of positive Nellcor SpO2 readings in the cleaned LabVIEW 2 Hz
# waveform exports, keyed by file name.
cleancount = {}

clean_dir = '../OpenOxPHIData/cleaned/waveforms/labview_2hz'

# sorted() keeps the iteration order deterministic; os.listdir order is arbitrary.
for file in sorted(os.listdir(clean_dir)):
    # os.listdir returns every directory entry (hidden files, sub-folders, ...);
    # only CSV exports should be parsed.
    if not file.lower().endswith('.csv'):
        continue
    df = pd.read_csv(os.path.join(clean_dir, file))
    # The Nellcor SpO2 column is named differently depending on the export.
    if "Nellcor/SpO2" in df.columns:
        nellcorcol = "Nellcor/SpO2"
    else:
        nellcorcol = "Nellcor PM1000N-1/SpO2"
    # NaN > 0 evaluates to False, so missing readings are excluded from the count.
    cleancount[file] = int((df[nellcorcol] > 0).sum())

In [31]:
# One row per file, column 0 = raw count, column 1 = cleaned count; files present in
# only one of the two folders get NaN in the other column.
counts_by_file = pd.DataFrame([uncleancount, cleancount]).T
# Column totals: overall number of positive samples before (0) and after (1) cleaning.
counts_by_file.sum()

0    2589100.0
1    2577644.0
dtype: float64

In [8]:
# Folder and file-name template of the per-session LabVIEW 2 Hz exports; the `{}`
# placeholder takes the session number (compare the "No file for session ..." messages
# printed by this cell).
base_path = '../OpenOxPHIData/waveforms/labview_2hz/'
pattern = 'labview_session_{}_2hz.csv'
# Device SpO2 columns passed to the extraction step below.
# NOTE(review): other cells in this notebook fall back to "Nellcor PM1000N-1/SpO2" when
# "Nellcor/SpO2" is absent; confirm whether openox handles that alternative name here.
cols_to_summarize = ['Masimo 97/SpO2','Nellcor/SpO2'] 
# Locate the LabVIEW file for each session found in `abg`.
labview_files = ox.get_labview_files(abg, base_path, pattern)
# NOTE(review): the positional arguments ('session', 'Sample', 5) are interpreted by
# openox; presumably the session key, the sample-marker column and a window size --
# TODO confirm against ox.extract_values_by_sample.
labview_samples = ox.extract_values_by_sample(labview_files, 'session','Sample',5,cols_to_summarize,abg)

  for i in abg['session'].unique().astype(int):


No file for session 139 at ../OpenOxPHIData/waveforms/labview_2hz/labview_session_139_2hz.csv
No file for session -2147483648 at ../OpenOxPHIData/waveforms/labview_2hz/labview_session_-2147483648_2hz.csv
No file for session 17 at ../OpenOxPHIData/waveforms/labview_2hz/labview_session_17_2hz.csv
No file for session 74 at ../OpenOxPHIData/waveforms/labview_2hz/labview_session_74_2hz.csv
No file for session 412 at ../OpenOxPHIData/waveforms/labview_2hz/labview_session_412_2hz.csv
No file for session 417 at ../OpenOxPHIData/waveforms/labview_2hz/labview_session_417_2hz.csv
up in the clouds with session 549
up in the clouds with session 550
up in the clouds with session 551
up in the clouds with session 552
up in the clouds with session 553
up in the clouds with session 554
up in the clouds with session 555
up in the clouds with session 556
up in the clouds with session 557
up in the clouds with session 558
up in the clouds with session 559
up in the clouds with session 560
up in the clouds

## Create labview samples dataframe

In [5]:
#set everything to keep to start
labview_samples['manual_clean_so2'] = 'keep'
labview_samples['manual_clean_masimo'] = 'keep'
labview_samples.head()

NameError: name 'labview_samples' is not defined

## How many sessions don't meet FDA Criteria, before cleaning?

In [46]:
# Run the session-level criteria check on every session and tabulate how many fail.
rejected_sessions_notcleaned = {}

# Build the groupby first so the progress-bar total comes from the data itself.
# Writing total=len(pbar) inside the walrus assignment reads `pbar` before it is
# bound: NameError on a fresh kernel, or a stale total left over from another cell.
session_groups = labview_samples.groupby(['session'])
for name, group in (pbar := tqdm(session_groups, total=len(session_groups))):
    pbar.set_postfix_str(f"session: {name}")
    # NOTE(review): the 'algo' column is only created by ox.sample_stability_multi in
    # the "Apply new algorithm criteria" section further down. When it is present the
    # numbers below describe algorithm-cleaned data, not the "before cleaning" state
    # this section is titled for. The guard lets the cell run in notebook order
    # (no 'algo' column yet) on the full, uncleaned samples.
    if 'algo' in group.columns:
        group = group[group['algo'] == 'keep']
    # Avoid naming the result `tuple`, which would shadow the builtin for the whole kernel.
    criteria, _session_df = ox.session_criteria_check(group)
    rejected_sessions_notcleaned[name] = criteria

# One row per session; columns 0, 1, 2 hold the three criteria results.
rejected_sessions_notcleaned_df = pd.DataFrame(rejected_sessions_notcleaned).T
# A session is rejected unless all three criteria are met.
rejected_sessions_notcleaned_df['rejected'] = ~rejected_sessions_notcleaned_df[[0, 1, 2]].astype(bool).all(axis=1)
rejected_sessions_notcleaned_df['rejected'].value_counts()

  0%|          | 0/317 [00:00<?, ?it/s]

rejected
True     270
False     47
Name: count, dtype: int64

In [52]:
# Cross-tabulate the overall rejection flag (rows) against the result of the
# criterion stored in column 1 (columns).
pd.crosstab(
    index=rejected_sessions_notcleaned_df['rejected'],
    columns=rejected_sessions_notcleaned_df[1],
)

1,False,True
rejected,Unnamed: 1_level_1,Unnamed: 2_level_1
False,0,47
True,99,171


In [49]:
# Print the True/False tally of every column (each criterion, then the overall flag).
for column_label, column_values in rejected_sessions_notcleaned_df.items():
    print(column_values.value_counts())

0
False    170
True     147
Name: count, dtype: int64
1
True     218
False     99
Name: count, dtype: int64
2
False    171
True     146
Name: count, dtype: int64
rejected
True     270
False     47
Name: count, dtype: int64


# Apply new algorithm criteria

In [11]:
# Re-import openox so that edits made to the module on disk are picked up.
reload(ox)
# Run the stability algorithm on the so2 and Nellcor/SpO2 columns, writing its verdict to
# the 'algo' column. The last two arguments sit in the same positions as
# so2bound / refbound in the sensitivity-analysis loop (here 1.5 and 2).
# NOTE: labview_samples is overwritten with the result, so re-running this cell feeds the
# already-processed frame back into the algorithm.
labview_samples, counts = ox.sample_stability_multi(labview_samples, 'so2','Nellcor/SpO2', 'Timestamp','algo',1.5,2)
# Persist the annotated samples (index included) next to the notebook.
labview_samples.to_csv('labview_samples.csv',index=True)
labview_samples.head()

  df[timestamp_col] = pd.to_datetime(df[timestamp_col], errors='coerce')


Unnamed: 0,session,sample,Masimo 97/SpO2,Nellcor/SpO2,so2,Timestamp,manual_clean_so2,manual_clean_masimo,sample_diff_prev,sample_diff_next,...,Nellcor/SpO2_diff_prev,Nellcor/SpO2_diff_next,Timestamp_diff_prev,Timestamp_diff_next,so2_stable,so2_reason,Nellcor_stable,Nellcor_reason,algo_status,algo
0,13,1,100.0,100.0,99.4,2024-05-17 09:33:02,keep,keep,0.0,-1.0,...,0.0,0.0,0.0,-64.0,True,keep because previous sample is within bound,True,keep because previous sample is within bound,True,keep
1,13,2,100.0,100.0,98.9,2024-05-17 09:34:06,keep,keep,1.0,-1.0,...,0.0,1.909091,64.0,-40.0,True,keep because previous sample is within bound,True,keep because previous sample is within bound,True,keep
2,13,3,100.0,98.090909,98.6,2024-05-17 09:34:46,keep,keep,1.0,-1.0,...,-1.909091,-1.727273,40.0,-33.0,True,keep because previous sample is within bound,True,keep because previous sample is within bound,True,keep
3,13,4,100.0,99.818182,99.0,2024-05-17 09:35:19,keep,keep,1.0,-1.0,...,1.727273,-0.181818,33.0,-37.0,True,keep because previous sample is within bound,True,keep because previous sample is within bound,True,keep
4,13,5,100.0,100.0,98.8,2024-05-17 09:35:56,keep,keep,1.0,-1.0,...,0.181818,3.909091,37.0,-389.0,True,keep because previous sample is within bound,True,keep because previous sample is within bound,True,keep


# Do sensitivity analysis with new algorithm criteria

The criterion is to reject a sample if either its SaO2 value or its reference-oximeter reading differs from the neighbouring samples by more than x% (SaO2) or y% (reference oximeter), respectively. Let's test the permutations of x and y.

Iterate through each permutation of cleaning threshold on the data repository, to create labview samples dataframes.

For each permutation of ```(so2bound, refbound)```, count how many samples are kept and rejected, and add to a dictionary.

In [13]:
# Candidate thresholds, applied to both the so2 bound and the reference-oximeter bound.
arange = np.arange(1.25,3,0.25)
print(arange)

# (so2bound, refbound) -> keep/reject counts returned by the algorithm.
sensitivity_analysis = {}
# Per-permutation result frames. Kept in a separate list so that
# `labview_samples_cleaned` is only ever bound to a DataFrame.
cleaned_frames = []

for so2bound in (pbar:= tqdm(arange, total=len(arange))):
    pbar.set_postfix_str(f"so2bound: {so2bound}")
    # leave=False removes each finished inner bar instead of stacking one per outer step.
    for refbound in (pbar2:=tqdm(arange, total=len(arange), leave=False)):
        pbar2.set_postfix_str(f"refbound: {refbound}")
        df, value_counts = ox.sample_stability_multi(labview_samples, 'so2','Nellcor/SpO2', 'Timestamp','algo',so2bound,refbound)
        sensitivity_analysis[(so2bound,refbound)] = value_counts
        # Tag every row with the thresholds that produced it so the stacked frame
        # can be grouped by permutation later.
        df['so2bound'] = so2bound
        df['refbound'] = refbound
        cleaned_frames.append(df)

# Single concat after the loop: one long frame covering every permutation.
labview_samples_cleaned = pd.concat(cleaned_frames)

[1.25 1.5  1.75 2.   2.25 2.5  2.75]


  0%|          | 0/7 [00:00<?, ?it/s]

  0%|          | 0/7 [00:00<?, ?it/s]

  0%|          | 0/7 [00:00<?, ?it/s]

  0%|          | 0/7 [00:00<?, ?it/s]

  0%|          | 0/7 [00:00<?, ?it/s]

  0%|          | 0/7 [00:00<?, ?it/s]

  0%|          | 0/7 [00:00<?, ?it/s]

  0%|          | 0/7 [00:00<?, ?it/s]

## Sensitivity analysis of rejected *samples*

In [14]:
# Convert the dictionary to a list of dictionaries with separated keys and inner values
# Flatten the {(so2bound, refbound): counts} mapping into one record per permutation,
# with the two thresholds as ordinary columns next to the counts.
rows = []
for (so2_key, ref_key), outcome_counts in sensitivity_analysis.items():
    record = {'so2bound': so2_key, 'refbound': ref_key}
    record.update(outcome_counts)
    rows.append(record)

# Tabulate the records and add the total number of rejected samples.
sens = pd.DataFrame(rows)
sens['reject'] = sens['reject_nellcor'] + sens['reject_both'] + sens['reject_so2']
sens.to_csv('sens.csv')
sens.head()

Unnamed: 0,so2bound,refbound,keep,reject_nellcor,reject_both,reject_so2,reject
0,1.25,1.25,6580,535,363,330,1228
1,1.25,1.5,6712,403,328,365,1096
2,1.25,1.75,6813,302,304,389,995
3,1.25,2.0,6856,259,283,410,952
4,1.25,2.25,6985,130,185,508,823


## Sensitivity of rejected *sessions*

Now take the large labview samples df, and apply function to check whether the sessions pass based on FDA criteria. This will help us understand how our cleaning algorithm causes sessions to pass or fail based on ISO criteria, and choose a cleaning threshold.

In [15]:
# (so2bound, refbound, session) -> criteria results for the samples the algorithm kept.
rejected_sessions = {}

# Build the groupby first so the progress-bar total is the real number of groups.
# Writing total=len(pbar) inside the walrus assignment picks up the `pbar` of a
# previously run cell (giving a wrong total) or raises NameError on a fresh kernel.
bound_session_groups = labview_samples_cleaned.groupby(['so2bound','refbound','session'])
for name, group in (pbar := tqdm(bound_session_groups, total=len(bound_session_groups))):
    pbar.set_postfix_str(f"session: {name}")
    # Only samples the stability algorithm marked 'keep' count towards the criteria.
    group = group[group['algo'] == 'keep']
    # Avoid naming the result `tuple`, which would shadow the builtin for the whole kernel.
    criteria, _session_df = ox.session_criteria_check(group)
    rejected_sessions[name] = criteria


  0%|          | 0/7 [00:00<?, ?it/s]

In [17]:
# One row per (so2bound, refbound, session) with the three criteria results.
rejected_sessions_df = pd.DataFrame(rejected_sessions).T.reset_index()
rejected_sessions_df.columns = ['so2bound','refbound','session','criteria1','criteria2','criteria3']

# A session is rejected unless all three criteria are met.
criteria_cols = ['criteria1','criteria2','criteria3']
rejected_sessions_df['rejected'] = ~rejected_sessions_df[criteria_cols].astype(bool).all(axis=1)

# Number of rejected sessions per threshold pair. Only 'rejected' is aggregated;
# summing the whole frame would also add up session ids and the criteria columns.
t1 = (
    rejected_sessions_df
    .groupby(['so2bound','refbound'])['rejected']
    .sum()
    .reset_index()
)
t1

Unnamed: 0,so2bound,refbound,rejected
0,1.25,1.25,283
1,1.25,1.5,280
2,1.25,1.75,276
3,1.25,2.0,276
4,1.25,2.25,274
5,1.25,2.5,273
6,1.25,2.75,272
7,1.5,1.25,277
8,1.5,1.5,272
9,1.5,1.75,270


In [18]:
import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib.ticker import FuncFormatter


In [19]:
# Reshape to a grid: one row per so2bound, one column per refbound, cells = rejected sessions.
t1h = t1.set_index(['so2bound', 'refbound'])['rejected'].unstack('refbound')
t1h

refbound,1.25,1.50,1.75,2.00,2.25,2.50,2.75
so2bound,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
1.25,283,280,276,276,274,273,272
1.5,277,272,270,270,267,265,264
1.75,274,270,268,268,264,261,260
2.0,272,268,266,266,256,253,252
2.25,270,265,263,263,249,247,245
2.5,268,262,260,259,245,242,239
2.75,267,261,258,257,241,238,235


In [35]:
# Heatmap of the number of rejected sessions for each (so2bound, refbound) pair.
# seaborn labels the axes from the index/column names of t1h; only the title is added.
heatmap_ax = sns.heatmap(t1h, annot=True, fmt='g')
# The trailing semicolon suppresses the Text(...) repr in the cell output.
heatmap_ax.set_title('Rejected sessions by so2bound and refbound');

<Axes: xlabel='refbound', ylabel='so2bound'>

In [20]:
import plotly.express as px


In [31]:
# Pivot the data for heatmap plotting using keyword arguments
pivot_table_so2 = sens.pivot(index="so2bound", columns="refbound", values="reject_so2")
pivot_table_nellcor = sens.pivot(index="so2bound", columns="refbound", values="reject_nellcor")
pivot_table_both = sens.pivot(index="so2bound", columns="refbound", values="reject_both")
pivot_table_reject = sens.pivot(index="so2bound", columns="refbound", values="reject")

# Create heatmaps in a 2x2 grid
fig, ax = plt.subplots(2, 2, figsize=(18, 12))

# Define a common color bar for all heatmaps
cbar_ax = fig.add_axes([.91, .3, .03, .4])

# Titles for each heatmap
titles = ['Reject SO2', 'Reject Nellcor', 'Reject Both', 'Reject']

# Mapping each pivot table with a title
for ax_coords, pivot_table, title in zip([(0, 0), (0, 1), (1, 0), (1, 1)],
                                         [pivot_table_so2, pivot_table_nellcor, pivot_table_both, pivot_table_reject],
                                         titles):
    i, j = ax_coords
    sns.heatmap(pivot_table, cmap='coolwarm',
                annot=True, ax=ax[i][j], fmt='d',
                cbar=i == 0 and j == 1, cbar_ax=None if not (i == 0 and j == 1) else cbar_ax,
                cbar_kws={'label': 'Count'})
    ax[i][j].set_title(f'Heatmap of {title}')
    ax[i][j].set_xlabel('Ref Bound')
    ax[i][j].set_ylabel('SO2 Bound')
    ax[i][j].invert_yaxis()

plt.tight_layout(rect=[0, 0, .9, 1])  # Adjust the rect to make room for the color bar
plt.show()

  plt.tight_layout(rect=[0, 0, .9, 1])  # Adjust the rect to make room for the color bar


# Upload to Redcap

In [23]:
# Reload the sample-level sensitivity table saved earlier in this notebook.
# It was written with its index (to_csv default), so read that first column back as
# the index; otherwise it reappears as a spurious 'Unnamed: 0' data column.
sens = pd.read_csv('sens.csv', index_col=0)

In [24]:
from redcap import Project
import streamlit as st
    
# REDCap API endpoint; the API key is read from Streamlit's secrets store
# (st.secrets) rather than being hard-coded in the notebook.
api_url = 'https://redcap.ucsf.edu/api/'
api_k = st.secrets['api_k']
# Client handle for the REDCap project identified by the URL/key pair.
proj = Project(api_url, api_k)
# The two calls below are intentionally disabled; they are kept as a record of the
# file-field transfer for record '9' and of the labview_samples.csv upload.
# f = io.BytesIO(proj.import_file(record='9', field='file')[0])
# proj.import_file(record='9', file_name='labview_samples.csv',field='file', file_object=labview_samples.to_csv(index=True))