In [1]:
import numpy as np
from scipy.integrate import cumulative_trapezoid
from scipy.signal import find_peaks
from scipy import signal
import plotly.express as px
import pandas as pd
import matplotlib.pyplot as plt
from pathlib import Path

In [2]:
import plotly.io as pio
# Make 'notebook_connected' the default renderer for plotly figures displayed in this notebook.
pio.renderers.default = 'notebook_connected'

In [43]:
# Pair every Excel export in `data_dir` with its raw log file.
# The pairing key is the part of the Excel file name before the first underscore;
# a pair is kept only when exactly one '<prefix>*raw.log' file matches.
# TODO: update this logic for files with name as prefix
# NOTE(review): the pattern '<prefix>*raw.log' also matches logs whose name merely
# starts with the same characters (e.g. prefix '1' vs. '10_...raw.log') — confirm
# against the real naming scheme.
data_dir = Path('./data/CPET hosp')
# data_dir = Path('./data/CPET ambu anonyme')
files = []
# Path.glob yields entries in arbitrary order; sort so that positional access
# such as files[5] selects the same recording on every run and machine.
for xls in sorted(data_dir.glob('*.xlsx')):
    prefix = xls.name.split('_')[0]
    log_match = sorted(data_dir.glob(f'{prefix}*raw.log'))
    if len(log_match) == 0:
        print(f'No matching raw log for Excel file {xls}')
    elif len(log_match) > 1:
        print(f'More then 1 matching raw log for Excel file {xls}?')
    else:
        files.append((xls, log_match[0]))

In [44]:
# Pick one (Excel export, raw log) pair from the discovered files.
xls, raw_log = files[5]

# Read the sheet and check its layout: rows 1 and 2 of the first (unnamed)
# column must hold the 'Unité' and 'Théor.' labels.
cycles_sheet = pd.read_excel(xls)
assert cycles_sheet.loc[1, 'Unnamed: 0'] == 'Unité'
assert cycles_sheet.loc[2, 'Unnamed: 0'] == 'Théor.'

# Columns to keep, their new names, and the phase labels to substitute.
keep_columns = {'Unnamed: 0', 'Vol.Cour.', 'VIn', 'tIn', 'VEx', 'tEx', 'ttot'}
column_names = {
    'Unnamed: 0': 'phase',
    'Vol.Cour.': 'vol_instant',
    'VIn': 'vol_in',
    'VEx': 'vol_ex',
    'tIn': 't_in',
    'tEx': 't_ex',
    'ttot': 'duration',
}
phase_names = {'Repos': 'rest', 'Charge': 'load', 'Récupération': 'recovery'}

# Drop the 'Temps' column and the first three rows, then reduce to the kept columns.
cycles_body = cycles_sheet.drop(columns='Temps').drop(index=[0, 1, 2])
df_cycles = (
    cycles_body
    .drop(columns=[col for col in cycles_body.columns if col not in keep_columns])
    .rename(columns=column_names)
    # Substitute the phase labels, then forward-fill so rows without a label
    # take the most recent one above them.
    .assign(phase=lambda frame: frame['phase'].replace(phase_names).ffill())
    .reset_index(drop=True)
)

In [45]:
# Display the cleaned cycles table (renamed columns, forward-filled phase labels).
df_cycles

Unnamed: 0,phase,vol_instant,vol_in,t_in,vol_ex,t_ex,duration
0,rest,0.37,0.36,0.7,0.38,1,1.8
1,rest,0.36,0.36,0.7,0.37,1,1.7
2,rest,0.34,0.34,0.7,0.33,0.9,1.6
3,rest,0.34,0.31,0.7,0.36,1.1,1.8
4,rest,0.38,0.36,0.8,0.4,1.4,2.1
...,...,...,...,...,...,...,...
792,recovery,-,-,-,-,-,-
793,recovery,-,-,-,-,-,-
794,recovery,-,-,-,-,-,-
795,recovery,-,-,-,-,-,-


In [46]:
# Load the raw log: tab-separated, no header row, columns named explicitly here.
df_raw = pd.read_csv(
    raw_log,
    sep='\t',
    header=None,
    names=['t', 'flow', 'fo2', 'fco2'],
)
df_raw

Unnamed: 0,t,flow,fo2,fco2
0,0.000,0.0,20.90972,0.001764
1,0.008,0.0,20.98204,0.002383
2,0.016,0.0,21.00116,0.002405
3,0.024,0.0,20.94472,0.001773
4,0.032,0.0,20.86901,0.000987
...,...,...,...,...
200044,1600.352,0.0,20.73049,0.000000
200045,1600.360,0.0,20.73826,0.000000
200046,1600.368,0.0,20.72032,0.000000
200047,1600.376,0.0,20.69039,0.000000


In [123]:
# Sampling frequency = 1 / mean spacing between consecutive timestamps in df_raw['t']
# (in Hz if 't' is in seconds — TODO confirm the unit of the log's time column).
# Series.diff() yields t[i] - t[i-1] directly. The earlier `[:-1]` minus `shift(1)[1:]`
# expression produced these differences only through index alignment and left the
# last interval out of the mean.
sampling_freq = 1 / df_raw['t'].diff().mean()
# Duration of the demeaning window, in seconds.
demean_window_seconds = 20

In [124]:
# Window duration times sampling frequency, i.e. the demeaning window length in samples.
demean_window_seconds * sampling_freq

2500.0

In [244]:
# Tuning constant for the high-pass filter: it is multiplied by sampling_freq to form
# the `Wn` argument of signal.butter.
# NOTE(review): scipy documents Wn (when `fs` is not passed) as a fraction of the
# Nyquist frequency, so filter_freq is not a cutoff in Hz as used here — confirm the
# intended cutoff and consider passing the cutoff in Hz together with fs=sampling_freq.
filter_freq = 2*1e-6
# Reciprocal of the value that is passed to butter as Wn.
1/(filter_freq * sampling_freq)

4000.0

In [245]:
# Volume is obtained by integrating flow over time with the cumulative trapezoid rule.
# (An earlier variant, no longer used, subtracted a rolling mean of the flow over
# demean_window_seconds before integrating.)
time_axis = df_raw['t']

# Baseline: integrate the flow exactly as logged.
df_raw['instant_vol_raw'] = cumulative_trapezoid(y=df_raw['flow'], x=time_axis, initial=0)

# 4th-order Butterworth high-pass in second-order-sections form, applied to the flow
# before integrating.
# NOTE(review): Wn is given without `fs`, so scipy reads it as a fraction of the
# Nyquist frequency — confirm that filter_freq * sampling_freq is the intended value.
highpass_wn = filter_freq * sampling_freq
sos = signal.butter(N=4, Wn=highpass_wn, btype='highpass', output='sos')
flow_filtered = signal.sosfilt(sos, df_raw['flow'])
df_raw['instant_vol'] = cumulative_trapezoid(y=flow_filtered, x=time_axis, initial=0)

In [246]:
# Sum over all samples of the volume integrated from the high-pass-filtered flow.
df_raw['instant_vol'].sum()

-64.86788364850098

In [None]:
# Plot the unfiltered and the high-pass-filtered integrated volume against time.
# Alternative view kept for reference; 'flow_ma' and 'filtered' are not created by the
# cells shown here, so they would have to be added to df_raw first:
# px.line(df_raw, x='t', y=['flow', 'flow_ma', 'filtered', 'instant_vol'])
volume_columns = ['instant_vol_raw', 'instant_vol']
px.line(df_raw, x='t', y=volume_columns)