- This notebook shows how it is possible to calculate the output pressure from u_in for breath_ids, using only PID controller theory.
- This notebook explores only the PID terms P > 0 and I = 0.

In [None]:
import numpy as np
import pandas as pd
import gc
import os
import random
import matplotlib.pyplot as plt

In [None]:
# Load the competition data, then derive the same per-breath helper columns on both frames.
train = pd.read_csv('../input/ventilator-pressure-prediction/train.csv')
test = pd.read_csv('../input/ventilator-pressure-prediction/test.csv')

for _frame in (train, test):
    # dcount: running row index inside each breath (0 for the first row).
    _frame['dcount'] = _frame.groupby('breath_id')['id'].transform('cumcount')

    # uo: 80 minus the per-breath sum of u_out
    # (presumably the number of u_out == 0 rows, if every breath has 80 rows -- TODO confirm).
    _frame['uo'] = 80 - _frame.groupby('breath_id')['u_out'].transform('sum')

    # time_delta: time elapsed since the previous row of the same breath (0 for the first row).
    _previous_time = _frame.groupby('breath_id')['time_step'].shift(1)
    _frame['time_delta'] = (_frame['time_step'] - _previous_time).fillna(0)

print(train.shape)
train.head()

In [None]:
# Attach the blended model predictions to the training rows as column 'pred'.
oof = pd.read_csv('../input/ventmodels/train-blend-1.csv').set_axis(['id', 'pred'], axis=1)
train = pd.merge(train, oof, how='left', on='id')
train

In [None]:
# Attach an existing submission's predictions to the test rows as column 'pred'.
oof = pd.read_csv('../input/ventmodels/1336_submission.csv').set_axis(['id', 'pred'], axis=1)
test = pd.merge(test, oof, how='left', on='id')
test

In [None]:
# Absolute prediction error, forced to 0 on rows where u_out > 0.
_abs_err = np.abs(train['pressure'] - train['pred'])
train['error'] = _abs_err.mask(train['u_out'] > 0, 0)

# Distribution of the error over the u_out == 0 rows.
train.loc[train['u_out'] == 0, 'error'].hist(bins=20)

In [None]:
# Drift threshold: mean + 3 standard deviations of the error on u_out == 0 rows.
_err_u_out0 = train.loc[train['u_out'] == 0, 'error']
maxdrift = _err_u_out0.mean() + 3 * _err_u_out0.std()
maxdrift

- According to the host paper https://arxiv.org/pdf/2102.06779.pdf, only the P and I terms were used in the PID simulations.
- The hosts give the P, I and set-point values used in Appendix A.2.

In [None]:
# Candidate gains: 0.01, then 0.1 .. 0.9 in steps of 0.1, then the integers 1 .. 10.
p_coef = [0.01] + [tenth / 10 for tenth in range(1, 10)] + list(range(1, 11))
# The integral gain is searched over the same values (independent list object).
i_coef = list(p_coef)
# Candidate pressure set-points: 10, 15, ..., 35.
setpoints = list(range(10, 40, 5))

In [None]:
# Sorted list of the distinct training pressures, rounded to 7 decimals.
_rounded_pressure = train['pressure'].round(7)
unique_pressures = list(np.sort(pd.unique(_rounded_pressure)))
(len(unique_pressures), unique_pressures[:10])

In [None]:
# Drift threshold expressed in units of the gap between the two smallest distinct pressures.
maxdrift / (unique_pressures[1]-unique_pressures[0])

In [None]:
# Pressure range and step size used by the matching code (maximum, minimum, step).
max_pressure, min_pressure, diff_pressure = (
    64.82099173863328,
    -1.895744294564641,
    0.0703021454512,
)

In [None]:
def generate_u_in(pressure, time_step, kp, ki, kt, integral=0, n_steps=32):
    """Simulate the PI controller output (u_in) for the start of one breath.

    For each step the error is ``kt - pressure[j]``, the integral state is
    moved towards that error by the factor ``dt / (dt + 0.5)``, and the
    controller output is ``kp * error + ki * integral``.

    Parameters
    ----------
    pressure : sequence of float
        Pressure at each step of the breath.
    time_step : sequence of float
        Time stamp of each step; the first delta is measured from 0.
    kp, ki : float
        Proportional and integral gains.
    kt : float
        Pressure set-point.
    integral : float, optional
        Initial value of the integral state (default 0).
    n_steps : int, optional
        Maximum number of steps to simulate (default 32, the value that was
        previously hard-coded). Inputs shorter than ``n_steps`` are simulated
        in full instead of raising ``IndexError``.

    Returns
    -------
    list of float
        Simulated u_in values, one per simulated step.
    """
    dt = np.diff(time_step, prepend=[0])
    # Never index past the end of the inputs.
    n_steps = min(n_steps, len(pressure), len(dt))

    preds = []
    for j in range(n_steps):
        error = kt - pressure[j]
        # Smoothing factor of the integral update for this time delta.
        weight = dt[j] / (dt[j] + 0.5)
        integral += (error - integral) * weight
        preds.append(kp * error + ki * integral)
    return preds

# Compare the recorded u_in of breath 1 with the PI simulation (kp=0.8, ki=8.0, set-point 20).
_breath_1 = train.loc[train['breath_id'] == 1]
pressure = _breath_1['pressure'].values
timestep = _breath_1['time_step'].values
_recorded_u_in = _breath_1['u_in'].values[:32]

u_in = generate_u_in(pressure, timestep, 0.8, 8.0, 20)
# Residual between the recorded and the simulated control signal.
noise = _recorded_u_in - u_in

_first_steps = timestep[:32]
plt.figure()
plt.plot(_first_steps, _recorded_u_in, label='u_in')
plt.plot(_first_steps, u_in, label='u_in_hat')
plt.legend()
plt.show()

In [None]:
# Ps = train['pressure'].unique()
# Ps = np.sort(Ps)
# P1s = Ps
# P2s = Ps[:, None]
# PDiff = P1s-P2s

# def fill_gaps_b(kp, ki, kt, u_in, dts):
#     us = np.zeros(80, dtype='float64') - 9
#     preds = np.zeros(80, dtype='float64') - 9
#     match = 0
#     for j in range(32):
#         u1, u2 = u_in[j], u_in[j+1]
#         ki2 = ki * dts[j+1] / (0.5 + dts[j+1])
#         u2_hat= u1+kp*PDiff + ki2*(kt-P2s-(u1-kp*(kt-P1s))/ki)
#         m = np.abs(u2 - u2_hat) <= 1e-7
#         if np.any(m):
#             us[j+1] = u2
#             if preds[j + 1] == -9:
#                 match += 1
#                 pos = np.where(m)[0][0]
#                 preds[j+1] = P2s[pos]
#     return preds, match

# pressure = train[train['breath_id'] == 1]['pressure'].values.copy()
# timestep = train[train['breath_id'] == 1]['time_delta'].values.copy()
# u_in = train[train['breath_id'] == 1]['u_in'].values.copy()
# plt.plot(pressure[:32])

# for P in p_coef:
#     for I in i_coef:
#         for SP in setpoints:
#             res, match = fill_gaps_b(P, I, SP, u_in, timestep )
#             if match>16:
#                 print(match, P, I, SP)

In [None]:
# Ps = train['pressure'].unique()
# Ps = np.sort(Ps)
# P1s = Ps
# P2s = Ps[:, None]
# PDiff = P1s-P2s

# def fill_gaps_b(kp, ki, kt, u_in, dts, ypred):
#     us = np.zeros(80, dtype='float64') - 9
#     preds = np.zeros(80, dtype='float64') - 9
#     match = 0
#     for j in range(1,32):
#         u1, u2 = u_in[j], u_in[j+1]
#         ki2 = ki * dts[j+1] / (0.5 + dts[j+1])
#         vmin = np.min(ypred[j:j+2])
#         vmax = np.max(ypred[j:j+2])
#         pi = np.clip(vmin - 2.0, min_pressure, max_pressure)
#         pe = np.clip(vmax + 2.0, min_pressure, max_pressure)
#         pi = np.sum(Ps<=pi)
#         pe = np.sum(Ps<=pe)
#         u2_hat= u1 + kp*PDiff[pi:pe, pi:pe] + ki2*(kt-P2s[pi:pe]-(u1-kp*(kt-P1s[pi:pe]))/ki)
#         #print(u2.shape, u2_hat.shape)
#         m = np.abs(u2 - u2_hat) <= 1e-7
#         if np.any(m):
#             #print('hit')
#             us[j+1] = u2
#             if preds[j + 1] == -9:
#                 match += 1
#                 pos = np.where(m)[0][0]
#                 preds[j+1] = P2s[pi+pos]
            
        
#     return preds, match


In [None]:
# train['breath_id'].unique()

In [None]:
# def match_breath(u_in, u_out, timestep, kp, ki, kt):
#     dt = np.diff(timestep)
#     dt2 = dt / (dt + 0.5)
#     in_len = np.sum(1 - u_out)
#     preds = np.zeros(len(u_in)) - 999
    
#     for t in range(1, in_len):
#         # If we had a match in previous timestep, we can re-use that match
#         if preds[t - 1] != -999:
#             P0 = preds[t - 1]
#         else:
#             P0 = np.arange(MIN_PRESSURE, MAX_PRESSURE + DIFF_PRESSURE, DIFF_PRESSURE)
            
#         I0 = (u_in[t - 1] - kp * (kt - P0))/ki

#         # Calculate 2 points for our P1
#         I11 = I0 + (kt - MIN_PRESSURE - I0) * dt2[t-1]
#         u_in_hat1 = kp * (kt - MIN_PRESSURE) + ki * I11

#         I12 = I0 + (kt - MIN_PRESSURE2 - I0) * dt2[t-1]
#         u_in_hat2 = kp * (kt - MIN_PRESSURE2) + ki * I12

#         # Get slope and intersection point
#         slope = u_in_hat2 - u_in_hat1
#         x_intersect = (u_in[t] - u_in_hat2) / slope
        
#         # We want the intersection to be an integer
#         diff = np.abs(np.round(x_intersect) - x_intersect)
        
#         if diff.min() < 1e-10:
#             pos = np.argmin(diff)
            
#             if preds[t - 1] == -999:
#                 preds[t - 1] = P0[np.argmin(diff)]
#                 preds[t] = MIN_PRESSURE + int(x_intersect[pos] + 1) * DIFF_PRESSURE
#             else:
#                 preds[t] = MIN_PRESSURE + (np.round(x_intersect) + 1) * DIFF_PRESSURE
            
#     return preds

In [None]:
# i = 1
# pressure = train[train['breath_id'] == i]['pressure'].values.copy()
# timestep = train[train['breath_id'] == i]['time_delta'].values.copy()
# u_in = train[train['breath_id'] == i]['u_in'].values.copy()
# ypred = train[train['breath_id'] == i]['pred'].values.copy()

In [None]:
# %%timeit
# res, match = fill_gaps_b(1, 8, 20, u_in, timestep, ypred )

In [None]:
MAX_PRESSURE  = PRESSURE_MAX  = 64.82099173863328
MIN_PRESSURE  = PRESSURE_MIN  = -1.895744294564641
DIFF_PRESSURE = PRESSURE_STEP = 0.0703021454512
MIN_PRESSURE2 = PRESSURE_MIN2 = MIN_PRESSURE + DIFF_PRESSURE


def match_breath(u_in, u_out, timestep, kp, ki, kt,
                 p_min=MIN_PRESSURE, p_max=MAX_PRESSURE, p_step=DIFF_PRESSURE,
                 tol=1e-10):
    """Invert the PI controller to recover on-grid pressures for one breath.

    For every pair of consecutive steps in the ``u_out == 0`` part of the
    breath, the function looks for a pressure on the discrete grid
    ``p_min + k * p_step`` that reproduces ``u_in[t]`` exactly under the
    controller ``u_in = kp * (kt - P) + ki * I`` with the integral update
    ``I += (kt - P - I) * dt / (dt + 0.5)``.

    Parameters
    ----------
    u_in, u_out, timestep : array-like
        Control signal, valve flag (0/1) and time stamps of one breath.
    kp, ki : float
        Proportional and integral gains; ``ki`` must be non-zero because the
        integral state is recovered by dividing by it.
    kt : float
        Pressure set-point.
    p_min, p_max, p_step : float, optional
        Pressure grid; defaults to the module-level constants.
    tol : float, optional
        Maximum distance from an integer grid index for a step to count as
        a match (default 1e-10).

    Returns
    -------
    preds : numpy.ndarray
        Recovered pressure per step; -999 where nothing was matched.
    match : int
        Number of steps for which a match was found.
    """
    unmatched = -999.0
    u_in = np.asarray(u_in, dtype=float)
    dt = np.diff(timestep)
    dt2 = dt / (dt + 0.5)
    # Cast so the value is usable in range() for any numeric dtype of u_out.
    in_len = int(np.sum(1 - np.asarray(u_out)))
    preds = np.full(len(u_in), unmatched)

    # Candidate pressures for a step whose predecessor is still unknown
    # (loop-invariant, so it is built once).
    grid = np.arange(p_min, p_max + p_step, p_step)
    p_min2 = p_min + p_step

    match = 0
    for t in range(1, in_len):
        # If the previous step was matched, reuse that pressure; otherwise
        # try every grid value as the previous pressure.
        prev_known = preds[t - 1] != unmatched
        P0 = preds[t - 1] if prev_known else grid

        # Integral state implied by u_in[t-1] and the previous pressure.
        I0 = (u_in[t - 1] - kp * (kt - P0)) / ki

        # u_in[t] is linear in the current pressure: evaluate it at the first
        # two grid points to get the line.
        I11 = I0 + (kt - p_min - I0) * dt2[t - 1]
        u_in_hat1 = kp * (kt - p_min) + ki * I11

        I12 = I0 + (kt - p_min2 - I0) * dt2[t - 1]
        u_in_hat2 = kp * (kt - p_min2) + ki * I12

        # Grid index (minus one) at which the line reaches the observed u_in[t].
        slope = u_in_hat2 - u_in_hat1
        x_intersect = np.atleast_1d((u_in[t] - u_in_hat2) / slope)

        # A valid solution must sit on the grid, i.e. be an integer index.
        diff = np.abs(np.round(x_intersect) - x_intersect)

        if diff.min() < tol:
            match += 1
            pos = int(np.argmin(diff))

            if not prev_known:
                preds[t - 1] = grid[pos]
            # Round (not truncate): the index is only within `tol` of an
            # integer and may sit just below it.
            preds[t] = p_min + (np.round(x_intersect[pos]) + 1) * p_step

    return preds, match

In [None]:
# Pull the columns of breath `i` out of the training frame as independent numpy arrays.
i = 1
_breath = train.loc[train['breath_id'] == i]
pressure, timestep, u_in, u_out, ypred = (
    _breath[_col].to_numpy(copy=True)
    for _col in ('pressure', 'time_step', 'u_in', 'u_out', 'pred')
)

In [None]:
%%time
# Time a single match_breath call on the current breath with kp=1, ki=8, set-point 20.
res, match = match_breath(u_in, u_out, timestep, 1, 8, 20)

In [None]:
import time
# starttime = time.time()
# count = 0
# for i in train['breath_id'].unique()[:10]:
#     count += 1
#     pressure = train[train['breath_id'] == i]['pressure'].values.copy()
#     timestep = train[train['breath_id'] == i]['time_step'].values.copy()
#     u_in = train[train['breath_id'] == i]['u_in'].values.copy()
#     u_out = train[train['breath_id'] == i]['u_out'].values.copy()
#     ypred = train[train['breath_id'] == i]['pred'].values.copy()
#     match = 0
#     for P in p_coef:
#         for I in i_coef:
#             for SP in setpoints:
#                 res, match = match_breath(u_in, u_out, timestep, P, I, SP)
#                 if match>24:
#                     print(i, count, match, P, I, SP, (time.time()-starttime), (time.time()-starttime)/count )
#                     break
#             if match>24:
#                 break
#         if match>24:
#             break   
#     print((time.time()-starttime)/count)
# #1 1 28 0.8 8 20 2.3033382892608643 2.3033394813537598

In [None]:
# %%time
# import time
# starttime = time.time()
# count = 0
# for i in train['breath_id'].unique()[:4]:
#     count += 1
#     pressure = train[train['breath_id'] == i]['pressure'].values.copy()
#     timestep = train[train['breath_id'] == i]['time_delta'].values.copy()
#     u_in = train[train['breath_id'] == i]['u_in'].values.copy()
#     ypred = train[train['breath_id'] == i]['pred'].values.copy()
#     match = 0
#     for P in p_coef:
#         for I in i_coef:
#             for SP in setpoints:
#                 res, match = fill_gaps_b(P, I, SP, u_in, timestep, ypred )
#                 if match>24:
#                     print(i, count, match, P, I, SP, (time.time()-starttime), (time.time()-starttime)/count )
#                     break
#             if match>24:
#                 break
#         if match>24:
#             break   
#     print()

In [None]:
# %%time
# import time
# starttime = time.time()
# count = 0
# for i in train['breath_id'].unique():
#     count += 1
#     pressure = train[train['breath_id'] == i]['pressure'].values.copy()
#     timestep = train[train['breath_id'] == i]['time_delta'].values.copy()
#     u_in = train[train['breath_id'] == i]['u_in'].values.copy()
#     ypred = train[train['breath_id'] == i]['pred'].values.copy()
#     match = 0
#     for P in p_coef:
#         for I in i_coef:
#             for SP in setpoints:
#                 res, match = fill_gaps_b(P, I, SP, u_in, timestep, ypred )
#                 if match>24:
#                     print(i, count, match, P, I, SP, (time.time()-starttime), (time.time()-starttime)/count )
#                     break
#             if match>24:
#                 break
#         if match>24:
#             break   
#     print()

In [None]:
starttime = time.time()

# Grid-search (P, I, SP) for the first 1000 test breaths and keep, for each
# breath, the first combination for which match_breath matches more than 24 steps.
_first_breaths = test['breath_id'].unique()[:1000]
# Split the frame once instead of boolean-masking the whole test set per breath;
# sort=False keeps the breaths in order of first appearance.
_breath_groups = test.loc[test['breath_id'].isin(_first_breaths)].groupby('breath_id', sort=False)

PIDTEST = []
count = 0
for count, (i, _breath) in enumerate(_breath_groups, start=1):
    ids = _breath['id'].values.copy()
    timestep = _breath['time_step'].values.copy()
    u_in = _breath['u_in'].values.copy()
    u_out = _breath['u_out'].values.copy()
    match = 0
    # Same search order as three nested loops: P outermost, SP innermost.
    _candidates = ((P, I, SP) for P in p_coef for I in i_coef for SP in setpoints)
    for P, I, SP in _candidates:
        res, match = match_breath(u_in, u_out, timestep, P, I, SP)
        if match > 24:
            PIDTEST.append(pd.DataFrame({
                'id': ids,
                'breath_id': i,
                'P': P,
                'I': I,
                'SP': SP,
                'pressure': res,
            }))
            _elapsed = time.time() - starttime
            print(i, count, match, P, I, SP, _elapsed, _elapsed / count)
            break

# pd.concat raises on an empty list, so fall back to an empty frame with the same columns.
if PIDTEST:
    PIDTEST = pd.concat(PIDTEST).reset_index(drop=True)
else:
    PIDTEST = pd.DataFrame(columns=['id', 'breath_id', 'P', 'I', 'SP', 'pressure'])
PIDTEST.shape

In [None]:
# plt.plot(pressure[:32])
# plt.plot(res[:32])

In [None]:
# Persist the per-row PID matches (unmatched rows carry pressure -999).
PIDTEST.to_csv('pid-test-1.csv', index=False)

In [None]:
# P  = proportional gain, SP = pressure set-point.
# With the integral term switched off the controller law is u_in = P * (SP - pressure),
# so the pressure implied by a (P, SP) pair is: pressure = SP - u_in / P.

BIDtrain = []
for SP in setpoints:
    for P in p_coef:
        # Pressure implied by this (P, SP) pair for every training row.
        train['u_ctrl'] = (SP - train['u_in'] / P).round(decimals=7)

        # 1 where the implied pressure equals one of the distinct train pressures, else 0.
        train['isclass'] = train['u_ctrl'].isin(unique_pressures).astype('int64')

        # Per breath, count the on-grid rows among u_out == 0 rows after the first row.
        _scored = train.loc[(train['u_out'] == 0) & (train['dcount'] >= 1)]
        _per_breath = _scored.groupby('breath_id')[['isclass', 'uo']].agg({'isclass': 'sum', 'uo': 'first'})
        _per_breath = _per_breath.reset_index()
        _per_breath = _per_breath.sort_values('isclass', ascending=False).reset_index(drop=True)

        # Keep the breaths whose on-grid count is at least uo - 3.
        _hits = _per_breath.loc[_per_breath['isclass'] >= (_per_breath['uo'] - 3)]
        if len(_hits) > 0:
            print('matches:', _hits.shape[0], 'P=', P, 'SP=', SP)
            BIDtrain.append(_hits.assign(P=P, SP=SP))

BIDtrain = pd.concat(BIDtrain)
print(BIDtrain.shape)
BIDtrain.head(10)

In [None]:
# P  = proportional gain, SP = pressure set-point.
# With the integral term switched off the controller law is u_in = P * (SP - pressure),
# so the pressure implied by a (P, SP) pair is: pressure = SP - u_in / P.
# The test set has no pressure column, so a breath is attributed to a P-only controller
# only when the implied pressures fall on the distinct pressure values seen in the train set.

BIDtest = []
for SP in setpoints:
    for P in p_coef:
        # Pressure implied by this (P, SP) pair for every test row.
        test['u_ctrl'] = (SP - test['u_in'] / P).round(decimals=7)

        # 1 where the implied pressure equals one of the distinct train pressures, else 0.
        test['isclass'] = test['u_ctrl'].isin(unique_pressures).astype('int64')

        # Per breath, count the on-grid rows among u_out == 0 rows after the first row.
        _scored = test.loc[(test['u_out'] == 0) & (test['dcount'] >= 1)]
        _per_breath = _scored.groupby('breath_id')[['isclass', 'uo']].agg({'isclass': 'sum', 'uo': 'first'})
        _per_breath = _per_breath.reset_index()
        _per_breath = _per_breath.sort_values('isclass', ascending=False).reset_index(drop=True)

        # Keep the breaths whose on-grid count is at least uo - 3.
        _hits = _per_breath.loc[_per_breath['isclass'] >= (_per_breath['uo'] - 3)]
        if len(_hits) > 0:
            print('matches:', _hits.shape[0], 'P=', P, 'SP=', SP)
            BIDtest.append(_hits.assign(P=P, SP=SP))

BIDtest = pd.concat(BIDtest)
print(BIDtest.shape)
BIDtest.head(10)

In [None]:
# Plot recorded pressure against the P-only reconstruction for up to 10 matched
# training breaths (bounded by the number of matches, so fewer than 10 rows in
# BIDtrain no longer raises IndexError).
for i in range(min(10, BIDtrain.shape[0])):
    bid = BIDtrain.iloc[i]
    P = bid.P
    SP = bid.SP
    tmp = train.loc[train.breath_id == bid.breath_id].copy()
    # Pressure implied by the matched (P, SP) pair, left unrounded for plotting.
    tmp['u_ctrl'] =  (SP - tmp['u_in']/P)
    tmp.loc[(tmp.u_out==0)&(tmp.dcount>=0)].plot( x='time_step', y=['pressure', 'u_ctrl'], title = 'P='+str(P) + ' SP:'+str(SP) )

In [None]:
# Plot the P-only pressure reconstruction for up to 10 matched test breaths
# (bounded by the number of matches, so fewer than 10 rows in BIDtest no longer
# raises IndexError).
for i in range(min(10, BIDtest.shape[0])):
    bid = BIDtest.iloc[i]
    P = bid.P
    SP = bid.SP
    tmp = test.loc[test.breath_id == bid.breath_id].copy()
    # Pressure implied by the matched (P, SP) pair, left unrounded for plotting.
    tmp['u_ctrl'] =  (SP - tmp['u_in']/P)
    tmp.loc[(tmp.u_out==0)&(tmp.dcount>=0)].plot( x='time_step', y=['u_ctrl'], title = 'P='+str(P) + ' SP:'+str(SP) )

In [None]:
# Inspect the test frame, including the helper columns added by the search above.
test.head()

In [None]:
# Attach the matched (P, SP) pair to every row of its breath.
# BIDtest can list the same breath under several (P, SP) pairs; merging it as-is
# would duplicate test rows (and later submission rows), so keep the first pair per breath.
_pid_params = BIDtest[['breath_id', 'P', 'SP']].drop_duplicates(subset='breath_id', keep='first')
test = test.merge(_pid_params, on='breath_id', how='left')

# P-only reconstruction: pressure = SP - u_in / P (NaN for breaths without a match).
test['pred'] = (test.SP - test['u_in']/test.P).round(decimals=7)
test_ids = _pid_params.breath_id.values

# Rows that will overwrite the submission: matched breaths, first row of each breath excluded.
tmp = test.loc[(test.dcount>0)&(test.breath_id.isin(test_ids))]
tmp

In [None]:
# Start from the model submission and overwrite pressure wherever a P-only reconstruction exists.
sub = pd.read_csv('../input/ventmodels/Exp-102-ResBiLSTM-v2-CustomLoss-v1-AdamW-LogUin-30Fold-Seed2025-FineTune.csv')
sub = pd.merge(sub, tmp[['id', 'pred']], how='left', on='id')

_has_pid = sub['pred'].notna()
sub['pressure'] = sub['pressure'].where(~_has_pid, sub['pred'])

# The helper column is no longer needed.
sub = sub.drop(columns='pred')
sub

In [None]:
# Reload the saved PI matches and keep only rows with a recovered pressure (above the -999 sentinel).
_pid_rows = pd.read_csv('pid-test-1.csv')
_is_matched = _pid_rows['pressure'] > -999
tmp = (
    _pid_rows.loc[_is_matched, ['id', 'pressure']]
    .reset_index(drop=True)
    .rename(columns={'pressure': 'pred'})
)
tmp.head()

In [None]:
# Overwrite the submission pressure wherever a recovered PI pressure exists for the row.
sub = pd.merge(sub, tmp[['id', 'pred']], how='left', on='id')
_has_pid = sub['pred'].notna()
sub['pressure'] = sub['pressure'].where(~_has_pid, sub['pred'])

# The helper column is no longer needed.
sub = sub.drop(columns='pred')
sub

In [None]:
# Write the post-processed submission (model predictions overwritten by PID matches where available).
sub.to_csv('submission-postprocessing.csv', index=False)