In [None]:
import numpy as np 
import pandas as pd 
from sklearn.metrics import mean_absolute_error  
#from sklearn.metrics import mean_absolute_percentage_error
from sklearn.metrics import r2_score
from sklearn.metrics import mean_squared_error


In [None]:
%tensorflow_version 2.x
import tensorflow as tf
device_name = tf.test.gpu_device_name()
if device_name != '/device:GPU:0':
  raise SystemError('GPU device not found')
print('Found GPU at: {}'.format(device_name))

Found GPU at: /device:GPU:0


In [None]:
# id - globally-unique time step identifier across an entire file
# breath_id - globally-unique identifier for each breath (all rows of one breath share it)
# R - lung attribute indicating how restricted the airway is (in cmH2O/L/S). Physically, this is the change in pressure per change in flow (air volume per time). Intuitively, one can imagine blowing up a balloon through a straw. We can change R by changing the diameter of the straw, with higher R being harder to blow.
# C - lung attribute indicating how compliant the lung is (in mL/cmH2O). Physically, this is the change in volume per change in pressure. Intuitively, one can imagine the same balloon example. We can change C by changing the thickness of the balloon’s latex, with higher C having thinner latex and easier to blow.
# time_step - the actual time stamp.
# u_in - the control input for the inspiratory solenoid valve. Ranges from 0 to 100.
# u_out - the control input for the expiratory solenoid valve. Either 0 or 1.
# pressure - the airway pressure measured in the respiratory circuit, measured in cmH2O.

In [None]:
# The CSVs live on Google Drive, so the drive must be mounted before they are
# read. Mounting here makes this cell work on a fresh kernel regardless of
# whether any other mount cell has been run yet; mounting an already-mounted
# drive only prints a notice.
from google.colab import drive
drive.mount('/content/drive')

df_train = pd.read_csv('/content/drive/MyDrive/googleb/train.csv')
df_test = pd.read_csv('/content/drive/MyDrive/googleb/test.csv')

In [None]:
# Mount Google Drive into the Colab VM at /content/drive; the CSV paths read by
# this notebook live under that mount point.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
 # Peek at the first 320 rows of the training frame.
 df_train.iloc[:320]

Unnamed: 0,id,breath_id,time_step,u_in,u_out,pressure,area,u_in_lag2,u_in_lag4,R_20,R_5,R_50,C_10,C_20,C_50,ewm_u_in_mean,ewm_u_in_std,ewm_u_in_corr
0,1,1,0.000000,0.083334,0,5.837492,0.000000,0.000000,0.000000,1,0,0,0,0,1,0.083334,,
1,2,1,0.033652,18.383041,0,5.907794,0.618632,0.000000,0.000000,1,0,0,0,0,1,9.550171,12.939847,1.0
2,3,1,0.067514,22.509278,0,7.876254,2.138333,0.083334,0.000000,1,0,0,0,0,1,14.172507,11.777739,1.0
3,4,1,0.101542,22.808822,0,11.742872,4.454391,18.383041,0.000000,1,0,0,0,0,1,16.560977,10.448647,1.0
4,5,1,0.135756,25.355850,0,12.234987,7.896588,22.509278,0.083334,1,0,0,0,0,1,18.571834,9.801731,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
315,316,4,2.400609,4.946119,1,7.032628,393.557153,4.925605,4.897809,0,0,1,0,0,1,4.275163,2.256531,1.0
316,317,4,2.432266,4.954007,1,6.540513,405.606618,4.936515,4.912832,0,0,1,0,0,1,4.320843,2.186126,1.0
317,318,4,2.464062,4.960767,1,6.399909,417.830256,4.946119,4.925605,0,0,1,0,0,1,4.363890,2.117588,1.0
318,319,4,2.495845,4.966532,1,6.962326,430.225952,4.954007,4.936515,0,0,1,0,0,1,4.404417,2.050887,1.0


Feature addition

In [None]:
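# Because this is time-series data, we can derive additional features from u_in:
# ewm - exponentially weighted mean/std/corr (halflife=10), computed per breath_id
# rolling window - mean/max/std over a window of 10 time steps, per breath_id
# expanding - cumulative mean/max/std per breath_id, defined once 2 values are available
# lag - u_in shifted by 2 and 4 rows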

In [None]:
# 'area' is the running total, within each breath, of time_step * u_in.
weighted_u_in = df_train['time_step'] * df_train['u_in']
df_train['area'] = weighted_u_in.groupby(df_train['breath_id']).cumsum()

In [None]:
# Lag features must not cross breath boundaries. Shifting inside each
# breath_id pads the first rows of a breath with 0 instead of letting them
# inherit the last u_in values of the previous breath, which is what a plain
# frame-wide shift would do.
u_in_by_breath = df_train.groupby('breath_id')['u_in']
df_train['u_in_lag2'] = u_in_by_breath.shift(2).fillna(0)
df_train['u_in_lag4'] = u_in_by_breath.shift(4).fillna(0)

In [None]:
# R and C are cast to strings so that get_dummies treats them as categorical
# and expands each into one indicator column per distinct value.
df_train = pd.get_dummies(df_train.astype({'R': str, 'C': str}))

In [None]:
# Exponentially weighted statistics of u_in, computed independently per breath.
# `transform` is used instead of `apply` because it always returns a result
# aligned to df_train's own index. With `apply`, recent pandas versions prepend
# the group key to the index of like-indexed results, and the column
# assignment then fails on the mismatched index.
u_in_by_breath = df_train.groupby('breath_id')['u_in']
df_train['ewm_u_in_mean'] = u_in_by_breath.transform(lambda s: s.ewm(halflife=10).mean())
df_train['ewm_u_in_std'] = u_in_by_breath.transform(lambda s: s.ewm(halflife=10).std())
# NOTE(review): corr() without an `other` argument correlates u_in with itself,
# so this column is 1.0 wherever it is defined and carries no information. It
# is kept only so the feature count seen by later cells does not change.
df_train['ewm_u_in_corr'] = u_in_by_breath.transform(lambda s: s.ewm(halflife=10).corr())

In [None]:
# Rolling statistics of u_in over the last 10 time steps of the same breath.
# min_periods=1 lets the window emit a value from the first row on (std is
# still NaN for a single observation). `transform` keeps the result aligned to
# df_train's index; `apply` on recent pandas versions adds the group key to the
# index, which breaks the column assignment.
u_in_by_breath = df_train.groupby('breath_id')['u_in']
df_train['rolling_10_mean'] = u_in_by_breath.transform(lambda s: s.rolling(window=10, min_periods=1).mean())
df_train['rolling_10_max'] = u_in_by_breath.transform(lambda s: s.rolling(window=10, min_periods=1).max())
df_train['rolling_10_std'] = u_in_by_breath.transform(lambda s: s.rolling(window=10, min_periods=1).std())

In [None]:
# Expanding (cumulative) statistics of u_in within each breath. expanding(2)
# sets min_periods=2, so the first row of every breath is NaN. `transform`
# keeps the result aligned to df_train's index; `apply` on recent pandas
# versions adds the group key to the index, which breaks the column assignment.
u_in_by_breath = df_train.groupby('breath_id')['u_in']
df_train['expand_mean'] = u_in_by_breath.transform(lambda s: s.expanding(2).mean())
df_train['expand_max'] = u_in_by_breath.transform(lambda s: s.expanding(2).max())
df_train['expand_std'] = u_in_by_breath.transform(lambda s: s.expanding(2).std())

In [None]:
# The windowed features leave NaN where a statistic is undefined (for example
# a std over a single observation); replace every NaN with 0.
df_train = df_train.fillna(value=0)

In [None]:
# Lay the target out as one row of 80 pressure readings per sequence.
# NOTE(review): this relies on every breath having exactly 80 consecutive rows
# in the frame - confirm against the data.
targets = df_train['pressure'].to_numpy().reshape(-1, 80)

# Remove the target and the identifier columns so only model inputs remain.
df_train = df_train.drop(columns=['pressure', 'id', 'breath_id'])

In [None]:
# Sanity check: one row per 80-step sequence, 80 pressure values per row.
targets.shape ###

(75450, 80)

Training LSTM simple model for prediction

In [None]:
from sklearn.preprocessing import RobustScaler, normalize
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.callbacks import LearningRateScheduler
from tensorflow.keras.optimizers.schedules import ExponentialDecay
from sklearn.model_selection import KFold

In [None]:
# Scale every feature column with RobustScaler. From here on `df_train` holds
# the scaler's output rather than the original frame.
# NOTE(review): the scaler is fitted on all rows before the train/test split
# further down, so the held-out rows influence the scaling statistics.
RS = RobustScaler()
RS.fit(df_train)
df_train = RS.transform(df_train)

In [None]:
# Fold the flat (rows, features) matrix into (sequences, 80 steps, features).
df_train = np.reshape(df_train, (-1, 80, df_train.shape[-1]))

In [None]:
# Sanity check: (n_sequences, 80 time steps, n_features).
df_train.shape ###

(75450, 80, 21)

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 30% of the sequences for the final evaluation; the fixed seed keeps
# the split repeatable across runs.
X_train, X_test, y_train, y_test = train_test_split(
    df_train,
    targets,
    test_size=0.3,
    random_state=42,
)

X_train.shape

(52815, 80, 21)

In [None]:
# Sanity check: training targets, one row of 80 values per training sequence.
y_train.shape ###

(52815, 80)

In [None]:
# Shape of a single sequence (time steps, features); the training cell passes
# the same slice to keras.layers.Input.
X_train.shape[-2:]

(80, 21)

In [None]:
EPOCH = 2
BATCH_SIZE = 1024
N_SPLITS = 2

# Device-placement logging prints one line per executed op and buries the
# training output, so it is switched off explicitly (this also resets it if an
# earlier run in the same kernel had enabled it).
tf.debugging.set_log_device_placement(False)


def build_model(input_shape, n_train_samples):
    """Build and compile the stacked bidirectional-LSTM regressor.

    Args:
        input_shape: (time_steps, n_features) of one input sequence.
        n_train_samples: number of sequences in the training fold; used to
            size the learning-rate decay horizon.

    Returns:
        A compiled Keras model that emits one value per time step and is
        trained with MAE loss.
    """
    # ExponentialDecay is a per-optimizer-step schedule, so it is handed to the
    # optimizer directly. Wrapping it in a LearningRateScheduler callback would
    # feed it the epoch index instead of the step count, making the decay far
    # slower than decay_steps suggests.
    lr_schedule = ExponentialDecay(
        initial_learning_rate=1e-3,
        decay_steps=400 * ((n_train_samples * 0.8) / BATCH_SIZE),
        decay_rate=1e-5,
    )
    model = keras.models.Sequential([
        keras.layers.Input(shape=input_shape),
        keras.layers.Bidirectional(keras.layers.LSTM(400, return_sequences=True)),
        keras.layers.Bidirectional(keras.layers.LSTM(300, return_sequences=True)),
        keras.layers.Bidirectional(keras.layers.LSTM(200, return_sequences=True)),
        keras.layers.Bidirectional(keras.layers.LSTM(100, return_sequences=True)),
        keras.layers.Dense(50, activation='relu'),
        keras.layers.Dense(1),
    ])
    model.compile(
        optimizer=keras.optimizers.Adam(learning_rate=lr_schedule),
        loss="mae",
    )
    return model


gpus = tf.config.list_logical_devices('GPU')
strategy = tf.distribute.MirroredStrategy(gpus)

kf = KFold(n_splits=N_SPLITS, shuffle=True, random_state=228)

# One flattened prediction vector for X_test per fold; averaged afterwards.
test_preds = []
for fold, (train_idx, valid_idx) in enumerate(kf.split(X_train, y_train)):
    print('-'*15, '>', f'Fold {fold+1}', '<', '-'*15)
    X, X_valid = X_train[train_idx], X_train[valid_idx]
    y, y_valid = y_train[train_idx], y_train[valid_idx]

    # Variables must be created under the strategy scope; fit/predict pick the
    # strategy up from the model afterwards.
    with strategy.scope():
        model = build_model(X.shape[-2:], len(X))

    model.fit(
        X, y,
        validation_data=(X_valid, y_valid),
        epochs=EPOCH,
        batch_size=BATCH_SIZE,
    )

    # predict returns (n_sequences, time_steps, 1); flatten to a 1-D vector.
    test_preds.append(model.predict(X_test).reshape(-1))


INFO:tensorflow:Using MirroredStrategy with devices ('/job:localhost/replica:0/task:0/device:GPU:0',)


INFO:tensorflow:Using MirroredStrategy with devices ('/job:localhost/replica:0/task:0/device:GPU:0',)


[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Executing op Identity in device /job:localhost/replica:0/task:0/device:GPU:0
Executing op __inference_predict_function_327618 in device /job:localhost/replica:0/task:0/device:GPU:0
Executing op ReadVariableOp in device /job:localhost/replica:0/task:0/device:GPU:0
Executing op Identity in device /job:localhost/replica:0/task:0/device:GPU:0
Executing op Identity in device /job:localhost/replica:0/task:0/device:GPU:0
Executing op __inference_predict_function_327618 in device /job:localhost/replica:0/task:0/device:GPU:0
Executing op ReadVariableOp in device /job:localhost/replica:0/task:0/device:GPU:0
Executing op Identity in device /job:localhost/replica:0/task:0/device:GPU:0
Executing op Identity in device /job:localhost/replica:0/task:0/device:GPU:0
Executing op __inference_predict_function_327618 in device /job:localhost/replica:0/task:0/device:GPU:0
Executing op ReadVariableOp in device /job:localhost/replica:0/task:0/de

In [None]:
# NOTE: everything in this cell is commented out. It is a disabled TPU variant
# of the training cell (5 folds, 100 epochs) kept for reference only.
# EPOCH = 100
# BATCH_SIZE = 1024


# tpu = tf.distribute.cluster_resolver.TPUClusterResolver.connect()
# tpu_strategy = tf.distribute.experimental.TPUStrategy(tpu)

# with tpu_strategy.scope():
#     kf = KFold(n_splits = 5, shuffle = True, random_state = 228)
#     test_preds = []
#     for fold, (train_idx, test_idx) in enumerate(kf.split(X_train, y_train)):
#         print('-'*15, '>', f'Fold {fold+1}', '<', '-'*15)
#         X, X_valid = X_train[train_idx], X_train[test_idx]
#         y, y_valid = y_train[train_idx], y_train[test_idx]
#         model = keras.models.Sequential([
#             keras.layers.Input(shape = X.shape[-2:]),
#             keras.layers.Bidirectional(keras.layers.LSTM(400, return_sequences = True)),
#             keras.layers.Bidirectional(keras.layers.LSTM(300, return_sequences = True)),
#             keras.layers.Bidirectional(keras.layers.LSTM(200, return_sequences = True)),
#             keras.layers.Bidirectional(keras.layers.LSTM(100, return_sequences = True)),
#             keras.layers.Dense(50, activation = 'relu'),
#             keras.layers.Dense(1),
#         ])
#         model.compile(optimizer = "adam", loss = "mae")

#         scheduler = ExponentialDecay(1e-3, 400*((len(X)*0.8)/BATCH_SIZE), 1e-5)
#         lr = LearningRateScheduler(scheduler, verbose = 1)

#         model.fit(X, y, validation_data = (X_valid, y_valid), epochs = EPOCH, batch_size = BATCH_SIZE, callbacks = [lr])

#         test_preds.append(model.predict(X_test).squeeze().reshape(-1, 1).squeeze())


NameError: ignored

In [None]:
# Average the per-fold predictions. The divisor is taken from the number of
# folds actually trained rather than a hard-coded 5, which silently scaled the
# predictions down whenever a different fold count was used.
y_pred = np.mean(test_preds, axis=0)

# Flatten the hold-out targets to match y_pred. A hard-coded element count
# breaks as soon as the hold-out size changes; reshape(-1) works for any size
# and is safe to re-run.
y_test = y_test.reshape(-1)

In [None]:


# Report each hold-out metric in turn; every entry pairs the printed label
# with the scikit-learn scorer that produces the value.
for label, metric in (
    ("Mean Absolute Error: ", mean_absolute_error),
    # ("Mean Absolute Percentage Error: ", mean_absolute_percentage_error),
    ("r2 score: ", r2_score),
    ("Mean Squared Error: ", mean_squared_error),
):
    print(label, metric(y_test, y_pred))

In [None]:
 # Sanity check: shape of the averaged prediction array.
 y_pred.shape