In [3]:
import lightgbm
import numpy as np
import optuna
import pandas as pd
import tensorflow as tf
from sklearn.metrics import mean_absolute_error
from sklearn.model_selection import train_test_split, GroupKFold,KFold
from sklearn.preprocessing import normalize
# Choose the tf.distribute strategy used for model building/training:
# connect to a TPU when one can be resolved, otherwise fall back to
# TensorFlow's default strategy.
try:
    tpu = tf.distribute.cluster_resolver.TPUClusterResolver()
    print('Device:', tpu.master())
    tf.config.experimental_connect_to_cluster(tpu)
    tf.tpu.experimental.initialize_tpu_system(tpu)
    strategy = tf.distribute.experimental.TPUStrategy(tpu)
    print(strategy)
except Exception as tpu_error:
    # Still best-effort (any TPU setup failure falls back to the default
    # strategy), but unlike a bare `except:` this no longer swallows
    # KeyboardInterrupt/SystemExit, and the reason for the fallback is shown.
    print('TPU unavailable, falling back to the default strategy:', repr(tpu_error))
    strategy = tf.distribute.get_strategy()
print('Number of replicas:', strategy.num_replicas_in_sync)

In [4]:
# Directory holding the input CSVs; file paths are built by plain string
# concatenation, so the trailing slash is required.
Data_Path = 'data/'

# Read the three input tables in a fixed order: train, test, submission template.
train, test, submission = (
    pd.read_csv(Data_Path + csv_name)
    for csv_name in ('train.csv', 'test.csv', 'sub.csv')
)

In [5]:
# Running total of u_in, restarted for every breath_id.
train['u_in_cumsum'] = train.groupby('breath_id')['u_in'].cumsum()
test['u_in_cumsum'] = test.groupby('breath_id')['u_in'].cumsum()

In [6]:
# u_in from two rows earlier *within the same breath*.
# Shifting per breath_id (instead of over the whole frame) keeps the first two
# rows of a breath from receiving u_in values that belong to the previous
# breath; those rows have no history and are set to 0 by the fillna below.
train['u_in_lag'] = train.groupby('breath_id')['u_in'].shift(2)
# fillna(0) is applied to the whole frame, so any other missing value is
# zero-filled as well.
train = train.fillna(0)

test['u_in_lag'] = test.groupby('breath_id')['u_in'].shift(2)
test = test.fillna(0)

In [7]:
# Feature Engineering
def add_features(df):
    """Add per-breath engineered features to a ventilator data frame.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``breath_id``, ``time_step``, ``u_in``,
        ``R`` and ``C``. The frame is modified in place up to the one-hot
        step (new columns are added and ``R``/``C`` are converted to ``str``).

    Returns
    -------
    pandas.DataFrame
        A new frame in which ``R`` and ``C`` are replaced by one-hot columns
        (``R_<value>``, ``C_<value>``) and the following columns are added:
        ``area``, ``u_in_cumsum``, ``u_in_lag2``, ``u_in_lag4``,
        ``ewm_u_in_{mean,std,corr}``, ``rolling_10_{mean,max,std}`` and
        ``expand_{mean,max,std}``. Every remaining NaN is replaced by 0.
    """
    # Cumulative sum of time_step * u_in within each breath.
    df['area'] = df['time_step'] * df['u_in']
    df['area'] = df.groupby('breath_id')['area'].cumsum()
    df['u_in_cumsum'] = df.groupby('breath_id')['u_in'].cumsum()

    # Lags are taken per breath so the first rows of a breath never receive
    # u_in values from the previous breath; rows without history become 0.
    df['u_in_lag2'] = df.groupby('breath_id')['u_in'].shift(2).fillna(0)
    df['u_in_lag4'] = df.groupby('breath_id')['u_in'].shift(4).fillna(0)

    # One-hot encode R and C. An explicit integer dtype keeps the indicator
    # columns numeric on every pandas version (newer releases default to bool,
    # which turns a later DataFrame.to_numpy() into an object array).
    df['R'] = df['R'].astype(str)
    df['C'] = df['C'].astype(str)
    df = pd.get_dummies(df, columns=['R', 'C'], dtype=np.uint8)

    # All window statistics below are computed on u_in, separately per breath.
    # Each result carries a (breath_id, row) MultiIndex; dropping level 0
    # restores the original row index so the assignment aligns correctly.
    u_in_by_breath = df.groupby('breath_id')['u_in']

    def _aligned(windowed):
        # Drop the breath_id index level added by the grouped window operation.
        return windowed.reset_index(level=0, drop=True)

    df['ewm_u_in_mean'] = _aligned(u_in_by_breath.ewm(halflife=10).mean())
    df['ewm_u_in_std'] = _aligned(u_in_by_breath.ewm(halflife=10).std())
    # NOTE(review): corr() is called without `other`, which looks like the
    # correlation of u_in with itself -- kept as is so the feature set (and
    # the model input width) is unchanged; confirm this is intended.
    df['ewm_u_in_corr'] = _aligned(u_in_by_breath.ewm(halflife=10).corr())

    df['rolling_10_mean'] = _aligned(u_in_by_breath.rolling(window=10, min_periods=1).mean())
    df['rolling_10_max'] = _aligned(u_in_by_breath.rolling(window=10, min_periods=1).max())
    df['rolling_10_std'] = _aligned(u_in_by_breath.rolling(window=10, min_periods=1).std())

    # expanding(2): at least two observations are required, so the first row
    # of every breath is NaN here and is zero-filled below.
    df['expand_mean'] = _aligned(u_in_by_breath.expanding(2).mean())
    df['expand_max'] = _aligned(u_in_by_breath.expanding(2).max())
    df['expand_std'] = _aligned(u_in_by_breath.expanding(2).std())

    df = df.fillna(0)
    return df

In [8]:
# Run the feature pipeline on each split; the returned frames replace the
# originals (train first, then test).
train = add_features(df=train)
test = add_features(df=test)

In [9]:
# Display the version string of the pandas installation in use.
pd.__version__

In [10]:
# Preview the first rows of the training frame after add_features().
train.head()

In [11]:
# Preview the first rows of the test frame after add_features().
test.head()

In [12]:
# Build the model inputs/targets.
# Rows are regrouped into blocks of 80 consecutive rows (one sequence each);
# this assumes every breath_id occupies exactly 80 consecutive rows -- TODO confirm.
X = train.drop(['pressure', 'id', 'breath_id', 'u_out'], axis=1)
y = train[['pressure']].to_numpy().reshape(-1, 80)
# X_test is kept as a DataFrame; it is converted to an array where it is used.
X_test = test.drop(['id', 'breath_id', 'u_out'], axis=1)

# Train and test are one-hot encoded separately, so verify that they ended up
# with the same feature columns in the same order before dropping the labels.
assert list(X.columns) == list(X_test.columns), "train/test feature columns differ"

# The number of sequences is inferred from the data instead of being
# hard-coded, and the array is cast to float32 explicitly so mixed column
# dtypes can never yield an object array.
X = X.to_numpy(dtype=np.float32).reshape(-1, 80, X.shape[1])

In [13]:
# Report the shapes of the training inputs, training targets and test features,
# one per line.
print(*(f"[+]{data.shape}" for data in (X, y, X_test)), sep="\n")

In [14]:
def _build_lstm_model(seq_len, n_features):
    """Build and compile the stacked bidirectional-LSTM regressor.

    The network maps a (seq_len, n_features) sequence to one value per time
    step: three bidirectional LSTM layers (200, 150, 100 units, all returning
    full sequences), a 100-unit ReLU dense layer and a 1-unit linear output.
    It is compiled with the Adam optimizer and mean-absolute-error loss.
    """
    inputs = tf.keras.layers.Input(shape=(seq_len, n_features))
    x = inputs
    for units in (200, 150, 100):
        x = tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(units, return_sequences=True))(x)
    x = tf.keras.layers.Dense(100, activation='relu')(x)
    outputs = tf.keras.layers.Dense(1)(x)

    net = tf.keras.models.Model(inputs=inputs, outputs=outputs)
    net.compile(optimizer='adam', loss='mae')
    return net


# 5-fold cross-validated training; each fold's model predicts the full test
# set and the flattened predictions are collected in `test_pred`.
with strategy.scope():
    kf = KFold(n_splits=5,shuffle=True,random_state=2021)
    test_pred = []

    # Sequence length and feature count come from the training array instead
    # of hard-coded literals, and the test array is built once, not per fold.
    seq_len, n_features = X.shape[1], X.shape[2]
    x_test_seq = X_test.to_numpy(dtype=np.float32).reshape(-1, seq_len, n_features)

    schedul = tf.keras.optimizers.schedules.ExponentialDecay(1e-3, 200*((len(X)*0.8)/1024), 1e-5)

    def lr_for_epoch(epoch, lr=None):
        """Learning rate for `epoch`, as the plain float the callback expects."""
        # NOTE(review): `schedul` is parameterised in optimizer steps
        # (decay_steps = 200 * steps-per-epoch) but is evaluated here once per
        # *epoch*, so the learning rate decays far more slowly than the
        # decay_steps value suggests. The effective schedule is deliberately
        # left as it was; confirm whether per-step decay was intended.
        return float(schedul(epoch))

    for fold,(train_idx,test_idx) in enumerate(kf.split(X,y)):
        print(f'[+] Fold{fold+1}')
        x_train,x_valid = X[train_idx],X[test_idx]
        y_train,y_valid = y[train_idx],y[test_idx]

        # A fresh, untrained model for every fold.
        model = _build_lstm_model(seq_len, n_features)
        model.fit(
            x_train, y_train,
            validation_data=(x_valid, y_valid),
            epochs=150,
            batch_size=1024,
            callbacks=[tf.keras.callbacks.LearningRateScheduler(lr_for_epoch)],
        )
        # predict() yields one value per time step; flatten to one value per
        # test row so it lines up with the submission frame.
        test_pred.append(model.predict(x_test_seq).reshape(-1))

In [15]:
# Average the per-fold test predictions. Dividing by the number of collected
# folds (rather than a hard-coded 5) keeps the mean correct if the number of
# splits changes, and fails loudly if no fold was trained.
submission["pressure"] = sum(test_pred) / len(test_pred)
submission.to_csv('submission_task.csv', index=False)