In [1]:
import sys, os
import pickle
import warnings
from dataclasses import dataclass, asdict, field
from datetime import datetime

import numpy as np
import pandas as pd

# Silence warnings before the heavier ML libraries below are imported.
warnings.filterwarnings("ignore")

from sklearn.model_selection import KFold, GroupKFold
from sklearn.preprocessing import RobustScaler, normalize
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.callbacks import EarlyStopping, LearningRateScheduler, ModelCheckpoint, ReduceLROnPlateau
from tensorflow.keras.optimizers.schedules import ExponentialDecay

# Root directory of the project code (path layout suggests a Colab Google Drive mount — confirm).
# Also used later as the base directory for the fine-tuning output folder.
workplace = '/content/drive/MyDrive/kaggle/codes/ventilator-pressure-prediction/'
# Put the project directory on the import path before importing the local modules below
# (presumably utils / config / functions live under `workplace` — verify).
sys.path.append(workplace)
from utils import save_json, load_json
from config import keras_model_conf
from functions import keras_loss_metrics

In [2]:
@dataclass
class train_conf():
    """Settings for one fine-tuning run.

    Every attribute is a real dataclass field, so ``asdict(cfg)`` captures the
    complete configuration (including ``model_dir``) when it is written to disk.
    """
    # Stored with the run; the training cell decides how it is applied.
    seed: int = 665

    # CSV file read with pandas to obtain the training table.
    csv_dir: str = '/content/drive/MyDrive/kaggle/datasets/ventilator-pressure-prediction/train_v7-scaled.csv'

    # training params
    epoch: int = 500
    batch_size: int = 512

    # Used to build the output directory name (``finetune-<model_name>``).
    model_name: str = 'dlaset'
    # default_factory builds a fresh dlast_conf per train_conf instance instead of
    # sharing one object created at class-definition time (which Python 3.11+
    # rejects outright when the default is an unhashable dataclass instance).
    model_param: keras_model_conf.dlast_conf = field(default_factory=keras_model_conf.dlast_conf)

    # Name of the loss function.
    criterion: str = 'mask_mae'

    # optimizer params
    lr: float = 1e-4

    # for early stopping
    early_stop_patience: int = 15

    # for scheduler
    factor: float = 0.75
    scheduler_patience: int = 7

    # Directory holding the pre-trained per-fold models and their index pickles.
    # It previously had no type annotation, which made it a plain class attribute
    # that ``asdict`` silently skipped. It is declared last so the positional
    # order of the pre-existing fields in ``__init__`` is unchanged.
    model_dir: str = '/content/drive/MyDrive/kaggle/codes/ventilator-pressure-prediction/logs/dlast/good_model/'

In [4]:
def load_data(df, x_col, seq_len=80):
    """Turn a flat table into fixed-length sequence arrays for training.

    Consecutive groups of ``seq_len`` rows of ``df`` become one sequence.

    Args:
        df: DataFrame containing every column in ``x_col`` plus ``'pressure'``.
        x_col: Feature column names. The sequence is copied, never modified.
        seq_len: Number of rows per sequence (default 80, the previously
            hard-coded value).

    Returns:
        ``(x, y)`` as float32 arrays:
        ``x`` has shape ``(n, seq_len, len(x_col))``;
        ``y`` has shape ``(n, seq_len, len(x_col) + 1)`` and holds the same
        feature columns followed by ``'pressure'`` as the last channel.

    Raises:
        ValueError: from ``reshape`` if the row count is not a multiple of ``seq_len``.
        KeyError: from pandas if a requested column is missing.
    """
    # Build new lists instead of using `x_col += [...]`, which appended
    # 'pressure' to the caller's own list as a side effect.
    feature_cols = list(x_col)
    target_cols = feature_cols + ['pressure']

    x = df[feature_cols].values.astype(np.float32).reshape(-1, seq_len, len(feature_cols))
    y = df[target_cols].values.astype(np.float32).reshape(-1, seq_len, len(target_cols))

    return x, y

In [6]:
# Instantiate the run configuration with its defaults, then read the CSV it points to.
cfg = train_conf()
df = pd.read_csv(filepath_or_buffer=cfg.csv_dir)

In [None]:
# Fine-tune each pre-trained fold model stored in cfg.model_dir, re-using the
# train/validation indices saved next to it, and write the results to a fresh
# timestamped directory.

# Number of per-fold artefacts (model-{i}, train_idx_{i}.pkl, valid_idx_{i}.pkl)
# expected in cfg.model_dir.
N_FOLDS = 10

now = datetime.now().strftime("%m%d%H%M%S")
out = f'{workplace}logs/finetune-{cfg.model_name}/{now}/'
os.makedirs(out, exist_ok=True)

save_json(asdict(cfg), out + 'param.json')

# Apply the configured seed; it was stored in the config but never used before.
np.random.seed(cfg.seed)
tf.random.set_seed(cfg.seed)

x_col = ['time_step', 'u_in', 'u_out', 'cross', 'cross2', 'area',
       'time_step_cumsum', 'u_in_cumsum', 'u_in_lag1', 'u_out_lag1',
       'u_in_lag_back1', 'u_out_lag_back1', 'u_in_lag2', 'u_out_lag2',
       'u_in_lag_back2', 'u_out_lag_back2', 'u_in_lag3', 'u_out_lag3',
       'u_in_lag_back3', 'u_out_lag_back3', 'u_in_lag4', 'u_out_lag4',
       'u_in_lag_back4', 'u_out_lag_back4', 'breath_id__u_in__max',
       'breath_id__u_in__mean', 'breath_id__u_in__diffmax',
       'breath_id__u_in__diffmean', 'u_in_diff1', 'u_out_diff1', 'u_in_diff2',
       'u_out_diff2', 'u_in_diff3', 'u_out_diff3', 'u_in_diff4', 'u_out_diff4',
       'u_in_cummean', 'breath_id__u_in_lag', 'breath_id__u_in_lag2',
       'time_step_diff', 'ewm_u_in_mean', '15_in_sum', '15_in_min',
       '15_in_max', '15_in_mean', 'u_in_lagback_diff1', 'u_out_lagback_diff1',
       'u_in_lagback_diff2', 'u_out_lagback_diff2', 'R_20', 'R_5', 'R_50',
       'C_10', 'C_20', 'C_50', 'R__C_20__10', 'R__C_20__20', 'R__C_20__50',
       'R__C_50__10', 'R__C_50__20', 'R__C_50__50', 'R__C_5__10', 'R__C_5__20',
       'R__C_5__50']

# Pass a copy so the feature list pickled below cannot be altered by load_data
# and therefore lists exactly the columns that make up X.
X, Y = load_data(df, list(x_col))

with open(out + 'x_col.pkl', 'wb') as f:
    pickle.dump(x_col, f)

# Resolve the loss by the name recorded in the config so param.json and the
# compiled model cannot disagree (default 'mask_mae' matches the old hard-coded loss).
loss_fn = getattr(keras_loss_metrics, cfg.criterion)

gpu_strategy = tf.distribute.get_strategy()
with gpu_strategy.scope():
    # The fold membership comes from the pickled indices, not from a new split,
    # so a plain counter replaces the KFold splitter whose output was discarded.
    for i in range(N_FOLDS):
        print('###', i, '###')

        # NOTE: pickle.load executes arbitrary code; only point cfg.model_dir at trusted files.
        with open(f'{cfg.model_dir}train_idx_{i}.pkl', 'rb') as f:
            tidx = pickle.load(f)
        with open(f'{cfg.model_dir}valid_idx_{i}.pkl', 'rb') as f:
            vidx = pickle.load(f)

        X_train, X_valid = X[tidx], X[vidx]
        Y_train, Y_valid = Y[tidx], Y[vidx]

        # Keep a copy of the split alongside the fine-tuned model.
        with open(f'{out}train_idx_{i}.pkl', 'wb') as f:
            pickle.dump(tidx, f)
        with open(f'{out}valid_idx_{i}.pkl', 'wb') as f:
            pickle.dump(vidx, f)

        model = keras.models.load_model(f'{cfg.model_dir}model-{i}')
        # Recompile with a fresh Adam optimizer at the fine-tuning learning rate.
        optimizer = keras.optimizers.Adam(learning_rate = cfg.lr)
        model.compile(optimizer=optimizer, loss=loss_fn)

        # New callback objects per fold so patience counters start from zero.
        callbacks = [
            ReduceLROnPlateau(
                monitor = "val_loss",
                factor = cfg.factor,
                verbose = 1,
                patience = cfg.scheduler_patience),
            EarlyStopping(
                monitor = "val_loss",
                patience = cfg.early_stop_patience,
                verbose =1,
                mode = "min",
                restore_best_weights = True),
        ]

        model.fit(X_train, Y_train, validation_data=(X_valid, Y_valid), epochs=cfg.epoch, batch_size=cfg.batch_size, callbacks=callbacks)
        save_locally = tf.saved_model.SaveOptions(experimental_io_device='/job:localhost')
        model.save(f'{out}model-{i}', options=save_locally)