```
This notebook is a sample.
It can be placed in, and run from, a vXXX directory.
```

# Install Modules

In [None]:
# ! pip install -q slackweb transformers

# Resolve Path

In [None]:
import sys

# Make the two parent directories importable (the project packages imported
# below are resolved through them). The membership check keeps the cell
# idempotent: re-running it no longer appends duplicate entries to sys.path.
for extra_path in ("../../..", ".."):
    if extra_path not in sys.path:
        sys.path.append(extra_path)

# Import

In [None]:
import gc
import os

import pandas as pd
from sklearn.model_selection import StratifiedKFold

from teads.notebook.config import NotebookConfig
from teads.notebook.version import Version
from teads.util.logger import StdoutLogger, FileLogger
from teads.util.notification import Slack
from teads.util.util import in_kaggle, in_colab
from teads.util.reduce_mem_usage import reduce_mem_usage
from google_brain.dataset import GoogleBrainTorchDatasetCreator
from google_brain.context import GoogleBrainContext
from google_brain.metrics import MAE
from google_brain.lstm_cls.lstm import GoogleBrainLSTM
from google_brain.lstm_cls.experiment import GoogleBrainLSTMExperimentConfig, GoogleBrainLSTMExperiment

# import warnings
# warnings.simplefilter("ignore")

# Config

In [None]:
# Read the webhook URL from the SLACK_WEBHOOK_URL environment variable so a
# real URL never has to be written into (and committed with) the notebook.
# When the variable is not set, the sample placeholder is used as before.
notification = Slack(os.environ.get("SLACK_WEBHOOK_URL", "your_slack_webhook_url"))

In [None]:
# Version descriptor for this run: a number plus a free-text note.
notebook_version = Version(
    1,
    """
        hogehoge
        """
)

# Notebook-wide settings shared by the cells below: loggers, notification
# target, random seed and the flag that selects the local input paths.
global_conf = NotebookConfig(
    version=notebook_version,
    logger=StdoutLogger(),
    file_logger=FileLogger("lstm_cls"),
    notification=notification,
    seed=1,
    is_local=True,
)

# Prepare Data

In [None]:
# Pick the CSV locations for the current runtime. The precedence is unchanged:
# the local flag is checked first, then Kaggle, then Colab.
if global_conf.is_local:
    train_path = "../input/train.csv"
    test_path = "../input/test.csv"
    sample_submission_path = "../input/sample_submission.csv"
elif in_kaggle():
    # On Kaggle, train.csv is read from a different input directory than
    # test.csv and sample_submission.csv.
    train_path = "../input/google-brain-fold/train.csv"
    test_path = "../input/ventilator-pressure-prediction/test.csv"
    sample_submission_path = "../input/ventilator-pressure-prediction/sample_submission.csv"
elif in_colab():
    train_path = "/content/drive/MyDrive/kaggle/google-brain-2021/input/train.csv"
    test_path = "/content/drive/MyDrive/kaggle/google-brain-2021/input/test.csv"
    sample_submission_path = "/content/drive/MyDrive/kaggle/google-brain-2021/input/sample_submission.csv"
else:
    # Previously this case fell through silently and the notebook only failed
    # later with a NameError on `train`; fail here with a clear message instead.
    raise RuntimeError(
        "Could not determine where the input data lives: "
        "global_conf.is_local is False and neither Kaggle nor Colab was detected."
    )

train = pd.read_csv(train_path)
test = pd.read_csv(test_path)
sample_submission = pd.read_csv(sample_submission_path)

# Fold

In [None]:
# Stratification key: the "R_C" string built from each row's R and C values.
# It is kept as a standalone Series so that no temporary column has to be
# added to `train` and dropped again afterwards.
rc_key = (train["R"].astype(str) + "_" + train["C"].astype(str)).rename("R_C")

# One row per breath, carrying the first R_C value seen for that breath.
train_gby = rc_key.groupby(train["breath_id"]).agg("first").reset_index()

# NOTE: the fold count (20) is also used by the experiment configuration and
# is baked into the "stratified_20fold" column name; keep them in sync.
cv = StratifiedKFold(n_splits=20, shuffle=True, random_state=global_conf.seed).split(train_gby, train_gby["R_C"])

# Assign the validation fold once per breath ...
breath_fold = pd.Series(-1, index=train_gby["breath_id"].values)
for fold, (_, valid_idx) in enumerate(cv):
    breath_fold.iloc[valid_idx] = fold

# ... then broadcast it to every row with a single lookup on breath_id,
# instead of scanning the whole training frame once per fold.
fold_df = pd.DataFrame({"id": train["id"], "fold": train["breath_id"].map(breath_fold)})

# Every row must have landed in exactly one validation fold (-1 = unassigned).
assert (fold_df["fold"] >= 0).all(), "some rows were not assigned to a fold"

train["stratified_20fold"] = fold_df["fold"]

# Feature Engineering

In [None]:
# Raw input columns selected from the train and test frames.
feature_col = [
    "id",
    "breath_id",
    "R",
    "C",
    "time_step",
    "u_in",
    "u_out",
]

# Column to predict (kept as a list so that selecting it yields a DataFrame).
target_col = [
    "pressure",
]

In [None]:
# Split the training frame into inputs and target; the test frame only has inputs.
train_X, train_y = train[feature_col], train[target_col]
test_X = test[feature_col]

# Fold id of every training row; shown as the cell output.
folds = train["stratified_20fold"]
folds

In [None]:
# The raw frames are not used again in the cells shown here; drop the names
# and trigger a collection so the memory can be reclaimed right away.
del train
del test
gc.collect()

In [None]:
%%time
# Names passed as `features` to the dataset creator.
features = [
    "id", "base", "u_in", "u_out",
    "rc", "signal", "time_step", "mix",
]

# Build the dataset from the train/test inputs, the target and the fold ids.
dataset_creator = GoogleBrainTorchDatasetCreator(
    train_X,
    train_y,
    test_X,
    features=features,
    folds=folds,
)
dataset = dataset_creator.make()

In [None]:
# Quick sanity check of the generated test features.
test_features = dataset.test_X

display(test_features.head())

print("number of features: ", len(test_features.columns))
print("columns: ", test_features.columns)
print("u_out uniques: ", test_features["u_out"].unique())

# Context

In [None]:
# Bundle the dataset, the submission template and the stdout logger into the
# context object that is handed to the experiment.
context = GoogleBrainContext(
    dataset,
    sample_submission,
    global_conf.logger,
)

# Experiment

In [None]:
# Two separate MAE instances: one passed as `metrics`, one as `score`.
metrics, score = MAE(), MAE()

In [None]:
# Single place for the fold count used by both the experiment config and the
# list of folds to run. It should match the number of folds in the dataset's
# fold column (20) -- TODO confirm if the split is changed.
N_FOLD = 20

exp_config = GoogleBrainLSTMExperimentConfig(
    exp_name="lstm_cls",
    version=global_conf.version.n,
    n_fold=N_FOLD,
    metrics=metrics,
    score=score,
    file_logger=global_conf.file_logger,
    std_logger=global_conf.logger,
    notification=global_conf.notification,
    use_optimize_params=False
)

exp = GoogleBrainLSTMExperiment(
    context,
    exp_config,
    # Fold indices 0 .. N_FOLD - 1 (same list as before, built idiomatically).
    folds=list(range(N_FOLD))
)

# Run without parameter optimisation; the result is used by the cells below.
exp_result = exp.run(optimize=False)

# exp.remake_oof_submission()

In [None]:
# Summary of the run (metrics, score, elapsed time) written to the file logger.
result_lines = [
    "",
    "================Result=============",
    f"metrics: {exp_result.metrics}",
    f"score: {exp_result.score}",
    f"time: {exp_result.time}",
    "===================================",
    "",
]
global_conf.file_logger.default(result_lines)

In [None]:
# Display the score of the finished experiment as the cell output.
exp_result.score

In [None]:
# Display the submission frame produced by the experiment as the cell output.
exp_result.submission_df