# Import

In [7]:
from utils import init_logger, timer, fix_seed
import random
import pandas as pd
pd.set_option('display.max_columns', 150)
import numpy as np
import category_encoders as ce
import matplotlib.pyplot as plt
import datetime as dt
from tqdm import tqdm
from tqdm.notebook import tqdm_notebook
tqdm_notebook.pandas(desc="Processing:")

# Seed the random number generators through the project helper
# (presumably for reproducible runs — confirm what utils.fix_seed covers).
fix_seed()
# Shared logger handed to every timer(...) block and used for logger.info calls below.
logger = init_logger()

In [8]:
# Run identifier: passed to Runner and embedded in the submission CSV file name.
PROJECT_NAME = "v1"

# Load

In [9]:
from etl import load_data

In [10]:
# Load the train and test frames; timer() logs a start line and a
# "done in N seconds" line around the call.
with timer("read csv", logger):
    df_train, df_test = load_data()

2022/01/01 11:36:37 45 [INFO] [read csv] start.
2022/01/01 11:36:37 47 [INFO] [read csv] done in 0.034 seconds.


In [11]:
# Target vector is taken straight from the training frame.
y_train = df_train["Survived"]

# Feature matrices: the identifier and target columns are removed from both frames.
X_train = df_train.drop(columns=["PassengerId", "Survived"])
X_test = df_test.drop(columns=["PassengerId", "Survived"])

# Training

In [12]:
from model_nn import ModelNN
from runner import Runner
from keras.callbacks import EarlyStopping
from hyperopt import hp, fmin, tpe, space_eval

In [13]:
# Runner instance driven by the hyperparameter search objective below.
# cv=False is forwarded to Runner (presumably disables cross-validation —
# confirm against runner.py).
runner = Runner(PROJECT_NAME, ModelNN, cv=False)

In [5]:
# Hand-picked model hyperparameters (same keys as the search space below).
params = {
    "layers": 4,
    "dropout": 0.2,
    "units": 8
}

# hyperopt search space. Every hp label matches its dict key, so the raw
# dict returned by fmin uses the same names as `params` and `space`.
# Note: hp.choice reports an index into the list in the raw fmin result,
# so the result must be resolved with space_eval before use.
space = {
    "layers": hp.uniformint("layers", 3, 7),
    "dropout": hp.uniform("dropout", 0.1, 0.4),
    "units": hp.choice("units", [4, 8, 12, 16])
}

# Fit settings handed to runner.train. The epoch count is only an upper
# bound: EarlyStopping halts training once val_loss has not improved for
# 30 consecutive epochs.
train_params = {
    "epochs": 5000,
    "batch_size": 32,
    "verbose": 0,
    "callbacks": [EarlyStopping(monitor="val_loss", min_delta=0, patience=30, verbose=1)]
}

In [16]:
def objective(args):
    """Objective for hyperopt: train with one sampled configuration and return its score.

    Args:
        args: Hyperparameter dict sampled from the search space; it is logged
            and passed to ``runner.train`` as the model parameters.

    Returns:
        The value of ``runner.get_score()`` after training, which ``fmin``
        treats as the loss to minimise.
    """
    logger.info(args)
    runner.train(X_train, y_train, args, train_params)
    trial_loss = runner.get_score()
    return trial_loss

In [17]:
# Run the TPE search over `space`, minimising `objective`.
best = fmin(
    fn=objective,
    space=space,
    algo=tpe.suggest,
    max_evals=2,
)

# Resolve the raw fmin result back into concrete hyperparameter values.
best_params = space_eval(space, best)
logger.info("best params: {}".format(best_params))

  0%|                                     | 0/2 [00:00<?, ?trial/s, best loss=?]

2022/01/01 11:37:07 2 [INFO] {'dropout': 0.35533811619227473, 'layers': 6, 'units': 12}


Metal device set to: Apple M1 Max


2022-01-01 11:37:07.285715: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:305] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2022-01-01 11:37:07.285887: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:271] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)
2022-01-01 11:37:07.406809: W tensorflow/core/platform/profile_utils/cpu_utils.cc:128] Failed to get CPU frequency: 0 Hz
2022-01-01 11:37:07.787040: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:112] Plugin optimizer for device_type GPU is enabled.
2022-01-01 11:37:09.189156: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:112] Plugin optimizer for device_type GPU is enabled.


Epoch 00221: early stopping                                                     
 50%|█████     | 1/2 [01:28<01:28, 88.49s/trial, best loss: 0.37501025199890137]

2022/01/01 11:38:35 2 [INFO] {'dropout': 0.14697523300939222, 'layers': 5, 'units': 4}
2022-01-01 11:38:36.206935: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:112] Plugin optimizer for device_type GPU is enabled.
2022-01-01 11:38:37.315807: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:112] Plugin optimizer for device_type GPU is enabled.


Epoch 00220: early stopping                                                     
100%|██████████| 2/2 [02:43<00:00, 81.89s/trial, best loss: 0.37501025199890137]

2022/01/01 11:39:51 3 [INFO] best params: {'dropout': 0.35533811619227473, 'layers': 6, 'units': 12}





# Inference

In [18]:
# Re-create the Runner and fit the model that is used for prediction below.
# NOTE(review): this trains with the hand-written `params`, not the
# `best_params` found by the hyperparameter search — confirm this is intended.
runner = Runner(PROJECT_NAME, ModelNN, cv=False)
with timer("train", logger):
    runner.train(X_train, y_train, params, train_params)

2022/01/01 11:40:00 45 [INFO] [train] start.
2022-01-01 11:40:01.338058: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:112] Plugin optimizer for device_type GPU is enabled.
2022-01-01 11:40:02.224749: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:112] Plugin optimizer for device_type GPU is enabled.
2022/01/01 11:40:40 47 [INFO] [train] done in 39.170 seconds.


Epoch 00136: early stopping


In [18]:
# Score of the model trained above (the same quantity the search objective returns).
runner.get_score()

0.4293127179145813

In [16]:
# Predict on the test features, timing the call.
with timer("prediction", logger):
    prob = runner.predict(X_test)

# Threshold the predictions at 0.5 to obtain hard 0/1 labels.
above_threshold = prob > 0.5
pred = np.where(above_threshold, 1, 0)
pred

2022/01/01 11:15:21 45 [INFO] [prediction] start.
2022-01-01 11:15:21.248984: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:112] Plugin optimizer for device_type GPU is enabled.
2022-01-01 11:15:21.462221: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:112] Plugin optimizer for device_type GPU is enabled.
2022-01-01 11:15:21.666765: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:112] Plugin optimizer for device_type GPU is enabled.
2022-01-01 11:15:21.935051: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:112] Plugin optimizer for device_type GPU is enabled.
2022-01-01 11:15:22.202376: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:112] Plugin optimizer for device_type GPU is enabled.
2022/01/01 11:15:22 47 [INFO] [prediction] done in 1.260 seconds.


array([0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0,
       1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1,
       1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1,
       1, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 1,
       1, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0,
       0, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0,
       0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1,
       1, 0, 1, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1,
       0, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0,
       1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1,
       0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1,
       0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0,
       0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0,

In [17]:
# Pair each test PassengerId with its predicted label.
submission = pd.DataFrame({
    "PassengerId": df_test["PassengerId"],
    "Survived": pred
})
# index=False keeps the frame's index out of the CSV, leaving only the two columns.
submission.to_csv(f"../submission/submission_{PROJECT_NAME}.csv", index=False)
# Last expression of the cell: display the frame that was written.
submission

Unnamed: 0,PassengerId,Survived
891,892,0
892,893,0
893,894,0
894,895,0
895,896,0
...,...,...
1304,1305,0
1305,1306,1
1306,1307,0
1307,1308,0


In [20]:
!kaggle competitions submit titanic -f ../submission/submission_{PROJECT_NAME}.csv -m "loss: 0.4293127179145813"

100%|████████████████████████████████████████| 2.77k/2.77k [00:03<00:00, 842B/s]
Successfully submitted to Titanic - Machine Learning from Disaster