In [70]:
# Download titanic.zip from https://www.kaggle.com/competitions/titanic
!unzip titanic.zip

Archive:  titanic.zip
replace gender_submission.csv? [y]es, [n]o, [A]ll, [N]one, [r]ename: A
  inflating: gender_submission.csv   
  inflating: test.csv                
  inflating: train.csv               


In [71]:
import os
import random
from pathlib import Path

import numpy as np
import polars as pl
from joblib import dump, load

from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score
from sklearn.preprocessing import StandardScaler
from sklearn.impute import SimpleImputer

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader

In [72]:
SEED = 42
USE_COLUMNS = ["Pclass", "Sex", "Age", "SibSp", "Parch", "Fare"]
MODEL_DIR = Path("models")
MODEL_DIR.mkdir(exist_ok=True)

BEST_MODEL_PATH = MODEL_DIR / "mlp_torch_best.pth"
FINAL_MODEL_PATH = MODEL_DIR / "mlp_torch_final.pth"
PREPROCESS_PATH = MODEL_DIR / "preprocess.joblib"
THRESHOLD = 0.5
BATCH_SIZE = 64
HIDDEN_DIM = 32
MAX_EPOCHS = 100
PATIENCE = 10
LR = 1e-3
WEIGHT_DECAY = 1e-4

In [73]:
def set_seed(seed: int = 42):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
set_seed(SEED)

In [74]:
train_raw = pl.read_csv("train.csv").select(USE_COLUMNS + ["Survived"])
test_raw = pl.read_csv("test.csv").select(USE_COLUMNS)

# Preprocess
Encoding string categorical feature(`Sex` column) to labels.

- `female`: 0
- `male`: 1

In [75]:
sex_label_mapping = {"female": 0, "male": 1}
train_numeric = train_raw.with_columns(
    pl.col("Sex").replace_strict(sex_label_mapping).alias("Sex")
)
test_numeric = test_raw.with_columns(
    pl.col("Sex").replace_strict(sex_label_mapping).alias("Sex")
)

splitting training dataset for validation.

In [76]:
train, valid = train_test_split(train_numeric, test_size=0.3, random_state=SEED)
print(f"train size: {train.shape}")
print(f"valid size: {valid.shape}")

y_train = train["Survived"].to_numpy()
y_valid = valid["Survived"].to_numpy()
X_train = train.select(pl.exclude("Survived"))
X_valid = valid.select(pl.exclude("Survived"))

train size: (623, 7)
valid size: (268, 7)


In [77]:
imputer = SimpleImputer(strategy="median")
scaler = StandardScaler()

X_train_np = X_train.to_numpy()
X_valid_np = X_valid.to_numpy()

X_train_np = imputer.fit_transform(X_train_np)
X_valid_np = imputer.transform(X_valid_np)

X_train_np = scaler.fit_transform(X_train_np)
X_valid_np = scaler.transform(X_valid_np)

In [78]:
# Construct as DataLoader
Xtr = torch.tensor(X_train_np, dtype=torch.float32)
ytr = torch.tensor(y_train.reshape(-1, 1), dtype=torch.float32)
Xva = torch.tensor(X_valid_np, dtype=torch.float32)
yva = torch.tensor(y_valid.reshape(-1, 1), dtype=torch.float32)

train_loader = DataLoader(TensorDataset(Xtr, ytr), batch_size=BATCH_SIZE, shuffle=True)


In [79]:
# 2 layer NN
class Net(nn.Module):
    def __init__(self, in_dim: int, hid_dim: int):
        super().__init__()
        self.fc1 = nn.Linear(in_dim, hid_dim)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hid_dim, 1)  # binary logit
    def forward(self, x):
        x = self.fc1(x)
        x = self.relu(x)
        x = self.fc2(x)  # logits
        return x

In [80]:
input_dim = Xtr.shape[1]
model = Net(input_dim, HIDDEN_DIM)

criterion = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(model.parameters(), lr=LR, weight_decay=WEIGHT_DECAY)

# Training Loop

In [81]:
best_val = float("inf")
wait = 0
for epoch in range(1, MAX_EPOCHS + 1):
    model.train()
    epoch_loss = 0.0
    for xb, yb in train_loader:
        optimizer.zero_grad()
        logits = model(xb)
        loss = criterion(logits, yb)
        loss.backward()
        optimizer.step()
        epoch_loss += loss.item() * xb.size(0)
    epoch_loss /= len(train_loader.dataset)

    # validation
    model.eval()
    with torch.no_grad():
        val_logits = model(Xva)
        val_loss = criterion(val_logits, yva).item()
        pred_label = (val_logits > 0.5).to(int)
        score = f1_score(pred_label, yva, average='macro')
    print(f"Epoch {epoch:03d} | train_loss={epoch_loss:.4f} val_loss={val_loss:.4f} f1-score={score:.4f}")

    if val_loss + 1e-6 < best_val:
        best_val = val_loss
        wait = 0
        torch.save(model.state_dict(), BEST_MODEL_PATH)
    else:
        wait += 1
        if wait >= PATIENCE:
            print("Early stopping.")
            break

Epoch 001 | train_loss=0.6900 val_loss=0.6803 f1-score=0.3776
Epoch 002 | train_loss=0.6693 val_loss=0.6600 f1-score=0.3776
Epoch 003 | train_loss=0.6500 val_loss=0.6412 f1-score=0.3694
Epoch 004 | train_loss=0.6323 val_loss=0.6240 f1-score=0.3694
Epoch 005 | train_loss=0.6162 val_loss=0.6073 f1-score=0.3694
Epoch 006 | train_loss=0.6002 val_loss=0.5921 f1-score=0.3694
Epoch 007 | train_loss=0.5864 val_loss=0.5770 f1-score=0.3889
Epoch 008 | train_loss=0.5725 val_loss=0.5630 f1-score=0.4059
Epoch 009 | train_loss=0.5600 val_loss=0.5494 f1-score=0.4309
Epoch 010 | train_loss=0.5482 val_loss=0.5365 f1-score=0.4649
Epoch 011 | train_loss=0.5374 val_loss=0.5242 f1-score=0.5125
Epoch 012 | train_loss=0.5271 val_loss=0.5131 f1-score=0.5566
Epoch 013 | train_loss=0.5185 val_loss=0.5025 f1-score=0.6077
Epoch 014 | train_loss=0.5100 val_loss=0.4926 f1-score=0.6266
Epoch 015 | train_loss=0.5022 val_loss=0.4837 f1-score=0.6478
Epoch 016 | train_loss=0.4954 val_loss=0.4757 f1-score=0.6590
Epoch 01

In [82]:
# load best & save final
model.load_state_dict(torch.load(BEST_MODEL_PATH, map_location="cpu"))
torch.save(model.state_dict(), FINAL_MODEL_PATH)
dump({"imputer": imputer, "scaler": scaler}, PREPROCESS_PATH)
print("Saved model & preprocessors:", FINAL_MODEL_PATH, PREPROCESS_PATH)

Saved model & preprocessors: models/mlp_torch_final.pth models/preprocess.joblib


In [83]:
# =====================================================================
# Inference utilities (前処理の読み込み → sigmoid(model(x)) → しきい値でラベル化)
# =====================================================================
def load_preprocess_and_model(model_path=FINAL_MODEL_PATH, preprocess_path=PREPROCESS_PATH,
                              input_dim_hint=None, hidden_dim=HIDDEN_DIM):
    """前処理(imputer, scaler)と学習済みモデルを読み込み"""
    pp = load(preprocess_path)  # {"imputer": ..., "scaler": ...}
    imputer_loaded = pp["imputer"]
    scaler_loaded = pp["scaler"]

    # input_dim は呼び出し時のデータから決まるが、クラス定義には必要
    in_dim = input_dim_hint if input_dim_hint is not None else input_dim
    mdl = Net(in_dim, hidden_dim)
    mdl.load_state_dict(torch.load(model_path, map_location="cpu"))
    mdl.eval()
    return imputer_loaded, scaler_loaded, mdl

In [84]:
def predict_proba_numpy(X_np: np.ndarray, imputer: SimpleImputer, scaler: StandardScaler, mdl: nn.Module) -> np.ndarray:
    """確率（正例=1の確率）を numpy で返す"""
    X_tx = imputer.transform(X_np)
    X_tx = scaler.transform(X_tx)
    Xt = torch.tensor(X_tx, dtype=torch.float32)
    with torch.no_grad():
        prob = torch.sigmoid(mdl(Xt)).numpy().ravel()
    return prob


In [85]:
def predict_label_numpy(X_np: np.ndarray, imputer: SimpleImputer, scaler: StandardScaler, mdl: nn.Module,
                        threshold: float = THRESHOLD) -> np.ndarray:
    """確率を閾値で 0/1 に変換"""
    prob = predict_proba_numpy(X_np, imputer, scaler, mdl)
    return (prob > threshold).astype(int)

In [86]:
# ----------------------
# Example: run inference on test.csv
# ----------------------
# 前処理：Sexエンコード（学習と同じ変換）
test_np = test_numeric.to_numpy()

# 読み込み（input_dim_hint は列数を指定）
imputer_ld, scaler_ld, model_ld = load_preprocess_and_model(
    model_path=FINAL_MODEL_PATH,
    preprocess_path=PREPROCESS_PATH,
    input_dim_hint=test_np.shape[1],
    hidden_dim=HIDDEN_DIM
)

test_prob = predict_proba_numpy(test_np, imputer_ld, scaler_ld, model_ld)
test_pred = (test_prob > THRESHOLD).astype(int)

print("test prob head:", test_prob[:10])
print("test pred head:", test_pred[:10])

test prob head: [0.08423626 0.44779164 0.09385546 0.11689068 0.59219176 0.18823364
 0.62022376 0.28163928 0.6368817  0.1932441 ]
test pred head: [0 0 0 0 1 0 1 0 1 0]


In [87]:
compiled_model = torch.compile(model)

In [88]:
model

Net(
  (fc1): Linear(in_features=6, out_features=32, bias=True)
  (relu): ReLU()
  (fc2): Linear(in_features=32, out_features=1, bias=True)
)

In [89]:
compiled_model

OptimizedModule(
  (_orig_mod): Net(
    (fc1): Linear(in_features=6, out_features=32, bias=True)
    (relu): ReLU()
    (fc2): Linear(in_features=32, out_features=1, bias=True)
  )
)

In [90]:
!pip install --upgrade onnx onnxscript



In [91]:
import torch.onnx

In [92]:
dummy_input = torch.randn(1, input_dim)

onnx_program = torch.onnx.export(model, dummy_input, dynamo=True)

[torch.onnx] Obtain model graph for `Net([...]` with `torch.export.export(..., strict=False)`...
[torch.onnx] Obtain model graph for `Net([...]` with `torch.export.export(..., strict=False)`... ✅
[torch.onnx] Run decomposition...
[torch.onnx] Run decomposition... ✅
[torch.onnx] Translate the graph into ONNX...
[torch.onnx] Translate the graph into ONNX... ✅


In [93]:
onnx_program.save(MODEL_DIR / "mlp_torch.onnx")