섹션 1(전체 코드)

In [None]:
import torch
from torch import nn, optim
from torch.utils.data import DataLoader
from datetime import datetime
import wandb
import argparse
from pathlib import Path
import sys
import os
import pandas as pd

필요한 라이브러리를 import함

In [74]:
# Take the directory two levels above the current working directory as the
# base path, show it, and add it to the module search path.
_working_dir = Path().resolve()
BASE_PATH = str(_working_dir.parent.parent)
print("BASE_PATH:", BASE_PATH)
sys.path.append(BASE_PATH)

BASE_PATH: C:\Users\MASTER\git\link_dl


base path를 설정함

In [75]:
from titanic_dataset import get_preprocessed_dataset

def get_data():
    """Build the data loaders for the train, validation and test splits.

    The three datasets come from ``get_preprocessed_dataset()``. The training
    loader shuffles and uses ``wandb.config.batch_size``; the validation and
    test loaders use a batch size equal to the length of their dataset.

    Returns:
        Tuple ``(train_data_loader, validation_data_loader, test_data_loader)``.
    """
    train_set, validation_set, test_set = get_preprocessed_dataset()

    train_loader = DataLoader(
        dataset=train_set,
        batch_size=wandb.config.batch_size,
        shuffle=True,
    )

    # Batch size == dataset length for the evaluation splits.
    validation_loader, test_loader = (
        DataLoader(dataset=split, batch_size=len(split))
        for split in (validation_set, test_set)
    )

    return train_loader, validation_loader, test_loader

titanic 데이터를 가져와 train_data_loader, validation_data_loader, test_data_loader를 반환함

섹션 2(활성 함수 선택 -> ReLU)

In [76]:
class MyModel(nn.Module):
  """Fully connected network with two ReLU-activated hidden layers.

  The hidden layer widths are the first two entries of
  ``wandb.config.n_hidden_unit_list``. No activation is applied after the
  last linear layer.
  """

  def __init__(self, n_input, n_output):
    """Create the layer stack.

    Args:
      n_input: Number of input features of the first linear layer.
      n_output: Number of output features of the last linear layer.
    """
    super().__init__()

    hidden_units = wandb.config.n_hidden_unit_list
    first_width, second_width = hidden_units[0], hidden_units[1]

    layers = [
      nn.Linear(n_input, first_width),
      nn.ReLU(),
      nn.Linear(first_width, second_width),
      nn.ReLU(),
      nn.Linear(second_width, n_output),
    ]
    self.model = nn.Sequential(*layers)

  def forward(self, x):
    """Return the output of the layer stack applied to ``x``."""
    return self.model(x)

모델 클래스 정의

In [77]:
def get_model_and_optimizer():
    """Create the model together with its SGD optimizer.

    The model is a ``MyModel`` with 10 inputs and 1 output. The optimizer's
    learning rate is read from ``wandb.config.learning_rate``.

    Returns:
        Tuple ``(model, optimizer)``.
    """
    network = MyModel(n_input=10, n_output=1)
    learning_rate = wandb.config.learning_rate
    sgd = optim.SGD(network.parameters(), lr=learning_rate)

    return network, sgd

모델과 옵티마이저 반환 함수

섹션 3(EarlyStopping)

In [78]:
from _01_code._99_common_utils.early_stopping import EarlyStopping

def training_loop(model, optimizer, train_data_loader, validation_data_loader, early_stopping):
  """Train ``model`` with per-epoch validation, wandb logging and early stopping.

  Runs at most ``wandb.config.epochs`` epochs. Each epoch makes one
  optimization pass over ``train_data_loader``, evaluates the model on
  ``validation_data_loader`` without gradients, logs both average losses to
  wandb and hands the average validation loss to ``early_stopping``. A
  progress line is printed every 100 epochs.

  Args:
    model: ``nn.Module`` mapping a batch's ``'input'`` tensor to logits
      (the loss is ``nn.BCEWithLogitsLoss``).
    optimizer: Optimizer that updates ``model``'s parameters.
    train_data_loader: Iterable of batches; each batch is indexed with
      ``'input'`` and ``'target'``.
    validation_data_loader: Iterable of batches with the same keys.
    early_stopping: Object whose ``check_and_save(validation_loss, model)``
      returns a ``(message, early_stop)`` pair; training stops as soon as
      ``early_stop`` is true.

  Raises:
    ZeroDivisionError: If either loader yields no batches in an epoch.
  """
  n_epochs = wandb.config.epochs
  loss_fn = nn.BCEWithLogitsLoss()
  print_interval = 100
  next_print_epoch = print_interval
  # Remember the caller's train/eval mode so it can be restored afterwards.
  was_training = model.training

  for epoch in range(1, n_epochs + 1):
    # Training mode matters for layers such as dropout or batch norm.
    model.train()
    loss_train = 0.0
    num_trains = 0
    for train_batch in train_data_loader:
      inputs = train_batch['input']
      # Cast to float and add a trailing dimension for the loss function
      # (assumes 'target' is one value per sample -- TODO confirm).
      targets = train_batch['target'].float().unsqueeze(1)
      loss = loss_fn(model(inputs), targets)
      loss_train += loss.item()
      num_trains += 1

      optimizer.zero_grad()
      loss.backward()
      optimizer.step()

    model.eval()
    loss_validation = 0.0
    num_validations = 0
    with torch.no_grad():
      for validation_batch in validation_data_loader:
        inputs = validation_batch['input']
        targets = validation_batch['target'].float().unsqueeze(1)
        loss = loss_fn(model(inputs), targets)
        loss_validation += loss.item()
        num_validations += 1

    # Average over the batches actually seen, so the same numbers are used
    # for logging, printing and early stopping, and loaders without
    # ``__len__`` work as well.
    avg_train_loss = loss_train / num_trains
    avg_val_loss = loss_validation / num_validations

    wandb.log({
      "Epoch": epoch,
      "Training loss": avg_train_loss,
      "Validation loss": avg_val_loss
    })

    _, early_stop = early_stopping.check_and_save(avg_val_loss, model)

    if early_stop:
      print(f"Early stopping triggered at epoch {epoch}")
      break

    if epoch >= next_print_epoch:
      print(
        f"Epoch {epoch}, "
        f"Training loss {avg_train_loss:.4f}, "
        f"Validation loss {avg_val_loss:.4f}"
      )
      next_print_epoch += print_interval

  # Leave the model in the mode the caller passed it in.
  model.train(was_training)

training loop 정의

섹션 3(테스트를 통한 submission.csv 구성)

In [79]:
def inference_and_save(model, test_data_loader, checkpoint_path, output_csv_path, test_csv_path="test.csv"):
    """Load a checkpoint, predict on the test set and write a submission CSV.

    The model weights are replaced by the state dict stored at
    ``checkpoint_path``. Each batch's ``'input'`` is passed through the
    model, the outputs go through a sigmoid and are thresholded at 0.5 to
    produce 0/1 predictions. Predictions are paired, in order, with the
    ``PassengerId`` column of ``test_csv_path``.

    Args:
        model: ``nn.Module`` whose architecture matches the checkpoint and
            whose output has a second dimension of size 1.
        test_data_loader: Iterable of batches indexed with ``'input'``.
        checkpoint_path: Path to a state dict saved with ``torch.save``.
        output_csv_path: Destination of the submission CSV with columns
            ``PassengerId`` and ``Survived``.
        test_csv_path: CSV file providing the ``PassengerId`` column.
            Defaults to ``"test.csv"`` in the current working directory.

    Raises:
        ValueError: If the number of predictions differs from the number of
            passenger ids.
    """
    print("Loading best model checkpoint for inference...")
    # NOTE(review): torch.load unpickles the file; only load checkpoints
    # that come from a trusted source.
    model.load_state_dict(torch.load(checkpoint_path, map_location="cpu"))
    model.eval()

    all_preds = []
    with torch.no_grad():
        for batch in test_data_loader:
            probs = torch.sigmoid(model(batch['input']))
            all_preds.extend((probs > 0.5).int().squeeze(1).tolist())

    passenger_ids = pd.read_csv(test_csv_path)["PassengerId"].tolist()

    # Predictions are matched to ids purely by position, so the counts must agree.
    if len(passenger_ids) != len(all_preds):
        raise ValueError(
            f"Number of predictions ({len(all_preds)}) does not match "
            f"number of passenger ids ({len(passenger_ids)})"
        )

    submission = pd.DataFrame({
        "PassengerId": passenger_ids,
        "Survived": all_preds
    })
    submission.to_csv(output_csv_path, index=False)
    print(f"✅ submission.csv saved to: {output_csv_path}")

테스트 데이터로 추론한 뒤 submission.csv를 저장하는 함수

In [80]:
def main(args):
  """Run one experiment: train, then write a submission file.

  Initializes a wandb run, builds the data loaders, model and optimizer,
  trains with early stopping, and runs inference on the test set. The wandb
  run is always finished, even when one of the steps raises.

  Args:
    args: Namespace providing ``wandb`` (enables online logging when true),
      ``batch_size`` and ``epochs``.
  """
  current_time_str = datetime.now().astimezone().strftime('%Y-%m-%d_%H-%M-%S')

  config = {
    'epochs': args.epochs,
    'batch_size': args.batch_size,
    'learning_rate': 1e-3,
    'n_hidden_unit_list': [20, 20],
  }

  wandb.init(
    mode="online" if args.wandb else "disabled",
    project="my_model_training",
    notes="My first wandb experiment",
    tags=["my_model", "titanic"],
    name=current_time_str,
    config=config
  )

  # Assume failure until every step has completed, so that an exception
  # still closes the run and marks it as failed.
  exit_code = 1
  try:
    print(args)
    print(wandb.config)

    train_data_loader, validation_data_loader, test_data_loader = get_data()

    linear_model, optimizer = get_model_and_optimizer()

    checkpoint_dir = os.path.join(BASE_PATH, "checkpoints")
    os.makedirs(checkpoint_dir, exist_ok=True)

    early_stopping = EarlyStopping(
      patience=20,
      delta=1e-5,
      project_name="titanic_model",
      checkpoint_file_path=checkpoint_dir,
      run_time_str=current_time_str
    )

    print("#" * 50, 1)

    training_loop(
      model=linear_model,
      optimizer=optimizer,
      train_data_loader=train_data_loader,
      validation_data_loader=validation_data_loader,
      early_stopping=early_stopping
    )

    # NOTE(review): presumably the file EarlyStopping writes for
    # project_name="titanic_model" -- confirm against its implementation.
    latest_checkpoint = os.path.join(checkpoint_dir, "titanic_model_checkpoint_latest.pt")

    submission_path = os.path.join(BASE_PATH, f"submission_{current_time_str}.csv")

    inference_and_save(linear_model, test_data_loader, latest_checkpoint, submission_path)

    exit_code = 0
  finally:
    wandb.finish(exit_code=exit_code)

main 함수 정의

섹션 2(배치 사이즈 선정 -> 16)

In [81]:
if __name__ == "__main__":
  # Command-line options of the experiment.
  parser = argparse.ArgumentParser()
  parser.add_argument(
    "--wandb",
    action=argparse.BooleanOptionalAction,
    default=False,
    help="True or False",
  )
  parser.add_argument(
    "-b",
    "--batch_size",
    type=int,
    default=512,
    help="Batch size (int, default: 512)",
  )
  parser.add_argument(
    "-e",
    "--epochs",
    type=int,
    default=1_000,
    help="Number of training epochs (int, default:1_000)",
  )

  # The argument list is given explicitly instead of being read from
  # sys.argv: wandb enabled, batch size 16, 1000 epochs.
  args = parser.parse_args(["--wandb", "-b", "16", "-e", "1000"])

  main(args)

Namespace(wandb=True, batch_size=16, epochs=1000)
{'epochs': 1000, 'batch_size': 16, 'learning_rate': 0.001, 'n_hidden_unit_list': [20, 20]}


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  all_df["alone"].fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  all_df["Embarked"].fillna("missing", inplace=True)


Index(['Survived', 'Pclass', 'Sex', 'Age', 'SibSp', 'Parch', 'Fare',
       'Embarked', 'title', 'family_num', 'alone'],
      dtype='object')
   Survived  Pclass  Sex   Age  SibSp  Parch     Fare  Embarked  title  \
0       0.0       3    1  22.0      1      0   7.2500         2      2   
1       1.0       1    0  38.0      1      0  71.2833         0      3   
2       1.0       3    0  26.0      0      0   7.9250         2      1   
3       1.0       1    0  35.0      1      0  53.1000         2      3   
4       0.0       3    1  35.0      0      0   8.0500         2      2   
5       0.0       3    1  29.0      0      0   8.4583         1      2   
6       0.0       1    1  54.0      0      0  51.8625         2      2   
7       0.0       3    1   2.0      3      1  21.0750         2      0   
8       1.0       3    0  27.0      0      2  11.1333         2      3   
9       1.0       2    0  14.0      1      0  30.0708         0      3   

   family_num  alone  
0           1    0.

0,1
Epoch,▁▁▁▁▁▁▁▂▂▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▄▄▄▅▅▅▅▅▇▇▇▇██
Training loss,█▇▇▆▇▆▆▆▆▆▅▅▅▅▄▅▄▄▄▃▃▃▃▃▂▃▂▂▃▂▂▂▁▂▂▁▁▁▁▁
Validation loss,██▇▇█▇▇▇▇▇▇▇▇▆▇▆▆▆▆▅▆▇▅▅▆▄▄▃▃▄▂▂▃▃▁▁▂▁▄▁

0,1
Epoch,618.0
Training loss,0.48077
Validation loss,0.52424


main 함수 실행

숙제 후기

타이타닉 데이터를 다루며 딥러닝의 기본적인 과정을 익힐 수 있었다.
또한 활성화 함수와 배치 사이즈를 변경함에 따라 loss의 변화 양상이 달라지는 것을 확인할 수 있었다.

섹션 4(캐글 제출 이미지)

### 캐글 이미지
<img src="kaggle.png" alt="Kaggle 결과" width="400">