# W&B의 Sweeps
- 하이퍼파라미터 최적화를 위한 W&B의 강력한 기능
- W&B Sweeps는 하이퍼파라미터 최적화를 위해 여러 실험을 자동 실행하고 결과를 관리하는 도구
- 특정 범위의 하이퍼파라미터 조합을 기반으로 여러 실험을 자동으로 실행하고 최적의 모델을 찾는 데 사용

## Sweep Configuration
- Sweeps의 실험 설정을 정의
- 하이퍼파라미터 범위, 최적화 목표, 실험 방식 등을 설정

## Sweep Agent
- 개별 하이퍼파라미터 조합을 실행하는 프로세스

In [1]:
import pandas as pd
import numpy as np
import random
import os
import torch
from sklearn.preprocessing import OneHotEncoder, MinMaxScaler

DATA_PATH = "../data/"

SEED = 42 # 시드값

# 데이터 블러오기
train = pd.read_csv(f"{DATA_PATH}titanic_train.csv") # 학습데이터
test = pd.read_csv(f"{DATA_PATH}titanic_test.csv") # 테스트 데이터

# 결측치 처리
age_mean = train["age"].mean()
fare_median = train["fare"].median()
cabin_unk = "UNK"
embarked_mode = train["embarked"].mode()[0]
train["age"] = train["age"].fillna(age_mean)
train["cabin"] = train["cabin"].fillna(cabin_unk)
test["age"] = test["age"].fillna(age_mean)
test["fare"] = test["fare"].fillna(fare_median)
test["cabin"] = test["cabin"].fillna(cabin_unk)
test["embarked"] = test["embarked"].fillna(embarked_mode)

# 특성으로 사용할 변수 선택
cols = ["age","sibsp","parch","fare","pclass","gender","embarked"]
train_ft = train[cols].copy()
test_ft = test[cols].copy()

# 범주형 변수 원핫인코딩
cols = ['gender','embarked']
enc = OneHotEncoder(handle_unknown = 'ignore')
enc.fit(train[cols])
tmp = pd.DataFrame(
    enc.transform(train_ft[cols]).toarray(),
    columns = enc.get_feature_names_out()
)
train_ft = pd.concat([train_ft,tmp],axis=1).drop(columns=cols)
tmp = pd.DataFrame(
    enc.transform(test_ft[cols]).toarray(),
    columns = enc.get_feature_names_out()
)
test_ft = pd.concat([test_ft,tmp],axis=1).drop(columns=cols)


# Min-Max Scaling
scaler = MinMaxScaler()
scaler.fit(train_ft)
train_ft = scaler.transform(train_ft)
test_ft = scaler.transform(test_ft)

# 정답 데이터
target = train["survived"].to_numpy().reshape(-1,1) # 정답 데이터 2차원으로 변경

class TitanicDataset(torch.utils.data.Dataset):
    def __init__(self, x, y=None):
        self.x = x
        self.y = y

    def __len__(self):
        return len(self.x)

    def __getitem__(self, idx):
        item = {}
        item["x"] = torch.Tensor(self.x[idx])
        if self.y is not None:
            item["y"] = torch.Tensor(self.y[idx])
        return item

In [2]:
def reset_seeds(seed):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True

class Net(torch.nn.Module):
    def __init__(self, n_features):
        super().__init__()
        self.seq = torch.nn.Sequential(
            torch.nn.Linear(n_features, 12),
            torch.nn.BatchNorm1d(12),
            torch.nn.LeakyReLU(),
            torch.nn.Linear(12, 8),
            torch.nn.BatchNorm1d(8),
            torch.nn.LeakyReLU(),
            torch.nn.Linear(8, 4),
            torch.nn.BatchNorm1d(4),
            torch.nn.LeakyReLU(),
            torch.nn.Linear(4, 1)
        )
    def forward(self, x):
        return self.seq(x)

def train_loop(dl, model, loss_fn, optimizer, device):
    epoch_loss = 0
    model.train()
    for batch in dl:
        pred = model(batch["x"].to(device))
        loss = loss_fn(pred, batch["y"].to(device))

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        epoch_loss += loss.item()

    epoch_loss /= len(dl)
    return epoch_loss

@torch.no_grad()
def test_loop(dl, model, loss_fn, device):
    epoch_loss = 0
    model.eval()

    act = torch.nn.Sigmoid()
    pred_list = []
    for batch in dl:
        pred = model( batch["x"].to(device) )
        if batch.get("y") is not None:
            loss = loss_fn(pred, batch["y"].to(device) )
            epoch_loss += loss.item()

        pred = act(pred)
        pred = pred.to("cpu").numpy()
        pred_list.append(pred)

    pred = np.concatenate(pred_list)
    epoch_loss /= len(dl)
    return epoch_loss, pred

from sklearn.model_selection import KFold
from sklearn.metrics import roc_auc_score

loss_fn = torch.nn.BCEWithLogitsLoss()
device = "cuda" if torch.cuda.is_available() else "cpu"
batch_size = 32
n_features = train_ft.shape[1]
n_splits = 5

cv = KFold(n_splits, shuffle=True, random_state=SEED)

In [3]:
import wandb

wandb.login()

wandb: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.
wandb: Currently logged in as: koeyhi (koeyhi-student). Use `wandb login --relogin` to force relogin


True

In [4]:
class Net(torch.nn.Module):
    def __init__(self, n_features, hidden_size, act_func_key, dropout):
        super().__init__()
        act_func = {
            "relu": torch.nn.ReLU(),
            "lkrelu": torch.nn.LeakyReLU(),
            "prelu": torch.nn.PReLU(),
            "elu": torch.nn.ELU(),
            "silu": torch.nn.SiLU(),
            "gelu": torch.nn.GELU(),
        }

        self.seq = torch.nn.Sequential(
            torch.nn.Linear(n_features, hidden_size),
            act_func[act_func_key],
            torch.nn.Dropout(dropout),
            torch.nn.Linear(hidden_size, hidden_size // 2),
            act_func[act_func_key],
            torch.nn.Linear(hidden_size // 2, 1),
        )

    def forward(self, x):
        return self.seq(x)

In [5]:
model = Net(train_ft.shape[1], 16, "gelu", 0.1)
model(torch.Tensor(train_ft[:2]))

tensor([[0.1236],
        [0.1805]], grad_fn=<AddmmBackward0>)

# Sweep config 설정

In [6]:
sweep_config = {
    "project": "my-project", # 프로젝트 지정
    "name": "titanic-hp-tuning", # wandb의 UI에 표시되는 스윕의 이름
    "method": "bayes", # 튜닝 전략("bayes", "random", "grid")
}

In [7]:
# 최적화에 사용할 메트릭 지정
sweep_config["metric"] = {
    "name": "score", # wandb.log에 기록하는 평가지표의 키 이름
    "goal": "maximize", # maximize, minimize
}

In [8]:
# 하이퍼파라미터 탐색 범위
sweep_config["parameters"] = {
    # key: 하이퍼파라미터 명, value: 하이퍼파라미터 범위
    "batch_size": {"values": [8, 16, 32, 64, 128]},
    "lr": {
        "distribution": "uniform",
        "min": 0.0001,
        "max": 0.01
    },
    "hidden_size": {"values": [6, 8, 10, 12, 16]},
    "act_func_key": {"values": ["relu", "lkrelu", "prelu", "elu", "silu", "gelu"]},
    "dropout": {"values": [0., 0.1, 0.2, 0.3, 0.4, 0.5]}
}

In [9]:
from pprint import pprint

pprint(sweep_config)

{'method': 'bayes',
 'metric': {'goal': 'maximize', 'name': 'score'},
 'name': 'titanic-hp-tuning',
 'parameters': {'act_func_key': {'values': ['relu',
                                            'lkrelu',
                                            'prelu',
                                            'elu',
                                            'silu',
                                            'gelu']},
                'batch_size': {'values': [8, 16, 32, 64, 128]},
                'dropout': {'values': [0.0, 0.1, 0.2, 0.3, 0.4, 0.5]},
                'hidden_size': {'values': [6, 8, 10, 12, 16]},
                'lr': {'distribution': 'uniform', 'max': 0.01, 'min': 0.0001}},
 'project': 'my-project'}


# wandb.sweep 함수
- 스윕 생성 함수로 스윕 설정값(딕셔너리 형태)을 전달

In [14]:
sweep_id = wandb.sweep(sweep_config)

Create sweep with ID: g4lfwo5g
Sweep URL: https://wandb.ai/koeyhi-student/my-project/sweeps/g4lfwo5g


In [15]:
sweep_id

'g4lfwo5g'

# wandb.agent 함수
- 생성된 스윕에 대해 하이퍼파라미터 탐색 수행
- 주요 파라미터
    - sweep_id
    - 콜백함수
    - count: 시도 횟수

In [16]:
class AgentFunction:
    def __init__(self, x, y, loss_fn, device, seed):
        self.x, self.y, self.loss_fn, self.device, self.seed = x, y, loss_fn, device, seed
        self.cv = KFold(5, shuffle=True, random_state=self.seed)

    def __call__(self, config=None):
        with wandb.init(config=config):
            config = wandb.config
            score_list = []
            for i, (tri, vai) in enumerate(self.cv.split(self.x)):
                train_dt = TitanicDataset(self.x[tri], self.y[tri])
                train_dl = torch.utils.data.DataLoader(train_dt, batch_size=config.batch_size, shuffle=True)

                valid_dt = TitanicDataset(self.x[vai], self.y[vai])
                valid_dl = torch.utils.data.DataLoader(valid_dt, batch_size=config.batch_size, shuffle=False)

                model = Net(self.x.shape[1], config.hidden_size, config.act_func_key, config.dropout).to(self.device)
                optimizer = torch.optim.Adam(model.parameters(), lr=config.lr)

                patience = 0
                best_score = 0
                for epoch in range(100):
                    train_loss = train_loop(train_dl, model, self.loss_fn, optimizer, self.device)
                    valid_loss, pred = test_loop(valid_dl, model, self.loss_fn, self.device)
                    score = roc_auc_score(self.y[vai], pred)
                    patience += 1

                    if score > best_score:
                        best_score = score
                        patience = 0

                    if patience == 5:
                        break

                score_list.append(score)

            wandb.log({"score": np.mean(score_list)})

In [17]:
reset_seeds(SEED)
agent_func = AgentFunction(train_ft, target, loss_fn, device, SEED)
wandb.agent(sweep_id, agent_func, count=10)

[34m[1mwandb[0m: Agent Starting Run: iyvydk7l with config:
[34m[1mwandb[0m: 	act_func_key: elu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	dropout: 0
[34m[1mwandb[0m: 	hidden_size: 16
[34m[1mwandb[0m: 	lr: 0.0055241294588629156
[34m[1mwandb[0m: Currently logged in as: [33mkoeyhi[0m ([33mkoeyhi-student[0m). Use [1m`wandb login --relogin`[0m to force relogin


0,1
score,▁

0,1
score,0.89764


[34m[1mwandb[0m: Agent Starting Run: hvmjujar with config:
[34m[1mwandb[0m: 	act_func_key: lkrelu
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	hidden_size: 16
[34m[1mwandb[0m: 	lr: 0.0012326570558403938


0,1
score,▁

0,1
score,0.88535


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: jik1yowt with config:
[34m[1mwandb[0m: 	act_func_key: prelu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	dropout: 0.5
[34m[1mwandb[0m: 	hidden_size: 8
[34m[1mwandb[0m: 	lr: 0.008486637454744622


0,1
score,▁

0,1
score,0.89512


[34m[1mwandb[0m: Agent Starting Run: gyyrnw0i with config:
[34m[1mwandb[0m: 	act_func_key: gelu
[34m[1mwandb[0m: 	batch_size: 8
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	hidden_size: 10
[34m[1mwandb[0m: 	lr: 0.006671662610639849


0,1
score,▁

0,1
score,0.89933


[34m[1mwandb[0m: Agent Starting Run: mgfqucf7 with config:
[34m[1mwandb[0m: 	act_func_key: elu
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	dropout: 0.5
[34m[1mwandb[0m: 	hidden_size: 8
[34m[1mwandb[0m: 	lr: 0.0014257288026262836


0,1
score,▁

0,1
score,0.89134


[34m[1mwandb[0m: Agent Starting Run: fqxpzh3y with config:
[34m[1mwandb[0m: 	act_func_key: prelu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	dropout: 0.1
[34m[1mwandb[0m: 	hidden_size: 16
[34m[1mwandb[0m: 	lr: 0.0028182956367660697


0,1
score,▁

0,1
score,0.89819


[34m[1mwandb[0m: Agent Starting Run: 84sm598n with config:
[34m[1mwandb[0m: 	act_func_key: gelu
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	hidden_size: 12
[34m[1mwandb[0m: 	lr: 0.006299816121834344


0,1
score,▁

0,1
score,0.89985


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: cti6zp05 with config:
[34m[1mwandb[0m: 	act_func_key: gelu
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	hidden_size: 16
[34m[1mwandb[0m: 	lr: 0.006068032076158268


0,1
score,▁

0,1
score,0.90151


[34m[1mwandb[0m: Agent Starting Run: 360g4o7v with config:
[34m[1mwandb[0m: 	act_func_key: gelu
[34m[1mwandb[0m: 	batch_size: 8
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	hidden_size: 16
[34m[1mwandb[0m: 	lr: 0.009208659703468267


0,1
score,▁

0,1
score,0.90296


[34m[1mwandb[0m: Agent Starting Run: qcc9rvkm with config:
[34m[1mwandb[0m: 	act_func_key: silu
[34m[1mwandb[0m: 	batch_size: 8
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	hidden_size: 16
[34m[1mwandb[0m: 	lr: 0.006302944484363948


0,1
score,▁

0,1
score,0.9022
