In [1]:
import torch
import torch.nn as nn
import torch.optim as optim

In [7]:
#로컬 모듈 import
import sys
import os
print(os.getcwd())
# 모듈 경로를 sys.path에 추가
module_path = os.path.abspath(os.path.join('..', 'my_transformer'))

if module_path not in sys.path:
    sys.path.append(module_path)
from my_transformer.my_transformer import Transformer

/Users/minji/Works/Ybigta/25-1_DS/25-1_DS_assignment/Week_01/Transformer


In [6]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import wandb

# 데이터셋 정의
class PatternDataset(Dataset):
    def __init__(self, num_samples=1000, sequence_length=4, max_num=10):
        self.data = []
        self.targets = []
        for _ in range(num_samples):
            start = torch.randint(0, max_num, (1,)).item()  # 시작 숫자
            diff = torch.randint(1, 5, (1,)).item()  # 등차
            sequence = [start + i * diff for i in range(sequence_length)]
            next_value = sequence[-1] + diff  # 다음에 올 숫자
            
            # 입력 시퀀스는 정수로 이루어진 시퀀스, 타겟은 다음에 올 숫자
            self.data.append(torch.tensor(sequence))
            self.targets.append(next_value)
    
    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        return self.data[idx], self.targets[idx]

# 하이퍼파라미터 설정
src_vocab_size = 100
tgt_vocab_size = 200
d_model = 32
n_heads = 4
d_ff = 64
num_encoder_layers = 6
num_decoder_layers = 6
dropout = 0.1
batch_size = 32
num_epochs = 50
learning_rate = 0.0001

# WandB 초기화
#TODO
# https://wandb.ai/authorize (API Key 확인하는 웹사이트)
wandb.init(project="transformer", config={
    "src_vocab_size": src_vocab_size,
    "tgt_vocab_size": tgt_vocab_size,
    "d_model": d_model,
    "n_heads": n_heads,
    "d_ff": d_ff,
    "num_encoder_layers": num_encoder_layers,
    "num_decoder_layers": num_decoder_layers,
    "dropout": dropout,
    "batch_size": batch_size,
    "num_epochs": num_epochs,
    "learning_rate": learning_rate,
})

# 데이터셋 및 데이터로더 생성
dataset = PatternDataset(num_samples=1000)
dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

# 모델 초기화 (Transformer 모델이 별도로 정의되어 있어야 합니다)
model = Transformer(src_vocab_size, tgt_vocab_size, d_model, n_heads, d_ff, num_encoder_layers, num_decoder_layers, dropout)

# 손실 함수 및 최적화 도구 정의
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# 학습 루프

def train():
    for epoch in range(num_epochs):
        model.train()
        total_loss = 0

        for src, tgt in dataloader:
            # 예측 시작을 위한 빈 타겟 시퀀스 준비
            tgt_input = torch.zeros_like(tgt).unsqueeze(1)
            tgt = tgt.unsqueeze(1)
            
            optimizer.zero_grad()
            output = model(src, tgt_input)
            
            # 출력 크기 조정 및 손실 계산
            output = output.view(-1, tgt_vocab_size)
            tgt = tgt.view(-1)
            
            loss = criterion(output, tgt)
            loss.backward()
            optimizer.step()

            total_loss += loss.item()
        
        avg_loss = total_loss / len(dataloader)
        print(f"Epoch {epoch+1}/{num_epochs}, Loss: {avg_loss:.4f}")
        
        # WandB에 손실값 로깅
        wandb.log({"epoch": epoch+1, "loss": avg_loss})

train()


0,1
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███
loss,█▇▇▆▅▅▅▄▄▄▃▃▃▃▃▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,50.0
loss,0.19631


Epoch 1/50, Loss: 5.1018
Epoch 2/50, Loss: 4.5519


KeyboardInterrupt: 

In [None]:
# 테스트 데이터셋 생성 (학습 데이터와 동일한 방식으로 생성)
test_dataset = PatternDataset(num_samples=1000)  # 테스트용 샘플 수
test_dataloader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

def test_model(model, dataloader):
    model.eval()  # 평가 모드로 설정
    total_correct = 0
    total_samples = 0

    with torch.no_grad():  # 그래디언트 계산 중지 (평가 시에는 필요하지 않음)
        for src, tgt in dataloader:
            # 입력 시퀀스 준비
            tgt_input = torch.zeros_like(tgt).unsqueeze(1)  # 예측 시작을 위한 빈 타겟 시퀀스
            tgt = tgt.unsqueeze(1)  # 타겟을 2D 텐서로 변환
            
            # 모델에 입력 시퀀스를 전달하고 예측 값 생성
            output = model(src, tgt_input)
            predicted = output.argmax(dim=-1)  # 예측 결과는 argmax를 통해 얻음
            
            # 실제 타겟과 예측값 비교
            correct = (predicted.view(-1) == tgt.view(-1)).sum().item()
            total_correct += correct
            total_samples += tgt.size(0)

    # 정확도 계산
    accuracy = total_correct / total_samples * 100
    print(f"Test Accuracy: {accuracy:.2f}%")
    
    # WandB에 테스트 정확도 로깅
    wandb.log({"test_accuracy": accuracy})
    wandb.run.summary["test_accuracy"] = accuracy

# 학습된 모델 테스트
test_model(model, test_dataloader)


Test Accuracy: 100.00%


Wandb Sweep 사용해보기!


In [8]:
sweep_config = {
    "method": "grid",  # "random", "bayes", "grid" 등 사용 가능
    "metric": {
        "name": "loss",
        "goal": "minimize"
    },
    "parameters": {
        "learning_rate": {
            "values": [0.0001, 0.0005, 0.001]
        },
        "batch_size": {
            "values": [16, 32, 64]
        },
        "d_model": {
            "values": [32, 64]
        }
    }
}

# Sweep 등록 (프로젝트 이름은 wandb.init에서 사용한 것과 동일해야 함)
sweep_id = wandb.sweep(sweep_config, project="transformer")
# 지정한 횟수만큼 에이전트를 실행 (count를 조절하여 실험 횟수를 늘릴 수 있습니다)
wandb.agent(sweep_id, function=train, count=10)



Create sweep with ID: eujvj2j2
Sweep URL: https://wandb.ai/Promise_BERT/transformer/sweeps/eujvj2j2


[34m[1mwandb[0m: Agent Starting Run: 0nitfvuh with config:
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	d_model: 32
[34m[1mwandb[0m: 	learning_rate: 0.0001
Exception in thread Thread-29 (_run_job):
Traceback (most recent call last):
  File "c:\Users\gydbs\anaconda3\envs\network\Lib\site-packages\wandb\agents\pyagent.py", line 306, in _run_job
    self._function()
  File "C:\Users\gydbs\AppData\Local\Temp\ipykernel_13844\1180014963.py", line 98, in train
  File "c:\Users\gydbs\anaconda3\envs\network\Lib\site-packages\wandb\sdk\wandb_run.py", line 450, in wrapper
    return func(self, *args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\gydbs\anaconda3\envs\network\Lib\site-packages\wandb\sdk\wandb_run.py", line 401, in wrapper_fn
    return func(self, *args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\gydbs\anaconda3\envs\network\Lib\site-packages\wandb\sdk\wandb_run.py", line 391, in wrapper
    return func(self, *args, **kwar

Epoch 1/50, Loss: 2.8400


[34m[1mwandb[0m: Agent Starting Run: tl3rykp8 with config:
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	d_model: 32
[34m[1mwandb[0m: 	learning_rate: 0.0005
Exception in thread Thread-30 (_run_job):
Traceback (most recent call last):
  File "c:\Users\gydbs\anaconda3\envs\network\Lib\site-packages\wandb\agents\pyagent.py", line 306, in _run_job
    self._function()
  File "C:\Users\gydbs\AppData\Local\Temp\ipykernel_13844\1180014963.py", line 98, in train
  File "c:\Users\gydbs\anaconda3\envs\network\Lib\site-packages\wandb\sdk\wandb_run.py", line 450, in wrapper
    return func(self, *args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\gydbs\anaconda3\envs\network\Lib\site-packages\wandb\sdk\wandb_run.py", line 401, in wrapper_fn
    return func(self, *args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\gydbs\anaconda3\envs\network\Lib\site-packages\wandb\sdk\wandb_run.py", line 391, in wrapper
    return func(self, *args, **kwar

Epoch 1/50, Loss: 2.6560
