In [28]:
import torch

In [44]:
# Build a float32 tensor from a nested list: two blocks, each a 3x3 matrix.
t1 = torch.tensor([ [ [1, 2, 3],
                    [4, 5, 6],
                    [7, 8, 9] ],

                    [[1, 2, 3],
                     [4, 5, 6],
                     [7, 8, 9]]], dtype=torch.float32)

In [45]:
# Size of each axis of t1, outermost axis first.
t1.shape

torch.Size([2, 3, 3])

In [46]:
# Show the same data as a NumPy array.
t1.numpy()

array([[[1., 2., 3.],
        [4., 5., 6.],
        [7., 8., 9.]],

       [[1., 2., 3.],
        [4., 5., 6.],
        [7., 8., 9.]]], dtype=float32)

In [56]:
# Pick a single element: block 0, row 1, column 2.
t1[0,1,2]

tensor(6.)

In [58]:
# 2x2 tensor; no dtype is passed, so it is inferred from the Python ints.
t2 = torch.tensor([[5, 6], [7, 8]])

In [60]:
# Compare the shape of the 3-D tensor with that of the 2-D tensor.
print(t1.shape)
print(t2.shape)

torch.Size([2, 3, 3])
torch.Size([2, 2])


In [61]:
# Display the contents of t2.
t2

tensor([[5, 6],
        [7, 8]])

In [63]:
# Show the four elements of t2 laid out as a 4x1 column; t2 is not reassigned.
t2.view(4,1)

tensor([[5],
        [6],
        [7],
        [8]])

In [71]:
import pandas as pd

# Load the dataset; the path is relative to the notebook's working directory.
df = pd.read_csv("../datasets/df_blanchard.csv")

# Convert each column to a float32 tensor and append an axis of size 1,
# so every observation becomes one row of a (num_rows, 1) tensor.
x = torch.from_numpy(df["UNEMP_rate"].values).unsqueeze(dim=1).float()
y = torch.from_numpy(df["GDP_growth"].values).unsqueeze(dim=1).float()

# Both tensors come from the same frame, so their shapes should match.
print(x.shape)
print(y.shape)


torch.Size([2155, 1])
torch.Size([2155, 1])


In [None]:
from torch.utils.data import Dataset, DataLoader

class MyDataset(Dataset):
    """Map-style dataset serving integer (sample, label) pairs from a CSV file.

    The whole file is read into ``self.label`` when the dataset is created.
    Item ``idx`` pairs the value at column position 1 with the value at column
    position 50 of row ``idx``. Both are returned as 0-d int32 tensors, so any
    fractional part is truncated.
    """

    def __init__(self, csv_file):
        # Read the full table up front; every lookup below indexes into it.
        self.label = pd.read_csv(csv_file)

    def __len__(self):
        # One item per CSV row.
        return self.label.shape[0]

    def __getitem__(self, idx):
        table = self.label
        # Columns are addressed by position, not by name.
        feature_value = table.iloc[idx, 1]
        target_value = table.iloc[idx, 50]
        sample = torch.tensor(feature_value).to(torch.int32)
        label = torch.tensor(target_value).to(torch.int32)
        return sample, label
    
# Wrap the CSV-backed dataset in a DataLoader that yields shuffled batches of up to 431 items.
tensor_dataset = MyDataset("../datasets/df_blanchard.csv")
training_data = DataLoader(tensor_dataset, batch_size=431, shuffle=True)
# Displaying the loader only shows its repr; no batch is fetched here.
training_data

<torch.utils.data.dataloader.DataLoader at 0x151156f80>

In [None]:
import torch.nn as nn

class MyModel(nn.Module):
    """Single affine layer mapping 3 input features to 1 output."""

    def __init__(self):
        super().__init__()
        self.linear = nn.Linear(in_features=3, out_features=1)

    def forward(self, x):
        """Apply the linear layer to ``x``, whose last dimension must be 3."""
        prediction = self.linear(x)
        return prediction

In [79]:
import torch
from torch import nn, optim
from torch.utils.data import Dataset, DataLoader
import pandas as pd

# 1) Dataset: sliding window of length=50 → predict next unemployment value(s)
class UnempSequenceDataset(Dataset):
    """Sliding-window dataset over the z-scored ``UNEMP_rate`` column of a CSV.

    Sample ``i`` is ``(X[i], Y[i])``: ``X[i]`` holds ``seq_len`` consecutive
    normalized values and ``Y[i]`` the ``horizon`` values that follow them.

    Args:
        csv_file: Path of a CSV file with an ``UNEMP_rate`` column. Rows where
            that column is missing are dropped before windowing.
        seq_len: Number of past observations in each input window.
        horizon: Number of future observations in each target.

    Attributes:
        X: float32 tensor of shape ``(num_samples, seq_len, 1)``.
        Y: float32 tensor of shape ``(num_samples, horizon, 1)``.
        mean: Mean subtracted from the raw series.
        std: Standard deviation the series was divided by (``1.0`` when the
            raw series is constant or empty). Together with ``mean`` it lets
            callers map predictions back via ``value * std + mean``.
    """

    def __init__(self, csv_file: str, seq_len: int = 50, horizon: int = 1):
        df = pd.read_csv(csv_file).dropna(subset=["UNEMP_rate"])  # drop missing values
        data = df["UNEMP_rate"].values.astype(float)

        # Standardize: subtract the mean and divide by the standard deviation.
        if data.size:
            mean = float(data.mean())
            std = float(data.std())
        else:
            mean, std = 0.0, 1.0
        if std == 0.0:
            # A constant series would otherwise turn into NaNs (0 / 0).
            std = 1.0
        data = (data - mean) / std

        self.seq_len = seq_len
        self.horizon = horizon
        self.mean = mean
        self.std = std

        series = torch.from_numpy(data).to(torch.float32)
        window = seq_len + horizon
        num_samples = series.numel() - window + 1

        if num_samples > 0:
            # Each row of `windows` is one input window followed by its target;
            # unfold builds all of them at once instead of a Python-level loop
            # over NumPy slices.
            windows = series.unfold(0, window, 1)  # (num_samples, seq_len + horizon)
            inputs = windows[:, :seq_len].unsqueeze(-1)
            targets = windows[:, seq_len:].unsqueeze(-1)
            # Copy out of the overlapping view so X and Y own their memory.
            self.X = inputs.clone(memory_format=torch.contiguous_format)
            self.Y = targets.clone(memory_format=torch.contiguous_format)
        else:
            # Not enough observations for a single window: keep len() == 0 but
            # still expose correctly shaped tensors.
            self.X = torch.empty(0, seq_len, 1, dtype=torch.float32)
            self.Y = torch.empty(0, horizon, 1, dtype=torch.float32)

    def __len__(self):
        return self.X.size(0)

    def __getitem__(self, idx):
        return self.X[idx], self.Y[idx]


# 2) Seq2Seq LSTM model definition
class Encoder(nn.Module):
    """LSTM encoder that reduces an input sequence to its final LSTM states.

    Args:
        input_size: Number of features per time step.
        hidden_size: Width of each LSTM layer.
        num_layers: Number of stacked LSTM layers.
    """

    def __init__(self, input_size: int, hidden_size: int, num_layers: int):
        super().__init__()
        lstm_config = dict(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True,
            bidirectional=False,
        )
        self.lstm = nn.LSTM(**lstm_config)

    def forward(self, src: torch.Tensor):
        """Run the LSTM over batch-first ``src`` starting from all-zero states.

        Returns:
            ``(hidden, cell)``: the final hidden and cell states, each of shape
            ``(num_layers, batch_size, hidden_size)``.
        """
        state_shape = (self.lstm.num_layers, src.size(0), self.lstm.hidden_size)
        initial_hidden = torch.zeros(state_shape, device=src.device)
        initial_cell = torch.zeros(state_shape, device=src.device)

        # The per-step outputs are not needed, only the final states.
        _, final_states = self.lstm(src, (initial_hidden, initial_cell))
        hidden, cell = final_states
        return hidden, cell


class Decoder(nn.Module):
    """LSTM decoder followed by a linear read-out layer.

    Args:
        input_size: Number of features of each decoder input step.
        hidden_size: Width of each LSTM layer.
        num_layers: Number of stacked LSTM layers.
        output_size: Number of values produced by the read-out layer.
    """

    def __init__(self, input_size: int, hidden_size: int, num_layers: int, output_size: int):
        super().__init__()
        self.lstm = nn.LSTM(
            input_size,
            hidden_size,
            num_layers,
            batch_first=True,
            bidirectional=False,
        )
        self.fc = nn.Linear(in_features=hidden_size, out_features=output_size)

    def forward(self, tgt: torch.Tensor, hidden: torch.Tensor, cell: torch.Tensor):
        """Advance the LSTM from ``(hidden, cell)`` on ``tgt`` and project the output.

        Returns:
            ``(prediction, hidden, cell)`` where ``hidden`` and ``cell`` are the
            updated LSTM states.
        """
        lstm_out, next_state = self.lstm(tgt, (hidden, cell))
        hidden, cell = next_state
        # Drop the time axis when it has length 1 -> (batch_size, output_size).
        prediction = self.fc(torch.squeeze(lstm_out, 1))
        return prediction, hidden, cell


class Seq2Seq(nn.Module):
    """Encoder-decoder wrapper that predicts ``horizon`` steps one at a time.

    Args:
        encoder: Module returning ``(hidden, cell)`` for a source batch.
        decoder: Module mapping ``(input, hidden, cell)`` to
            ``(prediction, hidden, cell)``; its ``fc.out_features`` fixes the
            size of each prediction.
        device: Device on which the output buffer is allocated.
        horizon: Number of decoding steps.
    """

    def __init__(self, encoder: Encoder, decoder: Decoder, device: torch.device, horizon: int = 1):
        super().__init__()
        self.encoder = encoder
        self.decoder = decoder
        self.device = device
        self.horizon = horizon

    def forward(self, src: torch.Tensor, tgt: torch.Tensor = None, teacher_forcing_ratio: float = 0.5):
        """Encode ``src`` and decode ``horizon`` predictions.

        At every step the next decoder input is the previous prediction, unless
        ``tgt`` is given and a fresh uniform random draw falls below
        ``teacher_forcing_ratio``, in which case the matching ``tgt`` step is
        fed instead.

        Returns:
            Tensor of shape ``(batch_size, horizon, output_size)``.
        """
        n_batch = src.size(0)
        n_out = self.decoder.fc.out_features
        outputs = torch.zeros(n_batch, self.horizon, n_out, device=self.device)

        hidden, cell = self.encoder(src)

        # Seed the decoder with the last observed step: (batch_size, 1, input_size).
        last_observation = src[:, -1, :]
        step_input = last_observation.unsqueeze(1)

        for step in range(self.horizon):
            pred, hidden, cell = self.decoder(step_input, hidden, cell)
            outputs[:, step, :] = pred

            # Default: feed the model's own prediction back in.
            step_input = pred.unsqueeze(1)
            # The random draw only happens when a target is available.
            if tgt is not None and torch.rand(1).item() < teacher_forcing_ratio:
                step_input = tgt[:, step, :].unsqueeze(1)

        return outputs


# 3) Training pipeline
if __name__ == "__main__":
    # Device selection: use the GPU when one is available.
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Hyperparameters
    seq_len     = 50
    horizon     = 1
    input_size  = 1
    hidden_size = 64
    num_layers  = 2
    output_size = 1

    num_epochs     = 20
    batch_size     = 32
    learning_rate  = 1e-4    # lowered learning rate
    teacher_force_r = 0.5

    # Dataset and data loader
    dataset = UnempSequenceDataset(
        csv_file="../datasets/df_blanchard.csv",
        seq_len=seq_len,
        horizon=horizon
    )
    loader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

    # Build the model and define the optimizer / loss function
    enc = Encoder(input_size=input_size, hidden_size=hidden_size, num_layers=num_layers).to(device)
    dec = Decoder(input_size=input_size, hidden_size=hidden_size, num_layers=num_layers, output_size=output_size).to(device)
    model = Seq2Seq(encoder=enc, decoder=dec, device=device, horizon=horizon).to(device)

    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)

    # Training loop
    for epoch in range(1, num_epochs + 1):
        model.train()
        total_loss = 0.0

        for batch_src, batch_tgt in loader:
            batch_src = batch_src.to(device)  # (batch_size, seq_len, 1)
            batch_tgt = batch_tgt.to(device)  # (batch_size, horizon, 1)

            optimizer.zero_grad()
            outputs = model(batch_src, batch_tgt, teacher_forcing_ratio=teacher_force_r)
            loss = criterion(
                outputs.squeeze(-1),  # (batch_size, horizon)
                batch_tgt.squeeze(-1)  # (batch_size, horizon)
            )
            loss.backward()
            # Cap the global gradient norm at 1.0 before the optimizer step.
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
            optimizer.step()

            # Weight the batch-mean loss by the batch size so that the epoch
            # figure below is a per-sample average even if the last batch is smaller.
            total_loss += loss.item() * batch_src.size(0)

        avg_loss = total_loss / len(dataset)
        print(f"Epoch {epoch:02d}/{num_epochs:02d}  Loss: {avg_loss:.6f}")


Epoch 01/20  Loss: 1.005243
Epoch 02/20  Loss: 0.990092
Epoch 03/20  Loss: 0.947294
Epoch 04/20  Loss: 0.842102
Epoch 05/20  Loss: 0.825729
Epoch 06/20  Loss: 0.820607
Epoch 07/20  Loss: 0.816823
Epoch 08/20  Loss: 0.816172
Epoch 09/20  Loss: 0.811071
Epoch 10/20  Loss: 0.805553
Epoch 11/20  Loss: 0.803794
Epoch 12/20  Loss: 0.795155
Epoch 13/20  Loss: 0.789678
Epoch 14/20  Loss: 0.781921
Epoch 15/20  Loss: 0.772535
Epoch 16/20  Loss: 0.759576
Epoch 17/20  Loss: 0.740897
Epoch 18/20  Loss: 0.724283
Epoch 19/20  Loss: 0.687111
Epoch 20/20  Loss: 0.635838


In [None]:
class SelfAttention(nn.Module):
    """Multi-head scaled dot-product attention.

    The embedding is split into ``heads`` chunks of ``head_dim`` features. The
    same ``head_dim x head_dim`` bias-free projections are applied to every
    head, and the concatenated head outputs go through ``fc_out``.

    Args:
        embed_size: Size of the embedding dimension; must be divisible by ``heads``.
        heads: Number of attention heads.
    """

    def __init__(self, embed_size, heads):
        super(SelfAttention, self).__init__()
        self.embed_size = embed_size
        self.heads = heads
        self.head_dim = embed_size // heads

        assert (self.head_dim * heads == embed_size), "Embedding size must be divisible by heads"
        self.values = nn.Linear(self.head_dim, self.head_dim, bias=False)
        self.keys = nn.Linear(self.head_dim, self.head_dim, bias=False)
        self.queries = nn.Linear(self.head_dim, self.head_dim, bias=False)
        self.fc_out = nn.Linear(heads * self.head_dim, embed_size)

    def forward(self, values, keys, query, mask):
        """Attend from ``query`` over ``keys`` / ``values``.

        Args:
            values: Tensor reshaped here to ``(N, value_len, heads, head_dim)``.
            keys: Tensor reshaped here to ``(N, key_len, heads, head_dim)``.
            query: Tensor reshaped here to ``(N, query_len, heads, head_dim)``.
            mask: ``None``, or a tensor broadcastable to
                ``(N, heads, query_len, key_len)``; positions where it equals 0
                receive no attention weight.

        Returns:
            Tensor of shape ``(N, query_len, embed_size)``.
        """
        N = query.shape[0]
        value_len, key_len, query_len = values.shape[1], keys.shape[1], query.shape[1]

        # Split the embedding dimension into heads, then project each head.
        values = self.values(values.reshape(N, value_len, self.heads, self.head_dim))
        keys = self.keys(keys.reshape(N, key_len, self.heads, self.head_dim))
        queries = self.queries(query.reshape(N, query_len, self.heads, self.head_dim))

        # Dot product of every query with every key, per head:
        # (N, heads, query_len, key_len).
        energy = torch.einsum("nqhd,nkhd->nhqk", queries, keys)

        # Scale by sqrt(d_k), the per-head key dimension the dot product sums
        # over, not by the full embedding size.
        energy = energy / (self.head_dim ** 0.5)

        if mask is not None:
            # Use the most negative finite value of the current dtype so the
            # fill also works in reduced precision.
            energy = energy.masked_fill(mask == 0, torch.finfo(energy.dtype).min)

        attention = torch.softmax(energy, dim=-1)

        # Weighted sum of the values, then merge the heads back together.
        out = torch.einsum("nhqk,nkhd->nqhd", attention, values).reshape(
            N, query_len, self.heads * self.head_dim
        )

        return self.fc_out(out)

In [None]:
class TransformerBlock(nn.Module):
    """Attention sub-layer plus feed-forward sub-layer.

    Each sub-layer is followed by a residual addition, LayerNorm and dropout,
    in that order.

    Args:
        embed_size: Embedding width of the inputs and outputs.
        heads: Number of attention heads.
        dropout: Dropout probability used after both normalizations.
        forward_expansion: Factor by which the feed-forward hidden layer is
            wider than ``embed_size``.
    """

    def __init__(self, embed_size, heads, dropout, forward_expansion):
        super().__init__()
        hidden_width = forward_expansion * embed_size

        self.attention = SelfAttention(embed_size, heads)
        self.norm1 = nn.LayerNorm(embed_size)
        self.norm2 = nn.LayerNorm(embed_size)
        self.feed_forward = nn.Sequential(
            nn.Linear(embed_size, hidden_width),
            nn.ReLU(),
            nn.Linear(hidden_width, embed_size),
        )
        self.dropout = nn.Dropout(dropout)

    def forward(self, value, key, query, mask):
        """Return the block output for the given value / key / query tensors."""
        attended = self.attention(value, key, query, mask)

        # Residual connection around the attention, then norm and dropout.
        x = self.norm1(attended + query)
        x = self.dropout(x)

        # Same pattern around the position-wise feed-forward network.
        expanded = self.feed_forward(x)
        return self.dropout(self.norm2(expanded + x))

In [None]:
class Encoder(nn.Module):
    """Transformer encoder: token plus learned position embeddings, then a stack of blocks.

    Note: an LSTM-based class that is also named ``Encoder`` is defined earlier
    in this notebook; running this cell rebinds the name to this class.

    Args:
        src_vocab_size: Number of rows in the token embedding table.
        embed_size: Embedding width.
        num_layers: Number of ``TransformerBlock`` layers.
        heads: Number of attention heads per block.
        device: Stored as ``self.device``. ``forward`` follows the device of
            its input rather than this value.
        forward_expansion: Feed-forward expansion factor passed to each block.
        dropout: Dropout probability.
        max_length: Number of rows in the position embedding table, i.e. the
            longest sequence that can be embedded.
    """

    def __init__(
            self,
            src_vocab_size,
            embed_size,
            num_layers,
            heads,
            device,
            forward_expansion,
            dropout,
            max_length,
    ):
        super(Encoder, self).__init__()
        self.embed_size = embed_size
        self.device = device
        self.word_embedding = nn.Embedding(src_vocab_size, embed_size)
        self.position_embedding = nn.Embedding(max_length, embed_size)

        self.layers = nn.ModuleList(
            [
                TransformerBlock(
                    embed_size,
                    heads,
                    dropout,
                    forward_expansion=forward_expansion,
                )
            for _ in range(num_layers)]
        )
        self.dropout = nn.Dropout(dropout)

    def forward(self, x, mask):
        """Embed the token ids ``x`` of shape ``(N, seq_length)`` and run every layer.

        Returns:
            Tensor of shape ``(N, seq_length, embed_size)``.
        """
        N, seq_length = x.shape
        # Build the position ids on the same device as the token ids so both
        # embedding lookups agree, whatever `device` was passed at construction.
        positions = torch.arange(seq_length, device=x.device).expand(N, seq_length)

        out = self.dropout(self.word_embedding(x) + self.position_embedding(positions))

        # Self-attention: value, key and query are all the running activation.
        for layer in self.layers:
            out = layer(out, out, out, mask)

        return out

In [None]:
class DecoderBlock(nn.Module):
    """Decoder layer: masked self-attention followed by a ``TransformerBlock``.

    Args:
        embed_size: Embedding width.
        heads: Number of attention heads.
        forward_expansion: Feed-forward expansion factor of the inner block.
        dropout: Dropout probability.
        device: Accepted for signature compatibility; not used by this class.
    """

    def __init__(self, embed_size, heads, forward_expansion, dropout, device):
        super().__init__()
        self.attention = SelfAttention(embed_size, heads)
        self.norm = nn.LayerNorm(embed_size)
        self.transformer_block = TransformerBlock(embed_size, heads, dropout, forward_expansion)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x, value, key, src_mask, trg_mask):
        """Self-attend over ``x`` under ``trg_mask``, then attend to ``value`` / ``key``."""
        self_attended = self.attention(x, x, x, trg_mask)

        # Residual + norm + dropout gives the query for the second attention.
        query = self.norm(self_attended + x)
        query = self.dropout(query)

        return self.transformer_block(value, key, query, src_mask)

In [None]:
class Decoder(nn.Module):
    """Transformer decoder: embeddings, a stack of ``DecoderBlock`` layers, vocabulary projection.

    Note: an LSTM-based class that is also named ``Decoder`` is defined earlier
    in this notebook; running this cell rebinds the name to this class.

    Args:
        trg_vocab_size: Number of rows in the token embedding table and size of
            the output projection.
        embed_size: Embedding width.
        num_layers: Number of ``DecoderBlock`` layers.
        heads: Number of attention heads per block.
        forward_expansion: Feed-forward expansion factor passed to each block.
        dropout: Dropout probability.
        device: Stored as ``self.device`` and passed to each block. ``forward``
            follows the device of its input rather than this value.
        max_length: Number of rows in the position embedding table.
    """

    def __init__(self,
                 trg_vocab_size,
                 embed_size,
                 num_layers,
                 heads,
                 forward_expansion,
                 dropout,
                 device,
                 max_length,
    ):
        super(Decoder, self).__init__()
        self.device = device
        self.word_embedding = nn.Embedding(trg_vocab_size, embed_size)
        self.position_embedding = nn.Embedding(max_length, embed_size)

        self.layers = nn.ModuleList(
            [DecoderBlock(embed_size, heads, forward_expansion, dropout, device)
             for _ in range(num_layers)]
        )

        self.fc_out = nn.Linear(embed_size, trg_vocab_size)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x, enc_out, src_mask, trg_mask):
        """Decode token ids ``x`` of shape ``(N, seq_length)`` against ``enc_out``.

        Returns:
            Tensor of shape ``(N, seq_length, trg_vocab_size)`` holding the
            output of the final linear layer (no softmax is applied).
        """
        N, seq_length = x.shape
        # Build the position ids on the same device as the token ids so both
        # embedding lookups agree, whatever `device` was passed at construction.
        positions = torch.arange(seq_length, device=x.device).expand(N, seq_length)
        x = self.dropout(self.word_embedding(x) + self.position_embedding(positions))

        # The encoder output serves as both value and key in every layer.
        for layer in self.layers:
            x = layer(x, enc_out, enc_out, src_mask, trg_mask)

        return self.fc_out(x)

In [None]:
class Transformer(nn.Module):
    """Encoder-decoder Transformer over integer token ids.

    Args:
        src_vocab_size: Source vocabulary size.
        trg_vocab_size: Target vocabulary size.
        src_pad_idx: Source token id treated as padding when building the
            source mask.
        trg_pad_idx: Stored as ``self.trg_pad_idx``; the target mask built here
            is purely causal and does not consult it.
        embed_size: Embedding width.
        num_layers: Number of encoder layers and of decoder layers.
        forward_expansion: Feed-forward expansion factor.
        heads: Number of attention heads.
        dropout: Dropout probability.
        device: Stored as ``self.device`` and forwarded to the encoder and
            decoder. Masks are created on the device of the tensors they are
            derived from.
        max_length: Longest sequence supported by the position embeddings.
    """

    def __init__(
            self,
            src_vocab_size,
            trg_vocab_size,
            src_pad_idx,
            trg_pad_idx,
            embed_size=256,
            num_layers=6,
            forward_expansion=4,
            heads=8,
            dropout=0,
            device="cuda",
            max_length=100
    ):
        super(Transformer, self).__init__()

        self.encoder = Encoder(
            src_vocab_size,
            embed_size,
            num_layers,
            heads,
            device,
            forward_expansion,
            dropout,
            max_length
        )

        self.decoder = Decoder(
            trg_vocab_size,
            embed_size,
            num_layers,
            heads,
            forward_expansion,
            dropout,
            device,
            max_length
        )

        self.src_pad_idx = src_pad_idx
        self.trg_pad_idx = trg_pad_idx
        self.device = device

    def make_src_mask(self, src):
        """Return a boolean mask of shape ``(N, 1, 1, src_len)``, False at padding tokens."""
        # Comparing against `src` already yields a tensor on src's device, so
        # the mask stays next to the data even if `self.device` says otherwise.
        return (src != self.src_pad_idx).unsqueeze(1).unsqueeze(2)

    def make_trg_mask(self, trg):
        """Return a lower-triangular mask of shape ``(N, 1, trg_len, trg_len)``."""
        N, trg_len = trg.shape
        # Allocate directly on trg's device instead of moving to `self.device`.
        causal = torch.tril(torch.ones(trg_len, trg_len, device=trg.device))
        return causal.expand(N, 1, trg_len, trg_len)

    def forward(self, src, trg):
        """Encode ``src``, decode ``trg`` against it and return the decoder output."""
        src_mask = self.make_src_mask(src)
        trg_mask = self.make_trg_mask(trg)
        enc_src = self.encoder(src, src_mask)
        out = self.decoder(trg, enc_src, src_mask, trg_mask)
        return out


In [None]:
if __name__ == "__main__":
    device = "cpu"
    # Two toy source / target sequences of 10 token ids each (batch size 2).
    x = torch.tensor([[1,2,3,6,7,9,0,7,9,4], [5,7,9,5,4,3,2,7,9,1]]).to(device)
    trg = torch.tensor([[6,2,5,7,8,9,5,3,7,9], [9,4,2,6,8,6,4,2,5,0]]).to(device)

    # Token id 0 is the padding id on both sides; ids range over 0..9.
    src_pad_idx = 0
    trg_pad_idx = 0
    src_vocab_size = 10
    trg_vocab_size = 10
    model = Transformer(src_vocab_size, trg_vocab_size, src_pad_idx, trg_pad_idx,device=device).to(device)
    # Feed the target without its last token, so the decoder sees trg_len-1 positions.
    out = model(x, trg[:, :-1])
    print(out.shape)  # (N, trg_len-1, trg_vocab_size)

torch.Size([2, 9, 10])
