In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from torch.nn import TransformerEncoder, TransformerEncoderLayer
import pandas as pd


In [2]:
# # CSV 파일 로드
# df = pd.read_csv('your_data.csv')

import numpy as np
# 데이터 생성
np.random.seed(42)
data = {
    'feature1': np.random.rand(100) * 10,    # 0과 10 사이의 값
    'feature2': np.random.rand(100) * 100,   # 0과 100 사이의 값
    'feature3': np.random.rand(100) * 1000,  # 0과 1000 사이의 값
    'feature4': np.random.rand(100) * 10,    # 추가된 네 번째 특성
    'target': np.random.rand(100) * 500      # 0과 500 사이의 타겟 값
}

df = pd.DataFrame(data)

# df shape
print(df.shape) #(100, 4)

# 필요한 컬럼 선택
features = df[['feature1', 'feature2', 'feature3']]  # 예시 컬럼
target = df['target']

# 데이터 분할
X_train, X_test, y_train, y_test = train_test_split(features, target, test_size=0.2, random_state=42)

# 데이터 스케일링
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)


(100, 5)


In [3]:
class RegressionDataset(Dataset):
    def __init__(self, features, labels):
        self.features = torch.tensor(features, dtype=torch.float32).unsqueeze(1)
        self.labels = torch.tensor(labels.to_numpy(), dtype=torch.float32).unsqueeze(1)  # target을 (n_samples, 1)로 조정

    def __len__(self):
        return len(self.labels)
    
    def __getitem__(self, index):
        return self.features[index], self.labels[index]

train_dataset = RegressionDataset(X_train, y_train)
test_dataset = RegressionDataset(X_test, y_test)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)


In [6]:
class TransformerRegressionModel(nn.Module):
    def __init__(self, input_dim, num_heads, num_layers, dim_feedforward):
        super(TransformerRegressionModel, self).__init__()
        encoder_layer = TransformerEncoderLayer(d_model=input_dim, nhead=num_heads, dim_feedforward=dim_feedforward)
        self.transformer_encoder = TransformerEncoder(encoder_layer, num_layers=num_layers)
        self.fc_out = nn.Linear(input_dim, 1)

    def forward(self, src):
        if src.dim() == 2:  # [batch_size, feature_dim]
            src = src.unsqueeze(1)  # [batch_size, 1, feature_dim]
        src = src.permute(1, 0, 2)  # seq_len, batch, feature
        output = self.transformer_encoder(src)
        output = output.permute(1, 0, 2)  # batch, seq_len, feature
        return self.fc_out(output.mean(dim=1)), self.transformer_encoder.layers[0].self_attn.attn_output_weights

model = TransformerRegressionModel(input_dim=4, num_heads=2, num_layers=2, dim_feedforward=32)


In [7]:
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

for epoch in range(10):
    model.train()
    for data, target in train_loader:
        optimizer.zero_grad()
        output, _ = model(data)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()
    print(f'Epoch {epoch+1}, Loss: {loss.item()}')


AssertionError: 

In [None]:
model.eval()
data, _ = next(iter(test_loader))  # 테스트 데이터셋에서 배치 하나 가져오기
with torch.no_grad():
    _, attn_weights = model(data)

plt.matshow(attn_weights[0].detach().numpy(), cmap='viridis')
plt.colorbar()
plt.title("Attention Map")
plt.xlabel("Key Positions")
plt.ylabel("Query Positions")
plt.show()


---

## 1. 데이터 로드 및 전처리
판다스를 사용하여 CSV 파일을 로드하고, 필요한 전처리를 수행합니다.
이를 위해 데이터를 정규화하거나 표준화하는 것이 일반적입니다.

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# CSV 파일 로드
df = pd.read_csv('your_data.csv')

# 필요한 컬럼 선택
features = df[['feature1', 'feature2', 'feature3']]  # 예시 컬럼
target = df['target']

# 데이터 분할
X_train, X_test, y_train, y_test = train_test_split(features, target, test_size=0.2, random_state=42)

# 데이터 스케일링
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)


## 2. 데이터셋 및 데이터 로더 구성
PyTorch의 Dataset 및 DataLoader를 사용하여 학습과 테스트 데이터를 로드합니다.

In [None]:
from torch.utils.data import DataLoader, Dataset

In [None]:
class RegressionDataset(Dataset):
    def __init__(self, features, labels):
        self.features = features
        self.labels = labels
    
    def __len__(self):
        return len(self.labels)
    
    def __getitem__(self, index):
        return self.features[index], self.labels[index]

train_dataset = RegressionDataset(X_train, y_train)
test_dataset = RegressionDataset(X_test, y_test)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)


---

In [1]:
import math
import os
from tempfile import TemporaryDirectory
from typing import Tuple

import torch
from torch import nn, Tensor
import torch.nn.functional as F
from torch.nn import TransformerEncoder, TransformerEncoderLayer
from torch.utils.data import dataset

In [None]:
class TransformerModel(nn.Module):

    def __init__(self, ntoken: int, d_model: int, nhead: int, d_hid: int, nlayers: int, dropout: float = 0.5):
        super().__init__()
        self.model_type = 'Transformer'
        self.pos_encoder = PositionalEncoding(d_model, dropout)
        encoder_layers = TransformerEncoderLayer(d_model, nhead, d_hid, dropout)
        self.transformer_encoder = TransformerEncoder(encoder_layers, nlayers)
        self.encoder = nn.Embedding(ntoken, d_model)
        self.d_model = d_model
        self.decoder = nn.Linear(d_model, ntoken)
        
        