In [11]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import DataLoader, TensorDataset
import json

def generate_dataset(seq_length, num_samples, vocab_size):
    """Build a synthetic copy-task dataset.

    Args:
        seq_length: Number of tokens in every sequence.
        num_samples: Number of sequences to generate.
        vocab_size: Exclusive upper bound for token ids; ids are drawn
            uniformly from ``1 .. vocab_size - 1`` (id 0 is never produced).

    Returns:
        A ``TensorDataset`` of ``(inputs, outputs)`` pairs, each of shape
        ``(num_samples, seq_length)``. ``outputs`` is an independent copy of
        ``inputs``, so the target of every sequence is the sequence itself.
    """
    shape = (num_samples, seq_length)
    source = torch.randint(low=1, high=vocab_size, size=shape)
    # Clone so inputs and targets do not share storage.
    return TensorDataset(source, source.clone())

class Encoder(nn.Module):
    """Minimal recurrent encoder over one-hot encoded token ids.

    At every time step the one-hot token is projected by a linear layer and
    added to the previous hidden state before a tanh:
    ``hidden = tanh(linear(x_t) + hidden)``. The previous hidden state is
    added as-is; there is no separate hidden-to-hidden weight matrix.

    Args:
        input_size: Number of one-hot classes (vocabulary size).
        hidden_size: Width of the hidden state.
    """

    def __init__(self, input_size, hidden_size):
        super(Encoder, self).__init__()
        self.hidden_size = hidden_size
        self.linear = nn.Linear(input_size, hidden_size)
        self.activation = nn.Tanh()

    def forward(self, input_seq):
        """Encode a batch of sequences into a final hidden state.

        Args:
            input_seq: Integer tensor of token ids, shape
                ``(batch_size, seq_length)``; every id must be smaller than
                ``input_size``.

        Returns:
            Tensor of shape ``(batch_size, hidden_size)``: the hidden state
            after the last time step (all zeros if ``seq_length`` is 0).
        """
        batch_size, seq_length = input_seq.size()
        # Allocate on the input's device instead of relying on a notebook-level
        # global `device`, so the module works no matter which cell ran first.
        hidden = torch.zeros(batch_size, self.hidden_size, device=input_seq.device)

        for char_idx in range(seq_length):
            x_t = nn.functional.one_hot(
                input_seq[:, char_idx], num_classes=self.linear.in_features
            ).float()
            hidden = self.activation(self.linear(x_t) + hidden)
        return hidden

class Decoder(nn.Module):
    """Teacher-forced recurrent decoder that emits per-step logits.

    Step ``t`` receives the one-hot encoding of the ground-truth token at
    ``t - 1`` (an all-zero vector at ``t == 0``), updates the hidden state
    with ``hidden = tanh(linear1(prev) + hidden)`` and maps it to logits with
    ``linear2``. Because the previous token always comes from ``target_seq``,
    the targets are required at inference time as well.

    Args:
        input_size: Number of one-hot classes of the fed-back token.
        hidden_size: Width of the hidden state (must match the encoder's).
        output_size: Number of output classes (logit width).
    """

    def __init__(self, input_size, hidden_size, output_size):
        super(Decoder, self).__init__()
        self.hidden_size = hidden_size
        self.input_size = input_size
        self.output_size = output_size

        self.linear1 = nn.Linear(input_size, hidden_size)  # previous token -> hidden
        self.activation = nn.Tanh()
        self.linear2 = nn.Linear(hidden_size, output_size)  # hidden -> logits

    def forward(self, target_seq, hidden):
        """Produce logits for every position of ``target_seq``.

        Args:
            target_seq: Integer tensor of token ids, shape
                ``(batch_size, seq_len)``; ids must be smaller than
                ``input_size``.
            hidden: Initial hidden state, shape ``(batch_size, hidden_size)``.

        Returns:
            Tensor of unnormalised logits, shape
            ``(batch_size, seq_len, output_size)``.
        """
        batch_size, seq_len = target_seq.size()
        # Follow the device of the incoming hidden state rather than a
        # notebook-level global `device`.
        device = hidden.device
        outputs = torch.zeros(batch_size, seq_len, self.output_size, device=device)

        for char_idx in range(seq_len):
            if char_idx == 0:
                # No previous token exists for the first step.
                previous_y = torch.zeros(batch_size, self.input_size, device=device)
            else:
                y_prev = target_seq[:, char_idx - 1]
                previous_y = nn.functional.one_hot(y_prev, self.input_size).to(device).float()
            hidden = self.activation(self.linear1(previous_y) + hidden)
            outputs[:, char_idx, :] = self.linear2(hidden)
        return outputs

class Seq2Seq(nn.Module):
    """Chain an encoder and a decoder into a single module.

    Args:
        encoder: Module called as ``encoder(input_seq)``; its result is used
            as the decoder's initial hidden state.
        decoder: Module called as ``decoder(target_seq, hidden)``.
    """

    def __init__(self, encoder, decoder):
        super().__init__()
        # Assigned left to right, so the encoder is registered first.
        self.encoder, self.decoder = encoder, decoder

    def forward(self, input_seq, target_seq):
        """Encode ``input_seq``, then decode against ``target_seq``.

        Returns whatever the decoder returns for
        ``(target_seq, encoder(input_seq))``.
        """
        return self.decoder(target_seq, self.encoder(input_seq))

def train_model(model, dataloader, criterion, optimizer, num_epochs, device,
                stats_path='training_stats.json'):
    """Train ``model`` and record per-epoch loss and token accuracy.

    Args:
        model: Module called as ``model(inputs, targets)`` that returns
            logits whose last dimension is the class dimension.
        dataloader: Sized iterable of ``(inputs, targets)`` batches; must
            yield at least one batch.
        criterion: Loss called as ``criterion(logits_2d, targets_1d)``.
        optimizer: Optimizer over the model's parameters.
        num_epochs: Number of passes over ``dataloader``.
        device: Device the model and every batch are moved to.
        stats_path: Where to write the statistics as JSON. Pass ``None`` to
            skip writing. Defaults to ``'training_stats.json'``.

    Returns:
        Dict with the lists ``'epoch'``, ``'train_loss'`` (mean of the batch
        losses) and ``'train_accuracy'`` (fraction of correct tokens), one
        entry per epoch.
    """
    model.to(device)
    training_stats = {
        'epoch': [],
        'train_loss': [],
        'train_accuracy': []
    }
    for epoch in range(1, num_epochs + 1):
        model.train()
        epoch_loss = 0.0
        correct = 0
        total = 0
        for inputs, targets in dataloader:
            # inputs: (batch_size, seq_length)
            inputs, targets = inputs.to(device), targets.to(device)
            optimizer.zero_grad()

            logits = model(inputs, targets)
            # Flatten to (batch_size * seq_length, num_classes) / (batch_size * seq_length,).
            # reshape also accepts non-contiguous tensors, unlike view.
            logits = logits.reshape(-1, logits.size(-1))
            targets = targets.reshape(-1)
            loss = criterion(logits, targets)

            loss.backward()
            optimizer.step()
            epoch_loss += loss.item()

            # Token-level accuracy on the logits from this forward pass.
            predicted = torch.argmax(logits, dim=1)
            correct += (predicted == targets).sum().item()
            total += targets.size(0)

        avg_loss = epoch_loss / len(dataloader)
        accuracy = correct / total

        # Record this epoch's metrics.
        training_stats['epoch'].append(epoch)
        training_stats['train_loss'].append(avg_loss)
        training_stats['train_accuracy'].append(accuracy)
        print(f'Epoch [{epoch}/{num_epochs}], Loss: {avg_loss:.4f}, Accuracy: {accuracy * 100:.2f}%')

    # Persist the training metrics as JSON unless the caller opted out.
    if stats_path is not None:
        with open(stats_path, 'w', encoding='utf-8') as f:
            json.dump(training_stats, f)

    return training_stats
    
def evaluate_model(model, dataloader, device):
    """Evaluate ``model`` and return its token-level accuracy.

    The model is called as ``model(inputs, targets)``, so with a
    teacher-forced decoder the ground-truth targets are also fed to the model
    during evaluation.

    Args:
        model: Module returning logits whose last dimension is the class
            dimension and whose leading dimensions match ``targets``.
        dataloader: Iterable of ``(inputs, targets)`` batches.
        device: Device the model and every batch are moved to.

    Returns:
        Fraction of target tokens predicted correctly, or ``0.0`` when the
        dataloader yields no tokens. The model is left in eval mode.
    """
    # Mirror train_model: make sure model and batches live on the same device.
    model.to(device)
    model.eval()
    correct = 0
    total = 0

    with torch.no_grad():
        for inputs, targets in dataloader:
            inputs, targets = inputs.to(device), targets.to(device)
            logits = model(inputs, targets)  # (batch_size, seq_length, vocab_size)

            predicted = torch.argmax(logits, dim=-1)
            correct += (predicted == targets).sum().item()
            total += targets.numel()

    # Avoid ZeroDivisionError when nothing was evaluated.
    if total == 0:
        return 0.0
    return correct / total

if __name__ == '__main__':
    # Hyperparameters for the synthetic copy task.
    seq_length = 10
    num_samples = 1000
    vocab_size = 5  # generate_dataset draws ids from 1..vocab_size-1; id 0 is never generated
    hidden_size = 64
    batch_size = 32
    num_epochs = 20
    learning_rate = 0.001
    seed = 42

    # Seed torch's global RNG before anything random happens (data generation,
    # weight initialisation, DataLoader shuffling) so reruns are comparable.
    torch.manual_seed(seed)

    # Device configuration
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    dataset = generate_dataset(seq_length, num_samples, vocab_size)
    dataloader = DataLoader(dataset, batch_size = batch_size, shuffle = True)

    encoder = Encoder(input_size = vocab_size, hidden_size = hidden_size)
    decoder = Decoder(input_size = vocab_size, hidden_size = hidden_size, output_size = vocab_size)
    model = Seq2Seq(encoder, decoder).to(device)

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr = learning_rate)

    train_model(model, dataloader, criterion, optimizer, num_epochs, device)

    # Accuracy is measured on the same dataloader that was used for training.
    acc = evaluate_model(model, dataloader, device)
    print(f"Training Accuracy: {acc * 100:.2f}%\n")

    # Save the evaluation result as well.
    # NOTE(review): the value stored under 'test_accuracy' is the training-set
    # accuracy computed above; no held-out split exists in this script.
    with open('evaluation_result.json', 'w') as f:
        json.dump({'test_accuracy': acc}, f)

    # Inspect the prediction for a single sample.
    with torch.no_grad():
        test_input, test_target = dataset[0]
        test_input = test_input.unsqueeze(0).to(device)
        test_target = test_target.unsqueeze(0).to(device)

        output = model(test_input, test_target)
        predicted = torch.argmax(output, dim = 2)
        print("Sample Input Sequence:   ", test_input.squeeze().tolist())
        print("Sample Target Sequence:  ", test_target.squeeze().tolist())
        print("Predicted Sequence       :  ", predicted.squeeze().tolist())

    # Show the first raw (input, target) pair of the dataset.
    for x, y in dataset:
        print(x, y)
        break

Epoch [1/20], Loss: 1.5013, Accuracy: 24.93%
Epoch [2/20], Loss: 1.4113, Accuracy: 26.85%
Epoch [3/20], Loss: 1.3915, Accuracy: 29.99%
Epoch [4/20], Loss: 1.3780, Accuracy: 32.15%
Epoch [5/20], Loss: 1.3653, Accuracy: 35.10%
Epoch [6/20], Loss: 1.3494, Accuracy: 40.28%
Epoch [7/20], Loss: 1.3322, Accuracy: 42.46%
Epoch [8/20], Loss: 1.3142, Accuracy: 44.23%
Epoch [9/20], Loss: 1.2969, Accuracy: 45.42%
Epoch [10/20], Loss: 1.2823, Accuracy: 46.66%
Epoch [11/20], Loss: 1.2688, Accuracy: 47.96%
Epoch [12/20], Loss: 1.2562, Accuracy: 48.78%
Epoch [13/20], Loss: 1.2455, Accuracy: 48.81%
Epoch [14/20], Loss: 1.2340, Accuracy: 49.80%
Epoch [15/20], Loss: 1.2219, Accuracy: 50.47%
Epoch [16/20], Loss: 1.2112, Accuracy: 50.44%
Epoch [17/20], Loss: 1.2015, Accuracy: 50.55%
Epoch [18/20], Loss: 1.1911, Accuracy: 51.23%
Epoch [19/20], Loss: 1.1796, Accuracy: 51.89%
Epoch [20/20], Loss: 1.1708, Accuracy: 52.49%
Training Accuracy: 52.57%

Sample Input Sequence:    [1, 1, 1, 2, 3, 1, 3, 1, 3, 1]
Sampl

In [None]:
import torch
# Scratch check: a comparison yields a boolean mask, and summing the mask
# counts its True entries.
t = torch.randn(3, 5)
positive_mask = t > 0
print(t)
print(positive_mask)
print(positive_mask.sum())

In [7]:
# Build a (4, 5) batch of token ids drawn from {1, 2, 3}.
input_sequence = [torch.randint(1, 4, (5,)) for _ in range(4)]
input_seq = torch.stack(input_sequence)

# Notes on one-hot encoding a single time step:
#   column = input_seq[:, 3]                     # ids at position 3, shape (4,)
#   one_hot = nn.functional.one_hot(column, 4)   # shape (4, 4)
# one_hot puts the 1 at the position equal to the id, so every id must be
# smaller than num_classes: ids 1..3 need num_classes >= 4 (3 rejects id 3).
#   id 1 -> [0, 1, 0, 0]
#   id 2 -> [0, 0, 1, 0]
#   id 3 -> [0, 0, 0, 1]
# The encoder applies the same call per step:
#   x_t = nn.functional.one_hot(input_seq[:, char_idx],
#                               num_classes = self.linear.in_features).float()

# Flattening (batch, seq, classes) -> (batch * seq, classes), as train_model
# does before computing the loss.
t = torch.randn(10, 3, 5)
print(t.view(-1, t.size(-1)).shape)

# view() requires the element count (150) to be divisible by the fixed
# dimension; 7 does not divide 150, so this raises. Catch it so the cell
# still runs under Restart & Run All.
try:
    t.view(-1, 7)
except RuntimeError as err:
    print(f"RuntimeError: {err}")

RuntimeError: shape '[-1, 7]' is invalid for input of size 150