In [1]:
# # install tutel

# %%bash
# python3 -m pip uninstall tutel -y
# python3 -m pip install setuptools wheel
# python3 -m pip install -v -U --no-build-isolation git+https://github.com/microsoft/tutel@main

In [2]:
# # quick test for tutel
# !python3 -m tutel.examples.helloworld --batch_size=16  

In [3]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from tqdm import tqdm
from torch.amp import autocast, GradScaler  


from moe_v4_original import MoE_Original
from moe_v4_grn import MoE_GRN 
from moe_v4_tutel import MoE_Tutel

# Hyperparameters

## model
input_dim = 3 * 32 * 32   # flattened CIFAR-10 image: 3 channels x 32 x 32 pixels
output_dim = input_dim    # expert output width is kept equal to the input width
hidden_dim = 784
num_classes = 10          # number of CIFAR-10 classes
num_experts = 5
topk = 2
noise_std = 0.1

## train
batch_size = 256
lambda_cov = 0.1          # weight of the covariance loss
epochs = 30

In [4]:
# Control model: a plain fully connected network
class SimpleNN(nn.Module):
    """Dense baseline made of two linear layers followed by a linear classifier.

    The hidden and output widths are multiplied by ``num_experts``
    (presumably so the parameter count is comparable to the MoE models it is
    benchmarked against -- confirm against the MoE definitions).

    Args:
        input_dim: Number of features per sample after flattening.
        hidden_dim: Base hidden width; the actual width is ``hidden_dim * num_experts``.
        output_dim: Base output width; the actual width is ``output_dim * num_experts``.
        num_experts: Width multiplier applied to the hidden and output layers.
        num_classes: Number of logits produced by the classifier.
    """

    def __init__(self, input_dim, hidden_dim=2048, output_dim=10, num_experts=5, num_classes=10):
        super().__init__()
        wide_hidden = hidden_dim * num_experts
        wide_output = output_dim * num_experts

        self.fc1 = nn.Linear(input_dim, wide_hidden)
        self.fc2 = nn.Linear(wide_hidden, wide_output)
        self.classifier = nn.Linear(wide_output, num_classes)

        # Xavier-uniform weights and zero biases, applied layer by layer in definition order
        for layer in (self.fc1, self.fc2, self.classifier):
            nn.init.xavier_uniform_(layer.weight)
            nn.init.zeros_(layer.bias)

    def forward(self, x):
        """Flatten ``x`` per sample and return the class logits.

        Only the first linear layer is followed by a ReLU; the second linear
        layer feeds the classifier directly.
        """
        flat = x.view(x.size(0), -1)  # [batch_size, features]
        hidden = F.relu(self.fc1(flat))
        return self.classifier(self.fc2(hidden))

In [5]:
# Dense baseline built from the shared hyperparameters
simple_nn = SimpleNN(
    input_dim=input_dim,
    hidden_dim=hidden_dim,
    output_dim=output_dim,
    num_experts=num_experts,
    num_classes=num_classes,
)

In [6]:
# The hand-written MoE variants take the same positional arguments, in this order
shared_moe_args = (input_dim, hidden_dim, output_dim, num_experts, num_classes)

moe_original = MoE_Original(*shared_moe_args, topk=topk, noise_std=noise_std)
moe_grn = MoE_GRN(*shared_moe_args, topk=topk, noise_std=noise_std)
# The Tutel-backed variant is constructed without hidden/output widths or gate noise
moe_tutel = MoE_Tutel(input_dim, num_classes, num_experts, topk=topk)

In [7]:
from torchinfo import summary

# A single dummy sample is enough for torchinfo to trace shapes and count parameters
dummy_batch_size = 1

summary(model=simple_nn, input_size=(dummy_batch_size, input_dim))

Layer (type:depth-idx)                   Output Shape              Param #
SimpleNN                                 [1, 10]                   --
├─Linear: 1-1                            [1, 3920]                 12,046,160
├─Linear: 1-2                            [1, 15360]                60,226,560
├─Linear: 1-3                            [1, 10]                   153,610
Total params: 72,426,330
Trainable params: 72,426,330
Non-trainable params: 0
Total mult-adds (M): 72.43
Input size (MB): 0.01
Forward/backward pass size (MB): 0.15
Params size (MB): 289.71
Estimated Total Size (MB): 289.87

In [8]:
# Layer/parameter overview of the original MoE for one dummy sample
summary(model=moe_original, input_size=(dummy_batch_size, input_dim))

Layer (type:depth-idx)                   Output Shape              Param #
MoE_Original                             [1, 10]                   --
├─Linear: 1-1                            [1, 5]                    15,365
├─Linear: 1-2                            [1, 3920]                 12,046,160
├─Linear: 1-3                            [1, 15360]                60,226,560
├─Linear: 1-4                            [1, 10]                   30,730
Total params: 72,318,815
Trainable params: 72,318,815
Non-trainable params: 0
Total mult-adds (M): 72.32
Input size (MB): 0.01
Forward/backward pass size (MB): 0.15
Params size (MB): 289.28
Estimated Total Size (MB): 289.44

In [9]:
# Layer/parameter overview of the GRN-gated MoE for one dummy sample
summary(model=moe_grn, input_size=(dummy_batch_size, input_dim))

Layer (type:depth-idx)                   Output Shape              Param #
MoE_GRN                                  [1, 10]                   --
├─Linear: 1-1                            [1, 5]                    15,365
├─GRN: 1-2                               [1, 5]                    10
├─Linear: 1-3                            [1, 3920]                 12,046,160
├─Linear: 1-4                            [1, 15360]                60,226,560
├─Linear: 1-5                            [1, 10]                   30,730
Total params: 72,318,825
Trainable params: 72,318,825
Non-trainable params: 0
Total mult-adds (M): 72.32
Input size (MB): 0.01
Forward/backward pass size (MB): 0.15
Params size (MB): 289.28
Estimated Total Size (MB): 289.44

In [10]:
# Layer/parameter overview of the Tutel-backed MoE for one dummy sample
summary(model=moe_tutel, input_size=(dummy_batch_size, input_dim))

  with torch.cuda.amp.autocast(enabled=False):


Layer (type:depth-idx)                   Output Shape              Param #
MoE_Tutel                                [1, 3072]                 30,730
├─MOELayer: 1-1                          [1, 3072]                 --
│    └─ModuleList: 2-1                   --                        --
│    │    └─LinearTopKGate: 3-1          [1, 5]                    15,360
│    └─FusedExpertsNetwork: 2-2          [5, 2, 3072]              62,940,160
Total params: 62,986,250
Trainable params: 62,986,250
Non-trainable params: 0
Total mult-adds (M): 0.27
Input size (MB): 0.01
Forward/backward pass size (MB): 0.25
Params size (MB): 251.82
Estimated Total Size (MB): 252.08

In [11]:
# Use the GPU when one is available, otherwise fall back to the CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

def train(model, train_loader, optimizer, criterion, epochs=1):
    """Train ``model`` on ``train_loader`` and print the mean loss of every epoch.

    Mixed precision (autocast + gradient scaling) is enabled only when the
    module-level ``device`` is a CUDA device. On CPU both are explicitly
    disabled instead of being requested for an unavailable 'cuda' backend.

    Args:
        model: Network to train. Instances of ``MoE_Original`` / ``MoE_Tutel``
            are expected to return ``(outputs, l_aux)``; any other model is
            expected to return the outputs only.
        train_loader: Iterable of ``(images, labels)`` batches that supports ``len()``.
        optimizer: Optimizer already bound to ``model``'s parameters.
        criterion: Loss callable invoked as ``criterion(outputs, labels)``.
        epochs: Number of passes over ``train_loader``.

    Returns:
        None. The model is moved to ``device``, left in training mode and
        updated in place.
    """
    model.to(device)
    model.train()

    # AMP is only meaningful on CUDA here; tie it to the selected device
    use_amp = device.type == "cuda"
    scaler = GradScaler(device.type, enabled=use_amp)

    # The model type does not change during training, so decide once.
    # NOTE(review): MoE_GRN is not listed here -- confirm that it either
    # subclasses MoE_Original or returns the outputs without an auxiliary loss.
    returns_aux_loss = isinstance(model, (MoE_Original, MoE_Tutel))

    for epoch in range(epochs):
        total_loss = 0.0
        for images, labels in tqdm(train_loader):
            # Flatten every sample to a vector before it enters the model
            images = images.view(images.size(0), -1).to(device)
            labels = labels.to(device)

            optimizer.zero_grad()

            with autocast(device.type, enabled=use_amp):
                if returns_aux_loss:
                    outputs, l_aux = model(images)
                    # The auxiliary loss returned by the model is added unweighted
                    loss = criterion(outputs, labels) + l_aux
                else:
                    outputs = model(images)
                    loss = criterion(outputs, labels)

            scaler.scale(loss).backward()  # backward pass on the (possibly scaled) loss
            scaler.step(optimizer)         # optimizer step via the scaler
            scaler.update()                # update the scale for the next iteration

            total_loss += loss.item()

        # max(..., 1) avoids a ZeroDivisionError when the loader has no batches
        num_batches = max(len(train_loader), 1)
        print(f"Epoch [{epoch+1}/{epochs}], Loss: {total_loss / num_batches:.4f}")

# Evaluation routine
def test(model, test_loader):
    """Evaluate ``model`` on ``test_loader`` and return the accuracy in percent.

    Args:
        model: Network to evaluate. Instances of ``MoE_Original`` / ``MoE_Tutel``
            are expected to return a ``(outputs, aux)`` pair; any other model
            is expected to return the outputs only.
        test_loader: Iterable of ``(images, labels)`` batches.

    Returns:
        float: ``100 * correct / total``, or ``0.0`` when the loader yields no
        samples (previously this raised ``ZeroDivisionError``).

    Side effects:
        Moves the model to the module-level ``device``, switches it to eval
        mode and prints the accuracy.
    """
    model.to(device)
    model.eval()
    returns_aux_loss = isinstance(model, (MoE_Original, MoE_Tutel))

    correct = 0
    total = 0
    with torch.no_grad():
        for images, labels in tqdm(test_loader):
            # Flatten every sample to a vector before it enters the model
            images = images.view(images.size(0), -1).to(device)
            labels = labels.to(device)

            outputs = model(images)
            if returns_aux_loss:
                outputs, _ = outputs  # drop the auxiliary value during evaluation

            # Index of the largest logit per sample (no need for the legacy `.data`)
            predicted = outputs.argmax(dim=1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    accuracy = 100 * correct / total if total else 0.0
    print(f"테스트 정확도: {accuracy:.2f}%")
    return accuracy


In [12]:
import torch.nn as nn
from torchvision import datasets, transforms
from torch.utils.data import DataLoader

# Preprocessing: resize to 32x32, convert to tensor, normalise each RGB channel
channel_mean = (0.5, 0.5, 0.5)
channel_std = (0.5, 0.5, 0.5)
transform = transforms.Compose([
    transforms.Resize((32, 32)),
    transforms.ToTensor(),
    transforms.Normalize(channel_mean, channel_std),
])

# CIFAR-10 is read from a local directory; download=False means it is not fetched here
data_root = './cifar10_data/'
train_dataset = datasets.CIFAR10(root=data_root, train=True, transform=transform, download=False)
test_dataset = datasets.CIFAR10(root=data_root, train=False, transform=transform, download=False)

# Only the training loader shuffles; all other loader options are shared
loader_options = dict(batch_size=batch_size, num_workers=2)
train_loader = DataLoader(dataset=train_dataset, shuffle=True, **loader_options)
test_loader = DataLoader(dataset=test_dataset, shuffle=False, **loader_options)

  warn(

A module that was compiled using NumPy 1.x cannot be run in
NumPy 2.1.2 as it may crash. To support both 1.x and 2.x
versions of NumPy, modules must be compiled with NumPy 2.0.
Some module may need to rebuild instead e.g. with 'pybind11>=2.12'.

If you are a user of the module, the easiest solution will be to
downgrade to 'numpy<2' or try to upgrade the affected module.
We expect that some modules will need time to support NumPy 2.

Traceback (most recent call last):  File "/usr/lib/python3.10/runpy.py", line 196, in _run_module_as_main
    return _run_code(code, main_globals, None,
  File "/usr/lib/python3.10/runpy.py", line 86, in _run_code
    exec(code, run_globals)
  File "/root/.local/lib/python3.10/site-packages/ipykernel_launcher.py", line 18, in <module>
    app.launch_new_instance()
  File "/root/.local/lib/python3.10/site-packages/traitlets/config/application.py", line 1075, in launch_instance
    app.start()
  File "/root/.local/lib/python3.10/site-packages/ipykern

AttributeError: _ARRAY_API not found

SystemError: <built-in function __import__> returned a result with an exception set

In [13]:
# Train the dense baseline, then display its test accuracy as the cell result
criterion = nn.CrossEntropyLoss()
optimizer = optim.AdamW(params=simple_nn.parameters(), lr=0.001)
train(model=simple_nn, train_loader=train_loader, optimizer=optimizer, criterion=criterion, epochs=epochs)
test(model=simple_nn, test_loader=test_loader)

100%|██████████| 196/196 [00:04<00:00, 43.56it/s]


Epoch [1/30], Loss: 4.3557


100%|██████████| 196/196 [00:04<00:00, 46.51it/s]


Epoch [2/30], Loss: 1.5276


100%|██████████| 196/196 [00:04<00:00, 45.79it/s]


Epoch [3/30], Loss: 1.4218


100%|██████████| 196/196 [00:04<00:00, 46.23it/s]


Epoch [4/30], Loss: 1.3856


100%|██████████| 196/196 [00:04<00:00, 46.57it/s]


Epoch [5/30], Loss: 1.3952


100%|██████████| 196/196 [00:04<00:00, 45.90it/s]


Epoch [6/30], Loss: 1.4067


100%|██████████| 196/196 [00:04<00:00, 46.14it/s]


Epoch [7/30], Loss: 1.4330


100%|██████████| 196/196 [00:04<00:00, 47.55it/s]


Epoch [8/30], Loss: 1.4490


100%|██████████| 196/196 [00:04<00:00, 46.87it/s]


Epoch [9/30], Loss: 1.4445


100%|██████████| 196/196 [00:04<00:00, 46.71it/s]


Epoch [10/30], Loss: 1.4784


100%|██████████| 196/196 [00:04<00:00, 45.91it/s]


Epoch [11/30], Loss: 1.3675


100%|██████████| 196/196 [00:04<00:00, 45.40it/s]


Epoch [12/30], Loss: 1.5507


100%|██████████| 196/196 [00:04<00:00, 46.13it/s]


Epoch [13/30], Loss: 2.2192


100%|██████████| 196/196 [00:04<00:00, 45.95it/s]


Epoch [14/30], Loss: 1.2519


100%|██████████| 196/196 [00:04<00:00, 45.91it/s]


Epoch [15/30], Loss: 1.0689


100%|██████████| 196/196 [00:04<00:00, 46.99it/s]


Epoch [16/30], Loss: 1.1089


100%|██████████| 196/196 [00:04<00:00, 46.38it/s]


Epoch [17/30], Loss: 1.2211


100%|██████████| 196/196 [00:04<00:00, 45.58it/s]


Epoch [18/30], Loss: 1.3517


100%|██████████| 196/196 [00:04<00:00, 45.57it/s]


Epoch [19/30], Loss: 1.3285


100%|██████████| 196/196 [00:04<00:00, 45.35it/s]


Epoch [20/30], Loss: 53.0635


100%|██████████| 196/196 [00:04<00:00, 46.17it/s]


Epoch [21/30], Loss: 1.7858


100%|██████████| 196/196 [00:04<00:00, 46.93it/s]


Epoch [22/30], Loss: 1.0139


100%|██████████| 196/196 [00:04<00:00, 45.85it/s]


Epoch [23/30], Loss: 0.7947


100%|██████████| 196/196 [00:04<00:00, 45.88it/s]


Epoch [24/30], Loss: 0.6636


100%|██████████| 196/196 [00:04<00:00, 46.91it/s]


Epoch [25/30], Loss: 0.6542


100%|██████████| 196/196 [00:04<00:00, 44.90it/s]


Epoch [26/30], Loss: 0.5660


100%|██████████| 196/196 [00:04<00:00, 46.97it/s]


Epoch [27/30], Loss: 0.5610


100%|██████████| 196/196 [00:04<00:00, 46.50it/s]


Epoch [28/30], Loss: 0.5582


100%|██████████| 196/196 [00:04<00:00, 46.18it/s]


Epoch [29/30], Loss: 0.5540


100%|██████████| 196/196 [00:04<00:00, 46.14it/s]


Epoch [30/30], Loss: 0.5393


100%|██████████| 40/40 [00:00<00:00, 41.23it/s]

테스트 정확도: 51.90%





51.9

In [14]:
# Train the original MoE, then display its test accuracy as the cell result
criterion = nn.CrossEntropyLoss()
optimizer = optim.AdamW(params=moe_original.parameters(), lr=0.001)
train(model=moe_original, train_loader=train_loader, optimizer=optimizer, criterion=criterion, epochs=epochs)
test(model=moe_original, test_loader=test_loader)

100%|██████████| 196/196 [00:04<00:00, 46.74it/s]


Epoch [1/30], Loss: 2.6710


100%|██████████| 196/196 [00:04<00:00, 47.34it/s]


Epoch [2/30], Loss: 1.5222


100%|██████████| 196/196 [00:04<00:00, 46.55it/s]


Epoch [3/30], Loss: 1.4159


100%|██████████| 196/196 [00:04<00:00, 47.10it/s]


Epoch [4/30], Loss: 1.3755


100%|██████████| 196/196 [00:04<00:00, 44.68it/s]


Epoch [5/30], Loss: 1.3343


100%|██████████| 196/196 [00:04<00:00, 45.56it/s]


Epoch [6/30], Loss: 1.3264


100%|██████████| 196/196 [00:04<00:00, 46.27it/s]


Epoch [7/30], Loss: 1.3088


100%|██████████| 196/196 [00:04<00:00, 46.37it/s]


Epoch [8/30], Loss: 1.2631


100%|██████████| 196/196 [00:04<00:00, 45.12it/s]


Epoch [9/30], Loss: 1.2605


100%|██████████| 196/196 [00:04<00:00, 46.28it/s]


Epoch [10/30], Loss: 1.3198


100%|██████████| 196/196 [00:04<00:00, 46.69it/s]


Epoch [11/30], Loss: 12.8953


100%|██████████| 196/196 [00:04<00:00, 45.66it/s]


Epoch [12/30], Loss: 1.4769


100%|██████████| 196/196 [00:04<00:00, 46.06it/s]


Epoch [13/30], Loss: 1.1762


100%|██████████| 196/196 [00:04<00:00, 46.16it/s]


Epoch [14/30], Loss: 1.0290


100%|██████████| 196/196 [00:04<00:00, 45.56it/s]


Epoch [15/30], Loss: 0.9327


100%|██████████| 196/196 [00:04<00:00, 45.51it/s]


Epoch [16/30], Loss: 0.8496


100%|██████████| 196/196 [00:04<00:00, 46.49it/s]


Epoch [17/30], Loss: 0.8085


100%|██████████| 196/196 [00:04<00:00, 46.57it/s]


Epoch [18/30], Loss: 0.7906


100%|██████████| 196/196 [00:04<00:00, 46.26it/s]


Epoch [19/30], Loss: 0.7725


100%|██████████| 196/196 [00:04<00:00, 47.03it/s]


Epoch [20/30], Loss: 0.7432


100%|██████████| 196/196 [00:04<00:00, 46.81it/s]


Epoch [21/30], Loss: 0.7503


100%|██████████| 196/196 [00:04<00:00, 45.86it/s]


Epoch [22/30], Loss: 0.7181


100%|██████████| 196/196 [00:04<00:00, 46.97it/s]


Epoch [23/30], Loss: 0.7676


100%|██████████| 196/196 [00:04<00:00, 45.63it/s]


Epoch [24/30], Loss: 0.7765


100%|██████████| 196/196 [00:04<00:00, 45.29it/s]


Epoch [25/30], Loss: 0.7683


100%|██████████| 196/196 [00:04<00:00, 46.67it/s]


Epoch [26/30], Loss: 0.9767


100%|██████████| 196/196 [00:04<00:00, 46.31it/s]


Epoch [27/30], Loss: 0.9523


100%|██████████| 196/196 [00:04<00:00, 46.14it/s]


Epoch [28/30], Loss: 0.9479


100%|██████████| 196/196 [00:04<00:00, 45.42it/s]


Epoch [29/30], Loss: 0.9090


100%|██████████| 196/196 [00:04<00:00, 47.13it/s]


Epoch [30/30], Loss: 0.8126


100%|██████████| 40/40 [00:00<00:00, 41.84it/s]

테스트 정확도: 52.17%





52.17

In [15]:
# Train the GRN-gated MoE, then display its test accuracy as the cell result
criterion = nn.CrossEntropyLoss()
optimizer = optim.AdamW(params=moe_grn.parameters(), lr=0.001)
train(model=moe_grn, train_loader=train_loader, optimizer=optimizer, criterion=criterion, epochs=epochs)
test(model=moe_grn, test_loader=test_loader)

100%|██████████| 196/196 [00:04<00:00, 41.38it/s]


Epoch [1/30], Loss: 2.9407


100%|██████████| 196/196 [00:04<00:00, 47.28it/s]


Epoch [2/30], Loss: 1.5323


100%|██████████| 196/196 [00:04<00:00, 46.14it/s]


Epoch [3/30], Loss: 1.4085


100%|██████████| 196/196 [00:04<00:00, 46.42it/s]


Epoch [4/30], Loss: 1.3455


100%|██████████| 196/196 [00:04<00:00, 45.31it/s]


Epoch [5/30], Loss: 1.3035


100%|██████████| 196/196 [00:04<00:00, 45.62it/s]


Epoch [6/30], Loss: 1.2801


100%|██████████| 196/196 [00:04<00:00, 45.98it/s]


Epoch [7/30], Loss: 1.2517


100%|██████████| 196/196 [00:04<00:00, 46.69it/s]


Epoch [8/30], Loss: 1.2161


100%|██████████| 196/196 [00:04<00:00, 47.01it/s]


Epoch [9/30], Loss: 1.1740


100%|██████████| 196/196 [00:04<00:00, 47.18it/s]


Epoch [10/30], Loss: 1.1899


100%|██████████| 196/196 [00:04<00:00, 46.39it/s]


Epoch [11/30], Loss: 1.1536


100%|██████████| 196/196 [00:04<00:00, 46.95it/s]


Epoch [12/30], Loss: 1.1216


100%|██████████| 196/196 [00:04<00:00, 46.38it/s]


Epoch [13/30], Loss: 1.0549


100%|██████████| 196/196 [00:04<00:00, 47.36it/s]


Epoch [14/30], Loss: 1.0270


100%|██████████| 196/196 [00:04<00:00, 45.62it/s]


Epoch [15/30], Loss: 1.0340


100%|██████████| 196/196 [00:04<00:00, 45.49it/s]


Epoch [16/30], Loss: 0.9043


100%|██████████| 196/196 [00:04<00:00, 46.55it/s]


Epoch [17/30], Loss: 0.9368


100%|██████████| 196/196 [00:04<00:00, 46.56it/s]


Epoch [18/30], Loss: 0.8709


100%|██████████| 196/196 [00:04<00:00, 46.37it/s]


Epoch [19/30], Loss: 0.8637


100%|██████████| 196/196 [00:04<00:00, 45.82it/s]


Epoch [20/30], Loss: 0.7865


100%|██████████| 196/196 [00:04<00:00, 46.64it/s]


Epoch [21/30], Loss: 0.8235


100%|██████████| 196/196 [00:04<00:00, 45.52it/s]


Epoch [22/30], Loss: 0.7448


100%|██████████| 196/196 [00:04<00:00, 45.37it/s]


Epoch [23/30], Loss: 0.6861


100%|██████████| 196/196 [00:04<00:00, 45.73it/s]


Epoch [24/30], Loss: 0.6903


100%|██████████| 196/196 [00:04<00:00, 47.26it/s]


Epoch [25/30], Loss: 0.6180


100%|██████████| 196/196 [00:04<00:00, 46.97it/s]


Epoch [26/30], Loss: 0.6261


100%|██████████| 196/196 [00:04<00:00, 45.65it/s]


Epoch [27/30], Loss: 0.5995


100%|██████████| 196/196 [00:04<00:00, 46.70it/s]


Epoch [28/30], Loss: 0.6063


100%|██████████| 196/196 [00:04<00:00, 46.25it/s]


Epoch [29/30], Loss: 0.5610


100%|██████████| 196/196 [00:04<00:00, 46.32it/s]


Epoch [30/30], Loss: 0.5667


100%|██████████| 40/40 [00:00<00:00, 41.21it/s]

테스트 정확도: 53.36%





53.36

In [16]:
# Train the Tutel-backed MoE, then display its test accuracy as the cell result
criterion = nn.CrossEntropyLoss()
optimizer = optim.AdamW(params=moe_tutel.parameters(), lr=0.001)
train(model=moe_tutel, train_loader=train_loader, optimizer=optimizer, criterion=criterion, epochs=epochs)
test(model=moe_tutel, test_loader=test_loader)

  with torch.cuda.amp.autocast(enabled=False):
100%|██████████| 196/196 [00:04<00:00, 40.31it/s]


Epoch [1/30], Loss: 2.9119


100%|██████████| 196/196 [00:04<00:00, 45.91it/s]


Epoch [2/30], Loss: 2.4960


100%|██████████| 196/196 [00:04<00:00, 46.56it/s]


Epoch [3/30], Loss: 2.3602


100%|██████████| 196/196 [00:04<00:00, 45.21it/s]


Epoch [4/30], Loss: 2.2072


100%|██████████| 196/196 [00:04<00:00, 45.62it/s]


Epoch [5/30], Loss: 2.0724


100%|██████████| 196/196 [00:04<00:00, 45.51it/s]


Epoch [6/30], Loss: 1.9279


100%|██████████| 196/196 [00:04<00:00, 45.42it/s]


Epoch [7/30], Loss: 1.8096


100%|██████████| 196/196 [00:04<00:00, 45.46it/s]


Epoch [8/30], Loss: 1.6833


100%|██████████| 196/196 [00:04<00:00, 47.27it/s]


Epoch [9/30], Loss: 1.6159


100%|██████████| 196/196 [00:04<00:00, 46.64it/s]


Epoch [10/30], Loss: 1.5469


100%|██████████| 196/196 [00:04<00:00, 45.86it/s]


Epoch [11/30], Loss: 1.4878


100%|██████████| 196/196 [00:04<00:00, 47.12it/s]


Epoch [12/30], Loss: 1.4217


100%|██████████| 196/196 [00:04<00:00, 45.11it/s]


Epoch [13/30], Loss: 1.3537


100%|██████████| 196/196 [00:04<00:00, 46.38it/s]


Epoch [14/30], Loss: 1.3083


100%|██████████| 196/196 [00:04<00:00, 44.56it/s]


Epoch [15/30], Loss: 1.3145


100%|██████████| 196/196 [00:04<00:00, 46.73it/s]


Epoch [16/30], Loss: 1.2940


100%|██████████| 196/196 [00:04<00:00, 47.06it/s]


Epoch [17/30], Loss: 1.2790


100%|██████████| 196/196 [00:04<00:00, 46.36it/s]


Epoch [18/30], Loss: 1.3301


100%|██████████| 196/196 [00:04<00:00, 45.13it/s]


Epoch [19/30], Loss: 1.3733


100%|██████████| 196/196 [00:04<00:00, 45.48it/s]


Epoch [20/30], Loss: 1.2510


100%|██████████| 196/196 [00:04<00:00, 45.63it/s]


Epoch [21/30], Loss: 1.1887


100%|██████████| 196/196 [00:04<00:00, 43.64it/s]


Epoch [22/30], Loss: 1.1519


100%|██████████| 196/196 [00:04<00:00, 44.98it/s]


Epoch [23/30], Loss: 1.1337


100%|██████████| 196/196 [00:04<00:00, 46.28it/s]


Epoch [24/30], Loss: 1.1377


100%|██████████| 196/196 [00:04<00:00, 45.70it/s]


Epoch [25/30], Loss: 1.1349


100%|██████████| 196/196 [00:04<00:00, 44.34it/s]


Epoch [26/30], Loss: 1.1652


100%|██████████| 196/196 [00:04<00:00, 45.59it/s]


Epoch [27/30], Loss: 1.2213


100%|██████████| 196/196 [00:04<00:00, 46.49it/s]


Epoch [28/30], Loss: 1.3357


100%|██████████| 196/196 [00:04<00:00, 46.20it/s]


Epoch [29/30], Loss: 1.3021


100%|██████████| 196/196 [00:04<00:00, 46.44it/s]


Epoch [30/30], Loss: 1.1599


100%|██████████| 40/40 [00:00<00:00, 41.36it/s]

테스트 정확도: 50.12%





50.12