In [1]:
pip install torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cu121

Looking in indexes: https://pypi.org/simple, https://download.pytorch.org/whl/cu121
Collecting torch
  Downloading https://download.pytorch.org/whl/cu121/torch-2.1.0%2Bcu121-cp38-cp38-win_amd64.whl (2474.0 MB)
     ---------------------------------------- 0.0/2.5 GB ? eta -:--:--
     ---------------------------------------- 0.0/2.5 GB 19.4 MB/s eta 0:02:08
     ---------------------------------------- 0.0/2.5 GB 20.1 MB/s eta 0:02:03
     ---------------------------------------- 0.0/2.5 GB 22.5 MB/s eta 0:01:50
     ---------------------------------------- 0.0/2.5 GB 22.6 MB/s eta 0:01:50
     ---------------------------------------- 0.0/2.5 GB 22.6 MB/s eta 0:01:50
     ---------------------------------------- 0.0/2.5 GB 13.3 MB/s eta 0:03:07
     ---------------------------------------- 0.0/2.5 GB 14.1 MB/s eta 0:02:55
     ---------------------------------------- 0.0/2.5 GB 14.3 MB/s eta 0:02:54
     ---------------------------------------- 0.0/2.5 GB 20.6 MB/s eta 0:02:00
     ---

In [2]:
import os
import torch
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets, transforms

In [3]:
# Choose the backend string in order of preference: CUDA first, then MPS, else CPU.
# The MPS check is only evaluated when CUDA is not available.
if torch.cuda.is_available():
    device = "cuda"
elif torch.backends.mps.is_available():
    device = "mps"
else:
    device = "cpu"
print(f"Using {device} device")

Using cuda device


In [4]:
# 파이토치에서 신경망 모듈 가져오기
import torch.nn as nn

# 파이토치에서 모든 신경망 모듈의 베이스 클래스인 nn.Module을 상속하는 NeuralNetwork 클래스를 정의합니다.
class NeuralNetwork(nn.Module):
    """Fully connected network: flatten, then three linear layers with ReLU in between.

    The first linear layer expects 28*28 input features and the last one emits
    10 values per sample. ``forward`` returns these raw logits; no softmax is
    applied inside the module.
    """

    def __init__(self):
        """Create the flatten layer and the linear/ReLU stack."""
        super().__init__()

        # Collapses a multi-dimensional input (e.g. an image) into a vector per sample.
        self.flatten = nn.Flatten()

        # Layers run in the order listed; nn.Sequential registers them as "0".."4".
        hidden_features = 512
        stack = [
            nn.Linear(28 * 28, hidden_features),          # 784 -> 512
            nn.ReLU(),                                    # element-wise activation
            nn.Linear(hidden_features, hidden_features),  # 512 -> 512
            nn.ReLU(),                                    # element-wise activation
            nn.Linear(hidden_features, 10),               # 512 -> 10
        ]
        self.linear_relu_stack = nn.Sequential(*stack)

    def forward(self, x):
        """Flatten ``x`` and pass it through the layer stack, returning the logits."""
        return self.linear_relu_stack(self.flatten(x))

In [5]:
# Build the network, move it to the selected device, then show its layer structure.
model = NeuralNetwork()
model = model.to(device)
print(model)

NeuralNetwork(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=784, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_features=512, bias=True)
    (3): ReLU()
    (4): Linear(in_features=512, out_features=10, bias=True)
  )
)


In [6]:
# Random tensor of shape (1, 28, 28) - standing in for one 28x28 image - created on `device`.
X = torch.rand((1, 28, 28), device=device)

# Forward pass through the network to obtain the logits.
# `model` must already be a constructed instance of the network class.
logits = model(X)

# Convert the logits to probabilities; dim=1 normalises across the values of each row.
pred_probab = nn.Softmax(dim=1)(logits)

# Index of the largest probability along dim 1, i.e. the predicted class.
y_pred = pred_probab.argmax(dim=1)

# Show the predicted class.
print(f"Predicted class: {y_pred}")

Predicted class: tensor([9], device='cuda:0')


In [7]:
# Three random 28x28 "images" stacked along dim 0.
input_image = torch.rand((3, 28, 28))
print(input_image.shape)

torch.Size([3, 28, 28])


In [8]:
# Flatten each 28x28 image to a 784-vector; dim 0 (the 3 samples) is kept.
flatten = nn.Flatten()
flat_image = flatten(input_image)
print(flat_image.shape)

torch.Size([3, 784])


In [9]:
# Linear layer mapping the 784 flattened pixels to 20 features.
layer1 = nn.Linear(28 * 28, 20)
hidden1 = layer1(flat_image)
print(hidden1.shape)

torch.Size([3, 20])


In [10]:
# Show the layer-1 activations before and after ReLU (negative entries become 0).
# NOTE(review): `hidden1` is rebound to the rectified tensor, so re-running this cell
# alone prints already-rectified values under "Before ReLU".
print(f"Before ReLU: {hidden1}\n\n")
hidden1 = torch.relu(hidden1)
print(f"After ReLU: {hidden1}")

Before ReLU: tensor([[ 0.5513, -0.6505,  0.4193, -0.4359,  0.9268,  0.2008,  0.0419,  0.3657,
         -0.2254, -0.2741,  1.0676, -0.2454, -0.5245, -0.2191, -0.3357, -0.4852,
         -0.4240,  0.4030, -0.1966, -0.2628],
        [ 0.5189, -0.3323,  0.1616, -0.5801,  0.6734,  0.1978, -0.0774,  0.3191,
         -0.2101, -0.3307,  1.0035, -0.1405, -0.1286, -0.2402, -0.4156, -0.0210,
         -0.8177, -0.1110, -0.2095, -0.2911],
        [ 0.2584, -0.8131,  0.0032, -0.7605,  0.8036,  0.2407,  0.2625,  0.2343,
         -0.4672, -0.3735,  0.6698,  0.1894, -0.2894, -0.3739, -0.5302, -0.3047,
         -0.8450,  0.0479, -0.2798, -0.6588]], grad_fn=<AddmmBackward0>)


After ReLU: tensor([[0.5513, 0.0000, 0.4193, 0.0000, 0.9268, 0.2008, 0.0419, 0.3657, 0.0000,
         0.0000, 1.0676, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.4030,
         0.0000, 0.0000],
        [0.5189, 0.0000, 0.1616, 0.0000, 0.6734, 0.1978, 0.0000, 0.3191, 0.0000,
         0.0000, 1.0035, 0.0000, 0.0000, 0.0000, 0.00

In [11]:
# Chain the modules built in the cells above with a new ReLU and a 20 -> 10 linear layer;
# nn.Sequential feeds each module's output to the next one in the order given.
seq_modules = nn.Sequential(flatten, layer1, nn.ReLU(), nn.Linear(20, 10))

# Fresh random batch of three 28x28 inputs, pushed through the whole chain.
input_image = torch.rand((3, 28, 28))
logits = seq_modules(input_image)

In [12]:
# Normalise `logits` along dim 1 so each row becomes a probability distribution.
softmax = nn.Softmax(dim=1)
pred_probab = softmax(logits)

In [13]:
# Print the module tree of `model`, then for every registered parameter its name,
# its full size, and a preview of its first two entries along dim 0.
print(f"Model structure: {model}\n\n")

for name, param in model.named_parameters():
    print(f"Layer: {name} | Size: {param.size()} | Values : {param[:2]} \n")

Model structure: NeuralNetwork(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=784, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_features=512, bias=True)
    (3): ReLU()
    (4): Linear(in_features=512, out_features=10, bias=True)
  )
)


Layer: linear_relu_stack.0.weight | Size: torch.Size([512, 784]) | Values : tensor([[-0.0015, -0.0035,  0.0061,  ..., -0.0189,  0.0288, -0.0112],
        [ 0.0238, -0.0046,  0.0206,  ...,  0.0240,  0.0330, -0.0157]],
       device='cuda:0', grad_fn=<SliceBackward0>) 

Layer: linear_relu_stack.0.bias | Size: torch.Size([512]) | Values : tensor([-0.0229, -0.0037], device='cuda:0', grad_fn=<SliceBackward0>) 

Layer: linear_relu_stack.2.weight | Size: torch.Size([512, 512]) | Values : tensor([[ 0.0169, -0.0125,  0.0426,  ...,  0.0164, -0.0135, -0.0429],
        [-0.0271, -0.0017, -0.0358,  ...,  0.0365,  0.0252, -0.0396]],
       device='cuda:0', grad_fn=<Sl

In [14]:
import torch
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets
from torchvision.transforms import ToTensor

# FashionMNIST train and test splits rooted at ./data, each sample converted by ToTensor().
# download=True lets torchvision fetch the files (the recorded output shows the download).
training_data = datasets.FashionMNIST(root="data", train=True, download=True, transform=ToTensor())
test_data = datasets.FashionMNIST(root="data", train=False, download=True, transform=ToTensor())

# Batch both splits in groups of 64; no other DataLoader option is overridden.
train_dataloader = DataLoader(dataset=training_data, batch_size=64)
test_dataloader = DataLoader(dataset=test_data, batch_size=64)

# NOTE(review): this re-declares the NeuralNetwork class that an earlier cell already
# defines with the same layers; from here on this definition shadows the earlier one.
class NeuralNetwork(nn.Module):
    """Flatten the input, then apply Linear(784, 512) -> ReLU -> Linear(512, 512) -> ReLU -> Linear(512, 10)."""

    def __init__(self):
        """Register the flatten layer and the sequential linear/ReLU stack."""
        super().__init__()
        self.flatten = nn.Flatten()
        width = 512
        self.linear_relu_stack = nn.Sequential(
            *[
                nn.Linear(28 * 28, width),
                nn.ReLU(),
                nn.Linear(width, width),
                nn.ReLU(),
                nn.Linear(width, 10),
            ]
        )

    def forward(self, x):
        """Return the logits produced by the stack for the flattened ``x``."""
        flat = self.flatten(x)
        return self.linear_relu_stack(flat)

# Rebinds `model` to a freshly initialised network.
# NOTE(review): unlike the earlier cell, there is no `.to(device)` here, and the
# training/test loops below pass batches to the model exactly as the DataLoader yields them.
model = NeuralNetwork()

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz to data\FashionMNIST\raw\train-images-idx3-ubyte.gz


100.0%


Extracting data\FashionMNIST\raw\train-images-idx3-ubyte.gz to data\FashionMNIST\raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz to data\FashionMNIST\raw\train-labels-idx1-ubyte.gz


100.0%


Extracting data\FashionMNIST\raw\train-labels-idx1-ubyte.gz to data\FashionMNIST\raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz to data\FashionMNIST\raw\t10k-images-idx3-ubyte.gz


100.0%


Extracting data\FashionMNIST\raw\t10k-images-idx3-ubyte.gz to data\FashionMNIST\raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz to data\FashionMNIST\raw\t10k-labels-idx1-ubyte.gz


100.0%

Extracting data\FashionMNIST\raw\t10k-labels-idx1-ubyte.gz to data\FashionMNIST\raw





In [15]:
# Step size passed to the SGD optimizer.
learning_rate = 1e-3
# NOTE(review): not referenced by any visible cell; the DataLoaders above hard-code batch_size=64.
batch_size = 64
# NOTE(review): reassigned to 10 in the training cell below before the loop reads it.
epochs = 5

In [16]:
# Cross-entropy loss, applied to the model's raw logits in train_loop / test_loop.
# NOTE(review): the training cell below creates `loss_fn` again.
loss_fn = nn.CrossEntropyLoss()

In [17]:
# SGD over all parameters of `model`, using `learning_rate` as the step size.
# NOTE(review): the training cell below creates `optimizer` again.
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

In [18]:
def train_loop(dataloader, model, loss_fn, optimizer):
    """Train ``model`` for one pass over ``dataloader``, logging every 100th batch.

    Args:
        dataloader: Iterable of ``(X, y)`` batches that also exposes a sized ``.dataset``.
        model: Module to train; it is called as ``model(X)``.
        loss_fn: Callable invoked as ``loss_fn(pred, y)``; its result must support
            ``.backward()`` and ``.item()``.
        optimizer: Object providing ``step()`` and ``zero_grad()``.

    Returns:
        None. Progress lines of the form ``loss: <value>  [<seen>/<total>]`` go to stdout.
    """
    size = len(dataloader.dataset)
    # Set the model to training mode - important for batch normalization and dropout layers.
    # Unnecessary in this situation, but added for best practice.
    model.train()

    # Running number of samples consumed so far. Deriving it as (batch + 1) * len(X)
    # is wrong whenever the logged batch is a short final batch, because len(X) is
    # then smaller than the size of the batches that came before it.
    current = 0
    for batch, (X, y) in enumerate(dataloader):
        current += len(X)

        # Compute prediction and loss.
        # NOTE(review): batches are used as yielded; nothing here moves X / y to the
        # model's device, so caller must keep model and data on the same device.
        pred = model(X)
        loss = loss_fn(pred, y)

        # Backpropagation: accumulate gradients, apply the update, then clear the
        # gradients so they do not carry over into the next batch.
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()

        if batch % 100 == 0:
            loss = loss.item()
            print(f"loss: {loss:>7f}  [{current:>5d}/{size:>5d}]")


def test_loop(dataloader, model, loss_fn):
    # 모델을 평가 모드로 설정 - 배치 정규화 및 드롭아웃 레이어에 중요합니다.
    # 이 상황에서는 불필요하지만 모범 사례를 위해 추가되었습니다.
    model.eval()
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    test_loss, correct = 0, 0

    # torch.no_grad()로 모델을 평가하면 테스트 모드에서 그라데이션이 계산되지 않습니다.
    # 또한 requires_grad = True인 텐서에서 불필요한 그라데이션 계산과 메모리 사용량을 줄이는 역할도 합니다.
    with torch.no_grad():
        for X, y in dataloader:
            pred = model(X)
            test_loss += loss_fn(pred, y).item()
            correct += (pred.argmax(1) == y).type(torch.float).sum().item()

    test_loss /= num_batches
    correct /= size
    print(f"Test Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")

In [19]:
# Loss and optimizer for the run.
# NOTE(review): both repeat the definitions from the two single-line cells above.
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

# NOTE(review): overrides the `epochs = 5` set in the hyperparameter cell.
epochs = 10
# Each epoch is one full training pass followed by an evaluation on the test set.
for t in range(epochs):
    print(f"Epoch {t+1}\n-------------------------------")
    train_loop(train_dataloader, model, loss_fn, optimizer)
    test_loop(test_dataloader, model, loss_fn)
print("Done!")

Epoch 1
-------------------------------
loss: 2.312476  [   64/60000]
loss: 2.291575  [ 6464/60000]
loss: 2.270168  [12864/60000]
loss: 2.269212  [19264/60000]
loss: 2.246334  [25664/60000]
loss: 2.225170  [32064/60000]
loss: 2.232420  [38464/60000]
loss: 2.198427  [44864/60000]
loss: 2.194474  [51264/60000]
loss: 2.165631  [57664/60000]
Test Error: 
 Accuracy: 50.7%, Avg loss: 2.157209 

Epoch 2
-------------------------------
loss: 2.168838  [   64/60000]
loss: 2.153795  [ 6464/60000]
loss: 2.092962  [12864/60000]
loss: 2.117739  [19264/60000]
loss: 2.058940  [25664/60000]
loss: 1.999864  [32064/60000]
loss: 2.035250  [38464/60000]
loss: 1.951489  [44864/60000]
loss: 1.954653  [51264/60000]
loss: 1.890577  [57664/60000]
Test Error: 
 Accuracy: 54.2%, Avg loss: 1.884217 

Epoch 3
-------------------------------
loss: 1.912507  [   64/60000]
loss: 1.881485  [ 6464/60000]
loss: 1.759808  [12864/60000]
loss: 1.821459  [19264/60000]
loss: 1.696427  [25664/60000]
loss: 1.641690  [32064/600

In [20]:
import torch
# Report the installed torch build and whether CUDA is usable from this kernel.
print(torch.__version__)
print(torch.cuda.is_available())


2.1.0+cu121
True
