In [1]:
#!pip install pytorch-lightning

In [2]:
import os
import torch
from torchvision import transforms, datasets
from torch.utils.data import DataLoader
import pytorch_lightning as pl
from pytorch_lightning import Trainer
from pytorch_lightning.callbacks import ModelCheckpoint
from torchvision.models import vgg16
import torch.nn.functional as F
from torch import nn
import matplotlib.pyplot as plt


In [3]:
#!pip install pytorch-lightning==2.2.2
#pip install --upgrade pytorch-lightning torchmetrics
#pip install torch torchvision pytorch-lightning


In [4]:
import torch
print(torch.cuda.is_available())  # True가 출력되어야 GPU를 사용할 수 있음
print(torch.cuda.device_count())  # 사용 가능한 GPU 개수 출력


True
4


In [5]:
import pytorch_lightning as pl
print(pl.__version__)  

2.2.2


In [6]:
class MNISTModel(pl.LightningModule):
    def __init__(self):
        super(MNISTModel, self).__init__()
        # VGG16 모델 로드
        self.feature_extractor = vgg16(pretrained=True)
        # MNIST는 1채널 이미지이므로 VGG16의 첫 번째 Conv 레이어를 수정
        self.feature_extractor.features[0] = nn.Conv2d(1, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        # 분류를 위한 최종 레이어 수정 (MNIST는 10개 클래스)
        self.feature_extractor.classifier[6] = nn.Linear(4096, 10)

    def forward(self, x):
        return self.feature_extractor(x)

    def training_step(self, batch, batch_idx):
        x, y = batch
        y_hat = self(x)
        loss = F.cross_entropy(y_hat, y)
        self.log("train_loss", loss, on_step=False, on_epoch=True, prog_bar=True)
        return loss

    def validation_step(self, batch, batch_idx):
        x, y = batch
        y_hat = self(x)
        val_loss = F.cross_entropy(y_hat, y)
        self.log("val_loss", val_loss, on_step=False, on_epoch=True, prog_bar=True)

    def configure_optimizers(self):
        optimizer = torch.optim.Adam(self.parameters(), lr=1e-4)
        return optimizer

    def train_dataloader(self):
        transform = transforms.Compose([transforms.Resize((224, 224)), transforms.ToTensor()])
        mnist_train = datasets.MNIST(root="./", train=True, download=True, transform=transform)
        train_loader = DataLoader(mnist_train, batch_size=32, shuffle=True)
        return train_loader

    def val_dataloader(self):
        transform = transforms.Compose([transforms.Resize((224, 224)), transforms.ToTensor()])
        mnist_val = datasets.MNIST(root="./", train=False, download=True, transform=transform)
        val_loader = DataLoader(mnist_val, batch_size=32)
        return val_loader


In [9]:
# 모델 인스턴스화
model = MNISTModel()

# 학습 단계에서 최적의 모델을 저장하기 위한 체크포인트 콜백 설정
checkpoint_callback = ModelCheckpoint(monitor="val_loss", dirpath="model/", filename="mnist-{epoch:02d}-{val_loss:.2f}", save_top_k=1)

# 트레이너 설정
#trainer = Trainer(max_epochs=5, accelerator="gpu", devices=2, callbacks=[checkpoint_callback])
trainer = Trainer(max_epochs=5, devices=1, accelerator="gpu")


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


In [10]:
# 학습 시작
trainer.fit(model)

You are using a CUDA device ('NVIDIA RTX A6000') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
2024-04-18 09:09:26.917359: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-04-18 09:09:26.919747: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2024-04-18 09:09:26.957651: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following in

Sanity Checking: |                                        | 0/? [00:00<?, ?it/s]

/home/tmttr/workspace/venv/lib/python3.8/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:441: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=79` in the `DataLoader` to improve performance.
/home/tmttr/workspace/venv/lib/python3.8/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:441: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=79` in the `DataLoader` to improve performance.


Training: |                                               | 0/? [00:00<?, ?it/s]

/home/tmttr/workspace/venv/lib/python3.8/site-packages/pytorch_lightning/trainer/call.py:54: Detected KeyboardInterrupt, attempting graceful shutdown...


In [None]:
# 학습 및 검증 손실에 대한 기록을 가져와서 그래프로 그림
train_loss = trainer.callback_metrics["train_loss"]
val_loss = trainer.callback_metrics["val_loss"]

plt.figure(figsize=(10, 6))
plt.plot(train_loss, label='Training Loss')
plt.plot(val_loss, label='Validation Loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.title('Training and Validation Loss')
plt.legend()
plt.show()


In [1]:
import os
import torch
import torch.nn.functional as F
from torch.nn import Linear
from torch.utils.data import DataLoader
from torchvision.datasets import MNIST
from torchvision import transforms
import pytorch_lightning as pl
from pytorch_lightning import Trainer

class MNISTModel(pl.LightningModule):
    def __init__(self):
        super().__init__()
        self.layer_1 = Linear(28 * 28, 128)
        self.layer_2 = Linear(128, 256)
        self.layer_3 = Linear(256, 10)
        
    def forward(self, x):
        x = x.view(x.size(0), -1)  # Flatten the image
        x = F.relu(self.layer_1(x))
        x = F.relu(self.layer_2(x))
        x = self.layer_3(x)
        return x
    
    def training_step(self, batch, batch_idx):
        x, y = batch
        y_hat = self(x)
        loss = F.cross_entropy(y_hat, y)
        self.log("train_loss", loss)
        return loss
    
    def configure_optimizers(self):
        return torch.optim.Adam(self.parameters(), lr=1e-3)
    
    def prepare_data(self):
        MNIST(os.getcwd(), train=True, download=True, transform=transforms.ToTensor())
        MNIST(os.getcwd(), train=False, download=True, transform=transforms.ToTensor())
    
    def train_dataloader(self):
        mnist_train = MNIST(os.getcwd(), train=True, download=False, transform=transforms.ToTensor())
        train_loader = DataLoader(mnist_train, batch_size=64)
        return train_loader
    
    def val_dataloader(self):
        mnist_val = MNIST(os.getcwd(), train=False, download=False, transform=transforms.ToTensor())
        val_loader = DataLoader(mnist_val, batch_size=64)
        return val_loader

model = MNISTModel()

# 2개의 GPU를 사용하여 학습 준비
trainer = Trainer(max_epochs=5, devices=2, accelerator="gpu")
trainer.fit(model)



GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
/home/kotech/venv-lightning/lib/python3.8/site-packages/pytorch_lightning/trainer/connectors/logger_connector/logger_connector.py:67: Starting from v1.9.0, `tensorboardX` has been removed as a dependency of the `pytorch_lightning` package, due to potential conflicts with other packages in the ML ecosystem. For this reason, `logger=True` will use `CSVLogger` as the default logger, unless the `tensorboard` or `tensorboardX` packages are found. Please `pip install lightning[extra]` or one of them to enable TensorBoard support by default
/home/kotech/venv-lightning/lib/python3.8/site-packages/pytorch_lightning/trainer/configuration_validator.py:72: You passed in a `val_dataloader` but have no `validation_step`. Skipping val loop.
/home/kotech/venv-lightning/lib/python3.8/site-packages/pytorch_lightning/trainer/configuration_validator.py:72:

Training: |                                                                                                   …

/home/kotech/venv-lightning/lib/python3.8/site-packages/pytorch_lightning/trainer/call.py:54: Detected KeyboardInterrupt, attempting graceful shutdown...
