In [None]:
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
from torch.optim.adam import Adam
from torch.utils.data.dataloader import DataLoader
from torchvision.datasets.mnist import MNIST
from torchvision.transforms import ToTensor

In [None]:
# Load the MNIST train and test splits from ./ (download=True), passing
# ToTensor() as the per-sample transform for both.
mnist_kwargs = dict(root='./', download=True, transform=ToTensor())
train_data = MNIST(train=True, **mnist_kwargs)
test_data = MNIST(train=False, **mnist_kwargs)

In [None]:
# Dimensions of the raw training image tensor.
train_data.data.shape

In [None]:
# Display the label tensor of the training split.
train_data.targets

In [None]:
# Render the first training image in grayscale, then print its label.
first_image, first_label = train_data.data[0], train_data.targets[0]
plt.imshow(first_image, cmap='gray')
plt.show()
print(first_label)

In [None]:
# Batch both splits in groups of 32; only the training loader shuffles.
train_loader = DataLoader(
    dataset=train_data,
    batch_size=32,
    shuffle=True,
)
test_loader = DataLoader(
    dataset=test_data,
    batch_size=32,
    shuffle=False,
)

In [None]:
# Pull one batch from the training loader and show the shape of its first element.
first_batch = next(iter(train_loader))
first_batch[0].shape

In [None]:
# Use CUDA when torch reports it as available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
device

In [None]:
# Fully connected classifier: 784 inputs -> 64 -> 64 -> 10 outputs,
# with a ReLU after each of the first two Linear layers.
layers = [
    nn.Linear(784, 64),
    nn.ReLU(),
    nn.Linear(64, 64),
    nn.ReLU(),
    nn.Linear(64, 10),
]
model = nn.Sequential(*layers)

# Move the parameters to the selected device, then display the module.
model.to(device)
model

In [None]:
from tensorboardX import SummaryWriter

# Train `model` for 20 epochs with Adam, logging the per-batch training loss
# to TensorBoard under the tag 'Loss/train'.
writer = SummaryWriter()

lr = 1e-3
optim = Adam(model.parameters(), lr=lr)
# Build the loss module once; it is loop-invariant, so there is no reason to
# re-instantiate it for every batch.
criterion = nn.CrossEntropyLoss()

global_step = 0
try:
    for epoch in range(20):
        for data, label in train_loader:
            optim.zero_grad()
            # Flatten every image into a 784-element vector for the first Linear layer.
            data = torch.reshape(data, (-1, 784))
            pred = model(data.to(device))
            loss = criterion(pred, label.to(device))
            writer.add_scalar('Loss/train', loss.item(), global_step)
            global_step += 1
            loss.backward()
            optim.step()
        # Note: this is the loss of the last batch in the epoch, not an epoch average.
        print(f'{epoch + 1} loss:{loss.item()}')
finally:
    # Close the writer even if training is interrupted, so buffered scalars are
    # written out and the event file handle is released.
    writer.close()

In [None]:
# Load the TensorBoard notebook extension, then start TensorBoard on port 6006
# pointed at the `runs` log directory.
# NOTE(review): assumes the SummaryWriter above writes under ./runs (its default) - confirm.
%load_ext tensorboard
%tensorboard --logdir runs --port 6006

In [None]:
import torch
import torch.nn as nn


# Example model: LeNet-5
class LeNet5(nn.Module):
    """LeNet-5 style CNN mapping single-channel 28x28 images to 10 logits.

    Layout: conv(1->6, 5x5) -> ReLU -> avgpool(2) -> conv(6->16, 5x5) -> ReLU
    -> avgpool(2) -> fc(256->120) -> ReLU -> fc(120->84) -> ReLU -> fc(84->10).
    With a 28x28 input the spatial size goes 28 -> 24 -> 12 -> 8 -> 4, which is
    where the 16 * 4 * 4 input width of ``fc1`` comes from.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 6, kernel_size=5)
        self.pool1 = nn.AvgPool2d(2, 2)
        self.conv2 = nn.Conv2d(6, 16, kernel_size=5)
        self.pool2 = nn.AvgPool2d(2, 2)
        self.fc1 = nn.Linear(16 * 4 * 4, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        """Compute class logits for a batch of images.

        Args:
            x: Tensor of shape (N, 1, 28, 28); a single unbatched (1, 28, 28)
                image is also accepted and treated as a batch of one.

        Returns:
            Tensor of shape (N, 10) holding unnormalised class scores.

        Raises:
            RuntimeError: if the spatial size does not reduce to 4x4 after the
                second pooling layer (i.e. the input is not 28x28).
        """
        # Treat an unbatched image as a batch of one so the output stays (1, 10).
        if x.dim() == 3:
            x = x.unsqueeze(0)
        x = self.pool1(torch.relu(self.conv1(x)))
        x = self.pool2(torch.relu(self.conv2(x)))
        # Flatten per sample instead of view(-1, 256): the batch dimension is
        # preserved, so a wrongly sized input fails at fc1 rather than being
        # silently regrouped into a different number of rows.
        x = torch.flatten(x, start_dim=1)
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        x = self.fc3(x)
        return x


# Instantiate the model
model = LeNet5()

# Print the name and shape of every parameter, one per line
print(
    *(f'{name:<20} {list(param.size())}' for name, param in model.named_parameters()),
    sep='\n',
)

In [None]:
import pandas as pd

# Compute the number of parameters per layer: one record per trainable tensor.
param_data = [
    {'Layer': name, 'Shape': list(param.shape), 'Params': param.numel()}
    for name, param in model.named_parameters()
    if param.requires_grad
]

df = pd.DataFrame(param_data)
# Append a summary row labelled 'Total' holding the overall parameter count.
total_params = df['Params'].sum()
df.loc['Total'] = ['', '', total_params]

print(df)

In [None]:
import matplotlib.pyplot as plt

# Fetch the weights of the first Conv layer (conv1, which has in_channels == 1).
# The original read conv2 here, contradicting this comment and the
# single-input-channel assumption made when slicing below.
weights = model.conv1.weight.detach().clone()

# Visualization: one subplot per filter
num_filters = weights.shape[0]  # number of output channels
# squeeze=False keeps `axes` two-dimensional even for a single filter, so the
# loop below can index it uniformly.
fig, axes = plt.subplots(1, num_filters, figsize=(num_filters * 2, 2), squeeze=False)

for i, ax in enumerate(axes[0]):
    # With a single input channel, index 0 selects the filter's whole 2-D kernel.
    filt = weights[i, 0, :, :].cpu().numpy()
    ax.imshow(filt, cmap='gray')
    ax.axis('off')
    ax.set_title(f'Filter {i + 1}')

plt.tight_layout()
plt.show()