In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.utils.data as data

import time
import matplotlib.pyplot as plt
import os

from dataset import EarthMantleDataset
from dataset import read_cdf

from resnet import ResNet
from resnet import Bottleneck
from resnet import BasicBlock

In [2]:
LEARNING_RATE = 0.0001
EPOCH_NUM = 16
MODEL_DIR = 'D:/EarthMantleConvection/models/version_01'

In [3]:
file_path = 'D:/EarthMantleConvection/mantle01/spherical001.nc'
x_volume, y_volume, volume_size = read_cdf(file_path, 5, 5, 5, './scalers')
train_set = EarthMantleDataset(x_volume, y_volume, volume_size)
train_loader = data.DataLoader(dataset=train_set, batch_size=1024, num_workers=2, shuffle=True)

In [4]:
print(train_set)
print(len(train_set))
print(train_set.in_channels)
print(train_set.depth)
print(train_loader.batch_size)

<dataset.EarthMantleDataset object at 0x0000025DAB2006A0>
13024800
7
5
1024


In [5]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f'Device: {device}')
model = ResNet(Bottleneck, [1, 1, 1, 1], [16, 32, 64, 128], train_set.in_channels, out_dim=train_set.depth)
model.to(device)

Device: cuda


ResNet(
  (conv1): Conv3d(7, 16, kernel_size=(7, 7, 7), stride=(2, 2, 2), padding=(3, 3, 3), bias=False)
  (bn1): BatchNorm3d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool3d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (block1): Sequential(
    (0): Bottleneck(
      (conv1): Conv3d(16, 16, kernel_size=(1, 1, 1), stride=(1, 1, 1), bias=False)
      (bn1): BatchNorm3d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv3d(16, 16, kernel_size=(3, 3, 3), stride=(1, 1, 1), padding=(1, 1, 1), bias=False)
      (bn2): BatchNorm3d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv3d(16, 64, kernel_size=(1, 1, 1), stride=(1, 1, 1), bias=False)
      (bn3): BatchNorm3d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv3d(16, 64, kern

In [6]:
from torchinfo import summary
x, y, idx = train_set[0]
input_size = [
    train_loader.batch_size,
    train_set.in_channels,
    train_set.depth,
    train_set.height,
    train_set.width
]
print(input_size)
summary(model, input_size=input_size)

[1024, 7, 5, 5, 5]


Layer (type:depth-idx)                   Output Shape              Param #
ResNet                                   [1024, 5]                 --
├─Conv3d: 1-1                            [1024, 16, 3, 3, 3]       38,416
├─BatchNorm3d: 1-2                       [1024, 16, 3, 3, 3]       32
├─ReLU: 1-3                              [1024, 16, 3, 3, 3]       --
├─MaxPool3d: 1-4                         [1024, 16, 2, 2, 2]       --
├─Sequential: 1-5                        [1024, 64, 2, 2, 2]       --
│    └─Bottleneck: 2-1                   [1024, 64, 2, 2, 2]       --
│    │    └─Conv3d: 3-1                  [1024, 16, 2, 2, 2]       256
│    │    └─BatchNorm3d: 3-2             [1024, 16, 2, 2, 2]       32
│    │    └─ReLU: 3-3                    [1024, 16, 2, 2, 2]       --
│    │    └─Conv3d: 3-4                  [1024, 16, 2, 2, 2]       6,912
│    │    └─BatchNorm3d: 3-5             [1024, 16, 2, 2, 2]       32
│    │    └─ReLU: 3-6                    [1024, 16, 2, 2, 2]       --
│    │ 

In [7]:
optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)
criterion = nn.MSELoss()

In [None]:
os.makedirs(MODEL_DIR, exist_ok=True)

n_data = len(train_set)
train_loss_list = []
for epoch in range(EPOCH_NUM):
    model.train()
    train_loss = []
    s = time.time()
    i = 0
    for x, y, idx in train_loader:
        ss = time.time()
        x = x.to(device)
        y = y.to(device)
        optimizer.zero_grad()
        y_hat = model(x)
        # CrossEntropy 계산
        loss = criterion(y_hat, y)
        # loss -> layer1 까지 Chain rule에 의해 gradient 계산
        loss.backward()
        # model parameter update
        optimizer.step()
        train_loss.append(loss.item())
        print(f'\rEpoch: {epoch}({i}/{n_data}), loss: {loss.item():.4f}, Step Time: {time.time()-ss:.2f}', end='')
        i += train_loader.batch_size
    print()
    train_loss = sum(train_loss)/len(train_loss)
    train_loss_list.append(train_loss)
    print(f"Epoch: {epoch}, TrainLoss: {train_loss:.4f}, Elapsed Time: {time.time()-s:.2f}")
    
    torch.save(model.state_dict(), os.path.join(MODEL_DIR, f'epoch_{epoch:02d}.pt'))

Epoch: 0(13024256/13024800), loss: 0.0121, Step Time: 0.03
Epoch: 0, TrainLoss: 0.0432, Elapsed Time: 460.41
Epoch: 1(13024256/13024800), loss: 0.0070, Step Time: 0.03
Epoch: 1, TrainLoss: 0.0106, Elapsed Time: 463.18
Epoch: 2(13024256/13024800), loss: 0.0104, Step Time: 0.02
Epoch: 2, TrainLoss: 0.0069, Elapsed Time: 476.80
Epoch: 3(13024256/13024800), loss: 0.0057, Step Time: 0.02
Epoch: 3, TrainLoss: 0.0052, Elapsed Time: 474.28
Epoch: 4(13024256/13024800), loss: 0.0045, Step Time: 0.03
Epoch: 4, TrainLoss: 0.0043, Elapsed Time: 459.88
Epoch: 5(13024256/13024800), loss: 0.0026, Step Time: 0.03
Epoch: 5, TrainLoss: 0.0038, Elapsed Time: 462.38
Epoch: 6(13024256/13024800), loss: 0.0044, Step Time: 0.02
Epoch: 6, TrainLoss: 0.0034, Elapsed Time: 428.86
Epoch: 7(13024256/13024800), loss: 0.0038, Step Time: 0.02
Epoch: 7, TrainLoss: 0.0031, Elapsed Time: 433.55
Epoch: 8(13024256/13024800), loss: 0.0022, Step Time: 0.02
Epoch: 8, TrainLoss: 0.0029, Elapsed Time: 429.83
Epoch: 9(616448/130