## 체크포인트 저장/불러오기

In [1]:
import torch
import pandas as pd
from torch import nn
from torch import optim
from torch.utils.data import Dataset, DataLoader

In [2]:
class CustomDataset(Dataset):
    """Dataset backed by a CSV file read with pandas.

    The first CSV column is used as the input value ``x`` and the second
    column as the target ``y``.
    """

    def __init__(self, file_path):
        """Load the CSV at ``file_path`` and keep its first two columns."""
        frame = pd.read_csv(file_path)
        self.length = len(frame)
        self.x = frame.iloc[:, 0].values
        self.y = frame.iloc[:, 1].values

    def __len__(self):
        """Return the number of rows read from the CSV."""
        return self.length

    def __getitem__(self, index):
        """Return ``([x ** 2, x], [y])`` as two float tensors for one row."""
        value = self.x[index]
        features = torch.FloatTensor([value ** 2, value])
        target = torch.FloatTensor([self.y[index]])
        return features, target

class CustomModel(nn.Module):
    """Single linear layer mapping two input features to one output."""

    def __init__(self):
        super().__init__()
        # Two inputs, one output.
        self.layer = nn.Linear(2, 1)

    def forward(self, x):
        """Apply the linear layer to ``x`` and return the result."""
        return self.layer(x)

In [3]:
# Learning rate handed to the SGD optimizer below.
lr = 0.0001

# Training data: shuffled batches of 128 rows; a trailing batch smaller than
# 128 is dropped (drop_last=True).
train_dataset = CustomDataset('./datasets/non_linear.csv')
train_dataloader = DataLoader(
    train_dataset,
    batch_size=128,
    shuffle=True,
    drop_last=True,
)

# Use the GPU when one is available, otherwise run on the CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# Model and loss are placed on the selected device.
model = CustomModel().to(device)
criterion = nn.MSELoss().to(device)

optimizer = optim.SGD(model.parameters(), lr=lr)

In [13]:
import os

# Running index used in checkpoint file names and descriptions (starts at 1).
checkpoint = 1
epochs = 10000

# torch.save does not create missing parent directories, so make sure the
# checkpoint directory exists before the long training run starts.
os.makedirs('./models', exist_ok=True)

for epoch in range(epochs):
    cost = 0.0

    for x, y in train_dataloader:
        x = x.to(device)
        y = y.to(device)

        output = model(x)
        loss = criterion(output, y)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Accumulate a plain Python float. Adding the loss tensor itself
        # would keep every batch's autograd history alive through `cost`
        # and would store a device-bound tensor in the checkpoint.
        cost += loss.item()

    # Mean batch loss for this epoch.
    cost = cost / len(train_dataloader)

    # Write a checkpoint every 1000 epochs. The stored `epoch` is the
    # zero-based index of the epoch that has just finished.
    if (epoch + 1) % 1000 == 0:
        torch.save(
            {
                'model': 'CustomModel',
                'epoch': epoch,
                'model_state_dict': model.state_dict(),
                'optimizer_state_dict': optimizer.state_dict(),
                'cost': cost,
                'description': f'CustomModel 체크포인트-{checkpoint}',
            },
            f'./models/checkpoint-{checkpoint}.pt',
        )
        checkpoint += 1

In [6]:
# Put the model in evaluation mode before running inference.
model.eval()

# Each input row is [x ** 2, x] for x in 1, 5 and 11. Gradients are not
# needed for inference, so the forward pass runs under no_grad.
with torch.no_grad():
    inputs = torch.FloatTensor([[v ** 2, v] for v in (1, 5, 11)]).to(device)
    outputs = model(inputs)

print(outputs)

tensor([[  1.8462],
        [ 69.4509],
        [356.9030]], device='cuda:0')


In [7]:
import torch
import pandas as pd
from torch import nn
from torch import optim
from torch.utils.data import Dataset, DataLoader

In [9]:
class CustomDataset(Dataset):
    """Dataset backed by a CSV file read with pandas.

    The first CSV column is used as the input value ``x`` and the second
    column as the target ``y``.
    """

    def __init__(self, file_path):
        """Load the CSV at ``file_path`` and keep its first two columns."""
        frame = pd.read_csv(file_path)
        self.length = len(frame)
        self.x = frame.iloc[:, 0].values
        self.y = frame.iloc[:, 1].values

    def __len__(self):
        """Return the number of rows read from the CSV."""
        return self.length

    def __getitem__(self, index):
        """Return ``([x ** 2, x], [y])`` as two float tensors for one row."""
        value = self.x[index]
        features = torch.FloatTensor([value ** 2, value])
        target = torch.FloatTensor([self.y[index]])
        return features, target
    
class CustomModel(nn.Module):
    """Single linear layer mapping two input features to one output."""

    def __init__(self):
        super().__init__()
        # Two inputs, one output.
        self.layer = nn.Linear(2, 1)

    def forward(self, x):
        """Apply the linear layer to ``x`` and return the result."""
        return self.layer(x)

In [10]:
# Training data: shuffled batches of 128 rows; a trailing batch smaller than
# 128 is dropped (drop_last=True).
train_dataset = CustomDataset('./datasets/non_linear.csv')
train_dataloader = DataLoader(
    train_dataset,
    batch_size=128,
    shuffle=True,
    drop_last=True,
)

In [11]:
# Learning rate handed to the SGD optimizer below.
lr = 0.0001

# Use the GPU when one is available, otherwise run on the CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# Model and loss are placed on the selected device.
model = CustomModel().to(device)
criterion = nn.MSELoss().to(device)

optimizer = optim.SGD(model.parameters(), lr=lr)

In [15]:
# Restore model and optimizer state from a saved checkpoint file.
# map_location=device remaps the stored tensors onto the device selected for
# this session, so a checkpoint written on a GPU can also be loaded when only
# the CPU is available. weights_only=True limits unpickling to tensors and
# plain containers, which is all this checkpoint dictionary holds.
checkpoint = torch.load(
    './models/checkpoint-6.pt',
    map_location=device,
    weights_only=True,
)
model.load_state_dict(checkpoint['model_state_dict'])
optimizer.load_state_dict(checkpoint['optimizer_state_dict'])

# Zero-based index of the last epoch completed before the checkpoint was
# written; training resumes from the epoch after it.
checkpoint_epoch = checkpoint['epoch']
checkpoint_description = checkpoint['description']
print(checkpoint_description)

CustomModel 체크포인트-6


  checkpoint = torch.load('./models/checkpoint-6.pt')


In [17]:
epochs = 10000

# Continue from the epoch right after the one stored in the checkpoint.
for epoch in range(checkpoint_epoch + 1, epochs):
    cost = 0.0

    for x, y in train_dataloader:
        x = x.to(device)
        y = y.to(device)

        output = model(x)
        loss = criterion(output, y)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Accumulate a plain Python float so the running total does not keep
        # each batch's autograd history alive.
        cost += loss.item()

    # Mean batch loss for this epoch, matching how the initial training loop
    # computes its cost.
    cost = cost / len(train_dataloader)

    # Report once per 1000 epochs, after the whole epoch has been processed,
    # rather than once per batch with a partial sum.
    if (epoch + 1) % 1000 == 0:
        print(f'Epoch : {epoch+1:4d}, Model : {list(model.parameters())}, Cost : {cost:.3f}')

Epoch : 7000, Model : [Parameter containing:
tensor([[ 3.1018, -1.7032]], device='cuda:0', requires_grad=True), Parameter containing:
tensor([0.3708], device='cuda:0', requires_grad=True)], Cost : 0.090
Epoch : 8000, Model : [Parameter containing:
tensor([[ 3.1023, -1.7031]], device='cuda:0', requires_grad=True), Parameter containing:
tensor([0.3814], device='cuda:0', requires_grad=True)], Cost : 0.084
Epoch : 9000, Model : [Parameter containing:
tensor([[ 3.1017, -1.7030]], device='cuda:0', requires_grad=True), Parameter containing:
tensor([0.3909], device='cuda:0', requires_grad=True)], Cost : 0.083
Epoch : 10000, Model : [Parameter containing:
tensor([[ 3.1013, -1.7032]], device='cuda:0', requires_grad=True), Parameter containing:
tensor([0.3997], device='cuda:0', requires_grad=True)], Cost : 0.083
