## 파이토치를 사용한 회귀(v0.5)

In [42]:
import numpy as np

import torch
import torch.optim as optim
import torch.nn  as nn

from torch.utils.data import Dataset, TensorDataset, DataLoader
from torch.utils.data.dataset import random_split
from torch.utils.tensorboard import SummaryWriter

from sklearn.linear_model import LinearRegression

# Pick the GPU when CUDA is usable, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print(device)

cpu


In [43]:
# Synthetic data for a 1-D linear regression: y = true_b + true_w * x + noise.
true_w = 2
true_b = 1
N = 100

# Seed NumPy before drawing the features, the noise and the shuffle order,
# so all three are reproducible.
np.random.seed(42)
x = np.random.rand(N, 1)
epsilon = 0.1 * np.random.randn(N ,1)
y = true_b + (true_w* x) + epsilon

# Shuffle the row indices, then use the first 80% for training and the
# remaining 20% for validation.
idx = np.arange(N)
np.random.shuffle(idx)

train_idx = idx[: int(N * 0.8)]
val_idx = idx[int(N * 0.8):]

x_train, y_train = x[train_idx], y[train_idx]
x_val, y_val = x[val_idx], y[val_idx]

# Only the training split is converted to float tensors and moved to `device`.
x_train_tensor = torch.as_tensor(x_train).float().to(device)
y_train_tensor = torch.as_tensor(y_train).float().to(device)
lr = 0.1  # learning rate passed to SGD below

# Seed torch before building the model so its initial parameters are reproducible.
torch.manual_seed(42)

# A single Linear(1, 1) layer: one weight and one bias.
model = nn.Sequential(nn.Linear(1,1)).to(device=device)
optimizer = optim.SGD(model.parameters(), lr=lr)
loss_fn = nn.MSELoss(reduction="mean")  # mean squared error over the batch


# n_epochs = 1000

# for epoch in range(n_epochs):
#     model.train()
#     yhat = model(x_train_tensor)
#     loss = loss_fn(yhat, y_train_tensor)
#     loss.backward()
#     optimizer.step()
#     optimizer.zero_grad()
# print(model.state_dict())

In [44]:
# CustomDataset subclasses Dataset and overrides the methods that class requires.
class CustomDataset(Dataset):
    """Dataset that pairs each feature entry with the label at the same index.

    Args:
        x_tensor: Indexable, sized collection of features (e.g. a tensor).
        y_tensor: Indexable, sized collection of labels; must have the same
            length as ``x_tensor``.

    Raises:
        ValueError: If ``x_tensor`` and ``y_tensor`` differ in length.
    """

    def __init__(self, x_tensor, y_tensor):
        # Fail fast: a mismatch would otherwise surface later as an
        # IndexError, or silently drop labels when y is longer than x.
        if len(x_tensor) != len(y_tensor):
            raise ValueError(
                "x_tensor and y_tensor must have the same length, got "
                f"{len(x_tensor)} and {len(y_tensor)}"
            )
        self.x = x_tensor
        self.y = y_tensor

    def __getitem__(self, index):
        """Return the ``(feature, label)`` pair stored at ``index``."""
        return (self.x[index], self.y[index])

    def __len__(self):
        """Return the number of samples, taken from the features."""
        return len(self.x)

In [45]:
# Synthetic data for a 1-D linear regression: y = true_b + true_w * x + noise.
# (Fixed typo: this line previously assigned to `rue_w`, so the cell only ran
# when `true_w` was already defined by an earlier cell.)
true_w = 2
true_b = 1
N = 100

# Seed NumPy before drawing the features, the noise and the shuffle order.
np.random.seed(42)
x = np.random.rand(N, 1)
epsilon = 0.1 * np.random.randn(N, 1)
y = true_b + (true_w * x) + epsilon

# Shuffle the row indices and keep the first 80% as the training portion.
idx = np.arange(N)
np.random.shuffle(idx)

train_idx = idx[: int(N * 0.8)]
val_idx = idx[int(N * 0.8):]

x_train, y_train = x[train_idx], y[train_idx]
x_val, y_val = x[val_idx], y[val_idx]

x_train_tensor = torch.as_tensor(x_train).float().to(device)
y_train_tensor = torch.as_tensor(y_train).float().to(device)

# Only the 80 training rows are wrapped in the dataset; the split below
# therefore yields 64 training and 16 validation samples.
dataset = CustomDataset(x_train_tensor, y_train_tensor)

ratio = .8
n_total = len(dataset)
n_train = int(n_total * ratio)
n_val = n_total - n_train

# NOTE(review): this cell does not seed torch, so the random split presumably
# depends on whatever torch RNG state earlier cells left behind - confirm.
train_data, val_data = random_split(dataset, [n_train, n_val])
# Shuffle only the training batches; validation order does not matter.
train_loader = DataLoader(dataset=train_data, batch_size=16, shuffle=True)
val_loader = DataLoader(dataset=val_data, batch_size=16)

In [46]:
lr = 0.1  # learning rate passed to SGD below

# Seed torch before building the model so its initial parameters are reproducible.
torch.manual_seed(42)

# A single Linear(1, 1) layer (one weight, one bias), trained with plain SGD
# against a mean-squared-error loss.
model = nn.Sequential(nn.Linear(1,1)).to(device=device)
optimizer = optim.SGD(model.parameters(), lr=lr)
loss_fn = nn.MSELoss(reduction="mean")

- 학습용: 한 번의 학습 스텝(순전파 → 손실 계산 → 역전파 → 파라미터 갱신)을 수행하는 함수를 만들어 반환합니다.

In [47]:
def make_train_step_fn(model, loss_fn, optimizer):
    """Build a closure that runs one optimisation step on a batch.

    Args:
        model: Module to train.
        loss_fn: Callable taking ``(predictions, targets)`` and returning a
            scalar loss tensor.
        optimizer: Optimizer that updates the parameters of ``model``.

    Returns:
        A function ``(x, y) -> float`` that performs one update and returns
        the batch loss as a Python number.
    """

    def perform_train_step_fn(x, y):
        # Put the model in training mode before the forward pass.
        model.train()
        batch_loss = loss_fn(model(x), y)
        # Backpropagate, apply the update, then clear the gradients so they
        # do not accumulate into the next step.
        batch_loss.backward()
        optimizer.step()
        optimizer.zero_grad()
        return batch_loss.item()

    return perform_train_step_fn

- 검증용: 파라미터를 갱신하지 않고 검증 데이터의 손실만 계산하는 함수를 만들어 반환합니다.

In [48]:
def make_val_step_fn(model, loss_fn):
    """Build a closure that evaluates the loss on one validation batch.

    Args:
        model: Module to evaluate.
        loss_fn: Callable taking ``(predictions, targets)`` and returning a
            scalar loss tensor.

    Returns:
        A function ``(x, y) -> float`` that returns the batch loss as a
        Python number without updating any parameters.
    """

    def perform_val_step_fn(x, y):
        # Put the model in evaluation mode before the forward pass.
        model.eval()
        # Validation never backpropagates, so disable gradient tracking here
        # instead of relying on every caller to wrap the call in no_grad().
        with torch.no_grad():
            yhat = model(x)
            loss = loss_fn(yhat, y)
        return loss.item()

    return perform_val_step_fn

- 미니배치: 메모리가 작은 컴퓨터에서도 큰 데이터를 다룰 수 있도록, 데이터를 작은 묶음(배치)으로 나누어 처리하는 과정입니다.

In [49]:
def mini_batch(device, data_loader, step_fn):
    """Run ``step_fn`` over every batch and return the mean batch loss.

    Args:
        device: Device each batch is moved to before the step.
        data_loader: Iterable yielding ``(x_batch, y_batch)`` pairs.
        step_fn: Callable ``(x, y) -> loss`` applied to each batch.

    Returns:
        The NumPy mean of the per-batch losses.
    """
    # Move each batch to the target device, then collect its loss.
    batch_losses = [
        step_fn(features.to(device), targets.to(device))
        for features, targets in data_loader
    ]
    return np.mean(batch_losses)

- 오차를 줄이는 것이 목적이기 때문에, 오차를 지켜보다가 튀는 값이 나오면 그 직전까지만 학습시킵니다.

In [50]:
# Train for a fixed number of epochs, recording the mean mini-batch loss of
# each epoch.
n_epochs = 1000
losses = []  # one mean training loss per epoch
train_step_fn = make_train_step_fn(model, loss_fn, optimizer)
for epoch in range(n_epochs):
    # Earlier full-batch variant, kept for reference:
    # loss = train_step_fn(x_train_tensor, y_train_tensor)
    loss = mini_batch(device, train_loader, train_step_fn)
    losses.append(loss)
# Show the learned weight and bias.
print(model.state_dict())

OrderedDict([('0.weight', tensor([[1.9532]])), ('0.bias', tensor([1.0304]))])


In [52]:
# Train for a fixed number of epochs, tracking training and validation loss
# per epoch and logging both to TensorBoard.
# NOTE(review): `model` and `optimizer` are not re-created in this cell, so
# training presumably continues from whatever state earlier cells left - confirm.
n_epochs = 200
losses = []
val_losses = []

train_step_fn = make_train_step_fn(model, loss_fn, optimizer)
val_step_fn = make_val_step_fn(model, loss_fn)

# Use the writer as a context manager so it is closed (and its event file
# flushed) even if a training step raises part-way through.
with SummaryWriter("runs/simple_linear_regression") as write:
    # add_graph needs one sample input to trace the model.
    x_sample, y_sample = next(iter(train_loader))
    write.add_graph(model, x_sample.to(device))

    for epoch in range(n_epochs):
        loss = mini_batch(device, train_loader, train_step_fn)
        losses.append(loss)

        # Validation does not need gradients.
        with torch.no_grad():
            val_loss = mini_batch(device, val_loader, val_step_fn)
            val_losses.append(val_loss)

        # Log both curves under one tag so they share a chart.
        write.add_scalars(
            main_tag="loss",
            tag_scalar_dict={"training": loss, "validation": val_loss},
            global_step=epoch,
        )

# Show the learned weight and bias.
print(model.state_dict())

OrderedDict([('0.weight', tensor([[1.9536]])), ('0.bias', tensor([1.0318]))])


In [58]:
%load_ext tensorboard
%tensorboard --logdir runs

The tensorboard extension is already loaded. To reload it, use:
  %reload_ext tensorboard


Reusing TensorBoard on port 6006 (pid 22824), started 0:00:18 ago. (Use '!kill 22824' to kill it.)

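- 상속: 상위 클래스가 요구하는 메서드는 하위 클래스에서 반드시 정의(오버라이드)해야 한다. (예: `Dataset`을 상속한 `CustomDataset`의 `__getitem__`, `__len__`)
- 인터페이스는 기본적으로 계약 관계이다. 선언된 메서드를 반드시 구현하거나, 메서드 없이 표시만 하는 마커(marker) 인터페이스로 사용한다.
- 파이썬에는 별도의 인터페이스 문법(`interface` 키워드)이 없다. 대신 `abc.ABC`와 `@abstractmethod`, 또는 `typing.Protocol`로 비슷한 역할을 구현할 수 있다.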