# Pytorch 기초: DataLoader & CustomDataset

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F

from torch.utils.data import TensorDataset # 텐서데이터셋
from torch.utils.data import DataLoader # 데이터로더

torch.manual_seed(1)

<torch._C.Generator at 0x7ff3c7de4b10>

## 1. DataLaoder: Batch

### 1.1 Input Data

In [2]:
X_train  =  torch.FloatTensor([[73,  80,  75], 
                               [93,  88,  93], 
                               [89,  91,  90], 
                               [96,  98,  100],   
                               [73,  66,  70]])  

y_train  =  torch.FloatTensor([[152],  [185],  [180],  [196],  [142]])

X_train.shape, y_train.shape

(torch.Size([5, 3]), torch.Size([5, 1]))

### 1.2 TensorDataset & DataLoader

In [3]:
dataset = TensorDataset(X_train, y_train)

In [4]:
dataloader = DataLoader(dataset, batch_size=2, shuffle=True)

### 1.3 torch.nn 및 Optimizer

In [5]:
model = nn.Linear(3,1)
optimizer = torch.optim.SGD(model.parameters(), lr=1e-5) 

### 1.4 Training

In [6]:
nb_epochs = 20
for epoch in range(nb_epochs + 1):
  for batch_idx, samples in enumerate(dataloader):
    # print(batch_idx)
    # print(samples)
    x_train, y_train = samples
    
    # H(x) 계산
    prediction = model(x_train)

    # cost 계산
    cost = F.mse_loss(prediction, y_train)

    # cost로 H(x) 계산
    optimizer.zero_grad()
    cost.backward()
    optimizer.step()

    print('Epoch {:4d}/{} Batch {}/{} Cost: {:.6f}'.format(
        epoch, nb_epochs, batch_idx+1, len(dataloader),
        cost.item()
        ))

Epoch    0/20 Batch 1/3 Cost: 23071.781250
Epoch    0/20 Batch 2/3 Cost: 17581.359375
Epoch    0/20 Batch 3/3 Cost: 3703.553467
Epoch    1/20 Batch 1/3 Cost: 857.131592
Epoch    1/20 Batch 2/3 Cost: 194.912628
Epoch    1/20 Batch 3/3 Cost: 103.150658
Epoch    2/20 Batch 1/3 Cost: 16.461014
Epoch    2/20 Batch 2/3 Cost: 10.970690
Epoch    2/20 Batch 3/3 Cost: 2.953000
Epoch    3/20 Batch 1/3 Cost: 1.246350
Epoch    3/20 Batch 2/3 Cost: 0.095024
Epoch    3/20 Batch 3/3 Cost: 0.104377
Epoch    4/20 Batch 1/3 Cost: 0.678442
Epoch    4/20 Batch 2/3 Cost: 0.123370
Epoch    4/20 Batch 3/3 Cost: 0.118080
Epoch    5/20 Batch 1/3 Cost: 0.094797
Epoch    5/20 Batch 2/3 Cost: 0.531496
Epoch    5/20 Batch 3/3 Cost: 0.011195
Epoch    6/20 Batch 1/3 Cost: 0.219984
Epoch    6/20 Batch 2/3 Cost: 0.042416
Epoch    6/20 Batch 3/3 Cost: 0.917457
Epoch    7/20 Batch 1/3 Cost: 0.213805
Epoch    7/20 Batch 2/3 Cost: 0.640260
Epoch    7/20 Batch 3/3 Cost: 0.005267
Epoch    8/20 Batch 1/3 Cost: 0.454904
Epoch 

### 1.5 Predict

In [7]:
# 임의의 입력 [73, 80, 75]를 선언
new_var =  torch.FloatTensor([[73, 80, 75]]) 

# 입력한 값 [73, 80, 75]에 대해서 예측값 y를 리턴받아서 pred_y에 저장
pred_y = model(new_var) 
print("훈련 후 입력이 73, 80, 75일 때의 예측값 :", pred_y) 

훈련 후 입력이 73, 80, 75일 때의 예측값 : tensor([[151.0829]], grad_fn=<AddmmBackward0>)


## 2. CustomDataset 

In [8]:
import torch
import torch.nn.functional as F
from torch.utils.data import Dataset
from torch.utils.data import DataLoader

### 2.1 CustomDataset Class

In [None]:
# 기본 구성
class CustomDataset(torch.utils.data.Dataset): 
    def __init__(self):
    # 데이터셋의 전처리를 해주는 부분

    def __len__(self):
    # 데이터셋의 길이. 즉, 총 샘플의 수를 적어주는 부분

    def __getitem__(self, idx): 
    # 데이터셋에서 특정 1개의 샘플을 가져오는 함수

In [10]:
# Dataset 상속
class CustomDataset(Dataset): 
  def __init__(self):
    self.x_data = [[73, 80, 75],
                   [93, 88, 93],
                   [89, 91, 90],
                   [96, 98, 100],
                   [73, 66, 70]]
    self.y_data = [[152], [185], [180], [196], [142]]

  # 총 데이터의 개수를 리턴
  def __len__(self): 
    return len(self.x_data)

  # 인덱스를 입력받아 그에 맵핑되는 입출력 데이터를 파이토치의 Tensor 형태로 리턴
  def __getitem__(self, idx): 
    x = torch.FloatTensor(self.x_data[idx])
    y = torch.FloatTensor(self.y_data[idx])
    return x, y

### 2.2 Batch Dataset 구축

In [11]:
dataset = CustomDataset()
dataloader = DataLoader(dataset, batch_size=2, shuffle=True)

### 2.3 torch.nn & Optimizer

In [12]:
model = torch.nn.Linear(3,1)
optimizer = torch.optim.SGD(model.parameters(), lr=1e-5)

### 2.4 Training

In [13]:
nb_epochs = 20
for epoch in range(nb_epochs + 1):
  for batch_idx, samples in enumerate(dataloader):
    # print(batch_idx)
    # print(samples)
    x_train, y_train = samples
    # H(x) 계산
    prediction = model(x_train)

    # loss 계산
    loss = F.mse_loss(prediction, y_train)

    # loss로 H(x) 계산
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    print('Epoch {:4d}/{} Batch {}/{} Loss: {:.6f}'.format(
        epoch, nb_epochs, batch_idx+1, len(dataloader),
        cost.item()
        ))

Epoch    0/20 Batch 1/3 Loss: 0.004536
Epoch    0/20 Batch 2/3 Loss: 0.004536
Epoch    0/20 Batch 3/3 Loss: 0.004536
Epoch    1/20 Batch 1/3 Loss: 0.004536
Epoch    1/20 Batch 2/3 Loss: 0.004536
Epoch    1/20 Batch 3/3 Loss: 0.004536
Epoch    2/20 Batch 1/3 Loss: 0.004536
Epoch    2/20 Batch 2/3 Loss: 0.004536
Epoch    2/20 Batch 3/3 Loss: 0.004536
Epoch    3/20 Batch 1/3 Loss: 0.004536
Epoch    3/20 Batch 2/3 Loss: 0.004536
Epoch    3/20 Batch 3/3 Loss: 0.004536
Epoch    4/20 Batch 1/3 Loss: 0.004536
Epoch    4/20 Batch 2/3 Loss: 0.004536
Epoch    4/20 Batch 3/3 Loss: 0.004536
Epoch    5/20 Batch 1/3 Loss: 0.004536
Epoch    5/20 Batch 2/3 Loss: 0.004536
Epoch    5/20 Batch 3/3 Loss: 0.004536
Epoch    6/20 Batch 1/3 Loss: 0.004536
Epoch    6/20 Batch 2/3 Loss: 0.004536
Epoch    6/20 Batch 3/3 Loss: 0.004536
Epoch    7/20 Batch 1/3 Loss: 0.004536
Epoch    7/20 Batch 2/3 Loss: 0.004536
Epoch    7/20 Batch 3/3 Loss: 0.004536
Epoch    8/20 Batch 1/3 Loss: 0.004536
Epoch    8/20 Batch 2/3 L

### 2.5 Predict

In [14]:
# 임의의 입력 [73, 80, 75]를 선언
new_var =  torch.FloatTensor([[73, 80, 75]]) 

# 입력한 값 [73, 80, 75]에 대해서 예측값 y를 리턴받아서 pred_y에 저장
pred_y = model(new_var) 

print("훈련 후 입력이 73, 80, 75일 때의 예측값 :", pred_y) 

훈련 후 입력이 73, 80, 75일 때의 예측값 : tensor([[153.5689]], grad_fn=<AddmmBackward0>)
