### DATA LOADER
- 미니 배치 학습, 데이터 셔플, 병렬처리(GPU 여러개 학습)까지 수행 가능
- GPU, SDD(HDD), RAM 간의 병목현상을 예방해준다.

In [5]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

from torch.utils.data import TensorDataset, DataLoader, Dataset # 텐서데이터셋, 데이터로더

In [6]:
x_train  =  torch.FloatTensor([[73,  80,  75], 
                               [93,  88,  93], 
                               [89,  91,  90], 
                               [96,  98,  100],   
                               [73,  66,  70]])  
y_train  =  torch.FloatTensor([[152],  [185],  [180],  [196],  [142]])

In [7]:
dataset = TensorDataset(x_train, y_train) # 정답을 짝지어준다.

In [8]:
dataloader = DataLoader(dataset, batch_size=2, shuffle=True)

In [9]:
for i, data in enumerate(dataloader):
    print(i, data)

0 [tensor([[89., 91., 90.],
        [93., 88., 93.]]), tensor([[180.],
        [185.]])]
1 [tensor([[ 73.,  66.,  70.],
        [ 96.,  98., 100.]]), tensor([[142.],
        [196.]])]
2 [tensor([[73., 80., 75.]]), tensor([[152.]])]


In [21]:
class LinearRegressionModel(nn.Module):
    def __init__(self):
        super().__init__()
        self.linear1 = nn.Linear(3, 3)
        self.linear2 = nn.Linear(3, 1)
        self.relu = nn.ReLU()


    def forward(self, x):
        x = self.linear1(x)
        x = self.relu(x)
        x = self.linear2(x)
        return x
    
model = LinearRegressionModel()
print(model)

LinearRegressionModel(
  (linear1): Linear(in_features=3, out_features=3, bias=True)
  (linear2): Linear(in_features=3, out_features=1, bias=True)
  (relu): ReLU()
)


In [22]:
optimizer = optim.SGD(model.parameters(), lr=5e-4)

epochs = 1000

for epoch in range(epochs + 1):
    for idx, batch in enumerate(dataloader):
        x, y_true = batch

        y_pred = model(x)

        loss = F.mse_loss(y_pred, y_true)

        # if idx == 0:
        # 역전파
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if epoch % 50 == 0:
                  print('Epoch {:4d}/{} Batch {}/{} Cost: {:.6f}'.format(
          epoch, epochs, idx + 1, len(dataloader), loss.item()
      ))

Epoch    0/1000 Batch 1/3 Cost: 26669.953125
Epoch    0/1000 Batch 2/3 Cost: 46197680.000000
Epoch    0/1000 Batch 3/3 Cost: 36614.152344
Epoch   50/1000 Batch 1/3 Cost: 21608.093750
Epoch   50/1000 Batch 2/3 Cost: 24748.507812
Epoch   50/1000 Batch 3/3 Cost: 26119.384766
Epoch  100/1000 Batch 1/3 Cost: 11589.011719
Epoch  100/1000 Batch 2/3 Cost: 20434.822266
Epoch  100/1000 Batch 3/3 Cost: 24425.197266
Epoch  150/1000 Batch 1/3 Cost: 13068.722656
Epoch  150/1000 Batch 2/3 Cost: 15499.833008
Epoch  150/1000 Batch 3/3 Cost: 8807.588867
Epoch  200/1000 Batch 1/3 Cost: 13137.139648
Epoch  200/1000 Batch 2/3 Cost: 8512.555664
Epoch  200/1000 Batch 3/3 Cost: 6104.886719
Epoch  250/1000 Batch 1/3 Cost: 10254.728516
Epoch  250/1000 Batch 2/3 Cost: 6889.776367
Epoch  250/1000 Batch 3/3 Cost: 2999.406494
Epoch  300/1000 Batch 1/3 Cost: 5135.188477
Epoch  300/1000 Batch 2/3 Cost: 8014.404297
Epoch  300/1000 Batch 3/3 Cost: 1855.531006
Epoch  350/1000 Batch 1/3 Cost: 3468.627930
Epoch  350/1000 

### 커스텀 데이터셋으로 선형회귀 구현

In [23]:
# Dataset 상속
class CustomDataset(Dataset): 
  def __init__(self):
    self.x_data = [[73, 80, 75],
                   [93, 88, 93],
                   [89, 91, 90],
                   [96, 98, 100],
                   [73, 66, 70]]
    self.y_data = [[152], [185], [180], [196], [142]]

  # 총 데이터의 개수를 리턴
  def __len__(self): 
    return len(self.x_data)

  # 인덱스를 입력받아 그에 맵핑되는 입출력 데이터를 파이토치의 Tensor 형태로 리턴
  def __getitem__(self, idx): 
    x = torch.FloatTensor(self.x_data[idx])
    y = torch.FloatTensor(self.y_data[idx])
    return x, y


In [24]:
dataset = CustomDataset()
dataset

<__main__.CustomDataset at 0x7f223e175430>

In [25]:
dataset.__len__()

5

In [31]:
dataset.__getitem__(0)

(tensor([73., 80., 75.]), tensor([152.]))

In [None]:
dataloader = DataLoader(dataset, batch_size=2, shuffle=True)

In [33]:
nb_epochs = 20
for epoch in range(nb_epochs + 1):
  for batch_idx, samples in enumerate(dataloader):
    # print(batch_idx)
    # print(samples)
    x_train, y_train = samples
    # H(x) 계산
    prediction = model(x_train)

    # cost 계산
    cost = F.mse_loss(prediction, y_train)

    # cost로 H(x) 계산
    optimizer.zero_grad()
    cost.backward()
    optimizer.step()

    print('Epoch {:4d}/{} Batch {}/{} Cost: {:.6f}'.format(
        epoch, nb_epochs, batch_idx+1, len(dataloader),
        cost.item()
        ))


Epoch    0/20 Batch 1/3 Cost: 768.688354
Epoch    0/20 Batch 2/3 Cost: 305.888428
Epoch    0/20 Batch 3/3 Cost: 299.285339
Epoch    1/20 Batch 1/3 Cost: 305.888428
Epoch    1/20 Batch 2/3 Cost: 704.083252
Epoch    1/20 Batch 3/3 Cost: 428.495544
Epoch    2/20 Batch 1/3 Cost: 305.888428
Epoch    2/20 Batch 2/3 Cost: 704.083252
Epoch    2/20 Batch 3/3 Cost: 428.495544
Epoch    3/20 Batch 1/3 Cost: 803.082520
Epoch    3/20 Batch 2/3 Cost: 363.890442
Epoch    3/20 Batch 3/3 Cost: 114.492874
Epoch    4/20 Batch 1/3 Cost: 206.889099
Epoch    4/20 Batch 2/3 Cost: 803.082520
Epoch    4/20 Batch 3/3 Cost: 428.495544
Epoch    5/20 Batch 1/3 Cost: 398.284668
Epoch    5/20 Batch 2/3 Cost: 271.494202
Epoch    5/20 Batch 3/3 Cost: 1108.881104
Epoch    6/20 Batch 1/3 Cost: 462.889771
Epoch    6/20 Batch 2/3 Cost: 206.889099
Epoch    6/20 Batch 3/3 Cost: 1108.881104
Epoch    7/20 Batch 1/3 Cost: 271.494202
Epoch    7/20 Batch 2/3 Cost: 398.284668
Epoch    7/20 Batch 3/3 Cost: 1108.881104
Epoch    8/20