In [0]:
import torch
import torchvision
from torch.utils.data import Dataset, DataLoader
import numpy as np
import math

In [0]:
class WineDataset(Dataset):
    """Wine data exposed as a map-style PyTorch dataset of float32 tensors.

    The source is a comma-separated file whose first line is a header (it is
    skipped). In every remaining row the first column is used as the label
    and all other columns are used as the features.

    Args:
        path: Optional file path or URL of the CSV, or anything else
            ``np.loadtxt`` accepts as ``fname``. When omitted, the CSV is
            fetched from ``DEFAULT_URL``.

    Attributes (set in ``__init__``):
        x: Feature tensor with one row per sample.
        y: Label tensor with one single-element row per sample.
        n_samples: Number of rows loaded.
    """

    # Location used when no explicit path is given.
    DEFAULT_URL = "https://raw.githubusercontent.com/python-engineer/pytorchTutorial/master/data/wine/wine.csv"

    def __init__(self, path=None):
        source = self.DEFAULT_URL if path is None else path
        # ndmin=2 keeps the array two-dimensional even when the file holds a
        # single data row, so the column slicing below always works.
        xy = np.loadtxt(source, delimiter=",", dtype=np.float32, skiprows=1, ndmin=2)
        self.x = torch.from_numpy(xy[:, 1:])
        # Indexing with the list [0] keeps the column axis, so each label is
        # a 1-element tensor rather than a scalar.
        self.y = torch.from_numpy(xy[:, [0]])
        self.n_samples = xy.shape[0]

    def __getitem__(self, index):
        """Return the ``(features, label)`` pair at ``index``; enables ``dataset[i]``."""
        return self.x[index], self.y[index]

    def __len__(self):
        """Return the number of samples; enables ``len(dataset)``."""
        return self.n_samples

In [3]:
dataset = WineDataset()

# Index 0 yields the (x, y) pair of the first sample.
first_data = dataset[0]
features = first_data[0]
labels = first_data[1]
print(features, labels)

tensor([1.4230e+01, 1.7100e+00, 2.4300e+00, 1.5600e+01, 1.2700e+02, 2.8000e+00,
        3.0600e+00, 2.8000e-01, 2.2900e+00, 5.6400e+00, 1.0400e+00, 3.9200e+00,
        1.0650e+03]) tensor([1.])


DataLoader를 사용하면
- 데이터셋을 반복(iterate)하면서 데이터를 배치 단위로 넘겨줄 수 있다.
- batch_size로 한 번에 가져올 샘플 수를 조절할 수 있다.

In [4]:
# Wrap the dataset so it can be iterated in shuffled mini-batches of 4 samples.
dataloader = DataLoader(dataset=dataset, batch_size=4, shuffle=True, num_workers=2)

# Fetch one batch. The built-in next() is used because the iterator's
# .next() method is not available in current PyTorch releases.
dataiter = iter(dataloader)
data = next(dataiter)

features, labels = data
print(features, labels)

# Four samples come back because batch_size = 4.

tensor([[1.1790e+01, 2.1300e+00, 2.7800e+00, 2.8500e+01, 9.2000e+01, 2.1300e+00,
         2.2400e+00, 5.8000e-01, 1.7600e+00, 3.0000e+00, 9.7000e-01, 2.4400e+00,
         4.6600e+02],
        [1.3520e+01, 3.1700e+00, 2.7200e+00, 2.3500e+01, 9.7000e+01, 1.5500e+00,
         5.2000e-01, 5.0000e-01, 5.5000e-01, 4.3500e+00, 8.9000e-01, 2.0600e+00,
         5.2000e+02],
        [1.2290e+01, 1.4100e+00, 1.9800e+00, 1.6000e+01, 8.5000e+01, 2.5500e+00,
         2.5000e+00, 2.9000e-01, 1.7700e+00, 2.9000e+00, 1.2300e+00, 2.7400e+00,
         4.2800e+02],
        [1.2330e+01, 1.1000e+00, 2.2800e+00, 1.6000e+01, 1.0100e+02, 2.0500e+00,
         1.0900e+00, 6.3000e-01, 4.1000e-01, 3.2700e+00, 1.2500e+00, 1.6700e+00,
         6.8000e+02]]) tensor([[2.],
        [3.],
        [2.],
        [2.]])


In [5]:
# training loop
num_epochs = 2
total_samples = len(dataset)
# Iterations per epoch. The batch size is read from the loader rather than
# repeated as a literal, so the two cannot drift apart.
n_iter = math.ceil(total_samples / dataloader.batch_size)
print(total_samples, n_iter)

# With 178 samples and a batch size of 4, one epoch takes ceil(178 / 4) = 45
# iterations, so running 2 epochs takes 90 iterations in total.

178 45


In [6]:
for epoch in range(num_epochs):
    for i, (inputs, labels) in enumerate(dataloader):
        # Placeholder: the forward pass, backward pass and parameter update
        # would go here.
        # Report progress on every 5th step only.
        if (i+1) % 5 != 0:
            continue
        print(f'epoch {epoch+1}/{num_epochs}, step {i+1}/{n_iter}, input_shape {inputs.shape}')

epoch 1/2, step 5/45, input_shape torch.Size([4, 13])
epoch 1/2, step 10/45, input_shape torch.Size([4, 13])
epoch 1/2, step 15/45, input_shape torch.Size([4, 13])
epoch 1/2, step 20/45, input_shape torch.Size([4, 13])
epoch 1/2, step 25/45, input_shape torch.Size([4, 13])
epoch 1/2, step 30/45, input_shape torch.Size([4, 13])
epoch 1/2, step 35/45, input_shape torch.Size([4, 13])
epoch 1/2, step 40/45, input_shape torch.Size([4, 13])
epoch 1/2, step 45/45, input_shape torch.Size([2, 13])
epoch 2/2, step 5/45, input_shape torch.Size([4, 13])
epoch 2/2, step 10/45, input_shape torch.Size([4, 13])
epoch 2/2, step 15/45, input_shape torch.Size([4, 13])
epoch 2/2, step 20/45, input_shape torch.Size([4, 13])
epoch 2/2, step 25/45, input_shape torch.Size([4, 13])
epoch 2/2, step 30/45, input_shape torch.Size([4, 13])
epoch 2/2, step 35/45, input_shape torch.Size([4, 13])
epoch 2/2, step 40/45, input_shape torch.Size([4, 13])
epoch 2/2, step 45/45, input_shape torch.Size([2, 13])
