## PyTorch: Wine Dataset

In [8]:
import torch
import numpy as np
import pandas as pd
import math

## Custom Dataset & DataLoader 사용
1. custom dataset class로 `Dataset` 구축
2. `DataLoader`로 구축

In [9]:
from torch.utils.data import Dataset, DataLoader

In [10]:
class WineDataset(Dataset):
    """Map-style dataset built from a wine CSV file.

    A custom ``Dataset`` follows the convention of implementing three
    methods: ``__init__``, ``__getitem__`` and ``__len__``.

    The first CSV column is exposed as the target and every remaining column
    as a feature. All values are stored as float32 tensors.
    """

    def __init__(self, csv_path='../../data/wine.csv'):
        """Read the CSV and split it into feature and target tensors.

        Args:
            csv_path: Path of the CSV file to load. Defaults to the location
                this notebook originally hard-coded, so ``WineDataset()``
                keeps working unchanged.
        """
        frame = pd.read_csv(csv_path)
        # Cast everything to float32 (the dtype used for the tensors here);
        # ``astype`` also yields a fresh array for ``from_numpy`` to wrap.
        values = frame.values.astype(np.float32)
        self.n_samples = values.shape[0]

        # Columns 1.. are the features.
        self.X_data = torch.from_numpy(values[:, 1:])
        # Indexing with [0] (a list) keeps the target 2-D: shape (n_samples, 1).
        self.y_data = torch.from_numpy(values[:, [0]])

    def __getitem__(self, index):
        """Return the ``(features, target)`` pair stored at ``index``."""
        return self.X_data[index], self.y_data[index]

    def __len__(self):
        """Return the number of rows read from the CSV."""
        return self.n_samples

In [11]:
# Build the dataset (the CSV is read inside WineDataset.__init__) and show its repr.
dataset = WineDataset()
dataset

<__main__.WineDataset at 0x2083bb83700>

In [12]:
# Index the dataset to get the (features, target) pair of the second row.
dataset[1]

(tensor([1.3200e+01, 1.7800e+00, 2.1400e+00, 1.1200e+01, 1.0000e+02, 2.6500e+00,
         2.7600e+00, 2.6000e-01, 1.2800e+00, 4.3800e+00, 1.0500e+00, 3.4000e+00,
         1.0500e+03]),
 tensor([1.]))

In [13]:
# Wrap the dataset in a loader that yields shuffled mini-batches of 16 samples.
train_loader = DataLoader(dataset, shuffle=True, batch_size=16)

In [14]:
# Display the loader object itself (only its repr, no data is drawn).
train_loader

<torch.utils.data.dataloader.DataLoader at 0x2083bb831f0>

In [15]:
# Create a fresh iterator over the loader and pull a single batch to inspect it.
next(iter(train_loader))

[tensor([[1.3900e+01, 1.6800e+00, 2.1200e+00, 1.6000e+01, 1.0100e+02, 3.1000e+00,
          3.3900e+00, 2.1000e-01, 2.1400e+00, 6.1000e+00, 9.1000e-01, 3.3300e+00,
          9.8500e+02],
         [1.3050e+01, 1.6500e+00, 2.5500e+00, 1.8000e+01, 9.8000e+01, 2.4500e+00,
          2.4300e+00, 2.9000e-01, 1.4400e+00, 4.2500e+00, 1.1200e+00, 2.5100e+00,
          1.1050e+03],
         [1.1790e+01, 2.1300e+00, 2.7800e+00, 2.8500e+01, 9.2000e+01, 2.1300e+00,
          2.2400e+00, 5.8000e-01, 1.7600e+00, 3.0000e+00, 9.7000e-01, 2.4400e+00,
          4.6600e+02],
         [1.2000e+01, 9.2000e-01, 2.0000e+00, 1.9000e+01, 8.6000e+01, 2.4200e+00,
          2.2600e+00, 3.0000e-01, 1.4300e+00, 2.5000e+00, 1.3800e+00, 3.1200e+00,
          2.7800e+02],
         [1.1610e+01, 1.3500e+00, 2.7000e+00, 2.0000e+01, 9.4000e+01, 2.7400e+00,
          2.9200e+00, 2.9000e-01, 2.4900e+00, 2.6500e+00, 9.6000e-01, 3.2600e+00,
          6.8000e+02],
         [1.1760e+01, 2.6800e+00, 2.9200e+00, 2.0000e+01, 1.0300e

## TensorDataset & DataLoader

In [16]:
import pandas as pd
import numpy as np

# Read the raw wine table from disk, then preview its first five rows.
wine = pd.read_csv('../../data/wine.csv')
wine.head(5)

Unnamed: 0,Wine,Alcohol,Malic.acid,Ash,Acl,Mg,Phenols,Flavanoids,Nonflavanoid.phenols,Proanth,Color.int,Hue,OD,Proline
0,1,14.23,1.71,2.43,15.6,127,2.8,3.06,0.28,2.29,5.64,1.04,3.92,1065
1,1,13.2,1.78,2.14,11.2,100,2.65,2.76,0.26,1.28,4.38,1.05,3.4,1050
2,1,13.16,2.36,2.67,18.6,101,2.8,3.24,0.3,2.81,5.68,1.03,3.17,1185
3,1,14.37,1.95,2.5,16.8,113,3.85,3.49,0.24,2.18,7.8,0.86,3.45,1480
4,1,13.24,2.59,2.87,21.0,118,2.8,2.69,0.39,1.82,4.32,1.04,2.93,735


In [17]:
# Convert the table to a float32 array. np.array accepts both the DataFrame
# read from the CSV and the ndarray this cell leaves in `wine`, so the cell
# can be re-run safely (`wine.values` raises AttributeError on an ndarray).
# np.array copies by default, matching the fresh array `astype` produced.
wine = np.array(wine, dtype=np.float32)
# Columns 1.. are the features.
X_data = torch.from_numpy(wine[:, 1:])
# Column 0 is the target; the list index [0] keeps it 2-D, shape (n, 1).
y_data = torch.from_numpy(wine[:, [0]])

In [18]:
from torch.utils.data import TensorDataset

# Pair the feature and target tensors in a TensorDataset and show its repr.
# NOTE(review): this rebinds `dataset`, replacing the WineDataset instance created earlier.
dataset = TensorDataset(X_data, y_data)
dataset

<torch.utils.data.dataset.TensorDataset at 0x2083bb83100>

In [19]:
# Index the TensorDataset to get the (features, target) pair of the first row.
dataset[0]

(tensor([1.4230e+01, 1.7100e+00, 2.4300e+00, 1.5600e+01, 1.2700e+02, 2.8000e+00,
         3.0600e+00, 2.8000e-01, 2.2900e+00, 5.6400e+00, 1.0400e+00, 3.9200e+00,
         1.0650e+03]),
 tensor([1.]))

In [20]:
# Wrap the TensorDataset in a loader that yields shuffled mini-batches of 8 samples.
train_loader = DataLoader(dataset, shuffle=True, batch_size=8)

In [22]:
# Display the loader object itself (only its repr, no data is drawn).
train_loader

<torch.utils.data.dataloader.DataLoader at 0x2083bb830d0>

In [23]:
next(iter(train_loader)) # Inspect one randomly drawn batch (8 samples)

[tensor([[1.2420e+01, 4.4300e+00, 2.7300e+00, 2.6500e+01, 1.0200e+02, 2.2000e+00,
          2.1300e+00, 4.3000e-01, 1.7100e+00, 2.0800e+00, 9.2000e-01, 3.1200e+00,
          3.6500e+02],
         [1.2290e+01, 1.4100e+00, 1.9800e+00, 1.6000e+01, 8.5000e+01, 2.5500e+00,
          2.5000e+00, 2.9000e-01, 1.7700e+00, 2.9000e+00, 1.2300e+00, 2.7400e+00,
          4.2800e+02],
         [1.1640e+01, 2.0600e+00, 2.4600e+00, 2.1600e+01, 8.4000e+01, 1.9500e+00,
          1.6900e+00, 4.8000e-01, 1.3500e+00, 2.8000e+00, 1.0000e+00, 2.7500e+00,
          6.8000e+02],
         [1.3860e+01, 1.3500e+00, 2.2700e+00, 1.6000e+01, 9.8000e+01, 2.9800e+00,
          3.1500e+00, 2.2000e-01, 1.8500e+00, 7.2200e+00, 1.0100e+00, 3.5500e+00,
          1.0450e+03],
         [1.1820e+01, 1.4700e+00, 1.9900e+00, 2.0800e+01, 8.6000e+01, 1.9800e+00,
          1.6000e+00, 3.0000e-01, 1.5300e+00, 1.9500e+00, 9.5000e-01, 3.3300e+00,
          4.9500e+02],
         [1.3300e+01, 1.7200e+00, 2.1400e+00, 1.7000e+01, 9.4000e