### Import

In [1]:
import numpy as np
import torch

### Data

In [2]:
# Synthetic regression data: 5 uniform-random features and 1 uniform-random
# target per sample, all drawn from [0, 1).
SEED = 42          # fixed seed so Restart & Run All reproduces the same arrays
data_size = 10000  # number of samples

rng = np.random.default_rng(SEED)
x_train = rng.random((data_size, 5))
y_train = rng.random((data_size, 1))

In [3]:
x_train.shape

(10000, 5)

In [4]:
y_train.shape

(10000, 1)

### PyTorch Dataset

In [5]:
from torch.utils.data import Dataset

In [6]:
class CustomDataset(Dataset):
    """Map-style dataset that pairs feature rows with their targets.

    Indexing returns an ``(x, y)`` tuple of ``float32`` tensors. ``idx`` is
    forwarded unchanged to ``x_data`` and ``y_data``, so any index those
    containers accept (an integer, a slice, ...) can be used.
    """

    def __init__(self, x_data, y_data):
        """Store the two sequences after checking they are the same length.

        Args:
            x_data: Indexable collection of features (e.g. a NumPy array).
            y_data: Indexable collection of targets with ``len(x_data)`` items.

        Raises:
            AssertionError: If the two collections differ in length.
        """
        assert len(x_data) == len(y_data), (
            "x_data and y_data must have the same length, "
            f"got {len(x_data)} and {len(y_data)}"
        )
        self.x_data = x_data
        self.y_data = y_data

    def __len__(self):
        """Return the number of samples."""
        return len(self.x_data)

    def __getitem__(self, idx):
        """Return the ``(x, y)`` pair at ``idx`` as ``float32`` tensors."""
        # torch.as_tensor is used instead of the legacy torch.FloatTensor
        # constructor: the legacy form treats a bare integer scalar as a
        # *size* and rejects a float scalar, so indexing 1-D data would
        # yield garbage or raise.
        x = torch.as_tensor(self.x_data[idx], dtype=torch.float32)
        y = torch.as_tensor(self.y_data[idx], dtype=torch.float32)
        return x, y

In [7]:
dataset = CustomDataset(x_train, y_train)

#### 1. Shape 확인

In [8]:
dataset.x_data.shape

(10000, 5)

In [9]:
dataset.y_data.shape

(10000, 1)

#### 2. Length 확인

In [10]:
len(dataset)

10000

#### 3. Indexing 확인

In [11]:
dataset[0:3]

(tensor([[0.0657, 0.7560, 0.8256, 0.7502, 0.8265],
         [0.5191, 0.9997, 0.8264, 0.9663, 0.0351],
         [0.2699, 0.9376, 0.1843, 0.8808, 0.9254]]),
 tensor([[0.9059],
         [0.4412],
         [0.2362]]))

### PyTorch DataLoader

In [12]:
from torch.utils.data import DataLoader

In [13]:
# Batches of 3 samples in dataset order (shuffle=False), so the first batch
# can be compared directly with the `dataset[0:3]` indexing check.
dataloader = DataLoader(dataset, batch_size=3, shuffle=False)

In [14]:
# Pull only the first batch. next(iter(...)) raises StopIteration on an empty
# loader instead of silently leaving batch_x_train / batch_y_train undefined
# (or stale from an earlier kernel run), as a for/break loop would.
samples = next(iter(dataloader))
batch_x_train, batch_y_train = samples

In [15]:
batch_x_train

tensor([[0.0657, 0.7560, 0.8256, 0.7502, 0.8265],
        [0.5191, 0.9997, 0.8264, 0.9663, 0.0351],
        [0.2699, 0.9376, 0.1843, 0.8808, 0.9254]])

In [16]:
batch_y_train

tensor([[0.9059],
        [0.4412],
        [0.2362]])