# How to create and load a Dataset in PyTorch

In [1]:
import torch

In [2]:
from torch.utils.data import Dataset

class CustomDataset(Dataset):
  """
  Map-style dataset that pairs each entry of ``X`` with the entry of ``y``
  stored at the same position.

  Args:
    X: sized, indexable collection of samples (supports ``len()`` and ``[]``).
    y: sized, indexable collection of targets, one per sample.

  Raises:
    ValueError: if ``X`` and ``y`` do not contain the same number of entries.
  """

  def __init__(self, X, y):
    # Fail fast: __len__ is driven by X alone, so a shorter y would only
    # surface later as an IndexError in the middle of iteration, and a
    # longer y would leave labels silently unused.
    if len(X) != len(y):
      raise ValueError(
        f"X and y must have the same length, got {len(X)} and {len(y)}"
      )
    self.X = X
    self.y = y

  def __len__(self):
    """
    Returns the total number of samples available in our dataset
    """
    return len(self.X)

  def __getitem__(self, idx):
    """
    Returns the ``(sample, target)`` pair stored at position ``idx``
    """
    return (self.X[idx], self.y[idx])

In [3]:
from sklearn.datasets import make_classification

In [4]:
?make_classification

In [5]:
# Fixed seed so the synthetic data is identical on every Restart & Run All.
data, target = make_classification(n_samples=1000, n_features=5, random_state=42)

In [6]:
custom_dataset = CustomDataset(X=data, y=target)

In [7]:
len(custom_dataset)

1000

In [8]:
custom_dataset[0]

(array([-1.3488874 ,  0.52062187,  0.97409318, -0.83495675, -0.84824572]), 0)

In [9]:
data[0]

array([-1.3488874 ,  0.52062187,  0.97409318, -0.83495675, -0.84824572])

In [10]:
custom_dataset[0][0].shape

(5,)

In [11]:
from sklearn.datasets import make_multilabel_classification

In [12]:
?make_multilabel_classification

In [13]:
# Fixed seed so the synthetic multilabel data is identical on every Restart & Run All.
data, target = make_multilabel_classification(n_samples=1000, n_features=5, n_classes=3, random_state=42)

In [14]:
custom_dataset_mlb = CustomDataset(X=data, y=target)

In [15]:
custom_dataset_mlb[0]

(array([17.,  8., 13., 12.,  5.]), array([0, 1, 1]))

In [16]:
custom_dataset_mlb[3]

(array([10.,  8., 16., 12.,  7.]), array([0, 1, 0]))

In [17]:
from torch.utils.data import DataLoader

In [18]:
data_loader = DataLoader(dataset=custom_dataset, batch_size=8, shuffle=True)

In [19]:
?DataLoader

In [20]:
custom_dataset[0]

(array([-1.3488874 ,  0.52062187,  0.97409318, -0.83495675, -0.84824572]), 0)

In [21]:
data_iter = iter(data_loader)

In [22]:
data_iter

<torch.utils.data.dataloader._SingleProcessDataLoaderIter at 0x7fcdd1c2b9d0>

In [23]:
# Use the built-in next(): DataLoader iterators implement __next__ only, and the
# Python-2-style .next() method raises AttributeError on current PyTorch releases.
data_02 = next(data_iter)

In [24]:
features_02, target_02 = data_02

In [25]:
features_02

tensor([[-0.5162,  0.6790, -1.7477, -1.1760, -1.0465],
        [-1.6182, -0.3973,  0.8785,  0.8225,  0.5200],
        [ 0.1541, -0.8604,  0.3391,  1.5250,  1.3019],
        [-1.3220, -0.4310,  1.6351,  0.8619,  0.5849],
        [-0.7036, -0.5822, -0.4258,  1.0885,  0.8421],
        [ 2.6149, -1.6673, -0.5539,  2.7933,  2.6345],
        [ 0.1999, -0.0707, -1.2409,  0.1123,  0.1160],
        [-2.1790, -0.1865,  2.3925,  0.4855,  0.1758]], dtype=torch.float64)

**End of Lecture 05**