# **Dataloaders**

- A dataloader feeds data to the model.
- It delivers that data in batches.

In [1]:
import torch

In [2]:
# generated data

from sklearn.datasets import make_classification

# X: feature matrix with one row per sample; y: the matching class labels.
# 1000 synthetic samples are generated; no random seed is set, so the
# values differ from run to run.
X, y = make_classification(n_samples=1000)

In [3]:
class CustomDataset(torch.utils.data.Dataset):
    """Map-style dataset pairing feature rows with classification targets.

    Every item is returned as a dict of tensors so that a
    ``torch.utils.data.DataLoader`` can collate items into batches.

    Args:
        data: Indexable, sized collection of samples (e.g. a NumPy array
            or a list of rows); ``data[i]`` is the feature vector of
            sample ``i``.
        targets: Indexable, sized collection of labels aligned with
            ``data``; ``targets[i]`` is the label of sample ``i``.

    Raises:
        ValueError: If ``data`` and ``targets`` differ in length.
    """

    def __init__(self, data, targets):
        # Fail early: a length mismatch would otherwise surface as an
        # IndexError mid-epoch, or silently drop the surplus targets.
        if len(data) != len(targets):
            raise ValueError(
                f"data and targets must have the same length, "
                f"got {len(data)} and {len(targets)}"
            )
        self.data = data
        self.targets = targets

    def __len__(self):
        """Return the number of samples in the dataset."""
        # len() rather than .shape[0] so plain sequences (lists, tuples)
        # work as well as arrays and tensors.
        return len(self.data)

    def __getitem__(self, i):
        """Return sample ``i`` as ``{'sample': float tensor, 'target': long tensor}``."""
        sample = self.data[i]
        target = self.targets[i]

        # Target dtype depends on the task:
        #   long  -> classification (class indices)
        #   float -> regression
        return {
            'sample': torch.tensor(sample, dtype=torch.float),
            'target': torch.tensor(target, dtype=torch.long),
        }

In [4]:
# Wrap the generated features and labels in the dataset defined above.
dataset = CustomDataset(data=X, targets=y)

In [5]:
# len() goes through CustomDataset.__len__ and reports the number of samples.
len(dataset)

1000

In [6]:
# Indexing goes through CustomDataset.__getitem__: one sample and its target,
# returned as a dict of tensors.
dataset[9]

{'sample': tensor([ 1.8621,  0.7880, -0.7487, -1.2007,  0.4720,  0.5668, -0.9819, -0.2373,
         -1.0462, -0.5145, -0.0193, -1.1190, -0.4362,  0.2177, -1.2890, -0.1351,
         -2.1009, -0.4279, -0.7694, -1.4855]), 'target': tensor(0)}

In [7]:
# show the documentation of the DataLoader class (IPython `?` help syntax)
?torch.utils.data.DataLoader

In [29]:
# Build the training loader on top of the dataset:
#   batch_size=4   -> each batch groups four samples
#   shuffle=True   -> sample order is reshuffled every epoch
#   num_workers=2  -> two worker subprocesses load the data
train_loader = torch.utils.data.DataLoader(
    dataset,
    batch_size=4,
    shuffle=True,
    num_workers=2,
)

In [30]:
# A DataLoader is an iterable object, not a generator: it has a length, and
# batches are obtained by looping over it or by calling iter() on it.
train_loader

<torch.utils.data.dataloader.DataLoader at 0x7f41f305c590>

In [35]:
len(train_loader)  # number of batches: 1000 samples / batch size 4 = 250

250

In [33]:
# Each batch is a dict with the same keys as a single item:
#   'sample': shape (batch_size, n_features)
#   'target': shape (batch_size,)
next(iter(train_loader))

{'sample': tensor([[ 2.0997,  0.2111,  1.8036, -1.3238, -0.5278,  0.2214, -1.7801, -0.7269,
           0.3412, -0.5125, -1.9458, -0.2825, -0.4426,  0.1222, -0.7817,  0.3509,
          -0.5815,  0.7762,  1.1726, -0.0794],
         [-0.0805, -1.8581, -1.5994, -0.1696,  0.1292,  1.1406, -0.9438, -0.7100,
          -0.6918, -0.4428, -0.6667, -0.1050,  0.7883, -0.6043, -0.3263, -0.7439,
          -0.2499, -0.2531, -0.4534, -0.7421],
         [ 1.3136, -1.0292,  1.2672,  0.8759,  0.6166, -0.0388,  0.9167,  1.0243,
           0.6572,  0.8138,  0.6286,  0.6688, -0.4730,  1.1911,  1.5173, -0.7741,
           1.3233, -0.2294,  0.5921, -0.0631],
         [ 0.2041, -0.4118, -0.3193, -0.3659, -0.1403, -1.4606,  1.0421,  1.0871,
           0.0341,  0.7892,  0.5120,  0.4898, -0.6946,  1.6081,  1.0357,  0.3273,
           0.9948, -1.0289,  1.3176,  0.1646]]),
 'target': tensor([0, 0, 1, 1])}