# Datasets

In [7]:
from pathlib import Path

ROOT_DIR = Path('..') / '..'
!pip install -q -r {ROOT_DIR / 'requirements.txt'}

## Utility Functions

In [8]:
from datasets import SizedDataset
from torch.utils.data import DataLoader

def load_dataset(dataset: SizedDataset, batch_size: int = 5) -> None:
    """
    Iterates over the provided dataset in batches and prints every batch of data
    samples followed by the corresponding labels.

    Nothing is returned; the function only writes to standard output.

    :param dataset: An instance of `SizedDataset` containing data samples and labels.
    :param batch_size: Number of examples per printed batch. Defaults to 5. The
        final batch is smaller when the dataset size is not a multiple of it.
    """
    # Shuffling is not requested, so batches follow the dataset's own order.
    batches = DataLoader(dataset, batch_size=batch_size)
    for samples, labels in batches:
        # Header lines show dtype and shape so the tensor layout is visible
        # before the values themselves.
        print(f"Data::{samples.dtype} {samples.shape}:")
        print(samples)
        print(f"Labels::{labels.dtype} {labels.shape}:")
        print(labels)

## Random Uniform Dataset

In [9]:
from datasets import RandomUniformDataset

# Build a RandomUniformDataset with 10 examples, 8 features and 10 classes,
# then print its contents batch by batch.
load_dataset(
    RandomUniformDataset(
        n_examples=10,
        n_features=8,
        n_classes=10,
    )
)

Data::torch.float32 torch.Size([5, 8]):
tensor([[0.4859, 0.3153, 0.7910, 0.8350, 0.0757, 0.0572, 0.8253, 0.2244],
        [0.1048, 0.7102, 0.9724, 0.7968, 0.4890, 0.2614, 0.3919, 0.8870],
        [0.5683, 0.1230, 0.1857, 0.4673, 0.1865, 0.8768, 0.7154, 0.3217],
        [0.0305, 0.6168, 0.2686, 0.2557, 0.5193, 0.0839, 0.5857, 0.5615],
        [0.7598, 0.7832, 0.5571, 0.0155, 0.8894, 0.0213, 0.1412, 0.6017]])
Labels::torch.int64 torch.Size([5]):
tensor([1, 2, 2, 9, 1])
Data::torch.float32 torch.Size([5, 8]):
tensor([[0.3063, 0.1793, 0.9126, 0.1125, 0.2008, 0.7599, 0.3883, 0.8880],
        [0.3749, 0.8421, 0.1052, 0.9782, 0.1039, 0.0119, 0.4766, 0.0864],
        [0.0158, 0.3021, 0.4151, 0.6539, 0.6617, 0.3745, 0.8317, 0.3407],
        [0.6944, 0.1863, 0.2394, 0.6343, 0.1032, 0.2663, 0.1582, 0.0735],
        [0.6568, 0.8372, 0.8177, 0.3207, 0.8054, 0.1480, 0.0408, 0.4844]])
Labels::torch.int64 torch.Size([5]):
tensor([6, 5, 4, 2, 2])


## Random Normal Dataset

In [10]:
from datasets import RandomNormalDataset

# Build a RandomNormalDataset with 10 examples, 8 features and 10 classes,
# then print its contents batch by batch.
load_dataset(
    RandomNormalDataset(
        n_examples=10,
        n_features=8,
        n_classes=10,
    )
)

Data::torch.float32 torch.Size([5, 8]):
tensor([[-0.5836,  1.6883, -0.1915, -1.0629, -0.8910,  1.0972, -0.1911, -0.3265],
        [ 0.4118,  1.0188, -0.0678,  0.0740,  1.3948,  1.5295,  1.2983,  0.6214],
        [ 0.2540, -0.6213,  0.8664, -0.2767,  0.8486,  0.7382, -0.6392, -0.3179],
        [-0.0293, -0.2494,  0.7948,  0.8547, -0.8654, -0.7093, -0.5146, -1.2879],
        [ 0.3160, -1.1510,  2.1285, -0.7282, -0.1888, -1.0729, -0.0365,  2.9816]])
Labels::torch.int64 torch.Size([5]):
tensor([4, 8, 7, 8, 9])
Data::torch.float32 torch.Size([5, 8]):
tensor([[ 0.7021, -0.3555,  0.3389, -0.8143,  0.1166, -0.1489, -0.1894, -0.0329],
        [-1.6957, -1.1465, -0.2308,  1.5503, -0.1744,  0.6963, -0.3979, -1.0032],
        [ 0.3360,  0.1605,  0.8000, -0.6242,  1.5249,  0.1966,  1.9224, -0.4823],
        [-0.8446, -1.2487,  0.9061, -0.1132,  0.0975, -2.0703,  0.4468,  1.5608],
        [-0.8773,  0.3835, -0.1492, -0.1093, -0.5356, -0.4536,  0.4860, -1.4893]])
Labels::torch.int64 torch.Size([5]):


## Bernoulli Dataset

In [11]:
from datasets import BernoulliDataset

# Build a BernoulliDataset with 10 examples, 8 features and 10 classes,
# then print its contents batch by batch.
load_dataset(
    BernoulliDataset(
        n_examples=10,
        n_features=8,
        n_classes=10,
    )
)

Data::torch.float32 torch.Size([5, 8]):
tensor([[0., 0., 0., 0., 0., 1., 1., 0.],
        [0., 0., 0., 0., 1., 0., 1., 1.],
        [0., 1., 1., 1., 0., 0., 1., 0.],
        [1., 0., 0., 1., 0., 0., 0., 0.],
        [1., 1., 0., 1., 0., 0., 0., 0.]])
Labels::torch.int64 torch.Size([5]):
tensor([2, 7, 5, 5, 2])
Data::torch.float32 torch.Size([5, 8]):
tensor([[1., 0., 1., 0., 1., 1., 1., 0.],
        [1., 1., 1., 1., 0., 0., 0., 0.],
        [0., 1., 1., 1., 0., 0., 0., 1.],
        [0., 1., 0., 0., 0., 0., 0., 0.],
        [0., 1., 1., 1., 0., 0., 0., 1.]])
Labels::torch.int64 torch.Size([5]):
tensor([1, 0, 6, 1, 1])
