# CIFAR-10 Dataset Handling with Atria

## Setup and Auto-reloading Modules
We enable auto-reloading of modules so that any changes in imported libraries are automatically reflected.

In [1]:
%load_ext autoreload
%autoreload 2

## Importing Dependencies
Here, we modify the system path to include the project's root directory and import necessary modules for dataset handling.

## Loading the CIFAR-10 Dataset
We load the CIFAR-10 dataset using the `CIFAR10.load` method, specifying the training split.

In [4]:
from atria_core.utilities.imports import _get_package_base_path

from atria_datasets import AtriaImageDataset, FileStorageType

package_path = _get_package_base_path("atria")
dataset = AtriaImageDataset.load_from_registry(
    name="cifar10",
    provider="atria_datasets",
    build_kwargs ={
        "max_train_samples": 1000,
        "max_test_samples": 1000,
        "max_validation_samples": 1000,
    }
)
dataset.train.dataframe()


[2025-07-21 20:06:23][atria_datasets.core.dataset.atria_dataset][INFO] Loading dataset cifar10 from registry.
[2025-07-21 20:06:23][atria_datasets.core.dataset.atria_dataset][INFO] Setting up data directory: /mnt/hephaistos/.atria/datasets/cifar10


AttributeError: 'Cifar10' object has no attribute '_storage_dir'

## Creating batched instances from a list of samples
We create a list of samples and then call batched on the list which is the class method of the specific instance

In [6]:
# Make a list of instances
instances = [
    dataset.train[i].to_tensor() for i in range(2)
]

# Batch the instances
batched = instances[0].batched(instances)

# Display the batched instances
print(batched.image.content[0] - dataset.train[0].to_tensor().image.content)
print(batched.image.content[1] - dataset.train[1].to_tensor().image.content)



tensor([[[0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         ...,
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.]],

        [[0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         ...,
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.]],

        [[0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         ...,
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.]]])
tensor([[[0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         ...,
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 

## Dataset handling with File Storage
Load the dataset with a file storage manager that first caches the data into disk

In [None]:
from atria.data.storage.file_storage_manager import FileStorageManager
from atria.data.storage.utilities import FileStorageType

# Creat a file storage manager
file_storage_manager = FileStorageManager(
    storage_dir="/tmp", streaming_mode=False, storage_type=FileStorageType.MSGPACK, 
    max_samples=100, # save up to 100 samples
)

TypeError: FileStorageManager.__init__() got an unexpected keyword argument 'storage_dir'

In [None]:
from atria_core.types import DatasetSplitType
from atria_examples.datasets.cifar10 import Cifar10

# load the dataset with the file storage manager
cifar10 = Cifar10.load(
    split=DatasetSplitType.train,
    storage_manager=file_storage_manager,
)

AttributeError: type object 'Cifar10' has no attribute 'load'

In [None]:
# Extract a sample instance from the dataset
cifar10[0]

ImageInstance(
    index=0,
    id=UUID('629095eb-cad3-4e9d-8b27-6e989ab27bba'),
    image=Image(
        file_path=None,
        content=<PIL.PngImagePlugin.PngImageFile image mode=RGB size=32x32 at 0x7760B0786900>,
        source_size=None,
        shape=(3, 32, 32),
        dtype=None
    ),
    label=Label(value=6, name='frog')
)