## Basic Dataset

In [1]:
import torch
from torch.utils.data import Dataset

class SquareDataset(Dataset):
    """Dataset whose samples are (value, value squared) pairs."""

    def __init__(self, data):
        """
        Keep a reference to the input values.

        Args:
            data (list or array-like): Values that serve as the inputs.
        """
        self.data = data

    def __len__(self):
        """
        Number of samples, i.e. the length of the stored data.
        """
        return len(self.data)

    def __getitem__(self, idx):
        """
        Look up a single sample.

        Args:
            idx (int): Position of the sample within the stored data.

        Returns:
            tuple: (value, value ** 2), the input followed by its label.
        """
        value = self.data[idx]
        # The label is derived on the fly rather than stored.
        return value, value ** 2


## Create and Test Dataset

In [4]:
# Build a SquareDataset over the integers 1 through 10
numbers = list(range(1, 11))
dataset = SquareDataset(numbers)

# Sanity check: report the size, then walk every index and show each pair
n_samples = len(dataset)
print(f"Dataset size: {n_samples}")  # Expected: 10
for i in range(n_samples):
    x, y = dataset[i]
    print(f"Input: {x}, Label: {y}")


Dataset size: 10
Input: 1, Label: 1
Input: 2, Label: 4
Input: 3, Label: 9
Input: 4, Label: 16
Input: 5, Label: 25
Input: 6, Label: 36
Input: 7, Label: 49
Input: 8, Label: 64
Input: 9, Label: 81
Input: 10, Label: 100


## Use with DataLoader

In [5]:
from torch.utils.data import DataLoader

# Wrap the dataset so it is served in shuffled mini-batches of three samples
dataloader = DataLoader(dataset=dataset, batch_size=3, shuffle=True)

# Each batch unpacks into its inputs and the matching labels
for x_batch, y_batch in dataloader:
    print(f"Batch Inputs: {x_batch}, Batch Labels: {y_batch}")


Batch Inputs: tensor([5, 2, 7]), Batch Labels: tensor([25,  4, 49])
Batch Inputs: tensor([9, 8, 4]), Batch Labels: tensor([81, 64, 16])
Batch Inputs: tensor([1, 6, 3]), Batch Labels: tensor([ 1, 36,  9])
Batch Inputs: tensor([10]), Batch Labels: tensor([100])


## Splitting the Dataset with random_split

In [4]:
from torch.utils.data import random_split

# Define the dataset
# NOTE: this rebinds `numbers` and `dataset`, replacing the 10-element
# versions created earlier in the notebook.
numbers = list(range(1, 101))  # Example dataset with numbers 1 to 100
dataset = SquareDataset(numbers)

# Define split sizes
train_size = int(0.8 * len(dataset))  # 80% for training
test_size = len(dataset) - train_size  # Remaining 20% for testing

# Split the dataset.
# An explicitly seeded generator makes the train/test membership identical on
# every run; without it the split depends on the global RNG state and changes
# each time the kernel is restarted.
SPLIT_SEED = 42
split_generator = torch.Generator().manual_seed(SPLIT_SEED)
train_dataset, test_dataset = random_split(
    dataset, [train_size, test_size], generator=split_generator
)

# Check the sizes
print(f"Training set size: {len(train_dataset)}")
print(f"Test set size: {len(test_dataset)}")


Training set size: 80
Test set size: 20
