# Data Handling - Dataset and DataLoader

## Data Loader

In [1]:
import torch
import numpy as np
from torch.utils.data import TensorDataset, DataLoader

# Fix a single seed so that "Restart Kernel -> Run All" reproduces the same
# synthetic data and the same train/test split on every run.
SEED = 42
rng = np.random.default_rng(SEED)

# Generate synthetic data: 100 samples with 10 features each, uniform in [0, 1)
X = rng.random((100, 10))
y = rng.random((100, 1))

# Convert the (float64) numpy arrays to float32 torch tensors
X_tensor = torch.tensor(X, dtype=torch.float32)
y_tensor = torch.tensor(y, dtype=torch.float32)

# Wrap features and targets in a TensorDataset, then split it 80/20 into
# training and test subsets
dataset = TensorDataset(X_tensor, y_tensor)
train_size = int(0.8 * len(dataset))
test_size = len(dataset) - train_size  # remainder, so the two sizes always sum to len(dataset)
train_dataset, test_dataset = torch.utils.data.random_split(
    dataset,
    [train_size, test_size],
    # Dedicated generator: the split is reproducible without touching torch's global RNG state
    generator=torch.Generator().manual_seed(SEED),
)

# Sanity check: the split must cover every sample exactly once
assert len(train_dataset) + len(test_dataset) == len(dataset)

# Diagnostic Output
print(f"Train dataset size: {len(train_dataset)}, Test dataset size: {len(test_dataset)}")

# Show shapes of the first training sample (a single (X, y) pair, not a batch)
first_train_sample = train_dataset[0]
print(f"First training sample - X shape: {first_train_sample[0].shape}, y shape: {first_train_sample[1].shape}")

# Show content of the first training sample
print(f"First training sample - X: {first_train_sample[0].numpy()}, y: {first_train_sample[1].numpy()}")


Train dataset size: 80, Test dataset size: 20
First training sample - X shape: torch.Size([10]), y shape: torch.Size([1])
First training sample - X: [0.12899284 0.1336563  0.90335464 0.21525885 0.5082561  0.8833535
 0.9791541  0.38017845 0.15549332 0.18559232], y: [0.42843747]


In [2]:
from torch.utils.data import DataLoader

# Create a DataLoader for the training dataset: mini-batches of 16 samples,
# drawn in a freshly shuffled order on every pass over the data
dataloader = DataLoader(train_dataset, batch_size=16, shuffle=True)

# Example: iterate through every batch of one pass and report its shapes.
# enumerate(..., start=1) supplies the 1-based batch number used in the output.
for n, (batch_X, batch_y) in enumerate(dataloader, start=1):
    print(f"{n}) Batch X shape:", batch_X.size())
    print("      Batch y shape:", batch_y.size())


1) Batch X shape: torch.Size([16, 10])
      Batch y shape: torch.Size([16, 1])
2) Batch X shape: torch.Size([16, 10])
      Batch y shape: torch.Size([16, 1])
3) Batch X shape: torch.Size([16, 10])
      Batch y shape: torch.Size([16, 1])
4) Batch X shape: torch.Size([16, 10])
      Batch y shape: torch.Size([16, 1])
5) Batch X shape: torch.Size([16, 10])
      Batch y shape: torch.Size([16, 1])
