In [1]:
import torch
import random
from d2l import torch as d2l # look at https://github.com/d2l-ai/d2l-en/blob/master/d2l/torch.py

### Data generation and noise addition
We generate 2000 examples (1000 for training and 1000 for validation) with 2-dimensional features drawn from a standard normal distribution, and compute the labels as $\mathbf{y} = \mathbf{X}\mathbf{w} + b + \boldsymbol{\epsilon}$.
The additive noise $\boldsymbol{\epsilon}$ is drawn from a normal distribution with mean $\mu = 0$ and standard deviation $\sigma = 0.01$.

In [7]:
class SyntheticRegressionData(d2l.DataModule):
    """Synthetic linear-regression data: y = X w + b + Gaussian noise.

    Args:
        w: 1-D tensor of ground-truth weights; its length sets the number
            of feature columns in ``X``.
        b: Ground-truth bias added to every label.
        noise: Scale applied to the standard-normal noise term.
        num_train: Number of leading rows used as the training split.
        num_val: Number of rows following the training rows used for validation.
        batch_size: Minibatch size read by the data loaders.

    Attributes:
        X: Features of shape ``(num_train + num_val, len(w))``.
        y: Labels of shape ``(num_train + num_val, 1)``.
    """

    def __init__(self, w, b, noise=0.01, num_train=1000, num_val=1000, batch_size=32):
        super().__init__()
        # Keep this call ahead of any extra local variable: the loaders later read
        # self.num_train / self.num_val / self.batch_size, which are presumably set
        # here from this frame's arguments (confirm against the d2l implementation).
        self.save_hyperparameters()
        total = num_train + num_val
        # Features first, noise second: same sampling order under a fixed seed.
        self.X = torch.randn(total, len(w))
        eps = noise * torch.randn(total, 1)
        # Column-vector view of w so the product has shape (total, 1).
        self.y = self.X @ w.reshape(-1, 1) + b + eps

# Ground-truth parameters: the labels are just a linear transformation of the
# features by these values (plus noise).
w = torch.tensor([2.0, -3.4])
b = 4.2

data = SyntheticRegressionData(w=w, b=b)

### Dataloader for batches from scratch

In [10]:
@d2l.add_to_class(SyntheticRegressionData)
def get_dataloader(self, train):
    """Yield ``(X, y)`` minibatches from the training or validation rows.

    Args:
        train: If truthy, iterate over rows ``[0, num_train)`` in a freshly
            shuffled order; otherwise iterate over rows
            ``[num_train, num_train + num_val)`` in their stored order.

    Yields:
        Tuples ``(self.X[idx], self.y[idx])`` where ``idx`` holds at most
        ``batch_size`` row indices; the last batch may be shorter.
    """
    first = 0 if train else self.num_train
    last = self.num_train if train else self.num_train + self.num_val
    order = list(range(first, last))
    if train:
        # In-place shuffle with Python's `random` module (not torch's RNG).
        random.shuffle(order)
    for lo in range(0, len(order), self.batch_size):
        picked = torch.tensor(order[lo:lo + self.batch_size])
        yield self.X[picked], self.y[picked]

# Sanity check: take the first item produced by data.train_dataloader() and
# display the shapes of the feature and label batches.
X, y = next(iter(data.train_dataloader())) # first minibatch only; the rest of the iterator is discarded
X.shape, y.shape

(torch.Size([32, 2]), torch.Size([32, 1]))

### Using torch's data loader

In [None]:
@d2l.add_to_class(d2l.DataModule)
def get_tensorloader(self, tensors, train, indices = slice(0, None)):
    """Build a ``torch.utils.data.DataLoader`` over a slice of the given tensors.

    Args:
        tensors: Iterable of tensors; each one is indexed with ``indices``
            before being wrapped in a ``TensorDataset``.
        train: Passed as ``shuffle`` to the loader, so batches are shuffled
            only when this is truthy.
        indices: Index applied to every tensor; defaults to all rows.

    Returns:
        A ``DataLoader`` with batch size ``self.batch_size``.
    """
    selected = [t[indices] for t in tensors]
    return torch.utils.data.DataLoader(
        torch.utils.data.TensorDataset(*selected),
        batch_size=self.batch_size,
        shuffle=train,
    )

@d2l.add_to_class(SyntheticRegressionData)  #@save
def get_dataloader(self, train):
    """Return a loader over the training rows or the validation rows.

    Args:
        train: If truthy, use rows ``[0, num_train)``; otherwise use every row
            from ``num_train`` onward. The flag is also forwarded to
            ``get_tensorloader``.

    Returns:
        Whatever ``self.get_tensorloader`` returns for ``(self.X, self.y)``
        restricted to the chosen rows.
    """
    if train:
        split = slice(0, self.num_train)
    else:
        split = slice(self.num_train, None)
    return self.get_tensorloader((self.X, self.y), train, split)

In [11]:
# Fetch the first minibatch from data.train_dataloader() and show both shapes.
# NOTE(review): get_dataloader is attached to SyntheticRegressionData twice in this
# notebook; presumably the most recently executed definition is the one used here —
# confirm by running the cells in order on a fresh kernel.
X, y = next(iter(data.train_dataloader()))
X.shape, y.shape

(torch.Size([32, 2]), torch.Size([32, 1]))