### Synthetic regression data

In [57]:
%matplotlib inline
import random
import torch
from d2l import torch as d2l

### Generating the Dataset

In [58]:
class SyntheticRegressionData(d2l.DataModule): #@save
    """Synthetic dataset for linear regression: y = Xw + b + Gaussian noise.

    Args:
        w: Ground-truth weights; must support ``len()`` and ``reshape``
            (a 1-D tensor in this notebook).
        b: Ground-truth bias added to every label.
        noise: Scale applied to the standard-normal noise added to the labels.
        num_train: Number of examples in the training split.
        num_val: Number of examples in the validation split.
        batch_size: Minibatch size used by the data loaders.

    Attributes:
        X: Features of shape ``(num_train + num_val, len(w))``.
        y: Labels of shape ``(num_train + num_val, 1)``.
    """
    def __init__(self, w, b, noise=0.01, num_train=1000, num_val=1000, batch_size=32):
        super().__init__()
        # Must run before any other local is created. Presumably this stores the
        # constructor arguments as attributes (later code reads self.num_train,
        # self.num_val and self.batch_size) -- TODO confirm against d2l.
        self.save_hyperparameters()
        total_examples = num_train + num_val
        self.X = torch.randn(total_examples, len(w))
        label_noise = torch.randn(total_examples, 1) * noise
        # Turn `w` into a column so that matmul yields a column of labels with
        # the same shape as the noise term, instead of a flat 1-D vector.
        # The 'x_tensor' / 'w_tensor' demo further down shows both shapes.
        w_column = w.reshape((-1, 1))
        self.y = torch.matmul(self.X, w_column) + b + label_noise

# Build a dataset with ground-truth weights w = [2, -3.4] and bias b = 4.2,
# using the constructor defaults for the noise scale and split sizes.
data = SyntheticRegressionData(w=torch.tensor([2, -3.4]), b=4.2)
# Show the first example: a 2-element feature row and its 1-element label.
print('features:', data.X[0],'\nlabel:', data.y[0])

# Small standalone tensors illustrating why `w` is reshaped before matmul:
# a 1-D weight vector of length 4 versus the same values as a (4, 1) column.
x_tensor = torch.randn(10, 4, dtype=torch.float32)
w_tensor = torch.tensor([1, 2, 3, 4], dtype=torch.float32)
# The tuple is the cell's last expression, so the notebook displays it.
w_tensor, w_tensor.reshape((-1, 1)), x_tensor

features: tensor([ 1.9701, -0.4049]) 
label: tensor([9.5292])


(tensor([1., 2., 3., 4.]),
 tensor([[1.],
         [2.],
         [3.],
         [4.]]),
 tensor([[ 1.6392, -0.3572,  1.0948,  0.8686],
         [-0.2936, -0.2588, -0.9746,  0.3247],
         [-0.0794,  1.1770, -1.3706,  1.1014],
         [-0.2591, -0.6456, -1.3393, -0.4999],
         [-0.1826,  0.6589, -1.0825, -0.3159],
         [-0.7226,  0.6717, -1.2429, -0.7683],
         [-0.0147, -1.3330, -1.0669,  2.4194],
         [ 1.1477,  0.3940,  0.1759,  0.0827],
         [ 1.1010, -0.7197,  0.9227, -0.9725],
         [ 0.2678, -0.4877,  0.5702,  0.8050]]))

In [59]:
# With the 1-D `w_tensor`, matmul returns a 1-D result of length 10; with the
# (4, 1) column it returns a (10, 1) column holding the same values.
torch.matmul(x_tensor, w_tensor), torch.matmul(x_tensor, w_tensor.reshape((-1, 1)))

(tensor([ 7.6834, -2.4358,  2.5684, -7.5680, -3.3758, -6.1809,  3.7963,  2.7941,
         -1.4604,  4.2231]),
 tensor([[ 7.6834],
         [-2.4358],
         [ 2.5684],
         [-7.5680],
         [-3.3758],
         [-6.1809],
         [ 3.7963],
         [ 2.7941],
         [-1.4604],
         [ 4.2231]]))

### Reading the Dataset

Note that we need to be mindful of whether we’re in training or validation mode: in the former, we will want to read the data in random order, whereas for the latter, being able to read data in a pre-defined order may be important for debugging purposes.

In [63]:
@d2l.add_to_class(SyntheticRegressionData)
def get_dataloader(self, train):
    """Yield ``(X, y)`` minibatches from the training or validation split.

    Args:
        train: If true, iterate over the first ``num_train`` examples in a
            freshly shuffled order; otherwise iterate over the following
            ``num_val`` examples in their stored order.

    Yields:
        Tuples ``(features, labels)`` of at most ``batch_size`` examples each;
        the final batch is shorter when the split size is not a multiple of
        ``batch_size``.
    """
    if train:
        order = list(range(self.num_train))
        # Training examples are visited in random order.
        random.shuffle(order)
    else:
        first_val = self.num_train
        order = list(range(first_val, first_val + self.num_val))
    for start in range(0, len(order), self.batch_size):
        stop = start + self.batch_size
        picked = torch.tensor(order[start:stop])
        yield self.X[picked], self.y[picked]

In [68]:
# Inspect the first training minibatch. `train_dataloader` is not defined in
# this notebook; presumably d2l.DataModule provides it on top of
# `get_dataloader(train=True)` -- TODO confirm.

X, y = next(iter(data.train_dataloader()))
print('X shape:', X.shape, '\ny shape:', y.shape)

X shape: torch.Size([32, 2]) 
y shape: torch.Size([32, 1])


### Concise Implementation of the Data Loader (framework built-in iterators)

In [70]:
@d2l.add_to_class(d2l.DataModule)  #@save
def get_tensorloader(self, tensors, train, indices=slice(0, None)):
    """Wrap a selection of examples from ``tensors`` in a PyTorch DataLoader.

    Args:
        tensors: Iterable of tensors that are all indexed with ``indices``.
        train: Passed to the loader as ``shuffle``, so batches are reshuffled
            only when this is true.
        indices: Index applied to every tensor to pick the examples to serve;
            the default slice keeps all of them.

    Returns:
        A ``torch.utils.data.DataLoader`` over the selected examples that
        produces batches of ``self.batch_size``.
    """
    selected = [tensor[indices] for tensor in tensors]
    return torch.utils.data.DataLoader(
        torch.utils.data.TensorDataset(*selected),
        batch_size=self.batch_size,
        shuffle=train,
    )

@d2l.add_to_class(SyntheticRegressionData)  #@save
def get_dataloader(self, train):
    """Return a DataLoader over the training or validation split.

    Args:
        train: If true, serve the first ``num_train`` examples; otherwise
            serve everything from index ``num_train`` onwards. The flag is
            also forwarded to ``get_tensorloader``.

    Returns:
        Whatever ``self.get_tensorloader`` returns for the chosen split.
    """
    if train:
        split = slice(0, self.num_train)
    else:
        split = slice(self.num_train, None)
    return self.get_tensorloader((self.X, self.y), train, split)

In [71]:
# Re-check the first minibatch after `get_dataloader` was redefined to use the
# built-in DataLoader; the batch shapes should match the from-scratch iterator.
X, y = next(iter(data.train_dataloader()))
print('X shape:', X.shape, '\ny shape:', y.shape)

X shape: torch.Size([32, 2]) 
y shape: torch.Size([32, 1])


In [72]:
# Number of minibatches in one pass over the training split: 1000 examples in
# batches of 32 gives 32 batches, the last one partial.
len(data.train_dataloader())

32