# Lecture 8: DataLoader

As the dataset grows, feeding all of the data to the model at once becomes inefficient.

## Batch

Divide the dataset into small batches, then go through the batches one at a time, computing gradients on each batch to update the weights.

- One epoch = one forward and backward pass of all training examples.
- batch size = the number of training examples in one forward/backward pass.
- number of iterations = number of passes, each pass using [batch size] number of examples.

Example: with 1000 training examples and a batch size of 500, one epoch takes 2 iterations.

## DataLoader

Use PyTorch's `DataLoader` to split the dataset into batches.

In [7]:
import torch
from torch.autograd import Variable
from torch.utils.data import Dataset, DataLoader
import numpy as np

class DiabetesDataset(Dataset):
    """Dataset backed by a comma-separated file of numeric values.

    Each row of the file is one sample: every column except the last is an
    input feature and the last column is the target.

    Args:
        path: Location of the CSV file to load. Defaults to
            ``'data/diabetes.csv'``.
    """

    # Initialize your data, download, etc.
    def __init__(self, path='data/diabetes.csv'):
        # ndmin=2 keeps the array two-dimensional even when the file holds a
        # single row, so the column slicing below always works.
        xy = np.loadtxt(path, delimiter=',', dtype=np.float32, ndmin=2)
        self.len = xy.shape[0]
        self.x_data = torch.from_numpy(xy[:, 0:-1])
        # Indexing with the list [-1] keeps the target as a column: (rows, 1).
        self.y_data = torch.from_numpy(xy[:, [-1]])

    # Return one item on the index
    def __getitem__(self, index):
        """Return the ``(features, target)`` pair stored at ``index``."""
        return self.x_data[index], self.y_data[index]

    # return the data length
    def __len__(self):
        """Return the number of samples (rows) loaded from the file."""
        return self.len

# Load the data once, then let DataLoader serve it in shuffled batches of 32.
# num_workers=0 means batches are loaded in the main process.
dataset = DiabetesDataset()
train_loader = DataLoader(
    dataset,
    batch_size=32,
    shuffle=True,
    num_workers=0,
)

# Design Model
class Model(torch.nn.Module):
    """Fully connected network of three linear layers with sigmoid activations."""

    def __init__(self):
        """Instantiate the three nn.Linear modules and the sigmoid activation."""
        super().__init__()
        # Layer widths shrink step by step: 8 -> 6 -> 4 -> 1.
        self.l1 = torch.nn.Linear(in_features=8, out_features=6)
        self.l2 = torch.nn.Linear(in_features=6, out_features=4)
        self.l3 = torch.nn.Linear(in_features=4, out_features=1)

        # The same sigmoid module is applied after every linear layer.
        self.sigmoid = torch.nn.Sigmoid()

    def forward(self, x):
        """Run ``x`` through l1, l2 and l3, applying the sigmoid after each.

        Args:
            x: Input whose last dimension has 8 features.

        Returns:
            The prediction, with a last dimension of size 1 and values
            strictly between 0 and 1.
        """
        activation = x
        for layer in (self.l1, self.l2, self.l3):
            activation = self.sigmoid(layer(activation))
        return activation

# Instantiate the network.
model = Model()

# Construct our loss function and an optimizer. The call to model.parameters()
# in the SGD constructor hands over the learnable parameters of the three
# nn.Linear modules which are members of the model.
# reduction='mean' averages the loss over the batch; it replaces the
# deprecated size_average=True argument and produces the same value.
criterion = torch.nn.BCELoss(reduction='mean')
optimizer = torch.optim.SGD(model.parameters(), lr=0.1)

# Training loop: two passes (epochs) over the data, one weight update per batch.
for epoch in range(2):
    # Each item yielded by the loader is an (inputs, labels) pair for one batch.
    for i, (inputs, labels) in enumerate(train_loader):
        # Forward pass: compute predicted y by passing x to the model.
        # Tensors take part in autograd directly, so the deprecated Variable
        # wrapper is not needed.
        y_pred = model(inputs)

        # Compute and print loss. detach() gives a view of the value that is
        # cut off from the autograd graph, so only the number is printed.
        loss = criterion(y_pred, labels)
        print(epoch, i, loss.detach())

        # Zero gradients, perform a backward pass, and update the weights.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()



0 0 tensor(0.7503)
0 1 tensor(0.7833)
0 2 tensor(0.7923)
0 3 tensor(0.7148)
0 4 tensor(0.7649)
0 5 tensor(0.7469)
0 6 tensor(0.7166)
0 7 tensor(0.7263)
0 8 tensor(0.7057)
0 9 tensor(0.6962)
0 10 tensor(0.6909)
0 11 tensor(0.6907)
0 12 tensor(0.6866)
0 13 tensor(0.6868)
0 14 tensor(0.6887)
0 15 tensor(0.6815)
0 16 tensor(0.6675)
0 17 tensor(0.6969)
0 18 tensor(0.6766)
0 19 tensor(0.6750)
0 20 tensor(0.6435)
0 21 tensor(0.6323)
0 22 tensor(0.6403)
0 23 tensor(0.6433)
1 0 tensor(0.6757)
1 1 tensor(0.6865)
1 2 tensor(0.6329)
1 3 tensor(0.6409)
1 4 tensor(0.6634)
1 5 tensor(0.6136)
1 6 tensor(0.6626)
1 7 tensor(0.6624)
1 8 tensor(0.6486)
1 9 tensor(0.6341)
1 10 tensor(0.6619)
1 11 tensor(0.6328)
1 12 tensor(0.6768)
1 13 tensor(0.6469)
1 14 tensor(0.6010)
1 15 tensor(0.6456)
1 16 tensor(0.6616)
1 17 tensor(0.6455)
1 18 tensor(0.6289)
1 19 tensor(0.6281)
1 20 tensor(0.7133)
1 21 tensor(0.6451)
1 22 tensor(0.6283)
1 23 tensor(0.6469)
