In [1]:
import numpy as np
import matplotlib.pyplot as plt 

import torch
from torch.utils.data import DataLoader
from torch.autograd import Variable

## 8. DataLoader

The example before had a relatively small data set, and completing thousands of epochs did not take much computational time at all. However, massive increases in training size and layers make this task difficult enough that we must be attentive to the process. 

__Terminology:__  
- Epoch: one forward and one backward pass over *all* of the training examples  
- Batch Size: number of training examples used in a single forward/backward pass
- Iterations: number of passes needed to complete one epoch, each pass using (batch size) number of training examples  

__Example:__  
If you have 1000 training examples with batch size 500, then it will take 2 iterations to complete 1 epoch.
  
__Basically__, we want to split our data up into batches so that we can control our computations.

In [16]:
# Class builds dataset from a CSV file (default: Data/diabetes.csv)
class DiabetesDataset(torch.utils.data.Dataset):
    """Diabetes dataset read from a comma-separated text file.

    Every column except the last is stored as an input feature and the last
    column is stored as the target. All values are parsed as ``float32``.

    Parameters
    ----------
    csv_path : str, optional
        Location of the CSV file. Defaults to ``'Data/diabetes.csv'`` so that
        ``DiabetesDataset()`` keeps working exactly as before.

    Attributes
    ----------
    len : int
        Number of rows (samples) in the file.
    x_data : torch.Tensor
        Feature tensor with one row per sample.
    y_data : torch.Tensor
        Target tensor with one row per sample and a single column.
    """

    # Initialize data
    def __init__(self, csv_path='Data/diabetes.csv'):
        # ndmin=2 keeps a one-row file two-dimensional; without it loadtxt
        # returns a 1-D array and the column slicing below raises IndexError.
        xy = np.loadtxt(csv_path, delimiter=',', dtype=np.float32, ndmin=2)
        self.len = xy.shape[0]
        self.x_data = torch.from_numpy(xy[:, :-1])
        # Indexing with the list [-1] keeps the target as a column (N, 1)
        # instead of collapsing it to a flat vector.
        self.y_data = torch.from_numpy(xy[:, [-1]])

    def __getitem__(self, index):
        """Return the ``(features, target)`` pair stored at ``index``."""
        return self.x_data[index], self.y_data[index]

    def __len__(self):
        """Return the number of samples in the dataset."""
        return self.len

In [17]:
# Instantiate the dataset, then wrap it in a DataLoader that serves mini-batches
dataset = DiabetesDataset()

train_loader = DataLoader(
    dataset,
    batch_size=32,   # samples per mini-batch
    shuffle=True,    # draw the samples in a new random order every epoch
    num_workers=0,   # load batches in the main process (no worker subprocesses)
)

In [18]:
# Build Linear Model, BCE Loss, and SGD Optimizer
class Model(torch.nn.Module):
    """Fully connected network 8 -> 6 -> 4 -> 1 with a sigmoid after every layer."""

    def __init__(self):
        super().__init__()
        nn = torch.nn
        # Layers are created in l1, l2, l3 order so parameter initialisation
        # consumes the random number generator in the same sequence as before.
        self.l1 = nn.Linear(in_features=8, out_features=6)
        self.l2 = nn.Linear(in_features=6, out_features=4)
        self.l3 = nn.Linear(in_features=4, out_features=1)

        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Run ``x`` through the three linear layers, applying sigmoid after each."""
        activation = x
        for layer in (self.l1, self.l2, self.l3):
            activation = self.sigmoid(layer(activation))
        return activation
model = Model()
# Binary cross-entropy on the sigmoid outputs, averaged over each batch.
criterion = torch.nn.BCELoss(reduction='mean')
# Plain stochastic gradient descent over every trainable parameter of the model.
optimizer = torch.optim.SGD(params=model.parameters(), lr=0.01)

In [21]:
# Train for two epochs, taking one optimizer step per mini-batch.
for epoch in range(2):
    # The loader yields (inputs, labels) pairs. Tensors track gradients
    # directly, so the deprecated Variable wrapper is not needed.
    for i, (inputs, labels) in enumerate(train_loader):
        # Forward pass: predictions and loss for this mini-batch
        y_pred = model(inputs)
        loss = criterion(y_pred, labels)
        # .item() converts the one-element loss tensor to a Python float
        print(epoch, i, loss.item())

        # Backward pass: gradients accumulate across backward() calls, so
        # they are cleared first, then recomputed and applied.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

0 0 0.7161743640899658
0 1 0.6987360119819641
0 2 0.6940663456916809
0 3 0.7036055326461792
0 4 0.7039104104042053
0 5 0.6963328719139099
0 6 0.6957908272743225
0 7 0.7073799967765808
0 8 0.6985552310943604
0 9 0.6960105895996094
0 10 0.7038606405258179
0 11 0.7052321434020996
0 12 0.7052555680274963
0 13 0.7039276957511902
0 14 0.7044946551322937
0 15 0.7100305557250977
0 16 0.6994779109954834
0 17 0.699142336845398
0 18 0.6976528763771057
0 19 0.7003467679023743
0 20 0.69708651304245
0 21 0.6975420713424683
0 22 0.6976831555366516
0 23 0.695855438709259
0 24 0.6971123218536377
0 25 0.6934838891029358
0 26 0.6942680478096008
0 27 0.6933479309082031
0 28 0.6923757195472717
0 29 0.6922417283058167
0 30 0.6921616792678833
0 31 0.6908133029937744
0 32 0.6891487836837769
0 33 0.6944347620010376
0 34 0.6898872256278992
0 35 0.6884573101997375
0 36 0.6898629069328308
0 37 0.6873884201049805
0 38 0.6875864863395691
0 39 0.6845865845680237
0 40 0.6866496801376343
0 41 0.689348578453064
0 42 0.