In [None]:
%matplotlib inline 
import numpy as np
import matplotlib.pyplot as plt
import torch
import torch.nn as nn  # for building up neural network
from torch.utils.data import Dataset, DataLoader  # for loading the data

## Generate some data ##
To demonstrate the capability of neural networks, we generate a classification dataset that is not linearly separable. We use a common example: the spiral dataset. The data generation code comes from Stanford's [cs231n](http://cs231n.github.io/neural-networks-case-study/) course.

In [None]:
# Build a K-class spiral dataset: each class is one arm of the spiral, with
# Gaussian noise added to the angle so the arms are not perfectly clean.
N = 100 # number of points per class
D = 2 # dimensionality
K = 3 # number of classes
SEED = 0 # fixed seed so that "Restart & Run All" regenerates the same points
rng = np.random.RandomState(SEED) # local generator; NumPy's global RNG state is left untouched
X_data = np.zeros((N*K,D), dtype='float32') # data matrix (each row = single example)
y_data = np.zeros(N*K, dtype='uint8') # class labels
for j in range(K):
  ix = range(N*j,N*(j+1)) # row indices that belong to class j
  r = np.linspace(0.0,1,N) # radius
  t = np.linspace(j*4,(j+1)*4,N) + rng.randn(N)*0.2 # theta
  X_data[ix] = np.c_[r*np.sin(t), r*np.cos(t)]
  y_data[ix] = j
# visualize the data:
plt.scatter(X_data[:, 0], X_data[:, 1], c=y_data, s=40, cmap=plt.cm.Spectral)
plt.show()
print('X_data shape: {}, y_data shape: {}'.format(X_data.shape, y_data.shape))

## Building up a classifier using ANN ##
We use `torch.nn.Module` as the base class to build up the neural network.

In [None]:
class ANNClassifier(nn.Module):
    """Fully-connected classifier with one hidden layer: linear -> ReLU -> linear.

    Args:
        dim: number of input features per example.
        n_classes: number of scores produced per example.
        n_hidden_neurons: width of the hidden layer.
    """
    def __init__(self, dim, n_classes, n_hidden_neurons):
        super().__init__()
        # input -> hidden
        self.fc1 = nn.Linear(dim, n_hidden_neurons)
        # hidden -> class scores
        self.fc2 = nn.Linear(n_hidden_neurons, n_classes)

    def forward(self, x):
        """Return the raw class scores for ``x`` (no softmax is applied)."""
        hidden = self.fc1(x)
        activated = nn.functional.relu(hidden)
        return self.fc2(activated)

## Loading the generated data ##
PyTorch provides a data loading API in the `torch.utils.data` module. The companion `torchvision` package also provides various pre-processing algorithms for images. A good (official) tutorial on using these APIs can be found [here](https://pytorch.org/tutorials/beginner/data_loading_tutorial.html). Although the data in this tutorial does not have a complicated structure, we still use `Dataset` and `DataLoader` from `torch.utils.data` for demonstration purposes.

In [None]:
class SpiralDataset(Dataset):
    """ Spiral Dataset to load spiral data points.

    Args:
        X: data matrix; assumed to have shape (N, dim), one example per row.
        y: class labels; assumed to have shape (N,).
        transform: optional callable applied to every sample dict before it
            is returned.
    """
    def __init__(self, X, y, transform=None):
        self.X = X
        self.y = y
        self.transform = transform

    def __len__(self):
        """Number of examples, i.e. the number of rows of ``X``."""
        return self.X.shape[0]

    def __getitem__(self, idx, transform=True):
        """Return example ``idx`` as a dict with keys ``'x'`` and ``'y'``.

        Args:
            idx: index of the example.
            transform: pass ``False`` to get the raw sample even when a
                transform was given to the constructor. Plain indexing
                (``dataset[idx]``) always uses the default ``True``.
        """
        sample = {'x': self.X[idx, :], 'y': self.y[idx]}  # We assume X has shape (N, dim), y has shape (N,)
        # this is the canonical way of performing data preprocessing;
        # the per-call flag used to be accepted but ignored
        if transform and self.transform is not None:
            sample = self.transform(sample)
        return sample
    
class ToTensor:
    """Transform that turns the numpy contents of a sample dict into Tensors."""
    def __call__(self, sample):
        """Return a new dict holding ``'x'`` and ``'y'`` as Tensors."""
        raw_x, raw_y = sample['x'], sample['y']
        features = torch.from_numpy(raw_x)
        # the label is wrapped in a list, so the result is a 1-element LongTensor
        label = torch.LongTensor([raw_y])
        return {'x': features, 'y': label}

We could have used a plain `for` loop to read from `SpiralDataset` simply by indexing it. For example:
```python
spiral_dataset = SpiralDataset(X, y)
for i in range(len(spiral_dataset)):
    sample = spiral_dataset[i]
    x, y = sample['x'], sample['y']
```
But this way, we lose useful features such as:
- Batching the data
- Shuffling the data

`torch.utils.data.DataLoader` is an iterable that provides all these features.

In [None]:
# first create a dataset
spiral_dataset = SpiralDataset(X_data, y_data, transform=ToTensor())  # X_data and y_data were generated in the first section
# then use DataLoader to wrap up the dataset (adds batching and shuffling)
spiral_dataloader = DataLoader(spiral_dataset, batch_size=32, shuffle=True)
# now we can use a for loop to iterate over spiral_dataloader;
# each `sample` is a dict with the batched 'x' and 'y' tensors
for i, sample in enumerate(spiral_dataloader):
    if i == 3:
        print(sample['x'].size(), sample['y'].size())
        break  # only this one batch is reported, so skip the remaining ones
        
        

## Training the neural network ##
We use `torch.optim.Adam`, an adaptive variant of stochastic gradient descent, to optimize the weights of our network.

In [None]:
# hyperparameters
n_epochs = 120
batch_size = 32
lr = 0.05
n_hidden_neurons = 100
torch_seed = 0

# seed PyTorch so that weight initialisation and batch shuffling are repeatable
torch.manual_seed(torch_seed)
# define the network
ann = ANNClassifier(dim=D, n_classes=K, n_hidden_neurons=n_hidden_neurons)
# define the loss function (expects raw class scores and integer class labels)
loss_fcn = nn.CrossEntropyLoss()
# define the optimizer
optimizer = torch.optim.Adam(ann.parameters(), lr=lr)
# define the dataset
spiral_dataset = SpiralDataset(X_data, y_data, transform=ToTensor())
# define the data loader
# num_workers=0 loads in the main process: the data is a small in-memory array,
# and worker processes cannot re-import classes defined inside a notebook on
# platforms that start workers with "spawn" (e.g. Windows)
spiral_dataloader = DataLoader(spiral_dataset, batch_size=batch_size, shuffle=True, num_workers=0)

In [None]:
# Train for n_epochs passes over the data, reporting training accuracy every 5 epochs.
for epoch in range(n_epochs):
    n_correct_examples = 0
    for sample in spiral_dataloader:
        # zero the parameter gradients
        optimizer.zero_grad()
        # forward + backward + optimize
        # flatten the labels to 1-D; a bare squeeze() would also drop the batch
        # axis whenever a batch happens to contain a single example
        x, y = sample['x'], sample['y'].reshape(-1)
        y_score = ann(x)
        loss = loss_fcn(y_score, y)
        loss.backward()
        optimizer.step()
        # compute training accuracy; softmax preserves ordering, so the argmax
        # of the raw scores already is the predicted class
        y_pred = torch.argmax(y_score, dim=1)
        n_correct_examples += torch.sum(y_pred==y).item()
    if epoch % 5 == 0:
        print('epoch {}, training accuracy = {:.3f}'.format(epoch, n_correct_examples/len(spiral_dataset)))