## Neural Networks in PyTorch 

* torch.nn provides the building blocks for neural networks.
* All networks subclass `nn.Module`, and so do the individual layers. A model is thus a module composed of other modules.


In [13]:
import os 
import torch 
from torch import nn 
from torch.utils.data import DataLoader 
from torchvision import datasets, transforms

In [14]:
# Prefer the GPU whenever PyTorch can see a CUDA device; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
print(device)

cuda


### Building the Neural Network Using torch.nn

In [61]:
class MyModel(nn.Module):
    """Fully connected classifier: flatten -> (Linear -> ReLU) x 2 -> Linear -> softmax.

    Args:
        in_features: Number of values per sample once flattened. Defaults to
            ``28 * 28``, matching a 28x28 single-channel input.
        hidden_features: Width of both hidden layers. Defaults to 512.
        num_classes: Number of output classes. Defaults to 10.
    """

    def __init__(self, in_features=28 * 28, hidden_features=512, num_classes=10):
        super().__init__()
        # Collapses every dimension after the batch dimension into one.
        self.flatten = nn.Flatten()
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(in_features, hidden_features),
            nn.ReLU(),
            nn.Linear(hidden_features, hidden_features),
            nn.ReLU(),
            nn.Linear(hidden_features, num_classes),
        )
        # Normalises across the class dimension (dim=1) so each row sums to 1.
        self.softmax = nn.Softmax(dim=1)

    # forward() plays the role that call() plays in a TensorFlow/Keras model:
    # it defines the forward pass and is what model(X) ends up invoking.
    def forward(self, X):
        """Run the forward pass.

        Args:
            X: Batch of inputs whose non-batch dimensions flatten to
                ``in_features`` values per sample.

        Returns:
            Tensor of shape ``(batch, num_classes)`` holding class
            probabilities (softmax has already been applied).

        Note:
            Because probabilities rather than raw logits are returned, do not
            feed this output to ``nn.CrossEntropyLoss``, which expects
            unnormalised logits.
        """
        h = self.flatten(X)
        logits = self.linear_relu_stack(h)
        out = self.softmax(logits)
        return out
    

#### In PyTorch, you send every nn.Module and data tensor to the device of your choice explicitly using the .to() method. Loading the model to the selected device:

In [62]:
# Build the network first, then move its parameters onto the selected device.
model = MyModel()
model = model.to(device)
print(model)

MyModel(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=784, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_features=512, bias=True)
    (3): ReLU()
    (4): Linear(in_features=512, out_features=10, bias=True)
  )
  (softmax): Softmax(dim=1)
)


### Predicting with a model. 

In [63]:
# Generate a fake batch of 16 single-channel 28x28 inputs. torch.rand samples
# uniformly from [0, 1). Make sure to push the batch onto the same device as
# the model.
fake_input = torch.rand(size=(16, 1, 28, 28), dtype=torch.float).to(device)
# Inference only: no_grad() skips building an autograd graph we would never use.
with torch.no_grad():
    preds = model(fake_input).argmax(dim=1)
preds
# The model is untrained, so the predicted class indices come from randomly
# initialised weights; they change between runs and carry no meaning.

tensor([4, 4, 4, 4, 4, 8, 4, 4, 9, 8, 4, 8, 4, 4, 4, 4], device='cuda:0')

### Investigating the model parameters (names, sizes, and sample values).

In [64]:
# Walk over every learnable tensor and report its name, its full size and its
# first two entries along dim 0.
for name, param in model.named_parameters():
    summary = f"Layer: {name} | Size: {param.size()} | Values: {param[:2]} \n"
    print(summary)

Layer: linear_relu_stack.0.weight | Size: torch.Size([512, 784]) | Values: tensor([[-0.0173,  0.0040,  0.0192,  ..., -0.0224, -0.0004, -0.0068],
        [ 0.0215,  0.0209,  0.0355,  ...,  0.0013, -0.0224,  0.0178]],
       device='cuda:0', grad_fn=<SliceBackward0>) 

Layer: linear_relu_stack.0.bias | Size: torch.Size([512]) | Values: tensor([0.0260, 0.0325], device='cuda:0', grad_fn=<SliceBackward0>) 

Layer: linear_relu_stack.2.weight | Size: torch.Size([512, 512]) | Values: tensor([[ 0.0146, -0.0095, -0.0430,  ..., -0.0422,  0.0161, -0.0090],
        [-0.0178, -0.0078,  0.0217,  ..., -0.0169, -0.0076,  0.0046]],
       device='cuda:0', grad_fn=<SliceBackward0>) 

Layer: linear_relu_stack.2.bias | Size: torch.Size([512]) | Values: tensor([0.0402, 0.0210], device='cuda:0', grad_fn=<SliceBackward0>) 

Layer: linear_relu_stack.4.weight | Size: torch.Size([10, 512]) | Values: tensor([[ 0.0229,  0.0146, -0.0424,  ..., -0.0231, -0.0377,  0.0405],
        [-0.0219, -0.0006,  0.0353,  ..., -0