In [2]:
import torch
import torch.nn as nn
import torch.nn.functional as F

In [3]:
class Net(nn.Module):
    """Fully connected classifier: three ReLU hidden layers + log-softmax output.

    With the default arguments this is the MNIST-sized network used in this
    notebook: 784 inputs (a flattened 28x28 image) -> 64 -> 64 -> 64 -> 10.

    Args:
        in_features: length of each flattened input row (default 28*28).
        hidden_features: number of neurons in each of the three hidden
            layers (default 64).
        num_classes: number of output classes (default 10, the digits 0-9).
    """

    def __init__(self, in_features: int = 28*28, hidden_features: int = 64,
                 num_classes: int = 10) -> None:
        # super() runs the initialisation of the parent class we inherit from
        # reference - https://www.geeksforgeeks.org/python-super/
        super().__init__()
        # Fully connected layers. The input is a flattened row of image pixels.
        # Each layer's input size must equal the previous layer's output size.
        self.fc1 = nn.Linear(in_features, hidden_features)
        self.fc2 = nn.Linear(hidden_features, hidden_features)
        # The hidden width is a free choice; the sweet spot is generally found
        # by trial and error (64 is simply a conventional power of two).
        self.fc3 = nn.Linear(hidden_features, hidden_features)
        # The final layer needs one output per class
        # (handwritten digits 0-9 -> 10 classes by default).
        self.fc4 = nn.Linear(hidden_features, num_classes)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Run a batch through the network and return per-class log-probabilities.

        This method defines how data flows through the network. Arbitrary
        Python logic (e.g. if statements) may be placed between layers;
        PyTorch computes the gradients automatically.

        Args:
            x: 2-D tensor of shape (batch, in_features). The input must
                already be flattened; this method does not reshape it.

        Returns:
            Tensor of shape (batch, num_classes) holding log-probabilities;
            exponentiating each row gives values that sum to 1.
        """
        # F.relu (rectified linear unit) is the ACTIVATION function, not an
        # optimizer: it zeroes negative values ("is the neuron firing?") and
        # supplies the non-linearity between the linear layers. It does not
        # bound the outputs from above.
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = F.relu(self.fc3(x))
        # No relu on the output layer: this is classification, not regression,
        # so the raw scores are turned into a distribution over classes below.
        x = self.fc4(x)
        # log_softmax gives the log of a probability distribution, the usual
        # choice for multi-class output. dim=1 normalises across the class
        # axis, i.e. separately for every row (sample) of the batch.
        return F.log_softmax(x, dim=1)


# Seed PyTorch's global RNG so the randomly initialised weights (and any later
# torch random draws) are identical on every Restart & Run All.
SEED = 0
torch.manual_seed(SEED)

# Build the network and print its layer summary
net = Net()
print(net)

Net(
  (fc1): Linear(in_features=784, out_features=64, bias=True)
  (fc2): Linear(in_features=64, out_features=64, bias=True)
  (fc3): Linear(in_features=64, out_features=64, bias=True)
  (fc4): Linear(in_features=64, out_features=10, bias=True)
)


### Pass data through model

In [4]:
# Stand-in for one 28x28 image: uniform random values in [0, 1)
X = torch.rand(28, 28)

In [5]:
# Why the reshape below is required before calling the network:
#   * net(X) on the raw (28, 28) tensor fails with a size mismatch, because
#     fc1 expects rows of 28*28 = 784 values, not 28.
#   * X.view(28*28) does flatten, but to a 1-D tensor; forward() then fails
#     with "dimension out of range" since log_softmax(dim=1) needs a batch axis.
# The data has to be shaped exactly how the library expects: (batch, features).
#
# In view(), -1 means "infer this dimension from the others", so the same line
# works for any batch size; for the single 28x28 X it yields shape (1, 784).
X = X.view(-1, 28 * 28)
output = net(X)




In [6]:
# Display the result: one row of 10 values produced by log_softmax, i.e.
# log-probabilities (negative numbers); output.exp() would sum to 1 per row.
output

tensor([[-2.3086, -2.4022, -2.3518, -2.4247, -2.2184, -2.2160, -2.1890, -2.4396,
         -2.2639, -2.2502]], grad_fn=<LogSoftmaxBackward>)