In [1]:
import torch
import torchvision
from torchvision import transforms, datasets

# Training split of MNIST. root='' stores the files relative to the current
# working directory, and download=True asks torchvision to fetch them if needed.
train = datasets.MNIST(
    root='',
    train=True,
    download=True,
    transform=transforms.Compose([transforms.ToTensor()]),
)

# Test split, loaded with exactly the same settings apart from train=False.
test = datasets.MNIST(
    root='',
    train=False,
    download=True,
    transform=transforms.Compose([transforms.ToTensor()]),
)

In [2]:
import torch.nn as nn
import torch.nn.functional as F

In [3]:
class Net(nn.Module):
    """Minimal nn.Module subclass that does not define any layers yet."""
    def __init__(self):
        # Run nn.Module's own initializer before doing anything else.
        super().__init__()

# No layers have been added, so the printed representation is just "Net()".
net = Net()
print(net)

Net()


In [8]:
class a:
    '''Parent class for the inheritance demo; its __init__ only prints a message.'''
    def __init__(self):
        print("initializing a")

class b(a):
    '''Inherits from a, but overrides __init__ without calling super(), so a's init method does not run.'''
    def __init__(self):
        print("initializing b")

class c(a):
    '''Inherits from a and DOES run a's init method, via super().__init__().'''
    def __init__(self):
        # Run the parent's initializer first, then c's own code.
        super().__init__()
        print("initializing c")

# Only "initializing b" is printed: b's __init__ never invokes a's __init__.
b_ob = b()

initializing b


In [9]:
# c's __init__ calls super().__init__() first, so a's message prints before c's.
c_ob = c()

initializing a
initializing c


In [12]:
class Net(nn.Module):
    """Four fully connected layers sized for 28x28 inputs and 10 outputs.

    Only the layers are declared here; this version defines no ``forward``
    method, so nothing yet describes how data moves between them.
    """

    def __init__(self):
        super().__init__()
        n_inputs = 28 * 28  # a 28x28 image flattened into one row
        n_hidden = 64
        n_outputs = 10
        # Each layer's input size equals the previous layer's output size.
        self.fc1 = nn.Linear(n_inputs, n_hidden)
        self.fc2 = nn.Linear(n_hidden, n_hidden)
        self.fc3 = nn.Linear(n_hidden, n_hidden)
        self.fc4 = nn.Linear(n_hidden, n_outputs)


net = Net()
print(net)

Net(
  (fc1): Linear(in_features=784, out_features=64, bias=True)
  (fc2): Linear(in_features=64, out_features=64, bias=True)
  (fc3): Linear(in_features=64, out_features=64, bias=True)
  (fc4): Linear(in_features=64, out_features=10, bias=True)
)


All we're doing is just defining values for some layers. We're calling them fc1, fc2, etc., but you could call them whatever you wanted. The fc just stands for fully connected, which means that every neuron in this layer is connected to every neuron in the neighboring layers. Nothing fancy going on here! Recall, each "connection" comes with weights and possibly biases, so each connection is a "parameter" for the neural network to play with.

In our case, we have 4 layers. Each of our nn.Linear layers expects the first parameter to be the input size, and the 2nd parameter is the output size.

So, our first layer takes in 28x28, because our images are 28x28 images of hand-drawn digits. A basic neural network is going to expect to have a flattened array, so not a 28x28, but instead a 1x784.

Then this outputs 64 connections. This means the next layer, fc2, takes in 64 (the next layer is always going to accept however many connections the previous layer outputs). From here, this layer outputs 64, then fc3 just does the same thing.

fc4 takes in 64, but outputs 10. Why 10? Our "output" layer needs 10 neurons. Why 10 neurons? We have 10 classes.

Now, that's great, we have those layers, but nothing really dictates how they interact with each other; they're just simply defined.

The simplest neural network is fully connected, and feed-forward, meaning we go from input to output. In one side and out the other in a "forward" manner. We do not have to do this, but, for this model, we will. So let's define a new method for this network called forward and then dictate how our data will pass through this model:

In [13]:
class Net(nn.Module):
    """Feed-forward stack of four fully connected layers (784 -> 64 -> 64 -> 64 -> 10).

    This version chains the layers directly, with no activation function
    between them.
    """

    def __init__(self):
        super().__init__()
        width = 64  # size shared by all hidden layers
        self.fc1 = nn.Linear(28 * 28, width)
        self.fc2 = nn.Linear(width, width)
        self.fc3 = nn.Linear(width, width)
        self.fc4 = nn.Linear(width, 10)

    def forward(self, x):
        """Feed ``x`` through fc1, fc2, fc3 and fc4 in that order and return the result."""
        for layer in (self.fc1, self.fc2, self.fc3, self.fc4):
            x = layer(x)
        return x


net = Net()
print(net)

Net(
  (fc1): Linear(in_features=784, out_features=64, bias=True)
  (fc2): Linear(in_features=64, out_features=64, bias=True)
  (fc3): Linear(in_features=64, out_features=64, bias=True)
  (fc4): Linear(in_features=64, out_features=10, bias=True)
)


In [14]:
class Net(nn.Module):
    """Fully connected classifier: 784 inputs, three 64-unit hidden layers, 10 outputs.

    Hidden layers use ReLU; the output layer is followed by a log-softmax
    taken over dimension 1.
    """

    def __init__(self):
        super().__init__()
        hidden_units = 64
        self.fc1 = nn.Linear(28 * 28, hidden_units)
        self.fc2 = nn.Linear(hidden_units, hidden_units)
        self.fc3 = nn.Linear(hidden_units, hidden_units)
        self.fc4 = nn.Linear(hidden_units, 10)

    def forward(self, x):
        """Return log-softmax (over dim 1) of the network's output for ``x``."""
        # ReLU follows each hidden layer; the output layer gets no ReLU.
        for hidden in (self.fc1, self.fc2, self.fc3):
            x = F.relu(hidden(x))
        logits = self.fc4(x)
        return F.log_softmax(logits, dim=1)


net = Net()
print(net)

Net(
  (fc1): Linear(in_features=784, out_features=64, bias=True)
  (fc2): Linear(in_features=64, out_features=64, bias=True)
  (fc3): Linear(in_features=64, out_features=64, bias=True)
  (fc4): Linear(in_features=64, out_features=10, bias=True)
)


In [19]:
# Random stand-in for one 28x28 image, flattened to a single row of 784 values
# (the size fc1 takes as input).
X = torch.randn(28, 28).view(1, 28 * 28)


In [18]:
# Bare expression: the notebook displays the tensor X as this cell's output.
X

tensor([[ 1.3921e-01,  6.0459e-01, -7.1354e-02, -5.3037e-01, -9.6750e-01,
          4.3706e-01,  6.4271e-01, -4.5614e-01,  2.7999e-01, -1.1592e+00,
          1.2905e+00, -8.3107e-01,  2.6175e-01,  1.0125e+00,  7.6001e-01,
          2.2098e+00, -1.2968e+00,  7.7191e-01,  1.5508e-01, -4.5879e-01,
         -7.3078e-02,  2.5715e-01, -1.4791e+00,  1.4386e+00, -6.8889e-01,
          4.8600e-01, -5.2128e-01,  1.6282e-01, -3.1453e-01,  3.0997e-01,
         -1.0359e+00,  6.3585e-01, -2.4402e-01, -1.7737e+00, -7.3454e-02,
         -7.7227e-01,  6.6589e-02,  1.6685e+00,  2.9934e-01, -1.7126e+00,
          3.2949e-01,  1.9297e+00,  9.3294e-01, -1.6818e+00,  1.3527e-01,
          1.9030e+00,  2.4102e+00,  4.7415e-01, -5.1598e-01,  1.3860e+00,
          9.0688e-01,  7.5869e-02,  9.6084e-01,  4.8678e-01, -7.1458e-01,
         -1.2259e+00,  2.8310e-02,  3.4644e-01,  2.8503e-01, -1.9093e+00,
         -2.0819e-01, -1.6413e-01, -7.4862e-01, -6.9157e-01, -3.4856e-02,
          2.8256e-01,  2.4093e-01,  4.

In [21]:
# Pass X through the network; calling the module runs its forward method.
output = net(X)
# Bare expression so the notebook displays the resulting tensor.
output

tensor([[-2.2787, -2.3532, -2.3873, -2.2256, -2.3054, -2.4224, -2.2334, -2.2393,
         -2.3433, -2.2584]], grad_fn=<LogSoftmaxBackward>)