In [10]:
import torch
import torchvision
from torchvision import transforms, datasets

# Build the training split first, then the test split. Both use root '' (the
# current working directory), download the files if needed, and apply the same
# single-step ToTensor() transform to every image.
train, test = (
    datasets.MNIST(
        '',
        train=is_train_split,
        download=True,
        transform=transforms.Compose([transforms.ToTensor()]),
    )
    for is_train_split in (True, False)
)

# Batch both splits in groups of 10. Only the training loader shuffles;
# the test loader keeps the dataset order.
trainset = torch.utils.data.DataLoader(dataset=train, batch_size=10, shuffle=True)
testset = torch.utils.data.DataLoader(dataset=test, batch_size=10, shuffle=False)

In [12]:
import torch.nn as nn ##Object oriented programming, requires initializing things
import torch.nn.functional as F #Contains lots of functions, need to pass parameters

In [21]:
#Building the neural network
class Net(nn.Module):
    """Feed-forward classifier for flattened 28x28 images.

    Architecture: 784 inputs -> 64 -> 64 -> 64 -> 10 outputs, all layers
    fully connected (nn.Linear). The three hidden layers use ReLU; the
    output layer is followed by log_softmax.
    """

    def __init__(self):
        super().__init__()
        # Each layer's input size must equal the previous layer's output size.
        self.fc1 = nn.Linear(28*28, 64)  # flattened image -> hidden layer 1
        self.fc2 = nn.Linear(64, 64)     # hidden layer 1 -> hidden layer 2
        self.fc3 = nn.Linear(64, 64)     # hidden layer 2 -> hidden layer 3
        self.fc4 = nn.Linear(64, 10)     # hidden layer 3 -> one output per class

    def forward(self, x):
        """Run ``x`` through the network and return log_softmax of the output layer.

        ``x`` must be 2-D with 784 values per row: the layers need 784
        input features, and ``dim=1`` below needs a second dimension.
        """
        # Hidden layers: linear transform followed by the ReLU activation.
        for hidden_layer in (self.fc1, self.fc2, self.fc3):
            x = F.relu(hidden_layer(x))

        # The output layer gets no ReLU; its raw scores go straight to log_softmax.
        class_scores = self.fc4(x)

        # dim=1 normalises across the 10 class scores of each row
        # (dim=0 would normalise across the rows of the batch instead).
        return F.log_softmax(class_scores, dim=1)
       

    
    
#Create one instance of the network and print its layer summary
net = Net()
print(net)


#Notes on Net.__init__:
#super().__init__() runs the initialization of nn.Module before the fully connected layers are defined
#Format: self.fc1 = nn.Linear(input, output)
#input = size of the input images from the data --> 784 = 28*28. The image must be passed in flattened
#output = 64 nodes for each of the 3 hidden layers. This can be whatever we want, but we're going with 64
#nn.Linear = a fully connected layer
#For self.fc2, the previous layer outputs 64, so fc2 needs to take in 64. Its output is still whatever we want it to be.
#The output layer fc4 only has 10 outputs because we have 10 classes: digits 0 to 9.



#__init__ only defines the layers; the path the data takes through them is defined by forward() (a feed-forward pass)

Net(
  (fc1): Linear(in_features=784, out_features=64, bias=True)
  (fc2): Linear(in_features=64, out_features=64, bias=True)
  (fc3): Linear(in_features=64, out_features=64, bias=True)
  (fc4): Linear(in_features=64, out_features=10, bias=True)
)


In [23]:
#Build a stand-in "image" to pass through the network: a 28x28 tensor of random values

X = torch.rand(28, 28)
X

tensor([[6.6379e-01, 5.9441e-02, 2.4479e-01, 2.2510e-01, 5.3719e-01, 8.1988e-02,
         5.2100e-01, 7.5905e-01, 8.0735e-01, 5.4350e-01, 8.1206e-01, 4.3662e-01,
         2.5564e-01, 2.8489e-01, 7.8546e-01, 2.8004e-01, 9.4542e-01, 5.3788e-01,
         9.4180e-01, 9.2032e-01, 8.2502e-01, 2.4136e-01, 1.0047e-01, 7.5425e-01,
         6.8503e-01, 4.6010e-01, 9.2078e-01, 2.5537e-01],
        [6.4456e-01, 3.3165e-01, 1.1791e-01, 3.7207e-01, 4.7573e-01, 9.0808e-01,
         2.9885e-01, 3.1651e-01, 8.2023e-01, 1.0947e-01, 8.8690e-01, 2.5897e-01,
         1.0487e-01, 1.7253e-01, 4.8910e-01, 7.6729e-01, 1.6653e-01, 8.5624e-01,
         7.3375e-01, 4.9507e-01, 2.4630e-01, 4.0087e-01, 4.5676e-01, 5.5024e-01,
         9.1131e-01, 9.2202e-01, 9.7953e-01, 6.6076e-01],
        [4.6891e-01, 2.8643e-01, 7.8704e-01, 5.6443e-01, 3.7518e-01, 6.3746e-01,
         3.3159e-02, 7.2843e-01, 2.5250e-01, 2.0442e-01, 9.3485e-01, 2.3461e-01,
         8.0097e-01, 2.3791e-01, 3.4927e-01, 5.0986e-01, 3.0932e-01, 3.368

In [24]:
#What happens if we try to pass the X data as-is?
#X is still a 28x28 matrix, but fc1 expects 784 input features per row, so the
#matrix multiply inside the first layer fails with a size mismatch.
#The failure is the point of this cell, so it is caught and printed instead of
#left uncaught: that way "Restart & Run All" still reaches the cells below.
try:
    output = net(X)
except RuntimeError as err:
    print(f"{type(err).__name__}: {err}")

#Size mismatch: the image needs to be FLATTENED before it goes into the network

RuntimeError: mat1 and mat2 shapes cannot be multiplied (28x28 and 784x64)

In [26]:
#Let's try it again after flattening the image into a 1-D vector of 28*28 = 784 values

X = X.view(28*28)

#This still fails: Net.forward calls log_softmax(..., dim=1), and a 1-D tensor
#has no dimension 1 (there is no batch dimension yet).
#As before, the error is the demonstration, so it is caught and printed to keep
#"Restart & Run All" working.
try:
    output = net(X)
except IndexError as err:
    print(f"{type(err).__name__}: {err}")

IndexError: Dimension out of range (expected to be in range of [-1, 0], but got 1)

In [29]:
#The data needs to be shaped exactly how the network expects it: 2-D, one row of 784 values per image.

X = X.view(-1,28*28) #Recall: -1 lets view() work out that dimension itself. X holds 784 values, so the shape becomes (1, 784)
output = net(X)
output
#The output holds one log-probability per digit class 0 to 9 (it comes from log_softmax); it is not yet a predicted digit
#The values are all close together because the network hasn't been trained yet, but data was successfully passed through
#The next step is to see how far off we were from the actual value

tensor([[-2.3407, -2.5028, -2.3920, -2.2320, -2.2502, -2.2687, -2.2515, -2.2567,
         -2.2576, -2.3048]], grad_fn=<LogSoftmaxBackward>)