<a href="https://colab.research.google.com/github/rraasch/pytorch_training/blob/main/softmax_pytorch.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import torch

In [None]:
#custom softmax implementation
class Softmax(torch.nn.Module):
    def __init__(self, in_size, out_size):
        super(Softmax, self).__init__()
        self.linear = torch.nn.Linear(in_size, out_size)

    def forward(self, x):
        return self.linear(x)


In [None]:
torch.manual_seed(1)

<torch._C.Generator at 0x79607eef4270>

In [None]:
x = torch.tensor([[1.0, 2.0]])
model = Softmax(2, 3)

In [None]:
z = model(x)
print(z)

tensor([[-0.4053,  0.8864,  0.2807]], grad_fn=<AddmmBackward0>)


In [None]:
_, yhat = z.max(1)
print(yhat)

tensor([1])


In [None]:
#now use multiple inputs
X=torch.tensor([[1.0, 1.0], [1.0, 2.0], [1.0, 3.0]])
z = model(X)

In [None]:
_, yhat = z.max(1)
print(yhat)

tensor([1, 1, 1])


Softmax training

In [55]:
import torch
import torch.nn as nn
import torchvision.transforms as transforms
import torchvision.datasets as dsets


In [56]:
#set the device to use
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [57]:
#load the sample data
train_data = dsets.MNIST(root='./data', train=True, download=True, transform=transforms.ToTensor())
validation_data = dsets.MNIST(root='./data', train=False, download=True, transform=transforms.ToTensor())

In [58]:
#check out the dataset
train_data[0][0], train_data[0][1]

(tensor([[[0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
           0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
           0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
           0.0000, 0.0000, 0.0000, 0.0000],
          [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
           0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
           0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
           0.0000, 0.0000, 0.0000, 0.0000],
          [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
           0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
           0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
           0.0000, 0.0000, 0.0000, 0.0000],
          [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
           0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
           0.0000, 0.0000, 0.0000, 0.0000, 

In [59]:
#create the softmax classifier -- same as above
class Softmax(torch.nn.Module):
    def __init__(self, in_size, out_size):
        super(Softmax, self).__init__()
        self.linear = torch.nn.Linear(in_size, out_size)

    def forward(self, x):
        return self.linear(x)



In [60]:
#images are 28x28, 10 possible numbers
input_size = 28*28
output_size = 10

In [61]:
#initialize the class
model = Softmax(input_size, output_size)
model.to(device)

Softmax(
  (linear): Linear(in_features=784, out_features=10, bias=True)
)

In [62]:
#view initial size of vectors and weights of model
print("W:", list(model.parameters())[0].size())
print("b:", list(model.parameters())[1].size())


W: torch.Size([10, 784])
b: torch.Size([10])


In [63]:
list(model.parameters())

[Parameter containing:
 tensor([[ 0.0073,  0.0187,  0.0010,  ..., -0.0242, -0.0112,  0.0253],
         [-0.0240,  0.0242, -0.0134,  ..., -0.0275, -0.0067, -0.0063],
         [-0.0316,  0.0263, -0.0218,  ..., -0.0127, -0.0256,  0.0158],
         ...,
         [-0.0131,  0.0292,  0.0298,  ..., -0.0013, -0.0006, -0.0299],
         [-0.0056,  0.0197, -0.0331,  ..., -0.0149, -0.0310, -0.0238],
         [ 0.0048, -0.0196, -0.0235,  ...,  0.0160,  0.0323,  0.0238]],
        device='cuda:0', requires_grad=True),
 Parameter containing:
 tensor([ 0.0040, -0.0246, -0.0128,  0.0283,  0.0132,  0.0312, -0.0108,  0.0285,
          0.0272, -0.0258], device='cuda:0', requires_grad=True)]

In [64]:
#specify our criterion function and optimizer and other params
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)

epochs = 100
accuracy_list = []

In [66]:
#create the trainloader and valloader
trainloader = torch.utils.data.DataLoader(dataset=train_data, batch_size=100)
valloader = torch.utils.data.DataLoader(dataset=validation_data, batch_size=5000)


In [None]:
#train the model
for epoch in range(epochs):
    print("Starting epoch", epoch)
    for x,y in trainloader:
        x = x.to(device)
        y = y.to(device)
        optimizer.zero_grad()
        z = model(x.view(-1, 28*28)) # this converts the image to a row tensor
        loss = criterion(z, y)
        loss.backward()
        optimizer.step()

    correct = 0
    for x_test, y_test in valloader:
        x_test = x_test.to(device)
        y_test = y_test.to(device)
        z = model(x_test.view(-1, 28*28))
        _, yhat = torch.max(z.data, 1)
        correct += (yhat==y_test).sum().item()

    accuracy = correct/len(valloader.dataset)
    accuracy_list.append(accuracy)
    print("Epoch accuracy:", accuracy)

Starting epoch 0
Epoch accuracy: 0.8957
Starting epoch 1
Epoch accuracy: 0.898
Starting epoch 2
Epoch accuracy: 0.9002
Starting epoch 3
Epoch accuracy: 0.903
Starting epoch 4
Epoch accuracy: 0.9047
Starting epoch 5
Epoch accuracy: 0.9054
Starting epoch 6
Epoch accuracy: 0.9062
Starting epoch 7
Epoch accuracy: 0.9077
Starting epoch 8
Epoch accuracy: 0.9084
Starting epoch 9
Epoch accuracy: 0.9088
Starting epoch 10
Epoch accuracy: 0.91
Starting epoch 11
Epoch accuracy: 0.9107
Starting epoch 12
Epoch accuracy: 0.9109
Starting epoch 13
Epoch accuracy: 0.9118
Starting epoch 14
Epoch accuracy: 0.9122
Starting epoch 15
Epoch accuracy: 0.9129
Starting epoch 16
Epoch accuracy: 0.9139
Starting epoch 17
Epoch accuracy: 0.9145
Starting epoch 18
Epoch accuracy: 0.9148
Starting epoch 19
Epoch accuracy: 0.9154
Starting epoch 20
Epoch accuracy: 0.9154
Starting epoch 21
Epoch accuracy: 0.9158
Starting epoch 22
Epoch accuracy: 0.9157
Starting epoch 23
Epoch accuracy: 0.916
Starting epoch 24
Epoch accurac