In [2]:
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
from torch.utils.tensorboard import SummaryWriter
import torch.nn.functional as F

# Run configuration: compute device, network dimensions and optimiser settings.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
input_size = 28 * 28  # one flattened 28x28 image
hidden_size, num_classes = 100, 10
num_epochs, batch_size = 5, 100
learning_rate = 1e-3
# TensorBoard event writer shared by the cells below; logs go under runs/mnist.
writer = SummaryWriter(log_dir='runs/mnist')

In [3]:
# MNIST dataset (downloaded into ./data on first use); images are converted to tensors.
train_dataset = torchvision.datasets.MNIST(root='./data', train=True, transform=transforms.ToTensor(), download=True)
test_dataset = torchvision.datasets.MNIST(root='./data', train=False, transform=transforms.ToTensor(), download=True)
# Data loaders: the training set is reshuffled on every pass, the test set keeps its order.
train_loader = torch.utils.data.DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
test_loader = torch.utils.data.DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)

# Pull a single training batch to inspect its shape and to log it as an image grid.
examples = iter(train_loader)
samples, labels = next(examples)
print(samples.shape, labels.shape)

# Matplotlib alternative for viewing the first six digits inline:
# for i in range(6):
#   plt.subplot(2, 3, i+1)
#   plt.imshow(samples[i][0], cmap='gray')
# plt.show()
img_grid = torchvision.utils.make_grid(samples)
writer.add_image('mnist_images', img_grid)
# Flush rather than close: later cells keep logging through this same writer,
# so it must stay open; flush() is enough to make the image visible on disk.
writer.flush()

torch.Size([100, 1, 28, 28]) torch.Size([100])


In [4]:
class NeuralNet(nn.Module):
  """Fully connected classifier: Linear -> ReLU -> Linear.

  The forward pass returns the raw output of the second linear layer
  (no soft-max is applied here).
  """

  def __init__(self, input_size, hidden_size, num_classes):
    """Create the two linear layers and the activation between them.

    Args:
      input_size: number of features per input row.
      hidden_size: width of the hidden layer.
      num_classes: number of output scores per input row.
    """
    super().__init__()
    self.fc1 = nn.Linear(in_features=input_size, out_features=hidden_size)
    self.relu = nn.ReLU()
    self.fc2 = nn.Linear(in_features=hidden_size, out_features=num_classes)

  def forward(self, x):
    """Map a batch of feature rows to one score per class."""
    hidden = self.relu(self.fc1(x))
    return self.fc2(hidden)
  
# Build the network on the selected device. The training and evaluation loops
# move every batch with `.to(device)`, so a model left on the CPU would raise a
# device-mismatch error as soon as CUDA is available.
model = NeuralNet(input_size, hidden_size, num_classes).to(device)
# Log the model graph, traced with one flattened sample batch that lives on the
# same device as the model.
writer.add_graph(model, samples.reshape(-1, input_size).to(device))

In [5]:
# Optimiser and objective: Adam over every model parameter, cross-entropy on
# the scores the model returns.
optimizer = torch.optim.Adam(params=model.parameters(), lr=learning_rate)
criterion = nn.CrossEntropyLoss()

In [6]:
# Sanity-check the training loader: shape of one image batch, number of
# batches per epoch, and total number of training samples.
first_batch = next(iter(train_loader))
print(first_batch[0].shape)
n_batches = len(train_loader)
print(n_batches)
total_samples = len(train_loader.dataset)
print(total_samples)

torch.Size([100, 1, 28, 28])
600
60000


In [7]:
# Training Loop
# Runs `num_epochs` passes over `train_loader`. Every `log_interval` steps the
# last batch loss is printed, and the mean loss and accuracy of that window are
# written to TensorBoard.
n_total_steps = len(train_loader)
log_interval = 100    # optimisation steps between two reports
running_loss = 0.0    # sum of batch losses since the last report
running_correct = 0   # correctly classified samples since the last report
running_samples = 0   # samples seen since the last report

model.train()
for epoch in range(num_epochs):
  for i, (images, labels) in enumerate(train_loader):
    # origin shape: [batch, 1, 28, 28]
    # resized: [batch, 784]
    images = images.reshape(-1, 28*28).to(device)
    labels = labels.to(device)

    # Forward pass
    outputs = model(images)
    loss = criterion(outputs, labels)

    # Backward pass and optimization
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    running_loss += loss.item()
    # Index of the highest score per row is the predicted class.
    predicted = outputs.argmax(dim=1)
    running_correct += (predicted == labels).sum().item()
    running_samples += labels.size(0)

    if (i+1) % log_interval == 0:
      global_step = epoch * n_total_steps + i
      print(f'Epoch [{epoch+1}/{num_epochs}], Step [{i+1}/{n_total_steps}], Loss: {loss.item():.4f}')
      writer.add_scalar('training loss', running_loss / log_interval, global_step)
      # Accuracy in percent over the samples actually seen in this window.
      # Dividing the correct count by a fixed 100 is only a percentage when
      # batch_size happens to be 100.
      writer.add_scalar('training accuracy', 100.0 * running_correct / running_samples, global_step)
      running_loss = 0.0
      running_correct = 0
      running_samples = 0

# Push any buffered scalars to disk without closing the shared writer.
writer.flush()


Epoch [1/5], Step [100/600], Loss: 0.4258
Epoch [1/5], Step [200/600], Loss: 0.3194
Epoch [1/5], Step [300/600], Loss: 0.2558
Epoch [1/5], Step [400/600], Loss: 0.4943
Epoch [1/5], Step [500/600], Loss: 0.1080
Epoch [1/5], Step [600/600], Loss: 0.1907
Epoch [2/5], Step [100/600], Loss: 0.1883
Epoch [2/5], Step [200/600], Loss: 0.2891
Epoch [2/5], Step [300/600], Loss: 0.1842
Epoch [2/5], Step [400/600], Loss: 0.2441
Epoch [2/5], Step [500/600], Loss: 0.1567
Epoch [2/5], Step [600/600], Loss: 0.2557
Epoch [3/5], Step [100/600], Loss: 0.1703
Epoch [3/5], Step [200/600], Loss: 0.1135
Epoch [3/5], Step [300/600], Loss: 0.1262
Epoch [3/5], Step [400/600], Loss: 0.1228
Epoch [3/5], Step [500/600], Loss: 0.1405
Epoch [3/5], Step [600/600], Loss: 0.0948
Epoch [4/5], Step [100/600], Loss: 0.0650
Epoch [4/5], Step [200/600], Loss: 0.1767
Epoch [4/5], Step [300/600], Loss: 0.0415
Epoch [4/5], Step [400/600], Loss: 0.1506
Epoch [4/5], Step [500/600], Loss: 0.1270
Epoch [4/5], Step [600/600], Loss:

In [30]:
# Test the model
# Computes accuracy on `test_loader` and logs one precision-recall curve per
# class to TensorBoard.
true_labels = []              # ground-truth class of every test sample
real_predictions = []         # arg-max class predicted for every test sample
probability_predictions = []  # soft-max class probabilities for every test sample

model.eval()
with torch.no_grad():
  n_correct = 0
  n_samples = 0
  for images, labels in test_loader:
    images = images.reshape(-1, 28*28).to(device)
    labels = labels.to(device)

    batched_raw_predictions = model(images)
    # Soft-max across the class dimension for the whole batch at once.
    batched_probability_predictions = F.softmax(batched_raw_predictions, dim=1)
    batched_real_predictions = batched_raw_predictions.argmax(dim=1)

    true_labels.append(labels)
    probability_predictions.append(batched_probability_predictions)
    real_predictions.append(batched_real_predictions)

    n_correct += (batched_real_predictions == labels).sum().item()
    n_samples += labels.shape[0] # Can also just use len(test_loader.dataset)

  true_labels = torch.cat(true_labels)                          # one entry per test sample
  real_predictions = torch.cat(real_predictions)                # one entry per test sample
  probability_predictions = torch.cat(probability_predictions)  # samples x classes

  for i in range(num_classes):
    # A PR curve compares predicted probabilities with the TRUE labels. Using
    # the model's own arg-max as the target would score the model against
    # itself.
    label_i = (true_labels == i)
    pred_i = probability_predictions[:, i]
    writer.add_pr_curve(str(i), label_i, pred_i, global_step=0)
  # Close once, after all curves have been written.
  writer.close()

  acc = 100.0 * n_correct / n_samples
  print(f'Accuracy of the network on the 10000 test images: {acc} %')

Accuracy of the network on the 10000 test images: 97.14 %


In [31]:
# Column-indexing demo: a 7x10 integer matrix whose every row counts 0..9;
# `a[:, 1]` then picks column 1 out of every row.
a = torch.arange(10).repeat(7, 1)
a[:, 1]



tensor([1, 1, 1, 1, 1, 1, 1])

In [35]:
import torch

# Leaf check: `a` is created directly with requires_grad=True, while `b` and
# `c` are produced by operations on it.
a = torch.tensor(2.0, requires_grad=True)
b = a.mul(3)
c = b.add(1)
c.backward()  # fills a.grad with dc/da
print(b.is_leaf)

False


In [None]:

import torch

# Hand-written 2-input -> 2-hidden -> 1-output network without bias terms,
# used to inspect the gradients autograd computes for each weight.

# Input vector with two features; created without requires_grad, so it is not trainable.
x = torch.tensor([1.0, 2.0])

# Layer 1 weights (trainable): w1, w2 feed h1; w3, w4 feed h2.
w1, w2, w3, w4 = (torch.tensor(v, requires_grad=True) for v in (0.5, 0.3, 0.2, 0.7))

# Layer 2 weights (trainable): w5 scales h1's activation, w6 scales h2's.
w5, w6 = (torch.tensor(v, requires_grad=True) for v in (1.0, 2.0))

# Hidden layer: weighted sums of the two inputs.
h1 = x[0]*w1 + x[1]*w2
h2 = x[0]*w3 + x[1]*w4

# ReLU activation on each hidden unit.
h1_act, h2_act = torch.relu(h1), torch.relu(h2)

# Output layer: weighted sum of the activated hidden units.
y = h1_act * w5 + h2_act * w6

# Backward pass: populates .grad on every trainable scalar above.
y.backward()

# Show gradients
for name, param in {'w1': w1, 'w2': w2, 'w3': w3, 'w4': w4, 'w5': w5, 'w6': w6}.items():
    print(f"{name}.grad = {param.grad}")


w1.grad = 1.0
w2.grad = 2.0
w3.grad = 2.0
w4.grad = 4.0
w5.grad = 1.100000023841858
w6.grad = 1.600000023841858


In [39]:
# With Bias
import torch

# Same hand-written 2-input -> 2-hidden -> 1-output network, now with a bias
# on each hidden unit and on the output.

# Input vector with two features; created without requires_grad, so it is not trainable.
x = torch.tensor([1.0, 2.0])

# Layer 1 weights (trainable): w1, w2 feed h1; w3, w4 feed h2.
w1, w2, w3, w4 = (torch.tensor(v, requires_grad=True) for v in (0.5, 0.3, 0.2, 0.7))

# Layer 1 biases (trainable): b1 belongs to h1, b2 to h2.
b1, b2 = (torch.tensor(v, requires_grad=True) for v in (0.1, 0.1))

# Layer 2 weights (trainable).
w5, w6 = (torch.tensor(v, requires_grad=True) for v in (1.0, 2.0))

# Output bias (trainable).
b3 = torch.tensor(3.0, requires_grad=True)

# -----------------
# Hidden layer: weighted sum of the inputs plus the unit's bias.
h1 = x[0]*w1 + x[1]*w2 + b1
h2 = x[0]*w3 + x[1]*w4 + b2

# ReLU activation on each hidden unit.
h1_act, h2_act = torch.relu(h1), torch.relu(h2)

# Output layer: weighted sum of the activations plus the output bias.
y = h1_act * w5 + h2_act * w6 + b3

# Backward pass: populates .grad on every trainable scalar above.
y.backward()

# Show gradients, in the same order as the parameters were introduced.
all_names = 'w1 w2 w3 w4 b1 b2 w5 w6 b3'.split()
all_params = [w1, w2, w3, w4, b1, b2, w5, w6, b3]

for name, param in zip(all_names, all_params):
    print(f"{name}.grad = {param.grad}")

w1.grad = 1.0
w2.grad = 2.0
w3.grad = 2.0
w4.grad = 4.0
b1.grad = 1.0
b2.grad = 2.0
w5.grad = 1.2000000476837158
w6.grad = 1.7000000476837158
b3.grad = 1.0
