In [21]:
import torch
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets
from torchvision.transforms import ToTensor

In [22]:
training_data = datasets.FashionMNIST(
    root="data",
    train=True,
    download=True,
    transform=ToTensor(),
)

# Download test data from open datasets.
test_data = datasets.FashionMNIST(
    root="data",
    train=False,
    download=True,
    transform=ToTensor(),
)

In [23]:
batch_size = 64

# Create data loaders.
train_dataloader = DataLoader(training_data, batch_size=batch_size)
test_dataloader = DataLoader(test_data, batch_size=batch_size)

for X, y in train_dataloader:
    print(f"Shape of X [N, C, H, W]: {X.shape}")
    print(f"Shape of y: {y.shape} {y.dtype}")
    break
    
for X, y in test_dataloader:
    print(f"Shape of X [N, C, H, W]: {X.shape}")
    print(f"Shape of y: {y.shape} {y.dtype}")
    break

Shape of X [N, C, H, W]: torch.Size([64, 1, 28, 28])
Shape of y: torch.Size([64]) torch.int64
Shape of X [N, C, H, W]: torch.Size([64, 1, 28, 28])
Shape of y: torch.Size([64]) torch.int64


In [24]:
device = (
    "cuda"
    if torch.cuda.is_available()
    else "mps"
    if torch.backends.mps.is_available()
    else "cpu"
)
print(f"Using {device} device")

# Define model
class NeuralNetwork(nn.Module):
    def __init__(self):
        super().__init__()
        self.flatten = nn.Flatten()
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(28*28, 512),
            nn.ReLU(),
            nn.Linear(512, 512),
            nn.ReLU(),
            nn.Linear(512, 10)
        )

    def forward(self, x):
        x = self.flatten(x)
        logits = self.linear_relu_stack(x)
        return logits

model = NeuralNetwork().to(device)
print(model)

Using cuda device
NeuralNetwork(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=784, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_features=512, bias=True)
    (3): ReLU()
    (4): Linear(in_features=512, out_features=10, bias=True)
  )
)


In [25]:
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=1e-3)

In [26]:
def train(dataloader, model, loss_fn, optimizer):
    size = len(dataloader.dataset)
    model.train()
    for batch, (X, y) in enumerate(dataloader):
        X, y = X.to(device), y.to(device)

        # Compute prediction error
        pred = model(X)
        loss = loss_fn(pred, y)

        # Backpropagation
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()

        if batch % 100 == 0:
            loss, current = loss.item(), (batch + 1) * len(X)
            print(f"loss: {loss:>7f}  [{current:>5d}/{size:>5d}]")

In [27]:
def test(dataloader, model, loss_fn):
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    model.eval()
    test_loss, correct = 0, 0
    with torch.no_grad():
        for X, y in dataloader:
            X, y = X.to(device), y.to(device)
            pred = model(X)
            test_loss += loss_fn(pred, y).item()
            correct += (pred.argmax(1) == y).type(torch.float).sum().item()
    test_loss /= num_batches
    correct /= size
    print(f"Test Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")

In [28]:
epochs = 5
for t in range(epochs):
    print(f"Epoch {t+1}\n-------------------------------")
    train(train_dataloader, model, loss_fn, optimizer)
    test(test_dataloader, model, loss_fn)
print("Done!")

Epoch 1
-------------------------------
loss: 2.310104  [   64/60000]
loss: 2.282546  [ 6464/60000]
loss: 2.271273  [12864/60000]
loss: 2.267850  [19264/60000]
loss: 2.243557  [25664/60000]
loss: 2.218509  [32064/60000]
loss: 2.238286  [38464/60000]
loss: 2.203471  [44864/60000]
loss: 2.204998  [51264/60000]
loss: 2.168993  [57664/60000]
Test Error: 
 Accuracy: 38.5%, Avg loss: 2.160171 

Epoch 2
-------------------------------
loss: 2.175207  [   64/60000]
loss: 2.157259  [ 6464/60000]
loss: 2.101014  [12864/60000]
loss: 2.124861  [19264/60000]
loss: 2.072226  [25664/60000]
loss: 2.007615  [32064/60000]
loss: 2.054337  [38464/60000]
loss: 1.969816  [44864/60000]
loss: 1.981094  [51264/60000]
loss: 1.906002  [57664/60000]
Test Error: 
 Accuracy: 52.5%, Avg loss: 1.899131 

Epoch 3
-------------------------------
loss: 1.931684  [   64/60000]
loss: 1.896624  [ 6464/60000]
loss: 1.776191  [12864/60000]
loss: 1.831017  [19264/60000]
loss: 1.721192  [25664/60000]
loss: 1.659628  [32064/600

In [29]:
torch.save(model.state_dict(), "model.pth")
print("Saved PyTorch Model State to model.pth")

Saved PyTorch Model State to model.pth


In [30]:
model = NeuralNetwork().to(device)
model.load_state_dict(torch.load("model.pth"))

<All keys matched successfully>

In [31]:
classes = [
    "T-shirt/top",
    "Trouser",
    "Pullover",
    "Dress",
    "Coat",
    "Sandal",
    "Shirt",
    "Sneaker",
    "Bag",
    "Ankle boot",
]

model.eval()
x, y = test_data[0][0], test_data[0][1]
with torch.no_grad():
    x = x.to(device)
    pred = model(x)
    predicted, actual = classes[pred[0].argmax(0)], classes[y]
    print(f'Predicted: "{predicted}", Actual: "{actual}"')

Predicted: "Ankle boot", Actual: "Ankle boot"


In [41]:
with torch.no_grad():
    x = x.to(device)
    pred = model(x)
    predicted, actual = classes[pred[0].argmax(0)], classes[y]
    print(f'Predicted: "{predicted}", Actual: "{actual}"')

TypeError: 'Tensor' object is not callable

In [42]:
# Code in file autograd/two_layer_net_autograd.py
import torch
# 
# device = torch.device('cpu')
device = torch.device('cuda') # Uncomment this to run on GPU

# N is batch size; D_in is input dimension;
# H is hidden dimension; D_out is output dimension.
N, D_in, H, D_out = 64, 1000, 100, 10

# Create random Tensors to hold input and outputs
x = torch.randn(N, D_in, device=device)
y = torch.randn(N, D_out, device=device)

# Create random Tensors for weights; setting requires_grad=True means that we
# want to compute gradients for these Tensors during the backward pass.
w1 = torch.randn(D_in, H, device=device, requires_grad=True)
w2 = torch.randn(H, D_out, device=device, requires_grad=True)

learning_rate = 1e-6
for t in range(500):
  # Forward pass: compute predicted y using operations on Tensors. Since w1 and
  # w2 have requires_grad=True, operations involving these Tensors will cause
  # PyTorch to build a computational graph, allowing automatic computation of
  # gradients. Since we are no longer implementing the backward pass by hand we
  # don't need to keep references to intermediate values.
  y_pred = x.mm(w1).clamp(min=0).mm(w2)
  
  # Compute and print loss. Loss is a Tensor of shape (), and loss.item()
  # is a Python number giving its value.
  loss = (y_pred - y).pow(2).sum()
  print(t, loss.item())

  # Use autograd to compute the backward pass. This call will compute the
  # gradient of loss with respect to all Tensors with requires_grad=True.
  # After this call w1.grad and w2.grad will be Tensors holding the gradient
  # of the loss with respect to w1 and w2 respectively.
  loss.backward()

  # Update weights using gradient descent. For this step we just want to mutate
  # the values of w1 and w2 in-place; we don't want to build up a computational
  # graph for the update steps, so we use the torch.no_grad() context manager
  # to prevent PyTorch from building a computational graph for the updates
  with torch.no_grad():
    w1 -= learning_rate * w1.grad
    w2 -= learning_rate * w2.grad

    # Manually zero the gradients after running the backward pass
    w1.grad.zero_()
    w2.grad.zero_()

0 30309770.0
1 24341560.0
2 20327402.0
3 16258935.0
4 12074539.0
5 8379966.5
6 5574837.0
7 3677681.75
8 2473084.0
9 1727488.5
10 1261133.125
11 960544.5
12 758384.625
13 615936.375
14 510782.65625
15 430158.8125
16 366478.84375
17 314924.28125
18 272537.03125
19 237045.609375
20 207144.0625
21 181760.046875
22 160105.09375
23 141492.734375
24 125431.234375
25 111496.5234375
26 99360.5546875
27 88770.8125
28 79507.78125
29 71365.609375
30 64176.046875
31 57821.140625
32 52204.6640625
33 47220.44140625
34 42789.2421875
35 38835.9453125
36 35297.99609375
37 32127.04296875
38 29287.09765625
39 26736.521484375
40 24440.6640625
41 22367.63671875
42 20497.033203125
43 18805.08203125
44 17272.810546875
45 15882.33203125
46 14619.517578125
47 13470.86328125
48 12424.666015625
49 11470.552734375
50 10599.4453125
51 9804.076171875
52 9076.8623046875
53 8411.34375
54 7800.8974609375
55 7241.3662109375
56 6726.66015625
57 6254.146484375
58 5819.30859375
59 5418.6904296875
60 5049.0771484375
61 4707

In [43]:
# Code in file autograd/two_layer_net_custom_function.py
import torch

class MyReLU(torch.autograd.Function):
  """
  We can implement our own custom autograd Functions by subclassing
  torch.autograd.Function and implementing the forward and backward passes
  which operate on Tensors.
  """
  @staticmethod
  def forward(ctx, x):
    """
    In the forward pass we receive a context object and a Tensor containing the
    input; we must return a Tensor containing the output, and we can use the
    context object to cache objects for use in the backward pass.
    """
    ctx.save_for_backward(x)
    return x.clamp(min=0)

  @staticmethod
  def backward(ctx, grad_output):
    """
    In the backward pass we receive the context object and a Tensor containing
    the gradient of the loss with respect to the output produced during the
    forward pass. We can retrieve cached data from the context object, and must
    compute and return the gradient of the loss with respect to the input to the
    forward function.
    """
    x, = ctx.saved_tensors
    grad_x = grad_output.clone()
    grad_x[x < 0] = 0
    return grad_x


# device = torch.device('cpu')
device = torch.device('cuda') # Uncomment this to run on GPU

# N is batch size; D_in is input dimension;
# H is hidden dimension; D_out is output dimension.
N, D_in, H, D_out = 64, 1000, 100, 10

# Create random Tensors to hold input and output
x = torch.randn(N, D_in, device=device)
y = torch.randn(N, D_out, device=device)

# Create random Tensors for weights.
w1 = torch.randn(D_in, H, device=device, requires_grad=True)
w2 = torch.randn(H, D_out, device=device, requires_grad=True)

learning_rate = 1e-6
for t in range(500):
  # Forward pass: compute predicted y using operations on Tensors; we call our
  # custom ReLU implementation using the MyReLU.apply function
  y_pred = MyReLU.apply(x.mm(w1)).mm(w2)
 
  # Compute and print loss
  loss = (y_pred - y).pow(2).sum()
  print(t, loss.item())

  # Use autograd to compute the backward pass.
  loss.backward()

  with torch.no_grad():
    # Update weights using gradient descent
    w1 - learning_rate * w1.grad
    w2 -= learning_rate * w2.grad

    # Manually zero the gradients after running the backward pass
    w1.grad.zero_()
    w2.grad.zero_()


0 27082540.0
1 22889330.0
2 25783054.0
3 32690892.0
4 39422956.0
5 39447416.0
6 29799432.0
7 16613492.0
8 7460503.5
9 3255219.0
10 1648669.0
11 1036029.8125
12 767065.6875
13 620430.5
14 522809.96875
15 449106.9375
16 389791.53125
17 340534.53125
18 298966.8125
19 263548.4375
20 233196.8125
21 207026.1875
22 184354.921875
23 164681.34375
24 147553.5
25 132569.359375
26 119413.203125
27 107808.1875
28 97539.046875
29 88410.1796875
30 80291.78125
31 73050.03125
32 66577.53125
33 60778.8984375
34 55574.48828125
35 50890.97265625
36 46672.75
37 42862.77734375
38 39419.59375
39 36299.78125
40 33466.234375
41 30889.296875
42 28541.896484375
43 26401.43359375
44 24445.111328125
45 22656.09765625
46 21017.30078125
47 19517.955078125
48 18139.5
49 16871.740234375
50 15706.25
51 14632.45703125
52 13642.138671875
53 12728.08203125
54 11882.587890625
55 11100.2890625
56 10376.3203125
57 9705.353515625
58 9082.8115234375
59 8504.8544921875
60 7968.03369140625
61 7469.2958984375
62 7005.7490234375
6



AttributeError: module 'tensorflow' has no attribute 'placeholder'