In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)

cuda:0


In [23]:
# test defining the neural network

class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()

        # 2D conv layer with 1 input channel
        self.conv1 = nn.Conv2d(1, 32, 3, 1)

        # 2D conv layer with 32 input
        self.conv2 = nn.Conv2d(32, 64, 3, 1)

        self.dropout1 = nn.Dropout2d(0.25)
        self.dropout2 = nn.Dropout2d(0.5)

        self.fc1 = nn.Linear(12544, 128)
        self.fc2 = nn.Linear(128, 10)

    def forward(self, x):
        x = self.conv1(x)
        x = F.relu(x)

        x = self.conv2(x)
        x = F.relu(x)

        x = F.max_pool2d(x, 2)

        x = self.dropout1(x)
        x = torch.flatten(x, 1)

        x = self.fc1(x)
        x = F.relu(x)
        x= self.dropout2(x)
        x = self.fc2(x)

        output = F.log_softmax(x, dim=1)
        
        return output


my_nn = Net()
my_nn = my_nn.to(device)

print(my_nn)

Net(
  (conv1): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1))
  (conv2): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1))
  (dropout1): Dropout2d(p=0.25, inplace=False)
  (dropout2): Dropout2d(p=0.5, inplace=False)
  (fc1): Linear(in_features=12544, out_features=128, bias=True)
  (fc2): Linear(in_features=128, out_features=10, bias=True)
)


In [24]:
random_data = torch.rand((1, 1, 32, 32))
random_data

tensor([[[[0.7609, 0.0773, 0.0609,  ..., 0.6435, 0.0414, 0.0056],
          [0.2555, 0.5196, 0.5760,  ..., 0.9935, 0.3711, 0.7654],
          [0.4036, 0.4930, 0.5340,  ..., 0.6762, 0.5140, 0.6784],
          ...,
          [0.3164, 0.0737, 0.7842,  ..., 0.1095, 0.2935, 0.6337],
          [0.7820, 0.6172, 0.7207,  ..., 0.4584, 0.9975, 0.5004],
          [0.2885, 0.4372, 0.0693,  ..., 0.8950, 0.4473, 0.9937]]]])

In [26]:
result = my_nn(random_data.to(device))
print(result)

tensor([[-2.4368, -2.3488, -2.3533, -2.2978, -2.1482, -2.2690, -2.2128, -2.2063,
         -2.3981, -2.3963]], device='cuda:0', grad_fn=<LogSoftmaxBackward0>)


In [27]:
x = torch.ones(5)  # input tensor
y = torch.zeros(3)  # expected output
w = torch.randn(5, 3, requires_grad=True)
b = torch.randn(3, requires_grad=True)
z = torch.matmul(x, w)+b
loss = torch.nn.functional.binary_cross_entropy_with_logits(z, y)

In [28]:
print(f"Gradient function for z = {z.grad_fn}")
print(f"Gradient function for loss = {loss.grad_fn}")

Gradient function for z = <AddBackward0 object at 0x000001C28FC0AA10>
Gradient function for loss = <BinaryCrossEntropyWithLogitsBackward0 object at 0x000001C28FC0AD70>


In [29]:
loss.backward()
print(w.grad)
print(b.grad)

tensor([[0.2895, 0.0270, 0.3141],
        [0.2895, 0.0270, 0.3141],
        [0.2895, 0.0270, 0.3141],
        [0.2895, 0.0270, 0.3141],
        [0.2895, 0.0270, 0.3141]])
tensor([0.2895, 0.0270, 0.3141])


In [30]:
z = torch.matmul(x, w)+b
print(z.requires_grad)

with torch.no_grad():
    z = torch.matmul(x, w)+b
print(z.requires_grad)

True
False
