In [25]:
import torch
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets
from torchvision.transforms import ToTensor

In [26]:
# Training split of FashionMNIST, stored under ./data and fetched via
# download=True; every sample goes through the ToTensor() transform.
training_data = datasets.FashionMNIST(root="data", train=True, download=True, transform=ToTensor())

In [27]:
# Held-out (train=False) split of FashionMNIST, stored under ./data and
# fetched via download=True; every sample goes through the ToTensor() transform.
test_data = datasets.FashionMNIST(root="data", train=False, download=True, transform=ToTensor())

In [28]:
batch_size = 64

# Wrap both dataset splits in DataLoaders that share the same batch size.
train_dataloader, test_dataloader = (
    DataLoader(split, batch_size=batch_size)
    for split in (training_data, test_data)
)

# Peek at the first test batch only, to confirm tensor shapes and label dtype.
for X, y in test_dataloader:
    print(
        f"Shape of X [N, C, H, W]: {X.shape}",
        f"Shape of y: {y.shape} {y.dtype}",
        sep="\n",
    )
    break

Shape of X [N, C, H, W]: torch.Size([64, 1, 28, 28])
Shape of y: torch.Size([64]) torch.int64


In [29]:
# Pick the compute device: CUDA when PyTorch reports it as available, CPU otherwise.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print(f"Using {device} device")

# Define model
class NeuralNetwork(nn.Module):
    """Fully connected classifier: flatten, then 784 -> 512 -> 512 -> 10.

    The two hidden linear layers are each followed by a ReLU; the final
    linear layer's 10 outputs are returned as-is (no activation applied).
    """

    def __init__(self):
        super().__init__()
        self.flatten = nn.Flatten()
        # Layers are created in forward order so that the Sequential indices
        # (and therefore the parameter names) are 0, 2 and 4 for the linears.
        layers = [
            nn.Linear(28 * 28, 512),
            nn.ReLU(),
            nn.Linear(512, 512),
            nn.ReLU(),
            nn.Linear(512, 10),
        ]
        self.linear_relu_stack = nn.Sequential(*layers)

    def forward(self, x):
        """Flatten ``x`` and return the output of the linear/ReLU stack."""
        return self.linear_relu_stack(self.flatten(x))

# Build the network, place it on the selected device, then show its layer summary.
model = NeuralNetwork()
model = model.to(device)
print(model)

Using cpu device
NeuralNetwork(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=784, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_features=512, bias=True)
    (3): ReLU()
    (4): Linear(in_features=512, out_features=10, bias=True)
  )
)


In [30]:
# Display the repr of a standalone linear layer with 20 inputs and 30 outputs.
nn.Linear(in_features=20, out_features=30)

Linear(in_features=20, out_features=30, bias=True)

In [31]:
# Pass one random 3-element vector through a new 3 -> 4 linear layer and
# show the input next to the layer's output.
m = nn.Linear(in_features=3, out_features=4)
dataIn = torch.randn(3)
dataOut = m(dataIn)
print(dataIn, dataOut, sep="\n")


tensor([ 1.4123, -0.8541,  0.3511])
tensor([-1.1075, -0.4650,  0.6804,  1.2347], grad_fn=<AddBackward0>)


In [32]:
# Apply ReLU to the linear layer's output from the previous cell and print
# the values before and after the activation.
r = nn.ReLU()
dataIn = dataOut
dataOut = r(dataIn)
print(dataIn, dataOut, sep="\n")


tensor([-1.1075, -0.4650,  0.6804,  1.2347], grad_fn=<AddBackward0>)
tensor([0.0000, 0.0000, 0.6804, 1.2347], grad_fn=<ReluBackward0>)


In [35]:
# Show the linear layer's learnable tensors: weight first, then bias.
for layer_param in (m.weight, m.bias):
    print(layer_param)

Parameter containing:
tensor([[-0.5413, -0.1121,  0.0348],
        [ 0.1294,  0.2328, -0.4927],
        [ 0.3957,  0.0455,  0.4524],
        [ 0.4345, -0.1850,  0.3517]], requires_grad=True)
Parameter containing:
tensor([-0.4510, -0.2760,  0.0016,  0.3395], requires_grad=True)


In [39]:
# Convert the layer's weight to a NumPy array: detach it from autograd,
# move it to the CPU, then call .numpy().
(
    m.weight
    .detach()
    .cpu()
    .numpy()
)

array([[-0.54127204, -0.11207987,  0.03484505],
       [ 0.12940036,  0.23275413, -0.49270204],
       [ 0.39569384,  0.04546758,  0.45241955],
       [ 0.4345113 , -0.18501234,  0.35171035]], dtype=float32)

In [40]:
# named_parameters() hands back a generator, so displaying it shows only the
# generator object itself; iterate over it to see the (name, parameter) pairs.
model.named_parameters()

<generator object Module.named_parameters at 0x7f7b5d920e40>

In [44]:
# List the name of every parameter in the model; the tensors are not needed here.
for param_name, _ in model.named_parameters():
    print(param_name)

linear_relu_stack.0.weight
linear_relu_stack.0.bias
linear_relu_stack.2.weight
linear_relu_stack.2.bias
linear_relu_stack.4.weight
linear_relu_stack.4.bias
