In [1]:
'''
First, we will import everything we need. We will also define a couple
of useful functions.
'''
import torch
from torch import nn

# This function returns the number of trainable parameters
# of a model.
def count_parameters(model):
    """Return how many scalar values in ``model`` are trainable.

    Walks over ``model.parameters()`` and adds up the element count
    (``numel()``) of every parameter whose ``requires_grad`` flag is set.
    Parameters with ``requires_grad == False`` are ignored.
    """
    total = 0
    for parameter in model.parameters():
        if parameter.requires_grad:
            total += parameter.numel()
    return total

# This function prints all parameters (and their gradients) of a model.
def print_parameters(model):
    """Print every named parameter of ``model`` with its value and gradient.

    For each entry of ``model.named_parameters()`` three lines are written
    to stdout: the parameter's name, its current value (``.data``) and its
    gradient (``.grad``).
    """
    for label, tensor in model.named_parameters():
        # One print call, newline-separated, yields the same three lines.
        print(label, tensor.data, tensor.grad, sep="\n")

In [2]:
'''
Now, we define a logistic regression w/o the final sigmoid function = 
a perceptron without the output threshold.
In PyTorch, that's called a linear layer (nn.Linear).
'''
# Layer dimensions: six input features feeding a single output unit.
input_units, output_units = 6, 1

# One linear layer: raw score = weight . x + bias (no sigmoid applied here).
percy = nn.Linear(in_features=input_units, out_features=output_units)

In [3]:
# percy has 6 weights (one per input unit) plus 1 bias, so this shows 7.
count_parameters(percy)

7

In [4]:
from torch import optim

# Plain stochastic gradient descent over percy's parameters. With lr=1 an
# update step moves each parameter by exactly its negative gradient.
optimizer = optim.SGD(percy.parameters(), lr=1)

# Weights and bias from our example, keyed by parameter name. Looking the
# value up by name (instead of a catch-all `else`) means an unexpected
# parameter raises a KeyError rather than silently receiving the bias value.
example_values = {
    'weight': torch.tensor([[2.5, -5, -1.2, 0.5, 2.0, 0.7]]),
    'bias': torch.tensor([0.1]),
}

# We set percy's weights and bias to those from our example.
for name, param in percy.named_parameters():
    print(name)
    print(param)  # randomly initialised value, before the overwrite
    # Overwrite in place without recording the operation in autograd.
    # Unlike assigning to `param.data`, `copy_` keeps the parameter's own
    # tensor and raises an error if the new value cannot be broadcast to
    # the parameter's shape.
    with torch.no_grad():
        param.copy_(example_values[name])
    print(param)  # value from the example, after the overwrite

weight
Parameter containing:
tensor([[-0.3793, -0.2659,  0.2570, -0.1893,  0.1284, -0.0832]],
       requires_grad=True)
Parameter containing:
tensor([[ 2.5000, -5.0000, -1.2000,  0.5000,  2.0000,  0.7000]],
       requires_grad=True)
bias
Parameter containing:
tensor([-0.0718], requires_grad=True)
Parameter containing:
tensor([0.1000], requires_grad=True)


In [5]:
# Build one example input (six features) and push it through the classifier.
x = torch.tensor([3, 2, 1, 3, 0, 4.15])
sigi = nn.Sigmoid()

# Raw score of the linear layer, then the sigmoid squashes it into (0, 1).
y_raw = percy(x)
y_hat = sigi(y_raw)

# Show the raw score first, then the probability, one per line.
print(y_raw, y_hat, sep="\n")

tensor([0.8050], grad_fn=<AddBackward0>)
tensor([0.6910], grad_fn=<SigmoidBackward>)


In [6]:
# Compute the cross-entropy loss between the prediction y_hat and the
# gold label y.
# y is the target for input x; here it is the positive class (1).
y = torch.tensor([1.])

# BCELoss takes probabilities as input, which is why the sigmoid output
# y_hat is passed in rather than the raw score y_raw.
ce = nn.BCELoss()  # Binary cross-entropy loss
# With y = 1 the binary cross-entropy reduces to -log(y_hat).
loss = ce(y_hat, y)
print(loss)

tensor(0.3696, grad_fn=<BinaryCrossEntropyBackward>)


In [7]:
# Compute the gradient.
# backward() backpropagates from the loss and stores d(loss)/d(param) in
# each parameter's .grad attribute; the parameter values themselves are
# not changed by this call.
# NOTE: gradients accumulate across backward() calls, so call
# optimizer.zero_grad() before repeating this for another example.
loss.backward()

# Each parameter is printed as: name, current value, gradient.
print_parameters(percy)

weight
tensor([[ 2.5000, -5.0000, -1.2000,  0.5000,  2.0000,  0.7000]])
tensor([[-0.9269, -0.6179, -0.3090, -0.9269,  0.0000, -1.2822]])
bias
tensor([0.1000])
tensor([-0.3090])


In [8]:
# Update our parameters, using the learning rate and the gradient.
# SGD step: param <- param - lr * grad. The optimizer was created with
# lr=1, so every parameter moves by exactly its negative gradient.
optimizer.step()

# Print percy's parameters again.
# The values have changed; the stored gradients are left as they were
# (step() does not reset .grad).
print_parameters(percy)

weight
tensor([[ 3.4269, -4.3821, -0.8910,  1.4269,  2.0000,  1.9822]])
tensor([[-0.9269, -0.6179, -0.3090, -0.9269,  0.0000, -1.2822]])
bias
tensor([0.4090])
tensor([-0.3090])
