In [None]:
import numpy as np
import torch
import torch.nn as nn
import matplotlib.pyplot as plt

In [None]:
# Autograd (automatic differentiation) is the PyTorch engine that tracks operations on tensors and computes gradients
# A freshly created tensor does not track gradients by default
x = torch.ones((1, 10))
print(x.requires_grad)

# Tracking can be switched on afterwards, in place, with the requires_grad_() method
x.requires_grad_(True)
print(x.requires_grad)

# ... or it can be requested right away, at creation time, with 'requires_grad=True'
x = torch.ones((1, 10), requires_grad=True)
print(x.requires_grad)

In [None]:
# A tensor y obtained from x through an operation carries a gradient function ('grad_fn') specific to that operation
# Three examples: an addition, a multiplication and a reduction to a scalar (the last one is the y that remains defined)
for y in (x + 50, x * 50, x.mean()):
    print(y.grad_fn)

In [None]:
# Backward pass: calling backward() on a scalar result computes the gradients and stores them in '.grad'
x = torch.ones((1, 10), requires_grad=True)
y = torch.mean(x)
y.backward()
# y is the average of the 10 entries of x, so every entry receives a gradient of 1/10
print(x.grad)

In [None]:
# Some operations do not need gradient tracking (for instance at test time)
# Deactivating it for those operations saves memory

# Regular mode: the result of an operation on x is tracked
y = torch.mean(x)
print(y.requires_grad)

# Inside a torch.no_grad() block the same operation gives an untracked result
with torch.no_grad():
    y = torch.mean(x)
print(y.requires_grad)

In [None]:
# TODO: define a simple operation Y = W*X + B (with X=1, W=2 and B=3)
# Compute and print the gradients of Y with respect to X, W and B


In [None]:
# Example: build a small network; its output and gradients are computed in the following cells

# The network is a single linear (fully-connected) layer followed by a sigmoid nonlinearity
n_inputs, n_outputs = 256, 2
linear_layer = nn.Linear(in_features=n_inputs, out_features=n_outputs)
activation_fn = nn.Sigmoid()

# Inspect the layer: its dimensions, its bias vector and the shape of its weight matrix
print('Input size: ', linear_layer.in_features)
print('Output size: ', linear_layer.out_features)
print(linear_layer.bias)
print(linear_layer.weight.shape)

In [None]:
# Build an image-like input (16x16 random values) and display it
image_shape = (16, 16)
input_image = torch.randn(image_shape)
plt.imshow(input_image.numpy())
plt.show()

# Arbitrary target output, stored as floats
output_true = torch.tensor([0, 1], dtype=torch.float32)
print(output_true)

In [None]:
# Forward pass
# Vectorize the input image so that it has as many entries as the linear layer has inputs
input_reshape = input_image.reshape(linear_layer.in_features)
# Apply the linear layer first, then the nonlinearity
pre_activation = linear_layer(input_reshape)
output_predicted = activation_fn(pre_activation)
print(output_predicted)

In [None]:
# A loss function measures the difference between the 'true' and the 'predicted' output
# It will be used to compute the gradients and update the network parameters

# Binary cross entropy loss
loss_fn = nn.BCELoss()

# Evaluate the loss on the current values (prediction first, then target)
loss = loss_fn(output_predicted, output_true)

# The loss value can be printed (or stored, which is useful for monitoring the training)
print(loss.item())

# Backpropagation: compute the gradients of the loss
loss.backward()

# Show the gradient now attached to each parameter of the layer
for label, parameter in (('Weight gradient: ', linear_layer.weight),
                         ('Biases gradient: ', linear_layer.bias)):
    print(label, parameter.grad)

In [None]:
# Updating the network parameters is the job of an 'optimizer': it defines which optimization algorithm is used

# Here: stochastic gradient descent (SGD), acting on the parameters of the linear layer
learning_rate = 0.01
optimizer = torch.optim.SGD(params=linear_layer.parameters(), lr=learning_rate)

# Perform one update of the parameters
optimizer.step()


In [None]:
# Save and load the linear layer of the model (the whole module object, not only its parameters)
torch.save(linear_layer, 'fnn_model.pt')

# Since PyTorch 2.6, torch.load defaults to weights_only=True, which refuses to unpickle a full nn.Module
# weights_only=False is therefore required here to get the module object back
# WARNING: this unpickles arbitrary Python objects, so only use it on files you created or fully trust
model = torch.load('fnn_model.pt', weights_only=False)

# Explicit prints: a bare expression in the middle of a cell displays nothing
print(model)
print(model.bias)

In [None]:
# Save and load only the model's parameters (recommended)
params_path = 'fnn_model_params.pt'
torch.save(linear_layer.state_dict(), params_path)

# The model needs to be instantiated first ...
model = nn.Linear(256, 2)
# ... and can then receive the saved parameters
saved_state = torch.load(params_path)
model.load_state_dict(saved_state)
print(model.bias)