# PyTorch

Here's your best friend when working with PyTorch: https://pytorch.org/docs/stable/index.html.

In [1]:
%matplotlib inline
%load_ext autoreload
%autoreload 2

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib import cm
from mpl_toolkits.mplot3d import Axes3D, art3d
from matplotlib.patches import Circle, Ellipse
import seaborn as sns
import matplotlib.ticker as ticker
from IPython.display import Markdown, display, HTML

import torch
import torch.nn as nn
import torch.optim as optim

# Fix the dying kernel problem (only a problem in some installations - you can remove it, if it works without it)
import os
os.environ['KMP_DUPLICATE_LIB_OK'] = 'True'

## PyTorch basic operations tasks

**Task 1.** Calculate the sigmoid (logistic) function on every element of the following array [0.3, 1.2, -1.4, 0.2, -0.1, 0.1, 0.8, -0.25] and print the last 5 elements. Use only tensor operations.

In [2]:
########################
# Write your code here #
########################

array_a = torch.tensor([0.3, 1.2, -1.4, 0.2, -0.1, 0.1, 0.8, -0.25])
sigmoid_on_array_a = torch.sigmoid(array_a)
last_5_sigmoid_on_array_a = sigmoid_on_array_a[((len(sigmoid_on_array_a))-5):]
print(last_5_sigmoid_on_array_a)



tensor([0.5498, 0.4750, 0.5250, 0.6900, 0.4378])


**Task 2.** Calculate the dot product of the following two vectors:<br/>
$x = [3, 1, 4, 2, 6, 1, 4, 8]$<br/>
$y = [5, 2, 3, 12, 2, 4, 17, 9]$<br/>
a) by using element-wise mutliplication and torch.sum,<br/>
b) by using torch.dot,<br/>
b) by using torch.matmul and transposition (x.T).

In [3]:
########################
# Write your code here #
########################

array_x = torch.tensor([3,1,4,2,6,1,4,8])
array_y = torch.tensor([5,2,3,12,2,4,17,9])

dot_product_x_y_a = array_x * array_y
sum_dot_product_x_y_a = torch.sum(dot_product_x_y_a).item()
print(sum_dot_product_x_y_a)

dot_product_x_y_b = torch.dot(array_x, array_y).item()
print(dot_product_x_y_b)

dot_product_x_y_c = torch.matmul(array_x, array_y.T).item()
print(dot_product_x_y_c)


209
209
209


**Task 3.** Calculate the following expression<br/>
$$\frac{1}{1 + e^{-x_0 \theta_0 - \ldots - x_9 \theta_9 - \theta_{10}}}$$
for<br/>
$x = [1.2, 2.3, 3.4, -0.7, 4.2, 2.7, -0.5, 1.4, -3.3, 0.2]$<br/>
$\theta = [1.7, 0.33, -2.12, -1.73, 2.9, -5.8, -0.9, 12.11, 3.43, -0.5, -1.65]$<br/>
and print the result. Use only tensor operations.

In [4]:
########################
# Write your code here #
########################

array_x_3 = torch.tensor([1.2,2.3,3.4, -0.7,4.2,2.7,-0.5,1.4,-3.3,0.2])
#array_x_3 = array_x_3.add(1)
array_theta_3 = torch.tensor([1.7,0.33,-2.12,-1.73,2.9,-5.8,-0.9,12.11,3.43,-0.5,-1.65])
exponent_3 = -(torch.matmul(array_x_3, array_theta_3[:(len(array_theta_3)-1)].T).item())
exponent_3 -= array_theta_3[len(array_theta_3)-1]
value_3 = 1 / (1 + torch.exp(exponent_3)).item()
print(value_3)

'''
poprawne
x = torch.tensor([1.2, 2.3, 3.4, -0.7, 4.2, 2.7, -0.5, 1.4, -3.3, 0.2])
theta = torch.tensor([1.7, 0.33, -2.12, -1.73, 2.9, -5.8, -0.9, 12.11, 3.43, -0.5, -1.65])
print((1 / (1 + torch.exp(-torch.sum(x * theta[:10]) - theta[10]))).item())
'''

0.08762359797780898


# Tensor gradients

**Task 4.** Calculate the derivative $f'(w)$ using PyTorch and backward propagation (the backward method of the Tensor class) for the following functions and points:
  - $f(w) = w^3 + w^2$ and $w = 2.0$,
  - $f(w) = \text{sin}(w)$ and $w = \pi$,
  - $f(w) = \ln(w * e^{3w})$ and $w = 1.0$.
  
Print the values of those derivatives.

In [5]:
########################
# Write your code here #
########################

w_4_1 = torch.tensor(2.0, requires_grad=True)
w_4_2 = torch.tensor(np.pi, requires_grad=True)
w_4_3 = torch.tensor(1.0, requires_grad=True)

print(w_4_1)
print(w_4_2)
print(w_4_3)

out_w_4_1 = w_4_1.pow(3) + w_4_1.pow(2)
print("out={}".format(out_w_4_1))
out_w_4_1.backward()
print(w_4_1.grad)

out_w_4_2 = w_4_2.sin()
print("out={}".format(out_w_4_2))
out_w_4_2.backward()
print(w_4_2.grad)

w_4_2.grad.data.zero_()

out_w_4_3 = (((3 * w_4_3).exp()) * w_4_3).log()
print("out={}".format(out_w_4_3))
out_w_4_3.backward()
print(w_4_3.grad)



tensor(2., requires_grad=True)
tensor(3.1416, requires_grad=True)
tensor(1., requires_grad=True)
out=12.0
tensor(16.)
out=-8.742277657347586e-08
tensor(-1.)
out=3.0
tensor(4.)



**Task 5.** Calculate the derivative $\frac{\partial f}{\partial w_1}(w_1, w_2, w_3)$ using PyTorch and backward propagation (the backward method of the Tensor class) for the following functions and points:
  - $f(w_1, w_2) = w_1^3 + w_1^2 + w_2$ and $(w_1, w_2) = (2.0, 3.0)$,
  - $f(w_1, w_2, w_3) = \text{sin}(w_1) * w_2 + w_1^2 * w_3$ and $(w_1, w_2) = (\pi, 2.0, 4.0)$,
  - $f(w_1, w_2, w_3) = e^{w_1^2 + w_2^2 + w_3^2} + w_1^2 + w_2^2 + w_3^2$ and $(w_1, w_2, w_3) = (0.5, 0.67, 0.55)$.
  
Print the values of those derivatives.

In [6]:
########################
# Write your code here #
########################

w_5_1 = torch.tensor([2.0, 3.0], requires_grad=True)
w_5_2 = torch.tensor([np.pi, 2.0, 4.0], requires_grad=True)
w_5_3 = torch.tensor([0.5, 0.67, 0.55], requires_grad=True)

out_w_5_1 = (w_5_1[0]).pow(3) + (w_5_1[0]).pow(2) + w_5_1[1]
print("out={}".format(out_w_5_1))
out_w_5_1.backward()
print(w_5_1.grad)

out_w_5_2 = (w_5_2[0]).sin() * w_5_2[1] + (w_5_2[0]).pow(2) * w_5_2[2]
print("out={}".format(out_w_5_2))
out_w_5_2.backward()
print(w_5_2.grad)

out_w_5_3 = ((w_5_3[0]).pow(2) + (w_5_3[1]).pow(2) + (w_5_3[2]).pow(2)).exp() + (w_5_3[0]).pow(2) + (w_5_3[1]).pow(2) + (w_5_3[2]).pow(2)
print("out={}".format(out_w_5_3))
out_w_5_3.backward()
print(w_5_3.grad)



out=15.0
tensor([16.,  1.])
out=39.47842025756836
tensor([ 2.3133e+01, -8.7423e-08,  9.8696e+00])
out=3.723489999771118
tensor([3.7221, 4.9876, 4.0943])


# Backpropagation

**Task 6.** Write the calculate_se_array(base_w, x, y_target, weight_idx, weight_start, weight_end) which generates a numpy array of squared errors for the following sigmoid function $\text{sigmoid}(w * x)$ and target value $y_target$, where $x$ is a vector of dimension 3 and $w$ is a vector of weights of the same dimension. The function should generate a set of weight vectors w which have the same value as base_w on all coordinates other than weight_idx, and should have 100 values on the weight_idx coordinate ranging from weight_start to weight_end. Example for weight_idx =1: 

w = [[base_w[0], weight_start, base_w[2]], [base_w[0], weight_start + step, base_w[2]], ..., [base_w[0], weight_end - step, base_w[2]], [base_w[0], weight_end, base_w[2]]]

Then the function should calculate and return an array of squared errors between the value of the sigmoid on x with respect to all those weight vectors and the target value y_target. This will allow you to see how the error of a single neuron depends on every weight separately.

The function should work even if base_w and x are tensors (a safe way to do is tensor.cpu().detach().numpy()).

You're encouraged to experiment with different neural nets to see how the error depends on all its parameters.

In [53]:
def calculate_se_array(base_w, x, y_target, weight_idx, weight_start, weight_end):
    ########################
    # Write your code here #
    ########################
    sigmoid_result = torch.sigmoid(base_w * x)
    step = (weight_end - weight_start) / 100
    w = np.array([])
    
    temp = [0] * len(base_w)
    
    for i in range(len(base_w)):
        temp[i] = base_w[i]
    
    for i in range(101):
        print(type(w))
        #if type(w) != np.ndarray:
            
         #   w = w.detach()
        temp[weight_idx] = weight_start + i * step
        w = w.cpu().detach().np().append(w, temp)
        #w.append(temp)
    return w

test = np.array([])
print(test)
test = np.append(test, [1,2,3])
print(test)

'''
poprawne
def calculate_se_array(base_w, x, y_target, weight_idx, weight_start, weight_end):
    if isinstance(base_w, torch.Tensor):
        base_w = base_w.cpu().detach().numpy()
    if isinstance(x, torch.Tensor):
        x = x.cpu().detach().numpy()
        
    w = np.linspace(weight_start, weight_end, 100)
    if weight_idx == 0:
        w_array = np.array([[w_single, base_w[1], base_w[2]] for w_single in w])
    elif weight_idx == 1:
        w_array = np.array([[base_w[0], w_single, base_w[2]] for w_single in w])
    elif weight_idx == 2:
        w_array = np.array([[base_w[0], base_w[1], w_single] for w_single in w])
        
    y = np.array([1 / (1 + np.exp(-np.sum(x * w))) for w in w_array])
    
    return np.power(y_target - y, 2)
'''


[]
[1. 2. 3.]


In the next cell you can test the method and in the cell after that you can see the error plots for the training you saw in the lecture. The red dot indicates the current weight value and the error it gives. The red dot should arrive at the lowest point at the end of training.

In [54]:
x = torch.tensor([2., -1., 3.], requires_grad=False)
w = torch.tensor([-0.65, -0.4, -1.], requires_grad=True)
y_target = 0.65

base_w = w
weight_idx = 0 
weight_start = -5
weight_end = 5

print(calculate_se_array(base_w, x, y_target, weight_idx, weight_start, weight_end))

<class 'numpy.ndarray'>


AttributeError: 'numpy.ndarray' object has no attribute 'cpu'

In [None]:
x = torch.tensor([2., -1., 3.], requires_grad=False)
w = torch.tensor([-0.65, -0.4, -1.], requires_grad=True)
y_target = 0.65

optimizer = optim.SGD([w], lr=0.1)

losses = []
n_epochs = 100
for epoch in range(n_epochs):

    optimizer.zero_grad()
    y = torch.sigmoid(torch.sum(x * w))
    loss = torch.pow(y - y_target, 2)
    loss.backward()
    losses.append(loss.item())
    optimizer.step()

    if epoch > 0 and epoch % 10 == 0:
        print("Epoch: {}".format(epoch))
        print(w.cpu().detach().numpy())
        print(loss.item())
        for i in range(3):    
            w_range = np.linspace(-5, 5, 100)
            errors = calculate_se_array(w, x, y_target, i, -5, 5)
            sns.lineplot(x=w_range, y=errors).set_title('Training loss')
            plt.scatter([w[i].item()], [loss.item()], color='red')
            plt.xlabel("weight {}".format(i))
            plt.ylabel("squared loss")
            plt.show()
            
'''
poprawne
class TwoLayerModel(nn.Module):
    def __init__(self, seed):
        super().__init__()

        self.seed = torch.manual_seed(seed)

        self.fc_1 = nn.Linear(2, 4, bias=False)
        self.fc_2 = nn.Linear(4, 4, bias=False)
        self.fc_3 = nn.Linear(4, 1, bias=False)

    def forward(self, x):
        x = torch.sigmoid(self.fc_1(x))
        x = torch.sigmoid(self.fc_2(x))
        x = self.fc_3(x)

        return x
        

# Initialize the data

x_data = [torch.tensor([1.0, 1.0]), 
          torch.tensor([0.0, 0.0]), 
          torch.tensor([2.0, -1.0]), 
          torch.tensor([-1.0, 0.5]), 
          torch.tensor([-0.5, -2.0])
         ]
y_data = [torch.tensor(2.0),
          torch.tensor(1.0),
          torch.tensor(-1.0),
          torch.tensor(0.5),
          torch.tensor(2.0)]

# Initialize the neural network and optimizer

neural_net = TwoLayerModel(seed=6789)

optimizer = optim.SGD(neural_net.parameters(), lr=0.1)

# Perform the training for 1000 epochs

losses = []
n_epochs = 1000
for epoch in range(n_epochs):

    optimizer.zero_grad()
    
    for i in range(len(x_data)):
        
        y = neural_net(x_data[i])
        if i == 0:
            loss = torch.pow(y - y_data[i], 2)
        else:
            loss += torch.pow(y - y_data[i], 2)
    
    loss.backward()
    losses.append(loss.item())
        
    optimizer.step()
          
print("All weights - layer by layer starting from left to right")

########################
# Write your code here #
########################
for param in neural_net.parameters():
    print(param.data)
    
print()
print("w_{{1, 2}}^{{(1)}} (rounded to 4 decimal points)")

########################
# Write your code here #
########################
print(np.round(list(neural_net.parameters())[0].data[0][1].item(), 4))

print()
print("w_{{1, 3}}^{{(2)}} (rounded to 4 decimal points)")

########################
# Write your code here #
########################
print(np.round(list(neural_net.parameters())[2].data[0][2].item(), 4))

print()
print("Values on the training data (verify the difference is below epsilon)")

def f(x):
    return x[0] * x[1] + 1

########################
# Write your code here #
########################

epsilon = 0.1
for i in range(5):
    assert np.abs(neural_net(x_data[i]).item() - f(x_data[i]).item()) < epsilon
    print(neural_net(x_data[i]).item())
    print(f(x_data[i]).item())
    print()

print()
print("NN value on x=[2.0, 2.0] vs real value")

########################
# Write your code here #
########################
x = torch.tensor([2.0, 2.0])
print(neural_net(x).item())
print(f(x).item())

print()
print("NN value on x=[-1.0, -1.0] vs real value")

########################
# Write your code here #
########################
x = torch.tensor([-1.0, -1.0])
print(neural_net(x).item())
print(f(x).item())

print()
print("NN value on x=[3.0, -3.0] vs real value")

########################
# Write your code here #
########################
x = torch.tensor([3.0, -3.0])
print(neural_net(x).item())
print(f(x).item())
'''

**Task 7*.** Train a neural network with:
  - two input neurons, 
  - four hidden neurons with no bias and sigmoid activation in the first hidden layer,
  - four hidden neurons with no bias and with sigmoid activation in the second hidden layer,
  - one output neuron with no bias and without sigmoid activation 
  
to get a good approximation of $f(x) = x_1 * x_2 + 1$ on the following dataset $D = \{(1.0, 1.0), (0.0, 0.0), (2.0, -1.0), (-1.0, 0.5), (-0.5, -2.0)\}$, i.e. the network should satisfy:
  - $\text{net}(1.0, 1.0) \sim 2.0$,
  - $\text{net}(0.0, 0.0) \sim 1.0$,
  - $\text{net}(2.0, -1.0) \sim -1.0$,
  - $\text{net}(-1.0, 0.5) \sim 0.5$,
  - $\text{net}(-0.5, -2.0) \sim 2.0$.
  
Use seed=6789 to initialize the network.

After training print all weights and separately print $w_{1, 2}^{(1)}$ (the weight from the second input to the first hidden neuron in the first hidden layer) and $w_{1, 3}^{(3)}$ (the weight from the third hidden neuron in the second hidden layer to the output unit).

Print the values of the network on the training points and verify that these values are closer to the real values of the $f$ function than $\epsilon = 0.1$, i.e. $|\text{net}(x) - f(x)| < \epsilon$ for $x \in D$.

Because this network is only tested on the training set, it will certainly overfit if trained long enough. Train for 1000 epochs and then calculate
  - $\text{net}(2.0, 2.0)$,
  - $\text{net}(-1.0, -1.0)$,
  - $\text{net}(3.0, -3.0)$.
  
How far are these values from real values of the function $f$?

In [None]:
class TwoLayerModel(nn.Module):
    def __init__(self, seed):
        super().__init__()

        self.seed = torch.manual_seed(seed)

        ########################
        # Write your code here #
        ########################


    def forward(self, x):
        ########################
        # Write your code here #
        ########################


        return x

In [None]:
# Initialize the data

########################
# Write your code here #
########################


# Initialize the neural network and optimizer

########################
# Write your code here #
########################


# Perform the training for 1000 epochs

########################
# Write your code here #
########################


print("All weights - layer by layer starting from left to right")

########################
# Write your code here #
########################

    
print()
print("w_{{1, 2}}^{{(1)}} (rounded to 4 decimal points)")

########################
# Write your code here #
########################


print()
print("w_{{1, 3}}^{{(2)}} (rounded to 4 decimal points)")

########################
# Write your code here #
########################


print()
print("Values on the training data (verify the difference is below epsilon)")

########################
# Write your code here #
########################


print()
print("NN value on x=[2.0, 2.0] vs real value")

########################
# Write your code here #
########################


print()
print("NN value on x=[-1.0, -1.0] vs real value")

########################
# Write your code here #
########################

print()
print("NN value on x=[3.0, -3.0] vs real value")

########################
# Write your code here #
########################
