# Import supporting packages (not all of them are used in this notebook)

In [1]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from tqdm import tqdm

from sklearn import linear_model
from sklearn.metrics import mean_squared_error, r2_score
from sklearn import svm

from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, precision_score, recall_score, ConfusionMatrixDisplay
from sklearn.model_selection import RandomizedSearchCV, train_test_split
from scipy.stats import randint

from sklearn.tree import export_graphviz
from IPython.display import Image
#import graphviz

from sklearn.neural_network import MLPClassifier


# import pytorch

In [2]:
import torch
from torch import nn
#from torch.utils.data import DataLoader
#from torchvision import datasets
#from torchvision.transforms import ToTensor



# define a univariate function to be minimized

The function is: $f(x) = (x-10)^2$

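We define this function as a class that inherits from `nn.Module`, the standard PyTorch way to package a computation. PyTorch's automatic differentiation (autograd) can then compute the derivative of the output with respect to any input tensor created with `requires_grad=True`.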



In [3]:
# Objective function packaged as an nn.Module so it can be called like any PyTorch model.
class objective_function_layer(nn.Module):
    """Single-variable objective f(x) = (x - 10)^2.

    The module holds no parameters; it only wraps the formula so that an
    instance can be called as ``model(x)``, which runs ``forward``.
    """

    def __init__(self):
        super().__init__()

    def forward(self, x):
        """Return (x - 10)^2, computed element-wise on the input ``x``."""
        return (x - 10) ** 2
    
    

In [4]:
# Instantiate the objective; calling the instance, model(x), runs its forward() method.

model = objective_function_layer()

# Show the module's printed representation.
print(model)


objective_function_layer()


# Given an input tensor, evaluate the function and its gradient w.r.t. input

In [5]:
# Define a 0-dimensional (scalar) input tensor; requires_grad=True asks autograd to track it.
input_tensor = torch.tensor(0.0, requires_grad=True)
print(f'The input_tensor is: {input_tensor}')
print(f'The dimension of the input_tensor is: {input_tensor.dim()}')
# No backward() call has been made yet, so .grad has not been populated.
print(f'The input_tensor.grad is currently: {input_tensor.grad}')

# A 1-dimensional input tensor could be defined instead by passing [0.0].

The input_tensor is: 0.0
The dimension of the input_tensor is: 0
The input_tensor.grad is currently: None


In [6]:
# Evaluate the function at input_tensor; calling the module runs forward().
output = model(input_tensor)
print(f'output  = {output }')


output  = 100.0


In [7]:
# Backpropagate from the scalar output; this fills input_tensor.grad with
# the derivative of output with respect to input_tensor.
output.backward()
print(f'Since we just did output.backward(), the input_tensor.grad is currently: {input_tensor.grad}')

Since we just did output.backward(), the input_tensor.grad is currently: -20.0


# Implement gradient descent

In [8]:
learning_rate = 0.2   # step size; try larger or smaller values to see how convergence changes

x = 0.0   # starting point of the descent
for i in range(100):
    # Wrap the current value in a fresh tensor so each iteration starts with an empty .grad.
    input_tensor = torch.tensor(x, requires_grad=True)
    output = model(input_tensor)
    output.backward()   # populates input_tensor.grad
    gradient = input_tensor.grad.item()
    # Gradient-descent step: move against the gradient, scaled by the learning rate.
    x = input_tensor.item() - learning_rate * gradient
    # Note: x printed here is the UPDATED value, while output was evaluated BEFORE the update.
    print(f'input = {x}, output  = {output.item()}')
    
    


input = 4.0, output  = 100.0
input = 6.4, output  = 36.0
input = 7.840000057220459, output  = 12.959999084472656
input = 8.704000091552734, output  = 4.665599346160889
input = 9.222400283813476, output  = 1.679614782333374
input = 9.533440399169923, output  = 0.6046607494354248
input = 9.72006435394287, output  = 0.21767768263816833
input = 9.832038497924804, output  = 0.07836407423019409
input = 9.899223327636719, output  = 0.028210937976837158
input = 9.939533996582032, output  = 0.010155937634408474
input = 9.963720512390136, output  = 0.0036561144515872
input = 9.978232192993165, output  = 0.0013162150280550122
input = 9.986939430236816, output  = 0.0004738291318062693
input = 9.99216365814209, output  = 0.00017057848162949085
input = 9.995298194885255, output  = 6.14082528045401e-05
input = 9.99717903137207, output  = 2.2105177777120844e-05
input = 9.998307418823241, output  = 7.957863999763504e-06
input = 9.998984336853027, output  = 2.865476744773332e-06
input = 9.99939060211181

# Try gradient descent on a function of two variables

The function is: $f(x_1, x_2) = (x_1 - x_2)^2 + (x_1 - 10)^2$

In [9]:
# Two-variable objective packaged as an nn.Module so it can be called like any PyTorch model.
class objective_function_layer(nn.Module):
    """Two-variable objective f(x1, x2) = (x1 - x2)^2 + (x1 - 10)^2.

    The module holds no parameters; an instance is called as
    ``model(x1, x2)``, which runs ``forward``.
    """

    def __init__(self):
        super().__init__()

    def forward(self, x1, x2):
        """Return (x1 - x2)^2 + (x1 - 10)^2 for the inputs ``x1`` and ``x2``."""
        gap_term = (x1 - x2) ** 2
        target_term = (x1 - 10) ** 2
        return gap_term + target_term
     

In [10]:
# Instantiate the two-variable objective. This rebinds `model`, replacing the
# single-variable instance created earlier in the notebook.

model = objective_function_layer()

# Show the module's printed representation.
print(model)


objective_function_layer()


# Given an input (one tensor for x1, another tensor for x2), evaluate the function and its gradient w.r.t. input

In [11]:
# One scalar tensor per variable; requires_grad=True asks autograd to track each of them.
x1= torch.tensor(2.0, requires_grad=True)
x2= torch.tensor(3.0, requires_grad=True)
print(f'value of input variables: x1={x1.item()}, x2={x2.item()}')


value of input variables: x1=2.0, x2=3.0


In [12]:
# Evaluate the function at (x1, x2); calling the module runs forward().
y = model(x1,x2)
print(f'output  = {y}')


output  = 65.0


In [13]:
# Backpropagate from the scalar y; this fills x1.grad and x2.grad with the
# partial derivatives of y with respect to x1 and x2.
y.backward()
print(f'the gradient of y w.r.t x1: {x1.grad.item()}')
print(f'the gradient of y w.r.t x2: {x2.grad.item()}')


the gradient of y w.r.t x1: -18.0
the gradient of y w.r.t x2: 2.0


# Implement gradient descent

In [14]:
learning_rate = 0.2   # step size applied to both variables

x1, x2 = 2.0, 3.0   # starting point of the descent
for i in range(100):
    # Wrap the current values in fresh tensors so each iteration starts with empty .grad fields.
    input1 = torch.tensor(x1, requires_grad=True)
    input2 = torch.tensor(x2, requires_grad=True)
    output = model(input1, input2)
    output.backward()   # populates input1.grad and input2.grad
    grad1, grad2 = input1.grad.item(), input2.grad.item()
    # Gradient-descent step: move each variable against its own partial derivative.
    x1 -= learning_rate * grad1
    x2 -= learning_rate * grad2
    # Note: x1, x2 printed here are the UPDATED values, while output was evaluated BEFORE the update.
    print(f'input = {x1}, {x2}, output  = {output.item()}')
    
    

    

input = 5.6, 2.6, output  = 65.0
input = 6.160000038146972, 3.8000000000000003, output  = 28.360000610351562
input = 6.752000141143798, 4.74399995803833, output  = 20.315200805664062
input = 7.2479998588562005, 5.547200107574463, output  = 14.58156681060791
input = 7.668480205535888, 6.227519893646241, output  = 10.466224670410156
input = 8.024703884124754, 6.803904056549072, output  = 7.512350559234619
input = 8.326502323150633, 7.292224025726318, output  = 5.392147064208984
input = 8.582190418243407, 7.705935192108154, output  = 3.8703267574310303
input = 8.79881200790405, 8.056437397003172, output  = 2.7780075073242188
input = 8.982337474822996, 8.353387165069579, output  = 1.993972659111023
input = 9.137822628021238, 8.604967212677002, output  = 1.4312162399291992
input = 9.269551753997801, 8.818109226226806, output  = 1.027285099029541
input = 9.381154537200926, 8.998685932159423, output  = 0.7373549938201904
input = 9.475705623626707, 9.151673221588135, output  = 0.52925223112106