In [1]:
import numpy as np
from sklearn import metrics
# seed random number generator

# Gradient Descent with Backpropagation, simple example from the lecture

In [2]:
def sigmoid(v):
    return 1/(1+np.exp(-v))

In [3]:
def sigmoid_derivative(v):
    return sigmoid(v)*(1-sigmoid(v))

**Passing a single input through the network and calculating error**

In [4]:
np.set_printoptions(precision=5)

l = 0.05 #learning rate
x=np.array([0.5,0.1])
y = 0.9
w = np.array([[0.15],[-0.3]])
b = 0

#Passing the example through the network
in_ = x@w + b
out = sigmoid(in_)

print('Input: ', in_)
print('Output: ', out)

#Calulating error
error = 0.5*np.power(y-out,2)
print('Error: ', error)

Input:  [0.045]
Output:  [0.51125]
Error:  [0.07556]


**Calculating partial derivatives**

In [5]:
#Gradient Descent with Backpropagation, simple example from the lecture
np.set_printoptions(precision=5)
l = 0.05 #learning rate
x=np.array([0.5,0.1])
y = 0.9
w = np.array([[0.15],[-0.3]])
b = 0

#Passing the example through the network
in_ = x@w + b
out = sigmoid(in_)

#Calulating error
error = 0.5*np.power(y-out,2)

#Calculating partial derivatives
dEdOut = out - y
dOutdIn = sigmoid_derivative(in_)
dIndW1 = x[0]
dIndW2 = x[1]
dIndB = 1

print('dEdOut: ', dEdOut)
print('dOutdIn: ', dOutdIn)
print('dIndW1: ', dIndW1)
print('dIndW2: ', dIndW2)

dEdOut:  [-0.38875]
dOutdIn:  [0.24987]
dIndW1:  0.5
dIndW2:  0.1


**Backpropagation: Calculating Gradients**

In [6]:
#Gradient Descent with Backpropagation, simple example from the lecture
np.set_printoptions(precision=5)
l = 0.05 #learning rate
x=np.array([0.5,0.1])
y = 0.9
w = np.array([[0.15],[-0.3]])
b = 0

#Passing the example through the network
in_ = x@w + b
out = sigmoid(in_)

#Calulating error
error = 0.5*np.power(y-out,2)

#Calculating partial derivatives
dEdOut = out - y
dOutdIn = sigmoid_derivative(in_)
dIndW1 = x[0]
dIndW2 = x[1]
dIndB = 1

#Calculating gradients/backpropagation
dEdW1 = dEdOut*dOutdIn*dIndW1
dEdW2 = dEdOut*dOutdIn*dIndW2
dEdB = dEdOut*dOutdIn*dIndB

print('dEdW1: ', dEdW1)
print('dEdW2: ', dEdW2)
print('dEdB: ', dEdB)

dEdW1:  [-0.04857]
dEdW2:  [-0.00971]
dEdB:  [-0.09714]


**Gradient Descent: Updating parameters of the model**

In [7]:
#Gradient Descent with Backpropagation, simple example from the lecture
np.set_printoptions(precision=5)
l = 0.05 #learning rate
x=np.array([0.5,0.1])
y = 0.9
w = np.array([[0.15],[-0.3]])
b = 0

#Passing the example through the network
in_ = x@w + b
out = sigmoid(in_)

#Calulating error
error = 0.5*np.power(y-out,2)

#Calculating partial derivatives
dEdOut = out - y
dOutdIn = sigmoid_derivative(in_)
dIndW1 = x[0]
dIndW2 = x[1]
dIndB = 1

#Calculating gradients/backpropagation
dEdW1 = dEdOut*dOutdIn*dIndW1
dEdW2 = dEdOut*dOutdIn*dIndW2
dEdB = dEdOut*dOutdIn*dIndB

#Updating parameters
w[0] = w[0] - l* dEdW1
w[1] = w[1] - l* dEdW2
b = b - l* dEdB

print('w1 = ', w[0])
print('w2 = ', w[1])
print('b = ', b)

w1 =  [0.15243]
w2 =  [-0.29951]
b =  [0.00486]


**Matrix notation**

In [8]:
#Gradient Descent with Backpropagation, simple example from the lecture
np.set_printoptions(precision=5)
l = 0.05 #learning rate
x=np.array([0.5,0.1])
y = 0.9
w = np.array([[0.15],[-0.3]])
b = 0

#Passing the example through the network
in_ = x@w + b
out = sigmoid(in_)

#Calulating error
error = 0.5*np.power(y-out,2)

#Calculating partial derivatives - matrix notation
dEdOut = out - y
dOutdIn = sigmoid_derivative(in_)
dIndW = x
dIndB = 1

#Matrix notation
dEdW = x.reshape([len(x),1])*dEdOut*dOutdIn
dEdB = dEdOut*dOutdIn*dIndB

#Updating parameters
w = w - l*dEdW
b = b - l* dEdB

print(dEdW)
print(dEdB)

[[-0.04857]
 [-0.00971]]
[-0.09714]


# Batch Gradient Descent

In [10]:
#Read data from a text file
from sklearn import preprocessing
data = np.loadtxt("/Users/3049848/Dropbox/Python Files/Jupyter Files/CSC4007/2022_23/Datasets/simpleregression.txt")

# split into inputs and outputs
X, y = data[:, :-1], data[:, -1]

#scaling the input data
#X = preprocessing.MinMaxScaler().fit_transform(X)

print ("X.shape:", X.shape)
print ("y.shape:", y.shape)
print(y)

X.shape: (100, 2)
y.shape: (100,)
[0.32276 0.39129 0.02366 0.42037 0.29666 0.01224 0.33582 0.22183 0.13878
 0.22321 0.06433 0.33642 0.25884 0.31426 0.09194 0.53826 0.26779 0.51274
 0.21551 0.07303 0.09603 0.5409  0.2428  0.01251 0.30088 0.32565 0.23311
 0.37082 0.03979 0.17671 0.10083 0.35578 0.1892  0.70879 0.08183 0.31787
 0.02324 0.17285 0.49951 0.12807 0.3561  0.00157 0.2372  0.29016 0.0651
 0.36162 0.02155 0.21984 0.18231 0.56569 0.51658 0.21467 0.00243 0.41947
 0.04843 0.28867 0.30489 0.15673 0.41711 0.05501 0.19028 0.09314 0.01974
 0.23027 0.12081 0.06365 0.17914 0.58763 0.04623 0.35102 0.09351 0.36981
 0.7152  1.      0.11697 0.12186 0.00588 0.0652  0.03183 0.11888 0.14047
 0.13801 0.37693 0.11726 0.18447 0.22876 0.04379 0.09764 0.15827 0.05284
 0.0823  0.19482 0.15841 0.02946 0.2519  0.13752 0.21464 0.01277 0.28875
 0.20878]


In [14]:
np.random.seed(0)

#initialising parameters
w = np.random.uniform(-1,1,[X.shape[1],1])
b = np.random.uniform(1)
l = 0.05

y=y.reshape(len(y),1)

predictions = sigmoid(X@w + b)
error = (0.5*np.power(predictions - y,2)).mean()
print('Error before training: ', error)


#forward pass
in_ = X@w+b
out = sigmoid(in_)

#backpropagation
dEdOut = out - y
dOutdIn = sigmoid_derivative(in_) 
dIndW = X
dEdW = (1/X.shape[0])*(dIndW.T@(dEdOut*dOutdIn))
dEdB = (1/X.shape[0])*np.ones([1,len(X)])@(dEdOut*dOutdIn)

#updating weights
w -= l*dEdW
b -= l*dEdB

predictions = sigmoid(X@w + b)
error = (0.5*np.power(predictions - y,2)).mean()
print('Error after training: ', error)

Error before training:  0.15662897876245127
Error after training:  0.15606568471922683


In [18]:
in_.shape

(100, 1)

# Stochastic Gradient Descent 

In [12]:
np.random.seed(0)

#initialising parameters
w = np.random.uniform(-1,1,[X.shape[1],1])
b = np.random.uniform(1)
l = 0.05

for i in range(len(X)):
    #forward pass
    in_ = X[i]@w+b
    out = sigmoid(in_)
    
    #error calculation
    error = (0.5)*(np.power((out-y[i]),2))
    
    #backpropagation
    dEdOut = out - y[i]
    dOutdIn = sigmoid_derivative(X[i]@w + b)
    dEdIn = dEdOut*dEdOut
    dIndW = X[i]
    dEdW = (dIndW.reshape(len(dIndW),1))*dEdIn #transposing 1D arrya, we can only use x.T for 2D or higher array
    dEdB = dEdIn*1
    
    #updating parameters of the model
    w = w - l*dEdW
    b = b - l*dEdB

In [13]:
np.random.seed(0)

#initialising parameters
w = np.random.uniform(-1,1,[X.shape[1],1])
b = np.random.uniform(1)
l = 0.05

predictions = sigmoid(X@w + b)
error = (0.5*np.power(predictions - y,2)).mean()
print('Error before training: ', error)

for i in range(len(X)):
    #forward pass
    in_ = X[i]@w+b
    out = sigmoid(in_)
    
    #error calculation
    error = (0.5)*(np.power((out-y[i]),2))
    
    #backpropagation
    dEdOut = out - y[i]
    dOutdIn = sigmoid_derivative(X[i]@w + b)
    dEdIn = dEdOut*dEdOut
    dIndW = X[i]
    dEdW = (dIndW.reshape(len(dIndW),1))*dEdIn #transposing 1D arrya, we can only use x.T for 2D or higher array
    dEdB = dEdIn*1
    
    #updating parameters of the model
    w = w - l*dEdW
    b = b - l*dEdB
    
predictions = sigmoid(X@w + b)
error = (0.5*np.power(predictions - y,2)).mean()
print('Error after training: ', error)

Error before training:  0.15662897876245127
Error after training:  0.05485830213775867
