In [1]:
import numpy as np # Importing the package NumPy
x = np.array([[4], [3]]) # defining a vector (an array in Python jargon).
                         # This is a column vector with two rows

#l1 norm
np.linalg.norm(x, ord=1) # See the link in the Text cell below to understand what
                         # this commands does

np.float64(7.0)

[numpy.linalg.norm](https://numpy.org/doc/stable/reference/generated/numpy.linalg.norm.html)

This function is able to return one of eight different matrix norms, or one of an infinite number of vector norms (described below), depending on the value of the ord parameter.

# Usage
linalg.norm(x, ord=None, axis=None, keepdims=False)
x, input array.

ord, order of the norm


In [2]:
#l2 norm
np.linalg.norm(x, ord=2) # Same as above but now calculating an L2 norm

np.float64(5.0)

In [3]:
#l-infinity norm
np.linalg.norm(x, ord=np.inf) # Calculating the L infinity norm

np.float64(4.0)

In [4]:
#l0 norm (different function) Tis note: Cardinality
np.count_nonzero(x) # Notice, here we're using a different command. See description below

2

[numpy.count_nonzero](https://numpy.org/doc/stable/reference/generated/numpy.count_nonzero.html)

Counts the number of non-zero values in the input array

In [5]:
# get the csv reading function
from numpy import genfromtxt
X = genfromtxt('numonly_gtky.csv', delimiter=',', dtype=int)
X

array([[17, 12],
       [17,  0],
       [17, 15],
       [16,  3],
       [16,  0],
       [17,  0],
       [12,  5],
       [16, 25],
       [12,  8],
       [17,  0],
       [18,  0],
       [17,  0],
       [16,  5],
       [18,  0],
       [16, 12],
       [17, 15],
       [16,  0],
       [16,  0],
       [18, 10],
       [16, 10],
       [17, 10],
       [16,  0],
       [16, 20],
       [17,  0],
       [17, 20],
       [16,  0],
       [18,  0]])

In [6]:
# after manipulation, you can save numpy arrays as csv files
Y = X.T
np.savetxt('test.csv', Y, delimiter=',', fmt = '%d') #can be %d or %f or %.3f

In [7]:
import autograd.numpy as np
from autograd import grad
import random

In [8]:
# building the x and w vectors for a single observation (regression objective function)
x = np.array([[2],[1]]) # inputs (features), represents your independent variable(s)
w = np.array([[1],[1]]) # the weight vector (parameters to learn), these are the coefficients that the model will adjust to minimize error.
y = 1 # target output (true label), this is the dependent variable — the “true” value your model is trying to predict.

In [16]:
# setting n = 1; you'll need to update for different value of n
# this is a regression objective function (loss function)
# this is computing squared error loss — the difference between the predicted output, and the true output y.
# we try to MINIMIZE the objective/loss function. 

def f(w):
	return (np.dot(w.T,x) - y)**2

In [10]:
# derivative wrt w of the regression objective function
# this is also known as the GRADIENT, the derivative of the loss function is the gradient
# it gives you the gradient of the loss with respect to w, and you use this gradient to update w during gradient descent
# the final goal of this regression is to minimize loss function (f(w)) and learn the optimal weights (w)
def dfd(w):
	return 2*(w.T.dot(x) - y)*x

In [11]:
grad_foo = grad(f)       # Obtain its gradient function, autograd computes df/dw automatically
print('Autogen Gradient : \n', grad_foo(w))
print('Theoretical Gradient : \n', dfd(w))

Autogen Gradient : 
 [[8]
 [4]]
Theoretical Gradient : 
 [[8]
 [4]]


In [12]:
# trace of a quadratic
n = 3
A = np.random.random((n,n))
x = np.random.random((n,1))

In [13]:
# the function
def f(x, A):
    return np.trace(np.dot(np.dot(np.transpose(x), A), x))

# the theoretical derivative (see lecture notes)
def dfd(x, A):
    return np.dot((np.transpose(A) + A), x)

# the autograd derivative
grad_foo = grad(f)

print('Quadratic Autogen Gradient: \n', grad_foo(x, A))
print('Quadratic Theoretical Gradient: \n', dfd(x, A))

Quadratic Autogen Gradient: 
 [[1.82593449]
 [1.84990888]
 [0.41076563]]
Quadratic Theoretical Gradient: 
 [[1.82593449]
 [1.84990888]
 [0.41076563]]


In [14]:
# multivariate gaussian function
def f(x, A):
    return np.exp(-np.trace(np.dot(np.dot(np.transpose(x), A), x)))

# theoretical derivative (see lecture notes)
def dfd(x, A):
    return -f(x, A) * np.dot((np.transpose(A) + A), x)

# autograd
grad_foo = grad(f)

print('Multivariate Gaussian Autogen Grad: \n', grad_foo(x, A))
print('Multivariate Gaussian Theoretical Grad: \n', dfd(x, A))

Multivariate Gaussian Autogen Grad: 
 [[-0.3967452 ]
 [-0.40195444]
 [-0.08925254]]
Multivariate Gaussian Theoretical Grad: 
 [[-0.3967452 ]
 [-0.40195444]
 [-0.08925254]]


In [15]:
# rectified linear unit (ReLU)
# it is generally better practice to name the functions something better than f
def ReLU(w, x):
    v = np.dot(np.transpose(w), x)
    return np.maximum(0, v)

# theoretical
def grad_ReLU(w, x):
    if np.dot(np.transpose(w), x) > 0:
        return x
    else:
        return 0

# it also generally good practice to check multiple random initializations
for i in range(3):
    x = np.random.randn(2, 1)
    w = np.random.randn(2, 1)

    grad_foo = grad(ReLU)
    print('Random w.Tx = %.3f'%np.dot(w.T, x)[0][0]) #this is ugly notation but I can't figure out how to make it cleaner... any thoughts?
    print('ReLU Autogen Grad: \n', grad_foo(w, x), '\n')
    print('ReLU Theoretical Grad: \n', grad_ReLU(w, x), '\n\n')

Random w.Tx = -0.624
ReLU Autogen Grad: 
 [[0.]
 [0.]] 

ReLU Theoretical Grad: 
 0 


Random w.Tx = -0.560
ReLU Autogen Grad: 
 [[0.]
 [0.]] 

ReLU Theoretical Grad: 
 0 


Random w.Tx = 2.047
ReLU Autogen Grad: 
 [[1.6035526 ]
 [1.06075404]] 

ReLU Theoretical Grad: 
 [[1.6035526 ]
 [1.06075404]] 


