In [4]:
import os  
import numpy as np  
import pandas as pd  
import matplotlib.pyplot as plt  

In [5]:
# NOTE: you will need to tinker with the meta-parameters below yourself (do not think of them as defaults by any means)
# meta-parameters for program
trial_name = 'p1_fit' # will add a unique sub-string to output of this program
degree = 3 # p, order of model
beta = 3.0 # regularization coefficient
alpha = 0.5 # step size coefficient
eps = 0.0 # controls convergence criterion
n_epoch = 100 # number of epochs (full passes through the dataset)

In [6]:
path = os.getcwd() + '/data/prob2.dat'  
data = pd.read_csv(path, header=None, names=['X', 'Y']) 

In [7]:
# set X (training data) and y (target variable)
cols = data.shape[1]  
X = data.iloc[:,0:cols-1]  
y = data.iloc[:,cols-1:cols] 

# convert from data frames to numpy matrices
X = np.array(X.values)
y = np.array(y.values)

In [8]:
# apply feature map to input features x1
# WRITEME: write code to turn X_feat into a polynomial feature map (hint: you could use a loop and array concatenation)
X_feat = np.ones((X.shape[0], 1))
for j in range(1,degree+1):
    X_j = np.array([x**j for x in X])
    X_feat = np.concatenate((X_feat, X_j), axis = 1)
X_feat;

In [9]:
# convert to numpy arrays and initalize the parameter array theta 
w = np.zeros((1,X_feat.shape[1]-1))
b = np.array([0])
theta = (b, w)

## Matrix multiplication in Numpy

(1) The standard * is the entrywise product. <br>
(2) To perform matrix multiplication, use np.dot( , )

In [104]:
A = np.array([[1,2],[3,4]])
B = np.array([[3,4],[5,6]])
np.dot(A,B)

array([[13, 16],
       [29, 36]])

--------------------------

In [10]:
def computeCost(X, y, theta, beta): # loss is now Bernoulli cross-entropy/log likelihood
    # WRITEME: write your code here to complete the routine
    m = X.shape[0]
    THETA = np.concatenate((theta[0].reshape(1,1),theta[1].reshape(degree,1)),axis = 0)
    Cost = (1/(2*m))*((np.dot(X_feat,THETA)-y)**2).sum()+beta/2*(theta[1]**2).sum()
    return Cost

In [11]:
def computeGrad(X, y, theta, beta):
    # WRITEME: write your code here to complete the routine (
    # NOTE: you do not have to use the partial derivative symbols below, they are there to guide your thinking)
    m = X.shape[0]
    THETA = np.concatenate((theta[0].reshape(1,1),theta[1].reshape(degree,1)),axis = 0)
    dL_db = (1/m)*(np.dot(X_feat,THETA)-y).sum() # derivative w.r.t. model bias b
    dL_dw = np.array([(1/m)*(((np.dot(X_feat,THETA)-y)*X_feat[:,i].reshape(m,1)).sum())+(beta*theta[1])[0,i-1] for i in range(1,theta[1].shape[1]+1)]) # derivative w.r.t model weights w
    nabla = (dL_db, dL_dw) # nabla represents the full gradient
    return nabla

In [12]:
# convert to numpy arrays and initalize the parameter array theta 
w = np.zeros((1,X_feat.shape[1]-1))
b = np.array([0])
theta = (b, w)

L = computeCost(X, y, theta, beta)
print("-1 L = {0}".format(L))
i = 0

while(i < n_epoch):
    dL_db, dL_dw = computeGrad(X, y, theta, beta)
    b = theta[0]
    w = theta[1]
    # update rules go here...
    # WRITEME: write your code here to perform a step of gradient descent & record anything else desired for later
    b = b - alpha*dL_db
    w = w - alpha*dL_dw
    theta = (b,w)
    
    L = computeCost(X, y, theta, beta)
    
    # WRITEME: write code to perform a check for convergence (or simply to halt early)
    
    print(" {0} L = {1}".format(i,L))
    i += 1
# print parameter values found after the search
print("w = ",w)
print("b = ",b)

-1 L = 0.2913055978424858
 0 L = 0.28585912152057374
 1 L = 0.283918919957383
 2 L = 0.28316228825511397
 3 L = 0.28284258189278455
 4 L = 0.28269901805368924
 5 L = 0.28263185820319514
 6 L = 0.28259963499029916
 7 L = 0.2825839428422267
 8 L = 0.2825762361154492
 9 L = 0.28257243323017384
 10 L = 0.28257055176389007
 11 L = 0.2825696195636105
 12 L = 0.2825691573216849
 13 L = 0.2825689280126293
 14 L = 0.28256881422915076
 15 L = 0.28256875776196355
 16 L = 0.28256872973693875
 17 L = 0.28256871582734844
 18 L = 0.28256870892347
 19 L = 0.28256870549675556
 20 L = 0.28256870379590504
 21 L = 0.28256870295168346
 22 L = 0.2825687025326508
 23 L = 0.2825687023246619
 24 L = 0.2825687022214255
 25 L = 0.2825687021701836
 26 L = 0.2825687021447493
 27 L = 0.28256870213212487
 28 L = 0.28256870212585866
 29 L = 0.28256870212274837
 30 L = 0.28256870212120455
 31 L = 0.28256870212043833
 32 L = 0.28256870212005797
 33 L = 0.2825687021198691
 34 L = 0.2825687021197754
 35 L = 0.28256870211

In [40]:
X_test = np.linspace(data.X.min(), data.X.max(), 100).reshape(100,1)

# apply feature map to input features x1
# WRITEME: write code to turn X_test into a polynomial feature map (hint: you could use a loop and array concatenation)
X_test_feat = np.ones((X_test.shape[0], 1))
for j in range(1,degree+1):
    X_test_j = np.array([x**j for x in X_test])
    X_test_feat = np.concatenate((X_test_feat, X_test_j), axis = 1)

X_test_feat

array([[  1.00000000e+00,  -9.73379901e-01,   9.47468431e-01,
         -9.22246727e-01],
       [  1.00000000e+00,  -9.53671495e-01,   9.09489320e-01,
         -8.67354039e-01],
       [  1.00000000e+00,  -9.33963089e-01,   8.72287051e-01,
         -8.14683908e-01],
       [  1.00000000e+00,  -9.14254683e-01,   8.35861625e-01,
         -7.64190404e-01],
       [  1.00000000e+00,  -8.94546277e-01,   8.00213041e-01,
         -7.15827596e-01],
       [  1.00000000e+00,  -8.74837871e-01,   7.65341300e-01,
         -6.69549553e-01],
       [  1.00000000e+00,  -8.55129464e-01,   7.31246401e-01,
         -6.25310343e-01],
       [  1.00000000e+00,  -8.35421058e-01,   6.97928345e-01,
         -5.83064037e-01],
       [  1.00000000e+00,  -8.15712652e-01,   6.65387131e-01,
         -5.42764702e-01],
       [  1.00000000e+00,  -7.96004246e-01,   6.33622760e-01,
         -5.04366408e-01],
       [  1.00000000e+00,  -7.76295840e-01,   6.02635232e-01,
         -4.67823224e-01],
       [  1.00000000e

In [49]:
theta[0].reshape(1,1)

array([[ 0.09466877]])

In [54]:
Theta = np.concatenate((theta[0].reshape(1,1), theta[1]), axis = 1).T
np.dot(X_test_feat,Theta)

In [58]:
import os  
import numpy as np  
import pandas as pd  
import matplotlib.pyplot as plt  

'''
IST 597: Foundations of Deep Learning
Problem 2: Polynomial Regression & 

@author - Alexander G. Ororbia II

    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program.  If not, see <http://www.gnu.org/licenses/>.
'''

# NOTE: you will need to tinker with the meta-parameters below yourself (do not think of them as defaults by any means)
# meta-parameters for program
trial_name = 'p1_fit' # will add a unique sub-string to output of this program
degree = 1 # p, order of model
beta = 0.0 # regularization coefficient
alpha = 0.0 # step size coefficient
eps = 0.0 # controls convergence criterion
n_epoch = 1 # number of epochs (full passes through the dataset)

# begin simulation

def regress(X, theta):
    Theta = np.concatenate((theta[0].reshape(1,1), theta[1]), axis = 1).T
    return np.dot(X,Theta)

def gaussian_log_likelihood(mu, y):
	# WRITEME: write your code here to complete the routine
	return -1.0
	
def computeCost(X, y, theta, beta): # loss is now Bernoulli cross-entropy/log likelihood
    # WRITEME: write your code here to complete the routine
    m = X.shape[0]
    THETA = np.concatenate((theta[0].reshape(1,1),theta[1].reshape(degree,1)),axis = 0)
    Cost = (1/(2*m))*((np.dot(X_feat,THETA)-y)**2).sum()+beta/2*(theta[1]**2).sum()
    return Cost

def computeGrad(X, y, theta, beta):
    # WRITEME: write your code here to complete the routine (
    # NOTE: you do not have to use the partial derivative symbols below, they are there to guide your thinking)
    m = X.shape[0]
    THETA = np.concatenate((theta[0].reshape(1,1),theta[1].reshape(degree,1)),axis = 0)
    dL_db = (1/m)*(np.dot(X_feat,THETA)-y).sum() # derivative w.r.t. model bias b
    dL_dw = np.array([(1/m)*(((np.dot(X_feat,THETA)-y)*X_feat[:,i].reshape(m,1)).sum())+(beta*theta[1])[0,i-1] for i in range(1,theta[1].shape[1]+1)]) # derivative w.r.t model weights w
    nabla = (dL_db, dL_dw) # nabla represents the full gradient
    return nabla

path = os.getcwd() + '/data/prob2.dat'  
data = pd.read_csv(path, header=None, names=['X', 'Y']) 

# set X (training data) and y (target variable)
cols = data.shape[1]  
X = data.iloc[:,0:cols-1]  
y = data.iloc[:,cols-1:cols] 

# convert from data frames to numpy matrices
X = np.array(X.values)
y = np.array(y.values)

# apply feature map to input features x1
# WRITEME: write code to turn X_feat into a polynomial feature map (hint: you could use a loop and array concatenation)
X_feat = np.ones((X.shape[0], 1))
for j in range(1,degree+1):
    X_j = np.array([x**j for x in X])
    X_feat = np.concatenate((X_feat, X_j), axis = 1)

X_feat

# convert to numpy arrays and initalize the parameter array theta 
w = np.zeros((1,X_feat.shape[1]-1))
b = np.array([0])
theta = (b, w)

L = computeCost(X, y, theta, beta)
print("-1 L = {0}".format(L))
i = 0

while(i < n_epoch):
    dL_db, dL_dw = computeGrad(X, y, theta, beta)
    b = theta[0]
    w = theta[1]
    # update rules go here...
    # WRITEME: write your code here to perform a step of gradient descent & record anything else desired for later
    b = b - alpha*dL_db
    w = w - alpha*dL_dw
    theta = (b,w)
    
    L = computeCost(X, y, theta, beta)
    
    # WRITEME: write code to perform a check for convergence (or simply to halt early)
    
    print(" {0} L = {1}".format(i,L))
    i += 1
# print parameter values found after the search
print("w = ",w)
print("b = ",b)

kludge = 0.25
# visualize the fit against the data
X_test = np.linspace(data.X.min(), data.X.max(), 100).reshape(100,1)

# apply feature map to input features x1
# WRITEME: write code to turn X_test into a polynomial feature map (hint: you could use a loop and array concatenation)
X_test_feat = np.ones((X_test.shape[0], 1))
for j in range(1,degree+1):
    X_test_j = np.array([x**j for x in X_test])
    X_test_feat = np.concatenate((X_test_feat, X_test_j), axis = 1)

X_test_feat

plt.plot(X_test, regress(X_test_feat, theta), label="Model")
plt.scatter(X[:,0], y, edgecolor='g', s=20, label="Samples")
plt.xlabel("x")
plt.ylabel("y")
plt.xlim((np.amin(X_test) - kludge, np.amax(X_test) + kludge))
plt.ylim((np.amin(y) - kludge, np.amax(y) + kludge))
plt.legend(loc="best")
# WRITEME: write your code here to save plot to disk (look up documentation/inter-webs for matplotlib)

plt.show()


-1 L = 0.2913055978424858


TypeError: can't multiply sequence by non-int of type 'float'