In [None]:
'''
PURPOSE:
- Define the cost function and its gradient for one-hot encoded outputs and a 2D theta.
- Dimensions:
    X     = n x m, where n is the number of pixels and m is the number of training examples
    Y     = k x m, where k is the number of distinct outputs
    Theta = n x k
'''


In [56]:
import numpy as np
import random

In [98]:
def cost_vec(x,y,theta):
    """Return the sum of squared residuals between y and theta^T x.

    Per the notebook header, x is n x m, y is k x m and theta is n x k,
    so theta^T x has the same k x m shape as y.
    """
    residual = y - theta.T.dot(x)
    return np.sum(np.square(residual))

def dcost_dtheta_vec(x, y, theta):
    """Return the gradient of cost_vec with respect to theta.

    Computed as 2 * x (theta^T x - y)^T, which has the same shape as theta
    (n x k under the notebook's dimension convention).
    """
    prediction_error = theta.T.dot(x) - y
    return 2 * np.dot(x, prediction_error.T)

def grad_desc_vec(x=None, y=None, init_theta=None, alpha=1e-5, max_iter=10000,
                  eps=1e-5, grad_fn=None):
    """Minimise the cost over theta with fixed-step gradient descent.

    Parameters
    ----------
    x, y : array-like
        Inputs and targets, passed unchanged to the gradient function.
    init_theta : array-like
        Starting point. It is copied, so the caller's array is not modified.
    alpha : float
        Step size. The default is only a placeholder; tune it for the data.
    max_iter : int
        Maximum number of update steps.
    eps : float
        Stop once the norm of the last update is no larger than this.
    grad_fn : callable, optional
        Called as grad_fn(x, y, theta) and must return an array shaped like
        theta. Defaults to dcost_dtheta_vec.

    Returns
    -------
    numpy.ndarray
        The final theta.

    Raises
    ------
    ValueError
        If x, y or init_theta is not supplied. They default to None only so
        that the earlier zero-argument signature remains callable.
    """
    if x is None or y is None or init_theta is None:
        raise ValueError("grad_desc_vec requires x, y and init_theta")
    if grad_fn is None:
        # Resolved at call time so the helper may be defined in any cell order.
        grad_fn = dcost_dtheta_vec

    # Float copy: keeps init_theta untouched and lets the in-place update
    # work even when the caller passes an integer array.
    t = np.array(init_theta, dtype=float)

    for _ in range(max_iter):
        prev_t = t.copy()
        t -= alpha * grad_fn(x, y, t)
        if np.linalg.norm(t - prev_t) <= eps:
            break
    return t

In [91]:
# Synthetic problem used to check the analytic gradient numerically.
m = 30      # number of training examples
n = 1025    # number of rows of X and theta
k = 6       # number of outputs

# Draw from a seeded generator so Restart & Run All reproduces the same
# matrices, and therefore the same numbers in the gradient check.
data_rng = np.random.RandomState(0)
X = data_rng.rand(n, m)
Y = data_rng.rand(k, m)
theta = data_rng.rand(n, k)

print(X.shape, Y.shape, theta.shape)


((1025, 30), (6, 30), (1025, 6))


In [134]:
# Finite-difference check of dcost_dtheta_vec.
# Perturb theta at 5 randomly chosen (row, column) entries, one at a time, and
# compare the forward-difference slope of cost_vec with the analytic gradient
# at the same entry.
h = 0.0001

# Kept from the original cell. Note that this seeds only the stdlib `random`
# module; np.random does not read that state.
random.seed(5000)

# np.random needs its own seed. A private RandomState makes the sampled
# indices reproducible without touching NumPy's global stream.
index_rng = np.random.RandomState(5000)
c1 = index_rng.randint(0, theta.shape[0], (5, 1))   # row indices into theta
c2 = index_rng.randint(0, theta.shape[1], (5, 1))   # column indices into theta

# Neither the unperturbed cost nor the analytic gradient depends on which
# entry is perturbed, so compute them once instead of on every iteration.
costs_1 = cost_vec(X, Y, theta)
grad = dcost_dtheta_vec(X, Y, theta)

for i, j in zip(c1.ravel(), c2.ravel()):
    print(i, j)
    theta_disturbed = np.copy(theta)
    theta_disturbed[i, j] += h
    costs_2 = cost_vec(X, Y, theta_disturbed)
    dcost = (costs_2 - costs_1) / h
    print(grad[i, j], dcost)



(469, 3)
(7254.2044241580215, 7254.2053647339344)
(60, 2)
(9275.0148265722055, 9275.016114115715)
(333, 5)
(7736.8800098744869, 7736.8811145424843)
(192, 1)
(5740.9156341901598, 5740.9162819385529)
(621, 3)
(7821.98149966865, 7821.9825960695744)
