# In [2]:
import numpy as np 
from sklearn import linear_model           # for logistic regression
from sklearn.metrics import accuracy_score # for evaluation
from scipy import misc                     # for loading image
from scipy import sparse
np.random.seed(1)                          # for fixing random values



# Randomly generate a small synthetic data set.
N = 10  # number of training samples
d = 5   # data dimension
C = 3   # number of classes

X = np.random.randn(d, N)          # one sample per column
# Draw labels from {0, ..., C-1}; the upper bound follows C instead of a
# hard-coded 3 so the labels stay valid when the class count is changed.
y = np.random.randint(0, C, (N,))

print(y)

def convert_labels(y, C=None):
    """
    Convert a 1d label vector to a one-hot label matrix: each column of the
    matrix corresponds to one element of y. In the i-th column of Y the only
    non-zero element is located at row y[i] and equals 1.

    y: 1d sequence of integer class labels in {0, ..., C-1}.
    C: number of classes (rows of the result). When omitted it is inferred
       as max(y) + 1, which drops trailing rows for classes that do not
       occur in y; pass C explicitly when the class count is known.

    ex: y = [0, 2, 1, 0], and 3 classes then return

            [[1, 0, 0, 1],
             [0, 0, 1, 0],
             [0, 1, 0, 0]]

    Raises ValueError (from np.amax) when y is empty and C is not given.
    """
    y = np.asarray(y)
    n_samples = len(y)
    if C is None:
        C = int(np.amax(y)) + 1
    # Entry (y[i], i) is set to 1; every other entry stays 0.
    Y = sparse.coo_matrix((np.ones_like(y), (y, np.arange(n_samples))),
                          shape=(C, n_samples)).toarray()
    return Y

# One-hot encode the labels once and show the stored matrix (the encoding
# was previously computed a second time just for printing).
# NOTE(review): when the number of classes is inferred from y, Y has
# max(y) + 1 rows, which equals C only if the largest label occurs in y.
Y = convert_labels(y)
print(Y)

def softmax(x):
    """
    Compute softmax values for each set of scores in x.

    x: a 1d vector of scores, or a 2d array in which every column is one
       set of scores. The normalisation runs along axis 0, so each column
       of the result sums to 1 independently of the other columns.
       (A scalar is treated as a single score and yields 1.0.)

    Returns an array with the same shape as x.
    """
    x = np.asarray(x)
    # A 0-d input has no axis 0; reduce over everything in that case.
    axis = 0 if x.ndim > 0 else None
    # Subtracting the per-column maximum does not change the result but
    # keeps exp() from overflowing on large scores.
    e_x = np.exp(x - np.max(x, axis=axis, keepdims=True))
    return e_x / e_x.sum(axis=axis, keepdims=True)


# Sanity check: print the softmax of a single vector of three scores.
scores = [3.0, 1.0, 0.2]
print(softmax(scores))
    

# cost
def cost(X, Y, W):
    """
    Cross-entropy loss of the softmax regression model.

    X: data matrix, one sample per column (d x N in this script).
    Y: one-hot label matrix, one sample per column (C x N).
    W: weight matrix, one column per class (d x C).

    Returns -sum(Y * log(Z)) with Z = softmax(W^T X), i.e. the summed
    negative log-probability that the model gives to the true classes.

    NOTE(review): this relies on softmax() normalising every column of the
    score matrix separately (one distribution per sample) -- confirm.
    """
    # softmax() exponentiates internally, so it receives the raw scores.
    Z = softmax(W.T.dot(X))
    return -np.sum(Y * np.log(Z))

# Random starting weights of shape (d, C), used below for the gradient check.
W_init = np.random.randn(d, C)

def grad(X, Y, W):
    """
    Analytic gradient of the softmax cross-entropy loss with respect to W.

    X: data matrix, one sample per column (d x N in this script).
    Y: one-hot label matrix, one sample per column (C x N).
    W: weight matrix, one column per class (d x C).

    Returns X (Z - Y)^T with Z = softmax(W^T X); same shape as W.

    NOTE(review): this relies on softmax() normalising every column of the
    score matrix separately (one distribution per sample) -- confirm.
    """
    # softmax() exponentiates internally, so it receives the raw scores.
    Z = softmax(W.T.dot(X))
    V = Z - Y
    return X.dot(V.T)
    
def numerical_grad(X, Y, W, cost):
    """
    Estimate the gradient of cost(X, Y, W) with respect to W by central
    finite differences.

    X, Y: passed through unchanged to cost.
    W:    array of weights at which the gradient is evaluated; not modified.
    cost: callable cost(X, Y, W) returning a scalar.

    Returns a float array with the same shape as W whose entry at index
    (i, j) is (cost(W + eps*E_ij) - cost(W - eps*E_ij)) / (2*eps), where E_ij
    is zero everywhere except for a 1 at (i, j).
    """
    eps = 1e-4
    # Private float copy: the caller's W stays untouched, and an integer W
    # does not truncate the perturbation or the resulting gradient.
    W_work = np.array(W, dtype=float)
    g = np.zeros_like(W_work)
    # Every single entry gets its own pair of perturbed evaluations.
    for idx in np.ndindex(*W_work.shape):
        original = W_work[idx]
        W_work[idx] = original + eps
        cost_plus = cost(X, Y, W_work)
        W_work[idx] = original - eps
        cost_minus = cost(X, Y, W_work)
        W_work[idx] = original  # restore before moving to the next entry
        g[idx] = (cost_plus - cost_minus) / (2 * eps)
    return g

# Gradient check: compare the analytic gradient with the finite-difference
# estimate at the initial weights.
g1 = grad(X, Y, W_init)
g2 = numerical_grad(X, Y, W_init, cost)

print(g1)
print(g2)
# Norm of the difference between the two gradients; a value close to zero
# means they agree.
print(np.linalg.norm(g1 - g2))
# Optional debugging aid: print the loss at the initial weights.
# print(cost(X, Y, W_init))