In [33]:
import numpy as np 

# Fixed seed so the X and y printed below are reproducible.
np.random.seed(1)

# Randomly generate a tiny toy dataset.
N = 2 # number of training samples
d = 2 # data dimension
C = 3 # number of classes

# X stores one sample per column, hence shape (d, N).
X = np.random.randn(d, N)
# Labels are drawn from [0, C) so that changing C keeps them in range.
y = np.random.randint(0, C, (N,))
print("X:\n", X)
print("y:\n", y)

X:
 [[ 1.62434536 -0.61175641]
 [-0.52817175 -1.07296862]]
y:
 [2 1]


In [38]:
from scipy import sparse 
def convert_labels(y, C = C):
    """
    Turn a 1-D vector of class indices into a one-hot label matrix.

    The result has shape (C, len(y)): column j contains a single 1, placed
    in row y[j], and zeros everywhere else.
    """
    n_samples = len(y)
    # One unit entry per sample: row = its class index, column = its position.
    unit_entries = np.ones_like(y)
    sample_positions = np.arange(n_samples)
    one_hot = sparse.coo_matrix(
        (unit_entries, (y, sample_positions)),
        shape = (C, n_samples),
    )
    return one_hot.toarray()

# One-hot encode the toy labels and show them next to the original vector.
Y = convert_labels(y, C)
print("y:\n", y)
print("Y:\n", Y)

# Show the (data, (row, col)) triple that is handed to sparse.coo_matrix.
print((np.ones_like(y),(y, np.arange(len(y)))))

y:
 [2 1]
Y:
 [[0 0]
 [0 1]
 [1 0]]
(array([1, 1]), (array([2, 1]), array([0, 1])))


In [39]:
def softmax(Z):
    """
    Column-wise softmax of a score array.

    Z holds one column of scores per sample (the normalisation runs along
    axis 0). The result has the same shape as Z and every column sums to 1.

    The column maximum is subtracted before exponentiating. This leaves the
    result mathematically unchanged but keeps np.exp from overflowing to inf
    (and the division from producing nan) when the scores are large.
    """
    Z_shifted = Z - np.max(Z, axis = 0, keepdims = True)
    e_Z = np.exp(Z_shifted)
    A = e_Z / e_Z.sum(axis = 0)
    return A

# loss function
def loss(X, Y, W):
    """
    Cross-entropy loss of the softmax model, summed over all samples.

    X : array of shape (d, N), one sample per column.
    Y : one-hot label array of shape (C, N).
    W : weight array of shape (d, C).

    Returns -sum(Y * log(softmax(W^T X))).

    The log-probabilities are computed directly with the log-sum-exp trick
    instead of taking the log of a softmax output. This avoids exp overflow
    for large scores and log(0) for very small probabilities, so the result
    stays finite whenever the scores are finite.
    """
    Z = W.T.dot(X)
    # Shift by the column maximum so the largest exponent is exactly 0.
    Z_shifted = Z - np.max(Z, axis = 0, keepdims = True)
    log_A = Z_shifted - np.log(np.sum(np.exp(Z_shifted), axis = 0, keepdims = True))
    return -np.sum(Y*log_A)

In [49]:
def softmax_regression(X, y, W_init, eta, tol = 1e-4, max_count = 10000, verbose = True):
    """
    Fit softmax-regression weights with stochastic gradient descent.

    Each pass visits the columns of X in a fresh random order and applies the
    single-sample update W <- W + eta * x_i (y_i - a_i)^T, where a_i is the
    softmax of W^T x_i and y_i is the one-hot label of sample i.

    Parameters
    ----------
    X : array of shape (d, N), one training sample per column.
    y : 1-D sequence of N class indices; one-hot encoded via convert_labels.
    W_init : array of shape (d, C) with the starting weights. The number of
        classes C is taken from its second dimension.
    eta : learning rate.
    tol : every 20 updates the candidate weights are compared with the
        weights from 20 updates earlier; training stops once the norm of
        their difference drops below tol.
    max_count : maximum number of single-sample updates to perform.
    verbose : when True (the default) print W_init, C, Y, N and d before
        training starts.

    Returns
    -------
    list of arrays: the weight history, beginning with W_init. The last
    element holds the final weights. When the tolerance test succeeds, the
    candidate update that triggered it is not appended.

    Raises
    ------
    ValueError
        If X has no columns (there would be nothing to iterate over and the
        update counter could never advance).
    """
    if verbose:
        print("W_init:")
        print(W_init)
    W = [W_init]
    C = W_init.shape[1]
    if verbose:
        print("C:")
        print(C)
    Y = convert_labels(y, C)
    if verbose:
        print("Y:")
        print(Y)
    N = X.shape[1]
    if verbose:
        print("N:")
        print(N)
    d = X.shape[0]
    if verbose:
        print("d:")
        print(d)

    if N == 0:
        raise ValueError("X must contain at least one training sample")

    count = 0
    check_w_after = 20
    while count < max_count:
        # mix data
        mix_id = np.random.permutation(N)
        for i in mix_id:
            xi = X[:, i].reshape(d, 1)
            yi = Y[:, i].reshape(C, 1)
            ai = softmax(np.dot(W[-1].T, xi))
            W_new = W[-1] + eta*xi.dot((yi - ai).T)
            count += 1
            # stopping criteria: W has exactly `count` entries here, so
            # W[-check_w_after] is the iterate from check_w_after updates ago
            if count%check_w_after == 0:
                if np.linalg.norm(W_new - W[-check_w_after]) < tol:
                    return W
            W.append(W_new)
            # honour max_count even in the middle of a pass over the data
            if count >= max_count:
                break
    return W
eta = .05  # learning rate handed to softmax_regression
d = X.shape[0]  # data dimension = number of rows of X
# Random starting weights with one column per class.
W_init = np.random.randn(d, C)

# softmax_regression returns the list of weight iterates, so the trained
# weights are its last element.
W = softmax_regression(X, y, W_init, eta)
print("W final:")
print(W[-1])

W_init:
[[ 0.17246684 -1.77382823  1.21958759]
 [-0.94770704  1.55915476  1.27115498]]
C:
3
Y:
[[0 0]
 [0 1]
 [1 0]]
N:
2
d:
2
W final:
[[-0.3185071  -3.72815912  3.66489242]
 [ 2.53472793 -1.77884557  1.12672034]]


In [18]:
def pred(W, X):
    """
    Predict the class index of every column of X.

    W : weight array of shape (d, C).
    X : array of shape (d, N), one sample per column.

    Returns a 1-D array of N indices, each the row of W^T X with the largest
    score for that sample.

    Softmax is strictly increasing within a column, so the argmax of the raw
    scores is the argmax of the probabilities. Working on the scores avoids
    the exp overflow that would turn a column into nan and make argmax pick
    the wrong class.
    """
    Z = W.T.dot(X)
    return np.argmax(Z, axis = 0)