In [2]:
import numpy as np

In [39]:
# Toy fixtures: a 4x3 matrix holding 1..12 in row-major order and a label vector 0..2.
P = np.arange(1, 13).reshape(4, 3)
y = np.arange(3)
P

array([[ 1,  2,  3],
       [ 4,  5,  6],
       [ 7,  8,  9],
       [10, 11, 12]])

In [41]:
# Broadcast a (4, 1) column of per-row factors across P: only the first row is scaled by 10.
P * np.array([[10], [1], [1], [1]])

array([[10, 20, 30],
       [ 4,  5,  6],
       [ 7,  8,  9],
       [10, 11, 12]])

In [17]:
# First three columns of the 4x4 identity matrix: a 4x3 array with ones on the main diagonal.
np.eye(4)[:,0:3]

array([[1., 0., 0.],
       [0., 1., 0.],
       [0., 0., 1.],
       [0., 0., 0.]])

In [6]:
# Zero one entry per column of P, in place: row 1 for columns 0 and 1, row 2 for column 2.
# Needs P to exist already (run the cell that defines P first).
P[[1, 1, 2], np.arange(P.shape[1])] = 0
P

NameError: name 'P' is not defined

In [56]:
# Walk over P in two row-wise chunks, collecting each chunk, then stack the
# collected chunks back into a single array.
results = []
for X in np.array_split(P, indices_or_sections=2):
    results += [X]

np.vstack(results)

array([[ 1,  2,  3],
       [ 0,  0,  6],
       [ 7,  8,  0],
       [10, 11, 12]])

In [4]:
# np.random.shuffle permutes its argument in place and returns None, so assigning
# its result always left `indices` as None. np.random.permutation returns the
# shuffled array of 0..9 directly.
indices = np.random.permutation(10)
indices

In [5]:
indices

In [42]:
import pandas as pd

In [78]:
# Read the raw train/test tables from the working directory.
test = pd.read_csv('test.csv')
train = pd.read_csv('train.csv')

# Replace the class labels with integer codes.
train['target'] = train['target'].factorize()[0]

# Keep the test ids aside before the 'id' column is removed from both tables.
X_test_ids = test['id'].to_numpy()
test = test.drop(columns=['id'])
train = train.drop(columns=['id']).to_numpy()

# Every column but the last is a feature; the last column is taken as the label
# (assumes 'target' is the final column of train.csv -- TODO confirm).
X_train, y_train = train[:, :-1], train[:, -1]
X_test = test.to_numpy()

In [79]:
X_test_ids

array([    1,     2,     3, ..., 11876, 11877, 11878], dtype=int64)

In [77]:
X_test.shape

(11878, 93)

In [113]:
def softmax(parameters, X):
    """
    Compute row-wise softmax class probabilities for a linear model.

    Args:
        parameters (numpy array with n*r elements): model weights, where r is the
            number of classes and n is the number of attributes; reshaped
            internally to (n, r), so a flat 1d array or an (n, r) array both work
        X (2d m*n numpy array): m rows where each row is a sample with n attributes

    Returns:
        2d m*r numpy array: each row is a sample with r probabilities summing to 1
    """
    weights = parameters.reshape(X.shape[1], -1)
    logits = X @ weights
    # Softmax is unchanged by subtracting a per-row constant. Removing the row
    # maximum keeps every exponent <= 0, so np.exp cannot overflow to inf and
    # the normalisation below can never become inf / inf = nan.
    shifted = logits - np.max(logits, axis=1, keepdims=True)
    P = np.exp(shifted)
    return P / np.sum(P, axis=1, keepdims=True)

In [95]:
# Hand-sized check: 2 attributes, 3 classes. X is the 2x2 identity, so each
# sample's logits are simply the matching row of the parameter matrix.
softmax(
    np.array([[10, 0, 0],
              [0, 10, 10]]),
    np.eye(2, dtype=int),
)

[[10  0  0]
 [ 0 10 10]]
[[10  0  0]
 [ 0 10 10]]
[[2.20264658e+04 1.00000000e+00 1.00000000e+00]
 [1.00000000e+00 2.20264658e+04 2.20264658e+04]]


array([[9.99909208e-01, 4.53958078e-05, 4.53958078e-05],
       [2.26994496e-05, 4.99988650e-01, 4.99988650e-01]])

In [109]:
def cost(parameters, X, y, lambda_=0.0):
    """
    Penalised log-likelihood of a softmax (multinomial logistic) model.

    Despite the name, the returned value is the sum of the log-probabilities
    assigned to the true classes minus an L2 penalty, so larger is better.

    Args:
        parameters (numpy array with n*r elements): r is the number of classes and n is the number of attributes
        X (2d m*n numpy array): m rows where each row is a sample with n attributes
        y (1d m numpy array): integer class labels for each sample, used as column indices
        lambda_ (number): regularization parameter 

    Returns:
        number: sum over samples of log P[i, y[i]] minus lambda_ * sum(parameters ** 2)
    """
    weights = parameters.reshape(X.shape[1], -1)
    logits = X @ weights
    # Work in log space: log softmax(z) = z - logsumexp(z). Shifting by the row
    # maximum first keeps np.exp from overflowing (which produced nan), and never
    # taking the log of a probability that underflowed to 0 avoids -inf.
    shifted = logits - np.max(logits, axis=1, keepdims=True)
    log_P = shifted - np.log(np.sum(np.exp(shifted), axis=1, keepdims=True))
    log_likelihood = np.sum(log_P[np.arange(len(log_P)), y])
    return log_likelihood - lambda_ * np.sum(parameters ** 2)

In [105]:
# Hand-sized check: 2 attributes, 3 classes, identity X, true labels [1, 0].
cost(
    np.array([[10, 0, 0],
              [0, 10, 10]]),
    np.eye(2, dtype=int),
    np.array([1, 0]),
)

[[10  0  0]
 [ 0 10 10]]
[[10  0  0]
 [ 0 10 10]]
[[2.20264658e+04 1.00000000e+00 1.00000000e+00]
 [1.00000000e+00 2.20264658e+04 2.20264658e+04]]


-20.693260676004655

In [111]:

def grad(parameters, X, y, lambda_=0):
    """
    Gradient of the penalised softmax objective with respect to the parameters.

    Args:
        parameters (numpy array with n*r elements): r is the number of classes and n is the number of attributes
        X (2d m*n numpy array): m rows where each row is a sample with n attributes
        y (1d m numpy array): class labels for each sample, used as column indices
        lambda_ (number): regularization parameter 

    Returns:
        1d n*r numpy array: X.T @ (one_hot(y) - P) - 2 * lambda_ * parameters, flattened
    """
    probs = softmax(parameters, X)
    weights = parameters.reshape(X.shape[1], -1)

    # one_hot(y) - probs, built without a separate one-hot matrix: start from
    # -probs and add 1 at each sample's true-class column.
    residual = -probs
    residual[np.arange(len(probs)), y] += 1

    gradient = X.T @ residual - 2 * lambda_ * weights
    return gradient.flatten()

In [115]:
# Hand-sized check: 2 attributes, 3 classes, identity X, true labels [1, 0].
# The flat gradient is reshaped back to (attributes, classes) for display.
grad(
    np.array([[10, 0, 0],
              [0, 10, 10]]),
    np.eye(2, dtype=int),
    np.array([1, 0]),
).reshape(2, -1)

array([[-9.99909208e-01,  9.99954604e-01, -4.53958078e-05],
       [ 9.99977301e-01, -4.99988650e-01, -4.99988650e-01]])