# Imports

In [7]:
import numpy as np
import matplotlib.pyplot as plt
# from pandas import read_csv - not needed since the data file is in .mat format that can be loaded using scipy.io.loadmat
from scipy import optimize as opt
from scipy.io import loadmat

# Model preparation

## Cost function

## Sigmoid function

In [8]:
def sigmoid(z):
    return 1/(1+np.exp(-z))

In [9]:
def compute_cost(theta, X, Y, lambda_reg, m):
    
    htheta_vector = sigmoid(np.dot(X, theta))
    J = (-1/m) * np.sum((Y * np.log(htheta_vector) + (1-Y)*np.log(1-htheta_vector)))
    
    J += lambda_reg * np.sum(theta[1:]**2)
    
    return J
    

In [10]:
def compute_gradient(theta, X, Y, lambda_reg, m):
    htheta_vector = sigmoid(np.dot(X, theta))
    error_vector = htheta_vector - Y
    gradient = (1/m) * np.dot(X.T, error_vector)
    
    return gradient

## Main

In [11]:
def main():
    mat = loadmat('data3.mat')
    
    X = mat['X'] # numpy array
    Y = mat['y'] # numpy array
    
#     print('X:\n' + str(X))
#     print('Y:\n' + str(Y))
    
#     print('X.shape: ', X.shape)
    print('Y.shape: ', Y.shape)
    m = X.shape[0]
    n = X.shape[1]
    
    lambda_reg = 0.1
    
    ones = np.ones(shape = (m, 1))
    X_with_bias = np.concatenate([ones, X], axis = 1)
    
    num_classes = 10
    
    theta = np.zeros(shape = (n+1, 1))
    
    for i in range(10):
        Y_vector_for_present_class = np.array([]) # no need to flatten it in opt.minimize args list since it is an np.array
        if i == 0:
            for j in range(m):
                if Y[j] == 10:
#                     print(Y[j])
                    Y_vector_for_present_class = np.append(Y_vector_for_present_class, 1)
                else:
#                     print(Y[j])
                    Y_vector_for_present_class = np.append(Y_vector_for_present_class, 0)
        else:
            for j in range(m):
                if Y[j] == i:
#                     print(Y[j])
                    Y_vector_for_present_class = np.append(Y_vector_for_present_class, 1)
                else:
#                     print(Y[j])
                    Y_vector_for_present_class = np.append(Y_vector_for_present_class, 0)
        print(str(i) + ' ' + str(Y_vector_for_present_class.shape))
        Y_vector_for_present_class.reshape(m, 1)
        result = opt.fmin_cg(compute_cost, fprime=compute_gradient, x0=theta, \
                              args=(X_with_bias, Y_vector_for_present_class , lambda_reg, m), maxiter=50, disp=False,\
                              full_output=True)
        print('Optimized theta for ' + str(i) + ' class: ' + str(result))
    

In [12]:
if __name__ == '__main__':
    main()

Y.shape:  (5000, 1)
0 (5000,)
Optimized theta for 0 class: (array([-2.37863517e-01,  0.00000000e+00,  0.00000000e+00, -4.60581186e-08,
        5.59515908e-07,  1.06448600e-06, -3.48774538e-05, -4.32272944e-05,
       -2.39325883e-05, -5.73323668e-06, -1.37791608e-05, -4.28451802e-06,
        3.23247338e-06,  6.98931545e-06,  1.24161541e-05,  1.94300803e-05,
        1.40240315e-05,  4.07734797e-06,  2.75742684e-08, -4.35224811e-08,
        0.00000000e+00, -1.46761855e-08,  1.18421635e-07,  4.96003515e-07,
        1.55823742e-06,  7.98649978e-06, -4.99540056e-05, -1.78569894e-04,
       -2.67485011e-04, -2.14316433e-04, -8.57748052e-05, -1.14358426e-05,
       -9.14337793e-06, -3.84747665e-05, -3.86511110e-05, -6.99413812e-05,
       -4.58809492e-05,  2.52816020e-06,  1.54347523e-05,  2.40883775e-06,
        5.26804209e-07,  1.25902291e-07, -1.30532344e-06,  6.02261482e-06,
       -4.06504722e-05, -2.51079226e-04, -8.38577993e-04, -1.45283712e-03,
       -1.67563182e-03, -1.35746705e-03,

Optimized theta for 3 class: (array([-2.34112859e-01,  0.00000000e+00,  0.00000000e+00, -4.53318691e-08,
        4.29046522e-07,  2.54801256e-06, -2.09285259e-05, -4.48654839e-05,
       -2.55732801e-05, -5.63097971e-06, -1.27637911e-05, -3.76179960e-06,
       -7.42094569e-07, -5.58034713e-06, -3.03091118e-07, -4.86064013e-06,
       -1.04777628e-05, -4.25752118e-06, -2.71394743e-08,  4.28362139e-08,
        0.00000000e+00, -1.44447698e-08,  1.87084674e-07,  6.10936408e-07,
       -6.57623249e-06, -1.75288626e-05, -7.37497377e-05, -1.90273138e-04,
       -2.52054759e-04, -2.11445165e-04, -7.97470447e-05,  2.40889379e-05,
        9.82213534e-05,  1.64770801e-04,  9.85897578e-05,  1.15958274e-04,
        1.26290866e-04,  3.75325626e-05, -6.46676013e-06,  1.04520362e-07,
        7.21720471e-07,  1.23917050e-07, -1.93716199e-06,  1.42483635e-07,
        6.05073337e-05,  1.85902538e-04, -8.96027101e-05, -8.23324907e-04,
       -1.36677115e-03, -1.15566364e-03, -5.67663265e-04, -3.70270780e

Optimized theta for 5 class: (array([-2.28598127e-01,  0.00000000e+00,  0.00000000e+00, -4.42640374e-08,
        5.37721336e-07,  1.02302156e-06, -3.35188880e-05, -4.15434811e-05,
       -2.30003529e-05, -5.55280966e-06, -1.24631289e-05, -4.02944761e-06,
        3.12081071e-06,  7.08457179e-06,  6.20909740e-06,  1.08225612e-05,
        1.33399246e-05,  3.96171386e-06,  2.65001803e-08, -4.18271695e-08,
        0.00000000e+00, -1.41045106e-08,  1.13808810e-07,  4.76682913e-07,
        1.46561526e-06,  8.19386760e-06, -4.69159748e-05, -1.72409013e-04,
       -2.60018995e-04, -2.06844103e-04, -8.12156214e-05,  2.60652067e-05,
        5.08250273e-05,  4.40794913e-05,  1.18877864e-04,  8.91156784e-05,
       -9.27037444e-06,  9.12267103e-06,  1.37521861e-05,  2.30391831e-06,
        5.06283841e-07,  1.20998076e-07, -1.25447777e-06,  5.80252563e-06,
       -3.88545245e-05, -2.46342855e-04, -8.26111499e-04, -1.40006960e-03,
       -1.55225251e-03, -1.15493619e-03, -5.64505598e-04, -3.53568334e

Optimized theta for 7 class: (array([-2.23956608e-01,  0.00000000e+00,  0.00000000e+00, -4.33652883e-08,
        2.96768617e-07, -5.78026623e-08,  2.39819315e-05,  3.71830971e-05,
        9.84733452e-06,  2.24249362e-06, -1.35956970e-05, -5.18224716e-06,
        2.97832137e-06,  6.22320037e-06,  8.27739270e-06,  1.71028648e-05,
        1.33541971e-05,  3.83896208e-06,  2.59621134e-08, -4.09778993e-08,
        0.00000000e+00, -1.38181288e-08,  1.11498005e-07,  4.67004214e-07,
        1.07209334e-06,  2.36151345e-06,  6.55115324e-05,  2.10693145e-04,
        2.64731871e-04,  1.97574668e-04,  3.52860662e-05,  5.01997468e-06,
        4.41781255e-05,  6.12724364e-05,  1.16750800e-04,  4.03262315e-05,
        6.90263183e-07,  1.21969764e-05,  1.25923192e-05,  1.33185796e-06,
        2.95631719e-07,  1.18541298e-07, -1.22900651e-06,  5.83120728e-06,
       -4.24554570e-05, -2.51323500e-04, -1.91763637e-04,  3.63550371e-04,
        1.02487803e-03,  9.23529956e-04,  3.43938275e-04,  2.54235080e

9 (5000,)
Optimized theta for 9 class: (array([-2.30533748e-01,  0.00000000e+00,  0.00000000e+00, -4.46388368e-08,
        5.42274414e-07,  1.03168384e-06, -3.38027044e-05, -4.18952445e-05,
       -2.31951051e-05, -5.59982727e-06, -1.25686586e-05, -4.06356638e-06,
        3.18794562e-06,  5.27246922e-06,  1.02582554e-05,  1.89870696e-05,
        1.35918807e-05,  3.95170442e-06,  2.67245667e-08, -4.21813350e-08,
        0.00000000e+00, -1.42239386e-08,  1.14772470e-07,  4.80719157e-07,
        1.47908459e-06,  8.32363597e-06, -4.80412593e-05, -1.75089452e-04,
       -2.65039885e-04, -2.17027967e-04, -1.01226298e-04,  7.57148391e-06,
        4.94020900e-05,  8.38922367e-05,  1.01110838e-04,  2.30843977e-05,
        1.66037997e-06,  1.49877248e-05,  1.51208284e-05,  2.09749141e-07,
       -5.04429909e-07,  1.22022610e-07, -1.26509988e-06,  5.83702783e-06,
       -3.92616728e-05, -2.45962091e-04, -8.14604296e-04, -1.39678467e-03,
       -1.60553284e-03, -1.18113141e-03, -3.67300634e-04, -1