In [None]:
####################################################################################################################
# Machine Learning by Stanford University, using python(numpy,scipy) to implement neural network instead of octave #
####################################################################################################################

import numpy as np
from scipy.special import expit as sigmoid #sigmoid function, assigns a number from (0,1) interval, to a real number
from scipy.optimize import fmin_cg #Minimize a function using a nonlinear conjugate gradient algorithm

In [2]:
#def sigmoid(x): #sigmoid function, assigns a number from (0,1) interval, to a real number
#    return 1/(1+np.exp(-x))
#----------------------- replaced by scipy.special.expit (much faster) -----------------------#

In [3]:
#cost function, vectorized regularized logistic regression for one theta

def cost_function(theta, X, y, lambda1): #theta, X, y => np.matrix()
    m=X.shape[0] #number of training values
    n=X.shape[1] #number of features
    J,grad=0,np.matrix(np.zeros(n).reshape(n,1))
    h_theta=sigmoid(X*theta) #hypothesis
    
    J=(-( y.T*np.log(h_theta)+(1-y).T*log(1-h_theta) ) + lambda1*theta[1:].T *theta[1:]/2)/m #cost function + regularization
    
    grad= X.T*(h_theta-y)/m #gradient function + regularization for j=0 (feature 0)
    grad[1:]+=lambda1*theta[1:]/m #gradient function + regularization for j>0 (feature > 0)
    
    return [J,grad]

SyntaxError: invalid syntax (<ipython-input-3-a7515d85f51d>, line 3)

In [None]:
#Minimize a function using a nonlinear conjugate gradient algorithm
#------------------------------------------------------------------------
#scipy.optimize.fmin_cg(f, x0, fprime=None, args=(), gtol=1e-05, norm=inf, epsilon=1.4901161193847656e-08, 
#                                                    maxiter=None, full_output=0, disp=1, retall=0, callback=None)
#------------------------------------------------------------------------
#f : callable, f(x, *args)
# Objective function to be minimized. Here x must be a 1-D array of the variables that are to be changed in 
# the search for a minimum, and args are the other (fixed) parameters of f.
#------------------------------------------------------------------------
#x0 : ndarray
# A user-supplied initial estimate of xopt, the optimal value of x. It must be a 1-D array of values.
#------------------------------------------------------------------------
#fprime : callable, fprime(x, *args), optional
# A function that returns the gradient of f at x. Here x and args are as described above for f. 
# The returned value must be a 1-D array. Defaults to None, in which case the gradient is approximated numerically 
# (see epsilon, below).
#------------------------------------------------------------------------
#args : tuple, optional
# Parameter values passed to f and fprime. Must be supplied whenever additional fixed parameters 
# are needed to completely specify the functions f and fprime.
#------------------------------------------------------------------------
#reference => http://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.fmin_cg.html#scipy.optimize.fmin_cg

def one_vs_all(X, y, num_labels, lambda1):
    m=X.shape[0] #number of training values
    n=X.shape[1] #number of features
    all_theta=np.zeros(num_labels,n+1) #number of classes, n features + X0 (bias)
    X=np.insert(X,0,1,axis=1)#add column 0 with values 1 (bias)
    for k in num_labels: #for all classes 0-9
        args=(X, y==k, lambda1)
        initial_theta=np.zeros(n+1,1)
        f=lambda x,*args: cost_function(x,args[0],args[1],args[2])[0] #minimalize cost function, search theta (x=theta)
        fprime=lambda x,*args:cost_function(x,args[0],args[1],args[2])[1] #return gradient for given theta (x=theta)
        theta=fmin_cg( f, initial_theta,fprime,args,maxiter=1000)
        all_theta[k]=theta
    return all_theta