In [157]:
import matplotlib as mpl
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from scipy.optimize import minimize
from scipy.special import expit

In [70]:
%matplotlib notebook

In [71]:
# Global matplotlib styling applied to every figure drawn after this cell.
font = {'weight': 'normal', 'size': 15}
mpl.rcParams.update({
    'figure.figsize': (6., 6.0),
    'font.weight': font['weight'],
    'font.size': font['size'],
    'axes.unicode_minus': False,
    'axes.linewidth': 1.5,
    'axes.labelsize': 'large',
    'lines.linewidth': 2,
    'xtick.labelsize': 15,
    'ytick.labelsize': 15,
    'xtick.major.size': 5.5,    # major tick length in points
    'xtick.major.width': 1.5,   # major tick width in points
    'ytick.major.size': 5.5,    # major tick length in points
    'ytick.major.width': 1.5,   # major tick width in points
})

# Problem definition

Suppose that you are the administrator of a university department and you want to determine each applicant’s chance of admission based on their results on two exams. You have historical data from previous applicants that you can use as a training set for logistic regression. For each training example, you have the applicant’s scores on two exams and the admissions decision.
Your task is to build a classification model that estimates an applicant’s probability of admission based on the scores from those two exams.

In [220]:
!head ex2data1.txt # training dataset

34.62365962451697,78.0246928153624,0
30.28671076822607,43.89499752400101,0
35.84740876993872,72.90219802708364,0
60.18259938620976,86.30855209546826,1
79.0327360507101,75.3443764369103,1
45.08327747668339,56.3163717815305,0
61.10666453684766,96.51142588489624,1
75.02474556738889,46.55401354116538,1
76.09878670226257,87.42056971926803,1
84.43281996120035,43.53339331072109,1


In [248]:
# The file has no header row: each line is "<exam 1 score>,<exam 2 score>,<0 or 1>".
df1 = pd.read_csv(
    'ex2data1.txt',
    header=None,
    names=['grade1', 'grade2', 'pass'],
)

In [510]:
# Scatter plot of the training set: one marker per applicant, positioned by the
# two exam scores and coloured by the value in the `pass` column.
fig, ax = plt.subplots()
ax.scatter(df1['grade1'], df1['grade2'], c=df1['pass'], lw=3)
# Label the figure so it can be read on its own when the notebook is skimmed.
ax.set_xlabel('Exam 1 score')
ax.set_ylabel('Exam 2 score')
ax.set_title('Admission decision vs. exam scores');


<IPython.core.display.Javascript object>

<matplotlib.collections.PathCollection at 0x7fb85c687ca0>

In [509]:
# defining regressor
def regressor(theta,X):
    '''
    Linear predictor: the product of the feature matrix with the coefficients.

    inputs
    theta: coefficients (nx1 array)
    X: features dataset (mxn array) with m examples and n features

    returns
    np.dot(X, theta), i.e. one linear score per example
    '''
    linear_score = np.dot(X, theta)
    return linear_score
    
# implementing sigmoid function for logistic classification
def sigm(theta,X):
    '''
    Logistic (sigmoid) function applied to the linear score regressor(theta, X).

    inputs
    theta: coefficients (nx1 array)
    X: features dataset (mxn array) with m examples and n features

    returns
    array of values between 0 and 1, same shape as regressor(theta, X)
    '''
    z = regressor(theta,X)
    # expit(z) == 1/(1 + exp(-z)), but evaluated without the overflow
    # RuntimeWarning that np.exp(-z) emits for large negative z.
    return expit(z)

# Toy design matrix: a column of ones followed by two identical columns that
# sweep the integers 30..99 (70 rows).
X = np.column_stack((np.ones(70), np.arange(30, 100, 1), np.arange(30, 100, 1)))

# Hand-picked trial coefficients, stored as a 3x1 column vector.
theta = np.array([[-24], [0.2], [0.2]])

# NOTE(review): this draws the raw linear score regressor(theta, X) against the
# first feature on the existing axes `ax`; if a decision boundary in the
# grade1/grade2 plane was intended, this is not it -- confirm.
ax.plot(X[:, 1], regressor(theta, X), '*')


[<matplotlib.lines.Line2D at 0x7fb85ca92b50>]

In [492]:
# implementing the logistic-regression (cross-entropy) cost function and its gradient
# note that scipy.optimize.minimize uses a gradient function when searching for a minimum.
# if none is provided, it estimates the derivative with a 2-point finite-difference scheme

def cost_func(theta, *args):
    '''
    Mean logistic-regression (cross-entropy) cost
        J = -1/m * sum( y*log(h) + (1-y)*log(1-h) ),  h = sigmoid(X . theta)

    inputs
    theta: coefficients (nx1 array, or the flat length-n array scipy passes)
    X: features dataset (mxn array) with m examples and n features
    y: binary data (mx1 array)

    returns
    the mean cost, with the same array shape the dot products produce
    (a 1x1 array for nx1 theta and mx1 y)
    '''
    X = args[0]
    y = args[1]
    z = np.dot(X, theta)
    # -log(sigmoid(z)) == logaddexp(0, -z) and -log(1 - sigmoid(z)) == logaddexp(0, z).
    # Working on z directly avoids log(0): once the sigmoid saturates to exactly
    # 0 or 1 the naive formula yields inf, or nan through 0 * inf.
    loss_if_one = np.logaddexp(0, -z)
    loss_if_zero = np.logaddexp(0, z)
    total_loss = np.dot(y.T, loss_if_one) + np.dot((1 - y.T), loss_if_zero)
    return 1/len(y) * total_loss

def grad_func(theta,*args):
    '''
    Gradient of the mean logistic-regression cost with respect to theta:
        1/m * (h - y)^T . X,  h = sigmoid(X . theta)

    inputs
    theta: coefficients (nx1 array, or the flat length-n array scipy passes)
    X: features dataset (mxn array) with m examples and n features
    y: binary data (mx1 array)

    returns
    a 1xn array when theta is 2-D, otherwise a flat length-n array
    (the shape scipy.optimize.minimize expects from `jac`)
    '''
    X = args[0]
    y = args[1]

    # Flatten predictions and labels to length m before subtracting. Without
    # this, a flat theta gives (m,) predictions that broadcast against the
    # (m, 1) labels into an (m, m) matrix and the gradient is wrong.
    h_thet = expit(np.reshape(np.dot(X, theta), -1))
    residual = h_thet - np.reshape(y, -1)

    theta_upd = np.dot(residual, X) / len(y)
    if np.ndim(theta) > 1:
        # keep the historical 1xn row shape for column-vector theta
        return theta_upd.reshape(1, -1)
    return theta_upd

Constructing Data matrix and result array

In [513]:
# Design matrix: a leading column of ones (intercept term) followed by the two
# exam-score columns.
X = df1[['grade1', 'grade2']].to_numpy()
X = np.insert(X, 0, np.ones(len(df1)), axis=1)
# Labels as an (m, 1) column. reshape((-1, 1)) infers m from the data, so this
# line no longer reads `y` before it exists (a NameError on a fresh kernel).
y = df1['pass'].to_numpy().reshape((-1, 1))
# test theta
theta = np.zeros((3, 1))

Testing cost function

In [514]:
# Sanity check: with the all-zeros theta from the previous cell every predicted
# probability is 0.5, so the cost should equal ln(2), about 0.693.
cost_func(theta,X,y)

array([[0.69314718]])

In [515]:
# Gradient of the cost at the same theta: one entry per coefficient.
grad_func(theta,X,y)

array([[ -0.1       , -12.00921659, -11.26284221]])

Let's experiment and see whether `scipy.optimize.minimize` can find the minimum of the cost function.

In [526]:
# Initial guess for the coefficients, kept as a 3x1 column like elsewhere in the notebook.
theta = np.array([-24,0.2,0.2]).reshape((3,1))
# scipy.optimize.minimize documents x0 as a 1-D array of shape (n,), so pass the
# flattened guess rather than the column vector. No `jac` is given, so the
# gradient is approximated by finite differences.
res = minimize(cost_func, theta.ravel(), args=(X, y), method='CG',
               options={'gtol': 1e-6, 'disp': True})

Optimization terminated successfully.
         Current function value: 0.203498
         Iterations: 36
         Function evaluations: 340
         Gradient evaluations: 85
