In [None]:
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

In [None]:
datafile = 'ex2data1.txt'
#!head $datafile
# With unpack=True each row of `cols` is one comma-separated column of the file.
cols = np.loadtxt(datafile, delimiter=',', usecols=(0,1,2), unpack=True)
# Every column except the last is a feature; prepend the constant-1 intercept
# column to obtain the usual "X" matrix (one row per training example).
X = np.insert(cols[:-1].T, 0, 1, axis=1)
# The last column is the label; keep the usual "y" as an (m, 1) column vector.
y = np.array(cols[-1:]).T
m = y.size # number of training examples
np.shape(X)

In [None]:
# Divide the sample into two: rows with positive classification (y == 1) and
# rows with null classification (y == 0).
# Boolean masks cover every row of X, so the split no longer assumes exactly 100
# examples, keeps a 2-D shape even when a class is empty, and needs no xrange
# (which only exists on Python 2).
pos = X[y.ravel() == 1]
neg = X[y.ravel() == 0]


In [None]:
# Visualization of data
def plotData():
    """Scatter-plot the two classes on a fresh 10x6 figure.

    Reads the module-level ``pos`` and ``neg`` arrays and plots column 1
    (x-axis) against column 2 (y-axis) for each, with a legend and a grid.
    """
    plt.figure(figsize=(10,6))
    # (rows, marker format, legend label) for each class, drawn in this order.
    series = ((pos, 'k+', 'Admitted'), (neg, 'yo', 'Not admitted'))
    for rows, marker, name in series:
        plt.plot(rows[:,1], rows[:,2], marker, label=name)
    plt.xlabel('Exam 1 score')
    plt.ylabel('Exam 2 score')
    plt.legend()
    plt.grid(True)


plotData()
    


In [None]:
from scipy.special import expit
# Evaluate the logistic (sigmoid) function on [-10, 10) in steps of 0.1 and
# plot it to show its S-shaped curve.
myx = np.arange(-10,10,.1)
sigmoid_of_myx = expit(myx)
plt.plot(myx, sigmoid_of_myx)

plt.grid(True)

In [None]:
#Hypothesis function and cost function for logistic regression

def h(mytheta,myX): #Logistic hypothesis function
    """Return sigmoid(myX . mytheta), the logistic hypothesis.

    mytheta -- parameter vector, one entry per column of myX
    myX     -- design matrix (one row per example) or a single feature row

    The result has whatever shape np.dot(myX, mytheta) produces.
    """
    return expit(np.dot(myX,mytheta))

#Cost function, default lambda (regularization) 0
def computeCost(mytheta,myX,myy,mylambda = 0.):
    """Regularized logistic-regression cost (mean cross-entropy plus L2 penalty).

    mytheta  -- parameters; any layout with one entry per column of myX is
                accepted ((n,), (n, 1) or (1, n)) and flattened internally
    myX      -- design matrix, one row per training example
    myy      -- 0/1 labels, one per row of myX (flat or column vector)
    mylambda -- regularization strength; the first parameter (intercept)
                is never penalized

    Returns the cost as a plain Python float.
    """
    theta = np.asarray(mytheta, dtype=float).ravel()
    labels = np.asarray(myy, dtype=float).ravel()
    # Take the example count from the labels actually passed in rather than from
    # a module-level `m`, which goes stale as soon as another data set is loaded.
    n_examples = labels.size

    # expit saturates to exactly 0.0 / 1.0 for large |z|; log(0) = -inf and
    # 0 * -inf = nan would then poison the cost. Clipping keeps it finite and
    # leaves every non-saturated probability untouched.
    eps = 1e-15
    probs = np.clip(h(theta, myX), eps, 1. - eps)

    log_likelihood = np.dot(labels, np.log(probs)) + np.dot(1. - labels, np.log(1. - probs))
    regterm = (mylambda/2.) * np.dot(theta[1:], theta[1:])
    return float((regterm - log_likelihood) / n_examples)
    
    

In [None]:
# Start from the all-zero parameter column vector (one entry per column of X)
# and report the cost there, rounded to four decimals.
initial_theta = np.zeros((X.shape[1],1))
c=computeCost(initial_theta,X,y)
# print(...) with a single argument behaves the same on Python 2 and Python 3.
print(float('%.4f'%c))


In [None]:
from scipy import optimize

def optimizeTheta(mytheta,myX,myy,mylambda=0.):
    """Minimise computeCost with scipy.optimize.fmin, starting from mytheta.

    myX, myy and mylambda are forwarded unchanged to computeCost, and the
    search is capped at 400 iterations.

    Returns (parameters found, cost at those parameters).
    """
    extra_args = (myX, myy, mylambda)
    # With full_output=True, fmin returns a tuple whose first two entries are
    # the minimiser and the function value there.
    outcome = optimize.fmin(computeCost, x0=mytheta, args=extra_args, maxiter=400, full_output=True)
    best_theta, best_cost = outcome[:2]
    return best_theta, best_cost



In [None]:
# Fit the unregularised model and show the fitted parameters and the minimum cost.
theta, mincost = optimizeTheta(initial_theta,X,y)
# Single-argument print(...) calls work on both Python 2 and Python 3.
print(theta)
print(mincost)

In [None]:
# A notebook cell only displays its last expression, so the shape on its own
# line was computed and then thrown away. Show shape and type together.
np.shape(theta), type(theta)

In [None]:
# Evaluate the unregularised cost (mylambda defaults to 0) at the fitted theta;
# presumably a cross-check against the `mincost` reported by the optimiser.
computeCost(theta,X,y)

In [None]:
# Decision boundary: the straight line where theta . x = 0, i.e. where h = 0.5
# (not h = 0). Solving theta0 + theta1*x1 + theta2*x2 = 0 for x2 gives the
# expression used for boundary_ys; two end points are enough to draw the line.

boundary_xs = np.array([np.min(X[:,1]),np.max(X[:,1])])
boundary_ys = (-1./theta[2])*(theta[0] + theta[1]*boundary_xs)
plotData()
plt.plot(boundary_xs,boundary_ys,'b-',label='Decision Boundary')
plt.legend()

# Hypothesis value for exam scores of 45 and 85 (leading 1 is the intercept term).
# Single-argument print(...) works on both Python 2 and Python 3.
print(h(theta,np.array([1, 45.,85.])))


In [None]:
def makePrediction(mytheta, myx):
    """Return True wherever the hypothesis h(mytheta, myx) is at least 0.5."""
    probability = h(mytheta,myx)
    return probability >= 0.5

# Training-set precision: of all examples predicted positive, the fraction that
# really is positive. The bare makePrediction(...) and type(...) statements that
# used to sit here produced no output and had no effect, so they are dropped.
TP=float(np.sum(makePrediction(theta,pos)))  # true positives: positives predicted positive
FP=float(np.sum(makePrediction(theta,neg)))  # false positives: negatives predicted positive
# Round to two decimals by formatting and parsing back.
Pres= '%.2f' %float(TP/(TP+FP))
Prescion=float(Pres)
# Single-argument print(...) works on both Python 2 and Python 3.
print(Prescion)




In [None]:
#Part 2
datafile = 'ex2data2.txt'
#!head $datafile
# With unpack=True each row of `cols` is one comma-separated column of the file.
cols = np.loadtxt(datafile, delimiter=',', usecols=(0,1,2), unpack=True)
# Every column except the last is a feature; prepend the constant-1 intercept
# column to obtain the usual "X" matrix (one row per training example).
X = np.insert(cols[:-1].T, 0, 1, axis=1)
# The last column is the label; keep the usual "y" as an (m, 1) column vector.
y = np.array(cols[-1:]).T
m = y.size # number of training examples


In [None]:
# Split the rows of X by class label: y == 1 -> pos, y == 0 -> neg.
# Boolean masks replace the Python-2-only xrange loops and keep a 2-D shape
# even if one class happens to be empty.
pos = X[y.ravel() == 1]
neg = X[y.ravel() == 0]
# Only the last expression of a cell is displayed, so show both shapes at once.
np.shape(pos), np.shape(neg)

In [None]:
def plotData():
    """Scatter-plot the two classes on the current axes.

    Reads the module-level ``pos`` and ``neg`` arrays and plots column 1
    (x-axis) against column 2 (y-axis) for each. No figure is created here,
    so the caller decides where the plot lands.
    """
    # (rows, marker format, legend label) for each class, drawn in this order.
    for rows, marker, name in ((pos, 'x', 'y=1'), (neg, 'yo', 'y=0')):
        plt.plot(rows[:,1], rows[:,2], marker, label=name)
    plt.xlabel('Microchip Test 1')
    plt.ylabel('Microchip Test 2')
    plt.legend()
    plt.grid(True)

# Open an 8x6 figure and draw the scatter plot on it.
# NOTE(review): this comment used to say "draw it square to emphasize circular
# features", but figsize=(8,6) is not square -- confirm which one is intended.
plt.figure(figsize=(8,6))
plotData()

In [None]:
def mapFeature( x1col, x2col, degrees=6 ):
    """
    Build the polynomial feature matrix for two input features.

    Takes a column of n x1's and a column of n x2's and returns an n-row matrix
    whose first column is all ones, followed by every monomial
    x1**(i-j) * x2**j for i = 1..degrees and j = 0..i, in that order. The
    default degrees=6 gives the 28 columns described in the homework assignment.

    x1col, x2col -- equal-length inputs; flat arrays, (n, 1) column vectors
                    and plain sequences are all accepted
    degrees      -- highest total polynomial degree to generate
    """
    x1 = np.asarray(x1col, dtype=float).ravel()
    x2 = np.asarray(x2col, dtype=float).ravel()

    # Collect the columns and stack once at the end, rather than re-allocating
    # the whole matrix with hstack for each new term.
    columns = [np.ones(x1.shape[0])]
    for i in range(1, degrees+1):
        for j in range(0, i+1):
            columns.append((x1 ** (i-j)) * (x2 ** j))
    return np.column_stack(columns)

# Expand the two raw feature columns of X into the polynomial feature matrix.
mappedX = mapFeature(X[:,1],X[:,2])
# Single-argument print(...) works on both Python 2 and Python 3.
print(np.shape(mappedX))

In [None]:
# All-zero starting parameters, one per mapped feature column, and the cost there.
initial_theta = np.zeros((mappedX.shape[1],1))
# Stored as initial_cost rather than `cmp`, which would shadow Python 2's
# builtin cmp() for the rest of the session.
initial_cost=computeCost(initial_theta,mappedX,y)
# Single-argument print(...) works on both Python 2 and Python 3.
print('%.4f' %initial_cost)

In [None]:
def optimizeRegularizedTheta(mytheta,myX,myy,mylambda=0.):
    """Minimise computeCost with BFGS (scipy.optimize.minimize).

    mytheta  -- starting parameters; any layout is accepted and flattened
    myX, myy -- forwarded unchanged to computeCost
    mylambda -- regularization strength forwarded to computeCost

    Returns (theta, cost): theta is a (1, n) row array holding the minimiser,
    cost is the objective value at that point.
    """
    # minimize expects a one-dimensional x0; newer SciPy releases raise on
    # (n, 1) or (1, n) input instead of flattening it, so flatten explicitly.
    x0 = np.ravel(mytheta)
    result = optimize.minimize(computeCost,x0,args=(myX,myy,mylambda), method='BFGS',options={"maxiter":1000,"disp":False})

    return np.array([result.x]), result.fun
    
# Fit the model on the mapped features (mylambda defaults to 0) and inspect the result.
theta, mincost = optimizeRegularizedTheta(initial_theta,mappedX,y)
# Single-argument print(...) calls work on both Python 2 and Python 3.
print(type(theta))
print(np.shape(theta))
print(theta)


In [None]:
def plotBoundary(mytheta, myX, myy, mylambda=0.):
    """Fit the regularized model and draw its decision boundary on the current axes.

    mytheta  -- starting point handed to optimizeRegularizedTheta
    myX, myy -- training data handed to optimizeRegularizedTheta
    mylambda -- regularization strength; also shown (as an integer) in the
                contour label

    The boundary is the zero level set of theta . mapFeature(x1, x2), evaluated
    on a 50x50 grid covering [-1, 1.5] on both axes.
    """
    fitted_theta, _ = optimizeRegularizedTheta(mytheta,myX,myy,mylambda)
    xvals = np.linspace(-1,1.5,50)
    yvals = np.linspace(-1,1.5,50)

    # meshgrid yields grid_x[j, i] = xvals[i] and grid_y[j, i] = yvals[j], i.e.
    # rows follow y and columns follow x -- the layout plt.contour expects.
    # Mapping the whole grid in one call replaces 2500 single-point mapFeature
    # calls and avoids storing 1x1 arrays into scalar slots.
    grid_x, grid_y = np.meshgrid( xvals, yvals )
    grid_features = mapFeature(grid_x.ravel(), grid_y.ravel())
    zvals = np.dot(grid_features, np.ravel(fitted_theta)).reshape(grid_x.shape)

    mycontour = plt.contour( xvals, yvals, zvals, [0])
    #Kind of a hacky way to display a text on top of the decision boundary
    myfmt = { 0:'Lambda = %d'%mylambda}
    plt.clabel(mycontour, inline=1, fontsize=15, fmt=myfmt)
    plt.title("Decision Boundary")

In [None]:
# 2x2 grid of panels, one per regularization strength: each shows the data and
# the decision boundary fitted with that lambda. A loop replaces four
# copy-pasted stanzas that differed only in subplot index and lambda.
plt.figure(figsize=(12,10))
for panel, reg_lambda in enumerate((0., 1., 3., 4.), start=1):
    plt.subplot(2, 2, panel)
    plotData()
    plotBoundary(theta,mappedX,y,reg_lambda)