# Programming Exercise 2: Logistic Regression

## Introduction

In this exercise, you will implement logistic regression and apply it to two different datasets. 

In [10]:
# used for manipulating directory paths
import os

# Scientific and vector computation for python
import numpy as np

# Plotting library
from matplotlib import pyplot

# Optimization module in scipy
from scipy import optimize

# library written for this exercise providing additional functions for assignment submission, and others
import utils

# tells matplotlib to embed plots within the notebook
%matplotlib inline

## 1 Logistic Regression

In this part of the exercise, you will build a logistic regression model to predict whether a student gets admitted into a university. Suppose that you are the administrator of a university department and
you want to determine each applicant’s chance of admission based on their results on two exams. You have historical data from previous applicants that you can use as a training set for logistic regression. For each training example, you have the applicant’s scores on two exams and the admissions
decision. Your task is to build a classification model that estimates an applicant’s probability of admission based the scores from those two exams. 

The following cell will load the data and corresponding labels:

In [11]:
data = np.loadtxt(os.path.join('ex2data1.txt'), delimiter=',')
#this is used to shuffle the data randomize the output
np.random.shuffle(data)

X, y = data[:, 0:2], data[:, 2]
#adding the bais collumn of ones
X = np.concatenate([np.ones((data.shape[0], 1)), X], axis=1)
print(X)

[[ 1.         85.40451939 57.05198398]
 [ 1.         34.62365962 78.02469282]
 [ 1.         51.54772027 46.85629026]
 [ 1.         62.22267576 52.06099195]
 [ 1.         34.21206098 44.2095286 ]
 [ 1.         82.36875376 40.61825516]
 [ 1.         60.45788574 73.0949981 ]
 [ 1.         99.8278578  72.36925193]
 [ 1.         75.02474557 46.55401354]
 [ 1.         94.44336777 65.56892161]
 [ 1.         68.46852179 85.5943071 ]
 [ 1.         67.37202755 42.83843832]
 [ 1.         77.92409145 68.97235999]
 [ 1.         30.05882245 49.59297387]
 [ 1.         74.78925296 41.57341523]
 [ 1.         82.30705337 76.4819633 ]
 [ 1.         42.07545454 78.844786  ]
 [ 1.         78.63542435 96.64742717]
 [ 1.         79.03273605 75.34437644]
 [ 1.         80.19018075 44.82162893]
 [ 1.         77.19303493 70.4582    ]
 [ 1.         50.4581598  75.80985953]
 [ 1.         70.66150955 92.92713789]
 [ 1.         94.09433113 77.15910509]
 [ 1.         72.34649423 96.22759297]
 [ 1.         32.72283304

In [12]:
new_X = np.zeros((X.shape[0],4))

new_X[:,0:3] = X[:,0:3]

new_X[:,3] = X[:,1]**2
print(new_X)

#deviding the set size into train=60 cross=20 test=20
X_train = new_X[0:60,:]
y_train = y[0:60]
X_cross = new_X[60:80,:]
y_cross = y[60:80]
X_test = new_X[80:100,:]
y_test = y[80:100]

[[1.00000000e+00 8.54045194e+01 5.70519840e+01 7.29393193e+03]
 [1.00000000e+00 3.46236596e+01 7.80246928e+01 1.19879781e+03]
 [1.00000000e+00 5.15477203e+01 4.68562903e+01 2.65716746e+03]
 [1.00000000e+00 6.22226758e+01 5.20609919e+01 3.87166138e+03]
 [1.00000000e+00 3.42120610e+01 4.42095286e+01 1.17046512e+03]
 [1.00000000e+00 8.23687538e+01 4.06182552e+01 6.78461160e+03]
 [1.00000000e+00 6.04578857e+01 7.30949981e+01 3.65515595e+03]
 [1.00000000e+00 9.98278578e+01 7.23692519e+01 9.96560119e+03]
 [1.00000000e+00 7.50247456e+01 4.65540135e+01 5.62871245e+03]
 [1.00000000e+00 9.44433678e+01 6.55689216e+01 8.91954972e+03]
 [1.00000000e+00 6.84685218e+01 8.55943071e+01 4.68793848e+03]
 [1.00000000e+00 6.73720275e+01 4.28384383e+01 4.53899010e+03]
 [1.00000000e+00 7.79240915e+01 6.89723600e+01 6.07216403e+03]
 [1.00000000e+00 3.00588224e+01 4.95929739e+01 9.03532807e+02]
 [1.00000000e+00 7.47892530e+01 4.15734152e+01 5.59343236e+03]
 [1.00000000e+00 8.23070534e+01 7.64819633e+01 6.774451

In [26]:
newNew_X = np.zeros((X.shape[0],4))
newNew_X[:,0:3] = X[: ,0:3]
newNew_X[:,2] = X[:, 2]**2
X_Newtrain = newNew_X[0:60,:]
y_Newtrain = y[0:60]
X_Newcross = newNew_X[60:80,:]
y_Newcross = y[60:80]
X_Newtest = newNew_X[80:100,:]
y_Newtest = y[80:100]

In [27]:
def sigmoid(z):
    """
    Compute sigmoid function given the input z.
    
    Parameters
    ----------
    z : array_like
        The input to the sigmoid function. This can be a 1-D vector 
        or a 2-D matrix. 
    
    Returns
    -------
    g : array_like
        The computed sigmoid function. g has the same shape as z, since
        the sigmoid is computed element-wise on z.
        
    Instructions
    ------------
    Compute the sigmoid of each value of z (z can be a matrix, vector or scalar).
    """
    # convert input to a numpy array
    z = np.array(z)
    
    # You need to return the following variables correctly 
    g = np.zeros(z.shape)

    # ====================== YOUR CODE HERE ======================
    g = 1 / (1 + np.exp(-z))

    # =============================================================
    return g

In [28]:
def costFunction(theta, X, y):
    """
    Compute cost and gradient for logistic regression. 
    
    Parameters
    ----------
    theta : array_like
        The parameters for logistic regression. This a vector
        of shape (n+1, ).
    
    X : array_like
        The input dataset of shape (m x n+1) where m is the total number
        of data points and n is the number of features. We assume the 
        intercept has already been added to the input.
    
    y : arra_like
        Labels for the input. This is a vector of shape (m, ).
    
    Returns
    -------
    J : float
        The computed value for the cost function. 
    
    grad : array_like
        A vector of shape (n+1, ) which is the gradient of the cost
        function with respect to theta, at the current values of theta.
        
    Instructions
    ------------
    Compute the cost of a particular choice of theta. You should set J to 
    the cost. Compute the partial derivatives and set grad to the partial
    derivatives of the cost w.r.t. each parameter in theta.
    """
    # Initialize some useful values
    m = y.size  # number of training examples

    # You need to return the following variables correctly 
    J = 0
    grad = np.zeros(theta.shape)

    # ====================== YOUR CODE HERE ======================
    h = sigmoid(X.dot(theta.T))
    
    J = (1 / m) * np.sum(-y.dot(np.log(h)) - (1 - y).dot(np.log(1 - h)))
    grad = (1 / m) * (h - y).dot(X)
    
    
    # =============================================================
    return J, grad

In [42]:
def computeCost(theta,X,y):
    # Initialize some useful values
    m = y.size  # number of training examples

    # You need to return the following variables correctly 
    J = 0

    # ====================== YOUR CODE HERE ======================
    h = sigmoid(X.dot(theta.T))
    
    J = (1 / m) * np.sum(-y.dot(np.log(h)) - (1 - y).dot(np.log(1 - h)))
    
    return J

Once you are done call your `costFunction` using two test cases for  $\theta$ by executing the next cell.

In [43]:
options= {'maxiter': 400}
initial_theta = np.zeros(3)
res = optimize.minimize(costFunction,initial_theta,(X_train[:,0:3], y_train),jac=True,method='TNC',options=options)
thata1 = res.x
print ( res )

     fun: 0.1142909479044377
     jac: array([ 8.96064706e-07, -8.20471970e-07,  2.49710283e-03])
 message: 'Converged (|f_n-f_(n-1)| ~= 0)'
    nfev: 52
     nit: 22
  status: 1
 success: True
       x: array([-41.60836284,   0.37752445,   0.30006989])


In [49]:
options= {'maxiter': 400}
initial_theta = np.zeros(3)
res = optimize.minimize(costFunction,initial_theta,(X_Newtrain[:,0:3], y_Newtrain),jac=True,method='TNC',options=options)
thata2 = res.x
print ( res )

     fun: 0.15947446031345885
     jac: array([0.00041525, 0.02064597, 0.00430398])
 message: 'Converged (|f_n-f_(n-1)| ~= 0)'
    nfev: 64
     nit: 28
  status: 1
 success: True
       x: array([-2.27812985e+01,  2.67717690e-01,  1.54839025e-03])


In [45]:
def predict(theta, X):
    """
    Predict whether the label is 0 or 1 using learned logistic regression.
    Computes the predictions for X using a threshold at 0.5 
    (i.e., if sigmoid(theta.T*x) >= 0.5, predict 1)
    
    Parameters
    ----------
    theta : array_like
        Parameters for logistic regression. A vecotor of shape (n+1, ).
    
    X : array_like
        The data to use for computing predictions. The rows is the number 
        of points to compute predictions, and columns is the number of
        features.

    Returns
    -------
    p : array_like
        Predictions and 0 or 1 for each row in X. 
    
    Instructions
    ------------
    Complete the following code to make predictions using your learned 
    logistic regression parameters.You should set p to a vector of 0's and 1's    
    """
    m = X.shape[0] # Number of training examples

    # You need to return the following variables correctly
    p = np.zeros(m)

    # ====================== YOUR CODE HERE ======================
    p = np.round(sigmoid(X.dot(theta.T)))

    
    # ============================================================
    return p

After you have completed the code in `predict`, we proceed to report the training accuracy of your classifier by computing the percentage of examples it got correct.

In [47]:
#Normal training without Degree

p = predict(thata1, X_test[:,0:3])
print('Train Accuracy: {:.2f} %'.format(np.mean(p == y_Newtest) * 100))

Train Accuracy: 85.00 %


In [51]:
# for training using Degree
p = predict(thata2, X_Newtest[:,0:3])
print('Train Accuracy: {:.2f} %'.format(np.mean(p == y_Newtest) * 100))

Train Accuracy: 80.00 %


In [58]:
testArray = (p == y_test)
count = np.array(testArray, dtype=np.bool)
print(np.count_nonzero(count), len(testArray))
per = np.count_nonzero(count)/len(testArray)
print(per*100)

16 20
80.0


## 2 Regularized logistic regression

In this part of the exercise, you will implement regularized logistic regression to predict whether microchips from a fabrication plant passes quality assurance (QA). During QA, each microchip goes through various tests to ensure it is functioning correctly.
Suppose you are the product manager of the factory and you have the test results for some microchips on two different tests. From these two tests, you would like to determine whether the microchips should be accepted or rejected. To help you make the decision, you have a dataset of test results on past microchips, from which you can build a logistic regression model.

First, we load the data from a CSV file:

<a id="section5"></a>
### 2.3 Cost function and gradient

Now you will implement code to compute the cost function and gradient for regularized logistic regression. Complete the code for the function `costFunctionReg` below to return the cost and gradient.

Recall that the regularized cost function in logistic regression is

$$ J(\theta) = \frac{1}{m} \sum_{i=1}^m \left[ -y^{(i)}\log \left( h_\theta \left(x^{(i)} \right) \right) - \left( 1 - y^{(i)} \right) \log \left( 1 - h_\theta \left( x^{(i)} \right) \right) \right] + \frac{\lambda}{2m} \sum_{j=1}^n \theta_j^2 $$

Note that you should not regularize the parameters $\theta_0$. The gradient of the cost function is a vector where the $j^{th}$ element is defined as follows:

$$ \frac{\partial J(\theta)}{\partial \theta_0} = \frac{1}{m} \sum_{i=1}^m \left( h_\theta \left(x^{(i)}\right) - y^{(i)} \right) x_j^{(i)} \qquad \text{for } j =0 $$

$$ \frac{\partial J(\theta)}{\partial \theta_j} = \left( \frac{1}{m} \sum_{i=1}^m \left( h_\theta \left(x^{(i)}\right) - y^{(i)} \right) x_j^{(i)} \right) + \frac{\lambda}{m}\theta_j \qquad \text{for } j \ge 1 $$
<a id="costFunctionReg"></a>

In [59]:
def costFunctionReg(theta, X, y, lambda_):
    """
    Compute cost and gradient for logistic regression with regularization.
    
    Parameters
    ----------
    theta : array_like
        Logistic regression parameters. A vector with shape (n, ). n is 
        the number of features including any intercept. If we have mapped
        our initial features into polynomial features, then n is the total 
        number of polynomial features. 
    
    X : array_like
        The data set with shape (m x n). m is the number of examples, and
        n is the number of features (after feature mapping).
    
    y : array_like
        The data labels. A vector with shape (m, ).
    
    lambda_ : float
        The regularization parameter. 
    
    Returns
    -------
    J : float
        The computed value for the regularized cost function. 
    
    grad : array_like
        A vector of shape (n, ) which is the gradient of the cost
        function with respect to theta, at the current values of theta.
    
    Instructions
    ------------
    Compute the cost `J` of a particular choice of theta.
    Compute the partial derivatives and set `grad` to the partial
    derivatives of the cost w.r.t. each parameter in theta.
    """
    # Initialize some useful values
    m = y.size  # number of training examples

    # You need to return the following variables correctly 
    J = 0
    grad = np.zeros(theta.shape)

    # ===================== YOUR CODE HERE ======================
    h = sigmoid(X.dot(theta.T))
    
    temp = theta
    temp[0] = 0
    
    J = (1 / m) * np.sum(-y.dot(np.log(h)) - (1 - y).dot(np.log(1 - h))) + (lambda_ / (2 * m)) * np.sum(np.square(temp))
    
    grad = (1 / m) * (h - y).dot(X) 
    grad = grad + (lambda_ / m) * temp
    # =============================================================
    return J, grad

In [60]:
def computeCostReg(theta,X,y,lambda_):
    # Initialize some useful values
    m = y.size  # number of training examples

    # You need to return the following variables correctly 
    J = 0

    h = sigmoid(X.dot(theta.T))
    
    J = (1 / m) * np.sum(-y.dot(np.log(h)) - (1 - y).dot(np.log(1 - h))) + (lambda_ / (2 * m)) * np.sum(np.square(theta))
    
    return J

In [61]:

Lambda = [0,0.01,0.02,0.04,0.08,0.16,0.32,0.64,1.024]
options= {'maxiter': 200}
for i in range(len(Lambda)):
    initTheta = np.zeros(4)
    res = optimize.minimize(costFunctionReg,
                            initTheta,
                            (X_train[:,0:4], y_train, Lambda[i]),
                            jac=True,
                            method='TNC',
                            options=options)
    print('cost is equal = ',computeCostReg(res.x,X_cross[:,0:4],y_cross,Lambda[i]))







cost is equal =  0.35179856440909946
cost is equal =  0.35185393693382194
cost is equal =  0.351912182433913
cost is equal =  0.35202446024880296
cost is equal =  0.35225358437417137
cost is equal =  0.35270768706556194
cost is equal =  0.35362583544731935
cost is equal =  0.3554878582230348
cost is equal =  0.3577961444250099
