# Logistic Regression
We build a model to predict whether a particular student will be admitted to a university. For students who have previously applied, we have the results of two exam scores and the final admission decision; the task is to construct a classification model that assesses admission from those two scores.

## Load Data

In [1]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scipy.optimize as opt
np.set_printoptions(precision=3)

In [2]:
path = 'data.csv'
# header=None: the first row of the file is read as data, so column names are supplied here.
data = pd.read_csv(
    path,
    header=None,
    names=['Exam1', 'Exam2', 'Admitted'],
)

## Sigmoid Function

In [3]:
def sigmoid(z):
    """Logistic function 1 / (1 + exp(-z)), evaluated without overflow.

    Parameters
    ----------
    z : scalar or array-like
        Input value(s); converted to a float ndarray.

    Returns
    -------
    numpy.float64 or numpy.ndarray
        Element-wise sigmoid of ``z`` (a scalar for scalar input, otherwise
        an ndarray with the same shape as ``z``).
    """
    z = np.asarray(z, dtype=float)
    # exp is only ever taken of a non-positive number, so it cannot overflow.
    e = np.exp(-np.abs(z))
    # z >= 0: 1 / (1 + exp(-z));  z < 0: exp(z) / (1 + exp(z)) -- the same value.
    out = np.where(z >= 0, 1.0, e) / (1.0 + e)
    # Indexing with () turns a 0-d result back into a scalar and leaves arrays as-is.
    return out[()]

## Cost Function

In [4]:
def cost(theta, X, Y):
    """Mean cross-entropy (logistic regression) cost.

    Parameters
    ----------
    theta : numpy.ndarray
        Parameter vector, one entry per column of ``X``.
    X : numpy.ndarray
        Design matrix, one row per sample.
    Y : numpy.ndarray
        Labels (0 or 1), one per row of ``X``.

    Returns
    -------
    numpy.float64
        ``-mean(Y * log(h) + (1 - Y) * log(1 - h))`` with ``h = sigmoid(X @ theta.T)``.
    """
    z = X @ theta.T
    # log(sigmoid(z)) == -logaddexp(0, -z) and log(1 - sigmoid(z)) == -logaddexp(0, z).
    # Using logaddexp keeps both terms finite when the sigmoid saturates, where
    # taking log() of an exact 0 would yield -inf and a nan/inf cost.
    first = Y * -np.logaddexp(0, -z)
    second = (1 - Y) * -np.logaddexp(0, z)
    return -1 * np.mean(first + second)

## Preprocess Data

In [5]:
# Add the intercept column of ones as the first column.
# Guarded so that re-running this cell does not raise: DataFrame.insert
# refuses to insert a column whose name already exists.
if 'Ones' not in data.columns:
    data.insert(0, 'Ones', 1)

In [6]:
# X (training data): every column except the last; Y (target variable): the last column.
X = data.iloc[:, :-1].values
Y = data.iloc[:, -1].values
# One parameter per column of X, initialised to zero (sized from X rather than hard-coded).
theta = np.zeros(X.shape[1])

Check the dimensions of the arrays

In [7]:
theta

array([0., 0., 0.])

In [8]:
X.shape, Y.shape, theta.shape

((100, 3), (100,), (3,))

Calculate the initial cost (theta=0)

In [9]:
print(f'{cost(theta, X, Y):.3f}')

0.693


## Gradient Descent

In [10]:
def gradient(theta, X, Y):
    """Gradient of the logistic-regression cost with respect to theta.

    Evaluates ``(1 / len(X)) * X.T @ (sigmoid(X @ theta.T) - Y)``: the
    prediction error of every sample, projected back onto the columns of X
    and scaled by the number of samples.
    """
    n_samples = len(X)
    residual = sigmoid(X @ theta.T) - Y
    return (1 / n_samples * X.T) @ residual

In [11]:
gradient(theta, X, Y)

array([ -0.1  , -12.009, -11.263])

## Parameter Fitting

In [12]:
# Minimise the cost with SciPy's truncated Newton (TNC) routine, passing the
# analytic gradient. `opt` is scipy.optimize, imported in the notebook's import cell.
# The return value is a tuple; its first element is the fitted parameter array.
result = opt.fmin_tnc(func=cost, x0=theta, fprime=gradient, args=(X, Y))
result

  NIT   NF   F                       GTG
    0    1  6.931471805599453E-01   2.71082898E+02
    1    3  6.318123602631788E-01   7.89087138E-01
    2    5  5.892425240512910E-01   7.39226410E+01
    3    7  4.227824121103003E-01   1.85265932E+01
    4    9  4.072926906292383E-01   1.68671158E+01
    5   11  3.818854881101514E-01   1.07735051E+01
    6   13  3.786234784862133E-01   2.31584968E+01
tnc: stepmx = 1000
    7   16  2.389267639345792E-01   3.00821242E+00
    8   18  2.047203864002273E-01   1.52226291E-01
    9   20  2.046713880084462E-01   6.62492840E-02
   10   22  2.035303168264609E-01   9.30777002E-04
tnc: fscale = 32.7776
   11   24  2.035293527202483E-01   8.07318769E-06
   12   26  2.035251121374061E-01   1.80198187E-04
   13   28  2.034984108708112E-01   5.02846491E-04
   14   30  2.034978382232995E-01   9.91837930E-06
   15   32  2.034977907365883E-01   3.77628250E-06
   16   34  2.034977388717188E-01   1.94730968E-05
   17   36  2.034977015894746E-01   2.30411629E-13


(array([-25.161,   0.206,   0.201]), 36, 0)

In [13]:
type(result)

tuple

In [14]:
print(f'{cost(result[0], X, Y):.3f}')

0.203


Fit the optimal $\theta$ using `scipy.optimize.minimize` (Newton-CG method)

In [15]:
# Same cost and gradient as before, solved with the Newton-CG method.
# np.array(...) hands the optimiser copies of theta and Y.
res = opt.minimize(
    fun=cost,
    x0=np.array(theta),
    args=(X, np.array(Y)),
    method='Newton-CG',
    jac=gradient,
)

In [16]:
print(f'{cost(res.x, X, Y):.3f}')

0.203


## Predict

Predict with the fitted $\theta$: $h_\theta(x)=\frac{1}{1+e^{-\theta^T x}}$

If $h_\theta(x)\ge0.5$, predict $y=1$.

If $h_\theta(x)<0.5$, predict $y=0$.

In [17]:
def predict(theta, X):
    """Return a list of 0/1 labels, one per row of X.

    A row is labelled 1 when ``sigmoid(X @ theta.T)`` for that row is at
    least 0.5, and 0 otherwise.
    """
    labels = []
    for p in sigmoid(X @ theta.T):
        labels.append(1 if p >= 0.5 else 0)
    return labels

Accuracy

In [18]:
# Keep the TNC solution as a plain ndarray; NumPy no longer recommends np.matrix.
theta_min = np.asarray(result[0])
predictions = predict(theta_min, X)
# 1 where the predicted label equals the true label, 0 otherwise.
# A direct equality test replaces the XOR trick, which only works on integer labels.
correct = [1 if a == b else 0 for (a, b) in zip(predictions, Y)]
accuracy = sum(correct) / len(correct)
print('accuracy = {0:.0f}%'.format(accuracy*100))

accuracy = 89%
