In [1]:
import numpy as np
import pandas as pd
import warnings 
import matplotlib.pyplot as plt
# NOTE(review): this silences every warning category for the whole session,
# which can hide numerical (e.g. overflow) and deprecation warnings coming
# from NumPy; consider narrowing the filter to specific categories.
warnings.filterwarnings('ignore')

In [2]:
#sigmoid function
def sigmoid(x):
    '''
    Numerically stable logistic function 1 / (1 + exp(-x)), applied element-wise.

    input:
    x: real scalar or array-like (lists and integer arrays are converted to float)

    output:
    h: values in [0, 1] with the same shape as x; a scalar input yields a
       NumPy scalar rather than a 0-d array
    '''
    z = np.asarray(x)
    if not np.issubdtype(z.dtype, np.floating):
        z = z.astype(float)
    # exp is only ever taken of a non-positive number, so it cannot overflow
    # for large |x| (the naive form overflows when x is very negative).
    decay = np.exp(-np.abs(z))
    # x >= 0: 1 / (1 + e^-x);  x < 0: e^x / (1 + e^x) -- algebraically equal.
    h = np.where(z >= 0, 1 / (1 + decay), decay / (1 + decay))
    # Indexing with () turns a 0-d result into a scalar and leaves arrays as-is.
    return h[()]

In [27]:
def _cross_entropy_cost(h, y, m):
    '''Mean binary cross-entropy of predicted probabilities h against labels y.'''
    # Keep h strictly inside (0, 1) so log() never returns -inf (and 0 * -inf = nan)
    # once the sigmoid saturates.
    eps = np.finfo(float).eps
    h = np.clip(h, eps, 1 - eps)
    return -1/m * (np.dot(y.T, np.log(h)) + np.dot((1 - y).T, np.log(1 - h)))


def gradient_descent(X, y, theta, iter_nums, alpha, verbose=False):
    '''
    Fit logistic-regression weights with batch gradient descent.

    input:
    X: matrix input (m, n+1)
    y: label output vector (m, 1)
    theta: initial weight (n+1, 1); the caller's array is not modified
    iter_nums: number of gradient steps to run
    alpha: learning rate
    verbose: when True, print the cost and weights every 100 iterations

    output:
    J: cost computed at the start of the last iteration, i.e. just before the
       final weight update (for iter_nums <= 0, the cost of the initial theta)
    theta: weight after the last update

    raises:
    ValueError: if X.dot(theta) and y do not have the same shape
    '''
    m = X.shape[0]
    # A 1-D y combined with a 2-D theta (or vice versa) would broadcast (h - y)
    # into an (m, m) matrix and silently train garbage, so reject it up front.
    if np.shape(np.dot(X, theta)) != np.shape(y):
        raise ValueError(
            f"shape mismatch: X.dot(theta) has shape {np.shape(np.dot(X, theta))} "
            f"but y has shape {np.shape(y)}"
        )
    J = None
    for i in range(iter_nums):
        # logistic function (hypothesis)
        h = sigmoid(np.dot(X, theta))
        # cost function
        J = _cross_entropy_cost(h, y, m)
        # gradient step
        theta = theta - alpha/m*(np.dot(X.T, (h-y)))
        if verbose and i%100==0:
            # .item() avoids the deprecated float() conversion of a (1, 1) array
            cost = np.asarray(J).item()
            # ravel() also works when there is a single weight (squeeze gives 0-d)
            weights = [round(float(t), 8) for t in np.ravel(theta)]
            print(f"the cost at loop {i} is {cost:.8f}")
            print(f"the corresponding weight is {weights} ")
    if J is None:
        # No iteration ran: report the cost of the untouched initial weights
        # instead of failing on an unbound J.
        J = _cross_entropy_cost(sigmoid(np.dot(X, theta)), y, m)
    return J, theta

    

In [23]:
# Load the Iris dataset and frame a binary task on a single feature.
from sklearn import datasets
iris = datasets.load_iris()
print(f"Feature name {iris.feature_names}")
print(f"Label name: {iris.target_names}")
# Keep only the last column (petal width) as a 2-D feature matrix.
X = iris.data[:, 3:]
# Samples of class index 2 become label 1, all other classes label 0.
y = (iris.target == 2).astype(int)

Feature name ['sepal length (cm)', 'sepal width (cm)', 'petal length (cm)', 'petal width (cm)']
Label name: ['setosa' 'versicolor' 'virginica']


In [29]:
# Seed the global RNG so the random initial weights are reproducible.
np.random.seed(42)
m, n = X.shape[0], X.shape[1]
# Column of ones that acts as the bias (intercept) input.
X0 = np.ones((m, 1))
theta = np.random.rand(n + 1, 1)
alpha = 0.5
num_iters = 500
# Labels as a column vector and features with the bias column prepended.
y_train = y.reshape((m, 1))
X_train = np.hstack((X0, X))
J, theta = gradient_descent(
    X_train, y_train, theta, iter_nums=num_iters, alpha=alpha, verbose=True
)

the cost at loop 0 is 0.98598725
the corresponding weight is [0.14300946, 0.76888329] 
the cost at loop 100 is 0.29144065
the corresponding weight is [-3.54407475, 2.20761718] 
the cost at loop 200 is 0.23629106
the corresponding weight is [-4.9469312, 3.05622182] 
the cost at loop 300 is 0.20971306
the corresponding weight is [-5.92919089, 3.64817334] 
the cost at loop 400 is 0.19302536
the corresponding weight is [-6.70893281, 4.11858305] 


In [20]:
# Shape of the single-column feature slice (last column of the Iris data).
iris.data[:, 3:].shape

(150, 1)

In [46]:
#test model
def predict(X, theta, threshold):
    '''
    Score samples with the logistic model and turn the scores into 0/1 labels.

    Input:
    X: input matrix (m, n+1)
    theta: weight (n+1, 1)
    threshold: sigmoid cut-off; scores at or above it are assigned class 1,
               scores below it class 0

    Output:
    (probabilities, labels): the sigmoid outputs and their integer 0/1 labels
    '''
    probabilities = sigmoid(np.dot(X, theta))
    labels = np.greater_equal(probabilities, threshold).astype(int)
    return probabilities, labels



In [47]:
# Evaluate the fitted weights on an evenly spaced grid of feature values in [0, 3].
num = 1000
X_gen = np.linspace(0, 3, num)[:, np.newaxis]
# Prepend the bias column of ones, matching the layout used for training.
X_test = np.hstack((np.ones((num, 1)), X_gen))
predict_prob, predict_binary = predict(X_test, theta, threshold=0.5)



(1000, 1)