In [54]:
import numpy as np
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.metrics import accuracy_score

# Debug switch read by gradientDescent: when True, array shapes are printed
# on every iteration.
shapesLogging = False

# Dataset bundle; its 'data' and 'target' entries are used below.
data = load_iris()

# Feature matrix, transposed so that axis 0 matches the weight vector and
# axis 1 is the dimension the gradients are averaged over.
X = np.transpose(data['data'])

# make binary: label 0 stays 0, every other label becomes 1; stored as one row
y = (data['target'] > 0).astype('int').reshape(1, -1)

def cost(y, a):
    """Return the mean binary cross-entropy between labels and predictions.

    Evaluates ``-sum(y * log(a) + (1 - y) * log(1 - a)) / y.shape[1]``.

    Args:
        y: 2-D array of labels; the length of its second axis is the divisor.
        a: Array of predicted probabilities, broadcastable against ``y``.

    Returns:
        The summed loss divided by ``y.shape[1]``. The result is ``inf`` when
        a prediction is exactly 0 or 1 on the wrong side of its label, and
        finite when a saturated prediction agrees with its label.
    """
    y = np.asarray(y)
    a = np.asarray(a)
    # A term whose weight is exactly zero contributes nothing, even when its
    # logarithm is -inf (prediction saturated at 0 or 1). Evaluated naively
    # that product is 0 * -inf = nan, which would poison the whole sum.
    # np.where still evaluates both branches, hence the silenced warnings.
    with np.errstate(divide='ignore', invalid='ignore'):
        positive_term = np.where(y == 0, 0.0, y * np.log(a))
        negative_term = np.where(y == 1, 0.0, (1 - y) * np.log(1 - a))
    return -np.sum(positive_term + negative_term) / y.shape[1]

def calc_dz(a, y):
    """Return the element-wise difference ``a - y``.

    Args:
        a: Activations.
        y: Labels, broadcastable against ``a``.
    """
    return a - y

def calc_dw(X, dz):
    """Return the element-wise (broadcast) product of ``X`` and ``dz``.

    No reduction is performed here; gradientDescent sums the result along
    axis 1 and divides by the sample count itself.
    """
    return X * dz

def calc_db(dz):
    """Return the per-element bias gradient, which is ``dz`` unchanged.

    The same object is handed back (no copy); gradientDescent sums it and
    divides by the sample count.
    """
    # The original bound an unused local `db` and then returned `dz`;
    # returning the argument directly states the same thing without the
    # dead assignment.
    return dz

def sigmoid(z):
    """Return the logistic function ``1 / (1 + exp(-z))``, element-wise."""
    denominator = 1 + np.exp(-z)
    return 1 / denominator

def calcZ(X, w, b):
    """Return the linear pre-activation ``w.T . X + b``.

    Args:
        X: Input array multiplied on the right.
        w: Weight array; its transpose is multiplied on the left.
        b: Bias added to every element of the product.
    """
    projection = np.dot(w.T, X)
    return projection + b

def calcA(X, w, b):
    """Return the activations: ``sigmoid`` applied to ``calcZ(X, w, b)``."""
    z = calcZ(X, w, b)
    return sigmoid(z)

def initializeWb(X):
    """Return the starting parameters ``(w, b)``.

    ``w`` is a zero-filled column of shape ``(X.shape[0], 1)`` and ``b`` is
    the integer 0.
    """
    n_rows = X.shape[0]
    return np.zeros((n_rows, 1)), 0

def gradientDescent(X, y, nIter=100, alpha = 0.1, printEvery=1):
    """Fit logistic-regression parameters with batch gradient descent.

    Args:
        X: 2-D array. ``X.shape[0]`` sizes the weight vector and
            ``X.shape[1]`` is the count the gradients are averaged over.
        y: Labels, compared element-wise against the activations.
        nIter: Number of update steps to perform.
        alpha: Learning rate applied to both the weight and bias updates.
        printEvery: Print cost and accuracy on every iteration whose index is
            a multiple of this value. The default of 1 prints every
            iteration; a falsy value (0 or None) disables the printing.

    Returns:
        Tuple ``(J, w, b)``. ``J`` is the cost of the activations computed at
        the start of the last iteration, i.e. before that iteration's
        parameter update; it is ``None`` when ``nIter`` is 0. ``w`` and ``b``
        are the parameters after the last update.
    """
    w, b = initializeWb(X)
    m = X.shape[1]
    # Defined up front so the return below works even if the loop never runs.
    J = None

    for i in range(nIter):
        # Forward pass and per-sample gradients.
        a = calcA(X, w, b)
        dz = calc_dz(a, y)
        db = calc_db(dz)
        dw = calc_dw(X, dz)

        # Average the per-sample gradients and step against them.
        b = b - alpha * np.sum(db) / m
        w = w - alpha * np.sum(dw, axis=1, keepdims=True) / m
        # Cost of the activations used for this step (pre-update parameters).
        J = cost(y, a)

        if shapesLogging:
            print('dz shape: {}'.format(dz.shape))
            # Report the shape of the cost value; `cost` itself is the
            # function and has no `.shape` attribute.
            print('cost shape: {}'.format(np.shape(J)))
            print('dw shape: {}'.format(dw.shape))
            print('db shape: {}'.format(db.shape))
            print('a shape: {}'.format(a.shape))
            print('w shape: {}'.format(w.shape))
            print('b shape: {}'.format('scalar'))
            print('X shape: {}'.format(X.shape))
            print('y shape: {}'.format(y.shape))

        if printEvery and not i % printEvery:
            print('Cost: {}, iter: {}'.format(J, i))
            # Activations are rounded to 0/1 to act as predicted labels.
            print('Accuracy: {}'.format(
                accuracy_score(
                    np.squeeze(y),
                    np.round(np.squeeze(a))
                )
            ))
    return J, w, b


# Train on X and y with gradientDescent's default nIter and alpha; the call
# prints the cost and accuracy as it iterates and returns the last cost
# together with the fitted weights and bias.
J, w, b = gradientDescent(X, y)

Cost: 0.6931471805599454, iter: 0
Accuracy: 0.3333333333333333
Cost: 0.5518148780651356, iter: 1
Accuracy: 0.6666666666666666
Cost: 0.5020745862976461, iter: 2
Accuracy: 0.6666666666666666
Cost: 0.4716517350431836, iter: 3
Accuracy: 0.6666666666666666
Cost: 0.4439082969004399, iter: 4
Accuracy: 0.6666666666666666
Cost: 0.4186096902203111, iter: 5
Accuracy: 0.6666666666666666
Cost: 0.39553047636891736, iter: 6
Accuracy: 0.68
Cost: 0.3744577718911598, iter: 7
Accuracy: 0.7466666666666667
Cost: 0.35519383209413335, iter: 8
Accuracy: 0.88
Cost: 0.33755736791502094, iter: 9
Accuracy: 0.9666666666666667
Cost: 0.32138380114982323, iter: 10
Accuracy: 1.0
Cost: 0.306524810233557, iter: 11
Accuracy: 1.0
Cost: 0.29284743102591093, iter: 12
Accuracy: 1.0
Cost: 0.2802329094394062, iter: 13
Accuracy: 1.0
Cost: 0.2685754445996217, iter: 14
Accuracy: 1.0
Cost: 0.2577809155452957, iter: 15
Accuracy: 1.0
Cost: 0.24776565038333714, iter: 16
Accuracy: 1.0
Cost: 0.23845527242210102, iter: 17
Accuracy: 1.0
