In [3]:
import numpy as np
import matplotlib.pyplot as plt
import sklearn.datasets

In [4]:
# Load the iris dataset and reduce it to a binary problem on two features.
iris = sklearn.datasets.load_iris()

# Features: every sample, first two columns only.
X = iris.data[:, 0:2]

# Labels: 0 where the original target is class 0, 1 for every other class.
y = (iris.target != 0).astype(int)

In [24]:
class LogisticRegression:
    """Binary logistic regression fitted with batch gradient descent.

    Predictions are ``sigmoid(X @ theta)``; the class adds no intercept
    term of its own, so a bias requires a constant column in ``X``.

    Parameters
    ----------
    lr : float
        Step size applied to the gradient in ``update_theta``.
    iters : int
        Number of gradient steps performed by ``grad_descent_theta``.
    verbose : bool
        When true, ``grad_descent_theta`` prints the cost every 1000
        iterations; when false (the default) training is silent.
    """

    # Probabilities are clipped to [_EPS, 1 - _EPS] before taking logs so a
    # saturated sigmoid (exactly 0.0 or 1.0) cannot yield log(0) -> nan/inf.
    _EPS = 1e-15

    def __init__(self, lr=0.01, iters=100000, verbose=False):
        self.lr = lr
        self.iters = iters
        self.verbose = verbose

    def sigmoid(self, z):
        """Return the logistic function ``1 / (1 + exp(-z))`` element-wise.

        Evaluated as ``exp(-log(1 + exp(-z)))`` through ``np.logaddexp`` so
        that large negative ``z`` does not overflow inside ``exp``.
        """
        return np.exp(-np.logaddexp(0.0, np.negative(z)))

    def predict(self, X, theta):
        """Return the predicted probability ``sigmoid(X . theta)`` per row of X."""
        z = np.dot(X, theta)
        return self.sigmoid(z)

    def loss_function(self, X, y, theta):
        """Return the mean binary cross-entropy of ``theta`` on ``(X, y)``.

        ``y`` holds 0/1 labels. Predictions are clipped away from 0 and 1
        before the logarithms so the result stays finite.
        """
        n = len(y)
        pred = np.clip(self.predict(X, theta), self._EPS, 1.0 - self._EPS)

        class1_cost = -y * np.log(pred)
        class0_cost = -(1 - y) * np.log(1 - pred)

        return (class1_cost + class0_cost).sum() / n

    def update_theta(self, X, y, theta):
        """Apply one gradient-descent step to ``theta`` and return it.

        ``theta`` is updated IN PLACE (and also returned); it must therefore
        be a float array. Pass a copy if the starting values are needed later.
        """
        n = len(y)
        pred = self.predict(X, theta)

        # Gradient of the mean cross-entropy with respect to theta.
        gradient_theta = np.dot(X.T, pred - y) / n

        theta -= self.lr * gradient_theta
        return theta

    def classify(self, X, theta):
        """Return hard 0/1 labels: 1 where the predicted probability is >= 0.5."""
        return (self.predict(X, theta) >= 0.5).astype(int)

    def grad_descent_theta(self, X, y, theta):
        """Run ``self.iters`` gradient steps starting from ``theta``.

        Returns
        -------
        (weights, cost_history)
            ``weights`` is the fitted parameter vector (the same array object
            as ``theta``, because ``update_theta`` works in place) and
            ``cost_history`` is a list with the loss after every step.
            With ``iters == 0`` the starting ``theta`` and an empty list are
            returned.
        """
        cost_history = []
        # Bind up front so a zero-iteration run still has something to return.
        weights = theta

        for i in range(self.iters):
            weights = self.update_theta(X, y, weights)

            # Track the loss after each step for later inspection.
            cost = self.loss_function(X, y, weights)
            cost_history.append(cost)

            # Progress log, only when the caller asked for it.
            if self.verbose and i % 1000 == 0:
                print("iter: "+str(i) + " cost: "+str(cost))

        return weights, cost_history

    @staticmethod
    def accuracy(predicted_labels, actual_labels):
        """Return the fraction of positions where the two label arrays agree.

        Declared static so it can be called both as
        ``LogisticRegression.accuracy(p, a)`` and as ``instance.accuracy(p, a)``.
        """
        diff = np.asarray(predicted_labels) - np.asarray(actual_labels)
        return 1.0 - (float(np.count_nonzero(diff)) / len(diff))

In [27]:
# Fit the model on the two-feature iris problem and show the fitted
# probabilities. verbose=True: we want the cost log printed during training.
logreg = LogisticRegression(verbose=True)

# Start every weight at 1 / n_samples.
theta = np.full((X.shape[1],), 1/X.shape[0])

# Keep what training returns instead of relying on `theta` having been
# modified in place as a side effect of the call.
theta, cost_history = logreg.grad_descent_theta(X, y, theta)
logreg.predict(X, theta)

iter: 0 cost: 0.6687074232261783
iter: 1000 cost: 0.3184330521849697
iter: 2000 cost: 0.2204488607083133
iter: 3000 cost: 0.1737708777886455
iter: 4000 cost: 0.1464501661616072
iter: 5000 cost: 0.12843289951920084
iter: 6000 cost: 0.11560521808395043
iter: 7000 cost: 0.10597489063770808
iter: 8000 cost: 0.09845894959612554
iter: 9000 cost: 0.09241714824336872
iter: 10000 cost: 0.08744608183886389
iter: 11000 cost: 0.08327855093751765
iter: 12000 cost: 0.07973030578848482
iter: 13000 cost: 0.07667001960143219
iter: 14000 cost: 0.07400145797125485
iter: 15000 cost: 0.07165242326210398
iter: 16000 cost: 0.06956764447067916
iter: 17000 cost: 0.06770405890644514
iter: 18000 cost: 0.06602759481738955
iter: 19000 cost: 0.0645109246626808
iter: 20000 cost: 0.06313186289030547
iter: 21000 cost: 0.0618722017858741
iter: 22000 cost: 0.06071685135122951
iter: 23000 cost: 0.059653194169348445
iter: 24000 cost: 0.05867059487844514
iter: 25000 cost: 0.05776002254967247
iter: 26000 cost: 0.05691375667

array([4.39051748e-03, 1.52921763e-01, 8.55537000e-03, 1.28230210e-02,
       9.63208812e-04, 4.92471824e-04, 7.18479045e-04, 6.59444532e-03,
       2.85921585e-02, 6.43713925e-02, 3.38093819e-03, 2.17992327e-03,
       9.38461291e-02, 6.39398815e-03, 1.73066148e-03, 2.09796616e-05,
       4.92471824e-04, 4.39051748e-03, 6.80114398e-03, 2.44034712e-04,
       5.77489078e-02, 6.40082406e-04, 1.04416271e-04, 2.94679082e-02,
       2.17992327e-03, 2.39361977e-01, 6.59444532e-03, 7.62838108e-03,
       1.97715943e-02, 8.55537000e-03, 3.79705106e-02, 5.77489078e-02,
       2.35510161e-05, 4.75375668e-05, 6.43713925e-02, 4.37074224e-02,
       3.91219416e-02, 5.52799271e-04, 1.10928842e-02, 1.14389599e-02,
       2.52346922e-03, 9.43649736e-01, 1.62655695e-03, 2.52346922e-03,
       2.44034712e-04, 9.38461291e-02, 2.44034712e-04, 4.92599937e-03,
       1.94237151e-03, 1.71201855e-02, 9.99673963e-01, 9.90933471e-01,
       9.99783385e-01, 9.99769500e-01, 9.99889289e-01, 9.90650016e-01,
      