In [3]:
import numpy as np
import pandas
from sklearn.model_selection import train_test_split as tts
from sklearn.metrics import accuracy_score as acc_score
from sklearn.linear_model import LogisticRegression as LogReg

In [4]:
class NewtonClf:
    def __init__(self, itr=10):
        self.itr = itr
        self.w = None

    def _sig(self, v):
        return 1 / (1 + np.exp(-v))

    def _loss(self, y, p):
        p = np.clip(p, 1e-13, 1 - 1e-13)
        return -np.mean(y * np.log(p) + (1 - y) * np.log(1 - p))

    def _H(self, X, p):
        D = np.diagflat(p * (1 - p))
        return X.T @ D @ X

    def fit(self, X, y):
        X = np.hstack((np.ones((X.shape[0], 1)), X))
        self.w = np.zeros(X.shape[1])

        for i in range(self.itr):
            p = self._sig(X @ self.w)
            l = self._loss(y, p)
            print(f"Iter {i+1}/{self.itr}, Mean Absolute Error: {l:.6f}")
            g = X.T @ (p - y) / y.size
            H = self._H(X, p)
            self.w -= np.linalg.inv(H) @ g

    def pred(self, X):
        X = np.hstack((np.ones((X.shape[0], 1)), X))
        return (self._sig(X @ self.w) > 0.5).astype(int)


In [5]:
df = pandas.read_csv('wdbc.data', header=None)

feat = df.iloc[:, 2:].values
lbl = df.iloc[:, 1].factorize()[0]


In [6]:
n_iter = 10
split_ratio = 0.2
accs_nr = []

nr_model = NewtonClf(n_iter)

for _ in range(n_iter):
    Xt, Xv, yt, yv = tts(feat, lbl, test_size=split_ratio)
    nr_model.fit(Xt, yt)
    preds = nr_model.pred(Xv)
    accs_nr.append(acc_score(yv, preds))

print(f"Avg acc Custom: {np.mean(accs_nr):.3f}")

accs_sk = []
sk_model = LogReg(solver='newton-cg')

for _ in range(n_iter):
    Xt, Xv, yt, yv = tts(feat, lbl, test_size=split_ratio)
    sk_model.fit(Xt, yt)
    preds_sk = sk_model.predict(Xv)
    accs_sk.append(acc_score(yv, preds_sk))

print(f"Avg acc SK: {np.mean(accs_sk):.3f}")


Iter 1/10, Mean Absolute Error: 0.693147
Iter 2/10, Mean Absolute Error: 0.691399
Iter 3/10, Mean Absolute Error: 0.689658
Iter 4/10, Mean Absolute Error: 0.687925
Iter 5/10, Mean Absolute Error: 0.686200
Iter 6/10, Mean Absolute Error: 0.684482
Iter 7/10, Mean Absolute Error: 0.682771
Iter 8/10, Mean Absolute Error: 0.681068
Iter 9/10, Mean Absolute Error: 0.679373
Iter 10/10, Mean Absolute Error: 0.677684
Iter 1/10, Mean Absolute Error: 0.693147
Iter 2/10, Mean Absolute Error: 0.691408
Iter 3/10, Mean Absolute Error: 0.689677
Iter 4/10, Mean Absolute Error: 0.687953
Iter 5/10, Mean Absolute Error: 0.686237
Iter 6/10, Mean Absolute Error: 0.684529
Iter 7/10, Mean Absolute Error: 0.682827
Iter 8/10, Mean Absolute Error: 0.681133
Iter 9/10, Mean Absolute Error: 0.679447
Iter 10/10, Mean Absolute Error: 0.677767
Iter 1/10, Mean Absolute Error: 0.693147
Iter 2/10, Mean Absolute Error: 0.691426
Iter 3/10, Mean Absolute Error: 0.689712
Iter 4/10, Mean Absolute Error: 0.688006
Iter 5/10, Mea

In [2]:
%%shell
jupyter nbconvert --to html /content/Prashant.ipynb

[NbConvertApp] Converting notebook /content/Prashant.ipynb to html
[NbConvertApp] Writing 592732 bytes to /content/Prashant.html


