In [52]:
import math

import numpy as np
from sklearn import datasets
from sklearn import preprocessing

1.0

In [65]:
def pred(X_test, coeff):
    """Predict binary class labels for the rows of ``X_test``.

    Parameters
    ----------
    X_test : array-like of shape (M, N)
        Feature rows to classify.
    coeff : array-like of length N + 1
        Model parameters: ``coeff[:N]`` are the per-feature weights and
        ``coeff[N]`` is the bias term.

    Returns
    -------
    numpy.ndarray of shape (M,)
        Float array holding 1.0 where the linear score is strictly positive
        (equivalently, where the sigmoid exceeds 0.5) and 0.0 otherwise.
    """
    # Score the rows that were actually passed in. The previous version read
    # the notebook globals X_train / Y_train and never looked at X_test.
    X = np.asarray(X_test, dtype=float)
    weights = np.asarray(coeff, dtype=float)
    N = X.shape[1]
    scores = X @ weights[:N] + weights[N]
    # The sign of the score decides the class, so the sigmoid itself is not needed.
    return (scores > 0).astype(float)

In [60]:
def cost(X_train, Y_train, coeff):
    """Print and return the mean binary cross-entropy of a logistic model.

    Parameters
    ----------
    X_train : array-like of shape (M, N)
        Feature matrix.
    Y_train : array-like of length M
        Binary targets (0 or 1).
    coeff : array-like of length N + 1
        Model parameters: ``coeff[:N]`` are the feature weights and
        ``coeff[N]`` is the bias term.

    Returns
    -------
    float
        The average of ``-[y*log(p) + (1 - y)*log(1 - p)]`` over all samples,
        where ``p = sigmoid(score)``. The same value is printed, as before.
        With no samples the loss is reported as 0.0.
    """
    X = np.asarray(X_train, dtype=float)
    y = np.asarray(Y_train, dtype=float).ravel()
    weights = np.asarray(coeff, dtype=float)
    M = len(X)
    N = X.shape[1]
    if M == 0:
        loss = 0.0
    else:
        z = X @ weights[:N] + weights[N]
        # -log(sigmoid(z)) == log(1 + e^-z) and -log(1 - sigmoid(z)) == log(1 + e^z).
        # logaddexp evaluates both without overflowing exp() or taking log(0),
        # which the direct math.exp / math.log formula did for large |z|.
        per_sample = y * np.logaddexp(0.0, -z) + (1.0 - y) * np.logaddexp(0.0, z)
        loss = float(per_sample.mean())
    print(loss)
    return loss

In [48]:
def step_gd_logistic(X_train, Y_train, alpha, coeff):
    """Apply one batch gradient-descent step to logistic-regression parameters.

    Parameters
    ----------
    X_train : array-like of shape (M, N)
        Feature matrix.
    Y_train : array-like of length M
        Binary targets (0 or 1).
    alpha : float
        Learning rate.
    coeff : sequence of length N + 1
        Current parameters: ``coeff[:N]`` are the feature weights and
        ``coeff[N]`` is the bias term. Updated in place.

    Returns
    -------
    The same ``coeff`` object, after its first N + 1 entries have been
    replaced by ``coeff - alpha * gradient`` of the mean cross-entropy loss.
    """
    X = np.asarray(X_train, dtype=float)
    y = np.asarray(Y_train, dtype=float).ravel()
    M = len(X)
    N = X.shape[1]
    if M == 0:
        # No samples means a zero gradient, so the parameters are unchanged.
        return coeff
    weights = np.asarray(coeff, dtype=float)
    z = X @ weights[:N] + weights[N]
    # sigmoid(z) written as exp(-log(1 + e^-z)): unlike 1/(1 + math.exp(-z)),
    # this cannot overflow when z is a large negative number.
    prob = np.exp(-np.logaddexp(0.0, -z))
    residual = prob - y
    coeff_slope = np.empty(N + 1)
    coeff_slope[:N] = X.T @ residual / M
    coeff_slope[N] = residual.sum() / M
    # Write the result back into the caller's object, as the element-wise
    # loop did, so code relying on the in-place update keeps working.
    coeff[:N + 1] = weights[:N + 1] - alpha * coeff_slope
    return coeff
        

In [49]:
def gd_logistic(X_train, Y_train, alpha, numIterations):
    """Fit logistic-regression parameters by repeated gradient-descent steps.

    Starts from an all-zero parameter vector of length ``N + 1`` (one entry
    per column of ``X_train`` plus one extra), then runs ``numIterations``
    calls to ``step_gd_logistic``. After every step ``cost`` is invoked on the
    current parameters, which prints the loss for that iteration.

    Parameters
    ----------
    X_train : numpy.ndarray
        Two-dimensional feature matrix; its second dimension gives N.
    Y_train : sequence
        Targets, passed through unchanged to the step and cost functions.
    alpha : float
        Learning rate handed to every step.
    numIterations : int
        How many update steps to perform.

    Returns
    -------
    numpy.ndarray
        The parameter vector after the final step (all zeros if no step ran).
    """
    n_features = X_train.shape[1]
    params = np.zeros(n_features + 1)
    for _ in range(numIterations):
        params = step_gd_logistic(X_train, Y_train, alpha, params)
        # Report progress: cost() prints the loss for the updated parameters.
        cost(X_train, Y_train, params)
    return params

In [70]:
def run(alpha=0.40, numIterations=100):
    """Train the logistic model on the standardized breast-cancer dataset.

    Loads ``datasets.load_breast_cancer()``, fits a ``StandardScaler`` on the
    raw feature matrix, transforms that same matrix, and hands the result to
    ``gd_logistic``.

    Parameters
    ----------
    alpha : float, optional
        Learning rate for gradient descent. Defaults to 0.40, the value that
        was previously hard-coded.
    numIterations : int, optional
        Number of gradient-descent steps. Defaults to 100, the value that was
        previously hard-coded.

    Returns
    -------
    The parameter vector returned by ``gd_logistic``.
    """
    cancer_ds = datasets.load_breast_cancer()
    X_trainRaw = cancer_ds.data
    Y_train = cancer_ds.target
    # The scaler is fit on, and applied to, the full dataset; nothing is held out.
    scaler = preprocessing.StandardScaler().fit(X_trainRaw)
    X_train = scaler.transform(X_trainRaw)
    return gd_logistic(X_train, Y_train, alpha, numIterations)

In [72]:
# Train the model, then score it against the same data it was fitted on.
coeff = run()
print(coeff.shape)
cancer_ds = datasets.load_breast_cancer()
X_trainRaw = cancer_ds.data
Y_train = cancer_ds.target
# run() returns only the coefficients, so the scaler is refit here on the
# same raw features. `preprocessing` comes from the notebook's import cell.
scaler = preprocessing.StandardScaler().fit(X_trainRaw)
# NOTE: despite its name, X_test is the standardized *training* matrix;
# there is no held-out split in this notebook.
X_test = scaler.transform(X_trainRaw)
Y_pred = pred(X_test, coeff)
# Non-zero entries mark misclassified samples:
# +1 means predicted 1 / actual 0, -1 means predicted 0 / actual 1.
print(Y_pred - Y_train)
# Single-number summary of the difference vector above.
print("training accuracy:", np.mean(Y_pred == Y_train))

0.2713492608355814
0.22748627996075335
0.20132888470830498
0.18342742350856675
0.1702376583445585
0.16004179953706843
0.15188047688462647
0.1451694903142846
0.13953162688707318
0.13471222244966752
0.13053296109834042
0.1268650798854385
0.12361311643648129
0.12070467274469494
0.11808375220968231
0.11570629433497902
0.11353710381927037
0.11154768946439636
0.10971471182738489
0.10801884748239517
0.10644394426466924
0.10497638352316847
0.10360459211624305
0.10231866437951616
0.10111006598195921
0.09997139953452015
0.09889621731137187
0.0978788703017981
0.09691438555718171
0.0959983657781953
0.095126906532937
0.09429652756372828
0.09350411543603958
0.09274687538226048
0.0920222906484378
0.09132808800115677
0.09066220832143111
0.09002278142242763
0.08940810439245765
0.08881662289462501
0.08824691495773501
0.08769767687558021
0.0871677108980327
0.08665591445096799
0.08616127066559744
0.0856828400333272
0.08521975303143658
0.0847712035888874
0.08433644328147205
0.08391477616202735
0.0835055541