In [1]:
import numpy as np
import pandas as pd
from sklearn.metrics import mean_squared_error as rms
from sklearn.metrics import roc_auc_score
from math import exp

In [3]:
# Load the dataset; header=None means the file has no header row, so the
# columns are labelled with the integers 0, 1, 2, ...
data = pd.read_csv('data-logistic.csv', header=None)

In [4]:
# Every column after the first is a feature (keeps the labels 1, 2, ...);
# column 0 holds the target values that are later passed to roc_auc_score.
x = data.iloc[:, 1:]
y = data[0]

In [5]:
# Step size that scales every weight update computed in delta_for_w.
K = 0.1
# Convergence threshold: gradient_regressor stops once the RMS change of the
# weight pair between two consecutive iterations is at or below this value.
ERROR = 1e-5

def sigma_y(i, w1, w2):
    """Return 1 / (1 + exp(-y[i] * (w1*x[1][i] + w2*x[2][i]))) for sample i.

    Reads the module-level ``x`` (feature columns labelled 1 and 2) and ``y``.

    Parameters
    ----------
    i : label of the sample in ``x`` and ``y``.
    w1, w2 : weights applied to feature columns 1 and 2.

    Returns
    -------
    float in [0, 1]. When the exponent is too large for ``math.exp`` the
    limit value 0.0 is returned instead of raising ``OverflowError``.
    """
    exponent = -y[i] * (w1*x[1][i] + w2*x[2][i])
    try:
        return 1. / (1. + exp(exponent))
    except OverflowError:
        # math.exp raises once its argument exceeds roughly 709; the
        # expression tends to 0 there, which is also what the np.exp-based
        # `sigma` in this notebook evaluates to for the same exponent.
        return 0.

def delta_for_w(w_index, w1, w2, C):
    """Return the increment to add to one weight for a single update step.

    The value is
        K / len(y) * sum_i( y[i] * x[w_index][i] * (1 - sigma_y(i, w1, w2)) )
        - K * C * w
    where ``w`` is ``w1`` when ``w_index == 1`` and ``w2`` for any other index.

    Parameters
    ----------
    w_index : column label in ``x`` of the feature whose weight is updated.
    w1, w2 : current weights.
    C : coefficient of the penalty term subtracted from the step.

    Reads the module-level ``x``, ``y`` and ``K``.
    """
    total = 0
    for i in range(len(y)):
        total += y[i] * x[w_index][i] * (1. - sigma_y(i, w1, w2))

    # Average over the samples and scale by the step size.
    total = total * (K / len(y))

    # The penalty is taken on the weight that is being updated.
    if w_index == 1:
        penalised_weight = w1
    else:
        penalised_weight = w2

    return total - K * C * penalised_weight

In [6]:
def gradient_regressor(C, iterations_remaining=10000):
    """Iterate the two-weight update until it converges or the budget runs out.

    Both weights start at 0. On every iteration each weight is advanced by
    ``delta_for_w`` evaluated at the weights of the previous iteration, so
    the two updates are simultaneous.

    Parameters
    ----------
    C : penalty coefficient forwarded to ``delta_for_w``.
    iterations_remaining : maximum number of iterations. A value that is
        zero, negative or otherwise falsy performs no iteration.

    Returns
    -------
    (w1, w2) : the weights after the last performed iteration, or
        ``(0.0, 0.0)`` when no iteration was performed.
    """
    changed_w1, changed_w2 = 0., 0.
    # Explicit `> 0` bound: with a plain truthiness check a negative or
    # fractional budget never reaches exactly 0, so the cap would be ignored.
    # The leading truthiness test keeps falsy values such as None working.
    while iterations_remaining and iterations_remaining > 0:
        iterations_remaining -= 1
        w1, w2 = changed_w1, changed_w2
        changed_w1 = w1 + delta_for_w(1, w1, w2, C)
        changed_w2 = w2 + delta_for_w(2, w1, w2, C)
        # `rms` is sklearn's mean_squared_error, so this compares the
        # root-mean-square change of the two weights against ERROR
        # (i.e. the Euclidean distance divided by sqrt(2)).
        if np.sqrt(rms([w1, w2], [changed_w1, changed_w2])) <= ERROR:
            break
    return changed_w1, changed_w2

def sigma(xi, w1, w2):
    """Return 1 / (1 + exp(-w1*xi[1] - w2*xi[2])) for one sample.

    Parameters
    ----------
    xi : any object indexable with the keys 1 and 2 (the notebook passes
        rows of ``x``).
    w1, w2 : weights applied to ``xi[1]`` and ``xi[2]``.
    """
    exponent = -w1 * xi[1] - w2 * xi[2]
    return 1. / (1 + np.exp(exponent))

In [7]:
# Fit the weight pair twice: with the penalty switched off (C = 0) and with C = 10.
w1, w2 = gradient_regressor(0.)
l2w1, l2w2 = gradient_regressor(10.)

print(w1, w2, l2w1, l2w2)

# Score every row of x with each weight pair; `args` forwards the weights to
# sigma after the row itself.
scores = x.apply(sigma, axis=1, args=(w1, w2))
l2scores = x.apply(sigma, axis=1, args=(l2w1, l2w2))

0.28768325128305977 0.09210201173422795 0.028558754546234223 0.024780137249735563


In [8]:
# ROC AUC of the scores obtained without (C = 0) and with (C = 10) the penalty.
auc_score = roc_auc_score(y, scores)
l2_auc_score = roc_auc_score(y, l2scores)

print(auc_score)
print(l2_auc_score)

# Store both values, space-separated, in answer.txt. The `with` block closes
# the file even if the write raises.
with open('answer.txt', 'w') as f:
    f.write(str(auc_score) + ' ' + str(l2_auc_score))

0.9268571428571428
0.9362857142857142
