In [1]:
import pandas as pd
data = pd.read_csv("data-logistic.csv", header=None)
data.head()

Unnamed: 0,0,1,2
0,-1,-0.663827,-0.138526
1,1,1.994596,2.468025
2,-1,-1.247395,0.749425
3,1,2.309374,1.899836
4,1,0.849143,2.40775


In [2]:
import numpy as np


def sigmoid(z):
    return 1.0 / (1.0 + np.exp(z))


def gradient(k, C, w, y, x):
    l = len(y)
    s = sigmoid(-y * np.dot(x, w))
    summa = np.dot(y.transpose(), x * (1 - s)).transpose()
    return w + (k / l) * summa - k * C * w

In [3]:
C = 10
k = 0.1
w = np.array([[0], [0]])
y = data[[0]].values
x = data[[1, 2]].values
gradient(k, C, w, y, x)

array([[ 0.03573127],
       [ 0.03245997]])

In [11]:
def gradient_descent(k, C, w, y, x):
    for i in xrange(1000):
        w_next = gradient(k, C, w, y, x)
        if (np.linalg.norm(w - w_next) < 1e-5):
            return w_next
        else:
            w = w_next
    return w

w_good = gradient_descent(k, 10, w, y, x)
w_good

array([[ 0.02855875],
       [ 0.02478014]])

In [6]:
w_bad = gradient_descent(k, 0, w, y, x)
w_bad

array([[ 0.28810819],
       [ 0.0917091 ]])

In [7]:
def weighted(w, x):
    y = x * w.transpose()
    return sigmoid(y[:, 0] - y[:, 1])

In [8]:
y_out_good = weighted(w_good, x)
y_out_good

array([ 0.50388135,  0.50104883,  0.51354576,  0.49528143,  0.50885285,
        0.48549838,  0.4979298 ,  0.50958299,  0.50104412,  0.50759908,
        0.50422234,  0.50365003,  0.49226504,  0.48102894,  0.49982332,
        0.48766022,  0.49391065,  0.50250994,  0.49202696,  0.49160164,
        0.49030279,  0.56397596,  0.49934827,  0.49883076,  0.49004358,
        0.49687691,  0.50086493,  0.49923122,  0.50555719,  0.50172118,
        0.50008294,  0.51000257,  0.49789727,  0.48450506,  0.51039179,
        0.477498  ,  0.5057425 ,  0.49520578,  0.48908557,  0.48658171,
        0.49173597,  0.51249244,  0.49740659,  0.49605152,  0.50569279,
        0.49685645,  0.4843251 ,  0.50279853,  0.49556991,  0.51380209,
        0.49167282,  0.51624432,  0.48837639,  0.49604328,  0.49691471,
        0.52160422,  0.51085179,  0.4991004 ,  0.49622378,  0.50175902,
        0.49797078,  0.48367048,  0.49647722,  0.49097063,  0.49806398,
        0.50873632,  0.49846677,  0.49363863,  0.49780382,  0.51

In [9]:
y_out_bad = weighted(w_bad, x)
y_out_bad

array([ 0.54451924,  0.41379012,  0.60542316,  0.37963036,  0.49404214,
        0.38224928,  0.39129985,  0.53847475,  0.54306584,  0.49623968,
        0.52297433,  0.52020568,  0.46775518,  0.35552478,  0.42807007,
        0.40231036,  0.48051175,  0.45634684,  0.37740999,  0.33668688,
        0.4267043 ,  0.96509578,  0.53595186,  0.51726112,  0.39450983,
        0.46590545,  0.40885141,  0.36652718,  0.54333582,  0.46855073,
        0.51809132,  0.56076357,  0.34358529,  0.3005325 ,  0.54790593,
        0.31064153,  0.43072298,  0.39499355,  0.40297904,  0.33742472,
        0.38661768,  0.59443015,  0.44181244,  0.49710091,  0.51678107,
        0.41609027,  0.27003194,  0.52798866,  0.45146248,  0.54913355,
        0.38479613,  0.54787541,  0.40332856,  0.378214  ,  0.51370045,
        0.62773434,  0.55684541,  0.39368279,  0.44667095,  0.41105117,
        0.40844515,  0.34362124,  0.38649701,  0.34359229,  0.47473371,
        0.47745421,  0.47474307,  0.90629298,  0.45853642,  0.48

In [10]:
from sklearn.metrics import roc_auc_score
print(roc_auc_score(y, y_out_bad))
print(roc_auc_score(y, y_out_good))

0.205428571429
0.430952380952
