In [1]:
import numpy as np
from bokeh.plotting import figure, output_notebook, show
from bokeh.io import save
from sklearn.linear_model import LogisticRegression


output_notebook()

In [2]:
from bokeh.layouts import gridplot

def draw_odds_graph(N=100):
    prob = np.linspace(0.01, 0.95, N)
    odds = np.array([p/(1-p) for p in prob])
    p = figure(title='Probability - Odds function', height=400, width=400,
        x_axis_label='Probability', y_axis_label='Odds')
    p.scatter(prob, odds, size=5)
    return p

def draw_logit_graph(N=100):
    prob = np.linspace(0.01, 0.95, N)
    odds = np.array([p/(1-p) for p in prob])
    logit = np.log(odds)
    p = figure(title='Probability - Logit function', height=400, width=400,
        x_axis_label='Probability', y_axis_label='Logit')
    p.scatter(prob, logit, size=5)
    return p

gp = gridplot([[draw_odds_graph(), draw_logit_graph()]])
show(gp)

In [3]:
X = np.array([
    [0.57, 0.35],
    [0.04, 0.15],
    [0.64, 0.25],
    [-0.27, -0.25],
    [-0.33, -0.44],
    [-0.35, -0.34],
    [-0.42, -0.25],
])

y = np.array([1, 1, 1, 0, 0, 0, 0])
colormap = ['#e34a33', '#43a2ca']
colors = [colormap[l] for l in y]

p = figure()
p.scatter(X[:,0], X[:,1], color=colors, size=10)
show(p)

In [4]:
def draw_classification_map(X, model, N=40, height=600, width=600, title=None):
    image, x_min, y_min, dw, dh = make_classification_map(X, model, N)
    p = draw_image(image, x_min, y_min, dw, dh, height, width, title)
    return p

def make_classification_map(X, model, N=40):
    x_min, x_max = X[:,0].min() * 1.1, X[:,0].max() * 1.1
    y_min, y_max = X[:,1].min() * 1.1, X[:,1].max() * 1.1
    x_test = np.linspace(x_min, x_max, N)
    y_test = np.linspace(y_min, y_max, N)
    X_test = np.array([(xi, yi) for xi in x_test for yi in y_test])
    y_test = model.predict_proba(X_test)
    S = y_test[:,0] - y_test[:,1]
    image = score_to_image(S, N)
    return image, x_min, y_min, x_max - x_min, y_max - y_min

def score_to_image(S, N=40):
    S = S.reshape(N,N)
    h, w = S.shape
    image = np.zeros((h, w), dtype=np.uint32)
    view = image.view(dtype=np.uint8).reshape(h, w, 4)

    for i in range(h):
        for j in range(w):
            s = S[i,j]
            if s > 0:
                view[i,j,0] = 128
                view[i,j,1] = 26
                view[i,j,2] = 0
                view[i,j,3] = int(200 * s)
            elif s < 0:
                view[i,j,0] = 0
                view[i,j,1] = 26
                view[i,j,2] = 128
                view[i,j,3] = int(-s * 200)
    return image

def draw_image(image, x_min, y_min, dw, dh, height=600, width=600, title=None):
    p = figure(height=height, width=width, title=title)
    p.image_rgba(image=[image], x=x_min, y=y_min, dw=dw, dh=dh)
    return p

model = LogisticRegression().fit(X, y)

title = f'b0 = {model.intercept_[0]:.3}, b1 = {model.coef_[0,0]:.3}, b2 = {model.coef_[0,1]:.3}'

p = draw_classification_map(X, model, title=title)
p.scatter(X[:,0], X[:,1], color=colors, size=10)
show(p)



In [5]:
model.coef_ = np.array([[-0.5, 0.7]])
model.intercept_ = np.array([0.3])

title = f'b0 = {model.intercept_[0]:.3}, b1 = {model.coef_[0,0]:.3}, b2 = {model.coef_[0,1]:.3}'

p = draw_classification_map(X, model, title=title)
p.scatter(X[:,0], X[:,1], color=colors, size=10)
show(p)

In [6]:
beta = np.array([
    [0.1, 0.1],
    [2.0, 2.0],
    [10., 10.],
    [20., 20.],
])

model.intercept_ = np.array([-0.144])
for b in beta:
    model.coef_ = np.array([b])
    logprob = model.predict_log_proba(X)
    logprob[np.where(y != 0)[0],0] = 0
    logprob[np.where(y != 1)[0],1] = 0
    nll = -logprob[np.where(logprob < 0)].mean()
    print(nll)

0.6531857858657759
0.25245049605743686
0.023815314814262255
0.0036480429472656437


In [7]:
def make_l1_classification_data(n_data=100, n_features=20, n_importants=5, n_classes=2):
    X = 0.2 * np.random.random_sample((n_data * n_classes, n_features * n_classes))
    for c in range(n_classes):
        row_b, row_e = c * n_data, (c+1) * n_data
        col_b, col_e = c * n_importants, (c+1) * n_importants
        X[row_b:row_e,col_b:col_e] = 0.6 + np.random.random_sample((n_data, n_importants)) * 0.4
    y = np.array([c for c in range(n_classes) for _ in range(n_data)], dtype=np.int)
    return X, y

X, y = make_l1_classification_data(n_features=5, n_importants=2, n_classes=3)
print(X.shape)
print(y.shape)

(300, 15)
(300,)


In [8]:
p = figure()
p.image(image=[X], x=0, y=0, dw=1, dh=1)
show(p)

In [9]:
lasso = LogisticRegression(penalty='l1', C=0.03).fit(X, y)
print(lasso.coef_)

[[ 0.          0.35948556  0.          0.          0.          0.
   0.          0.          0.          0.          0.          0.
   0.          0.          0.        ]
 [ 0.         -0.10279524  0.24732585  0.          0.          0.
   0.          0.          0.          0.          0.          0.
   0.          0.          0.        ]
 [ 0.         -0.3607675  -0.31552198  0.          0.          0.
   0.          0.          0.          0.          0.          0.
   0.          0.          0.        ]]




In [10]:
ridge = LogisticRegression(penalty='l2', C=0.5).fit(X, y)
print(ridge.coef_)

[[ 2.4617357   2.45710151 -1.56604351 -1.55313199 -1.54487463 -1.60830176
  -0.05681405 -0.04050241 -0.04460427 -0.08791885 -0.05720098 -0.06973952
  -0.11075643 -0.06716527 -0.06664289]
 [-1.56303755 -1.5466943   2.54689275  2.46016628 -1.58865469 -1.57969747
  -0.08370188 -0.06022821 -0.06511064 -0.04948992 -0.11075977 -0.06804898
  -0.0873082  -0.07197883 -0.1154976 ]
 [-1.57871211 -1.62441989 -1.67109574 -1.56845106  2.44165371  2.50498424
  -0.06867316 -0.10389544 -0.08656815 -0.06045278 -0.02754029 -0.06807017
  -0.00549838 -0.05522777 -0.03302182]]




In [11]:
from soydata.data import make_radial
from soydata.visualize import scatterplot

n_classes = 5
X, labels = make_radial(n_samples_per_cluster=100, n_classes=n_classes, 
    n_clusters_per_class=1, gap=0.3, equal_proportion=True,
    radius_min=0.1, radius_scale=1.0, radius_variance=0.5)

ridge = LogisticRegression(penalty='l2', C=0.001).fit(X, labels)
p = scatterplot(X, labels=labels, title='Radial', alpha=0.5, show_inline=False)
p = scatterplot(ridge.coef_, labels=np.arange(n_classes), marker='triangle', size=10, p=p)







In [12]:
X_ = X[np.where(labels < 3)[0]]
labels_ = labels[np.where(labels < 3)[0]]
ridge = LogisticRegression(penalty='l2', C=0.01).fit(X_, labels_)
p = scatterplot(X_, labels=labels_, title='Radial', alpha=0.5, show_inline=False)
p = scatterplot(ridge.coef_, labels=np.arange(3), marker='triangle', size=10, p=p)





