In [1]:
import numpy as np
np.random.seed(1234)

In [2]:
x

NameError: name 'x' is not defined

In [3]:
w

NameError: name 'w' is not defined

In [4]:
def logistic_model(inputs, weights, bias):
    z = np.dot(inputs, weights) + bias
    z = z - np.max(z, axis=1, keepdims=True) ## Safe softmax!
    _e = np.exp(z)
    logits = np.divide(_e, np.sum(_e, axis=1, keepdims=True))
    return logits

In [5]:
def ce_loss(output, target):
    return -1.0 * np.mean( np.sum(np.multiply(target, np.log(output)), axis=1) )

In [6]:
x = np.random.rand(5, 4)
w = np.random.randn(4, 3)
b = np.ones((1, 3))

t = np.zeros((5, 3))
t[0, 0] = 1
t[1, 0] = 1
t[2, 1] = 1
t[3, 1] = 1
t[4, 2] = 1

In [7]:
output = logistic_model(x+1, w, b)

In [8]:
ce_loss(output, t)

4.173286345846108

---

In [9]:
def gradients(logits, target, inputs):
    assert logits.shape == target.shape
    assert inputs.shape[0] == logits.shape[0]
    
    grad_z =  (logits - target) / (1.0 * target.shape[0])
    grad_w = np.dot(inputs.T, grad_z)
    grad_b = np.sum(grad_z, axis=0, keepdims=True)
    
    return grad_w, grad_b

In [52]:
eg_w, eg_b = gradients(logistic_model(x, w, b), t, x)
assert eg_w.shape == w.shape
assert eg_b.shape == b.shape

In [53]:
def update_params((w, b), (eg_w, eg_b), alpha = 0.5):
    assert w.shape == eg_w.shape
    assert b.shape == eg_b.shape
    
    return (w - alpha*eg_w, b - alpha*eg_b)

## Data

In [12]:
## Data loader.
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets('MNIST_data', one_hot=True)
print "train examples", mnist.train.num_examples
print "test examples", mnist.test.num_examples

  from ._conv import register_converters as _register_converters


Extracting MNIST_data/train-images-idx3-ubyte.gz
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz
train examples 55000
test examples 10000


In [54]:
## Hyper-parameters.
batch_size = 128
nfeats = 784
nclasses = 10
alpha = 0.5

w = np.random.rand(nfeats, nclasses)
b = np.ones((1, nclasses))

In [55]:
mnist.train.images.shape

(55000, 784)

In [56]:
print "Index, TestLoss, TestAcc\n"
for ix in range(500):
    x, t = mnist.train.next_batch(batch_size=batch_size)
    logits = logistic_model(x, w, b)
    loss = ce_loss(logits, t)
    
    grads = gradients(logits, t, x)
    w, b = update_params((w, b), grads, alpha)
    
    if (ix+1)%20 == 0:
        test_logits = logistic_model(mnist.test.images, w, b)
        test_acc = np.mean(np.argmax(test_logits, axis=1) == np.argmax(mnist.test.labels, axis=1))
        
        print ix+1, ce_loss(test_logits, mnist.test.labels), test_acc
        alpha = alpha * 0.8 ## Decay.

Index, TestLoss, TestAcc

20 0.8676575412261578 0.7263
40 0.6883210182447401 0.782
60 0.5736027161245246 0.8225
80 0.5396352407717 0.834
100 0.5126702605857998 0.8437
120 0.4995277509605476 0.8484
140 0.4894401207409667 0.85
160 0.4822880963840118 0.853
180 0.47619868951752825 0.8547
200 0.4703943518212104 0.8569
220 0.46692700132385645 0.8577
240 0.4656269444902915 0.8592
260 0.4653851764110128 0.8586
280 0.4620481095224446 0.8593
300 0.46101464411040977 0.8595
320 0.4596697332494375 0.8605
340 0.4586724748999032 0.8615
360 0.4579304321476966 0.8617
380 0.45702922222001807 0.8626
400 0.45666732047291436 0.8628
420 0.45639255729532135 0.8626
440 0.4561059196307431 0.863
460 0.45594423671680295 0.8631
480 0.45580723352517744 0.863
500 0.4556427637938159 0.863


In [16]:

test_logits.shape

(10000, 10)

In [23]:
np.mean(np.argmax(test_logits, axis=1) == np.argmax(mnist.test.labels, axis=1))

0.8941

In [22]:
np.argmax(mnist.test.labels, axis=1).shape

(10000,)

## Binarization

In [61]:
def binarize(m):
    bm = np.ones_like(m)
    bm[m<0] = -1
    return bm

In [67]:
a1 = np.random.randn(2, 4)
print a1
print binarize(a1)

[[ 1.02541265  0.14457986 -0.16644927  0.27761987]
 [ 0.42070886 -0.14406326 -0.12995927 -1.18648191]]
[[ 1.  1. -1.  1.]
 [ 1. -1. -1. -1.]]


In [62]:
def clip(m):
    return np.clip(m, -1, 1)

In [80]:
clip(a1)

array([[ 1.        ,  0.14457986, -0.16644927,  0.27761987],
       [ 0.42070886, -0.14406326, -0.12995927, -1.        ]])

In [98]:
## Binarization algorithm.
np.random.seed(1234)
w = np.random.rand(nfeats, nclasses)
b = np.ones((1, nclasses))
alpha = 0.5
changes = 0

print "\n\tWITH BINARIZATION\n"
print "Index, TestLoss, TestAcc\n"
prev_loss = 0
for ix in range(10000):
    x, t = mnist.train.next_batch(batch_size=batch_size)
    w_bin = binarize(w) ## Binarize before forward pass.
    
    logits = logistic_model(x, w_bin, b) ## Forward pass with binarized weights.
    loss = ce_loss(logits, t)
    
    grads = gradients(logits, t, x) ## Gradients agains the binarized weights.
    w, b = update_params((w, b), grads, alpha)
    w = clip(w) ## Clip.
    
    if (ix+1)%200 == 0:
        test_logits = logistic_model(mnist.test.images, w_bin, b) ## Use binarized weights.
        test_acc = np.mean(np.argmax(test_logits, axis=1) == np.argmax(mnist.test.labels, axis=1))
        test_loss = ce_loss(test_logits, mnist.test.labels)
        
        print ix+1, test_loss, test_acc
        
        if prev_loss<test_loss:
            alpha = alpha * 0.9 ## Decay.
            changes += 1
            print "Updating alpha. Changes at", changes
        
        prev_loss = test_loss
            
    if changes>4:
        print "Exiting."
        print "Last test loss:", test_loss
        print "Last test accuracy:", test_acc
        break


	WITH BINARIZATION

Index, TestLoss, TestAcc

200 0.8735591990334786 0.7882
Updating alpha. Changes at 1
400 1.3220157012354152 0.7674
Updating alpha. Changes at 2
600 0.9633363888298269 0.8377
800 0.903804915904683 0.8288
1000 1.2096846063292197 0.8118
Updating alpha. Changes at 3
1200 1.5352151377310235 0.799
Updating alpha. Changes at 4
1400 0.861254799516688 0.8705
1600 0.9397290421057929 0.8519
Updating alpha. Changes at 5
Exiting.
Last test loss: 0.9397290421057929
Last test accuracy: 0.8519


In [102]:
w

array([[0.19151945, 0.62210877, 0.43772774, ..., 0.80187218, 0.95813935,
        0.87593263],
       [0.35781727, 0.50099513, 0.68346294, ..., 0.01376845, 0.77282662,
        0.88264119],
       [0.36488598, 0.61539618, 0.07538124, ..., 0.78873014, 0.31683612,
        0.56809865],
       ...,
       [0.0905448 , 0.46266612, 0.58650049, ..., 0.2905413 , 0.07627532,
        0.69052108],
       [0.8000275 , 0.41220923, 0.07180416, ..., 0.77861286, 0.57415148,
        0.8240048 ],
       [0.07931056, 0.97242024, 0.75399442, ..., 0.02676522, 0.50262157,
        0.94026889]])