In [119]:
from setup_mnist import MNIST
import helper
import numpy as np
import time
import matplotlib.pyplot as plt
from sklearn import svm
from cvxopt import matrix, solvers
from itertools import product
import tensorflow as tf
%matplotlib inline

In [120]:
data = MNIST()

In [121]:
label_dict = {4:-1, 9:1}
X_train, Y_train = helper.subsetData(data.train_data, data.train_labels, label_dict)
X_train, Y_train, _ = helper.shuffleArraysInUnison(X_train, Y_train)

In [122]:
X_test, Y_test = helper.subsetData(data.test_data, data.test_labels, label_dict)

In [156]:
class LinearBinaryClassifier(object):
    """
    Class for Linear Binary Classifiers
    
    weights: np array of shape (dim, 1)
    bias: scalar
    """
    def __init__(self, weights, bias):
        self.dim = weights.shape[0]
        self.weights = weights
        self.bias = bias

    def predict(self, X):
        """
        X: np array of shape (num_points, dim) 
        
        returns: a vector of shape (num_points,) with predicted labels for each point
        """
        return np.sign(np.matmul(X, self.weights) + self.bias).T[0]
    
    def distance(self, X):
        """
        Computes the signed distance from a point to the decision boundary (hyperplane)
        
        returns: a vector of shape (num_points,) with the correspoding distances
        """
        return abs((np.matmul(X, self.weights) + self.bias) / np.linalg.norm(self.weights)).T[0]
    
    def evaluate(self, X, Y):
        """
        returns accuracy
        """
        return np.mean(np.equal(self.predict(X), Y))
    
    def gradient(self, X, Y):
        """
        returns gradient
        """
        if not hasattr(Y, "__len__"): # make it robust to single items
            X = X.reshape(1, self.dim)
            Y = np.array([Y])
            
        return np.array([Y[i] * self.weights.reshape(-1,) if self.predict(X[i]) == Y[i] 
                         else np.zeros(self.dim) for i in xrange(len(X))])

In [157]:
type(1) is int

True

In [158]:
# train the classifiers
n = 4
train_size = 100 #len(X_train) / n

binary_classifiers = []

for i in xrange(n):
    model = svm.SVC(kernel = "linear")
    start = train_size * i
    end = start + train_size
    model.fit(X_train[start:end], Y_train[start:end])
    lbc = LinearBinaryClassifier(model.coef_.T, model.intercept_)
    print i, start, end, lbc.evaluate(X_test, Y_test)
    binary_classifiers.append(lbc)


0 0 100 0.923656454043
1 100 200 0.907584128579
2 200 300 0.920140632848
3 300 400 0.895027624309


In [161]:
start = time.time()
X = tf.placeholder(tf.float32, shape=[None, 784])
Y = tf.placeholder(tf.float32, shape=[None, 1])

w0 = tf.placeholder(tf.float32, shape=[784,1])
b0 = tf.placeholder(tf.float32, shape=[1])

w1 = tf.placeholder(tf.float32, shape=[784,1])
b1 = tf.placeholder(tf.float32, shape=[1])

p0 = 1
p1 = 2
l0 = tf.maximum(0.0, Y * (tf.matmul(X, w0) + b0)) 
l1 = tf.maximum(0.0, Y * (tf.matmul(X, w1) + b1)) 

l = p0 * l0 + p1 * l1

grad = tf.gradients(l, X)
sess = tf.Session()
result = sess.run(grad, {X: X_test[30:40], Y: Y_test[30:40].reshape(10,1), w0: binary_classifiers[0].weights, 
                         b0: binary_classifiers[0].bias, w1: binary_classifiers[1].weights, 
                         b1: binary_classifiers[1].bias})

print time.time() - start

0.391469955444


In [172]:
sum([model.gradient(X_test[0], Y_test[0]) for model in binary_classifiers])[0].shape

(784,)

In [176]:
def gradientDescent(distribution, models, x, y, alpha):
    learning_rate = .001
    T = 1000
    v = np.zeros(len(x))
    for i in xrange(T):
        gradient = sum([w * model.gradient(x + v, y) for w, model in zip(distribution, models)])[0]
        v += learning_rate * gradient
        norm  = np.linalg.norm(v)
        if norm >= alpha:
            v = v / norm * alpha
    return v
            
        
        
    
    

In [177]:
gradientDescent([1] * n, binary_classifiers, X_test[0], Y_test[0], .5)

array([ -1.22677525e-18,  -1.22677525e-18,  -1.22677525e-18,
        -1.22677525e-18,  -1.22677525e-18,  -1.22677525e-18,
        -1.22677525e-18,  -1.22677525e-18,  -1.22677525e-18,
        -1.22677525e-18,  -1.22677525e-18,  -1.22677525e-18,
        -1.22677525e-18,  -1.22677525e-18,  -1.22677525e-18,
        -1.22677525e-18,  -1.22677525e-18,  -1.22677525e-18,
        -1.22677525e-18,  -1.22677525e-18,  -1.22677525e-18,
        -1.22677525e-18,  -1.22677525e-18,  -1.22677525e-18,
        -1.22677525e-18,  -1.22677525e-18,  -1.22677525e-18,
        -1.22677525e-18,  -1.22677525e-18,  -1.22677525e-18,
        -1.22677525e-18,  -1.22677525e-18,  -1.22677525e-18,
        -1.22677525e-18,  -1.22677525e-18,  -1.22677525e-18,
        -1.22677525e-18,  -1.22677525e-18,  -1.22677525e-18,
        -1.22677525e-18,  -1.22677525e-18,  -1.22677525e-18,
        -1.22677525e-18,  -1.22677525e-18,  -1.22677525e-18,
        -1.22677525e-18,  -1.22677525e-18,  -1.22677525e-18,
        -1.22677525e-18,

In [170]:
np.linalg.norm(binary_classifiers[].weights)

IndexError: list index out of range

In [105]:
result[0][0,:5]

array([ -1.90819582e-17,  -1.90819582e-17,  -1.90819582e-17,
        -1.90819582e-17,  -1.90819582e-17], dtype=float32)

In [58]:
binary_classifiers[0].evaluate(X_test[30:40], Y_test[30:40])

0.90000000000000002

In [79]:
g0 = binary_classifiers[0].gradient(X_test[30:40], Y_test[30:40])
g1 = binary_classifiers[1].gradient(X_test[30:40], Y_test[30:40])
test = p0 * g0 + p1 * g1

In [96]:
Y_test[39]

1

In [98]:
binary_classifiers[0].predict(X_test[39])

-1.0

In [99]:
g1[9,:5] * p1

array([  1.02348685e-16,   1.02348685e-16,   1.02348685e-16,
         1.02348685e-16,   1.02348685e-16])

In [106]:
test[0, :5]

array([ -1.90819582e-17,  -1.90819582e-17,  -1.90819582e-17,
        -1.90819582e-17,  -1.90819582e-17])