In [None]:
from setup_mnist import MNIST
import helper
import numpy as np
import time
import matplotlib.pyplot as plt
from sklearn import svm
from cvxopt import matrix, solvers
from itertools import product
%matplotlib inline

In [None]:
# Instantiate the project-local MNIST wrapper (imported from setup_mnist);
# later cells read its train_data / train_labels / test_data / test_labels attributes.
data = MNIST()

In [None]:
# Binary task: map original label 4 -> -1 and original label 9 -> +1
# (presumably the MNIST digit classes -- confirm against setup_mnist).
label_dict = {4:-1, 9:1}
# NOTE(review): presumably keeps only the samples whose label is a key of
# label_dict and relabels them with the mapped value -- confirm in helper.subsetData.
X_train, Y_train = helper.subsetData(data.train_data, data.train_labels, label_dict)
# Shuffle features and labels together; the third return value is discarded.
X_train, Y_train, _ = helper.shuffleArraysInUnison(X_train, Y_train)

In [None]:
# Same label subsetting as for the training split, applied to the test split
# (not shuffled here).
X_test, Y_test = helper.subsetData(data.test_data, data.test_labels, label_dict)

In [None]:
class LinearBinaryClassifier(object):
    """
    Linear binary classifier f(x) = sign(x . w + b) with labels in {-1, +1}.

    weights: np array of shape (dim, 1) (a flat vector of length dim is also accepted)
    bias: scalar or np array of shape (1,)

    Attributes
    ----------
    weights : np array of shape (dim, 1)
    bias : np array holding the bias term
    dim : number of input features
    """
    def __init__(self, weights, bias):
        # Plain numpy arrays: the rest of the notebook calls .T / .reshape on the
        # weights and feeds them to numpy and cvxopt.
        self.weights = np.asarray(weights, dtype=float).reshape(-1, 1)
        self.bias = np.asarray(bias, dtype=float)
        self.dim = self.weights.shape[0]

    def _scores(self, X):
        """
        Raw decision values x . w + b.

        X of shape (num_points, dim) gives a vector of shape (num_points,);
        a single point of shape (dim,) gives a scalar.
        """
        scores = np.matmul(X, self.weights) + self.bias
        # Drop the trailing length-1 axis introduced by the (dim, 1) weights.
        return scores[..., 0]

    def predict(self, X):
        """
        X: np array of shape (num_points, dim), or a single point of shape (dim,)

        returns: a vector of shape (num_points,) with predicted labels for each point
                 (a scalar for a single point). A point lying exactly on the
                 decision boundary gets label 0.
        """
        return np.sign(self._scores(X))

    def distance(self, X):
        """
        Computes the (unsigned) Euclidean distance from each point to the
        decision boundary (hyperplane): |x . w + b| / ||w||.

        returns: a vector of shape (num_points,) with the corresponding distances
                 (a scalar for a single point)
        """
        return np.abs(self._scores(X) / np.linalg.norm(self.weights))

    def evaluate(self, X, Y):
        """
        returns accuracy: the fraction of points in X whose predicted label equals Y
        """
        return np.mean(np.equal(self.predict(X), Y))

    def gradient(self, X, Y):
        """
        X: np array of shape (num_points, dim)
        Y: labels of shape (num_points,)

        returns: np array of shape (num_points, dim) whose i-th row is Y[i] * w
                 when point i is classified correctly and the zero vector otherwise.
        NOTE(review): presumably the gradient w.r.t. x of the margin
        y * (x . w + b) restricted to correctly classified points -- confirm.
        """
        X = np.asarray(X)
        Y = np.asarray(Y)
        correct = np.equal(self.predict(X), Y)
        per_point = Y.reshape(-1, 1) * self.weights.reshape(1, -1)
        return np.where(correct.reshape(-1, 1), per_point, 0.0)

In [None]:
# train the classifiers
# Split the (already shuffled) training set into n disjoint, equally sized chunks
# and fit one linear SVM per chunk.
n = 4
# Floor division keeps the slice bounds integral on Python 3 as well as Python 2;
# up to n - 1 trailing samples are left unused.
train_size = len(X_train) // n

binary_classifiers = []

for i in range(n):
    model = svm.SVC(kernel="linear")
    start = train_size * i
    end = start + train_size
    model.fit(X_train[start:end], Y_train[start:end])
    # coef_ has shape (1, dim); transpose to the (dim, 1) layout the wrapper expects.
    lbc = LinearBinaryClassifier(model.coef_.T, model.intercept_)
    # chunk index, slice bounds and test accuracy of this classifier
    print("{} {} {} {}".format(i, start, end, lbc.evaluate(X_test, Y_test)))
    binary_classifiers.append(lbc)


In [None]:
def tryRegionBinary(models, signs, x, delta=1e-10):
    """
    Look for a minimum-norm perturbation v such that model i predicts signs[i] at x + v.

    Solves the quadratic program
        minimize    0.5 * ||v||^2
        subject to  signs[i] * (w_i . (x + v) + b_i) >= delta   for every model i
    with cvxopt. If the solver reports an optimal point that still fails the
    prediction check, the call is repeated with the margin delta scaled by 1.5.

    models: list of LinearBinaryClassifiers
    signs: sequence of desired labels, one per model
    x: np array of shape (dim,)
    delta: margin required on the desired side of every hyperplane

    returns: np array of shape (dim,) with the perturbation, or None when the
             solver status is not 'optimal'
    """
    num_models = len(models)
    dim = x.shape[0]

    # Build the constraints in cvxopt's "G v <= h" form, one row per model.
    rows = []
    bounds = []
    for k, model in enumerate(models):
        w_row = model.weights.T
        slack = -1.0 * delta + signs[k] * (np.dot(w_row, x) + model.bias)
        bounds.append(slack[0])
        rows.append(-1.0 * signs[k] * w_row.reshape(-1,))

    # Objective 0.5 * v'Pv + q'v with P = I and q = 0, i.e. half the squared norm of v.
    P = matrix(np.identity(dim))
    q = matrix(np.zeros(dim))
    h = matrix(bounds)
    G = matrix(np.array(rows))

    # Note: this silences cvxopt's iteration log globally, not just for this call.
    solvers.options['show_progress'] = False
    sol = solvers.qp(P, q, G, h)

    if sol['status'] != 'optimal':
        return None

    v = np.array(sol['x']).reshape(-1,)
    perturbed_x = np.array(x + v).reshape(1, -1)
    hits = [models[k].predict(perturbed_x)[0] == signs[k] for k in range(num_models)]
    if all(hits):
        return v
    # The solver's solution did not land in the requested region; ask for a wider margin.
    return tryRegionBinary(models, signs, x, delta * 1.5)

In [None]:
# Pick the first classifier and the first test point for the quick checks below.
t = binary_classifiers[0]
x = X_test[0]
y = Y_test[0]
# Parenthesised form prints the same text on Python 2 and Python 3.
print(y)

In [None]:
# Label predicted for the sample point x by each trained classifier
# (displayed as the cell output).
[model.predict(x) for model in binary_classifiers]

In [None]:
# Sanity check: for every sign pattern over the n classifiers, find a perturbation of x
# that lands in that region and show the labels the classifiers assign to it.
for s in product([-1.0, 1.0], repeat=n):
    v = tryRegionBinary(binary_classifiers, s, x)
    if v is None:
        # tryRegionBinary returns None when the QP solver finds no optimal point;
        # adding None to x would raise a TypeError.
        print("{} infeasible".format(s))
        continue
    print("{} {}".format(s, [model.predict(x + v) for model in binary_classifiers]))

In [None]:
def distributionalOracle(distribution, models, x, y, alpha):
    """
    Exhaustive best-response oracle for a single point.

    Enumerates every sign pattern over the models that can feasibly be tricked
    (those whose decision boundary is closer to x than alpha), scores each pattern
    by the total distribution weight of the models it makes predict -y, and tries
    the patterns from the highest score downwards. The work grows as
    2 ** (number of feasible models).

    distribution: per-model weights, indexable by model position
    models: list of LinearBinaryClassifiers
    x: np array of shape (dim,)
    y: true label of x, -1 or 1
    alpha: maximum allowed L2 norm of the perturbation

    returns: np array with the shape of x. Among the patterns of the highest
             achievable score, the perturbation of minimum norm; the zero vector
             when no model is feasible or no pattern yields a perturbation
             within alpha.
    """
    no_noise = np.zeros(x.shape)

    # we should only take into consideration models that we could feasibly trick
    dists = [model.distance(x) for model in models]
    feasible = [i for i in range(len(models)) if dists[i] < alpha]

    # can't trick anything
    if not feasible:
        return no_noise

    feasible_models = [models[i] for i in feasible]
    weights = np.array([distribution[i] for i in feasible])
    num_models = len(feasible_models)

    # Group the regions by score: a model counts as fooled when its sign is -y.
    flipped = -1.0 * y * np.ones(num_models)
    regions_by_value = {}
    for signs in product([-1.0, 1.0], repeat=num_models):  # iterate over all possible regions
        value = np.dot(np.equal(flipped, signs), weights)
        regions_by_value.setdefault(value, []).append(signs)

    for value in sorted(regions_by_value, reverse=True):
        best_v, best_norm = None, None
        for signs in regions_by_value[value]:
            v = tryRegionBinary(feasible_models, signs, x)
            if v is None:
                continue
            norm = np.linalg.norm(v)
            # strict '<' keeps the first of several equally short perturbations
            if norm <= alpha and (best_norm is None or norm < best_norm):
                best_v, best_norm = v, norm
        # amongst those with the max value, return the one with the minimum norm
        if best_v is not None:
            return best_v

    # No region was reachable within alpha: fall back to "no perturbation" instead
    # of implicitly returning None, which callers would stack into an array.
    return no_noise

In [None]:
def coordinateAscent(distribution, models, x, y, alpha, greedy=True):
    """
    Heuristic best-response oracle for a single point: flip one model at a time.

    Only models whose decision boundary is closer to x than alpha are considered.
    Starting from the sign pattern in which every such model predicts y, one more
    model is switched to -y per step (the heaviest remaining one under
    `distribution` when greedy, a uniformly random remaining one otherwise) and
    tryRegionBinary is asked for a perturbation realising the new pattern. The
    search stops at the first pattern that is infeasible or needs a perturbation
    longer than alpha.

    distribution: per-model weights, indexable by model position
    models: list of LinearBinaryClassifiers
    x: np array of shape (dim,)
    y: true label of x, -1 or 1
    alpha: maximum allowed L2 norm of the perturbation
    greedy: pick the heaviest remaining model (True) or a random one (False)

    returns: np array with the shape of x -- the perturbation of the last pattern
             that succeeded, or the zero vector if none did
    """
    sol = np.zeros(x.shape)

    dists = [model.distance(x) for model in models]
    feasible = [i for i in range(len(models)) if dists[i] < alpha]

    # can't trick anything
    if not feasible:
        return sol

    feasible_models = [models[i] for i in feasible]
    num_models = len(feasible_models)

    signs = [y] * num_models  # initialize to the original point, of length feasible_models
    # remaining coordinates (positions in feasible_models) -> their weight
    options = dict((j, distribution[i]) for j, i in enumerate(feasible))

    for _ in range(num_models):
        if greedy:
            coord = max(options, key=options.get)
        else:
            # list(...) because np.random.choice cannot sample from a Python 3 dict view
            coord = np.random.choice(list(options))

        del options[coord]
        signs[coord] *= -1

        v = tryRegionBinary(feasible_models, signs, x)
        if v is None or not (np.linalg.norm(v) <= alpha):
            break
        sol = v

    return sol

In [None]:
def adversary(distribution, models, X, Y, alpha, noiseFunc):
    """
    Apply a per-point noise oracle to every (point, label) pair.

    noiseFunc is called as noiseFunc(distribution, models, x, y, alpha) for each
    pair taken from zip(X, Y); the results are stacked into one np array.
    """
    perturbations = []
    for point, label in zip(X, Y):
        perturbations.append(noiseFunc(distribution, models, point, label, alpha))
    return np.array(perturbations)

In [None]:
def evaluateCosts(models, V, X, Y):
    """
    Error rate (1 - accuracy) of every model on the perturbed points X + V.

    returns: np array with one entry per model, in the order of `models`
    """
    error_rates = []
    for model in models:
        accuracy = model.evaluate(X + V, Y)
        error_rates.append(1 - accuracy)
    return np.array(error_rates)

In [None]:
def runMWU(models, T, X, Y, alpha, noiseFunc, epsilon=None):
    """
    Multiplicative Weights Update over a set of classifiers against a noise adversary.

    Each round the adversary (noiseFunc applied per point, with budget alpha) responds
    to the current distribution over models, every model's error rate on the perturbed
    points is measured, and model i's weight is multiplied by (1 - epsilon) ** cost_i
    before renormalising.

    models: list of classifiers exposing evaluate(X, Y) -> accuracy
    T: number of rounds
    X, Y: points and labels the game is played on
    alpha: perturbation budget forwarded to noiseFunc
    noiseFunc: per-point oracle, e.g. coordinateAscent or distributionalOracle
    epsilon: learning rate; when None it is set to sqrt(4 * ln(num_models) / T) / 2

    returns: (w, v, loss_history, max_acc_history)
        w: list of T + 1 weight vectors (w[0] is uniform)
        v: list of T perturbation arrays, one per round
        loss_history: expected cost under w[t] in each round
        max_acc_history: after each round, the best per-model accuracy averaged
                         over all rounds so far
    """
    num_models = len(models)

    if epsilon is None:
        delta = np.sqrt(4 * np.log(num_models) / float(T))
        epsilon = delta / 2.0
    else:
        delta = 2.0 * epsilon

    print("Running MWU for {} Iterations with Epsilon {}\n".format(T, epsilon))

    print("Guaranteed to be within {} of the minimax value \n".format(delta))

    # The noise-free costs depend only on (models, X, Y), so evaluate them once
    # instead of once per round.
    clean_costs = np.array([1 - model.evaluate(X, Y) for model in models])

    loss_history = []
    costs = []
    max_acc_history = []
    v = []
    w = [np.ones(num_models) / num_models]

    for t in range(T):
        # Single-argument print(...) emits the same text on Python 2 and Python 3.
        print("Iteration  {}".format(t))
        print("")
        start_time = time.time()

        v_t = adversary(w[t], models, X, Y, alpha, noiseFunc)
        v.append(v_t)

        cost_t = evaluateCosts(models, v_t, X, Y)
        costs.append(cost_t)

        print("Shape of costs matrix {}".format(np.array(costs).shape))
        avg_acc = np.mean((1 - np.array(costs)), axis=0)
        max_acc = max(avg_acc)
        max_acc_history.append(max_acc)

        loss = np.dot(w[t], cost_t)
        individual = [w[t][j] * cost_t[j] for j in range(num_models)]

        print("Weights,  {} {}".format(w[t], sum(w[t])))
        print("Maximum (Average) Accuracy of Classifier  {}".format(max_acc))
        print("Cost (Before Noise),  {}".format(clean_costs))
        print("Cost (After Noise),  {}".format(cost_t))
        print("Loss,  {} {}".format(loss, individual))

        loss_history.append(loss)

        # penalize experts
        new_w = w[t] * (1.0 - epsilon) ** cost_t

        # renormalize weights; the last entry is set to the remainder so that the
        # weights sum to 1 up to the rounding of a single subtraction
        new_w = new_w / new_w.sum()
        new_w[-1] = 1.0 - new_w[:-1].sum()

        w.append(new_w)

        print("")
        print("time spent  {}".format(time.time() - start_time))
        print("")

    return w, v, loss_history, max_acc_history

In [None]:
# Build the evaluation set used by the experiments below.
# NOTE(review): presumably selects 200 test points based on the trained classifiers'
# behaviour -- confirm the selection rule in helper.generate_data.
X_experiment, Y_experiment = helper.generate_data(200, X_test, Y_test, binary_classifiers)

In [None]:
def findNoiseBounds(models, X, Y):
    """
    Per-point reference values for choosing the perturbation budget.

    models: list of LinearBinaryClassifiers
    X: np array of shape (num_points, dim)
    Y: labels of shape (num_points,)

    returns: (max_bounds, min_bounds)
        max_bounds: list with, for each point, the norm of the perturbation found by
                    tryRegionBinary that makes every model predict -Y[i]; np.inf
                    when no such perturbation is found
        min_bounds: np array with, for each point, the distance to the decision
                    boundary averaged over the models (a mean, despite the name)
    """
    num_models = len(models)
    max_bounds = []
    for i, point in enumerate(X):
        all_fooled = -1 * Y[i] * np.ones(num_models)
        max_v = tryRegionBinary(models, all_fooled, point)
        # tryRegionBinary returns None for an infeasible region; np.linalg.norm(None)
        # would raise, so record "no finite perturbation" instead.
        max_bounds.append(np.inf if max_v is None else np.linalg.norm(max_v))
    # rows: models, columns: points -> average over the models
    min_bounds = np.mean(np.array([model.distance(X) for model in models]), axis=0)
    return max_bounds, min_bounds

In [None]:
# mm: per-point norm of the perturbation that flips every classifier;
# mi: per-point distance to the decision boundary averaged over the classifiers.
mm, mi = findNoiseBounds(binary_classifiers, X_experiment, Y_experiment)

In [None]:
# Averages of both bounds over the experiment points (displayed as the cell output).
np.mean(mm), np.mean(mi)

In [None]:
# Run 20 rounds of MWU with perturbation budget alpha = 0.5, using coordinateAscent
# as the per-point noise oracle. res is the tuple
# (w, v, loss_history, max_acc_history) returned by runMWU.
res = runMWU(binary_classifiers, 20, X_experiment, Y_experiment, .5, coordinateAscent)

In [None]:
def train0Layer(train_X, train_Y):
    """
    Train a network with no hidden layer: one linear output unit with L2 weight
    regularisation (0.01), fitted with hinge loss and SGD for 50 epochs.

    train_X: array of shape (num_points, dim)
    train_Y: labels, one per row of train_X

    returns: the fitted Keras Sequential model

    NOTE(review): Sequential, Dense and regularizers are not imported in the visible
    import cell of this notebook -- they must be brought into scope before this runs.
    """
    dim = train_X.shape[1]
    model = Sequential()
    model.add(Dense(1, input_dim=dim, activation="linear", kernel_regularizer=regularizers.l2(0.01)))
    model.compile(loss="hinge", optimizer="sgd")
    # `epochs` is the Keras 2 name of this argument (matching kernel_regularizer above);
    # the Keras 1 spelling `nb_epoch` is deprecated or rejected depending on the version.
    model.fit(train_X, train_Y, epochs=50, verbose=0)
    return model

In [None]:
# Train five zero-hidden-layer networks on consecutive 1000-sample slices of the
# training set, starting at slice index `init`.
nns = []
train_size = 1000  # note: rebinds the train_size used by the SVM training cell
init = 5
for i in range(5):
    start = train_size * (i + init)
    end = start + train_size
    # Single-argument print(...) emits the same text on Python 2 and Python 3.
    print("{} {}".format(start, end))
    model = train0Layer(X_train[start:end], Y_train[start:end])
    nns.append(model)
    # value returned by the Keras model's evaluate() on the test split
    print(model.evaluate(X_test, Y_test))


In [None]:
# train0Layer takes only (train_X, train_Y) and reads the input dimension from
# train_X.shape[1]; passing a third positional argument raised a TypeError.
model = train0Layer(X_train, Y_train)