In [1]:
import tensorflow as tf
import numpy as np
import pandas as pd


tdata = pd.read_csv('./higgs/training.csv')
nasdaq = np.array(tdata)

from sklearn.model_selection import train_test_split
x=nasdaq[:,1:nasdaq.shape[1]-2]
y=nasdaq[:,nasdaq.shape[1]-1]
x_train, x_test, y_train, y_test = train_test_split(x, y , train_size = 0.5, random_state =  14)
print(x_train.shape)



(125000, 30)


In [2]:
from sklearn.preprocessing import StandardScaler

# training
scaler = StandardScaler().fit(x_train)
x_train = scaler.transform(x_train)

# validation
norm_vcolumns=x_test
x_test = scaler.transform(x_test)

In [3]:
learning_rate = 0.0001
batch_size = 100
dimensionality = 30

iter_num = 60 # number of iterations of alternating scheme
steps_number = 10000 # number of gradient steps

code_size1 = 20 #dimension of code1
code_size2 = 10 #dimension of code2
k = 5 #needed dimension code_size = 2k k=10,20
gamma = 1.0 # smoothness of manifold
mu = 10.0 # main parameter mu=10,20,40,80,160
epsilon = 0.1

images = np.reshape(x_train, (-1, dimensionality))
print(images.shape)
labels = y_train

test_images = np.reshape(x_test, (-1, dimensionality))
test_labels = y_test

(125000, 30)


In [4]:
# Define placeholders
training_data = tf.placeholder(tf.float32, [None, dimensionality])
gradient_training_data = tf.placeholder(tf.float32, [None, dimensionality])
old_P = tf.placeholder(tf.float32, shape=[None, dimensionality, dimensionality])
old_W_1 = tf.placeholder(tf.float32, shape=[dimensionality, code_size1])
old_W_2 = tf.placeholder(tf.float32, shape=[code_size1, code_size2])
old_W_3 = tf.placeholder(tf.float32, shape=[code_size2, code_size1])
old_W_4 = tf.placeholder(tf.float32, shape=[code_size1, dimensionality])
old_b_1 = tf.placeholder(tf.float32, shape=[code_size1])
old_b_2 = tf.placeholder(tf.float32, shape=[code_size2])
old_b_3 = tf.placeholder(tf.float32, shape=[code_size1])
old_b_4 = tf.placeholder(tf.float32, shape=[dimensionality])

In [5]:
# Variables to be tuned
W_1 = tf.Variable(tf.truncated_normal([dimensionality, code_size1], stddev=0.1))
W_2 = tf.Variable(tf.truncated_normal([code_size1, code_size2], stddev=0.1))
W_3 = tf.Variable(tf.truncated_normal([code_size2, code_size1], stddev=0.1))
W_4 = tf.Variable(tf.truncated_normal([code_size1, dimensionality], stddev=0.1))
b_1 = tf.Variable(tf.constant(0.1, shape=[code_size1]))
b_2 = tf.Variable(tf.constant(0.1, shape=[code_size2]))
b_3 = tf.Variable(tf.constant(0.1, shape=[code_size1]))
b_4 = tf.Variable(tf.constant(0.1, shape=[dimensionality]))


code_data1 = tf.nn.sigmoid(tf.matmul(training_data, W_1) + b_1)
code_data2 = tf.nn.sigmoid(tf.matmul(code_data1, W_2) + b_2)
code_data3 = tf.nn.sigmoid(tf.matmul(code_data2, W_3) + b_3)
recover = tf.matmul(code_data3, W_4) + b_4
grad_phi_psi = []
for i in range(batch_size):
    for j in range(dimensionality):
        grad_phi_psi.append(tf.gradients(recover[i][j], [training_data[i]], unconnected_gradients='zero')[0])
grad_phi_psi = tf.reshape(tf.stack(grad_phi_psi), [batch_size, dimensionality, dimensionality])


# this is gradient field close to our points
rand_training_data = training_data + tf.random.normal(shape=[batch_size, dimensionality],
                                                      mean=0.0,stddev=epsilon)
rand_code_data1 = tf.nn.sigmoid(tf.matmul(rand_training_data, W_1) + b_1)
rand_code_data2 = tf.nn.sigmoid(tf.matmul(rand_code_data1, W_2) + b_2)
rand_code_data3 = tf.nn.sigmoid(tf.matmul(rand_code_data2, W_3) + b_3)
rand_recover = tf.matmul(rand_code_data3, W_4) + b_4
rand_grad_phi_psi = []
for i in range(batch_size):
    for j in range(dimensionality):
        rand_grad_phi_psi.append(tf.gradients(rand_recover[i][j], [rand_training_data[i]], unconnected_gradients='zero')[0])
rand_grad_phi_psi = tf.reshape(tf.stack(rand_grad_phi_psi), [batch_size, dimensionality, dimensionality])

new_code_data1 = tf.nn.sigmoid(tf.matmul(gradient_training_data, W_1) + b_1)
new_code_data2 = tf.nn.sigmoid(tf.matmul(new_code_data1, W_2) + b_2)
new_code_data3 = tf.nn.sigmoid(tf.matmul(new_code_data2, W_3) + b_3)
new_recover = tf.matmul(new_code_data3, W_4) + b_4
new_grad_phi_psi = []
for i in range(batch_size):
    for j in range(dimensionality):
        new_grad_phi_psi.append(tf.gradients(new_recover[i][j], [gradient_training_data[i]], unconnected_gradients='zero')[0])
new_grad_phi_psi = tf.reshape(tf.stack(new_grad_phi_psi), [batch_size, dimensionality, dimensionality])

In [6]:
# Define the loss function
loss = tf.reduce_mean(tf.square(training_data - recover)) + \
       gamma*tf.reduce_mean(tf.square(grad_phi_psi-rand_grad_phi_psi)) + \
       mu*tf.reduce_mean(tf.square(new_grad_phi_psi - old_P))

# Training step
train_step = tf.train.AdamOptimizer(learning_rate).minimize(loss)

In [7]:
# Run the training
sess = tf.InteractiveSession()
sess.run(tf.global_variables_initializer())

In [8]:
N = 125000
x_train = images[:N]
N_grad = 1000
grad_x_train = images[0:N_grad*125:125]


cur_U = np.zeros((N_grad, dimensionality, k))
cur_Sigma = np.zeros((N_grad, k, k))
cur_V = np.zeros((N_grad, k, dimensionality))
feed_P = np.zeros((batch_size, dimensionality, dimensionality))
cur_W_1 = np.random.normal(0, 0.35, (dimensionality, code_size1))
cur_W_2 = np.random.normal(0, 0.35, (code_size1, code_size2))
cur_W_3 = np.random.normal(0, 0.35, (code_size2, code_size1))
cur_W_4 = np.random.normal(0, 0.35, (code_size1, dimensionality))
cur_b_1 = np.zeros((code_size1))
cur_b_2 = np.zeros((code_size2))
cur_b_3 = np.zeros((code_size1))
cur_b_4 = np.zeros((dimensionality))

num_batches = int(N/batch_size)
grad_num_batches = int(N_grad/batch_size)

for iter in range(iter_num):
    for i in range(steps_number):
        # Get the next batch
        which_batch = i%num_batches
        input_batch = x_train[which_batch*batch_size:(which_batch+1)*batch_size]
        grad_which_batch = i%grad_num_batches
        grad_input_batch = grad_x_train[grad_which_batch*batch_size:(grad_which_batch+1)*batch_size]
        for r in range(batch_size):
            U = cur_U[grad_which_batch*batch_size+r]
            Sigma = cur_Sigma[grad_which_batch*batch_size+r]
            V = cur_V[grad_which_batch*batch_size+r]
            feed_P[r] = np.matmul(U,np.matmul(Sigma,V))
        feed_dict = {training_data: input_batch, gradient_training_data: grad_input_batch, 
                     old_P:feed_P,
                  old_W_1:cur_W_1, old_W_2:cur_W_2, old_W_3:cur_W_3, old_W_4:cur_W_4, 
                     old_b_1:cur_b_1, old_b_2:cur_b_2, old_b_3:cur_b_3, old_b_4:cur_b_4}
        # Run the training step
        train_step.run(feed_dict=feed_dict)
        # Print the accuracy progress on the batch every 100 steps
        if i%1000 == 0:
            train_accuracy = sess.run(loss, feed_dict=feed_dict)
            print("Step %d, training batch accuracy %g %%"%(i, train_accuracy*100))
    for grad_which_batch in range(grad_num_batches):
        grad_input_batch = grad_x_train[grad_which_batch*batch_size:(grad_which_batch+1)*batch_size]
        feed_dict = {gradient_training_data: grad_input_batch}
        local_grad = sess.run(new_grad_phi_psi, feed_dict=feed_dict)
        for r in range(batch_size):
            u, s, vh = np.linalg.svd(local_grad[r,:,:], full_matrices=True)
            cur_U[grad_which_batch*batch_size+r] = u[:,0:k:1]
            cur_V[grad_which_batch*batch_size+r] = np.transpose(vh[:,0:k:1])
            cur_Sigma[grad_which_batch*batch_size+r] = np.diag(s[0:k:1])
    [cur_W_1, cur_W_2, cur_W_3, cur_W_4, cur_b_1, cur_b_2, cur_b_3, cur_b_4] = sess.run([W_1, W_2, W_3, W_4, b_1, b_2, b_3, b_4])


Step 0, training batch accuracy 98.4511 %
Step 1000, training batch accuracy 93.2525 %
Step 2000, training batch accuracy 88.5821 %
Step 3000, training batch accuracy 61.1664 %
Step 4000, training batch accuracy 54.4366 %
Step 5000, training batch accuracy 53.732 %
Step 6000, training batch accuracy 61.654 %
Step 7000, training batch accuracy 71.107 %
Step 8000, training batch accuracy 55.6711 %
Step 9000, training batch accuracy 47.1964 %
Step 0, training batch accuracy 45.1073 %
Step 1000, training batch accuracy 54.528 %
Step 2000, training batch accuracy 64.4973 %
Step 3000, training batch accuracy 50.8303 %
Step 4000, training batch accuracy 44.5957 %
Step 5000, training batch accuracy 42.893 %
Step 6000, training batch accuracy 52.4828 %
Step 7000, training batch accuracy 59.2967 %
Step 8000, training batch accuracy 46.7084 %
Step 9000, training batch accuracy 41.6204 %
Step 0, training batch accuracy 41.1509 %
Step 1000, training batch accuracy 50.9154 %
Step 2000, training batc

Step 4000, training batch accuracy 13.6678 %
Step 5000, training batch accuracy 14.6599 %
Step 6000, training batch accuracy 14.6278 %
Step 7000, training batch accuracy 19.4899 %
Step 8000, training batch accuracy 14.1407 %
Step 9000, training batch accuracy 13.1406 %
Step 0, training batch accuracy 14.1184 %
Step 1000, training batch accuracy 14.1438 %
Step 2000, training batch accuracy 19.0205 %
Step 3000, training batch accuracy 13.6967 %
Step 4000, training batch accuracy 12.7205 %
Step 5000, training batch accuracy 13.7131 %
Step 6000, training batch accuracy 13.7666 %
Step 7000, training batch accuracy 18.6917 %
Step 8000, training batch accuracy 13.3417 %
Step 9000, training batch accuracy 12.3124 %
Step 0, training batch accuracy 13.2172 %
Step 1000, training batch accuracy 13.1903 %
Step 2000, training batch accuracy 18.0597 %
Step 3000, training batch accuracy 12.626 %
Step 4000, training batch accuracy 11.4829 %
Step 5000, training batch accuracy 12.0784 %
Step 6000, traini

Step 8000, training batch accuracy 7.7073 %
Step 9000, training batch accuracy 7.70218 %
Step 0, training batch accuracy 7.28633 %
Step 1000, training batch accuracy 8.09498 %
Step 2000, training batch accuracy 11.3859 %
Step 3000, training batch accuracy 7.6318 %
Step 4000, training batch accuracy 7.62036 %
Step 5000, training batch accuracy 7.25556 %
Step 6000, training batch accuracy 8.04526 %
Step 7000, training batch accuracy 11.2845 %
Step 8000, training batch accuracy 7.55686 %
Step 9000, training batch accuracy 7.53691 %
Step 0, training batch accuracy 7.22408 %
Step 1000, training batch accuracy 7.99474 %
Step 2000, training batch accuracy 11.1834 %
Step 3000, training batch accuracy 7.48314 %
Step 4000, training batch accuracy 7.45227 %
Step 5000, training batch accuracy 7.19168 %
Step 6000, training batch accuracy 7.94356 %
Step 7000, training batch accuracy 11.0833 %
Step 8000, training batch accuracy 7.41114 %
Step 9000, training batch accuracy 7.36688 %
Step 0, training b

Step 2000, training batch accuracy 9.50512 %
Step 3000, training batch accuracy 5.97381 %
Step 4000, training batch accuracy 5.387 %
Step 5000, training batch accuracy 5.74426 %
Step 6000, training batch accuracy 6.27108 %
Step 7000, training batch accuracy 9.49041 %
Step 8000, training batch accuracy 5.9604 %
Step 9000, training batch accuracy 5.37627 %
Step 0, training batch accuracy 5.72861 %
Step 1000, training batch accuracy 6.24663 %
Step 2000, training batch accuracy 9.47557 %
Step 3000, training batch accuracy 5.94763 %
Step 4000, training batch accuracy 5.36633 %
Step 5000, training batch accuracy 5.71429 %
Step 6000, training batch accuracy 6.22335 %
Step 7000, training batch accuracy 9.46057 %
Step 8000, training batch accuracy 5.93541 %
Step 9000, training batch accuracy 5.35708 %
Step 0, training batch accuracy 5.70113 %
Step 1000, training batch accuracy 6.20116 %
Step 2000, training batch accuracy 9.44539 %
Step 3000, training batch accuracy 5.92369 %
Step 4000, training

In [9]:
def euclidean_distance(img_a, img_b):
    '''Finds the distance between 2 images: img_a, img_b'''
    # element-wise computations are automatically handled by numpy
    return sum((img_a - img_b) ** 2)

from collections import defaultdict

def find_majority(labels):
    '''Finds the majority class/label out of the given labels'''
    # defaultdict(type) is to automatically add new keys without throwing error.
    counter = defaultdict(int)
    for label in labels:
        counter[label] += 1

    # Finding the majority class.
    majority_count = max(counter.values())
    for key, value in counter.items():
        if value == majority_count:
            return key
train_images = np.asarray(images[:5000])
train_labels = np.asarray(labels[:5000])
test_images = np.asarray(test_images[:5000])
test_labels = np.asarray(test_labels[:5000])

def sigmoid(x):
    return 1. /(1+np.exp(-x))
def new_euclidean_distance(img_a, img_b):
    img_a = np.reshape(img_a, (1,-1))
    img_b = np.reshape(img_b, (1,-1))
    img_a = sigmoid(np.matmul(img_a, cur_W_1) + cur_b_1)
    img_a = sigmoid(np.matmul(img_a, cur_W_2) + cur_b_2)
    img_b = sigmoid(np.matmul(img_b, cur_W_1) + cur_b_1)
    img_b = sigmoid(np.matmul(img_b, cur_W_2) + cur_b_2)
    return np.sum((img_a - img_b) ** 2)

def new_predict(k, train_images, train_labels, test_images):
    '''
    Predicts the new data-point's category/label by 
    looking at all other training labels
    '''
    # distances contains tuples of (distance, label)
    distances = [(new_euclidean_distance(test_image, image), label)
                    for (image, label) in zip(train_images, train_labels)]
    # sort the distances list by distances
    compare = lambda distance: distance[0]
    by_distances = sorted(distances, key=compare)
    # extract only k closest labels
    k_labels = [label for (_, label) in by_distances[:k]]
    # return the majority voted label
    return find_majority(k_labels)

# Predicting and printing the accuracy
i = 0
total_correct = 0
for test_image in test_images[:5000]:
    pred = new_predict(10, train_images, train_labels, test_image)
    if pred == test_labels[i]:
        total_correct += 1
    acc = (total_correct / (i+1)) * 100
    if i%100 == 0:
        print('test image['+str(i)+']', '\tpred:', pred, '\torig:', test_labels[i], '\tacc:', str(round(acc, 2))+'%')
    i += 1

test image[0] 	pred: b 	orig: s 	acc: 0.0%
test image[100] 	pred: s 	orig: b 	acc: 74.26%
test image[200] 	pred: s 	orig: b 	acc: 67.66%
test image[300] 	pred: b 	orig: b 	acc: 69.1%
test image[400] 	pred: b 	orig: b 	acc: 68.83%
test image[500] 	pred: s 	orig: b 	acc: 68.86%
test image[600] 	pred: b 	orig: s 	acc: 69.38%
test image[700] 	pred: b 	orig: b 	acc: 69.9%
test image[800] 	pred: s 	orig: b 	acc: 70.16%
test image[900] 	pred: b 	orig: b 	acc: 70.81%
test image[1000] 	pred: b 	orig: b 	acc: 71.33%
test image[1100] 	pred: s 	orig: s 	acc: 71.39%
test image[1200] 	pred: b 	orig: s 	acc: 70.69%
test image[1300] 	pred: b 	orig: b 	acc: 70.95%
test image[1400] 	pred: s 	orig: b 	acc: 71.23%
test image[1500] 	pred: b 	orig: s 	acc: 71.35%
test image[1600] 	pred: s 	orig: s 	acc: 71.52%
test image[1700] 	pred: b 	orig: b 	acc: 71.31%
test image[1800] 	pred: s 	orig: s 	acc: 71.57%
test image[1900] 	pred: b 	orig: b 	acc: 71.8%
test image[2000] 	pred: b 	orig: s 	acc: 71.61%
test imag