In [1]:
import tensorflow as tf
import numpy as np
import pandas as pd


tdata = pd.read_csv('./higgs/training.csv')
nasdaq = np.array(tdata)

from sklearn.model_selection import train_test_split
x=nasdaq[:,1:nasdaq.shape[1]-2]
y=nasdaq[:,nasdaq.shape[1]-1]
x_train, x_test, y_train, y_test = train_test_split(x, y , train_size = 0.5, random_state =  14)
print(x_train.shape)



(125000, 30)


In [2]:
from sklearn.preprocessing import StandardScaler

# training
scaler = StandardScaler().fit(x_train)
x_train = scaler.transform(x_train)

# validation
norm_vcolumns=x_test
x_test = scaler.transform(x_test)

In [3]:
learning_rate = 0.0001
batch_size = 100
dimensionality = 30

iter_num = 20 # number of iterations of alternating scheme
steps_number = 10000 # number of gradient steps

code_size = 15 #dimension of code
k = 5 #needed dimension code_size = 2k k=10,20
gamma = 1.0 # smoothness of manifold
mu = 10.0 # main parameter mu=10,20,40,80,160
epsilon = 0.1

images = np.reshape(x_train, (-1, dimensionality))
print(images.shape)
labels = y_train

test_images = np.reshape(x_test, (-1, dimensionality))
test_labels = y_test

(125000, 30)


In [4]:
# Define placeholders
training_data = tf.placeholder(tf.float32, [None, dimensionality])
gradient_training_data = tf.placeholder(tf.float32, [None, dimensionality])
old_P = tf.placeholder(tf.float32, shape=[None, dimensionality, dimensionality])
old_W = tf.placeholder(tf.float32, shape=[dimensionality, code_size])
old_b = tf.placeholder(tf.float32, shape=[code_size])
old_b_r = tf.placeholder(tf.float32, shape=[dimensionality])

In [5]:
# Variables to be tuned
W = tf.Variable(tf.truncated_normal([dimensionality, code_size], stddev=0.1))
b = tf.Variable(tf.constant(0.1, shape=[code_size]))

b_r = tf.Variable(tf.constant(0.1, shape=[dimensionality]))

code_data = tf.nn.sigmoid(tf.matmul(training_data, W) + b)
recover = tf.matmul(code_data, tf.transpose(W)) + b_r
grad_phi_psi = []
for i in range(batch_size):
    for j in range(dimensionality):
        grad_phi_psi.append(tf.gradients(recover[i][j], [training_data[i]], unconnected_gradients='zero')[0])
grad_phi_psi = tf.reshape(tf.stack(grad_phi_psi), [batch_size, dimensionality, dimensionality])


# this is gradient field close to our points
rand_training_data = training_data + tf.random.normal(shape=[batch_size, dimensionality],
                                                      mean=0.0,stddev=epsilon)
rand_code_data = tf.nn.sigmoid(tf.matmul(rand_training_data, W) + b)
rand_recover = tf.matmul(rand_code_data, tf.transpose(W)) + b_r
rand_grad_phi_psi = []
for i in range(batch_size):
    for j in range(dimensionality):
        rand_grad_phi_psi.append(tf.gradients(rand_recover[i][j], [rand_training_data[i]], unconnected_gradients='zero')[0])
rand_grad_phi_psi = tf.reshape(tf.stack(rand_grad_phi_psi), [batch_size, dimensionality, dimensionality])

new_code_data = tf.nn.sigmoid(tf.matmul(gradient_training_data, W) + b)
new_recover = tf.matmul(new_code_data, tf.transpose(W)) + b_r
new_grad_phi_psi = []
for i in range(batch_size):
    for j in range(dimensionality):
        new_grad_phi_psi.append(tf.gradients(new_recover[i][j], [gradient_training_data[i]], unconnected_gradients='zero')[0])
new_grad_phi_psi = tf.reshape(tf.stack(new_grad_phi_psi), [batch_size, dimensionality, dimensionality])

In [6]:
# Define the loss function
loss = tf.reduce_mean(tf.square(training_data - recover)) + \
       gamma*tf.reduce_mean(tf.square(grad_phi_psi-rand_grad_phi_psi)) + \
       mu*tf.reduce_mean(tf.square(new_grad_phi_psi - old_P))

# Training step
train_step = tf.train.AdamOptimizer(learning_rate).minimize(loss)

In [7]:
# Run the training
sess = tf.InteractiveSession()
sess.run(tf.global_variables_initializer())

In [8]:
N = 125000
x_train = images[:N]
N_grad = 1000
grad_x_train = images[0:N_grad*125:125]


cur_U = np.zeros((N_grad, dimensionality, k))
cur_Sigma = np.zeros((N_grad, k, k))
cur_V = np.zeros((N_grad, k, dimensionality))
feed_P = np.zeros((batch_size, dimensionality, dimensionality))
cur_W = np.random.normal(0, 0.35, (dimensionality, code_size))
cur_b = np.zeros((code_size))
cur_b_r = np.zeros((dimensionality))

num_batches = int(N/batch_size)
grad_num_batches = int(N_grad/batch_size)

for iter in range(iter_num):
    for i in range(steps_number):
        # Get the next batch
        which_batch = i%num_batches
        input_batch = x_train[which_batch*batch_size:(which_batch+1)*batch_size]
        grad_which_batch = i%grad_num_batches
        grad_input_batch = grad_x_train[grad_which_batch*batch_size:(grad_which_batch+1)*batch_size]
        for r in range(batch_size):
            U = cur_U[grad_which_batch*batch_size+r]
            Sigma = cur_Sigma[grad_which_batch*batch_size+r]
            V = cur_V[grad_which_batch*batch_size+r]
            feed_P[r] = np.matmul(U,np.matmul(Sigma,V))
        feed_dict = {training_data: input_batch, gradient_training_data: grad_input_batch, 
                     old_P:feed_P,
                  old_W:cur_W, old_b:cur_b, old_b_r:cur_b_r}
        # Run the training step
        train_step.run(feed_dict=feed_dict)
        # Print the accuracy progress on the batch every 1000 steps
        if i%1000 == 0:
            train_accuracy = sess.run(loss, feed_dict=feed_dict)
            print("Step %d, training batch accuracy %g %%"%(i, train_accuracy*100))
    for grad_which_batch in range(grad_num_batches):
        grad_input_batch = grad_x_train[grad_which_batch*batch_size:(grad_which_batch+1)*batch_size]
        feed_dict = {gradient_training_data: grad_input_batch}
        local_grad = sess.run(new_grad_phi_psi, feed_dict=feed_dict)
        for r in range(batch_size):
            u, s, vh = np.linalg.svd(local_grad[r,:,:], full_matrices=True)
            cur_U[grad_which_batch*batch_size+r] = u[:,0:k:1]
            cur_V[grad_which_batch*batch_size+r] = np.transpose(vh[:,0:k:1])
            cur_Sigma[grad_which_batch*batch_size+r] = np.diag(s[0:k:1])
    [cur_W, cur_b, cur_b_r] = sess.run([W, b, b_r])


Step 0, training batch accuracy 89.1313 %
Step 1000, training batch accuracy 64.1789 %
Step 2000, training batch accuracy 68.9554 %
Step 3000, training batch accuracy 46.7028 %
Step 4000, training batch accuracy 36.8568 %
Step 5000, training batch accuracy 32.4755 %
Step 6000, training batch accuracy 30.7288 %
Step 7000, training batch accuracy 40.2513 %
Step 8000, training batch accuracy 25.6327 %
Step 9000, training batch accuracy 21.8597 %
Step 0, training batch accuracy 20.465 %
Step 1000, training batch accuracy 21.1298 %
Step 2000, training batch accuracy 31.0943 %
Step 3000, training batch accuracy 18.4758 %
Step 4000, training batch accuracy 15.9219 %
Step 5000, training batch accuracy 15.2534 %
Step 6000, training batch accuracy 17.2022 %
Step 7000, training batch accuracy 27.0987 %
Step 8000, training batch accuracy 16.197 %
Step 9000, training batch accuracy 13.3423 %
Step 0, training batch accuracy 13.4318 %
Step 1000, training batch accuracy 15.7136 %
Step 2000, training b

Step 4000, training batch accuracy 8.38346 %
Step 5000, training batch accuracy 7.72569 %
Step 6000, training batch accuracy 11.1356 %
Step 7000, training batch accuracy 19.0076 %
Step 8000, training batch accuracy 10.563 %
Step 9000, training batch accuracy 8.37901 %
Step 0, training batch accuracy 7.70639 %
Step 1000, training batch accuracy 11.133 %
Step 2000, training batch accuracy 18.8942 %
Step 3000, training batch accuracy 10.5645 %
Step 4000, training batch accuracy 8.36877 %
Step 5000, training batch accuracy 7.68953 %
Step 6000, training batch accuracy 11.1186 %
Step 7000, training batch accuracy 18.7866 %
Step 8000, training batch accuracy 10.5655 %
Step 9000, training batch accuracy 8.35315 %


In [9]:
def euclidean_distance(img_a, img_b):
    '''Finds the distance between 2 images: img_a, img_b'''
    # element-wise computations are automatically handled by numpy
    return sum((img_a - img_b) ** 2)

from collections import defaultdict

def find_majority(labels):
    '''Finds the majority class/label out of the given labels'''
    # defaultdict(type) is to automatically add new keys without throwing error.
    counter = defaultdict(int)
    for label in labels:
        counter[label] += 1

    # Finding the majority class.
    majority_count = max(counter.values())
    for key, value in counter.items():
        if value == majority_count:
            return key
train_images = np.asarray(images[:5000])
train_labels = np.asarray(labels[:5000])
test_images = np.asarray(test_images[:5000])
test_labels = np.asarray(test_labels[:5000])

def sigmoid(x):
    return 1. /(1+np.exp(-x))
def new_euclidean_distance(img_a, img_b):
    img_a = np.reshape(img_a, (1,-1))
    img_b = np.reshape(img_b, (1,-1))
    img_a = sigmoid(np.matmul(img_a, cur_W) + cur_b)
    img_b = sigmoid(np.matmul(img_b, cur_W) + cur_b)
    return np.sum((img_a - img_b) ** 2)

def new_predict(k, train_images, train_labels, test_images):
    '''
    Predicts the new data-point's category/label by 
    looking at all other training labels
    '''
    # distances contains tuples of (distance, label)
    distances = [(new_euclidean_distance(test_image, image), label)
                    for (image, label) in zip(train_images, train_labels)]
    # sort the distances list by distances
    compare = lambda distance: distance[0]
    by_distances = sorted(distances, key=compare)
    # extract only k closest labels
    k_labels = [label for (_, label) in by_distances[:k]]
    # return the majority voted label
    return find_majority(k_labels)

# Predicting and printing the accuracy
i = 0
total_correct = 0
for test_image in test_images[:5000]:
    pred = new_predict(10, train_images, train_labels, test_image)
    if pred == test_labels[i]:
        total_correct += 1
    acc = (total_correct / (i+1)) * 100
    if i%100 == 0:
        print('test image['+str(i)+']', '\tpred:', pred, '\torig:', test_labels[i], '\tacc:', str(round(acc, 2))+'%')
    i += 1

test image[0] 	pred: b 	orig: s 	acc: 0.0%
test image[100] 	pred: s 	orig: b 	acc: 73.27%
test image[200] 	pred: s 	orig: b 	acc: 70.65%
test image[300] 	pred: b 	orig: b 	acc: 70.43%
test image[400] 	pred: b 	orig: b 	acc: 69.08%
test image[500] 	pred: b 	orig: b 	acc: 68.86%
test image[600] 	pred: b 	orig: s 	acc: 69.05%
test image[700] 	pred: b 	orig: b 	acc: 70.47%
test image[800] 	pred: s 	orig: b 	acc: 70.54%
test image[900] 	pred: b 	orig: b 	acc: 70.7%
test image[1000] 	pred: b 	orig: b 	acc: 71.73%
test image[1100] 	pred: b 	orig: s 	acc: 71.84%
test image[1200] 	pred: b 	orig: s 	acc: 71.27%
test image[1300] 	pred: b 	orig: b 	acc: 71.87%
test image[1400] 	pred: s 	orig: b 	acc: 71.88%
test image[1500] 	pred: b 	orig: s 	acc: 71.95%
test image[1600] 	pred: s 	orig: s 	acc: 72.2%
test image[1700] 	pred: b 	orig: b 	acc: 72.13%
test image[1800] 	pred: s 	orig: s 	acc: 71.9%
test image[1900] 	pred: b 	orig: b 	acc: 72.33%
test image[2000] 	pred: b 	orig: s 	acc: 71.86%
test imag

In [None]:
def predict(k, train_images, train_labels, test_images):
    '''
    Predicts the new data-point's category/label by 
    looking at all other training labels
    '''
    # distances contains tuples of (distance, label)
    distances = [(euclidean_distance(test_image, image), label)
                    for (image, label) in zip(train_images, train_labels)]
    # sort the distances list by distances
    compare = lambda distance: distance[0]
    by_distances = sorted(distances, key=compare)
    # extract only k closest labels
    k_labels = [label for (_, label) in by_distances[:k]]
    # return the majority voted label
    return find_majority(k_labels)

# Predicting and printing the accuracy
i = 0
total_correct = 0
for test_image in test_images[:5000]:
    pred = predict(10, train_images, train_labels, test_image)
    if pred == test_labels[i]:
        total_correct += 1
    acc = (total_correct / (i+1)) * 100
    if i%100 == 0:
        print('test image['+str(i)+']', '\tpred:', pred, '\torig:', test_labels[i], '\tacc:', str(round(acc, 2))+'%')
    i += 1

test image[0] 	pred: b 	orig: s 	acc: 0.0%
test image[100] 	pred: b 	orig: b 	acc: 79.21%
test image[200] 	pred: s 	orig: b 	acc: 73.63%
test image[300] 	pred: b 	orig: b 	acc: 74.09%
test image[400] 	pred: b 	orig: b 	acc: 72.32%
test image[500] 	pred: s 	orig: b 	acc: 72.46%
test image[600] 	pred: b 	orig: s 	acc: 72.88%
test image[700] 	pred: b 	orig: b 	acc: 73.89%
test image[800] 	pred: s 	orig: b 	acc: 74.16%
test image[900] 	pred: b 	orig: b 	acc: 74.25%
test image[1000] 	pred: b 	orig: b 	acc: 74.83%
test image[1100] 	pred: s 	orig: s 	acc: 74.66%
test image[1200] 	pred: b 	orig: s 	acc: 74.1%
test image[1300] 	pred: b 	orig: b 	acc: 74.63%
test image[1400] 	pred: s 	orig: b 	acc: 74.38%
