In [1]:
import tensorflow as tf
import numpy as np
import pandas as pd

data = pd.read_csv("covtype.csv", sep=",")
data.head()

from sklearn.model_selection import train_test_split
x=data[data.columns[:data.shape[1]-1]]
y=data[data.columns[data.shape[1]-1:]]-1
x_train, x_test, y_train, y_test = train_test_split(x, y , train_size = 0.5, random_state =  14)

print(x_train.shape)



(290506, 54)


In [2]:
from sklearn.preprocessing import StandardScaler

# training
norm_tcolumns=x_train[x_train.columns[:10]] # only the first ten columns need normalization, the rest is binary
scaler = StandardScaler().fit(norm_tcolumns.values)
scaledf = scaler.transform(norm_tcolumns.values)
training_examples = pd.DataFrame(scaledf, index=norm_tcolumns.index, columns=norm_tcolumns.columns) # scaledf is converted from array to dataframe
x_train.update(training_examples)

# validation
norm_vcolumns=x_test[x_test.columns[:10]]
vscaled = scaler.transform(norm_vcolumns.values) # this scaler uses std and mean of training dataset
validation_examples = pd.DataFrame(vscaled, index=norm_vcolumns.index, columns=norm_vcolumns.columns)
x_test.update(validation_examples)



In [3]:
learning_rate = 0.0001
batch_size = 100
dimensionality = 54

iter_num = 20 # number of iterations of alternating scheme
steps_number = 10000 # number of gradient steps

code_size1 = 27 #dimension of code1
code_size2 = 10 #dimension of code2
k = 5 #needed dimension code_size = 2k k=10,20
gamma = 10.0 # smoothness of manifold
mu = 10.0 # main parameter mu=10,20,40,80,160
epsilon = 0.1

images = np.reshape(x_train, (-1, dimensionality))
print(images.shape)
labels = y_train

test_images = np.reshape(x_test, (-1, dimensionality))
test_labels = y_test

(290506, 54)


In [4]:
# Define placeholders
training_data = tf.placeholder(tf.float32, [None, dimensionality])
gradient_training_data = tf.placeholder(tf.float32, [None, dimensionality])
old_P = tf.placeholder(tf.float32, shape=[None, dimensionality, dimensionality])
old_W_1 = tf.placeholder(tf.float32, shape=[dimensionality, code_size1])
old_W_2 = tf.placeholder(tf.float32, shape=[code_size1, code_size2])
old_W_3 = tf.placeholder(tf.float32, shape=[code_size2, code_size1])
old_W_4 = tf.placeholder(tf.float32, shape=[code_size1, dimensionality])
old_b_1 = tf.placeholder(tf.float32, shape=[code_size1])
old_b_2 = tf.placeholder(tf.float32, shape=[code_size2])
old_b_3 = tf.placeholder(tf.float32, shape=[code_size1])
old_b_4 = tf.placeholder(tf.float32, shape=[dimensionality])

In [5]:
# Variables to be tuned
W_1 = tf.Variable(tf.truncated_normal([dimensionality, code_size1], stddev=0.1))
W_2 = tf.Variable(tf.truncated_normal([code_size1, code_size2], stddev=0.1))
W_3 = tf.Variable(tf.truncated_normal([code_size2, code_size1], stddev=0.1))
W_4 = tf.Variable(tf.truncated_normal([code_size1, dimensionality], stddev=0.1))
b_1 = tf.Variable(tf.constant(0.1, shape=[code_size1]))
b_2 = tf.Variable(tf.constant(0.1, shape=[code_size2]))
b_3 = tf.Variable(tf.constant(0.1, shape=[code_size1]))
b_4 = tf.Variable(tf.constant(0.1, shape=[dimensionality]))


code_data1 = tf.nn.sigmoid(tf.matmul(training_data, W_1) + b_1)
code_data2 = tf.nn.sigmoid(tf.matmul(code_data1, W_2) + b_2)
code_data3 = tf.nn.sigmoid(tf.matmul(code_data2, W_3) + b_3)
recover = tf.matmul(code_data3, W_4) + b_4
grad_phi_psi = []
for i in range(batch_size):
    for j in range(dimensionality):
        grad_phi_psi.append(tf.gradients(recover[i][j], [training_data[i]], unconnected_gradients='zero')[0])
grad_phi_psi = tf.reshape(tf.stack(grad_phi_psi), [batch_size, dimensionality, dimensionality])


# this is gradient field close to our points
rand_training_data = training_data + tf.random.normal(shape=[batch_size, dimensionality],
                                                      mean=0.0,stddev=epsilon)
rand_code_data1 = tf.nn.sigmoid(tf.matmul(rand_training_data, W_1) + b_1)
rand_code_data2 = tf.nn.sigmoid(tf.matmul(rand_code_data1, W_2) + b_2)
rand_code_data3 = tf.nn.sigmoid(tf.matmul(rand_code_data2, W_3) + b_3)
rand_recover = tf.matmul(rand_code_data3, W_4) + b_4
rand_grad_phi_psi = []
for i in range(batch_size):
    for j in range(dimensionality):
        rand_grad_phi_psi.append(tf.gradients(rand_recover[i][j], [rand_training_data[i]], unconnected_gradients='zero')[0])
rand_grad_phi_psi = tf.reshape(tf.stack(rand_grad_phi_psi), [batch_size, dimensionality, dimensionality])

new_code_data1 = tf.nn.sigmoid(tf.matmul(gradient_training_data, W_1) + b_1)
new_code_data2 = tf.nn.sigmoid(tf.matmul(new_code_data1, W_2) + b_2)
new_code_data3 = tf.nn.sigmoid(tf.matmul(new_code_data2, W_3) + b_3)
new_recover = tf.matmul(new_code_data3, W_4) + b_4
new_grad_phi_psi = []
for i in range(batch_size):
    for j in range(dimensionality):
        new_grad_phi_psi.append(tf.gradients(new_recover[i][j], [gradient_training_data[i]], unconnected_gradients='zero')[0])
new_grad_phi_psi = tf.reshape(tf.stack(new_grad_phi_psi), [batch_size, dimensionality, dimensionality])

In [6]:
# Define the loss function
loss = tf.reduce_mean(tf.square(training_data - recover)) + \
       gamma*tf.reduce_mean(tf.square(grad_phi_psi-rand_grad_phi_psi)) + \
       mu*tf.reduce_mean(tf.square(new_grad_phi_psi - old_P))

# Training step
train_step = tf.train.AdamOptimizer(learning_rate).minimize(loss)

In [7]:
# Run the training
sess = tf.InteractiveSession()
sess.run(tf.global_variables_initializer())

In [10]:
N = 290506
x_train = images[:N]
N_grad = 1000
grad_x_train = images[0:N_grad*290:290]


cur_U = np.zeros((N_grad, dimensionality, k))
cur_Sigma = np.zeros((N_grad, k, k))
cur_V = np.zeros((N_grad, k, dimensionality))
feed_P = np.zeros((batch_size, dimensionality, dimensionality))
cur_W_1 = np.random.normal(0, 0.35, (dimensionality, code_size1))
cur_W_2 = np.random.normal(0, 0.35, (code_size1, code_size2))
cur_W_3 = np.random.normal(0, 0.35, (code_size2, code_size1))
cur_W_4 = np.random.normal(0, 0.35, (code_size1, dimensionality))
cur_b_1 = np.zeros((code_size1))
cur_b_2 = np.zeros((code_size2))
cur_b_3 = np.zeros((code_size1))
cur_b_4 = np.zeros((dimensionality))

num_batches = int(N/batch_size)
grad_num_batches = int(N_grad/batch_size)

for iter in range(iter_num):
    for i in range(steps_number):
        # Get the next batch
        which_batch = i%num_batches
        input_batch = x_train[which_batch*batch_size:(which_batch+1)*batch_size]
        grad_which_batch = i%grad_num_batches
        grad_input_batch = grad_x_train[grad_which_batch*batch_size:(grad_which_batch+1)*batch_size]
        for r in range(batch_size):
            U = cur_U[grad_which_batch*batch_size+r]
            Sigma = cur_Sigma[grad_which_batch*batch_size+r]
            V = cur_V[grad_which_batch*batch_size+r]
            feed_P[r] = np.matmul(U,np.matmul(Sigma,V))
        feed_dict = {training_data: input_batch, gradient_training_data: grad_input_batch, 
                     old_P:feed_P,
                  old_W_1:cur_W_1, old_W_2:cur_W_2, old_W_3:cur_W_3, old_W_4:cur_W_4, 
                     old_b_1:cur_b_1, old_b_2:cur_b_2, old_b_3:cur_b_3, old_b_4:cur_b_4}
        # Run the training step
        train_step.run(feed_dict=feed_dict)
        # Print the accuracy progress on the batch every 100 steps
        if i%1000 == 0:
            train_accuracy = sess.run(loss, feed_dict=feed_dict)
            print("Step %d, training batch accuracy %g %%"%(i, train_accuracy*100))
    for grad_which_batch in range(grad_num_batches):
        grad_input_batch = grad_x_train[grad_which_batch*batch_size:(grad_which_batch+1)*batch_size]
        feed_dict = {gradient_training_data: grad_input_batch}
        local_grad = sess.run(new_grad_phi_psi, feed_dict=feed_dict)
        for r in range(batch_size):
            u, s, vh = np.linalg.svd(local_grad[r,:,:], full_matrices=True)
            cur_U[grad_which_batch*batch_size+r] = u[:,0:k:1]
            cur_V[grad_which_batch*batch_size+r] = np.transpose(vh[:,0:k:1])
            cur_Sigma[grad_which_batch*batch_size+r] = np.diag(s[0:k:1])
    [cur_W_1, cur_W_2, cur_W_3, cur_W_4, cur_b_1, cur_b_2, cur_b_3, cur_b_4] = sess.run([W_1, W_2, W_3, W_4, b_1, b_2, b_3, b_4])


Step 0, training batch accuracy 28.6287 %
Step 1000, training batch accuracy 22.0753 %
Step 2000, training batch accuracy 19.053 %
Step 3000, training batch accuracy 16.9812 %
Step 4000, training batch accuracy 16.5551 %
Step 5000, training batch accuracy 15.2465 %
Step 6000, training batch accuracy 16.5262 %
Step 7000, training batch accuracy 15.0553 %
Step 8000, training batch accuracy 12.3506 %
Step 9000, training batch accuracy 12.9387 %
Step 0, training batch accuracy 11.8805 %
Step 1000, training batch accuracy 13.1683 %
Step 2000, training batch accuracy 10.8702 %
Step 3000, training batch accuracy 10.3593 %
Step 4000, training batch accuracy 11.4411 %
Step 5000, training batch accuracy 9.86214 %
Step 6000, training batch accuracy 10.2406 %
Step 7000, training batch accuracy 10.0146 %
Step 8000, training batch accuracy 8.85558 %
Step 9000, training batch accuracy 8.26899 %
Step 0, training batch accuracy 7.76484 %
Step 1000, training batch accuracy 8.02175 %
Step 2000, training 

Step 4000, training batch accuracy 1.4854 %
Step 5000, training batch accuracy 1.52253 %
Step 6000, training batch accuracy 1.55318 %
Step 7000, training batch accuracy 1.60422 %
Step 8000, training batch accuracy 1.52307 %
Step 9000, training batch accuracy 1.62122 %
Step 0, training batch accuracy 1.57059 %
Step 1000, training batch accuracy 1.60509 %
Step 2000, training batch accuracy 1.44667 %
Step 3000, training batch accuracy 1.61729 %
Step 4000, training batch accuracy 1.47831 %
Step 5000, training batch accuracy 1.51746 %
Step 6000, training batch accuracy 1.54425 %
Step 7000, training batch accuracy 1.59507 %
Step 8000, training batch accuracy 1.51721 %
Step 9000, training batch accuracy 1.61249 %


In [11]:
def euclidean_distance(img_a, img_b):
    '''Finds the distance between 2 images: img_a, img_b'''
    # element-wise computations are automatically handled by numpy
    return sum((img_a - img_b) ** 2)

from collections import defaultdict

def find_majority(labels):
    '''Finds the majority class/label out of the given labels'''
    # defaultdict(type) is to automatically add new keys without throwing error.
    counter = defaultdict(int)
    for label in labels:
        counter[label[0]] += 1

    # Finding the majority class.
    majority_count = max(counter.values())
    for key, value in counter.items():
        if value == majority_count:
            return key
train_images = np.asarray(images[:5000])
train_labels = np.asarray(labels[:5000])
test_images = np.asarray(test_images[:5000])
test_labels = np.asarray(test_labels[:5000])

def sigmoid(x):
    return 1. /(1+np.exp(-x))
def new_euclidean_distance(img_a, img_b):
    img_a = np.reshape(img_a, (1,-1))
    img_b = np.reshape(img_b, (1,-1))
    img_a = sigmoid(np.matmul(img_a, cur_W_1) + cur_b_1)
    img_a = sigmoid(np.matmul(img_a, cur_W_2) + cur_b_2)
    img_b = sigmoid(np.matmul(img_b, cur_W_1) + cur_b_1)
    img_b = sigmoid(np.matmul(img_b, cur_W_2) + cur_b_2)
    return np.sum((img_a - img_b) ** 2)

def new_predict(k, train_images, train_labels, test_images):
    '''
    Predicts the new data-point's category/label by 
    looking at all other training labels
    '''
    # distances contains tuples of (distance, label)
    distances = [(new_euclidean_distance(test_image, image), label)
                    for (image, label) in zip(train_images, train_labels)]
    # sort the distances list by distances
    compare = lambda distance: distance[0]
    by_distances = sorted(distances, key=compare)
    # extract only k closest labels
    k_labels = [label for (_, label) in by_distances[:k]]
    # return the majority voted label
    return find_majority(k_labels)

# Predicting and printing the accuracy
i = 0
total_correct = 0
for test_image in test_images[:5000]:
    pred = new_predict(10, train_images, train_labels, test_image)
    if pred == test_labels[i]:
        total_correct += 1
    acc = (total_correct / (i+1)) * 100
    if i%100 == 0:
        print('test image['+str(i)+']', '\tpred:', pred, '\torig:', test_labels[i], '\tacc:', str(round(acc, 2))+'%')
    i += 1

test image[0] 	pred: 0 	orig: [1] 	acc: 0.0%
test image[100] 	pred: 1 	orig: [1] 	acc: 74.26%
test image[200] 	pred: 1 	orig: [1] 	acc: 72.64%
test image[300] 	pred: 0 	orig: [0] 	acc: 73.42%
test image[400] 	pred: 1 	orig: [0] 	acc: 75.56%
test image[500] 	pred: 0 	orig: [0] 	acc: 75.65%
test image[600] 	pred: 1 	orig: [1] 	acc: 77.2%
test image[700] 	pred: 1 	orig: [1] 	acc: 76.32%
test image[800] 	pred: 0 	orig: [1] 	acc: 76.15%
test image[900] 	pred: 0 	orig: [0] 	acc: 74.92%
test image[1000] 	pred: 1 	orig: [1] 	acc: 75.52%
test image[1100] 	pred: 1 	orig: [1] 	acc: 75.3%
test image[1200] 	pred: 0 	orig: [1] 	acc: 75.19%
test image[1300] 	pred: 1 	orig: [1] 	acc: 75.33%
test image[1400] 	pred: 0 	orig: [0] 	acc: 75.37%
test image[1500] 	pred: 1 	orig: [1] 	acc: 74.95%
test image[1600] 	pred: 1 	orig: [1] 	acc: 75.02%
test image[1700] 	pred: 1 	orig: [4] 	acc: 75.01%
test image[1800] 	pred: 1 	orig: [1] 	acc: 75.18%
test image[1900] 	pred: 1 	orig: [0] 	acc: 75.28%
test image[2000]