In [2]:
import numpy as np

from sklearn.metrics.pairwise import cosine_similarity
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import Normalizer
from scipy.spatial.distance import cosine

import tensorflow as tf

In [3]:
# Experiment setup: draw N random points in the unit hypercube and label every
# unordered pair by whether the angle between the two points is below theta.
N = 2000
dim = 5
theta = np.pi/8
np.random.seed(0)  # must precede np.random.rand so the sample is repeatable

vecs = np.random.rand(N, dim)
# NOTE: despite the name, `dists` holds pairwise cosine *similarities*
# (an N x N matrix); larger means closer in angle.
dists = cosine_similarity(vecs, vecs)
cond = dists > np.cos(theta)
# Strict upper triangle (k=1): each unordered pair once, no self-pairs.
ixs = np.triu_indices(N, k=1)
n_pairs = len(ixs[0])

#################################################################
# Feature construction. Alternatives kept for experimentation:
#   data = np.hstack((vecs[ixs[0]], vecs[ixs[1]]))   # both vectors, side by side
#   data = np.subtract(vecs[ixs[0]], vecs[ixs[1]])   # element-wise difference
# Active choice: cosine distance (1 - similarity) as a single feature column.
data = (1 - dists[ixs]).reshape((n_pairs, -1))
#################################################################

# Binary target: 1 when the pair's similarity exceeds cos(theta), else 0.
data_y = cond[ixs].astype(int)
un = np.unique(data_y, return_counts=True)
print(un)
counts = un[1]
# Share of label 0 among all pairs, reported as the reference accuracy.
baseline = counts[0] / sum(counts)
print('baseline: %s\n' % baseline)

# Stratified split so both parts keep the same class proportions.
split = train_test_split(
    data, data_y, test_size=0.33, random_state=42, stratify=data_y)
X_train, X_test, y_train, y_test = split

# Row-wise normalisation is applied only to multi-column feature sets
# (i.e. when one of the alternative feature constructions above is enabled).
if data.shape[1] > 1:
    norm = Normalizer()
    X_train = norm.fit_transform(X_train)
    X_test = norm.transform(X_test)

def gen_batches(X, y, batch_size=100):
    """Yield consecutive ``(X_batch, y_batch)`` slices of the inputs.

    Rows are taken in their existing order (no shuffling). The number of
    rows is read from ``X.shape[0]``; the final batch is shorter when that
    count is not a multiple of ``batch_size``.

    Args:
        X: array-like exposing ``.shape`` and supporting slicing.
        y: sequence sliced with the same row windows as ``X``.
        batch_size: maximum number of rows per yielded batch.

    Yields:
        Tuples ``(X[start:stop], y[start:stop])``.
    """
    n_rows = X.shape[0]
    for start in range(0, n_rows, batch_size):
        window = slice(start, start + batch_size)
        yield X[window], y[window]


(array([0, 1]), array([1765742,  233258]))
baseline: 0.8833126563281641



In [4]:
# Parameters
learning_rate = 0.001
batch_size = 128
# One pass over the training set: ceil(n_train / batch_size) steps.
# Integer ceiling division is used instead of int(n / batch_size) + 1, which
# over-counts by one whenever n_train is an exact multiple of batch_size and
# would make the training loop ask the batch generator for a batch that
# does not exist.
num_steps = (X_train.shape[0] + batch_size - 1) // batch_size
display_step = 100  # print training metrics every this many steps

# Network Parameters
n_hidden_1 = 128  # 1st layer number of neurons
n_hidden_2 = 128  # 2nd layer number of neurons (only used by the disabled 2nd layer)
num_input = data.shape[1]  # number of feature columns
num_classes = 2  #  total classes

In [5]:
# Graph inputs: placeholders that are filled through feed_dict at run time.
# Feature matrix with any number of rows and num_input columns.
X = tf.placeholder(tf.float32, shape=[None, num_input])
# Integer class ids; the shape is left unconstrained.
labels = tf.placeholder(tf.int64, shape=None, name='labels')

def neural_net(x):
    """Build the classifier graph and return its unnormalised class scores.

    The network is one ReLU hidden layer of ``n_hidden_1`` units followed by
    a linear output layer with ``num_classes`` units (no activation, so the
    result is logits). A second hidden layer existed in an earlier variant
    and is currently disabled.

    Args:
        x: input tensor fed to the first dense layer.

    Returns:
        The output tensor of the final dense layer.
    """
    hidden = tf.layers.dense(inputs=x, units=n_hidden_1, activation=tf.nn.relu)
    # hidden = tf.layers.dense(hidden, n_hidden_2, activation=tf.nn.relu)
    return tf.layers.dense(inputs=hidden, units=num_classes)

# Construct model
logits = neural_net(X)
prediction = tf.nn.softmax(logits)

# Define loss and optimizer: mean cross-entropy over the batch, with integer
# class ids as labels (hence the "sparse" variant), minimised with Adam.
loss_op = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(
    logits=logits, labels=labels))
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
train_op = optimizer.minimize(loss_op)

# Evaluate model: fraction of rows whose highest-probability class equals
# the fed label.
correct_pred = tf.equal(tf.argmax(prediction, 1), labels)
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

# Initialize the variables (i.e. assign their default value)
init = tf.global_variables_initializer()

# Start training
with tf.Session() as sess:

    # Run the initializer
    sess.run(init)

    batcher = gen_batches(X_train, y_train, batch_size)

    # Pair the step counter with the batch generator instead of calling
    # next(batcher) a fixed number of times: zip ends at whichever is
    # exhausted first, so a num_steps larger than the number of available
    # batches finishes cleanly rather than raising StopIteration at the very
    # end of training.
    for step, (batch_x, batch_y) in zip(range(1, num_steps + 1), batcher):
        train_feed = {X: batch_x, labels: batch_y}
        # Run optimization op (backprop)
        sess.run(train_op, feed_dict=train_feed)
        if step % display_step == 0 or step == 1:
            # Calculate batch loss and accuracy on the batch just trained on
            # (i.e. after the weight update).
            loss, acc = sess.run([loss_op, accuracy], feed_dict=train_feed)
            print("Step " + str(step) + ", Minibatch Loss= " +
                  "{:.4f}".format(loss) + ", Training Accuracy= " +
                  "{:.3f}".format(acc))

    print("Optimization Finished!")

    # Calculate accuracy for test data (whole test set in a single run)
    print("Testing Accuracy:",
          sess.run(accuracy, feed_dict={X: X_test,
                                        labels: y_test}))


Step 1, Minibatch Loss= 0.6965, Training Accuracy= 0.273
Step 100, Minibatch Loss= 0.4011, Training Accuracy= 0.828
Step 200, Minibatch Loss= 0.3095, Training Accuracy= 0.875
Step 300, Minibatch Loss= 0.2630, Training Accuracy= 0.891
Step 400, Minibatch Loss= 0.2064, Training Accuracy= 0.914
Step 500, Minibatch Loss= 0.2097, Training Accuracy= 0.898
Step 600, Minibatch Loss= 0.1673, Training Accuracy= 0.906
Step 700, Minibatch Loss= 0.1675, Training Accuracy= 0.898
Step 800, Minibatch Loss= 0.1531, Training Accuracy= 0.883
Step 900, Minibatch Loss= 0.1095, Training Accuracy= 0.961
Step 1000, Minibatch Loss= 0.1327, Training Accuracy= 0.930
Step 1100, Minibatch Loss= 0.0934, Training Accuracy= 0.969
Step 1200, Minibatch Loss= 0.1008, Training Accuracy= 0.977
Step 1300, Minibatch Loss= 0.0767, Training Accuracy= 0.977
Step 1400, Minibatch Loss= 0.0962, Training Accuracy= 0.969
Step 1500, Minibatch Loss= 0.0638, Training Accuracy= 0.977
Step 1600, Minibatch Loss= 0.0985, Training Accuracy

In [None]:
0.9831552