In [1]:
import numpy as np

from sklearn.metrics.pairwise import cosine_similarity
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import Normalizer
from scipy.spatial.distance import cosine

import tensorflow as tf

In [2]:
N = 2000
dim = 5
theta = np.pi/8
np.random.seed(0)

vecs = np.random.rand(N, dim)
dists = cosine_similarity(vecs, vecs)
cond = dists > np.cos(theta)
ixs = np.triu_indices(N, k=1)

#################################################################

data = np.hstack((vecs[ixs[0]], vecs[ixs[1]]))
#data = np.subtract(vecs[ixs[0]],  vecs[ixs[1]])
#data = np.reshape(1 - dists[ixs], (len(ixs[0]), -1))

#################################################################

data_y = cond[ixs].astype(int)
un = np.unique(data_y, return_counts=True)
print(un)
counts = un[1]
print('baseline: %s\n' % (counts[0]/sum(counts)))

X_train, X_test, y_train, y_test = train_test_split(
    data, data_y, test_size=0.33, random_state=42, stratify=data_y)

if data.shape[1] > 1:
    norm = Normalizer()
    X_train = norm.fit_transform(X_train)
    X_test = norm.transform(X_test)

def gen_batches(X, y, batch_size=100):
    for i in range(0, X.shape[0], batch_size):
        yield X[i:i+batch_size], y[i:i+batch_size]


(array([0, 1]), array([1765742,  233258]))
baseline: 0.8833126563281641



In [3]:
# Parameters
learning_rate = 0.001
batch_size = 128
num_steps = int(X_train.shape[0]/batch_size) + 1
display_step = 100

# Network Parameters
n_hidden_1 = 128  # 1st layer number of neurons
n_hidden_2 = 128  # 2nd layer number of neurons
num_input = data.shape[1]  
num_classes = 2  #  total classes

In [4]:
# tf Graph input
X = tf.placeholder("float", [None, num_input])
labels = tf.placeholder(name='labels', shape=None, dtype=tf.int64)

def neural_net(x):
    layer_1 = tf.layers.dense(x, n_hidden_1, activation=tf.nn.relu)
    layer_2 = tf.layers.dense(layer_1, n_hidden_2, activation=tf.nn.relu)
    out_layer = tf.layers.dense(layer_2, num_classes)
    return out_layer

# Construct model
logits = neural_net(X)
prediction = tf.nn.softmax(logits)

# Define loss and optimizer
loss_op = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(
    logits=logits, labels=labels))
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
train_op = optimizer.minimize(loss_op)

# Evaluate model
correct_pred = tf.equal(tf.argmax(prediction, 1), labels)
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
precision = tf.metrics.precision(labels, tf.argmax(prediction, 1))

# Initialize the variables (i.e. assign their default value)
init_g = tf.global_variables_initializer()
init_l = tf.local_variables_initializer()

# Start training
with tf.Session() as sess:

    # Run the initializer
    sess.run(init_g)
    sess.run(init_l)

    batcher = gen_batches(X_train, y_train, batch_size)

    for step in range(1, num_steps+1):
        batch_x, batch_y = next(batcher)
        # Run optimization op (backprop)
        sess.run(train_op, feed_dict={X: batch_x, labels: batch_y})
        if step % display_step == 0 or step == 1:
            # Calculate batch loss and accuracy
            loss, acc, prec = sess.run([loss_op, accuracy, precision], feed_dict={X: batch_x,
                                                                 labels: batch_y})
            print("Step " + str(step) + ", Minibatch Loss= " +
                  "{:.4f}".format(loss) + ", Training Accuracy= " +
                  "{:.3f}".format(acc) + ", Train Precision= " +
                  "{:3f}".format(prec[0]))

    print("Optimization Finished!")

    # Calculate accuracy for test data
    acc,prec = sess.run([accuracy, precision], feed_dict={X: X_test, labels: y_test})
    print("Test Accuracy= " + "{:.3f}".format(acc) + 
          ", Test Precision= " + "{:3f}".format(prec[0]))
          

Step 1, Minibatch Loss= 0.6013, Training Accuracy= 0.883, Train Precision= 0.000000
Step 100, Minibatch Loss= 0.4338, Training Accuracy= 0.828, Train Precision= 0.000000
Step 200, Minibatch Loss= 0.2314, Training Accuracy= 0.875, Train Precision= 0.000000
Step 300, Minibatch Loss= 0.1586, Training Accuracy= 0.922, Train Precision= 0.000000
Step 400, Minibatch Loss= 0.0873, Training Accuracy= 0.969, Train Precision= 1.000000
Step 500, Minibatch Loss= 0.1176, Training Accuracy= 0.969, Train Precision= 0.866667
Step 600, Minibatch Loss= 0.0709, Training Accuracy= 0.969, Train Precision= 0.884615
Step 700, Minibatch Loss= 0.1197, Training Accuracy= 0.961, Train Precision= 0.911765
Step 800, Minibatch Loss= 0.0513, Training Accuracy= 0.992, Train Precision= 0.891304
Step 900, Minibatch Loss= 0.0681, Training Accuracy= 0.969, Train Precision= 0.916667
Step 1000, Minibatch Loss= 0.0965, Training Accuracy= 0.969, Train Precision= 0.888889
Step 1100, Minibatch Loss= 0.0419, Training Accuracy= 0

Step 9600, Minibatch Loss= 0.0642, Training Accuracy= 0.969, Train Precision= 0.908812
Step 9700, Minibatch Loss= 0.0491, Training Accuracy= 0.984, Train Precision= 0.906606
Step 9800, Minibatch Loss= 0.0285, Training Accuracy= 0.992, Train Precision= 0.906977
Step 9900, Minibatch Loss= 0.0199, Training Accuracy= 1.000, Train Precision= 0.907601
Step 10000, Minibatch Loss= 0.0272, Training Accuracy= 0.992, Train Precision= 0.908689
Step 10100, Minibatch Loss= 0.0143, Training Accuracy= 1.000, Train Precision= 0.909884
Step 10200, Minibatch Loss= 0.0176, Training Accuracy= 0.992, Train Precision= 0.911302
Step 10300, Minibatch Loss= 0.0588, Training Accuracy= 0.977, Train Precision= 0.911932
Step 10400, Minibatch Loss= 0.0359, Training Accuracy= 0.992, Train Precision= 0.912676
Optimization Finished!
Test Accuracy= 0.983, Test Precision= 0.912953
