In [1]:
import tensorflow as tf
import numpy as np
from sklearn.datasets import make_moons

n_sample = 5000

X, y = make_moons(n_sample, noise=0.4)

  from ._conv import register_converters as _register_converters


In [2]:
X.shape, y.shape

((5000, 2), (5000,))

In [3]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y.reshape(-1, 1))
X_train.shape, X_test.shape, y_train.shape, y_test.shape

((3750, 2), (1250, 2), (3750, 1), (1250, 1))

In [4]:
def reset_graph(seed=42):
    tf.reset_default_graph()
    tf.set_random_seed(seed)
    np.random.seed(seed)

In [8]:
from datetime import datetime

now = datetime.utcnow().strftime("%Y%m%d%H%M%S")
root_logdir = "tf_logs"
logdir = "{}/run-{}/".format(root_logdir, now)

In [9]:
def fetch_batch(epoch, batch_index, batch_size):
    np.random.seed(epoch * batch_index + batch_size)
    m = X_train.shape[0]
    indices = np.random.randint(m, size=batch_size)
    X_batch = X_train[indices]
    y_batch = y_train[indices]
    return X_batch, y_batch

def logistic_regression(epochs, batch_size, learning_rate=0.01):
    # Reset graph to prevent interference
    reset_graph()
    
    # Construction phase
    m, n = X_train.shape
    n_batches = int(np.ceil(m / batch_size))
    X = tf.placeholder(tf.float32, #data type
                       shape=(None, 2), #shape
                       name='X' #name
                      )
    y = tf.placeholder(tf.float32,
                       shape=(None, 1),
                       name='y')
    
    theta = tf.Variable(tf.random_uniform([n, 1], -1.0, 1.0),
                        dtype=tf.float32,
                        name='theta')
    
    with tf.name_scope('predictions') as pred_scope:
        logits = tf.matmul(X, theta, name='logits')
        y_proba = tf.sigmoid(logits)
    
    with tf.name_scope('loss') as loss_scope:
#         error = y_pred - y
#         mse = tf.reduce_mean(tf.square(error), name='mse')
        # Below is a better loss function for LogReg
#         epsilon = 1e-7  # to avoid an overflow when computing the log
#         loss = -tf.reduce_mean(y * tf.log(y_proba + epsilon) + (1 - y) * tf.log(1 - y_proba + epsilon))
        # Or
        loss = tf.losses.log_loss(y, y_proba)  # uses epsilon = 1e-7 by default
        
    with tf.name_scope('gradient-descent') as gd_scope:
        optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
        training_op = optimizer.minimize(loss)
    
    # Initialization phase
    init = tf.global_variables_initializer()
    saver = tf.train.Saver()
    loss_summary = tf.summary.scalar('Loss', loss)
    file_writer = tf.summary.FileWriter(logdir, tf.get_default_graph())
    
    with tf.Session() as sess:
        sess.run(init)
        for epoch in range(epochs):
            for batch_index in range(n_batches):
                X_batch, y_batch = fetch_batch(epoch, batch_index, batch_size)
                if batch_index % 10 == 0:
                    summary_str = loss_summary.eval(feed_dict={X: X_batch, y: y_batch})
                    step = epoch * n_batches + batch_index
                    file_writer.add_summary(summary_str, step)
                sess.run(training_op, feed_dict={X: X_batch, y: y_batch})
        best_theta = theta.eval()
        
    file_writer.flush()
    file_writer.close()
    print("Best theta:")
    print(best_theta)

In [10]:
epochs = 1000
batch_size = 400

logistic_regression(epochs, batch_size)

Best theta:
[[ 1.2300317]
 [-2.6846046]]
