# Logistic Regression with Tensorflow_2

# -- Further Example--

In [1]:
# Logistic Regression with Tensorflow

## 1. Hands-on method

### 1.1 Load dataset

import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import os

from sklearn.datasets import make_moons
m=1000
X_moons, y_moons = make_moons(m, noise=0.1, random_state=42)
X_moons_bias = np.c_[np.ones((m,1)), X_moons]

X_moons_bias.shape

y_moons.shape

##### Column vector

#Target value y has to be made as column vector. (m,1). not (m,)

y_moons_column_vector = y_moons.reshape(-1,1)

y_moons_column_vector.shape

##### Train/Test split

from sklearn.model_selection import train_test_split

X_train, X_test, Y_train, Y_test = train_test_split(X_moons_bias, y_moons_column_vector,
                                                    test_size = 0.2, random_state=42)

### 1.2 Random batch

def random_batch(X_train, Y_train, batch_size):
    rnd_indices = np.random.randint(0, len(X_train), batch_size)
    X_batch = X_train[rnd_indices]
    Y_batch = Y_train[rnd_indices]
    return X_batch, Y_batch

X_batch, Y_batch = random_batch(X_train, Y_train, 5)

X_batch

Y_batch

### 1.3 Create placeholder

n_inputs = 2

X = tf.placeholder(tf.float32, shape=(None, n_inputs + 1), name="X")
Y = tf.placeholder(tf.float32, shape=(None, 1), name="Y")

### 1.4 Parameters (Init X)

theta = tf.Variable(tf.random_uniform([n_inputs + 1, 1], -1.0, -1.0, seed=42), name="theta")

theta

### 1.5 Compute cost

logits = tf.matmul(X, theta, name="logits")
y_proba = tf.sigmoid(logits)

loss = tf.losses.log_loss(Y, y_proba)

### 1.6 Building the model

learning_rate = 0.01
optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
training_op = optimizer.minimize(loss)

init = tf.global_variables_initializer()

n_epochs = 1000
batch_size = 50
n_batches = int(np.ceil(m/batch_size))

with tf.Session() as sess:
    sess.run(init)
    
    for epoch in range(n_epochs):
        for batch_index in range(n_batches):
            X_batch, Y_batch = random_batch(X_train, Y_train, batch_size)
            sess.run(training_op, feed_dict={X: X_batch, Y: Y_batch})
        loss_val = loss.eval({X: X_test, Y: Y_test})
        if epoch % 100 == 0:
            print("Epoch:", epoch, "\tLoss:", loss_val)
            
    y_proba_val = y_proba.eval(feed_dict={X: X_test, Y: Y_test})

  from ._conv import register_converters as _register_converters


Epoch: 0 	Loss: 1.0437666
Epoch: 100 	Loss: 0.32519263
Epoch: 200 	Loss: 0.2947565
Epoch: 300 	Loss: 0.27978507
Epoch: 400 	Loss: 0.2705417
Epoch: 500 	Loss: 0.26478872
Epoch: 600 	Loss: 0.26073337
Epoch: 700 	Loss: 0.25778908
Epoch: 800 	Loss: 0.25537723
Epoch: 900 	Loss: 0.25398958


## 2.1 Adding feature (Polynomial)

In [2]:
X_train_enhanced = np.c_[X_train,
                         np.square(X_train[:, 1]),
                         np.square(X_train[:, 2]),
                         X_train[:, 1] ** 3,
                         X_train[:, 2] ** 3]
X_test_enhanced = np.c_[X_test,
                        np.square(X_test[:, 1]),
                        np.square(X_test[:, 2]),
                        X_test[:, 1] ** 3,
                        X_test[:, 2] ** 3]

In [3]:
tf.reset_default_graph() 

## 2.2 Define Graph (to make it easy to recycle)

Not include definition X and Y value to get more flexibility.

In [4]:
def logistic_regression(X, Y, initializer=None, seed=42, learning_rate=0.01):
    n_inputs_including_bias = int(X.get_shape()[1])
    with tf.name_scope("logistic_regression"):
        with tf.name_scope("model"):
            if initializer is None:
                initializer = tf.random_uniform([n_inputs_including_bias, 1], -1.0, 1.0, seed=seed)
            theta = tf.Variable(initializer, name="theta")
            logits = tf.matmul(X, theta, name="logits")
            y_proba = tf.sigmoid(logits)
        with tf.name_scope("train"):
            loss = tf.losses.log_loss(Y, y_proba, scope="loss")
            optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
            training_op = optimizer.minimize(loss)
            loss_summary = tf.summary.scalar('log_loss', loss)
        with tf.name_scope("init"):
            init = tf.global_variables_initializer()
        with tf.name_scope("save"):
            saver = tf.train.Saver()
    return y_proba, loss, training_op, loss_summary, init, saver

## 2.3 Create log directory for tensorboard

In [5]:
from datetime import datetime

def log_dir(prefix=""):
    now = datetime.utcnow().strftime("%Y%m%d%H%M%S")
    root_logdir = "tf_logs"
    if prefix:
        prefix += "-"
    name = prefix + "run-" + now
    return "{}/{}/".format(root_logdir, name)

## 2.4 Graph

In [6]:
n_inputs = 2 + 4
logdir = log_dir("logreg")

X = tf.placeholder(tf.float32, shape=(None, n_inputs + 1), name="X")
Y = tf.placeholder(tf.float32, shape=(None, 1), name="Y")

y_proba, loss, training_op, loss_summary, init, saver = logistic_regression(X, Y)

file_writer = tf.summary.FileWriter(logdir, tf.get_default_graph())

Finally, we can train model. This code check whether or not, there is checkpoint first.

## 2.5 Execute (with check point)

In [8]:
n_epochs = 4001
batch_size = 50
n_batches = int(np.ceil(m/batch_size))

checkpoint_path = "/tmp/my_logreg_model.ckpt"
checkpoint_epoch_path = checkpoint_path + ".epoch"
final_model_path = "./my_logreg_model"

with tf.Session() as sess:
    if os.path.isfile(checkpoint_epoch_path):
        with open(checkpoint_epoch_path, "rb") as f:
            start_epoch = int(f.read())
        print("Paused training. Continue epoch.", start_epoch)
        saver.restore(sess, checkpoint_path)
    else:
        start_epoch = 0
        sess.run(init)
        
    for epoch in range(start_epoch, n_epochs):
        for batch_index in range(n_batches):
            X_batch, Y_batch = random_batch(X_train_enhanced, Y_train, batch_size)
            sess.run(training_op, feed_dict={X: X_batch, Y: Y_batch})
        loss_val, summary_str = sess.run([loss, loss_summary], feed_dict={X: X_test_enhanced, Y: Y_test})
        file_writer.add_summary(summary_str, epoch)
        if epoch % 500 == 0:
            print("Epoch:", epoch, "\tLoss:", loss_val)
            saver.save(sess, checkpoint_path)
            with open(checkpoint_epoch_path, "wb") as f:
                f.write(b"%d" % (epoch + 1))
                
    saver.save(sess, final_model_path)
    y_proba_val = y_proba.eval(feed_dict={X: X_test_enhanced, Y: Y_test})
    os.remove(checkpoint_epoch_path)

Paused training. Continue epoch. 501
INFO:tensorflow:Restoring parameters from /tmp/my_logreg_model.ckpt
Epoch: 1000 	Loss: 0.11276165
Epoch: 1500 	Loss: 0.088597186
Epoch: 2000 	Loss: 0.07413462
Epoch: 2500 	Loss: 0.0644461
Epoch: 3000 	Loss: 0.057429064
Epoch: 3500 	Loss: 0.052119676
Epoch: 4000 	Loss: 0.04787636
