In [None]:
import numpy as np
import tensorflow as tf
import matplotlib.pylab as plt
%matplotlib inline
%config InlineBackend.figure_format = 'retina'

In [None]:
def plot_scatter(W_, xy, labels):
    """Scatter-plot two classes and, optionally, a linear decision boundary.

    Parameters
    ----------
    W_ : indexable of three numbers, or None
        Coefficients ``(w0, w1, w2)`` of the boundary
        ``w0 + w1 * x + w2 * y = 0``. ``None`` draws the points only.
    xy : array indexed as ``xy[:, 0]`` and ``xy[:, 1]``
        Point coordinates, one row per sample.
    labels : array
        One label per sample; it is flattened and compared with 0 (drawn
        blue) and 1 (drawn red).

    The figure is shown with ``plt.show()``; nothing is returned.
    """
    flat_labels = labels.flatten()
    for k, color in [(0, 'b'), (1, 'r')]:
        idx = flat_labels == k
        plt.scatter(xy[idx, 0], xy[idx, 1], c=color)

    if W_ is not None:
        w0, w1, w2 = W_[0], W_[1], W_[2]
        if w2 != 0:
            # Regular case: solve the boundary equation for y.
            x1 = np.linspace(-.1, 1.1)
            x2 = -w1 / w2 * x1 - w0 / w2
            plt.plot(x1, x2, '--k')
        elif w1 != 0:
            # w2 == 0 means the boundary is the vertical line x = -w0 / w1;
            # dividing by w2 would only produce inf/nan and an empty plot.
            plt.axvline(-w0 / w1, linestyle='--', color='k')
        # w1 == w2 == 0: no line is defined, so only the points are drawn.

    plt.grid()
    plt.show()

In [None]:
# Reproducible toy data set: 30 uniform points in the unit square.
np.random.seed(402)
xy = np.random.rand(30, 2)

# A point gets label 1 when it lies strictly on the positive side of the
# line -4/5 + 3/4 * x + y = 0, and label 0 otherwise.
on_positive_side = -0.8 + 0.75 * xy[:, 0] + xy[:, 1] > 0
labels = on_positive_side.astype(float).reshape(-1, 1)

In [None]:
# Show the labelled points on their own; no decision boundary is passed.
plot_scatter(W_=None, xy=xy, labels=labels)

## Sigmoid Without a Separate Bias Variable (bias folded into the weights via a constant 1.0 feature)

In [None]:
# Prepend a constant 1.0 to every point so that W[0] acts as the bias and no
# separate bias variable is needed.
features1 = np.array([[1.0, xval, yval] for xval, yval in xy])

x = tf.placeholder(tf.float32, shape=(None, 3))
y = tf.placeholder(tf.float32, shape=(None, 1))

W = tf.Variable(tf.ones([3, 1]), dtype=tf.float32)

# Keep the pre-activation around: the loss is computed from it directly.
logits = tf.matmul(x, W)
model = tf.nn.sigmoid(logits)

# Binary cross-entropy, mathematically the same as
#   mean(-y * log(model) - (1 - y) * log(1 - model)),
# but evaluated from the logits. Taking tf.log of a sigmoid that has
# saturated to exactly 0 or 1 in float32 yields -inf, and 0 * -inf is NaN,
# which then poisons the loss and the gradients.
loss = tf.reduce_mean(
    tf.nn.sigmoid_cross_entropy_with_logits(labels=y, logits=logits))

In [None]:
# generate_batches comes from the project-local `helper` module; presumably it
# yields (features, labels) mini-batches of `batch_size` rows -- confirm there.
from helper import generate_batches

lr = 0.1
MaxEpochs = 201
optimizer = tf.train.AdamOptimizer(lr)
train = optimizer.minimize(loss)

# Shuffle the samples once, with a fixed seed so runs are repeatable.
np.random.seed(702)
idx = np.arange(0, len(features1))
np.random.shuffle(idx)
shuffled_features = features1[idx]
shuffled_labels = labels[idx]

batch_size = 5
init = tf.global_variables_initializer()
with tf.Session() as sess:
    sess.run(init)
    # The unused `out = sess.run([model, loss], ...)` evaluation was dropped:
    # its result was never read and it does not affect training.
    for epoch in range(MaxEpochs):
        if epoch % 50 == 0:
            # Progress report: weights and loss over the whole data set.
            curr_W, curr_loss = sess.run([W, loss], feed_dict={x: shuffled_features, y: shuffled_labels})
            print(epoch, curr_W.flatten(), curr_loss)
        for x_batch, y_batch in generate_batches(batch_size, shuffled_features, shuffled_labels):
            sess.run(train, feed_dict={x: x_batch, y: y_batch})

    # Read the trained weights before the session closes.
    curr_W = sess.run(W)
W_sgd = curr_W.flatten()

In [None]:
# Overlay the learned boundary; W_sgd holds three values, the first of which
# multiplies the constant 1.0 feature.
plot_scatter(W_=W_sgd, xy=xy, labels=labels)

## Sigmoid With Bias

In [None]:
# Plain (x, y) coordinates; the bias is now a separate variable `b`.
features2 = np.array([[xval, yval] for xval, yval in xy])

x = tf.placeholder(tf.float32, shape=(None, 2))
y = tf.placeholder(tf.float32, shape=(None, 1))

W = tf.Variable(tf.ones([2, 1]), dtype=tf.float32)
b = tf.Variable(tf.ones([1]), dtype=tf.float32)

# Keep the pre-activation around: the loss is computed from it directly.
logits = tf.matmul(x, W) + b
model = tf.nn.sigmoid(logits)

# Binary cross-entropy, mathematically the same as
#   mean(-y * log(model) - (1 - y) * log(1 - model)),
# but evaluated from the logits. Taking tf.log of a sigmoid that has
# saturated to exactly 0 or 1 in float32 yields -inf, and 0 * -inf is NaN,
# which then poisons the loss and the gradients.
loss = tf.reduce_mean(
    tf.nn.sigmoid_cross_entropy_with_logits(labels=y, logits=logits))

In [None]:
# generate_batches comes from the project-local `helper` module; presumably it
# yields (features, labels) mini-batches of `batch_size` rows -- confirm there.
from helper import generate_batches

lr = 0.1
MaxEpochs = 201
optimizer = tf.train.AdamOptimizer(lr)
train = optimizer.minimize(loss)

# Shuffle the samples once, with a fixed seed so runs are repeatable.
np.random.seed(702)
idx = np.arange(0, len(features2))
np.random.shuffle(idx)
shuffled_features = features2[idx]
shuffled_labels = labels[idx]

batch_size = 5
init = tf.global_variables_initializer()
with tf.Session() as sess:
    sess.run(init)
    # The unused `out = sess.run([model, loss], ...)` evaluation was dropped:
    # its result was never read and it does not affect training.
    for epoch in range(MaxEpochs):
        if epoch % 50 == 0:
            # Progress report: bias, weights and loss over the whole data set.
            curr_b, curr_W, curr_loss = sess.run([b, W, loss], feed_dict={x: shuffled_features, y: shuffled_labels})
            print(epoch, curr_b.flatten(), curr_W.flatten(), curr_loss)
        for x_batch, y_batch in generate_batches(batch_size, shuffled_features, shuffled_labels):
            sess.run(train, feed_dict={x: x_batch, y: y_batch})

    # Read the trained parameters before the session closes.
    curr_b, curr_W = sess.run([b, W])
W_sgd = curr_W.flatten()
b_sgd = curr_b.flatten()

In [None]:
# plot_scatter expects (bias, w_x, w_y) as a single vector, so put the bias
# in front of the two weights.
boundary_coeffs = np.concatenate([b_sgd, W_sgd])
plot_scatter(W_=boundary_coeffs, xy=xy, labels=labels)

# Cross Entropy with Softmax

In [None]:
# Same 30 points as before (same seed), now with two-column labels.
np.random.seed(402)
xy = np.random.rand(30, 2)

# Signed score relative to the line -4/5 + 3/4 * x + y = 0.
score = -0.8 + 0.75 * xy[:, 0] + xy[:, 1]

# Notice one-hot encoding: column 0 is set where score > 0,
# column 1 where score <= 0.
labels = np.stack([score > 0, score <= 0], axis=1).astype(float)
labels[:10]

In [None]:
def plot_scatter_softmax(curr_W, curr_b, xy, labels):
    """Plot the two-class softmax probability surface with the data on top.

    Parameters
    ----------
    curr_W : 2-D array indexed as ``curr_W[i, j]`` with ``i, j`` in {0, 1}
        Weight of input coordinate ``i`` for class ``j``.
    curr_b : indexable of two numbers
        Per-class bias.
    xy : array indexed as ``xy[:, 0]`` and ``xy[:, 1]``
        Point coordinates, one row per sample.
    labels : 2-D array
        One-hot labels; rows with ``labels[:, 0] == 1`` are drawn blue and
        rows with ``labels[:, 1] == 1`` are drawn red.

    The filled contours show the softmax probability of class index 1 on a
    grid over [-0.1, 1.1] x [-0.1, 1.1]. ``curr_W`` is printed and the figure
    is shown with ``plt.show()``; nothing is returned.
    """
    x1 = np.linspace(-0.1, 1.1)
    X, Y = np.meshgrid(x1, x1)
    print(curr_W)
    Z1 = X * curr_W[0, 0] + Y * curr_W[1, 0] + curr_b[0]
    Z2 = X * curr_W[0, 1] + Y * curr_W[1, 1] + curr_b[1]

    # Softmax is unchanged by subtracting the same value from both logits.
    # Subtracting the element-wise maximum keeps every exponent <= 0, so
    # np.exp cannot overflow to inf and turn the ratio into inf / inf = NaN.
    Zmax = np.maximum(Z1, Z2)
    E1 = np.exp(Z1 - Zmax)
    E2 = np.exp(Z2 - Zmax)
    Z = E2 / (E1 + E2)

    plt.figure(figsize=(5, 5))
    cs = plt.contourf(X, Y, Z, np.linspace(0, 1, 11), cmap='coolwarm')
    plt.clabel(cs, colors='k')
    for k, color in [(0, 'b'), (1, 'r')]:
        idx = labels[:, k] == 1
        plt.scatter(xy[idx, 0], xy[idx, 1], c=color, edgecolors='k')
    plt.xlim([-0.1, 1.1])
    plt.ylim([-0.1, 1.1])
    plt.show()

## Cross Entropy with Softmax without logits

In [None]:
# Plain (x, y) coordinates, one row per sample.
features = np.array([[px, py] for px, py in xy])

# Two input coordinates in, two one-hot label columns out.
x = tf.placeholder(tf.float32, shape=(None, 2))
y = tf.placeholder(tf.float32, shape=(None, 2))

W = tf.Variable(tf.ones((2, 2)), dtype=tf.float32)
b = tf.Variable(tf.ones((2,)), dtype=tf.float32)

# Class probabilities are computed explicitly ...
class_scores = tf.matmul(x, W) + b
model = tf.nn.softmax(class_scores)

# ... and the cross-entropy is written out by hand from those probabilities:
# sum over the class axis per sample, then average over the batch.
per_sample_ce = tf.reduce_sum(-y * tf.log(model), axis=1)
loss = tf.reduce_mean(per_sample_ce)

In [None]:
# generate_batches comes from the project-local `helper` module; presumably it
# yields (features, labels) mini-batches of `batch_size` rows -- confirm there.
from helper import generate_batches

lr = 1.0
MaxEpochs = 201
optimizer = tf.train.AdamOptimizer(lr)
train = optimizer.minimize(loss)

# Shuffle the samples once, with a fixed seed so runs are repeatable.
np.random.seed(702)
idx = np.arange(0, len(features))
np.random.shuffle(idx)
shuffled_features = features[idx]
shuffled_labels = labels[idx]

batch_size = 5
init = tf.global_variables_initializer()
with tf.Session() as sess:
    sess.run(init)
    # The unused `out = sess.run([model, loss], ...)` evaluation was dropped:
    # its result was never read and it does not affect training.
    for epoch in range(MaxEpochs):
        if epoch % 50 == 0:
            # Progress report: bias, weights and loss over the whole data set.
            curr_b, curr_W, curr_loss = sess.run([b, W, loss], feed_dict={x: shuffled_features, y: shuffled_labels})
            print(epoch, curr_b.flatten(), curr_W.flatten(), curr_loss)

        for x_batch, y_batch in generate_batches(batch_size, shuffled_features, shuffled_labels):
            sess.run(train, feed_dict={x: x_batch, y: y_batch})

    # Read the trained parameters before the session closes.
    curr_b, curr_W = sess.run([b, W])
W_sgd = curr_W
b_sgd = curr_b

In [None]:
# Probability surface of the model trained with the hand-written loss.
plot_scatter_softmax(curr_W=W_sgd, curr_b=b_sgd, xy=xy, labels=labels)

## Cross Entropy with Softmax with logits

In [None]:
# Plain (x, y) coordinates, one row per sample.
features = np.array([[px, py] for px, py in xy])

# Two input coordinates in, two one-hot label columns out.
x = tf.placeholder(tf.float32, shape=(None, 2))
y = tf.placeholder(tf.float32, shape=(None, 2))

W = tf.Variable(tf.ones((2, 2)), dtype=tf.float32)
b = tf.Variable(tf.ones((2,)), dtype=tf.float32)

# Only the raw class scores are built here; the softmax is applied inside
# the loss op rather than as a separate step.
logits = tf.matmul(x, W) + b

per_sample_ce = tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=logits)
loss = tf.reduce_mean(per_sample_ce)

In [None]:
# generate_batches comes from the project-local `helper` module; presumably it
# yields (features, labels) mini-batches of `batch_size` rows -- confirm there.
from helper import generate_batches

# Optimiser set-up.
lr = 1.0
MaxEpochs = 201
optimizer = tf.train.AdamOptimizer(lr)
train = optimizer.minimize(loss)

# Fixed-seed shuffle of the sample order.
np.random.seed(702)
idx = np.arange(features.shape[0])
np.random.shuffle(idx)
shuffled_features, shuffled_labels = features[idx], labels[idx]

batch_size = 5
init = tf.global_variables_initializer()
with tf.Session() as sess:
    sess.run(init)
    for epoch in range(MaxEpochs):
        # Every 50th epoch, report parameters and loss on the full data set.
        if epoch % 50 == 0:
            curr_b, curr_W, curr_loss = sess.run(
                [b, W, loss],
                feed_dict={x: shuffled_features, y: shuffled_labels},
            )
            print(epoch, curr_b.flatten(), curr_W.flatten(), curr_loss)

        # One optimiser step per mini-batch.
        for x_batch, y_batch in generate_batches(batch_size, shuffled_features, shuffled_labels):
            sess.run(train, feed_dict={x: x_batch, y: y_batch})

    # Fetch the final parameters while the session is still open.
    curr_b, curr_W = sess.run([b, W])

W_sgd = curr_W
b_sgd = curr_b

In [None]:
# Probability surface of the model trained with the logits-based loss.
plot_scatter_softmax(curr_W=W_sgd, curr_b=b_sgd, xy=xy, labels=labels)