In [0]:
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
from pylab import rcParams
from sklearn.linear_model import LogisticRegression
from sklearn.linear_model import Perceptron
# Use a wide 16 x 8 default figure size for every plot in this notebook.
rcParams['figure.figsize'] = (16, 8)

In [0]:
# Switch TensorFlow to 2.x semantics (eager execution etc.).
# The call goes through `tf.compat.v1` because the top-level
# `tf.enable_v2_behavior` symbol only exists in TF 1.x and raises
# AttributeError on TF 2.x; the compat path is available on both, and on
# TF 2.x v2 behaviour is already the default.
tf.compat.v1.enable_v2_behavior()

We will train a classifier to learn the indicator function $y=1\{0<x<1\}$, i.e. $y=1$ when $x$ lies between 0 and 1 and $y=0$ otherwise.



In [0]:
# Seed NumPy's global RNG so the sampled dataset -- and every figure and score
# derived from it -- is identical on each Restart & Run All.
np.random.seed(0)
# 100 inputs drawn uniformly from [-1, 2); the label is True exactly when the
# input lies in the unit interval.
x = np.random.uniform(low=-1, high=2, size=100)
y = np.logical_and(x >= 0.0, x <= 1.0)

In [0]:
# Show the raw samples as unconnected markers: input on the horizontal axis,
# boolean label on the vertical axis.
plt.plot(x, y, marker='o', linestyle='None')

# Linear Classifier

In [0]:
# Baseline: logistic regression directly on the raw scalar input.
# C=999 is a large inverse-regularisation strength, so the fit is close to
# unregularised; tol=0.1 is the solver's stopping tolerance.
clf = LogisticRegression(C=999, tol=0.1, random_state=0)
# scikit-learn expects a 2-D feature matrix, so x becomes a single column.
clf = clf.fit(x[:, np.newaxis], y)

In [0]:
# Plot the fitted linear score (slope * x + intercept) for every sample.
plt.plot(x, clf.coef_[0] * x + clf.intercept_, marker='o', linestyle='None')

In [0]:
# Hard class predictions of the linear baseline on the training inputs.
clf.predict(x[:, np.newaxis])

In [0]:
# Score of the linear baseline on the same data it was fitted on.
clf.score(X=x[:, np.newaxis], y=y)

# Neural network with a designed hidden layer

We are going to use the standard ingredients of a feed-forward network: linear transformations and ReLU activations.
We can design a hidden layer by observing that $1\{0<x<1\} = 1-(1\{x\le 0\} + 1\{x\ge 1\})$.

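This gives us a hint as to what hidden units might be useful: one unit that is active only for $x<0$ and another that is active only for $x>1$, e.g. $\mathrm{ReLU}(-x)$ and $\mathrm{ReLU}(x-1)$.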

In [0]:
# Hand-designed hidden layer with two ReLU units, one per column:
#   column 0 = max(-x, 0)     -> non-zero only for x < 0
#   column 1 = max(x - 1, 0)  -> non-zero only for x > 1
h = np.stack((np.maximum(0.0, -x), np.maximum(0.0, x - 1)), axis=-1)

In [0]:
# Samples in hidden-unit space (unit 0 vs. unit 1), coloured by label.
plt.scatter(*h.T, c=y)

In [0]:
# Logistic regression on the two designed hidden features instead of raw x.
clf2 = LogisticRegression(C=999, tol=0.001, random_state=0)
clf2 = clf2.fit(h, y)

In [0]:
# Predicted probability from column 1 of predict_proba (presumably the
# y=True class -- confirm against clf2.classes_) as a function of x.
plt.plot(x, clf2.predict_proba(h)[:, 1], marker='o', linestyle='None')

# Trained hidden layer

In [0]:
# Define the model: one ReLU hidden layer feeding a single logistic output.
num_hidden = 2
np.random.seed(1)  # makes the two weight draws below repeatable
# Weights are initialised uniformly in [-0.5, 0.5).
w_hidden = tf.Variable(np.random.rand(1, num_hidden) - 0.5, dtype=tf.float32)
# NOTE(review): this bias is a single scalar, so it is shared by every hidden
# unit via broadcasting -- confirm that is intended rather than one per unit.
b_hidden = tf.Variable(0, dtype=tf.float32)
w_logistic = tf.Variable(np.random.rand(num_hidden, 1) - 0.5, dtype=tf.float32)
b_logistic = tf.Variable(-0.1)
# Trainable parameters, in the positional order GetProbability takes them.
model = [w_hidden, b_hidden, w_logistic, b_logistic]

In [0]:
# float32 tensors for training: inputs as a single column (one example per
# row), boolean labels converted to 0.0 / 1.0.
x_tensor = tf.cast(x[:, np.newaxis], dtype=tf.float32)
y_tensor = tf.cast(y, dtype=tf.float32)

In [0]:
# Plain stochastic gradient descent; no arguments are passed, so the library's
# default hyper-parameters (learning rate, momentum) are used.
optimizer = tf.keras.optimizers.SGD()

In [0]:
def GetProbability(x_tensor, w_hidden, b_hidden, w_logistic, b_logistic):
  """Forward pass: ReLU hidden layer followed by a logistic output unit.

  Args:
    x_tensor: input tensor, matmul-compatible with `w_hidden` (one example
      per row).
    w_hidden: weights of the hidden layer.
    b_hidden: bias added to the hidden pre-activations.
    w_logistic: weights mapping hidden activations to the output logit.
    b_logistic: bias added to the output logit.

  Returns:
    Tensor of predicted probabilities with all size-1 dimensions removed.
  """
  h = tf.nn.relu(tf.matmul(x_tensor, w_hidden) + b_hidden)
  a = tf.matmul(h, w_logistic) + b_logistic
  # Bound the logit to [-10, 10] so the probability never reaches exactly
  # 0 or 1 and the caller's log(p) / log(1 - p) stay finite.
  a = tf.clip_by_value(a, -10, 10)
  # Library sigmoid instead of the hand-written 1 / (1 + exp(-a)).
  return tf.squeeze(tf.sigmoid(a))

In [0]:
@tf.function
def UpdateParameters(model):
  """Run one full-batch gradient step on the summed cross-entropy loss.

  Reads the notebook-level `x_tensor`, `y_tensor` and `optimizer`.

  Args:
    model: list of variables in the order GetProbability takes them
      (w_hidden, b_hidden, w_logistic, b_logistic); updated in place.

  Returns:
    The scalar summed loss, evaluated before this step's update is applied.
  """
  with tf.GradientTape() as tape:
    p = GetProbability(x_tensor, *model)
    # `tf.math.log` instead of `tf.log`: the latter is a TF 1.x-only alias
    # that is not present in the TF 2.x namespace.
    per_example_loss = (
        -y_tensor * tf.math.log(p) - (1 - y_tensor) * tf.math.log(1 - p))
    total_loss = tf.reduce_sum(per_example_loss)
  grads = tape.gradient(total_loss, model)
  optimizer.apply_gradients(zip(grads, model))
  return total_loss

In [0]:
# 5000 full-batch gradient steps; each call updates the variables held in
# `model` in place.
for step in range(5000):
  UpdateParameters(model)

In [0]:
# Display the trained parameters: [w_hidden, b_hidden, w_logistic, b_logistic].
model

In [0]:
# Probability predicted by the trained network for every sample, against x.
plt.plot(x, GetProbability(x_tensor, *model).numpy(),
         marker='o', linestyle='None')

In [0]:
# Activations of the *trained* hidden layer (same expression as the first
# line of GetProbability). Note: this rebinds `h`, which previously held the
# hand-designed features.
h = tf.nn.relu(b_hidden + tf.matmul(x_tensor, w_hidden))
# Samples in learned hidden-unit space, coloured by label.
plt.scatter(h[:, 0], h[:, 1], c=y)