In [1]:
import tensorflow as tf
import numpy as np

In [2]:
from tensorflow_helpers.models.base_model import BaseModel

# Create a simple XOR dataset

Given a two-dimensional binary input vector `(x1, x2)`, the model should learn to output the XOR of its two components, as shown in the truth table below.

<table>
  <tr>
    <th>x1</th>
    <th>x2</th>
    <th>y</th>
  </tr>
  <tr>
    <td>0</td>
    <td>0</td>
    <td>0</td>
  </tr>
  <tr>
    <td>0</td>
    <td>1</td>
    <td>1</td>
  </tr>
  <tr>
    <td>1</td>
    <td>0</td>
    <td>1</td>
  </tr>
  <tr>
    <td>1</td>
    <td>1</td>
    <td>0</td>
  </tr>
</table>

Thus, the model takes as input a matrix of size `(?, 2)` and produces an output vector of size `(?,)` where `?` indicates the batch size.

In [3]:
# The four possible binary input pairs, one per row: shape (4, 2).
X = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])

# Labels are the element-wise XOR of the two input columns: [0, 1, 1, 0].
Y = X[:, 0] ^ X[:, 1]

In [4]:
X

array([[0, 0],
       [0, 1],
       [1, 0],
       [1, 1]])

In [5]:
Y

array([0, 1, 1, 0])

# Build the model

Create a class for our model

In [6]:
class SimpleModel(BaseModel):
    """Feed-forward binary classifier with a single ReLU hidden layer.

    The graph built by `build_model` is:
    input `X` -> ReLU hidden layer -> one logit per example -> sigmoid.

    Keyword Args:
        hidden_size (int): number of units in the hidden layer. Defaults to 10.
            No other keyword argument is read, and none is forwarded to
            `BaseModel.__init__`.
    """

    def __init__(self, **kwargs):
        super(SimpleModel, self).__init__()

        self.hidden_size = kwargs.get('hidden_size', 10)

    def build_model(self):
        """Build the graph and set `self.op_predict` and `self.op_loss`.

        Reads the tensors registered under the names 'X' and 'Y' from
        `self.input_dict`, so both inputs must have been added before this
        method is called. The static shape of every intermediate tensor is
        printed as the graph is constructed.
        """
        # we can get the batch size (shown for reference; it is not needed below)
        batch_size = self.get_batch_size()

        with tf.name_scope('input'):
            # You can access the input using the `self.input_dict` attribute
            X = self.input_dict['X']

            # static size of the feature dimension (second axis of X)
            X_shape = X.get_shape()
            input_dim = int(X_shape[1])

            print('X shape:', X.get_shape())

        # NOTE: names of variables created by tf.get_variable are governed by
        # variable scopes, not by tf.name_scope, so 'W', 'b', 'U' and 'v' are
        # not prefixed with the name scopes used here.
        with tf.name_scope('hidden_layer'):
            W = tf.get_variable('W', [input_dim, self.hidden_size])
            b = tf.get_variable('b', [self.hidden_size, ])

            h = tf.nn.relu(tf.matmul(X, W) + b)

            print('hidden layer shape:', h.get_shape())

        with tf.name_scope('output'):
            U = tf.get_variable('U', [self.hidden_size, 1])
            v = tf.get_variable('v', [1, ])

            output_logits = tf.matmul(h, U) + v
            print('Output logits shape:', output_logits.get_shape())

            # The operation above produces a matrix of size (?, 1)
            # so we need to reshape it to be a single vector
            output_logits = tf.reshape(output_logits, [-1])
            print('Output logits reshaped shape:', output_logits.get_shape())

            # set the attribute which will be used for prediction
            output_prob = tf.nn.sigmoid(output_logits)
            print('Output shape:', output_prob.get_shape())
            self.op_predict = output_prob

        with tf.name_scope("loss"):
            Y = self.input_dict['Y']
            print('Y shape:', Y.get_shape())

            # Pass labels and logits by name: TensorFlow >= 1.0 rejects
            # positional arguments for this op, and naming them removes any
            # ambiguity about which tensor is which.
            loss = tf.nn.sigmoid_cross_entropy_with_logits(
                labels=Y, logits=output_logits)
            print('Loss shape:', loss.get_shape())

            # get mean of the loss across the batch dimension
            loss_reduced = tf.reduce_mean(loss)
            print('Loss reduces shape:', loss_reduced.get_shape())

            # set the attribute which will be minimized during the training
            self.op_loss = loss_reduced

Next, create the model and add inputs

In [7]:
# Optional: this only limits the number of threads used by TensorFlow.
sess_config = tf.ConfigProto(
    intra_op_parallelism_threads=4,
    inter_op_parallelism_threads=4,
)
sess = tf.Session(config=sess_config)

In [8]:
# Instantiate the model with 16 hidden units (the class default is 10).
model = SimpleModel(hidden_size=16)

# add_input takes the name of the input and its shape without the batch dimension;
# here X.shape[1:] is (2,)
model.add_input('X', X.shape[1:])

# train_only=True indicates that this input is presented during training only;
# Y is one-dimensional, so Y.shape[1:] is the empty tuple ()
model.add_input('Y', Y.shape[1:], dtype=tf.float32, train_only=True)

# you can skip this if you want to use the default session
model.set_session(sess)

# Build the graph; this prints the shape of each intermediate tensor.
model.build_model()

X shape: (?, 2)
hidden layer shape: (?, 16)
Output logits shape: (?, 1)
Output logits reshaped shape: (?,)
Output shape: (?,)
Y shape: (?,)
Loss shape: (?,)
Loss reduces shape: ()


Finally, train the model

In [9]:
# Training data, keyed by the input names used in the `add_input` calls.
data_dict_train = dict(X=X, Y=Y)

In [10]:
# Train for 10000 epochs; batch_size=64 is larger than the 4 available examples.
model.train_model(
    data_dict_train,
    nb_epoch=10000,
    batch_size=64,
)

Get predictions

In [11]:
# Test data: only 'X' is supplied, since 'Y' was registered as a train-only input.
data_dict_test = dict(X=X)

In [12]:
# Run the prediction op on the test inputs.
Y_pred = model.predict(
    data_dict_test,
    batch_size=64,
)

In [13]:
Y_pred

[6.84841e-05, 0.99992418, 0.99992466, 7.8695921e-05]

In [14]:
# it is a list, not a numpy array
type(Y_pred)

list

In [15]:
# Threshold the probabilities at 0.5: values >= 0.5 become 1, all others become 0.
Y_pred = np.array(Y_pred)
condition = Y_pred >= 0.5
# Casting the boolean mask to the probabilities' dtype yields the 1/0 labels.
Y_pred = condition.astype(Y_pred.dtype)

In [16]:
Y_pred

array([ 0.,  1.,  1.,  0.], dtype=float32)

In [17]:
# Fraction of predictions that match the true labels.
matches = Y_pred == Y
accuracy = np.mean(matches)

In [18]:
accuracy

1.0