# In [0]:
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt

def init_weights(M1, M2):
    """Create initial parameters for a dense layer with M1 inputs and M2 outputs.

    Returns:
        A ``(W, b)`` tuple where ``W`` is an ``(M1, M2)`` array of
        standard-normal samples divided by ``sqrt(M1)`` and ``b`` is a
        length-``M2`` array of zeros.
    """
    weights = np.random.randn(M1, M2) / np.sqrt(M1)
    biases = np.zeros(M2)
    return weights, biases

class HiddenLayer(object):
    """Dense layer computing ``f(X @ W + b)`` with TensorFlow variables."""

    def __init__(self, M1, M2, f):
        """Create the layer's weight and bias variables.

        Args:
            M1: Number of input units (rows of ``W``).
            M2: Number of output units (columns of ``W``, length of ``b``).
            f: Callable applied to the affine output in ``forward``.
        """
        self.M1, self.M2, self.f = M1, M2, f

        # Initial values come from init_weights as NumPy arrays; they are cast
        # to float32 before being wrapped in TensorFlow variables.
        initial_W, initial_b = init_weights(M1, M2)
        self.W = tf.Variable(initial_W.astype(np.float32))
        self.b = tf.Variable(initial_b.astype(np.float32))

        # Trainable parameters of this layer, weights first, then bias.
        self.params = [self.W, self.b]

    def forward(self, X):
        """Return ``f(X W + b)`` for the input ``X``."""
        affine = tf.matmul(X, self.W) + self.b
        return self.f(affine)

class ANN(object):
    """Feed-forward softmax classifier assembled from ``HiddenLayer`` objects.

    The network is built and trained inside ``fit`` using the TensorFlow
    graph/session API (``tf.placeholder`` / ``tf.Session``).
    """

    def __init__(self, hidden_layer_sizes):
        """Store the hidden layer widths, ordered from input side to output side."""
        self.hidden_layer_sizes = hidden_layer_sizes

    def fit(self, X, Y, activation=tf.nn.relu, learning_rate=1e-3, mu=0.99,
            reg=0, epochs=100, batch_size=None, print_period=10,
            show_fig=True):
        """Build the network and train it with mini-batch RMSProp.

        Args:
            X: 2-D array of inputs, one row per sample; cast to float32.
            Y: 2-D array of targets with one column per class (presumably
                one-hot indicators -- confirm against the caller); cast to
                int32. The number of columns sets the output layer width.
            activation: Activation used by every hidden layer.
            learning_rate: Learning rate passed to ``RMSPropOptimizer``.
            mu: Momentum passed to ``RMSPropOptimizer`` (its decay is fixed
                at 0.99).
            reg: Multiplier applied to the summed ``tf.nn.l2_loss`` of all
                layer parameters (weights and biases).
            epochs: Training runs for ``epochs + 1`` passes (indices
                ``0..epochs`` inclusive).
            batch_size: Samples per mini-batch. ``None`` or a value larger
                than the number of samples means one batch holding all
                samples. When it does not divide the sample count, the
                trailing ``N % batch_size`` samples are not used.
            print_period: The cost is printed every ``print_period`` passes.
            show_fig: If true, plot the recorded costs when training ends.

        Raises:
            ValueError: If ``X`` has no rows or ``batch_size`` is not positive.
        """
        X = X.astype(np.float32)
        Y = Y.astype(np.int32)
        N, D = X.shape
        if N == 0:
            raise ValueError('X must contain at least one sample')

        # Resolve the batch size before building any graph. Clamping to N
        # guarantees at least one batch per pass; with zero batches the
        # per-pass cost evaluation below would have no batch to use.
        if batch_size is None or batch_size > N:
            batch_size = N
        if batch_size <= 0:
            raise ValueError('batch_size must be a positive integer')
        n_batches = N // batch_size

        # Hidden layers: each layer's input width is the previous output width.
        self.layers = []
        M1 = D
        for M2 in self.hidden_layer_sizes:
            self.layers.append(HiddenLayer(M1, M2, activation))
            M1 = M2

        # Output layer emits raw logits; softmax is applied inside the loss.
        K = Y.shape[1]
        self.layers.append(HiddenLayer(M1, K, tf.identity))

        # Flat list of every layer's parameters, used for the L2 penalty.
        self.weights = [param for layer in self.layers for param in layer.params]

        # Placeholders through which the mini-batches are fed.
        tfX = tf.placeholder(tf.float32, shape=(None, D), name='X')
        tfY = tf.placeholder(tf.float32, shape=(None, K), name='Y')
        Yish = self.forward(tfX)

        reg_pen = reg * sum(tf.nn.l2_loss(w) for w in self.weights)
        cross_entropy = tf.nn.softmax_cross_entropy_with_logits_v2(
            logits=Yish, labels=tfY)
        cost = tf.reduce_sum(cross_entropy) + reg_pen
        train_op = tf.train.RMSPropOptimizer(
            learning_rate, decay=0.99, momentum=mu).minimize(cost)
        # Class-index op; it is constructed here but not run by this method.
        predict_op = tf.argmax(Yish, 1)

        # Main training loop.
        costs = []
        init = tf.global_variables_initializer()
        with tf.Session() as session:
            session.run(init)
            for i in range(epochs + 1):
                for j in range(n_batches):
                    start = j * batch_size
                    stop = start + batch_size
                    batch_feed = {tfX: X[start:stop], tfY: Y[start:stop]}
                    session.run(train_op, feed_dict=batch_feed)

                # The recorded cost is that of the last mini-batch of the
                # pass, evaluated after its update step.
                c = session.run(cost, feed_dict=batch_feed)
                costs.append(c)
                if i % print_period == 0:
                    print('Epoch ', i ,'/', epochs, ': Cost: ', c)

            if show_fig:
                plt.plot(costs)
                plt.show()

    def forward(self, X):
        """Pass ``X`` through every layer in order and return the final output."""
        out = X
        for layer in self.layers:
            out = layer.forward(out)
        return out

    def predict(self, X):
        """Return the network output (logits) for ``X`` cast to float32.

        The result is whatever ``forward`` returns, i.e. the output of the
        layers' TensorFlow ops, not class indices.

        NOTE(review): the session opened in ``fit`` is closed when ``fit``
        returns, so the trained variable values are presumably not available
        to callers evaluating this output -- confirm the intended usage.
        """
        return self.forward(X.astype(np.float32))