In [1]:
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
from fr_util import getData, getBinaryData, y2indicator, error_rate, init_weight_and_bias
from sklearn.utils import shuffle
%matplotlib inline

In [2]:
class HiddenLayer(object):
    def __init__(self, M1, M2, an_id):
        self.id = an_id
        self.M1 = M1
        self.M2 = M2
        W, b = init_weight_and_bias(M1, M2)
        self.W = tf.Variable(W.astype(np.float32))
        self.b= tf.Variable(b.astype(np.float32))
        self.params = [self.W, self.b]
        
    def forward(self, X):
        return tf.nn.relu(tf.matmul(X, self.W) + self.b)

In [3]:
X, Y = getData()

In [6]:
class ANN(object):
    def __init__(self, hidden_layer_sizes):
        self.hidden_layer_sizes = hidden_layer_sizes
    
    def fit(self, X, Y, learning_rate=10e-7, mu=0.99, decay=0.999, reg=10e-3, epochs=400, batch_sz=100, show_fig=False):
        K = len(set(Y))
        
        # make a validation set
        X, Y = shuffle(X, Y)
        X = X.astype(np.float32)
        Y = y2indicator(Y).astype(np.float32)
        Xvalid, Yvalid = X[-1000:], Y[-1000:]
        Yvalid_flat = np.argmax(Yvalid, axis=1)
        X, Y = X[:-1000], Y[:-1000]
        
        # initialize hidden layers
        N, D = X.shape
        self.hidden_layers = []
        M1 = D
        count = 0
        for M2 in self.hidden_layer_sizes:
            h = HiddenLayer(M1, M2, count)
            self.hidden_layers.append(h)
            M1 = M2
            count += 1
        W, b = init_weight_and_bias(M1, K)
        self.W = tf.Variable(W.astype(np.float32))
        self.b = tf.Variable(b.astype(np.float32))
        
        # collect params
        self.params = [self.W, self.b]
        for h in self.hidden_layers:
            self.params += h.params
            
        tfX = tf.placeholder(tf.float32, shape=(None, D), name='X')
        tfT = tf.placeholder(tf.float32, shape=(None, K), name='T')
        act = self.forward(tfX)
        
        rcost = reg*sum([tf.nn.l2_loss(p) for p in self.params])
        cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=act, logits=tfT)) + rcost
        prediction = self.predict(tfX)
        train_op = tf.train.RMSPropOptimizer(learning_rate, decay=decay, momentum=mu).minimize(cost)
        
        n_batches = N // batch_sz
        costs = []
        init = tf.global_variables_initializer()
        with tf.Session() as session:
            session.run(init)
            for i in range(epochs):
                X, Y = shuffle(X, Y)
                for j in range(n_batches):
                    Xbatch = X[j*batch_sz:(j*batch_sz + batch_sz)]
                    Ybatch = Y[j*batch_sz:(j*batch_sz + batch_sz)]
                    
                    session.run(train_op, feed_dict={tfX: Xbatch, tfT: Ybatch})
                    
                    if j % 20 == 0:
                        c = session.run(cost, feed_dict={tfX: Xvalid, tfT: Yvalid})
                        costs.append(c)
                        
                        p = session.run(prediction, feed_dict={tfX: Xvalid, tfT: Yvalid})
                        e = error_rate(Yvalid_flat, p)
                        print("i:", i, "j:", j, "nb:", n_batches, "cost:", c, "error_rate:", e)
                        
        if show_fig:
            plt.plot(costs)
            
    def forward(self, X):
        Z = X
        for h in self.hidden_layers:
            Z = h.forward(Z)
        return tf.matmul(Z, self.W) + self.b
    
    def predict(self, X):
        act = self.forward(X)
        return tf.argmax(act, 1)
        
        

In [10]:
model = ANN([2000, 1000])
model.fit(X, Y, show_fig=True, epochs=4, learning_rate=10e-3)

i: 0 j: 0 nb: 392 cost: 8.63101 error_rate: 0.879
i: 0 j: 20 nb: 392 cost: 8.26519 error_rate: 0.879
i: 0 j: 40 nb: 392 cost: 7.38982 error_rate: 0.879
i: 0 j: 60 nb: 392 cost: 6.20018 error_rate: 0.879
i: 0 j: 80 nb: 392 cost: 4.88488 error_rate: 0.879
i: 0 j: 100 nb: 392 cost: 3.60155 error_rate: 0.879
i: 0 j: 120 nb: 392 cost: 2.46392 error_rate: 0.879
i: 0 j: 140 nb: 392 cost: 1.53917 error_rate: 0.879
i: 0 j: 160 nb: 392 cost: 0.851813 error_rate: 0.879
i: 0 j: 180 nb: 392 cost: 0.392557 error_rate: 0.879
i: 0 j: 200 nb: 392 cost: 0.128887 error_rate: 0.879
i: 0 j: 220 nb: 392 cost: 0.0157749 error_rate: 0.879
i: 0 j: 240 nb: 392 cost: 0.00495364 error_rate: 0.89
i: 0 j: 260 nb: 392 cost: 0.0513318 error_rate: 0.89
i: 0 j: 280 nb: 392 cost: 0.11833 error_rate: 0.89
i: 0 j: 300 nb: 392 cost: 0.180848 error_rate: 0.89
i: 0 j: 320 nb: 392 cost: 0.224462 error_rate: 0.89
i: 0 j: 340 nb: 392 cost: 0.243512 error_rate: 0.89
i: 0 j: 360 nb: 392 cost: 0.238765 error_rate: 0.89
i: 0 j: 380

KeyboardInterrupt: 