In [1]:
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf

  from ._conv import register_converters as _register_converters


In [17]:
class NN:
    def __init__(
        self, train_X, train_Y, test_X, test_Y, 
        n_input=None, n_output=None, learning_rate=0.001, n_iterations=1000, beta=0.0001,
        n_hidden_layers=2, n_nodes_per_layer=[256, 256], batch_size=128, display_step=100,
        activation=tf.nn.leaky_relu, output_activation=tf.nn.tanh
    ):       
        self.trainX = train_X
        self.trainY = train_Y
        self.testX = test_X
        self.testY = test_Y
        
        self.n_instances = self.trainX.shape[0]
        self.n_features = self.trainX.shape[1]
        self.n_classes = self.trainY.shape[1]

        # setting initial parameters
        self.n_input = n_input if n_input is not None else self.n_features
        self.n_output = n_output if n_output is not None else self.n_classes
        self.learning_rate = learning_rate
        self.n_iterations = n_iterations
        self.n_layers = n_hidden_layers
        # self.n_nodes_per_layer = n_nodes_per_layer
        self.batch_size = batch_size
        self.display_step = display_step
        
        self.beta = beta  # parameter for L2 loss
        
        self._epochs_completed = 0
        self._index_in_epoch = 0
        
        # compute initial weights and biases for all layers
        self.wt, self.b = self._generate_initial_weights(n_nodes_per_layer)
        
        # graph placeholders
        self.X = tf.placeholder("float", [None, self.n_input])
        self.Y = tf.placeholder("float", [None, self.n_output])
        
        # perceptron and output perceptron computation
        self.perceptron = lambda h, w, b: activation(tf.add(tf.matmul(h, w), b))
        self.output_perceptron = lambda h, w, b: output_activation(tf.add(tf.matmul(h, w), b))
        
    
    def _generate_initial_weights(self, n_nodes_per_layer):
        weights = dict()
        biases = dict()
        prev_layer_n_output = self.n_input
        for i in range(self.n_layers):
            weights.update({
                'layer_{}'.format(i+1): tf.Variable(tf.random_normal([prev_layer_n_output, n_nodes_per_layer[i]]))
            })
            biases.update({
                'layer_{}'.format(i+1): tf.Variable(tf.random_normal([n_nodes_per_layer[i]]))
            })
            prev_layer_n_output = n_nodes_per_layer[i]
        # add the weight and bias for output layer
        weights.update({
            'output': tf.Variable(tf.random_normal([prev_layer_n_output, self.n_output]))
        })
        biases.update({
            'output': tf.Variable(tf.random_normal([self.n_output]))
        })
        
        return weights, biases
    
    def next_batch(self, X, Y):
        """Return the next `batch_size`. Generalizes TF's internal implementation"""
        _x = X
        _y = Y
        start = self._index_in_epoch
        self._index_in_epoch += self.batch_size
        if self._index_in_epoch > self.n_instances:
            # Finished epoch
            self._epochs_completed += 1
            # Shuffle the data
            perm = np.arange(self.n_instances)
            np.random.shuffle(perm)
            _x = _x[perm]
            _y = _y[perm]
            # Start next epoch
            start = 0
            self._index_in_epoch = self.batch_size
            assert self.batch_size <= self.n_instances
        end = self._index_in_epoch
        return _x[start:end], _y[start:end]
    
    def neural_net(self, x):
        _p = self.perceptron
        _op = self.output_perceptron
        
        layer_outputs = []
        prev_layer_output = x
        for i in range(self.n_layers):
            if len(layer_outputs) == 0:
                _input = x
            else:
                _input = layer_outputs[-1]
            layer_outputs.append(_p(_input, self.wt['layer_{}'.format(i+1)], self.b['layer_{}'.format(i+1)]))
        output = _op(layer_outputs[-1], self.wt['output'], self.b['output'])
        return output
    
    def optimize(self):
        predictions = self.neural_net(self.X)
        
        regularizer_additive_value = np.sum([self.beta*tf.nn.l2_loss(arr) for _, arr in self.wt.items()]) + np.sum([self.beta*tf.nn.l2_loss(arr) for _, arr in self.b.items()])

        # Define loss and optimizer
        loss_op = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits_v2(logits=predictions, labels=self.Y)# + regularizer_additive_value
        )
        train_op = tf.train.AdamOptimizer(learning_rate=self.learning_rate).minimize(loss_op)

        # Evaluate model (with test logits, for dropout to be disabled)
        correct_pred = tf.equal(tf.argmax(predictions, 1), tf.argmax(self.Y, 1))
        accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

        # Start training
        with tf.Session() as sess:

            # Initialize the variables (i.e. assign their default value) 
            # and Run the initializer
            sess.run(tf.global_variables_initializer())

            for step in range(1, self.n_iterations+1):
                batch_x, batch_y = self.next_batch(self.trainX, self.trainY)                  
                
                # Run optimization op (backprop)
                sess.run(train_op, feed_dict={self.X: batch_x, self.Y: batch_y})
                if step % self.display_step == 0 or step == 1:
                    # Calculate batch loss and accuracy
                    loss, acc = sess.run([loss_op, accuracy], feed_dict={self.X: batch_x, self.Y: batch_y})
                    print("Step " + str(step) + ", Minibatch Loss= " + \
                          "{:.4f}".format(loss) + ", Training Accuracy= " + \
                          "{:.3f}".format(acc))

            print("Optimization Finished!")

            # Calculate accuracy for MNIST test images
            print("Testing Accuracy:", \
                  sess.run(accuracy, feed_dict={self.X: self.testX, self.Y: self.testY}))


# MNIST

In [8]:
# Import MNIST data
from tensorflow.examples.tutorials.mnist import input_data as mnist_data
mnist = mnist_data.read_data_sets("/tmp/data/", one_hot=True)

Extracting /tmp/data/train-images-idx3-ubyte.gz
Extracting /tmp/data/train-labels-idx1-ubyte.gz
Extracting /tmp/data/t10k-images-idx3-ubyte.gz
Extracting /tmp/data/t10k-labels-idx1-ubyte.gz


In [9]:
# _x, _y = mnist.train.next_batch(50)
# print(_x.shape, _y.shape)
# mnist.test.images.shape
print(type(mnist.train.images), type(mnist.train.labels), type(mnist.test.images), type(mnist.test.labels))

<class 'numpy.ndarray'> <class 'numpy.ndarray'> <class 'numpy.ndarray'> <class 'numpy.ndarray'>


In [5]:
NN(mnist.train.images, mnist.train.labels, mnist.test.images, mnist.test.labels).optimize()

Step 1, Minibatch Loss= 1020.0073, Training Accuracy= 0.195
Step 100, Minibatch Loss= 18.9039, Training Accuracy= 0.805
Step 200, Minibatch Loss= 7.8815, Training Accuracy= 0.891
Step 300, Minibatch Loss= 0.8989, Training Accuracy= 0.969
Step 400, Minibatch Loss= 3.1837, Training Accuracy= 0.922
Step 500, Minibatch Loss= 8.2724, Training Accuracy= 0.891
Step 600, Minibatch Loss= 6.7082, Training Accuracy= 0.914
Step 700, Minibatch Loss= 4.5275, Training Accuracy= 0.945
Step 800, Minibatch Loss= 1.3480, Training Accuracy= 0.961
Step 900, Minibatch Loss= 2.7980, Training Accuracy= 0.938
Step 1000, Minibatch Loss= 1.0324, Training Accuracy= 0.977
Optimization Finished!
Testing Accuracy: 0.9333


# 20 News Groups

In [3]:
# Import 20NG data
from sklearn.datasets import fetch_20newsgroups
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
ngtrain = fetch_20newsgroups(subset='train', data_home='../Data')
ngtest = fetch_20newsgroups(subset='test', data_home='../Data')
ngtrain_vectorizer = TfidfVectorizer(stop_words='english', strip_accents='ascii')
_ngtrain_vectors = ngtrain_vectorizer.fit_transform(ngtrain.data)
ngtest_vectorizer = TfidfVectorizer(stop_words='english', strip_accents='ascii')
_ngtest_vectors = ngtest_vectorizer.fit_transform(ngtest.data)

In [4]:
# remove features in test that are not found in train
one_hot_encode = lambda n_classes, target: np.eye(n_classes)[target]
get_indices = lambda x, y: list(set(np.searchsorted(x, y)))

common_features = np.intersect1d(ngtrain_vectorizer.get_feature_names(), ngtest_vectorizer.get_feature_names())
ngtrain_X = _ngtrain_vectors[:, get_indices(ngtrain_vectorizer.get_feature_names(), common_features)].todense()
ngtest_X = _ngtest_vectors[:, get_indices(ngtest_vectorizer.get_feature_names(), common_features)].todense()
ngtrain_Y = one_hot_encode(20, ngtrain.target)
ngtest_Y = one_hot_encode(20, ngtest.target)

print(ngtrain_X.shape, ngtrain_Y.shape, ngtest_X.shape, ngtest_Y.shape)
print(type(ngtrain_X), type(ngtrain_Y), type(ngtest_X), type(ngtest_Y))

(11314, 49601) (11314, 20) (7532, 49601) (7532, 20)
<class 'numpy.matrixlib.defmatrix.matrix'> <class 'numpy.ndarray'> <class 'numpy.matrixlib.defmatrix.matrix'> <class 'numpy.ndarray'>


In [8]:
NN(ngtrain_X, ngtrain_Y, ngtest_X, ngtest_Y,
   #learning_rate=0.01, n_iterations=500, n_hidden_layers=2, n_nodes_per_layer=[256, 256],
   #batch_size=128, display_step=100, activation=tf.nn.leaky_relu
  ).optimize()

Step 1, Minibatch Loss= 130.7787, Training Accuracy= 0.141
Step 100, Minibatch Loss= 1.0607, Training Accuracy= 0.883
Step 200, Minibatch Loss= 0.0447, Training Accuracy= 0.984
Step 300, Minibatch Loss= 0.0977, Training Accuracy= 0.992
Step 400, Minibatch Loss= 0.0001, Training Accuracy= 1.000
Step 500, Minibatch Loss= 0.0003, Training Accuracy= 1.000
Step 600, Minibatch Loss= 0.0007, Training Accuracy= 1.000
Step 700, Minibatch Loss= 0.0005, Training Accuracy= 1.000
Step 800, Minibatch Loss= 0.0584, Training Accuracy= 0.992
Step 900, Minibatch Loss= 0.0001, Training Accuracy= 1.000
Step 1000, Minibatch Loss= 0.0001, Training Accuracy= 1.000
Optimization Finished!
Testing Accuracy: 0.58032393


In [19]:
NN(ngtrain_X, ngtrain_Y, ngtest_X, ngtest_Y,
    n_hidden_layers=2, n_nodes_per_layer=[512, 512], output_activation=tf.nn.leaky_relu
  ).optimize()

Step 1, Minibatch Loss= 557.9946, Training Accuracy= 0.039
Step 100, Minibatch Loss= 47.9567, Training Accuracy= 0.180
Step 200, Minibatch Loss= 27.1920, Training Accuracy= 0.375
Step 300, Minibatch Loss= 18.8638, Training Accuracy= 0.531
Step 400, Minibatch Loss= 11.7341, Training Accuracy= 0.641
Step 500, Minibatch Loss= 5.4132, Training Accuracy= 0.781
Step 600, Minibatch Loss= 3.9951, Training Accuracy= 0.867
Step 700, Minibatch Loss= 1.7166, Training Accuracy= 0.914
Step 800, Minibatch Loss= 1.0871, Training Accuracy= 0.969
Step 900, Minibatch Loss= 1.2516, Training Accuracy= 0.938
Step 1000, Minibatch Loss= 0.8755, Training Accuracy= 0.969
Optimization Finished!
Testing Accuracy: 0.40228358
