In [0]:
import numpy as np

In [0]:
######################
def relu(mat):
    return np.maximum(mat, 0)

def sigmoid(mat):
    sig = lambda x : 1/(1+np.exp(-x))
    return sig(mat)

def softmax(mat):
    y = np.copy(mat)
    y = np.exp(y - np.max(y))
    s = np.sum(y, axis=1)
    
    for i in range(s.shape[0]):
        y[i] = y[i]/s[i]
    return y 

def linear(mat):
    lin = lambda x : x
    return lin(mat)

########################
def relu_deri(z_):
    y = np.copy(z_)
    y[y>=0] = 1
    y[y<0] = 0
    return y
        
def sigmoid_deri(z_):
    sig_deri = lambda x : sigmoid(x)*(1-sigmoid(x)) 
    return sig_deri(z_) 

def linear_deri(z_):
    lin_deri = lambda x: 1
    return lin_deri(z_)

def softmax_deri(z_):
    soft_deri = lambda x : softmax(x)*(1 - softmax(x))
    return soft_deri(z_)

##################################

class Network:
    def __init__(self, layer_list, activation_list, x, y):
        
        self.input                 = x
        self.y                     = y
        self.output                = np.zeros(layer_list[-1])
        
        self.layers                = layer_list 
        self.activations           = activation_list             
        
        self.weights = []
        previous = self.input.shape[1]
        
        for layer in self.layers:
            current = layer
            self.weights.append(np.random.rand(previous, current))
            previous = current
        
        self.z         = [] #Keeps track of Z = WX + B
        self.a         = [] #Keeps track of a = g(Z)
    
    ##################################
    def hidden_update(self, idx, hidden):
        z_ = np.dot(hidden ,self.weights[idx])
        self.z.append(z_)
        
        if self.activations[idx] == 'relu':
            hidden_ = relu(z_)
            
        elif self.activations[idx] == 'sigmoid':
            hidden_ = sigmoid(z_)

        elif self.activations[idx] == 'linear':
            hidden_ = linear(z_)
            
        elif self.activations[idx] == 'softmax':  
            hidden_ = softmax(z_)
        
        return hidden_        
        
    def feedforward(self):        
        hidden = self.input
        self.a.append(hidden)
        
        for layer_idx, units in enumerate(self.layers):
            hidden = self.hidden_update(layer_idx, hidden)
            self.a.append(hidden)
        
        self.output = self.a[-1]
        
    ##################################    
    def activate_grad(self, idx):
        
        if self.activations[idx] == 'relu':
            grad_ = relu_deri(self.z[idx])
            
        elif self.activations[idx] == 'sigmoid':
            grad_ = sigmoid_deri(self.z[idx])

        elif self.activations[idx] == 'linear':
            grad_ = linear_deri(self.z[idx])
            
        elif self.activations[idx] == 'softmax':  
            grad_ = softmax_deri(self.z[idx])
            
        return grad_
        
    def backpropagation(self):
        grad_weights = []
        
        if self.layers[-1] == 1:
            gradients = [(self.y - self.a[-1].T).T * self.activate_grad(-1)]

        else:
            l = np.zeros(np.array(self.y).shape)
            for i in range(np.array(self.y).shape[0]):
                r = np.argmax(self.a[-1][i])
                l[i][int(r)] = 1
                
            gradients = [(self.y - l) * self.activate_grad(-1)]
        
        for i in range(len(self.layers)-1):    
            prev_grad = gradients[0].dot(self.weights[-i-1].T) * self.activate_grad(-i-2)
            gradients = [prev_grad] + gradients
            
        grad_weights = [self.a[i].T.dot(d)/self.input.shape[0] for i,d in enumerate(gradients)]
        
        return grad_weights
    
 ###################################
    def train(self, alpha, epochs):
        for e in range(epochs):
            i = 0
            
            self.feedforward()
            grad_weights = self.backpropagation()
            
            for idx in range(len(self.layers)):
                self.weights[idx] -= alpha*grad_weights[idx]
              
    
    def predict(self, x_test):
        self.input = x_test
        self.feedforward()
        return self.output

# IMDB_Reviews

In [0]:
from tensorflow import keras
imdb = keras.datasets.imdb

(train_data, train_labels), (test_data, test_labels) = imdb.load_data(num_words=10000)

# A dictionary mapping words to an integer index
word_index = imdb.get_word_index()

# The first indices are reserved
word_index = {k:(v+3) for k,v in word_index.items()}
word_index["<PAD>"] = 0
word_index["<START>"] = 1
word_index["<UNK>"] = 2  # unknown
word_index["<UNUSED>"] = 3

reverse_word_index = dict([(value, key) for (key, value) in word_index.items()])

def decode_review(text):
    return ' '.join([reverse_word_index.get(i, '?') for i in text])

In [0]:
train_text = []
for i in train_data:
  train_text.append(decode_review(i))

In [0]:
from sklearn.feature_extraction.text import CountVectorizer
vectorizer = CountVectorizer(min_df=0, lowercase=True)
vectorizer.fit(train_text)
one_hot = vectorizer.transform(train_text).toarray()

In [0]:
from sklearn.decomposition import PCA
pca = PCA(n_components=100)
principalComponents = pca.fit_transform(one_hot)

In [0]:
train_X = principalComponents[:1000]
train_Y = np.array([train_labels]).T[:1000].T

In [0]:
model = Network([100,50,1], ['linear', 'relu' ,'sigmoid'], train_X, train_Y)

In [0]:
model.train(0.001, 100)

In [0]:
model.predict(principalComponents[1000:1010]).T

array([[0.99800548, 0.5       , 1.        , 0.5       , 0.5       ,
        1.        , 0.5       , 1.        , 0.5       , 0.5       ]])

In [0]:
train_labels[1000:1010]

array([0, 1, 0, 1, 0, 0, 0, 1, 1, 1])

# Tensorflow Implementation

In [0]:
train_X = principalComponents[:20000]
train_Y = np.array([train_labels]).T[:20000]

from sklearn.preprocessing import OneHotEncoder
enc = OneHotEncoder(handle_unknown='ignore')
enc.fit(train_Y)
train_Y = enc.transform(train_Y).toarray()

In [0]:
test_X = principalComponents[20000:25000]
test_Y = enc.transform(np.array([train_labels]).T[20000:25000]).toarray()

In [0]:
import tensorflow as tf

# Parameters
learning_rate = 0.01
num_steps = 500
batch_size = 100
display_step = 100

# Network Parameters
n_hidden_1 = 50 # 1st layer number of neurons
n_hidden_2 = 25 # 2nd layer number of neurons
num_input = 100 # Input features
num_classes = 2 # Output Classes

# tf Graph input
X = tf.placeholder("float", [None, num_input])
Y = tf.placeholder("float", [None, num_classes])

# Store layers weight & bias
weights = {
    'h1': tf.Variable(tf.random_normal([num_input, n_hidden_1])),
    'h2': tf.Variable(tf.random_normal([n_hidden_1, n_hidden_2])),
    'out': tf.Variable(tf.random_normal([n_hidden_2, num_classes]))
}
biases = {
    'b1': tf.Variable(tf.random_normal([n_hidden_1])),
    'b2': tf.Variable(tf.random_normal([n_hidden_2])),
    'out': tf.Variable(tf.random_normal([num_classes]))
}


# Create model
def neural_net(x):
    # Hidden fully connected layer 
    layer_1 = tf.add(tf.matmul(x, weights['h1']), biases['b1'])
    # Hidden fully connected layer 
    layer_2 = tf.add(tf.matmul(layer_1, weights['h2']), biases['b2'])
    # Output fully connected layer with a neuron for each class
    out_layer = tf.matmul(layer_2, weights['out']) + biases['out']
    return out_layer

# Construct model
logits = neural_net(X)
prediction = tf.nn.softmax(logits)

# Define loss and optimizer
loss_op = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=Y))
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
train_op = optimizer.minimize(loss_op)

# Evaluate model
correct_pred = tf.equal(tf.argmax(prediction, 1), tf.argmax(Y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

# Initialize the variables (i.e. assign their default value)
init = tf.global_variables_initializer()

# Start training
with tf.Session() as sess:

    # Run the initializer
    sess.run(init)

    for step in range(1, num_steps+1):
        
        next_batch = (batch_size*step)%train_X.shape[0]    
        
        batch_x = train_X[next_batch:batch_size+next_batch]
        batch_y = train_Y[next_batch:batch_size+next_batch]
        
        # Run optimization op (backprop)
        
        sess.run(train_op, feed_dict={X: batch_x, Y: batch_y})
        if step % display_step == 0 or step == 1:
            # Calculate batch loss and accuracy
            
            loss, acc = sess.run([loss_op, accuracy], feed_dict={X: batch_x,
                                                                 Y: batch_y})
            print("Step " + str(step) + ", Minibatch Loss= " + \
                  "{:.4f}".format(loss) + ", Training Accuracy= " + \
                  "{:.3f}".format(acc))

    print("Optimization Finished!")

    print("Testing Accuracy:", sess.run(accuracy, feed_dict={X: test_X, Y: test_Y}))

Step 1, Minibatch Loss= 149.0353, Training Accuracy= 0.580
Step 100, Minibatch Loss= 8.4119, Training Accuracy= 0.820
Step 200, Minibatch Loss= 6.0187, Training Accuracy= 0.790
Step 300, Minibatch Loss= 5.3068, Training Accuracy= 0.730
Step 400, Minibatch Loss= 4.7957, Training Accuracy= 0.810
Step 500, Minibatch Loss= 3.5499, Training Accuracy= 0.810
Optimization Finished!
('Testing Accuracy:', 0.7278)


Task
- Report accuracy with layer combination of [100, 75, 75, 2], [100, 16, 16, 2] 
- Add Another Hidden Layer
- Compare loss of different optimisation functions(GD, AdaDelta, RMS Prop)
- Use F1 Score instead of accuracy
- Apply Dropout (use - https://stackoverflow.com/questions/40879504/how-to-apply-drop-out-in-tensorflow-to-improve-the-accuracy-of-neural-network)