# Proof of Concept Example

## Get the up to date data from the form 

In [1]:
import gspread
from oauth2client.service_account import ServiceAccountCredentials

class DataGetter():
    """
    An object that has cridentials to read Drive documents.
    """
    def __init__(self, credsFile='client_secret.json'):
        self.scope  = ['https://spreadsheets.google.com/feeds']
        self.creds  = ServiceAccountCredentials.from_json_keyfile_name(credsFile, self.scope)
        self.client = gspread.authorize(self.creds)

    def basicMovementData(self):
        """
        Gets the training data from the first basic movement form and processes
        it into a list of spoken commands and their corresponding labels.
        """
        rows      = self.client.open("MovementTrainingData").sheet1.get_all_records()

        responces = []
        labels    = []
        
        
        for row in rows:
            for i, response in enumerate(list(row.values())[1:]):
                labels.append(i)
                responces.append(response.lower())
                
        return (responces, labels)
    
    def makeLabelForSinario(self, number, numVarients):
        label = np.zeros(numVarients)
        label[number]  = 1.0
        return label
        

## Process the data

The libraries needed for this task are:

In [139]:
import numpy as np
from collections import Counter

In [140]:
(commands, labels) = DataGetter().basicMovementData()

I will use counters to store the count of particular words in different sinarios. 

In [141]:
# total_counts     = Counter()
# sinario_counters = [Counter() for _ in range(max(labels)+1)]

# # for sinario, responses in enumerate(data):
# #     for response in responses:
# #         for word in response.split():
# #             total_counts[word.lower()] +=1
# #             sinario_counters[sinario][word]  +=1

# for i, command in enumerate(commands):
#     for word in command.split():
#         sinario_counters[labels[i]][word] +=1
#         total_counts[word]+=1
        

The most common words in each sinario are:

In [142]:
# for i, counter in enumerate(sinario_counters):
#     print("Sinario " + str(i+1) + ": "     + str(sinario_counters[i].most_common(5)))

### The full vocabulary

As the input layer to the NN will be a tensor containinging a count of every word possible, we ned to know the full vocabulary. 

In [143]:
# vocab      = set(total_counts.keys())
# vocab_size = len(vocab)

In [144]:
# input_layer = np.zeros((1, vocab_size))
# print("The shape of the input layer tensor is: " + str(input_layer.shape))

In [145]:
# wordToIndex = {}
# for i, word in enumerate(vocab):
#     wordToIndex[word] = i

In [146]:
# def fill_input_layer(response: str):
    
#     global input_layer
#     input_layer *=0
    
#     words        = response.split()
#     word_counter = Counter()
    
#     for word in words:
#         word_counter[word] +=1

#     for word, count in word_counter.items():
#         input_layer[0][wordToIndex[word]] = count

In [147]:
# fill_input_layer(commands[0])

## First Attemept

In [162]:
import time
import sys
import numpy as np

class SentimentNetwork:
    def __init__(self, commands, labels, hidden_nodes = 10, output_nodes = 1,learning_rate = 0.1):
        """Create a SentimenNetwork with the given settings
        Args:
            commands(list) - List of commands used for training
            labels(list) - List of POSITIVE/NEGATIVE labels associated with the given reviews
            hidden_nodes(int) - Number of nodes to create in the hidden layer
            learning_rate(float) - Learning rate to use while training
        
        """

        np.random.seed(1)

        self.pre_process_data(commands, labels)
        
        self.init_network(len(self.command_vocab), hidden_nodes, output_nodes, learning_rate)

    def pre_process_data(self, commands, labels):
        
        # Pre process the reviews
        command_vocab = set()
        for command in commands:
            for word in command.split():
                command_vocab.add(word)
        self.command_vocab      = list(command_vocab)
        self.command_vocab_size = len(self.command_vocab)
        
        self.word2index = {}
        for i,word in enumerate(self.command_vocab):
            self.word2index[word] = i
            
        label_vocab = set()
        for label in labels:
            command_vocab.add(label)
            
        self.label_vocab_size = len(label_vocab)
                
        self.label_vocab      = [self.label_vec_from_number(l) for l in label_vocab]
        

    def init_network(self, input_nodes, hidden_nodes, output_nodes, learning_rate):
        # Store the number of nodes in input, hidden, and output layers.
        self.input_nodes  = input_nodes
        self.hidden_nodes = hidden_nodes
        self.output_nodes = output_nodes

        # Store the learning rate
        self.learning_rate = learning_rate

        # Initialize weights
        
        # TODO: initialize self.weights_0_1 as a matrix of zeros. These are the weights between
        #       the input layer and the hidden layer.
        
        # I think that the size of the input nodes by the number of hiden nodes in shape
        self.weights_0_1 = np.zeros((self.input_nodes, self.hidden_nodes))
        
        # TODO: initialize self.weights_1_2 as a matrix of random values. 
        #       These are the weights between the hidden layer and the output layer.
        self.weights_1_2 = self.weights_hidden_to_output = np.random.normal(0.0, self.hidden_nodes**-0.5, 
                                       (self.hidden_nodes, self.output_nodes))
        
        # TODO: Create the input layer, a two-dimensional matrix with shape 
        #       1 x input_nodes, with all values initialized to zero
        self.layer_0 = np.zeros((1,input_nodes))
    
    def label_vec_from_number(self, number ):
        label = np.zeros(19) ## TODO
        label[number] = 1
        return label
        
    
        
    def update_input_layer(self,command):
        """
        Args:
            review(string) - the string of the review
        Returns:
            None
        """
        # clear out previous state by resetting the layer to be all 0s
        self.layer_0 *= 0

        for word in command.split(' '):
            if (word in self.word2index.keys()):
                self.layer_0[0][self.word2index[word]] +=1
            
                
    def get_target_for_label(self,label):
        
        return self.label_vec_from_number(label)
    
        
    def sigmoid(self,x):
        return 1.0/(1+(np.e**(-x)))
    
    def sigmoid_output_2_derivative(self,output):
        return output * (1 - output)

    def train(self, training_commands, training_labels, epochs=5):
        assert(len(training_commands) == len(training_labels))
        
        start = time.time()
        
        
        for _ in range(epochs):
        
            correct_so_far = 0

            # loop through all the given reviews and run a forward and backward pass,
            # updating weights for every item
            for i in range(len(training_commands)):

                # TODO: Get the next review and its correct label
                command   = training_commands[i]
                target    = self.get_target_for_label(training_labels[i])

                # -- Forward Pass -- #

                self.update_input_layer(command)

                layer_1_input  = np.matmul(self.layer_0, self.weights_0_1)
                layer_1_output = layer_1_input  # Brief said to not use an activation function on this layer

                layer_2_input   = np.matmul(layer_1_output, self.weights_1_2)
                layer_2_output  = self.sigmoid(layer_2_input)

                # Output error
                layer_2_error = target - layer_2_output 
                layer_2_delta = layer_2_error * self.sigmoid_output_2_derivative(layer_2_output)


                # Backpropagated error
                layer_1_error = layer_2_delta.dot(self.weights_1_2.T) # errors propagated to the hidden layer
                layer_1_delta = layer_1_error # hidden layer gradients - no nonlinearity so it's the same as the error

                # Update the weights
                self.weights_1_2 += layer_1_output.T.dot(layer_2_delta) * self.learning_rate # update hidden-to-output weights with gradient descent step
                self.weights_0_1 += self.layer_0.T.dot(layer_1_delta) * self.learning_rate # update input-to-hidden weights with gradient descent step


                # Is the output correct?
                if (all(error < 0.5) for error in layer_2_error):
                    correct_so_far +=1




                elapsed_time = float(time.time() - start)
                commands_per_second = i / elapsed_time if elapsed_time > 0 else 0

                sys.stdout.write("\rProgress:" + str(100 * i/float(len(training_commands)))[:4] \
                                 + "% Speed(commands/sec):" + str(commands_per_second)[0:5] \
                                 + " #Correct:" + str(correct_so_far) + " #Trained:" + str(i+1) \
                                 + " Training Accuracy:" + str(correct_so_far * 100 / float(i+1))[:4] + "%")
                if(i % 2500 == 0):
                    print("")

    def test(self, testing_commands, testing_labels):
        """
        Attempts to predict the labels for the given testing_reviews,
        and uses the test_labels to calculate the accuracy of those predictions.
        """
        correct = 0
        start = time.time()

        # Loop through each of the given reviews and call run to predict
        # its label. 
        for i in range(len(testing_commands)):
            pred = self.run(testing_commands[i])
#             print(pred[0])
#             print(self.label_vec_from_number(testing_labels[i]))

            
            if(np.array_equal(pred[0], self.label_vec_from_number(testing_labels[i]))):
                correct += 1
                print(testing_labels[i])
            
            # For debug purposes, print out our prediction accuracy and speed 
            # throughout the prediction process. 

            elapsed_time = float(time.time() - start)
            commands_per_second = i / elapsed_time if elapsed_time > 0 else 0
            
            sys.stdout.write("\rProgress:" + str(100 * i/float(len(testing_commands)))[:4] \
                             + "% Speed(commands/sec):" + str(commands_per_second)[0:5] \
                             + " #Correct:" + str(correct) + " #Tested:" + str(i+1) \
                             + " Testing Accuracy:" + str(correct * 100 / float(i+1))[:4] + "%")
    
    def run(self, command):
        """
        Returns a POSITIVE or NEGATIVE prediction for the given command.
        """               
        self.update_input_layer(command.lower())
            
        hidden_input  = np.matmul(self.layer_0, self.weights_0_1)
        hidden_output = hidden_input  # Brief said to not use an activation function on this layer
           
        final_input   = np.matmul(hidden_output, self.weights_1_2)
        final_output  = self.sigmoid(final_input)
        

        
        
        # This is  stupid way of rounding
        for i, val in enumerate(final_output[0]):
            if val < 0.5:
                final_output[0][i] = 0
            else:
                final_output[0][i] = 1

#             if (val - int(val) >= 0.5):
#                 final_output[0][i] =  int (val) + 1
#             else:
#                 final_output[0][i] =  int (val)
            
        return final_output

In [175]:
mlp = SentimentNetwork(commands[:-50],labels[:-50],hidden_nodes=100,output_nodes=19, learning_rate=0.3)

In [176]:
mlp.train(commands[:-50],labels[:-50])

Progress:0.0% Speed(commands/sec):0.0 #Correct:1 #Trained:1 Training Accuracy:100.%
Progress:0.0% Speed(commands/sec):0.0 #Correct:1 #Trained:1 Training Accuracy:100.%y:100.%
Progress:31.2% Speed(commands/sec):291.7 #Correct:300 #Trained:300 Training Accuracy:100.%Progress:0.0% Speed(commands/sec):0.0 #Correct:1 #Trained:1 Training Accuracy:100.%y:100.%
Progress:0.0% Speed(commands/sec):0.0 #Correct:1 #Trained:1 Training Accuracy:100.%y:100.%
Progress:0.0% Speed(commands/sec):0.0 #Correct:1 #Trained:1 Training Accuracy:100.%y:100.%
Progress:99.8% Speed(commands/sec):317.9 #Correct:957 #Trained:957 Training Accuracy:100.%

In [177]:
mlp.test(commands[-50:],labels[-50:])

Progress:0.0% Speed(commands/sec):0.0 #Correct:0 #Tested:1 Testing Accuracy:0.0%Progress:2.0% Speed(commands/sec):124.8 #Correct:0 #Tested:2 Testing Accuracy:0.0%Progress:4.0% Speed(commands/sec):242.0 #Correct:0 #Tested:3 Testing Accuracy:0.0%Progress:6.0% Speed(commands/sec):354.8 #Correct:0 #Tested:4 Testing Accuracy:0.0%Progress:8.0% Speed(commands/sec):462.9 #Correct:0 #Tested:5 Testing Accuracy:0.0%Progress:10.0% Speed(commands/sec):564.5 #Correct:0 #Tested:6 Testing Accuracy:0.0%Progress:12.0% Speed(commands/sec):661.2 #Correct:0 #Tested:7 Testing Accuracy:0.0%Progress:14.0% Speed(commands/sec):751.1 #Correct:0 #Tested:8 Testing Accuracy:0.0%Progress:16.0% Speed(commands/sec):838.9 #Correct:0 #Tested:9 Testing Accuracy:0.0%Progress:18.0% Speed(commands/sec):916.8 #Correct:0 #Tested:10 Testing Accuracy:0.0%Progress:20.0% Speed(commands/sec):998.3 #Correct:0 #Tested:11 Testing Accuracy:0.0%Progress:22.0% Speed(commands/sec):1071. #Correct:0 #Tested:12 Testing Accuracy: