# Proof of Concept Example

## Get the up-to-date data from the form

In [30]:
import gspread
from oauth2client.service_account import ServiceAccountCredentials

class DataGetter():
    """
    An object that has credentials to read Drive documents.
    """
    def __init__(self, credsFile='client_secret.json'):
        """
        Authorises a gspread client from a service-account key file.

        Args:
            credsFile(str) - Path to the service-account JSON key file
        """
        self.scope  = ['https://spreadsheets.google.com/feeds']
        self.creds  = ServiceAccountCredentials.from_json_keyfile_name(credsFile, self.scope)
        self.client = gspread.authorize(self.creds)

    def basicMovementData(self):
        """
        Gets the training data from the first basic movement form and processes
        it into a list of spoken commands and their corresponding labels.

        Returns:
            tuple - (responses, labels): the lower-cased answers of every row and,
                    for each answer, the 0-based position of its column once the
                    first column has been dropped.
        """
        rows      = self.client.open("MovementTrainingData").sheet1.get_all_records()

        responces = []
        labels    = []

        for row in rows:
            # The first column is skipped (presumably the form timestamp - TODO confirm).
            for i, response in enumerate(list(row.values())[1:]):
                labels.append(i)
                # get_all_records() can return numbers for numeric-looking cells,
                # so coerce to text before lower-casing.
                responces.append(str(response).lower())

        return (responces, labels)

## Process the data

The libraries needed for this task are:

In [31]:
import numpy as np
from collections import Counter


In [32]:
# Download the form responses once; every later cell reuses these two lists.
commands, labels = DataGetter().basicMovementData()

I will use counters to store the count of particular words in different scenarios.

In [33]:
# Every command must have exactly one label, otherwise the pairing below is wrong.
assert len(commands) == len(labels)

# Word frequencies over the whole data set.
total_counts     = Counter()
# One counter per scenario; a label is used directly as the index into this list.
sinario_counters = [Counter() for _ in range(max(labels)+1)]

for command, label in zip(commands, labels):
    words = command.split()
    sinario_counters[label].update(words)
    total_counts.update(words)
        

The most common words in each scenario are:

In [34]:
# Print the five most frequent words per scenario, numbering scenarios from 1.
for number, counter in enumerate(sinario_counters, start=1):
    top_words = counter.most_common(5)
    print("Sinario " + str(number) + ": " + str(top_words))

Sinario 1: [('the', 73), ('door', 38), ('through', 35), ('of', 33), ('go', 32)]
Sinario 2: [('the', 115), ('door', 43), ('on', 43), ('your', 39), ('first', 35)]
Sinario 3: [('the', 108), ('door', 41), ('on', 40), ('go', 34), ('your', 29)]
Sinario 4: [('the', 72), ('through', 35), ('door', 32), ('your', 32), ('right', 32)]
Sinario 5: [('the', 98), ('desk', 23), ('behind', 22), ('your', 21), ('of', 21)]
Sinario 6: [('the', 76), ('door', 42), ('through', 37), ('of', 36), ('and', 35)]
Sinario 7: [('the', 81), ('and', 23), ('left', 22), ('turn', 21), ('corridor', 20)]
Sinario 8: [('the', 99), ('door', 40), ('on', 38), ('first', 33), ('your', 32)]
Sinario 9: [('the', 82), ('turn', 41), ('go', 33), ('and', 33), ('your', 31)]
Sinario 10: [('the', 75), ('your', 33), ('go', 32), ('on', 32), ('double', 30)]
Sinario 11: [('the', 129), ('on', 34), ('go', 32), ('of', 30), ('double', 30)]
Sinario 12: [('the', 58), ('up', 33), ('go', 30), ('stairs', 24), ('of', 13)]
Sinario 13: [('the', 134), ('door',

### The full vocabulary

As the input layer to the NN will be a tensor containing a count of every possible word, we need to know the full vocabulary.

In [35]:
# Every distinct word seen in any command.
vocab = set(total_counts)
vocab_size = len(vocab)

In [36]:
# A single row with one column per vocabulary word, initially all zeros.
input_layer = np.zeros(shape=(1, vocab_size))
print("The shape of the input layer tensor is: " + str(input_layer.shape))

The shape of the input layer tensor is: (1, 528)


In [37]:
# Map each word to its column in the input layer.
# Sort first: the iteration order of a set of strings can differ between
# kernel restarts, which would silently reshuffle the columns on every run.
wordToIndex = {word: i for i, word in enumerate(sorted(vocab))}

In [38]:
def fill_input_layer(response: str) -> None:
    """
    Encode a response as word counts in the module-level `input_layer`.

    The array is cleared and then, for every word of the response that has an
    entry in `wordToIndex`, the matching column is set to the number of times
    the word occurs. Words without an entry are ignored.

    Args:
        response(str) - Whitespace separated command text
    Returns:
        None - `input_layer` is modified in place
    """
    # Reset in place so every existing reference to the array sees the new counts.
    input_layer.fill(0)

    for word, count in Counter(response.split()).items():
        index = wordToIndex.get(word)
        # Unseen words have no column; skipping them avoids a KeyError.
        if index is not None:
            input_layer[0][index] = count

In [39]:
# Smoke test: encode the first command into the shared input_layer array.
fill_input_layer(commands[0])

## First Attempt

In [1]:
import time
import sys
import numpy as np

class SentimentNetwork:
    """
    Bag-of-words feed-forward network that maps a text command to an integer label.

    The hidden layer is linear, the output layer is a sigmoid, and the weights
    are updated after every training example.

    With the default single output node the prediction can only ever be 0 or 1,
    so only binary labels are learnable. For more classes pass
    ``output_nodes=max(labels) + 1``: targets are then one-hot encoded and the
    prediction is the index of the strongest output node.
    """

    def __init__(self, commands, labels, hidden_nodes = 10, output_nodes = 1,learning_rate = 0.1):
        """Create a SentimentNetwork with the given settings
        Args:
            commands(list) - List of commands used to build the vocabulary
            labels(list) - List of integer labels associated with the given commands
            hidden_nodes(int) - Number of nodes to create in the hidden layer
            output_nodes(int) - Number of nodes in the output layer: 1 for 0/1 labels,
                                the number of classes otherwise
            learning_rate(float) - Learning rate to use while training
        """
        # Seed NumPy's global generator so the random initial weights are reproducible.
        np.random.seed(1)

        self.pre_process_data(commands, labels)

        self.init_network(len(self.command_vocab), hidden_nodes, output_nodes, learning_rate)

    @staticmethod
    def _tokenize(command):
        """Lower-case a command and split it on any whitespace."""
        return command.lower().split()

    def pre_process_data(self, commands, labels):
        """
        Build the word and label vocabularies.

        Args:
            commands(list) - List of command strings
            labels(list) - List of non-negative integer labels
        Returns:
            None
        """
        command_vocab = set()
        for command in commands:
            command_vocab.update(self._tokenize(command))

        # Sorted so the word -> column mapping is identical on every run.
        self.command_vocab      = sorted(command_vocab)
        self.command_vocab_size = len(self.command_vocab)

        self.word2index = {word: i for i, word in enumerate(self.command_vocab)}

        self.label_vocab      = list(range(max(labels)+1))
        self.label_vocab_size = len(self.label_vocab)

    def init_network(self, input_nodes, hidden_nodes, output_nodes, learning_rate):
        """
        Store the layer sizes and create the weight matrices and input layer.

        Args:
            input_nodes(int) - Size of the input layer (one node per vocabulary word)
            hidden_nodes(int) - Size of the hidden layer
            output_nodes(int) - Size of the output layer
            learning_rate(float) - Step size used for the weight updates
        Returns:
            None
        """
        # Store the number of nodes in input, hidden, and output layers.
        self.input_nodes  = input_nodes
        self.hidden_nodes = hidden_nodes
        self.output_nodes = output_nodes

        # Store the learning rate
        self.learning_rate = learning_rate

        # Input -> hidden weights start at zero.
        self.weights_0_1 = np.zeros((self.input_nodes, self.hidden_nodes))

        # Hidden -> output weights start as random values.
        # Both attribute names refer to the same array.
        self.weights_1_2 = self.weights_hidden_to_output = np.random.normal(
            0.0, self.hidden_nodes**-0.5, (self.hidden_nodes, self.output_nodes))

        # The input layer is a reusable 1 x input_nodes row of word counts.
        self.layer_0 = np.zeros((1,input_nodes))

    def update_input_layer(self,command):
        """
        Fill the input layer with the word counts of a command.

        The command is tokenised the same way as the vocabulary was; words
        that are not in the vocabulary are ignored.

        Args:
            command(string) - the text of the command
        Returns:
            None
        """
        # clear out previous state by resetting the layer to be all 0s
        self.layer_0 *= 0

        for word in self._tokenize(command):
            index = self.word2index.get(word)
            if index is not None:
                self.layer_0[0][index] += 1

    def get_target_for_label(self,label):
        """
        Convert a label into the training target for the output layer.

        Args:
            label(int) - Integer label of a command
        Returns:
            The label itself when there is a single output node, otherwise a
            1 x output_nodes one-hot row with a 1 in the label's column.
        """
        if self.output_nodes == 1:
            return label

        target = np.zeros((1, self.output_nodes))
        target[0][label] = 1
        return target

    def sigmoid(self,x):
        """Logistic function 1 / (1 + e^-x), saturating to 0 or 1 instead of overflowing."""
        with np.errstate(over="ignore", under="ignore"):
            return 1.0 / (1.0 + np.exp(-x))

    def sigmoid_output_2_derivative(self,output):
        """Derivative of the sigmoid expressed in terms of the sigmoid's output."""
        return output * (1 - output)

    def _forward(self, command):
        """Run a forward pass and return (hidden layer output, output layer output)."""
        self.update_input_layer(command)

        # No activation function on the hidden layer.
        hidden_output = np.matmul(self.layer_0, self.weights_0_1)
        final_output  = self.sigmoid(np.matmul(hidden_output, self.weights_1_2))
        return hidden_output, final_output

    def _prediction_from_output(self, final_output):
        """Turn the output layer activations into an integer label."""
        if self.output_nodes == 1:
            # Round half up; the sigmoid keeps the value inside [0, 1].
            return int(final_output.item() >= 0.5)
        return int(np.argmax(final_output))

    def _report_progress(self, i, total, correct, start, count_label, accuracy_label):
        """Overwrite the current stdout line with progress, speed and running accuracy."""
        elapsed_time = float(time.time() - start)
        commands_per_second = i / elapsed_time if elapsed_time > 0 else 0

        sys.stdout.write("\rProgress:" + str(100 * i/float(total))[:4]
                         + "% Speed(commands/sec):" + str(commands_per_second)[0:5]
                         + " #Correct:" + str(correct) + count_label + str(i+1)
                         + accuracy_label + str(correct * 100 / float(i+1))[:4] + "%")

    def train(self, training_commands, training_labels):
        """
        Train on every command once, updating the weights after each one.

        Args:
            training_commands(list) - List of command strings
            training_labels(list) - List of integer labels, same length as the commands
        Returns:
            None - progress is written to stdout
        """
        assert(len(training_commands) == len(training_labels))

        correct_so_far = 0
        start = time.time()
        total = len(training_commands)

        for i in range(total):
            command = training_commands[i]
            label   = training_labels[i]
            target  = self.get_target_for_label(label)

            # -- Forward Pass -- #
            layer_1_output, layer_2_output = self._forward(command)

            # Output error
            layer_2_error = target - layer_2_output
            layer_2_delta = layer_2_error * self.sigmoid_output_2_derivative(layer_2_output)

            # Backpropagated error; the hidden layer is linear, so its delta equals its error.
            layer_1_error = layer_2_delta.dot(self.weights_1_2.T)
            layer_1_delta = layer_1_error

            # Update the weights in place, which also keeps weights_hidden_to_output in sync.
            self.weights_1_2 += layer_1_output.T.dot(layer_2_delta) * self.learning_rate
            self.weights_0_1 += self.layer_0.T.dot(layer_1_delta) * self.learning_rate

            if self.output_nodes == 1:
                is_correct = bool(np.all(np.abs(layer_2_error) < 0.5))
            else:
                is_correct = self._prediction_from_output(layer_2_output) == label
            if is_correct:
                correct_so_far += 1

            self._report_progress(i, total, correct_so_far, start,
                                  " #Trained:", " Training Accuracy:")
            if(i % 2500 == 0):
                print("")

    def test(self, testing_commands, testing_labels):
        """
        Attempts to predict the labels for the given testing_commands,
        and uses the testing_labels to calculate the accuracy of those predictions.

        Args:
            testing_commands(list) - List of command strings
            testing_labels(list) - List of integer labels for those commands
        Returns:
            None - progress and accuracy are written to stdout
        """
        correct = 0
        start = time.time()
        total = len(testing_commands)

        for i in range(total):
            pred = self.run(testing_commands[i])
            if(pred == testing_labels[i]):
                correct += 1

            self._report_progress(i, total, correct, start,
                                  " #Tested:", " Testing Accuracy:")

    def run(self, command):
        """
        Returns the predicted integer label for the given command.

        Args:
            command(string) - the text of the command
        Returns:
            int - 0 or 1 with a single output node, otherwise the index of the
                  output node with the highest activation
        """
        _, final_output = self._forward(command)
        return self._prediction_from_output(final_output)

In [2]:
# Build the network from everything except the last 50 samples, which are held
# out for testing. Needs `commands` and `labels` from the data cells above to
# exist in the kernel. learning_rate overrides the constructor default of 0.1.
mlp = SentimentNetwork(commands[:-50],labels[:-50], learning_rate=0.001)

NameError: name 'commands' is not defined

In [46]:
# Train on the same slice that was used to build the network (all but the last 50 samples).
mlp.train(commands[:-50],labels[:-50])

Progress:0.0% Speed(commands/sec):0.0 #Correct:0 #Trained:1 Training Accuracy:0.0%
Progress:0.10% Speed(commands/sec):843.0 #Correct:0 #Trained:2 Training Accuracy:0.0%Progress:0.20% Speed(commands/sec):1495. #Correct:0 #Trained:3 Training Accuracy:0.0%Progress:0.31% Speed(commands/sec):2052. #Correct:0 #Trained:4 Training Accuracy:0.0%Progress:0.41% Speed(commands/sec):2535. #Correct:0 #Trained:5 Training Accuracy:0.0%Progress:0.52% Speed(commands/sec):2933. #Correct:0 #Trained:6 Training Accuracy:0.0%Progress:0.62% Speed(commands/sec):3311. #Correct:0 #Trained:7 Training Accuracy:0.0%Progress:0.73% Speed(commands/sec):3646. #Correct:0 #Trained:8 Training Accuracy:0.0%Progress:0.83% Speed(commands/sec):3826. #Correct:0 #Trained:9 Training Accuracy:0.0%Progress:0.94% Speed(commands/sec):4059. #Correct:0 #Trained:10 Training Accuracy:0.0%Progress:1.04% Speed(commands/sec):4304. #Correct:0 #Trained:11 Training Accuracy:0.0%Progress:1.14% Speed(commands/sec):4532. #Correct:0 #

In [47]:
# Evaluate on the last 50 samples, which were excluded from training.
mlp.test(commands[-50:],labels[-50:])

Progress:0.0% Speed(commands/sec):0.0 #Correct:0 #Tested:1 Testing Accuracy:0.0%Progress:2.0% Speed(commands/sec):1065. #Correct:0 #Tested:2 Testing Accuracy:0.0%Progress:4.0% Speed(commands/sec):1739. #Correct:0 #Tested:3 Testing Accuracy:0.0%Progress:6.0% Speed(commands/sec):2270. #Correct:0 #Tested:4 Testing Accuracy:0.0%Progress:8.0% Speed(commands/sec):2727. #Correct:0 #Tested:5 Testing Accuracy:0.0%Progress:10.0% Speed(commands/sec):3124. #Correct:0 #Tested:6 Testing Accuracy:0.0%Progress:12.0% Speed(commands/sec):3426. #Correct:0 #Tested:7 Testing Accuracy:0.0%Progress:14.0% Speed(commands/sec):3682. #Correct:0 #Tested:8 Testing Accuracy:0.0%Progress:16.0% Speed(commands/sec):3907. #Correct:0 #Tested:9 Testing Accuracy:0.0%Progress:18.0% Speed(commands/sec):4134. #Correct:0 #Tested:10 Testing Accuracy:0.0%Progress:20.0% Speed(commands/sec):4278. #Correct:0 #Tested:11 Testing Accuracy:0.0%Progress:22.0% Speed(commands/sec):4365. #Correct:0 #Tested:12 Testing Accuracy: