In [32]:
import numpy as np

In [33]:
# Load one review per line. Only the trailing newline is stripped, so a final
# line without a newline keeps its last character (slicing with [:-1] would
# drop it). The `with` blocks close the files even if reading fails.
with open('reviews.txt','r') as File: # What we know!
    reviews = [line.rstrip('\n') for line in File]

# Load the matching labels, one per line, normalised to upper case.
with open('labels.txt','r') as File2: # What we WANT to know!
    labels = [line.rstrip('\n').upper() for line in File2]

In [42]:
class SentimentNetwork:
    """Small feed-forward network labelling review text POSITIVE or NEGATIVE.

    The input layer has one node per distinct word seen in the reviews passed
    to the constructor. The hidden layer is linear (no activation) and the
    single output node uses a sigmoid.
    """

    def __init__(self, reviews,labels,hidden_nodes=10,learning_rate = 0.1):
        """Build the vocabulary from `reviews`/`labels` and allocate the weights.

        Args:
            reviews: iterable of review strings; words are separated by " ".
            labels: iterable of label strings.
            hidden_nodes: number of nodes in the hidden layer.
            learning_rate: step size used for the weight updates in `train`.
        """
        self.pre_process_data(reviews, labels)

        self.init_network(len(self.word_dictionary),hidden_nodes, 1, learning_rate)

    def pre_process_data(self, reviews, labels):
        """Collect the distinct words and labels and assign each an index.

        Sets `review_word`, `label_word` (lists of distinct items) and
        `word_dictionary`, `label_dictionary` (item -> index into those lists).
        """
        review_word = set()
        for review in reviews:
            review_word.update(review.split(" "))
        self.review_word = list(review_word)

        self.label_word = list(set(labels))

        self.word_dictionary = {
            word: i for i, word in enumerate(self.review_word)
        }
        # Keyed by the label itself (previously keyed by a stale `word`
        # variable, which produced a single wrong entry).
        self.label_dictionary = {
            label: i for i, label in enumerate(self.label_word)
        }

    def init_network(self, input_nodes, hidden_nodes, output_nodes, learning_rate):
        """Store the layer sizes and learning rate and create the weights."""
        self.input_nodes = input_nodes
        self.hidden_nodes = hidden_nodes
        self.output_nodes = output_nodes

        self.learning_rate = learning_rate

        # Input -> hidden weights start at zero; one row per vocabulary word.
        self.weights_0_1 = np.zeros((self.input_nodes,self.hidden_nodes))

        # Hidden -> output weights are drawn from a normal distribution with
        # standard deviation output_nodes ** -0.5.
        self.weights_1_2 = np.random.normal(0.0, self.output_nodes**-0.5,
                                                (self.hidden_nodes, self.output_nodes))

        # Hidden-layer buffer, reused (zeroed and refilled) for every review.
        self.layer_1 = np.zeros((1,hidden_nodes))

    def label_to_binary(self,label):
        """Return 1 for the label 'POSITIVE' and 0 for anything else."""
        if(label == 'POSITIVE'):
            return 1
        else:
            return 0

    def sigmoid(self,x):
        """Return the logistic sigmoid of `x`."""
        return 1 / (1 + np.exp(-x))

    def sigmoid_output_derivative(self,output):
        """Return the sigmoid derivative given the sigmoid's *output* value."""
        return output * (1 - output)

    def _word_indices(self, words):
        """Return the set of vocabulary indices for the known words in `words`."""
        lookup = self.word_dictionary
        return {lookup[word] for word in words if word in lookup}

    def train(self, training_reviews_all, training_labels):
        """Run one pass of per-sample gradient descent over the given data.

        Args:
            training_reviews_all: sequence of review strings.
            training_labels: sequence of labels, aligned with the reviews.

        Raises:
            ValueError: if the two sequences differ in length.
        """
        # Each review becomes the list of distinct vocabulary indices it
        # contains; words outside the vocabulary are ignored.
        training_reviews = [
            list(self._word_indices(review.split(" ")))
            for review in training_reviews_all
        ]

        if len(training_reviews) != len(training_labels):
            raise ValueError(
                "training_reviews_all and training_labels must have the same length"
            )

        for review, label in zip(training_reviews, training_labels):
            # Forward pass: the hidden layer is the sum of the weight rows of
            # the words present in the review.
            self.layer_1 *= 0
            for index in review:
                self.layer_1 += self.weights_0_1[index]

            layer_2 = self.sigmoid(self.layer_1.dot(self.weights_1_2))

            # Backward pass.
            layer_2_error = layer_2 - self.label_to_binary(label)
            layer_2_delta = layer_2_error * self.sigmoid_output_derivative(layer_2)

            # Computed before weights_1_2 is modified so that the hidden-layer
            # gradient uses the same weights as the forward pass.
            layer_1_error = layer_2_delta.dot(self.weights_1_2.T)
            layer_1_delta = layer_1_error  # hidden layer is linear

            # Update hidden -> output weights (this step was missing, leaving
            # weights_1_2 at its random initial values forever).
            self.weights_1_2 -= self.layer_1.T.dot(layer_2_delta) * self.learning_rate

            # Only the rows of the words that took part in the forward pass
            # receive a gradient.
            for index in review:
                self.weights_0_1[index] -= layer_1_delta[0] * self.learning_rate

    def run(self, review):
        """Return "POSITIVE" or "NEGATIVE" for a single review string.

        NOTE: the review is lower-cased here, while the vocabulary is built
        from the training words exactly as given. Words that contained upper
        case characters during training will therefore not be matched.
        """
        self.layer_1 *= 0
        for index in self._word_indices(review.lower().split(" ")):
            self.layer_1 += self.weights_0_1[index]

        layer_2 = self.sigmoid(self.layer_1.dot(self.weights_1_2))

        if layer_2[0, 0] >= 0.5:
            return "POSITIVE"
        return "NEGATIVE"

    def test(self, testing_reviews, testing_labels):
        """Predict every review and return the fraction predicted correctly.

        Args:
            testing_reviews: sequence of review strings.
            testing_labels: sequence of expected labels, aligned with reviews.

        Returns:
            Accuracy as a float in [0, 1]; 0.0 when there are no reviews.

        Raises:
            ValueError: if the two sequences differ in length.
        """
        if len(testing_reviews) != len(testing_labels):
            raise ValueError(
                "testing_reviews and testing_labels must have the same length"
            )
        if len(testing_reviews) == 0:
            return 0.0

        correct = sum(
            1
            for review, label in zip(testing_reviews, testing_labels)
            if self.run(review) == label
        )
        return correct / len(testing_reviews)

In [43]:
# Everything except the final 1000 samples is used both to build the
# vocabulary and to train the network.
train_reviews, train_labels = reviews[:-1000], labels[:-1000]
mlp = SentimentNetwork(
    train_reviews,
    train_labels,
    10,
    learning_rate=0.01,
)
mlp.train(train_reviews, train_labels)

