In [4]:
import sys # system-specific parameters and functions
import math
import re
import random
import os
import pprint
import pandas as pd
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
from collections import counter

ImportError: cannot import name 'counter'

## Nico Notes

### Preprocessor 

In [3]:
class Preprocessor:
    
    def __init__(self, path):
        raw_text = read_file(path) # see helper function section below
        top_words = Counter(raw_text).most_common()
        words = [word[0] for word in top_words if word[1] >= 3]
        
        if '<unk>' in words:
            words.remove('<unk>')
        
        self.word_dict = {'<unk>':0}
        
        for i in range(1, len(words)):
            self.word_dict[words[i]] = i
        self.vocab_size = len(self.word_dict)
        self.word_dict_reverse = dict(zip(self.word_dict.values(),
                                          self.word_dict.keys()))
        
        self.text_as_index = []
        
        for word in words:
            idx = 0
            if word in self.word_dict:
                idx = self.word_dict[word]
            self.text_as_index.append(idx)
    
    def generate_data(self, path):
        words = read_file(path)
        text_as_index = []
        
        for word in words:
            idx = 0
            if word in self.word_dict:
                idx = self.word_dict[word]
            text_as_index.append(idx)
        return text_as_index            

### The Model

In [5]:
class MyModel:
    
    def __init__(self):
        self.batch_size = 256
        self.embedding_size = config['embedding_size']
        self.window_size = config['window_size']
        self.hidden_layers = config['hidden_units']
        
    def train(self, train_data, validate_data, num_epochs=6):
        if tf.test.is_gpu_available(cuda_only=False, 
                                    min_cuda_compute_capability=None):
            device = '/gpu:0'
            print('Using GPU')
        else:
            device = '/cpu:0'
            print('Using CPU')
        with tf.device(device):
            # inputs will be indexs of the n window size words b4 label
            self.x_input = tf.placeholder(tf.int64, [None, self.window_size])
            #labels will just be indexes of the next word
            self.y_true = tf.placeholder(tf.int64, [None])
            
            # embeddings is the c function
            embeddings = tf.Variable(tf.random_uniform([vocabulary_size, 
                                                        self.embedding_size],
                                                      -1.0, 1.0))
            x_flat = tf.layers.flatten(self.x_input)
            embed = tf.nn.embedding_lookup(embeddings, x_flat)
            
            x_t = tf.reshape(embed, [self.batch_size,
                                     self.window_size * self.embedding_size])
            
            w = tf.Variable(tf.truncated_normal([self.embedding_size * self.window_size,
                                                 vocabulary_size],
                                               stddev=1.0 / math.sqrt(self.embedding_size * self.window_size)))
            b = tf.Variable(tf.random_uniform([vocabulary_size]))
            d = tf.Variable(tf.random_uniform([self.hidden_layers]))
            u = tf.Variable(
                tf.truncated_normal([self.hidden_layers, vocabulary_size], stddev=1.0 / math.sqrt(vocabulary_size)))
            h = tf.Variable(tf.truncated_normal([self.embedding_size * self.window_size, self.hidden_layers],
                                               stddev=1.0 / math.sqrt(self.embedding_size * self.window_size)))
            
            # embedding is [n*b, embedding_size]
            hidden_out = tf.nn.bias_add(tf.matmul(x_t, h), d)
            tan_out = tf.nn.tanh(hidden_out)
            y_logits = tf.nn.bias_add(tf.matmul(x_t, w), b) + tf.matmul(tan_out, u)
            
            y_pred = tf.nn.softmax(y_logits)
            y_pred_cls = tf.argmax(y_pred, axis=1)
            
            y_one_hot = tf.one_hot(self.y_true, vocabulary_size)
            self.cross_entropy = tf.reduce_mean(tf.nn.softmax_cross_entropy_with-logits_v2(logits=y_logits, labels=y_one_hot))
            
            # Constructing the Stochastic Gradient Descent optimizer
            learn_rate = 0.00075
            beta1 = 0.9
            beta2 = 0.999
            optimizer = tf.train.AdamOptimizer(learn_rate, beta1, beta2).minimize(self.cross_entropy)
            correct_prediction = tf.equal(y_pred_cls, self.y_true)
            self.accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
            
            self.session = tf.Session()
            self.session.run(tf.global_variables_initializer())
            saver = tf.train.Saver()
            
            print('Training......')
            global acc_hist_train, cost_hist_train
            patience = 2
            for i in range(num_epochs):
                batches = generate_batches(train_data, self.batch_size, self.window_size)
                total_batches = len(batches)
                batch_count = 0
                last_complete = 0
                num_messages = 10 # number of printouts per epoch
                for batch in batches:
                    batch_count += 1
                    x_batch = batch[0]
                    y_true_batch = batch[1]
                    feed_dict_train = {self.x_input: x_batch,
                                      self.y_true: y_true_batch}
                    self.session.run(optimizer, feed_dict=feed_dict_train)
                    completion = 100 * batch_count / total_batches
                    if batch_count % (int(total_batches / num_messages)) == 0:
                        print('Epoch #%2d-   Batch #%5d:   %4.2f %% completed.' % (i + 1, batch_count, completion))
                        a_t, c_t = self.test(train_data)
                        a, c = self.test(validate_data)
                        acc_hist_train.append(a)
                        cost_hist_train.append(c)
                        
                        if sum(cost_hist_train[-4:]) > sum(cost_hist_train[-8:-4]):
                            patience = patience - 1
                        else:
                            patience = 2
                        
                        if patience == 0:
                            print("Early stopping triggered")
                            save_path = saver.save(self.session, "../models/" + arg_2 + '_' + arg_3 + ".ckpt")
                            print("Model saved in path: %s" % save_path)
                            return
                        
            print('Training Completed')
            save_path = saver.save(self.session, "../models/" + arg_2 + '_' + arg_3 + ".ckpt")
            print("Model saved in path: %s" % save_path)
            return
    
    def restore(self, model_path):
        if tf.test.is_gpu_available(cuda_only=False, min_cuda_compute_capability=None):
            device = '/gpu:0'
            print('Using GPU')
        else:
            device = '/cpu:0'
            print('Using CPU')
        with tf.device(device):
            # inputs will be indexes of the n (window size) words before the label
            self.x_input = tf.placeholder(tf.int64, [None, self.window_size])
            # labels will just be indexes of the next word
            self.y_true = tf.placeholder(tf.int64, [None])

            # embeddings is the c function
            embeddings = tf.Variable(tf.random_uniform([vocabulary_size, self.embedding_size], -1.0, 1.0))
            x_flat = tf.layers.flatten(self.x_input)
            embed = tf.nn.embedding_lookup(embeddings, x_flat)
            x_t = tf.reshape(embed, [self.batch_size, self.window_size * self.embedding_size])
            w = tf.Variable(tf.truncated_normal([self.embedding_size * self.window_size, vocabulary_size],
                                                stddev=1.0 / math.sqrt(self.embedding_size * self.window_size)))
            b = tf.Variable(tf.random_uniform([vocabulary_size]))
            d = tf.Variable(tf.random_uniform([self.hidden_layers]))
            u = tf.Variable(
                tf.truncated_normal([self.hidden_layers, vocabulary_size], stddev=1.0 / math.sqrt(vocabulary_size)))
            h = tf.Variable(tf.truncated_normal([self.embedding_size * self.window_size, self.hidden_layers],
                                                stddev=1.0 / math.sqrt(self.embedding_size * self.window_size)))

            # embed is [n*b, embedding_size]
            hidden_out = tf.nn.bias_add(tf.matmul(x_t, h), d)
            tan_out = tf.nn.tanh(hidden_out)
            y_logits = tf.nn.bias_add(tf.matmul(x_t, w), b) + tf.matmul(tan_out, u)

            y_pred = tf.nn.softmax(y_logits)
            y_pred_cls = tf.argmax(y_pred, axis=1)

            y_one_hot = tf.one_hot(self.y_true, vocabulary_size)
            self.cross_entropy = tf.reduce_mean(
                tf.nn.softmax_cross_entropy_with_logits_v2(logits=y_logits, labels=y_one_hot))
    
            # Construct the SGD Optimizer:
            learn_rate = 0.005
            beta1 = 0.9;
            beta2 = 0.999
            optimizer = tf.train.AdamOptimizer(learn_rate, beta1, beta2).minimize(self.cross_entropy)
            correct_prediction = tf.equal(y_pred_cls, self.y_true)
            self.accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
            
            # don't need extra commands here
            saver = tf.train.Saver()
            
            with tf.Session() as sess:
                # Restores the variables from disk.
                saver.restore(sess, model_path)
                print("Model restored.")
                test_batches = generate_batches(test_data, self.batch-size, self.window_size)
                cost, acc = [], []
                for batch in test_batches:
                    feed_dict_test = {self.x_input: batch[0],
                                      self.y_true: batch[1]}
                    acc.append(sess.run(self.accuracy, feed_dict=feed_dict_test))
                    cost.append(sess.run(self.cross_entropy, feed_dict=feed_dict_test))
                
                avg_acc = sum(acc) / float(len(acc))
                avg_cost = sum(cost) / float(len(cost))
                print("   Accuracy on test-set:   %4.2f %% \n" % (avg_acc * 100),
                      "   Cost on test-set:       %4.2f \n" % avg_cost,
                      "   Perplexity on test-set:       %4.2f \n" % np.exp(avg_cost))
    
    def test(self, test_data):
        test_batches = generate_batches(test_data, self.batch_size, self.window_size)
        cost, acc = [], []
        for batch in test_batches:
            feed_dict_train = {self.x_input: batch[0],
                               self.y_true: batch[1]}
            acc.append(self.session.run(self.accuracy, feed_dict=feed_dict_test))
            cost.append(self.session.run(self.cross_entropy, feed_dict=feed_dict_test))
        avg_acc = sum(acc) / float(len(acc))
        avg_cost = sum(cost) / float(len(cost))
        print("   Accuracy on valid-set:   %4.2f %%" % (avg_acc * 100),
              "   Cost on valid-set:       %4.2f \n" % avg_cost)
        return avg_acc, avg_cost

SyntaxError: keyword can't be an expression (<ipython-input-5-ae9886b1d79e>, line 38)

### Helper Functions