In [1]:
# Path of the gzip-compressed review file handed to getData(); parse() reads it one record per line.
datafile = 'reviews_Books_5.json.gz'
import numpy as np
from nltk.tokenize import word_tokenize, sent_tokenize

In [5]:
# utility functions for getting data from datafile
import gzip
def parse(path, topN=None):
    """Lazily yield one evaluated record per line of a gzip-compressed file.

    Args:
        path: location of the gzip file. Every line must contain a single
            Python literal expression.
        topN: maximum number of records to yield. ``None`` (default) or a
            negative value means "no limit"; ``0`` yields nothing.

    Yields:
        The object produced by evaluating each line, in file order.
    """
    with gzip.open(path, 'r') as g:
        for lc, l in enumerate(g):
            # Check the limit before yielding so that topN=0 produces no
            # records (previously the whole file was streamed in that case).
            if topN is not None and 0 <= topN <= lc:
                break
            # SECURITY NOTE(review): eval() executes whatever expression the
            # line contains. Only point this at trusted data files.
            yield eval(l)
                
def extractWithFeedback(data, n, minTotalFeedbacks=10):
    """Collect reviews that attracted enough feedback votes.

    Streams records from ``parse(data)``, ignores reviews whose text is
    shorter than 10 characters, keeps those whose total vote count
    (``helpful[1]``) is strictly greater than ``minTotalFeedbacks`` and
    stops as soon as ``n`` reviews have been gathered.

    Returns:
        list of the kept review records, in file order.
    """
    kept = []
    for review in parse(data):
        if len(review['reviewText']) >= 10:
            if review['helpful'][1] > minTotalFeedbacks:
                kept.append(review)
            # Same position as before: evaluated for every review that is
            # long enough, whether or not it was kept.
            if len(kept) == n:
                break
    return kept

def getData(data, totalSample, useCache=True, minTotalFeedbacks=10, split=[0.6,0.2,0.2]):
    """Return the (train, dev, test) splits, from cache or freshly sampled.

    When ``useCache`` is true and all three pickle files exist in the current
    directory they are loaded and returned as-is; the other arguments are not
    consulted on that path. Otherwise ``totalSample`` reviews are extracted,
    shuffled, cut according to the first two fractions of ``split`` (the
    remainder becomes the test set), written to the three pickle files and
    returned.

    NOTE(review): the cache is read with pickle.load, which must only be used
    on files you created yourself.
    """
    import pickle
    import os

    cache_files = ("train_data.p", "dev_data.p", "test_data.p")

    if useCache and all(os.path.isfile(name) for name in cache_files):
        print("using cached data")
        cached = []
        for name in cache_files:
            with open(name, "rb") as f:
                cached.append(pickle.load(f))
        return tuple(cached)

    # Shuffling is probably unnecessary, but it is cheap insurance.
    samples = np.random.permutation(extractWithFeedback(data, totalSample, minTotalFeedbacks))
    n_samples = len(samples)
    train_end = int(split[0] * n_samples)
    dev_end = train_end + int(split[1] * n_samples)
    partitions = (samples[:train_end], samples[train_end:dev_end], samples[dev_end:])

    for name, part in zip(cache_files, partitions):
        with open(name, "wb") as f:
            pickle.dump(part, f)

    return partitions

In [6]:
# Number of qualifying reviews to extract; getData() does not read it when the cached splits are loaded.
total_sample = 10000
# Obtain the train/dev/test splits (loaded from the pickle cache when all three cache files exist).
train_data, dev_data, test_data = getData(datafile, total_sample, useCache=True)

using cached data


In [4]:
# Display the first training record to see which fields a review carries.
train_data[0]

{'asin': '000649885X',
 'helpful': [6, 21],
 'overall': 1.0,
 'reviewText': "After reading The Farseer Trilogy which was very promising, I decided to take another chance with the author and try The Ship of Magic series. This first book is very silly. The concept of a liveship which speaks to it's  occupants was intriguing but it loses it's appeal as the characters were  very annoying, and the magic was very unsophisticated. I tried very hard to  find some good in this novel but it falls short of anything spectacular.  Only for the extreemely imaginative and open minded. If you have your feet  planted firmly on the ground, this fantasy novel is not for you. Goodkind  fans will find this novel a hard read. The Farseer trilogy is far better.",
 'reviewTime': '05 6, 2000',
 'reviewerID': 'A3SPHSI6Q9HO1G',
 'reviewerName': 'Amazon Customer "funnicky"',
 'summary': 'For the extremely imaginative only !',
 'unixReviewTime': 957571200}

In [5]:
# tokenize and preprocess text
import utils; reload(utils)
def preprocessAll():
    """Tokenize every review of the three global splits in place.

    Each record gains a 'procTokens' list: a '<s>' marker before every
    sentence, the canonicalized words of that sentence, and one single
    '</s>' marker at the very end of the review.
    """
    for dataset in (train_data, test_data, dev_data):
        for data in dataset:
            tokens = []
            for sentence in sent_tokenize(data['reviewText']):
                tokens.append('<s>')
                tokens.extend(utils.canonicalize_word(word)
                              for word in word_tokenize(sentence))
            # Only one end marker per review, not one per sentence.
            tokens.append('</s>')
            data['procTokens'] = tokens
# Adds 'procTokens' to every record of train_data, test_data and dev_data (mutates them in place).
preprocessAll()

In [6]:
# Save the processed splits (records now include 'procTokens') so that
# tokenization does not have to be repeated in a new session.
import pickle
with open('train_data_proc.p', 'wb') as f:
    pickle.dump(train_data, f)
with open('dev_data_proc.p', 'wb') as f:
    pickle.dump(dev_data, f)
with open('test_data_proc.p', 'wb') as f:
    pickle.dump(test_data, f)

In [1]:
# Load the processed splits written by the "save processed text" cell.
# NOTE(review): pickle.load can execute arbitrary code; only load files you produced yourself.
import pickle
with open('train_data_proc.p', 'rb') as f:
    train_data = pickle.load(f)
with open('dev_data_proc.p', 'rb') as f:
    dev_data = pickle.load(f)
with open('test_data_proc.p', 'rb') as f:
    test_data = pickle.load(f)

In [2]:
import collections
import itertools

def flatten(list_of_lists):
    """Concatenate the items of every inner iterable into one new list."""
    return [item for inner in list_of_lists for item in inner]

class vocabulary:
    """Frequency-ranked word <-> id mapping built from tokenized reviews.

    Id 0 is always the unknown-word token; the remaining ids are assigned to
    the most frequent tokens in descending order of count.

    Note: token counts are accumulated over the train, test AND dev records,
    so the vocabulary is not derived from the training split alone.
    """
    START_TOKEN = "<s>"
    END_TOKEN = "</s>"
    UNK_TOKEN = "<unk>"

    def __init__(self, train_data, test_data, dev_data, size):
        """Count tokens and build the id tables.

        Args:
            train_data, test_data, dev_data: iterables of records that each
                expose a 'procTokens' token list.
            size: maximum vocabulary size including the unknown token, or
                None to keep every observed token.

        Raises:
            KeyError: if the start or end token did not make it into the
                vocabulary (e.g. it never occurs, or ``size`` is too small).
        """
        # Feed the counter incrementally (same train, test, dev order as
        # before) instead of materialising three concatenated token lists.
        self.unigram_counts = collections.Counter()
        for dataset in (train_data, test_data, dev_data):
            self.unigram_counts.update(
                itertools.chain.from_iterable(t['procTokens'] for t in dataset))

        # Skip a literal "<unk>" found in the data: it already owns id 0, and
        # listing it twice would move UNK_ID away from 0 and leave word_to_id
        # one entry short of id_to_word.
        ranked = [w for w, _ in self.unigram_counts.most_common()
                  if w != self.UNK_TOKEN]
        if size is not None:
            ranked = ranked[:max(size - 1, 0)]
        vocab = [self.UNK_TOKEN] + ranked

        # Assign an id to each word, by frequency
        self.id_to_word = dict(enumerate(vocab))
        # .items() instead of .iteritems(): works on Python 2 and Python 3.
        self.word_to_id = {w: i for i, w in self.id_to_word.items()}
        self.size = len(self.id_to_word)
        if size is not None:
            assert(self.size <= size)

        # Store special IDs
        self.START_ID = self.word_to_id[self.START_TOKEN]
        self.END_ID = self.word_to_id[self.END_TOKEN]
        self.UNK_ID = self.word_to_id[self.UNK_TOKEN]

    def words_to_ids(self, words):
        """Map words to ids; out-of-vocabulary words become UNK_ID."""
        return [self.word_to_id.get(w, self.UNK_ID) for w in words]

    def ids_to_words(self, ids):
        """Map ids back to words; raises KeyError for an unknown id."""
        return [self.id_to_word[i] for i in ids]

    def sentence_to_ids(self, words):
        """Map words to ids and wrap the result in START_ID ... END_ID."""
        return [self.START_ID] + self.words_to_ids(words) + [self.END_ID]

    def ordered_words(self):
        """Return a list of words, ordered by id."""
        return self.ids_to_words(range(self.size))

In [3]:
# Vocabulary size cap (includes the '<unk>' entry at id 0).
V = 10000
# Token counts are taken over all three splits.
vocab = vocabulary(train_data, test_data, dev_data, V)

In [4]:
# Sanity check: number of distinct words that received an id.
len(vocab.word_to_id)

10000

In [5]:
# generate batch, and pad to same length
def batchGenerator(dataset, batch_size, vocab, success_ratio=0.8, maxlength=None):
    """Yield padded, id-encoded mini-batches with binary helpfulness labels.

    Args:
        dataset: sliceable sequence of records exposing 'procTokens' (token
            list) and 'helpful' ([helpful_votes, total_votes]).
        batch_size: number of records per batch (the last batch may be
            smaller).
        vocab: object providing ``words_to_ids(tokens)``.
        success_ratio: a review is labelled 1 when helpful/total is strictly
            greater than this value, otherwise 0.
        maxlength: fixed sequence length. Longer reviews are truncated and
            shorter ones padded with '</s>'. When None, each batch is padded
            to the length of its own longest review.

    Yields:
        (x, y, raw): id sequences, 0/1 labels and the untouched 'helpful'
        pairs for one batch.
    """
    for start in range(0, len(dataset), batch_size):
        batch_data = dataset[start:start + batch_size]
        # Keep the per-batch length in its own variable. Writing it back into
        # `maxlength` made the first batch's length stick, so every later
        # batch was silently truncated to it.
        if maxlength is None:
            batch_maxlength = max(len(d['procTokens']) for d in batch_data)
        else:
            batch_maxlength = maxlength
        x = []
        y = []
        raw = []
        for data in batch_data:
            tokens = data['procTokens']
            if len(tokens) > batch_maxlength:
                tokens = tokens[:batch_maxlength]
            elif len(tokens) < batch_maxlength:
                tokens = tokens + ['</s>'] * (batch_maxlength - len(tokens))
            x.append(vocab.words_to_ids(tokens))
            helpful_votes, total_votes = data["helpful"][0], data["helpful"][1]
            # A review without any votes has no meaningful ratio; treat it as
            # 0.0 (label 0) instead of raising ZeroDivisionError mid-epoch.
            helpful_ratio = helpful_votes * 1.0 / total_votes if total_votes else 0.0
            raw.append(data["helpful"])
            y.append(1 if helpful_ratio > success_ratio else 0)
        yield (x, y, raw)
        
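# Example usage (the length cap must be passed by keyword; the 4th positional argument is success_ratio):
#result = batchGenerator(test_data, 5, vocab, maxlength=200)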

In [6]:
# Smoke test: encode the first test review as a single batch fixed at 400 tokens
# and map the ids back to words to inspect tokenization and '<unk>' replacement.
for r in batchGenerator(test_data[0:1], 1, vocab, 0.8, 400):
    print(vocab.ids_to_words(r[0][0]))

['<s>', '(', 'i', 'am', 'reviewing', 'the', 'DGDGDGDG', '<unk>', 'large', 'print', 'version', 'of', 'the', 'original', 'DGDGDGDG', 'book', 'by', '<unk>', '.', '<s>', 'illustrations', 'are', 'by', '<unk>', '<unk>', '.', '<s>', ')', 'this', 'fanciful', 'book', "'s", 'old-fashioned', 'style', 'and', 'content', 'almost', 'feels', 'as', 'if', 'it', 'were', 'written', 'at', 'the', 'turn', 'of', 'the', '19th', 'century', ',', 'and', 'the', 'james', "'", 'initial', 'misery', 'recalls', 'dickens', '.', '<s>', 'the', 'writing', "'s", 'rough', 'edges', 'make', 'it', 'seem', 'more', 'like', 'a', 'personal', 'story', ',', 'rather', 'than', 'the', 'product', 'of', 'some', 'anonymous', '<unk>', ',', 'the', 'beginning', 'of', 'the', 'book', '(', 'where', 'james', 'magically', 'escapes', 'from', 'his', '<unk>', ')', 'seems', 'contrived', ',', 'the', '<unk>', 'are', 'unbelievably', 'cruel', ',', 'and', 'the', 'writing', 'is', 'somehow', 'flat', '.', '<s>', 'however', ',', 'the', 'book', 'picks', 'up', '

In [6]:
import sklearn.metrics as metrics
def score_batch(pred_probs, targets):
    """Threshold probabilities at 0.5 and score them against the targets.

    Returns:
        dict with the keys "accuracy", "precision", "recall" and "f1".
    """
    pred = []
    for p in pred_probs:
        if p > 0.5:
            pred.append(1)
        else:
            pred.append(0)

    # Evaluated in the same order as the original straight-line code.
    scorers = (
        ("accuracy", metrics.accuracy_score),
        ("precision", metrics.precision_score),
        ("recall", metrics.recall_score),
        ("f1", metrics.f1_score),
    )
    return {name: scorer(targets, pred) for name, scorer in scorers}

In [8]:
# Model Params:
# NOTE(review): the checkpoint name says "rnn", but the LanguageModel class in this
# notebook builds a convolutional network.
trained_filename = 'tf_saved/final_project_rnn_classifier_v2'

max_length = 500                    # fixed number of tokens per review fed to the model
num_classes = 1                     # single logit -> sigmoid probability
vocab_size = len(vocab.word_to_id)
embedding_size = 200
filter_sizes = [3,4,5]              # convolution window heights, in tokens
num_filters = 128                   # filters per window size
learning_rate = 0.01
keep_prob = 0.8                     # not part of model_params; runTraining() passes it to run_epoch()

# Keyword arguments for LanguageModel(**model_params).
model_params = dict(sequence_length=max_length, num_classes=num_classes, vocab_size=vocab_size,
      embedding_size=embedding_size, filter_sizes=filter_sizes, num_filters=num_filters, learning_rate=learning_rate)

In [7]:
import tensorflow as tf

In [8]:
class LanguageModel(object):
    """Convolutional text classifier built as a TensorFlow graph.

    Despite its name, the graph constructed here is: embedding lookup ->
    parallel convolutions (one per filter size) with ReLU and max-pooling
    over all positions -> dropout -> one linear output layer -> sigmoid.

    Tensors/ops exposed to callers: input_x, input_y, dropout_keep_prob,
    learning_rate_, logits_, pred_proba_, point_loss_, loss_, train_step_.
    """

    def __init__(self, sequence_length, num_classes, vocab_size,
      embedding_size, filter_sizes, num_filters, learning_rate, l2_reg_lambda =0.0):
        """Build the whole graph in the current default graph.

        Args:
            sequence_length: fixed number of token ids per input row.
            num_classes: width of the output layer; with 1, input_y is a
                rank-1 float placeholder, otherwise [batch, num_classes].
            vocab_size: number of rows of the embedding matrix.
            embedding_size: number of columns of the embedding matrix.
            filter_sizes: iterable of convolution window heights.
            num_filters: number of filters per window height.
            learning_rate: default value of the learning-rate constant.
            l2_reg_lambda: weight of the L2 penalty on the output layer.
        """
        
        # Accumulates the L2 penalty; only the output layer contributes below.
        l2_loss = tf.constant(0.0)
        
        with tf.name_scope("Training_Parameters"):
            # Both are constants holding defaults; other cells of this notebook
            # override them per run by passing them as feed_dict keys.
            self.learning_rate_ = tf.constant(learning_rate, name="learning_rate")
            self.dropout_keep_prob = tf.constant(1.0, name="dropout_keep_prob")

        self.input_x = tf.placeholder(tf.int32, [None, sequence_length], name="input_x")
        if num_classes == 1:
            self.input_y = tf.placeholder(tf.float32, [None], name="input_y")
        else:
            self.input_y = tf.placeholder(tf.float32, [None, num_classes], name="input_y")
        
        with tf.name_scope("embedding"):
            # Embedding matrix initialised uniformly in [-1, 1).
            W = tf.Variable(
                tf.random_uniform([vocab_size, embedding_size], -1.0, 1.0),
                name="W")
            self.embedded_chars = tf.nn.embedding_lookup(W, self.input_x)
            # Append a trailing dimension of size 1 (used as the channel axis by conv2d).
            self.embedded_chars_expanded = tf.expand_dims(self.embedded_chars, -1)
            
        pooled_outputs = []
        for i, filter_size in enumerate(filter_sizes):
            with tf.name_scope("conv-maxpool-%s" % filter_size):
                # Convolution Layer: each filter spans filter_size tokens and
                # the full embedding width.
                filter_shape = [filter_size, embedding_size, 1, num_filters]
                W = tf.Variable(tf.truncated_normal(filter_shape, stddev=0.1), name="W")
                b = tf.Variable(tf.constant(0.1, shape=[num_filters]), name="b")
                conv = tf.nn.conv2d(
                    self.embedded_chars_expanded,
                    W,
                    strides=[1, 1, 1, 1],
                    padding="VALID",
                    name="conv")
                # Apply nonlinearity
                h = tf.nn.relu(tf.nn.bias_add(conv, b), name="relu")
                # Max-pooling over the outputs: the window covers every valid
                # position, leaving one value per filter.
                pooled = tf.nn.max_pool(
                    h,
                    ksize=[1, sequence_length - filter_size + 1, 1, 1],
                    strides=[1, 1, 1, 1],
                    padding='VALID',
                    name="pool")
                pooled_outputs.append(pooled)

        # Combine all the pooled features
        num_filters_total = num_filters * len(filter_sizes)
        # The axis is passed as the FIRST argument of tf.concat here.
        self.h_pool = tf.concat(3, pooled_outputs)
        self.h_pool_flat = tf.reshape(self.h_pool, [-1, num_filters_total])
        
        with tf.name_scope("dropout"):
            self.h_drop = tf.nn.dropout(self.h_pool_flat, self.dropout_keep_prob)
        
        with tf.name_scope("output"):
            W = tf.Variable(tf.truncated_normal([num_filters_total, num_classes], stddev=0.1), name="W")
            b = tf.Variable(tf.constant(0.1, shape=[num_classes]), name="b")
            self.logits_ = tf.nn.xw_plus_b(self.h_drop, W, b, name="logits")
            self.pred_proba_ = tf.sigmoid(self.logits_, name="pred_proba")
            
            l2_loss += tf.nn.l2_loss(W)
            l2_loss += tf.nn.l2_loss(b)
            
        with tf.name_scope("loss_function"):
            # NOTE(review): tf.squeeze is called without explicit dimensions, so
            # it presumably also drops the batch dimension for a batch of one -
            # confirm this still matches input_y in that case.
            self.point_loss_ = tf.nn.sigmoid_cross_entropy_with_logits(tf.squeeze(self.logits_), self.input_y)
            # Mean per-example loss plus the weighted L2 penalty.
            self.loss_ = tf.reduce_mean(self.point_loss_) + l2_reg_lambda * l2_loss
            
        with tf.name_scope("train_ops"):
            # Earlier experiment kept for reference: clipped-gradient SGD.
            #tvars = tf.trainable_variables()
            #grads, _ = tf.clip_by_global_norm(tf.gradients(self.loss_, tvars),self.max_grad_norm_)
            #optimizer = tf.train.GradientDescentOptimizer(self.learning_rate_)
            #self.train_step_ = optimizer.apply_gradients(zip(grads, tvars))
            #optimizer = tf.train.GradientDescentOptimizer(self.learning_rate_)
            optimizer = tf.train.AdamOptimizer(self.learning_rate_)
            self.train_step_ = optimizer.minimize(self.loss_)

In [11]:
# try to dump out some intermediate data
def dumpIntermediateOutputs(inputBatch):
    """Run a freshly initialised (untrained) model on a few token lists.

    Args:
        inputBatch: non-empty list of token lists. All lists must have the
            same length, because the length of the first one fixes the
            model's sequence_length.

    Returns:
        (pred, logits_): the sigmoid probabilities and the raw logits
        produced for the batch.
    """
    # The previously nested `_flatten` helper was never called and has been
    # removed.
    w = [vocab.words_to_ids(inputs) for inputs in inputBatch]

    with tf.Graph().as_default(), tf.Session() as session:
        with tf.variable_scope("model", reuse=None):
            lm = LanguageModel(sequence_length=len(w[0]), num_classes=num_classes, vocab_size=vocab_size,
                               embedding_size=embedding_size, filter_sizes=filter_sizes,
                               num_filters=num_filters, learning_rate=learning_rate)

        session.run(tf.initialize_all_variables())

        # input_y is fed with dummy labels; only predictions are fetched.
        feed_dict = {lm.input_x: w,
                     lm.dropout_keep_prob: 1.0,
                     lm.input_y: [1]*len(w)}

        pred, logits_ = session.run([lm.pred_proba_, lm.logits_], feed_dict)
        return pred, logits_

In [12]:
# Two whitespace-tokenized sentences of equal length (9 tokens each), run through an untrained model.
dumpIntermediateOutputs(["this is a test text , do some thing".split(), "this is another test text , do more thing".split()])

(array([[ 0.00996649],
        [ 0.00659992]], dtype=float32), array([[-4.59851027],
        [-5.01407528]], dtype=float32))

In [12]:
# baseline score - no training
def baselineScore(dataset):
    """Print the metrics of an untrained model over the whole dataset.

    A new graph and session are created, the model variables are merely
    initialised (no training step is run), the dataset is scored in batches
    of 100 reviews and the resulting metric dict is printed.
    """
    batches = batchGenerator(dataset, 100, vocab, 0.8, max_length)

    with tf.Graph().as_default(), tf.Session() as session:
        with tf.variable_scope("model", reuse=None):
            lm = LanguageModel(**model_params)

        session.run(tf.initialize_all_variables())

        probabilities, labels = [], []
        for w, y, _raw in batches:
            # Dummy labels are fed for input_y; only predictions are fetched.
            batch_probs = session.run(lm.pred_proba_, {
                lm.input_x: w,
                lm.dropout_keep_prob: 1.0,
                lm.input_y: [1]*len(w),
            })
            probabilities.extend(batch_probs)
            labels.extend(y)
        print(score_batch(probabilities, labels))
# Reference scores of an untrained model on each split. Every call builds and
# initialises a new model, so the three results come from different weights.
print("train set baseline")
baselineScore(train_data)
print("dev set baseline")
baselineScore(dev_data)
print("test set baseline")
baselineScore(test_data)

train set baseline
{'f1': 0.65891819400983453, 'recall': 1.0, 'precision': 0.49133333333333334, 'accuracy': 0.49133333333333334}
dev set baseline


  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)


{'f1': 0.0, 'recall': 0.0, 'precision': 0.0, 'accuracy': 0.53649999999999998}
test set baseline
{'f1': 0.0, 'recall': 0.0, 'precision': 0.0, 'accuracy': 0.53049999999999997}


In [9]:
def getScore(session, dataset, lm):
    """Print accuracy/precision/recall/f1 of `lm` over the whole dataset.

    The dataset is fed in batches of 100 reviews with dropout disabled
    (keep probability 1.0); predictions are collected and handed to
    score_batch together with the true labels.
    """
    batches = batchGenerator(dataset, 100, vocab, 0.8, max_length)
    probabilities, labels = [], []
    for w, y, _raw in batches:
        # Dummy labels are fed for input_y; only predictions are fetched.
        batch_probs = session.run(lm.pred_proba_, {
            lm.input_x: w,
            lm.dropout_keep_prob: 1.0,
            lm.input_y: [1]*len(w),
        })
        probabilities.extend(batch_probs)
        labels.extend(y)
    print(score_batch(probabilities, labels))

In [14]:
def run_epoch(lm, session, batch_iterator, train=False,
              verbose=False, tick_s=10, 
              keep_prob=0.8, learning_rate=0.01):
    """Run the model once over every batch and return the summed loss.

    Args:
        lm: model exposing train_step_, loss_, learning_rate_,
            dropout_keep_prob, input_x and input_y.
        session: session used to execute the graph.
        batch_iterator: iterable of (x, y, extra) triples; `extra` is ignored.
        train: when true the training op is run with the given keep_prob;
            otherwise a no-op is run and dropout is disabled (keep 1.0).
        verbose: print a progress line at most every `tick_s` seconds.
        tick_s: minimum number of seconds between progress lines.
        keep_prob: dropout keep probability used while training.
        learning_rate: value fed for the model's learning-rate tensor.

    Returns:
        The sum of the per-batch loss values (not an average).
    """
    # Imported here so the function does not rely on a later notebook cell
    # having imported `time` first.
    import time

    start_time = time.time()
    tick_time = start_time
    total_cost = 0.0
    total_texts = 0

    if train:
        train_op = lm.train_step_
    else:
        train_op = tf.no_op()
        keep_prob = 1.0
    loss = lm.loss_

    for i, (w, y, _) in enumerate(batch_iterator):
        feed_dict = {
            lm.learning_rate_: learning_rate,
            lm.dropout_keep_prob: keep_prob,
            lm.input_x: w,
            lm.input_y: y
        }

        _, loss_val = session.run([train_op, loss], feed_dict)

        total_cost += loss_val
        total_texts += len(w)

        if verbose and (time.time() - tick_time >= tick_s):
            # Each loss_val is one value per batch (LanguageModel.loss_ is a
            # batch mean), so average over batches. Dividing by the number
            # of texts under-reported the loss by a factor of the batch size.
            avg_cost = total_cost / (i + 1)
            # Guard against a zero interval on coarse clocks.
            elapsed = max(time.time() - start_time, 1e-9)
            avg_tps = total_texts / elapsed
            # The "wps" label is kept as-is; the figure is texts per second.
            print("[batch %d]: seen %d texts at %d wps, loss = %.3f" % (i, total_texts, avg_tps, avg_cost))
            tick_time = time.time()
    return total_cost

In [17]:
# run training

num_epochs = 10   # read as a global by runTraining()
batch_size = 50   # read as a global by runTraining()

import time
import utils; reload(utils)
def runTraining(print_interval=5):
    """Train a new model for `num_epochs` epochs and save the final weights.

    Uses the notebook globals model_params, num_epochs, batch_size,
    max_length, keep_prob, learning_rate, train_data, dev_data and
    trained_filename. After each epoch the train and dev scores are printed.
    `print_interval` is accepted but not used.
    """
    with tf.Graph().as_default(), tf.Session() as session:
        tf.set_random_seed(42)
        with tf.variable_scope("model", reuse=None):
            lm = LanguageModel(**model_params)
        session.run(tf.initialize_all_variables())
        saver = tf.train.Saver()

        for epoch in range(1, num_epochs + 1):
            t0_epoch = time.time()
            train_batches = batchGenerator(train_data, batch_size, vocab, 0.8, max_length)
            print("[epoch %d] Starting epoch %d" % (epoch, epoch))
            cost = run_epoch(lm, session, train_batches, train=True,
                             keep_prob=keep_prob, learning_rate=learning_rate)
            print("%s: total loss: %.03f" % ("Training", cost))
            print("[epoch %d] Completed in %s" % (epoch, utils.pretty_timedelta(since=t0_epoch)))

            for heading, dataset in (("train score", train_data), ("dev score", dev_data)):
                print(heading)
                getScore(session, dataset, lm)
        # Save final model
        saver.save(session, trained_filename)
# Train with the hyper-parameters currently held in the notebook globals.
runTraining()

[epoch 1] Starting epoch 1
Training: total loss: 249.434
[epoch 1] Completed in 0:01:48
train score
{'f1': 0.85324470876333736, 'recall': 0.82734056987788329, 'precision': 0.88082340195016251, 'accuracy': 0.86016666666666663}
dev score
{'f1': 0.64444444444444449, 'recall': 0.62567421790722766, 'precision': 0.66437571592210765, 'accuracy': 0.68000000000000005}
[epoch 2] Starting epoch 2
Training: total loss: 105.816
[epoch 2] Completed in 0:01:47
train score
{'f1': 0.92964400202463304, 'recall': 0.93453188602442339, 'precision': 0.92480698220879487, 'accuracy': 0.93049999999999999}
dev score
{'f1': 0.6933888599687662, 'recall': 0.71844660194174759, 'precision': 0.67002012072434602, 'accuracy': 0.70550000000000002}
[epoch 3] Starting epoch 3
Training: total loss: 86.443
[epoch 3] Completed in 0:01:48
train score
{'f1': 0.87171881951653574, 'recall': 0.99694708276797828, 'precision': 0.77444005270092231, 'accuracy': 0.85583333333333333}
dev score
{'f1': 0.70578778135048237, 'recall': 0.94

In [19]:
# Second experiment: smaller learning rate (0.001 instead of 0.01), larger batches
# (100 instead of 50) and L2 regularisation on the output layer.
import time
import utils; reload(utils)
num_epochs = 10
batch_size = 100
learning_rate = 0.001
max_length = 500
num_classes = 1
vocab_size = len(vocab.word_to_id)
embedding_size = 200
filter_sizes = [3,4,5]
num_filters = 128
keep_prob = 0.8
l2_reg_lambda = 0.5

# Separate checkpoint so the first experiment's weights are not overwritten.
trained_filename = 'tf_saved/final_project_rnn_classifier_v2_2'

model_params = dict(sequence_length=max_length, num_classes=num_classes, vocab_size=vocab_size,
      embedding_size=embedding_size, filter_sizes=filter_sizes, num_filters=num_filters, 
      learning_rate=learning_rate, l2_reg_lambda=l2_reg_lambda)

def runTraining(print_interval=5):
    """Train a new model using the hyper-parameters held in notebook globals.

    Builds the model from model_params, runs num_epochs training epochs over
    train_data, prints the train and dev scores after every epoch and finally
    saves the weights to trained_filename. `print_interval` is accepted but
    not used.
    """
    with tf.Graph().as_default(), tf.Session() as session:
        tf.set_random_seed(42)
        with tf.variable_scope("model", reuse=None):
            lm = LanguageModel(**model_params)
        session.run(tf.initialize_all_variables())
        saver = tf.train.Saver()

        for epoch in range(1, num_epochs + 1):
            t0_epoch = time.time()
            epoch_batches = batchGenerator(train_data, batch_size, vocab, 0.8, max_length)
            print("[epoch %d] Starting epoch %d" % (epoch, epoch))
            cost = run_epoch(lm, session, epoch_batches, train=True,
                             keep_prob=keep_prob, learning_rate=learning_rate)
            print("%s: total loss: %.03f" % ("Training", cost))
            print("[epoch %d] Completed in %s" % (epoch, utils.pretty_timedelta(since=t0_epoch)))

            for heading, split_data in (("train score", train_data), ("dev score", dev_data)):
                print(heading)
                getScore(session, split_data, lm)
        # Save final model
        saver.save(session, trained_filename)

# Train with the regularised configuration defined at the top of this cell.
runTraining()

[epoch 1] Starting epoch 1
Training: total loss: 136.856
[epoch 1] Completed in 0:01:48
train score
{'f1': 0.79395277730592839, 'recall': 0.79274084124830391, 'precision': 0.79516842463422932, 'accuracy': 0.79783333333333328}
dev score
{'f1': 0.61497610196494956, 'recall': 0.62459546925566345, 'precision': 0.60564853556485354, 'accuracy': 0.63749999999999996}
[epoch 2] Starting epoch 2
Training: total loss: 97.712
[epoch 2] Completed in 0:01:49
train score
{'f1': 0.89619834710743806, 'recall': 0.91960651289009498, 'precision': 0.87395228884590592, 'accuracy': 0.89533333333333331}
dev score
{'f1': 0.65907966353290437, 'recall': 0.71844660194174759, 'precision': 0.60877513711151732, 'accuracy': 0.65549999999999997}
[epoch 3] Starting epoch 3
Training: total loss: 74.622
[epoch 3] Completed in 0:01:48
train score
{'f1': 0.93996867931094485, 'recall': 0.91621438263229305, 'precision': 0.96498749553411933, 'accuracy': 0.9425}
dev score
{'f1': 0.64810690423162576, 'recall': 0.627831715210355

In [22]:
# Third experiment: learning rate 0.005 for 5 epochs, L2 weight unchanged at 0.5.
num_epochs = 5
batch_size = 100
learning_rate = 0.005
max_length = 500
num_classes = 1
vocab_size = len(vocab.word_to_id)
embedding_size = 200
filter_sizes = [3,4,5]
num_filters = 128
keep_prob = 0.8
l2_reg_lambda = 0.5

trained_filename = 'tf_saved/final_project_rnn_classifier_v2_3'

model_params = dict(sequence_length=max_length, num_classes=num_classes, vocab_size=vocab_size,
      embedding_size=embedding_size, filter_sizes=filter_sizes, num_filters=num_filters, 
      learning_rate=learning_rate, l2_reg_lambda=l2_reg_lambda)

# runTraining() reads the globals assigned above when it is called.
runTraining()

[epoch 1] Starting epoch 1
Training: total loss: 143.671
[epoch 1] Completed in 0:01:47
train score
{'f1': 0.76094552929085302, 'recall': 0.62788331071913162, 'precision': 0.96557120500782467, 'accuracy': 0.8061666666666667}
dev score
{'f1': 0.45041014168530952, 'recall': 0.32578209277238401, 'precision': 0.72946859903381644, 'accuracy': 0.63149999999999995}
[epoch 2] Starting epoch 2
Training: total loss: 78.309
[epoch 2] Completed in 0:01:46
train score
{'f1': 0.91467042429935796, 'recall': 0.99084124830393483, 'precision': 0.84937481826112238, 'accuracy': 0.90916666666666668}
dev score
{'f1': 0.70432178005990598, 'recall': 0.88781014023732474, 'precision': 0.58368794326241136, 'accuracy': 0.65449999999999997}
[epoch 3] Starting epoch 3
Training: total loss: 49.810
[epoch 3] Completed in 0:01:46
train score
{'f1': 0.97651006711409405, 'recall': 0.98710990502035278, 'precision': 0.96613545816733071, 'accuracy': 0.97666666666666668}
dev score
{'f1': 0.70315581854043396, 'recall': 0.769

In [12]:
# get score on test set
# These values repeat the second experiment's configuration, and trained_filename
# points at that experiment's checkpoint ("..._v2_2").
num_epochs = 10
batch_size = 100
learning_rate = 0.001
max_length = 500
num_classes = 1
vocab_size = len(vocab.word_to_id)
embedding_size = 200
filter_sizes = [3,4,5]
num_filters = 128
keep_prob = 0.8
l2_reg_lambda = 0.5

trained_filename = 'tf_saved/final_project_rnn_classifier_v2_2'

model_params = dict(sequence_length=max_length, num_classes=num_classes, vocab_size=vocab_size,
      embedding_size=embedding_size, filter_sizes=filter_sizes, num_filters=num_filters, 
      learning_rate=learning_rate, l2_reg_lambda=l2_reg_lambda)

# Rebuild the graph with the hyper-parameters above, load the trained weights
# from `trained_filename` and print the metrics on the test split.
with tf.Graph().as_default(), tf.Session() as session:
    with tf.variable_scope("model", reuse=None):
        lm = LanguageModel(**model_params)
    # Initialisation stays as a harmless safety net; restore() below
    # overwrites every variable stored in the checkpoint.
    session.run(tf.initialize_all_variables())
    # A single Saver is sufficient (it used to be constructed twice).
    saver = tf.train.Saver()
    saver.restore(session, trained_filename)

    print("test score")
    getScore(session, test_data, lm)

test score
{'f1': 0.72125114995400186, 'recall': 0.8349307774227902, 'precision': 0.6348178137651822, 'accuracy': 0.69699999999999995}
