In [1]:
import re
import numpy as np
import pandas as pd
import collections
from sklearn import metrics
from sklearn.preprocessing import LabelEncoder
import tensorflow as tf
from sklearn.cross_validation import train_test_split
from unidecode import unidecode
from nltk.util import ngrams
from tqdm import tqdm
import time



In [2]:
permulaan = [
    'bel',
    'se',
    'ter',
    'men',
    'meng',
    'mem',
    'memper',
    'di',
    'pe',
    'me',
    'ke',
    'ber',
    'pen',
    'per',
]

hujung = ['kan', 'kah', 'lah', 'tah', 'nya', 'an', 'wan', 'wati', 'ita']

def naive_stemmer(word):
    assert isinstance(word, str), 'input must be a string'
    hujung_result = [e for e in hujung if word.endswith(e)]
    if len(hujung_result):
        hujung_result = max(hujung_result, key = len)
        if len(hujung_result):
            word = word[: -len(hujung_result)]
    permulaan_result = [e for e in permulaan if word.startswith(e)]
    if len(permulaan_result):
        permulaan_result = max(permulaan_result, key = len)
        if len(permulaan_result):
            word = word[len(permulaan_result) :]
    return word

In [4]:
def classification_textcleaning(string):
    string = re.sub(
        'http\S+|www.\S+',
        '',
        ' '.join(
            [i for i in string.split() if i.find('#') < 0 and i.find('@') < 0]
        ),
    )
    string = unidecode(string).replace('.', ' . ').replace(',', ' , ')
    string = re.sub('[^A-Za-z ]+', ' ', string)
    string = re.sub(r'[ ]+', ' ', string).strip()
    string = ' '.join(
        [i for i in re.findall('[\\w\']+|[;:\-\(\)&.,!?"]', string) if len(i)]
    )
    string = string.lower().split()
    string = [naive_stemmer(word) for word in string]
    return ' '.join([word for word in string if len(word) > 1])

def convert_sparse_matrix_to_sparse_tensor(X):
    coo = X.tocoo()
    indices = np.mat([coo.row, coo.col]).transpose()
    return tf.SparseTensorValue(indices, coo.col, coo.shape), tf.SparseTensorValue(indices, coo.data, coo.shape)

In [5]:
with open('subjectivity-negative-translated.txt','r') as fopen:
    texts = fopen.read().split('\n')
labels = [0] * len(texts)

with open('subjectivity-positive-translated.txt','r') as fopen:
    positive_texts = fopen.read().split('\n')
labels += [1] * len(positive_texts)
texts += positive_texts

assert len(labels) == len(texts)

In [6]:
x, y = [], []
for i in range(len(texts)):
    s = classification_textcleaning(texts[i])
    if len(s) > 5:
        x.append(s)
        y.append(labels[i])

In [8]:
from sklearn.feature_extraction.text import CountVectorizer
bow_chars = CountVectorizer(ngram_range=(3, 5), analyzer='char_wb', max_features=300000).fit(texts)
delattr(bow_chars, 'stop_words_')
feature_shape = bow_chars.transform(texts[:1]).shape[1]

In [11]:
class Model:
    def __init__(self, vocab_size, learning_rate):
        self.X = tf.sparse_placeholder(tf.int32)
        self.W = tf.sparse_placeholder(tf.int32)
        self.Y = tf.placeholder(tf.int32, [None])
        embeddings = tf.Variable(tf.truncated_normal([vocab_size,128]))
        embed = tf.nn.embedding_lookup_sparse(embeddings, self.X, self.W, combiner='mean')
        self.logits = tf.layers.dense(embed, 2)
        self.cost = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits = self.logits, labels = self.Y))
        self.optimizer = tf.train.AdamOptimizer(learning_rate = learning_rate).minimize(self.cost)
        correct_pred = tf.equal(tf.argmax(self.logits, 1,output_type=tf.int32), self.Y)
        self.accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

In [12]:
sess = tf.InteractiveSession()
model = Model(feature_shape, 1e-4)
sess.run(tf.global_variables_initializer())

  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


In [13]:
saver = tf.train.Saver(tf.trainable_variables())
saver.save(sess, 'fast-text-char/model.ckpt')

'fast-text-char/model.ckpt'

In [14]:
vectors = bow_chars.transform(x)
train_X, test_X, train_Y, test_Y = train_test_split(
    vectors, y, test_size = 0.2
)

In [15]:
from tqdm import tqdm
import time

batch_size = 32
EARLY_STOPPING, CURRENT_CHECKPOINT, CURRENT_ACC, EPOCH = 3, 0, 0, 0

while True:
    lasttime = time.time()
    if CURRENT_CHECKPOINT == EARLY_STOPPING:
        print('break epoch:%d\n' % (EPOCH))
        break

    train_acc, train_loss, test_acc, test_loss = 0, 0, 0, 0
    pbar = tqdm(
        range(0, train_X.shape[0], batch_size), desc = 'train minibatch loop'
    )
    for i in pbar:
        batch_x = convert_sparse_matrix_to_sparse_tensor(train_X[i : min(i + batch_size, train_X.shape[0])])
        batch_y = train_Y[i : min(i + batch_size, train_X.shape[0])]
        batch_x_expand = np.expand_dims(batch_x,axis = 1)
        acc, cost, _ = sess.run(
            [model.accuracy, model.cost, model.optimizer],
            feed_dict = {
                model.Y: batch_y,
                model.X: batch_x[0],
                model.W: batch_x[1],
            },
        )
        assert not np.isnan(cost)
        train_loss += cost
        train_acc += acc
        pbar.set_postfix(cost = cost, accuracy = acc)
    pbar = tqdm(range(0, test_X.shape[0], batch_size), desc = 'test minibatch loop')
    for i in pbar:
        batch_x = convert_sparse_matrix_to_sparse_tensor(test_X[i : min(i + batch_size, test_X.shape[0])])
        batch_y = test_Y[i : min(i + batch_size, test_X.shape[0])]
        batch_x_expand = np.expand_dims(batch_x,axis = 1)
        acc, cost = sess.run(
            [model.accuracy, model.cost],
            feed_dict = {
                model.Y: batch_y,
                model.X: batch_x[0],
                model.W: batch_x[1],
            },
        )
        test_loss += cost
        test_acc += acc
        pbar.set_postfix(cost = cost, accuracy = acc)

    train_loss /= train_X.shape[0] / batch_size
    train_acc /= train_X.shape[0] / batch_size
    test_loss /= test_X.shape[0] / batch_size
    test_acc /= test_X.shape[0] / batch_size

    if test_acc > CURRENT_ACC:
        print(
            'epoch: %d, pass acc: %f, current acc: %f'
            % (EPOCH, CURRENT_ACC, test_acc)
        )
        CURRENT_ACC = test_acc
        CURRENT_CHECKPOINT = 0
    else:
        CURRENT_CHECKPOINT += 1
    print('time taken:', time.time() - lasttime)
    print(
        'epoch: %d, training loss: %f, training acc: %f, valid loss: %f, valid acc: %f\n'
        % (EPOCH, train_loss, train_acc, test_loss, test_acc)
    )
    EPOCH += 1

train minibatch loop: 100%|██████████| 250/250 [00:03<00:00, 70.09it/s, accuracy=1, cost=0.677]    
test minibatch loop: 100%|██████████| 63/63 [00:00<00:00, 342.76it/s, accuracy=0.667, cost=0.643]
train minibatch loop:   3%|▎         | 8/250 [00:00<00:03, 73.91it/s, accuracy=0.594, cost=0.669]

epoch: 0, pass acc: 0.000000, current acc: 0.619836
time taken: 3.7533926963806152
epoch: 0, training loss: 0.685426, training acc: 0.569708, valid loss: 0.679796, valid acc: 0.619836



train minibatch loop: 100%|██████████| 250/250 [00:03<00:00, 72.87it/s, accuracy=1, cost=0.676]    
test minibatch loop: 100%|██████████| 63/63 [00:00<00:00, 358.85it/s, accuracy=0.778, cost=0.617]
train minibatch loop:   3%|▎         | 8/250 [00:00<00:03, 73.65it/s, accuracy=0.75, cost=0.647] 

epoch: 1, pass acc: 0.619836, current acc: 0.717957
time taken: 3.6083834171295166
epoch: 1, training loss: 0.663374, training acc: 0.668465, valid loss: 0.659503, valid acc: 0.717957



train minibatch loop: 100%|██████████| 250/250 [00:03<00:00, 72.79it/s, accuracy=1, cost=0.67]     
test minibatch loop: 100%|██████████| 63/63 [00:00<00:00, 364.50it/s, accuracy=0.889, cost=0.588]
train minibatch loop:   3%|▎         | 8/250 [00:00<00:03, 73.56it/s, accuracy=0.75, cost=0.622] 

epoch: 2, pass acc: 0.717957, current acc: 0.765401
time taken: 3.6092426776885986
epoch: 2, training loss: 0.639591, training acc: 0.741122, valid loss: 0.636817, valid acc: 0.765401



train minibatch loop: 100%|██████████| 250/250 [00:03<00:00, 72.73it/s, accuracy=1, cost=0.66]     
test minibatch loop: 100%|██████████| 63/63 [00:00<00:00, 361.53it/s, accuracy=0.889, cost=0.556]
train minibatch loop:   3%|▎         | 8/250 [00:00<00:03, 72.94it/s, accuracy=0.812, cost=0.594]

epoch: 3, pass acc: 0.765401, current acc: 0.786977
time taken: 3.6130621433258057
epoch: 3, training loss: 0.612778, training acc: 0.789309, valid loss: 0.611288, valid acc: 0.786977



train minibatch loop: 100%|██████████| 250/250 [00:03<00:00, 72.90it/s, accuracy=1, cost=0.645]    
test minibatch loop: 100%|██████████| 63/63 [00:00<00:00, 359.34it/s, accuracy=0.889, cost=0.52] 
train minibatch loop:   3%|▎         | 8/250 [00:00<00:03, 73.11it/s, accuracy=0.812, cost=0.565]

epoch: 4, pass acc: 0.786977, current acc: 0.806043
time taken: 3.613224983215332
epoch: 4, training loss: 0.582990, training acc: 0.815284, valid loss: 0.583503, valid acc: 0.806043



train minibatch loop: 100%|██████████| 250/250 [00:03<00:00, 72.82it/s, accuracy=1, cost=0.625]    
test minibatch loop: 100%|██████████| 63/63 [00:00<00:00, 361.00it/s, accuracy=0.889, cost=0.484]
train minibatch loop:   3%|▎         | 8/250 [00:00<00:03, 72.96it/s, accuracy=0.812, cost=0.534]

epoch: 5, pass acc: 0.806043, current acc: 0.814071
time taken: 3.6099295616149902
epoch: 5, training loss: 0.551289, training acc: 0.833605, valid loss: 0.554787, valid acc: 0.814071



train minibatch loop: 100%|██████████| 250/250 [00:03<00:00, 72.70it/s, accuracy=1, cost=0.598]    
test minibatch loop: 100%|██████████| 63/63 [00:00<00:00, 362.84it/s, accuracy=0.889, cost=0.449]
train minibatch loop:   3%|▎         | 8/250 [00:00<00:03, 72.90it/s, accuracy=0.812, cost=0.503]

epoch: 6, pass acc: 0.814071, current acc: 0.820594
time taken: 3.6144044399261475
epoch: 6, training loss: 0.519261, training acc: 0.843142, valid loss: 0.526669, valid acc: 0.820594



train minibatch loop: 100%|██████████| 250/250 [00:03<00:00, 72.77it/s, accuracy=1, cost=0.566]    
test minibatch loop: 100%|██████████| 63/63 [00:00<00:00, 359.75it/s, accuracy=0.889, cost=0.417]
train minibatch loop:   3%|▎         | 8/250 [00:00<00:03, 73.74it/s, accuracy=0.812, cost=0.474]

epoch: 7, pass acc: 0.820594, current acc: 0.827117
time taken: 3.6120619773864746
epoch: 7, training loss: 0.488437, training acc: 0.850295, valid loss: 0.500409, valid acc: 0.827117



train minibatch loop: 100%|██████████| 250/250 [00:03<00:00, 72.79it/s, accuracy=1, cost=0.529]    
test minibatch loop: 100%|██████████| 63/63 [00:00<00:00, 345.64it/s, accuracy=0.889, cost=0.389]
train minibatch loop:   3%|▎         | 8/250 [00:00<00:03, 72.52it/s, accuracy=0.875, cost=0.447]

epoch: 8, pass acc: 0.827117, current acc: 0.834142
time taken: 3.618224859237671
epoch: 8, training loss: 0.459905, training acc: 0.858075, valid loss: 0.476761, valid acc: 0.834142



train minibatch loop: 100%|██████████| 250/250 [00:03<00:00, 72.80it/s, accuracy=1, cost=0.49]     
test minibatch loop: 100%|██████████| 63/63 [00:00<00:00, 361.02it/s, accuracy=0.889, cost=0.364]
train minibatch loop:   3%|▎         | 8/250 [00:00<00:03, 73.07it/s, accuracy=0.906, cost=0.423]

epoch: 9, pass acc: 0.834142, current acc: 0.839159
time taken: 3.6102097034454346
epoch: 9, training loss: 0.434198, training acc: 0.864977, valid loss: 0.455991, valid acc: 0.839159



train minibatch loop: 100%|██████████| 250/250 [00:03<00:00, 72.82it/s, accuracy=1, cost=0.449]    
test minibatch loop: 100%|██████████| 63/63 [00:00<00:00, 358.81it/s, accuracy=0.889, cost=0.342]
train minibatch loop:   3%|▎         | 8/250 [00:00<00:03, 73.16it/s, accuracy=0.938, cost=0.401]

epoch: 10, pass acc: 0.839159, current acc: 0.842672
time taken: 3.6107470989227295
epoch: 10, training loss: 0.411403, training acc: 0.868867, valid loss: 0.438023, valid acc: 0.842672



train minibatch loop: 100%|██████████| 250/250 [00:03<00:00, 72.80it/s, accuracy=1, cost=0.408]    
test minibatch loop: 100%|██████████| 63/63 [00:00<00:00, 359.27it/s, accuracy=0.889, cost=0.324]
train minibatch loop:   3%|▎         | 8/250 [00:00<00:03, 72.93it/s, accuracy=0.906, cost=0.381]

epoch: 11, pass acc: 0.842672, current acc: 0.844177
time taken: 3.61116623878479
epoch: 11, training loss: 0.391329, training acc: 0.872631, valid loss: 0.422596, valid acc: 0.844177



train minibatch loop: 100%|██████████| 250/250 [00:03<00:00, 72.83it/s, accuracy=1, cost=0.369]    
test minibatch loop: 100%|██████████| 63/63 [00:00<00:00, 359.83it/s, accuracy=0.889, cost=0.309]
train minibatch loop:   3%|▎         | 8/250 [00:00<00:03, 72.94it/s, accuracy=0.906, cost=0.363]

epoch: 12, pass acc: 0.844177, current acc: 0.850198
time taken: 3.609103202819824
epoch: 12, training loss: 0.373658, training acc: 0.877525, valid loss: 0.409375, valid acc: 0.850198



train minibatch loop: 100%|██████████| 250/250 [00:03<00:00, 72.84it/s, accuracy=1, cost=0.331]    
test minibatch loop: 100%|██████████| 63/63 [00:00<00:00, 357.14it/s, accuracy=0.889, cost=0.295]
train minibatch loop:   3%|▎         | 8/250 [00:00<00:03, 73.50it/s, accuracy=0.906, cost=0.347]

epoch: 13, pass acc: 0.850198, current acc: 0.851703
time taken: 3.610186815261841
epoch: 13, training loss: 0.358040, training acc: 0.881917, valid loss: 0.398026, valid acc: 0.851703



train minibatch loop: 100%|██████████| 250/250 [00:03<00:00, 72.89it/s, accuracy=1, cost=0.296]    
test minibatch loop: 100%|██████████| 63/63 [00:00<00:00, 349.37it/s, accuracy=0.889, cost=0.283]
train minibatch loop:   3%|▎         | 8/250 [00:00<00:03, 72.48it/s, accuracy=0.906, cost=0.333]

epoch: 14, pass acc: 0.851703, current acc: 0.855717
time taken: 3.6123716831207275
epoch: 14, training loss: 0.344145, training acc: 0.886435, valid loss: 0.388243, valid acc: 0.855717



train minibatch loop: 100%|██████████| 250/250 [00:03<00:00, 72.68it/s, accuracy=1, cost=0.263]    
test minibatch loop: 100%|██████████| 63/63 [00:00<00:00, 333.86it/s, accuracy=0.889, cost=0.273]
train minibatch loop:   3%|▎         | 8/250 [00:00<00:03, 70.37it/s, accuracy=0.906, cost=0.319]

epoch: 15, pass acc: 0.855717, current acc: 0.857724
time taken: 3.630380868911743
epoch: 15, training loss: 0.331682, training acc: 0.889447, valid loss: 0.379764, valid acc: 0.857724



train minibatch loop: 100%|██████████| 250/250 [00:03<00:00, 72.44it/s, accuracy=1, cost=0.234]    
test minibatch loop: 100%|██████████| 63/63 [00:00<00:00, 363.92it/s, accuracy=0.889, cost=0.264]
train minibatch loop:   3%|▎         | 8/250 [00:00<00:03, 72.11it/s, accuracy=0.906, cost=0.307]

epoch: 16, pass acc: 0.857724, current acc: 0.860233
time taken: 3.625947952270508
epoch: 16, training loss: 0.320406, training acc: 0.893462, valid loss: 0.372368, valid acc: 0.860233



train minibatch loop: 100%|██████████| 250/250 [00:03<00:00, 72.79it/s, accuracy=1, cost=0.207]    
test minibatch loop: 100%|██████████| 63/63 [00:00<00:00, 363.38it/s, accuracy=0.778, cost=0.256]
train minibatch loop:   3%|▎         | 8/250 [00:00<00:03, 73.29it/s, accuracy=0.906, cost=0.295]

time taken: 3.6094651222229004
epoch: 17, training loss: 0.310119, training acc: 0.897227, valid loss: 0.365877, valid acc: 0.859954



train minibatch loop: 100%|██████████| 250/250 [00:03<00:00, 72.86it/s, accuracy=1, cost=0.183]    
test minibatch loop: 100%|██████████| 63/63 [00:00<00:00, 357.29it/s, accuracy=0.778, cost=0.249]
train minibatch loop:   3%|▎         | 8/250 [00:00<00:03, 72.64it/s, accuracy=0.906, cost=0.285]

epoch: 18, pass acc: 0.860233, current acc: 0.861460
time taken: 3.6095030307769775
epoch: 18, training loss: 0.300658, training acc: 0.899736, valid loss: 0.360145, valid acc: 0.861460



train minibatch loop: 100%|██████████| 250/250 [00:03<00:00, 72.88it/s, accuracy=1, cost=0.162]    
test minibatch loop: 100%|██████████| 63/63 [00:00<00:00, 359.76it/s, accuracy=0.778, cost=0.243]
train minibatch loop:   3%|▎         | 8/250 [00:00<00:03, 72.92it/s, accuracy=0.906, cost=0.275]

epoch: 19, pass acc: 0.861460, current acc: 0.862463
time taken: 3.6069769859313965
epoch: 19, training loss: 0.291894, training acc: 0.901870, valid loss: 0.355054, valid acc: 0.862463



train minibatch loop: 100%|██████████| 250/250 [00:03<00:00, 72.76it/s, accuracy=1, cost=0.144]    
test minibatch loop: 100%|██████████| 63/63 [00:00<00:00, 362.89it/s, accuracy=0.778, cost=0.237]
train minibatch loop:   3%|▎         | 8/250 [00:00<00:03, 72.37it/s, accuracy=0.906, cost=0.266]

epoch: 20, pass acc: 0.862463, current acc: 0.863968
time taken: 3.611123561859131
epoch: 20, training loss: 0.283721, training acc: 0.904756, valid loss: 0.350509, valid acc: 0.863968



train minibatch loop: 100%|██████████| 250/250 [00:03<00:00, 72.65it/s, accuracy=1, cost=0.127]    
test minibatch loop: 100%|██████████| 63/63 [00:00<00:00, 356.95it/s, accuracy=0.778, cost=0.232]
train minibatch loop:   3%|▎         | 8/250 [00:00<00:03, 72.66it/s, accuracy=0.906, cost=0.257]

epoch: 21, pass acc: 0.863968, current acc: 0.864972
time taken: 3.619170665740967
epoch: 21, training loss: 0.276056, training acc: 0.906387, valid loss: 0.346434, valid acc: 0.864972



train minibatch loop: 100%|██████████| 250/250 [00:03<00:00, 72.75it/s, accuracy=1, cost=0.113]    
test minibatch loop: 100%|██████████| 63/63 [00:00<00:00, 362.54it/s, accuracy=0.778, cost=0.227]
train minibatch loop:   3%|▎         | 8/250 [00:00<00:03, 72.87it/s, accuracy=0.906, cost=0.249]

epoch: 22, pass acc: 0.864972, current acc: 0.865975
time taken: 3.6117050647735596
epoch: 22, training loss: 0.268832, training acc: 0.910152, valid loss: 0.342765, valid acc: 0.865975



train minibatch loop: 100%|██████████| 250/250 [00:03<00:00, 72.71it/s, accuracy=1, cost=0.1]      
test minibatch loop: 100%|██████████| 63/63 [00:00<00:00, 360.50it/s, accuracy=0.778, cost=0.222]
train minibatch loop:   3%|▎         | 8/250 [00:00<00:03, 73.02it/s, accuracy=0.906, cost=0.241]

epoch: 23, pass acc: 0.865975, current acc: 0.867982
time taken: 3.6148324012756348
epoch: 23, training loss: 0.261993, training acc: 0.913164, valid loss: 0.339452, valid acc: 0.867982



train minibatch loop: 100%|██████████| 250/250 [00:03<00:00, 72.84it/s, accuracy=1, cost=0.0894]   
test minibatch loop: 100%|██████████| 63/63 [00:00<00:00, 359.41it/s, accuracy=0.889, cost=0.218]
train minibatch loop:   3%|▎         | 8/250 [00:00<00:03, 73.32it/s, accuracy=0.906, cost=0.233]

epoch: 24, pass acc: 0.867982, current acc: 0.870268
time taken: 3.6091933250427246
epoch: 24, training loss: 0.255495, training acc: 0.915046, valid loss: 0.336452, valid acc: 0.870268



train minibatch loop: 100%|██████████| 250/250 [00:03<00:00, 72.80it/s, accuracy=1, cost=0.0798]   
test minibatch loop: 100%|██████████| 63/63 [00:00<00:00, 360.80it/s, accuracy=0.889, cost=0.214]
train minibatch loop:   3%|▎         | 8/250 [00:00<00:03, 73.50it/s, accuracy=0.906, cost=0.226]

time taken: 3.610092878341675
epoch: 25, training loss: 0.249301, training acc: 0.916301, valid loss: 0.333730, valid acc: 0.869766



train minibatch loop: 100%|██████████| 250/250 [00:03<00:00, 72.78it/s, accuracy=1, cost=0.0715]   
test minibatch loop: 100%|██████████| 63/63 [00:00<00:00, 349.51it/s, accuracy=0.889, cost=0.21] 
train minibatch loop:   3%|▎         | 8/250 [00:00<00:03, 73.77it/s, accuracy=0.906, cost=0.22] 

time taken: 3.6166813373565674
epoch: 26, training loss: 0.243380, training acc: 0.919061, valid loss: 0.331257, valid acc: 0.869766



train minibatch loop: 100%|██████████| 250/250 [00:03<00:00, 72.87it/s, accuracy=1, cost=0.0642]   
test minibatch loop: 100%|██████████| 63/63 [00:00<00:00, 359.48it/s, accuracy=0.889, cost=0.207]

time taken: 3.6078076362609863
epoch: 27, training loss: 0.237706, training acc: 0.921571, valid loss: 0.329006, valid acc: 0.870268

break epoch:28






In [16]:
real_Y, predict_Y = [], []

pbar = tqdm(
    range(0, test_X.shape[0], batch_size), desc = 'validation minibatch loop'
)
for i in pbar:
    batch_x = convert_sparse_matrix_to_sparse_tensor(test_X[i : min(i + batch_size, test_X.shape[0])])
    batch_y = test_Y[i : min(i + batch_size, test_X.shape[0])]
    predict_Y += np.argmax(
        sess.run(
            model.logits, feed_dict = {model.X: batch_x[0], model.W: batch_x[1], model.Y: batch_y}
        ),
        1,
    ).tolist()
    real_Y += batch_y

validation minibatch loop: 100%|██████████| 63/63 [00:00<00:00, 586.12it/s]


In [17]:
print(
    metrics.classification_report(
        real_Y, predict_Y, target_names = ['negative', 'positive']
    )
)

             precision    recall  f1-score   support

   negative       0.85      0.87      0.86       992
   positive       0.87      0.85      0.86      1001

avg / total       0.86      0.86      0.86      1993



In [18]:
saver.save(sess, 'fast-text-char/model.ckpt')

'fast-text-char/model.ckpt'

In [19]:
import pickle
with open('vectorizer-sparse-subjectivity.pkl','wb') as fopen:
    pickle.dump(bow_chars, fopen)