In [1]:
import re
import numpy as np
import pandas as pd
import collections
from sklearn import metrics
from sklearn.preprocessing import LabelEncoder
import tensorflow as tf
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import train_test_split
from unidecode import unidecode
from nltk.util import ngrams
from tqdm import tqdm
import time

In [2]:
permulaan = [
    'bel',
    'se',
    'ter',
    'men',
    'meng',
    'mem',
    'memper',
    'di',
    'pe',
    'me',
    'ke',
    'ber',
    'pen',
    'per',
]

hujung = ['kan', 'kah', 'lah', 'tah', 'nya', 'an', 'wan', 'wati', 'ita']

def naive_stemmer(word):
    assert isinstance(word, str), 'input must be a string'
    hujung_result = re.findall(r'^(.*?)(%s)$' % ('|'.join(hujung)), word)
    word = hujung_result[0][0] if len(hujung_result) else word
    permulaan_result = re.findall(r'^(.*?)(%s)' % ('|'.join(permulaan[::-1])), word)
    permulaan_result.extend(re.findall(r'^(.*?)(%s)' % ('|'.join(permulaan)), word))
    mula = permulaan_result if len(permulaan_result) else ''
    if len(mula):
        mula = mula[1][1] if len(mula[1][1]) > len(mula[0][1]) else mula[0][1]
    return word.replace(mula, '')

In [3]:
def classification_textcleaning(string):
    string = re.sub(
        'http\S+|www.\S+',
        '',
        ' '.join(
            [i for i in string.split() if i.find('#') < 0 and i.find('@') < 0]
        ),
    )
    string = unidecode(string).replace('.', ' . ').replace(',', ' , ')
    string = re.sub('[^A-Za-z ]+', ' ', string)
    string = re.sub(r'[ ]+', ' ', string).strip()
    string = ' '.join(
        [i for i in re.findall('[\\w\']+|[;:\-\(\)&.,!?"]', string) if len(i)]
    )
    string = string.lower().split()
    string = [(naive_stemmer(word), word) for word in string]
    return (
        ' '.join([word[0] for word in string if len(word[0]) > 1]),
        ' '.join([word[1] for word in string if len(word[0]) > 1]),
    )

def convert_sparse_matrix_to_sparse_tensor(X, limit = 5):
    coo = X.tocoo()
    indices = np.mat([coo.row, coo.col]).transpose()
    coo.data[coo.data > limit] = limit
    return tf.SparseTensorValue(indices, coo.col, coo.shape), tf.SparseTensorValue(indices, coo.data, coo.shape)

In [4]:
with open('subjectivity-negative-translated.txt','r') as fopen:
    texts = fopen.read().split('\n')
labels = [0] * len(texts)

with open('subjectivity-positive-translated.txt','r') as fopen:
    positive_texts = fopen.read().split('\n')
labels += [1] * len(positive_texts)
texts += positive_texts

assert len(labels) == len(texts)

In [5]:
for i in range(len(texts)):
    texts[i] = classification_textcleaning(texts[i])[0]

In [6]:
bow_chars = CountVectorizer(ngram_range=(3, 5), analyzer='char_wb', max_features=300000).fit(texts)
delattr(bow_chars, 'stop_words_')
feature_shape = bow_chars.transform(texts[:1]).shape[1]

In [7]:
class Model:
    def __init__(self, vocab_size, learning_rate):
        self.X = tf.sparse_placeholder(tf.int32)
        self.W = tf.sparse_placeholder(tf.int32)
        self.Y = tf.placeholder(tf.int32, [None])
        embeddings = tf.Variable(tf.truncated_normal([vocab_size,64]))
        embed = tf.nn.embedding_lookup_sparse(embeddings, self.X, self.W, combiner='mean')
        self.logits = tf.layers.dense(embed, 2)
        self.cost = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits = self.logits, labels = self.Y))
        self.optimizer = tf.train.AdamOptimizer(learning_rate = learning_rate).minimize(self.cost)
        correct_pred = tf.equal(tf.argmax(self.logits, 1,output_type=tf.int32), self.Y)
        self.accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

In [8]:
sess = tf.InteractiveSession()
model = Model(feature_shape, 1e-4)
sess.run(tf.global_variables_initializer())

  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


In [9]:
saver = tf.train.Saver(tf.trainable_variables())
saver.save(sess, 'fast-text-char/model.ckpt')

'fast-text-char/model.ckpt'

In [10]:
vectors = bow_chars.transform(texts)
train_X, test_X, train_Y, test_Y = train_test_split(
    vectors, labels, test_size = 0.2
)

In [11]:
from tqdm import tqdm
import time

batch_size = 32
EARLY_STOPPING, CURRENT_CHECKPOINT, CURRENT_ACC, EPOCH = 3, 0, 0, 0

while True:
    lasttime = time.time()
    if CURRENT_CHECKPOINT == EARLY_STOPPING:
        print('break epoch:%d\n' % (EPOCH))
        break

    train_acc, train_loss, test_acc, test_loss = 0, 0, 0, 0
    pbar = tqdm(
        range(0, train_X.shape[0], batch_size), desc = 'train minibatch loop'
    )
    for i in pbar:
        batch_x = convert_sparse_matrix_to_sparse_tensor(train_X[i : min(i + batch_size, train_X.shape[0])])
        batch_y = train_Y[i : min(i + batch_size, train_X.shape[0])]
        batch_x_expand = np.expand_dims(batch_x,axis = 1)
        acc, cost, _ = sess.run(
            [model.accuracy, model.cost, model.optimizer],
            feed_dict = {
                model.Y: batch_y,
                model.X: batch_x[0],
                model.W: batch_x[1],
            },
        )
        assert not np.isnan(cost)
        train_loss += cost
        train_acc += acc
        pbar.set_postfix(cost = cost, accuracy = acc)

    pbar = tqdm(range(0, test_X.shape[0], batch_size), desc = 'test minibatch loop')
    for i in pbar:
        batch_x = convert_sparse_matrix_to_sparse_tensor(test_X[i : min(i + batch_size, test_X.shape[0])])
        batch_y = test_Y[i : min(i + batch_size, test_X.shape[0])]
        batch_x_expand = np.expand_dims(batch_x,axis = 1)
        acc, cost = sess.run(
            [model.accuracy, model.cost],
            feed_dict = {
                model.Y: batch_y,
                model.X: batch_x[0],
                model.W: batch_x[1],
            },
        )
        test_loss += cost
        test_acc += acc
        pbar.set_postfix(cost = cost, accuracy = acc)

    train_loss /= train_X.shape[0] / batch_size
    train_acc /= train_X.shape[0] / batch_size
    test_loss /= test_X.shape[0] / batch_size
    test_acc /= test_X.shape[0] / batch_size

    if test_acc > CURRENT_ACC:
        print(
            'epoch: %d, pass acc: %f, current acc: %f'
            % (EPOCH, CURRENT_ACC, test_acc)
        )
        CURRENT_ACC = test_acc
        CURRENT_CHECKPOINT = 0
    else:
        CURRENT_CHECKPOINT += 1

    print('time taken:', time.time() - lasttime)
    print(
        'epoch: %d, training loss: %f, training acc: %f, valid loss: %f, valid acc: %f\n'
        % (EPOCH, train_loss, train_acc, test_loss, test_acc)
    )
    EPOCH += 1

train minibatch loop: 100%|██████████| 250/250 [00:01<00:00, 132.66it/s, accuracy=0, cost=0.723]    
test minibatch loop: 100%|██████████| 63/63 [00:00<00:00, 368.76it/s, accuracy=0.333, cost=0.715]
train minibatch loop:   6%|▌         | 15/250 [00:00<00:01, 140.11it/s, accuracy=0.562, cost=0.689]

epoch: 0, pass acc: 0.000000, current acc: 0.584379
time taken: 2.057943344116211
epoch: 0, training loss: 0.691504, training acc: 0.547371, valid loss: 0.689925, valid acc: 0.584379



train minibatch loop: 100%|██████████| 250/250 [00:01<00:00, 141.50it/s, accuracy=0, cost=0.707]    
test minibatch loop: 100%|██████████| 63/63 [00:00<00:00, 386.29it/s, accuracy=0.444, cost=0.702]
train minibatch loop:   6%|▌         | 14/250 [00:00<00:01, 139.49it/s, accuracy=0.688, cost=0.657]

epoch: 1, pass acc: 0.584379, current acc: 0.672465
time taken: 1.940366506576538
epoch: 1, training loss: 0.676703, training acc: 0.641486, valid loss: 0.674847, valid acc: 0.672465



train minibatch loop: 100%|██████████| 250/250 [00:01<00:00, 140.84it/s, accuracy=1, cost=0.691]    
test minibatch loop: 100%|██████████| 63/63 [00:00<00:00, 381.39it/s, accuracy=0.556, cost=0.688]
train minibatch loop:   6%|▌         | 14/250 [00:00<00:01, 139.52it/s, accuracy=0.656, cost=0.655]

epoch: 2, pass acc: 0.672465, current acc: 0.731951
time taken: 1.9460294246673584
epoch: 2, training loss: 0.660657, training acc: 0.716276, valid loss: 0.658011, valid acc: 0.731951



train minibatch loop: 100%|██████████| 250/250 [00:01<00:00, 141.21it/s, accuracy=1, cost=0.673]    
test minibatch loop: 100%|██████████| 63/63 [00:00<00:00, 383.47it/s, accuracy=0.667, cost=0.675]
train minibatch loop:   6%|▌         | 14/250 [00:00<00:01, 139.29it/s, accuracy=0.719, cost=0.616]

epoch: 3, pass acc: 0.731951, current acc: 0.769861
time taken: 1.9369497299194336
epoch: 3, training loss: 0.642434, training acc: 0.762329, valid loss: 0.638832, valid acc: 0.769861



train minibatch loop: 100%|██████████| 250/250 [00:01<00:00, 140.63it/s, accuracy=1, cost=0.652]    
test minibatch loop: 100%|██████████| 63/63 [00:00<00:00, 382.93it/s, accuracy=0.667, cost=0.661]
train minibatch loop:   6%|▌         | 15/250 [00:00<00:01, 140.70it/s, accuracy=0.688, cost=0.613]

epoch: 4, pass acc: 0.769861, current acc: 0.797960
time taken: 1.9444482326507568
epoch: 4, training loss: 0.621668, training acc: 0.791316, valid loss: 0.617237, valid acc: 0.797960



train minibatch loop: 100%|██████████| 250/250 [00:01<00:00, 140.84it/s, accuracy=1, cost=0.627]    
test minibatch loop: 100%|██████████| 63/63 [00:00<00:00, 387.88it/s, accuracy=0.667, cost=0.646]
train minibatch loop:   6%|▌         | 15/250 [00:00<00:01, 140.67it/s, accuracy=0.75, cost=0.588] 

epoch: 5, pass acc: 0.797960, current acc: 0.815019
time taken: 1.939239263534546
epoch: 5, training loss: 0.598515, training acc: 0.810892, valid loss: 0.593676, valid acc: 0.815019



train minibatch loop: 100%|██████████| 250/250 [00:01<00:00, 141.67it/s, accuracy=1, cost=0.599]    
test minibatch loop: 100%|██████████| 63/63 [00:00<00:00, 380.66it/s, accuracy=0.667, cost=0.63] 
train minibatch loop:   6%|▌         | 15/250 [00:00<00:01, 140.39it/s, accuracy=0.75, cost=0.562] 

epoch: 6, pass acc: 0.815019, current acc: 0.826058
time taken: 1.9318571090698242
epoch: 6, training loss: 0.573608, training acc: 0.827331, valid loss: 0.568980, valid acc: 0.826058



train minibatch loop: 100%|██████████| 250/250 [00:01<00:00, 141.23it/s, accuracy=1, cost=0.567]    
test minibatch loop: 100%|██████████| 63/63 [00:00<00:00, 320.40it/s, accuracy=0.667, cost=0.615]
train minibatch loop:   5%|▌         | 13/250 [00:00<00:01, 128.96it/s, accuracy=0.781, cost=0.559]

epoch: 7, pass acc: 0.826058, current acc: 0.838602
time taken: 1.9686055183410645
epoch: 7, training loss: 0.547853, training acc: 0.837244, valid loss: 0.544119, valid acc: 0.838602



train minibatch loop: 100%|██████████| 250/250 [00:01<00:00, 138.95it/s, accuracy=1, cost=0.532]    
test minibatch loop: 100%|██████████| 63/63 [00:00<00:00, 383.18it/s, accuracy=0.667, cost=0.599]
train minibatch loop:   6%|▌         | 14/250 [00:00<00:01, 139.49it/s, accuracy=0.75, cost=0.51]  

epoch: 8, pass acc: 0.838602, current acc: 0.844121
time taken: 1.9654672145843506
epoch: 8, training loss: 0.522203, training acc: 0.846154, valid loss: 0.519986, valid acc: 0.844121



train minibatch loop: 100%|██████████| 250/250 [00:01<00:00, 136.07it/s, accuracy=1, cost=0.495]    
test minibatch loop: 100%|██████████| 63/63 [00:00<00:00, 386.74it/s, accuracy=0.667, cost=0.585]
train minibatch loop:   6%|▌         | 15/250 [00:00<00:01, 140.88it/s, accuracy=0.75, cost=0.485] 

epoch: 9, pass acc: 0.844121, current acc: 0.848135
time taken: 2.002082109451294
epoch: 9, training loss: 0.497461, training acc: 0.852177, valid loss: 0.497258, valid acc: 0.848135



train minibatch loop: 100%|██████████| 250/250 [00:01<00:00, 140.88it/s, accuracy=1, cost=0.457]    
test minibatch loop: 100%|██████████| 63/63 [00:00<00:00, 385.45it/s, accuracy=0.667, cost=0.571]
train minibatch loop:   6%|▌         | 15/250 [00:00<00:01, 140.58it/s, accuracy=0.812, cost=0.462]

epoch: 10, pass acc: 0.848135, current acc: 0.849640
time taken: 1.9400067329406738
epoch: 10, training loss: 0.474194, training acc: 0.856695, valid loss: 0.476349, valid acc: 0.849640



train minibatch loop: 100%|██████████| 250/250 [00:01<00:00, 141.15it/s, accuracy=1, cost=0.419]    
test minibatch loop: 100%|██████████| 63/63 [00:00<00:00, 384.11it/s, accuracy=0.667, cost=0.559]
train minibatch loop:   6%|▌         | 15/250 [00:00<00:01, 141.41it/s, accuracy=0.844, cost=0.44] 

epoch: 11, pass acc: 0.849640, current acc: 0.855661
time taken: 1.9369804859161377
epoch: 11, training loss: 0.452712, training acc: 0.862718, valid loss: 0.457439, valid acc: 0.855661



train minibatch loop: 100%|██████████| 250/250 [00:01<00:00, 140.92it/s, accuracy=1, cost=0.382]    
test minibatch loop: 100%|██████████| 63/63 [00:00<00:00, 384.54it/s, accuracy=0.667, cost=0.549]
train minibatch loop:   6%|▌         | 15/250 [00:00<00:01, 140.83it/s, accuracy=0.844, cost=0.42] 

epoch: 12, pass acc: 0.855661, current acc: 0.858170
time taken: 1.9396755695343018
epoch: 12, training loss: 0.433118, training acc: 0.867863, valid loss: 0.440528, valid acc: 0.858170



train minibatch loop: 100%|██████████| 250/250 [00:01<00:00, 141.31it/s, accuracy=1, cost=0.347]    
test minibatch loop: 100%|██████████| 63/63 [00:00<00:00, 383.73it/s, accuracy=0.667, cost=0.54] 
train minibatch loop:   6%|▌         | 15/250 [00:00<00:01, 140.98it/s, accuracy=0.844, cost=0.402]

epoch: 13, pass acc: 0.858170, current acc: 0.861683
time taken: 1.9351062774658203
epoch: 13, training loss: 0.415371, training acc: 0.871000, valid loss: 0.425510, valid acc: 0.861683



train minibatch loop: 100%|██████████| 250/250 [00:01<00:00, 140.95it/s, accuracy=1, cost=0.313]    
test minibatch loop: 100%|██████████| 63/63 [00:00<00:00, 383.26it/s, accuracy=0.667, cost=0.532]
train minibatch loop:   6%|▌         | 15/250 [00:00<00:01, 142.03it/s, accuracy=0.844, cost=0.384]

epoch: 14, pass acc: 0.861683, current acc: 0.862686
time taken: 1.9404613971710205
epoch: 14, training loss: 0.399342, training acc: 0.872882, valid loss: 0.412217, valid acc: 0.862686



train minibatch loop: 100%|██████████| 250/250 [00:01<00:00, 141.19it/s, accuracy=1, cost=0.282]    
test minibatch loop: 100%|██████████| 63/63 [00:00<00:00, 382.06it/s, accuracy=0.667, cost=0.526]
train minibatch loop:   6%|▌         | 14/250 [00:00<00:01, 138.86it/s, accuracy=0.875, cost=0.393]

epoch: 15, pass acc: 0.862686, current acc: 0.863188
time taken: 1.937262773513794
epoch: 15, training loss: 0.384863, training acc: 0.876522, valid loss: 0.400461, valid acc: 0.863188



train minibatch loop: 100%|██████████| 250/250 [00:01<00:00, 139.86it/s, accuracy=1, cost=0.254]    
test minibatch loop: 100%|██████████| 63/63 [00:00<00:00, 379.77it/s, accuracy=0.667, cost=0.521]
train minibatch loop:   6%|▌         | 15/250 [00:00<00:01, 140.81it/s, accuracy=0.875, cost=0.384]

epoch: 16, pass acc: 0.863188, current acc: 0.865697
time taken: 1.9556941986083984
epoch: 16, training loss: 0.371752, training acc: 0.879659, valid loss: 0.390054, valid acc: 0.865697



train minibatch loop: 100%|██████████| 250/250 [00:01<00:00, 141.06it/s, accuracy=1, cost=0.228]    
test minibatch loop: 100%|██████████| 63/63 [00:00<00:00, 384.90it/s, accuracy=0.667, cost=0.517]
train minibatch loop:   6%|▌         | 14/250 [00:00<00:01, 139.95it/s, accuracy=0.875, cost=0.34] 

epoch: 17, pass acc: 0.865697, current acc: 0.867704
time taken: 1.9377660751342773
epoch: 17, training loss: 0.359836, training acc: 0.881917, valid loss: 0.380819, valid acc: 0.867704



train minibatch loop: 100%|██████████| 250/250 [00:01<00:00, 141.01it/s, accuracy=1, cost=0.204]    
test minibatch loop: 100%|██████████| 63/63 [00:00<00:00, 385.50it/s, accuracy=0.667, cost=0.514]
train minibatch loop:   6%|▌         | 14/250 [00:00<00:01, 138.52it/s, accuracy=0.875, cost=0.37] 

epoch: 18, pass acc: 0.867704, current acc: 0.870714
time taken: 1.9382882118225098
epoch: 18, training loss: 0.348954, training acc: 0.884804, valid loss: 0.372599, valid acc: 0.870714



train minibatch loop: 100%|██████████| 250/250 [00:01<00:00, 141.17it/s, accuracy=1, cost=0.183]    
test minibatch loop: 100%|██████████| 63/63 [00:00<00:00, 383.95it/s, accuracy=0.667, cost=0.512]
train minibatch loop:   6%|▌         | 15/250 [00:00<00:01, 140.10it/s, accuracy=0.875, cost=0.315]

epoch: 19, pass acc: 0.870714, current acc: 0.872219
time taken: 1.9366741180419922
epoch: 19, training loss: 0.338965, training acc: 0.886435, valid loss: 0.365256, valid acc: 0.872219



train minibatch loop: 100%|██████████| 250/250 [00:01<00:00, 141.42it/s, accuracy=1, cost=0.164]    
test minibatch loop: 100%|██████████| 63/63 [00:00<00:00, 383.66it/s, accuracy=0.667, cost=0.51] 
train minibatch loop:   6%|▌         | 15/250 [00:00<00:01, 140.55it/s, accuracy=0.906, cost=0.359]

epoch: 20, pass acc: 0.872219, current acc: 0.873223
time taken: 1.9462225437164307
epoch: 20, training loss: 0.329747, training acc: 0.889070, valid loss: 0.358671, valid acc: 0.873223



train minibatch loop: 100%|██████████| 250/250 [00:01<00:00, 140.52it/s, accuracy=1, cost=0.147]    
test minibatch loop: 100%|██████████| 63/63 [00:00<00:00, 383.69it/s, accuracy=0.667, cost=0.509]
train minibatch loop:   6%|▌         | 15/250 [00:00<00:01, 140.95it/s, accuracy=0.875, cost=0.293]

time taken: 1.9455444812774658
epoch: 21, training loss: 0.321195, training acc: 0.890952, valid loss: 0.352742, valid acc: 0.873223



train minibatch loop: 100%|██████████| 250/250 [00:01<00:00, 141.19it/s, accuracy=1, cost=0.131]    
test minibatch loop: 100%|██████████| 63/63 [00:00<00:00, 280.02it/s, accuracy=0.667, cost=0.509]
train minibatch loop:   6%|▌         | 15/250 [00:00<00:01, 141.66it/s, accuracy=0.875, cost=0.282]

epoch: 22, pass acc: 0.873223, current acc: 0.874226
time taken: 1.9974935054779053
epoch: 22, training loss: 0.313220, training acc: 0.893964, valid loss: 0.347384, valid acc: 0.874226



train minibatch loop: 100%|██████████| 250/250 [00:01<00:00, 140.80it/s, accuracy=1, cost=0.118]    
test minibatch loop: 100%|██████████| 63/63 [00:00<00:00, 382.06it/s, accuracy=0.667, cost=0.509]
train minibatch loop:   6%|▌         | 15/250 [00:00<00:01, 141.55it/s, accuracy=0.875, cost=0.272]

epoch: 23, pass acc: 0.874226, current acc: 0.875230
time taken: 1.943094253540039
epoch: 23, training loss: 0.305747, training acc: 0.896725, valid loss: 0.342522, valid acc: 0.875230



train minibatch loop: 100%|██████████| 250/250 [00:01<00:00, 141.11it/s, accuracy=1, cost=0.106]    
test minibatch loop: 100%|██████████| 63/63 [00:00<00:00, 381.89it/s, accuracy=0.667, cost=0.51] 
train minibatch loop:   6%|▌         | 15/250 [00:00<00:01, 140.93it/s, accuracy=0.875, cost=0.263]

time taken: 1.9383635520935059
epoch: 24, training loss: 0.298715, training acc: 0.898356, valid loss: 0.338095, valid acc: 0.874728



train minibatch loop: 100%|██████████| 250/250 [00:01<00:00, 141.28it/s, accuracy=1, cost=0.0949]   
test minibatch loop: 100%|██████████| 63/63 [00:00<00:00, 383.35it/s, accuracy=0.667, cost=0.511]
train minibatch loop:   6%|▌         | 14/250 [00:00<00:01, 139.64it/s, accuracy=0.875, cost=0.254]

time taken: 1.935624599456787
epoch: 25, training loss: 0.292069, training acc: 0.900238, valid loss: 0.334051, valid acc: 0.874728



train minibatch loop: 100%|██████████| 250/250 [00:01<00:00, 140.88it/s, accuracy=1, cost=0.0854]   
test minibatch loop: 100%|██████████| 63/63 [00:00<00:00, 382.41it/s, accuracy=0.667, cost=0.512]
train minibatch loop:   6%|▌         | 14/250 [00:00<00:01, 137.77it/s, accuracy=0.906, cost=0.333]

epoch: 26, pass acc: 0.875230, current acc: 0.876233
time taken: 1.9413058757781982
epoch: 26, training loss: 0.285767, training acc: 0.901493, valid loss: 0.330346, valid acc: 0.876233



train minibatch loop: 100%|██████████| 250/250 [00:01<00:00, 139.93it/s, accuracy=1, cost=0.0769]   
test minibatch loop: 100%|██████████| 63/63 [00:00<00:00, 382.77it/s, accuracy=0.667, cost=0.514]
train minibatch loop:   6%|▌         | 15/250 [00:00<00:01, 140.52it/s, accuracy=0.906, cost=0.238]

epoch: 27, pass acc: 0.876233, current acc: 0.879244
time taken: 1.9531888961791992
epoch: 27, training loss: 0.279769, training acc: 0.903376, valid loss: 0.326941, valid acc: 0.879244



train minibatch loop: 100%|██████████| 250/250 [00:01<00:00, 140.69it/s, accuracy=1, cost=0.0694]   
test minibatch loop: 100%|██████████| 63/63 [00:00<00:00, 386.76it/s, accuracy=0.667, cost=0.516]
train minibatch loop:   6%|▌         | 15/250 [00:00<00:01, 141.85it/s, accuracy=0.906, cost=0.23] 

epoch: 28, pass acc: 0.879244, current acc: 0.880749
time taken: 1.9434189796447754
epoch: 28, training loss: 0.274044, training acc: 0.905634, valid loss: 0.323804, valid acc: 0.880749



train minibatch loop: 100%|██████████| 250/250 [00:01<00:00, 140.90it/s, accuracy=1, cost=0.0627]   
test minibatch loop: 100%|██████████| 63/63 [00:00<00:00, 381.16it/s, accuracy=0.667, cost=0.518]
train minibatch loop:   6%|▌         | 15/250 [00:00<00:01, 140.93it/s, accuracy=0.938, cost=0.222]

epoch: 29, pass acc: 0.880749, current acc: 0.881753
time taken: 1.9414536952972412
epoch: 29, training loss: 0.268564, training acc: 0.907768, valid loss: 0.320907, valid acc: 0.881753



train minibatch loop: 100%|██████████| 250/250 [00:01<00:00, 140.54it/s, accuracy=1, cost=0.0568]   
test minibatch loop: 100%|██████████| 63/63 [00:00<00:00, 382.64it/s, accuracy=0.667, cost=0.52] 
train minibatch loop:   6%|▌         | 15/250 [00:00<00:01, 141.17it/s, accuracy=0.906, cost=0.319]

time taken: 1.9452533721923828
epoch: 30, training loss: 0.263307, training acc: 0.910152, valid loss: 0.318228, valid acc: 0.881753



train minibatch loop: 100%|██████████| 250/250 [00:01<00:00, 141.52it/s, accuracy=1, cost=0.0515]   
test minibatch loop: 100%|██████████| 63/63 [00:00<00:00, 382.33it/s, accuracy=0.667, cost=0.523]
train minibatch loop:   6%|▌         | 14/250 [00:00<00:01, 139.61it/s, accuracy=0.906, cost=0.316]

time taken: 1.9482879638671875
epoch: 31, training loss: 0.258252, training acc: 0.911783, valid loss: 0.315743, valid acc: 0.880749



train minibatch loop: 100%|██████████| 250/250 [00:01<00:00, 141.46it/s, accuracy=1, cost=0.0468]   
test minibatch loop: 100%|██████████| 63/63 [00:00<00:00, 383.58it/s, accuracy=0.667, cost=0.525]
train minibatch loop:   6%|▌         | 15/250 [00:00<00:01, 140.72it/s, accuracy=0.938, cost=0.202]

epoch: 32, pass acc: 0.881753, current acc: 0.882255
time taken: 1.9427552223205566
epoch: 32, training loss: 0.253383, training acc: 0.913791, valid loss: 0.313438, valid acc: 0.882255



train minibatch loop: 100%|██████████| 250/250 [00:01<00:00, 141.49it/s, accuracy=1, cost=0.0425]   
test minibatch loop: 100%|██████████| 63/63 [00:00<00:00, 383.16it/s, accuracy=0.667, cost=0.528]
train minibatch loop:   6%|▌         | 14/250 [00:00<00:01, 139.33it/s, accuracy=0.906, cost=0.309]

epoch: 33, pass acc: 0.882255, current acc: 0.883258
time taken: 1.9330670833587646
epoch: 33, training loss: 0.248683, training acc: 0.915799, valid loss: 0.311294, valid acc: 0.883258



train minibatch loop: 100%|██████████| 250/250 [00:01<00:00, 140.84it/s, accuracy=1, cost=0.0388]   
test minibatch loop: 100%|██████████| 63/63 [00:00<00:00, 381.17it/s, accuracy=0.667, cost=0.531]
train minibatch loop:   6%|▌         | 15/250 [00:00<00:01, 140.65it/s, accuracy=0.938, cost=0.19] 

time taken: 1.942573070526123
epoch: 34, training loss: 0.244140, training acc: 0.917054, valid loss: 0.309300, valid acc: 0.882756



train minibatch loop: 100%|██████████| 250/250 [00:01<00:00, 140.65it/s, accuracy=1, cost=0.0354]   
test minibatch loop: 100%|██████████| 63/63 [00:00<00:00, 387.62it/s, accuracy=0.667, cost=0.535]
train minibatch loop:   6%|▌         | 15/250 [00:00<00:01, 141.22it/s, accuracy=0.938, cost=0.185]

time taken: 1.9421100616455078
epoch: 35, training loss: 0.239742, training acc: 0.919061, valid loss: 0.307442, valid acc: 0.883258



train minibatch loop: 100%|██████████| 250/250 [00:01<00:00, 140.86it/s, accuracy=1, cost=0.0323]   
test minibatch loop: 100%|██████████| 63/63 [00:00<00:00, 384.79it/s, accuracy=0.667, cost=0.538]
train minibatch loop:   6%|▌         | 15/250 [00:00<00:01, 140.76it/s, accuracy=0.938, cost=0.18] 

epoch: 36, pass acc: 0.883258, current acc: 0.883760
time taken: 1.9409313201904297
epoch: 36, training loss: 0.235480, training acc: 0.919940, valid loss: 0.305711, valid acc: 0.883760



train minibatch loop: 100%|██████████| 250/250 [00:01<00:00, 140.81it/s, accuracy=1, cost=0.0296]   
test minibatch loop: 100%|██████████| 63/63 [00:00<00:00, 384.25it/s, accuracy=0.667, cost=0.542]
train minibatch loop:   6%|▌         | 15/250 [00:00<00:01, 140.39it/s, accuracy=0.938, cost=0.174]

epoch: 37, pass acc: 0.883760, current acc: 0.884763
time taken: 1.9411437511444092
epoch: 37, training loss: 0.231344, training acc: 0.922449, valid loss: 0.304098, valid acc: 0.884763



train minibatch loop: 100%|██████████| 250/250 [00:01<00:00, 140.70it/s, accuracy=1, cost=0.0271]   
test minibatch loop: 100%|██████████| 63/63 [00:00<00:00, 381.36it/s, accuracy=0.667, cost=0.545]
train minibatch loop:   6%|▌         | 14/250 [00:00<00:01, 139.41it/s, accuracy=0.906, cost=0.294]

time taken: 1.9439127445220947
epoch: 38, training loss: 0.227327, training acc: 0.924206, valid loss: 0.302595, valid acc: 0.884262



train minibatch loop: 100%|██████████| 250/250 [00:01<00:00, 138.76it/s, accuracy=1, cost=0.0249]   
test minibatch loop: 100%|██████████| 63/63 [00:00<00:00, 382.89it/s, accuracy=0.667, cost=0.549]
train minibatch loop:   6%|▌         | 14/250 [00:00<00:01, 138.93it/s, accuracy=0.906, cost=0.291]

time taken: 1.968212366104126
epoch: 39, training loss: 0.223421, training acc: 0.925461, valid loss: 0.301193, valid acc: 0.884262



train minibatch loop: 100%|██████████| 250/250 [00:01<00:00, 139.79it/s, accuracy=1, cost=0.0229]   
test minibatch loop: 100%|██████████| 63/63 [00:00<00:00, 382.79it/s, accuracy=0.667, cost=0.553]

time taken: 1.9546802043914795
epoch: 40, training loss: 0.219620, training acc: 0.926591, valid loss: 0.299888, valid acc: 0.884262

break epoch:41






In [12]:
real_Y, predict_Y = [], []

pbar = tqdm(
    range(0, test_X.shape[0], batch_size), desc = 'validation minibatch loop'
)
for i in pbar:
    batch_x = convert_sparse_matrix_to_sparse_tensor(test_X[i : min(i + batch_size, test_X.shape[0])])
    batch_y = test_Y[i : min(i + batch_size, test_X.shape[0])]
    predict_Y += np.argmax(
        sess.run(
            model.logits, feed_dict = {model.X: batch_x[0], model.W: batch_x[1], model.Y: batch_y}
        ),
        1,
    ).tolist()
    real_Y += batch_y

validation minibatch loop: 100%|██████████| 63/63 [00:00<00:00, 623.93it/s]


In [13]:
print(
    metrics.classification_report(
        real_Y, predict_Y, target_names = ['negative', 'positive']
    )
)

             precision    recall  f1-score   support

   negative       0.88      0.88      0.88      1002
   positive       0.88      0.87      0.88       991

avg / total       0.88      0.88      0.88      1993



In [14]:
saver.save(sess, 'fast-text-char/model.ckpt')

'fast-text-char/model.ckpt'

In [15]:
import pickle
with open('vectorizer-sparse-subjectivity.pkl','wb') as fopen:
    pickle.dump(bow_chars, fopen)