In [1]:
import re
import numpy as np
import pandas as pd
import collections
from sklearn import metrics
from sklearn.preprocessing import LabelEncoder
import tensorflow as tf
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.cross_validation import train_test_split
from unidecode import unidecode
from tqdm import tqdm
import time



In [2]:
permulaan = [
    'bel',
    'se',
    'ter',
    'men',
    'meng',
    'mem',
    'memper',
    'di',
    'pe',
    'me',
    'ke',
    'ber',
    'pen',
    'per',
]

hujung = ['kan', 'kah', 'lah', 'tah', 'nya', 'an', 'wan', 'wati', 'ita']

def naive_stemmer(word):
    assert isinstance(word, str), 'input must be a string'
    hujung_result = re.findall(r'^(.*?)(%s)$' % ('|'.join(hujung)), word)
    word = hujung_result[0][0] if len(hujung_result) else word
    permulaan_result = re.findall(r'^(.*?)(%s)' % ('|'.join(permulaan[::-1])), word)
    permulaan_result.extend(re.findall(r'^(.*?)(%s)' % ('|'.join(permulaan)), word))
    mula = permulaan_result if len(permulaan_result) else ''
    if len(mula):
        mula = mula[1][1] if len(mula[1][1]) > len(mula[0][1]) else mula[0][1]
    return word.replace(mula, '')

In [3]:
def classification_textcleaning(string):
    string = re.sub(
        'http\S+|www.\S+',
        '',
        ' '.join(
            [i for i in string.split() if i.find('#') < 0 and i.find('@') < 0]
        ),
    )
    string = unidecode(string).replace('.', ' . ').replace(',', ' , ')
    string = re.sub('[^A-Za-z ]+', ' ', string)
    string = re.sub(r'[ ]+', ' ', string).strip()
    string = ' '.join(
        [i for i in re.findall('[\\w\']+|[;:\-\(\)&.,!?"]', string) if len(i)]
    )
    string = string.lower().split()
    string = [(naive_stemmer(word), word) for word in string]
    return (
        ' '.join([word[0] for word in string if len(word[0]) > 1]),
        ' '.join([word[1] for word in string if len(word[0]) > 1]),
    )

def convert_sparse_matrix_to_sparse_tensor(X, limit = 5):
    coo = X.tocoo()
    indices = np.mat([coo.row, coo.col]).transpose()
    coo.data[coo.data > limit] = limit
    return tf.SparseTensorValue(indices, coo.col, coo.shape), tf.SparseTensorValue(indices, coo.data, coo.shape)

In [4]:
df = pd.read_csv('dataset/sentiment-data-v2.csv')
Y = LabelEncoder().fit_transform(df.label)
df.head()

Unnamed: 0,label,text
0,Negative,Lebih-lebih lagi dengan kemudahan internet da...
1,Positive,boleh memberi teguran kepada parti tetapi perl...
2,Negative,Adalah membingungkan mengapa masyarakat Cina b...
3,Positive,Kami menurunkan defisit daripada 6.7 peratus p...
4,Negative,"Ini masalahnya. Bukan rakyat, tetapi sistem"


In [5]:
with open('dataset/polarity-negative-translated.txt','r') as fopen:
    texts = fopen.read().split('\n')
labels = [0] * len(texts)

with open('dataset/polarity-positive-translated.txt','r') as fopen:
    positive_texts = fopen.read().split('\n')
labels += [1] * len(positive_texts)
texts += positive_texts
texts += df.iloc[:,1].tolist()
labels += Y.tolist()

assert len(labels) == len(texts)

In [6]:
for i in range(len(texts)):
    texts[i] = classification_textcleaning(texts[i])[0]

In [7]:
bow_chars = CountVectorizer(ngram_range=(3, 5), analyzer='char_wb', max_features=300000).fit(texts)
delattr(bow_chars, 'stop_words_')

In [8]:
feature_shape = bow_chars.transform(texts[:1]).shape[1]

In [9]:
class Model:
    def __init__(self, vocab_size, learning_rate):
        self.X = tf.sparse_placeholder(tf.int32)
        self.W = tf.sparse_placeholder(tf.int32)
        self.Y = tf.placeholder(tf.int32, [None])
        embeddings = tf.Variable(tf.truncated_normal([vocab_size,64]))
        embed = tf.nn.embedding_lookup_sparse(embeddings, self.X, self.W, combiner='mean')
        self.logits = tf.layers.dense(embed, 2)
        self.cost = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits = self.logits, labels = self.Y))
        self.optimizer = tf.train.AdamOptimizer(learning_rate = learning_rate).minimize(self.cost)
        correct_pred = tf.equal(tf.argmax(self.logits, 1,output_type=tf.int32), self.Y)
        self.accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

In [10]:
sess = tf.InteractiveSession()
model = Model(feature_shape, 1e-4)
sess.run(tf.global_variables_initializer())

  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


In [11]:
saver = tf.train.Saver(tf.trainable_variables())
saver.save(sess, 'fast-text-char/model.ckpt')

'fast-text-char/model.ckpt'

In [12]:
vectors = bow_chars.transform(texts)
train_X, test_X, train_Y, test_Y = train_test_split(
    vectors, labels, test_size = 0.2
)

In [13]:
from tqdm import tqdm
import time

batch_size = 32
EARLY_STOPPING, CURRENT_CHECKPOINT, CURRENT_ACC, EPOCH = 3, 0, 0, 0

while True:
    lasttime = time.time()
    if CURRENT_CHECKPOINT == EARLY_STOPPING:
        print('break epoch:%d\n' % (EPOCH))
        break

    train_acc, train_loss, test_acc, test_loss = 0, 0, 0, 0
    pbar = tqdm(
        range(0, train_X.shape[0], batch_size), desc = 'train minibatch loop'
    )
    for i in pbar:
        batch_x = convert_sparse_matrix_to_sparse_tensor(train_X[i : min(i + batch_size, train_X.shape[0])])
        batch_y = train_Y[i : min(i + batch_size, train_X.shape[0])]
        batch_x_expand = np.expand_dims(batch_x,axis = 1)
        acc, cost, _ = sess.run(
            [model.accuracy, model.cost, model.optimizer],
            feed_dict = {
                model.Y: batch_y,
                model.X: batch_x[0],
                model.W: batch_x[1],
            },
        )
        assert not np.isnan(cost)
        train_loss += cost
        train_acc += acc
        pbar.set_postfix(cost = cost, accuracy = acc)

    pbar = tqdm(range(0, test_X.shape[0], batch_size), desc = 'test minibatch loop')
    for i in pbar:
        batch_x = convert_sparse_matrix_to_sparse_tensor(test_X[i : min(i + batch_size, test_X.shape[0])])
        batch_y = test_Y[i : min(i + batch_size, test_X.shape[0])]
        batch_x_expand = np.expand_dims(batch_x,axis = 1)
        acc, cost = sess.run(
            [model.accuracy, model.cost],
            feed_dict = {
                model.Y: batch_y,
                model.X: batch_x[0],
                model.W: batch_x[1],
            },
        )
        test_loss += cost
        test_acc += acc
        pbar.set_postfix(cost = cost, accuracy = acc)

    train_loss /= train_X.shape[0] / batch_size
    train_acc /= train_X.shape[0] / batch_size
    test_loss /= test_X.shape[0] / batch_size
    test_acc /= test_X.shape[0] / batch_size

    if test_acc > CURRENT_ACC:
        print(
            'epoch: %d, pass acc: %f, current acc: %f'
            % (EPOCH, CURRENT_ACC, test_acc)
        )
        CURRENT_ACC = test_acc
        CURRENT_CHECKPOINT = 0
    else:
        CURRENT_CHECKPOINT += 1

    print('time taken:', time.time() - lasttime)
    print(
        'epoch: %d, training loss: %f, training acc: %f, valid loss: %f, valid acc: %f\n'
        % (EPOCH, train_loss, train_acc, test_loss, test_acc)
    )
    EPOCH += 1

train minibatch loop: 100%|██████████| 357/357 [00:02<00:00, 135.13it/s, accuracy=0.548, cost=0.694]
test minibatch loop: 100%|██████████| 90/90 [00:00<00:00, 377.78it/s, accuracy=0.75, cost=0.675] 
train minibatch loop:   4%|▍         | 14/357 [00:00<00:02, 139.94it/s, accuracy=0.344, cost=0.73] 

epoch: 0, pass acc: 0.000000, current acc: 0.557073
time taken: 2.8827085494995117
epoch: 0, training loss: 0.692972, training acc: 0.524341, valid loss: 0.691601, valid acc: 0.557073



train minibatch loop: 100%|██████████| 357/357 [00:02<00:00, 138.14it/s, accuracy=0.516, cost=0.691]
test minibatch loop: 100%|██████████| 90/90 [00:00<00:00, 378.54it/s, accuracy=0.75, cost=0.669] 
train minibatch loop:   4%|▍         | 15/357 [00:00<00:02, 142.93it/s, accuracy=0.406, cost=0.725]

epoch: 1, pass acc: 0.557073, current acc: 0.582283
time taken: 2.82466197013855
epoch: 1, training loss: 0.683992, training acc: 0.560493, valid loss: 0.684942, valid acc: 0.582283



train minibatch loop: 100%|██████████| 357/357 [00:02<00:00, 138.60it/s, accuracy=0.516, cost=0.688]
test minibatch loop: 100%|██████████| 90/90 [00:00<00:00, 390.31it/s, accuracy=0.75, cost=0.665] 
train minibatch loop:   4%|▍         | 15/357 [00:00<00:02, 141.31it/s, accuracy=0.406, cost=0.717]

epoch: 2, pass acc: 0.582283, current acc: 0.601891
time taken: 2.80815052986145
epoch: 2, training loss: 0.676320, training acc: 0.583955, valid loss: 0.679109, valid acc: 0.601891



train minibatch loop: 100%|██████████| 357/357 [00:02<00:00, 141.37it/s, accuracy=0.548, cost=0.683]
test minibatch loop: 100%|██████████| 90/90 [00:00<00:00, 387.60it/s, accuracy=0.75, cost=0.663] 
train minibatch loop:   4%|▍         | 15/357 [00:00<00:02, 140.39it/s, accuracy=0.438, cost=0.709]

epoch: 3, pass acc: 0.601891, current acc: 0.611695
time taken: 2.759413242340088
epoch: 3, training loss: 0.668874, training acc: 0.599715, valid loss: 0.673454, valid acc: 0.611695



train minibatch loop: 100%|██████████| 357/357 [00:02<00:00, 141.49it/s, accuracy=0.548, cost=0.677]
test minibatch loop: 100%|██████████| 90/90 [00:00<00:00, 385.68it/s, accuracy=0.625, cost=0.66] 
train minibatch loop:   4%|▍         | 15/357 [00:00<00:02, 141.40it/s, accuracy=0.438, cost=0.7]  

epoch: 4, pass acc: 0.611695, current acc: 0.619748
time taken: 2.7584547996520996
epoch: 4, training loss: 0.661259, training acc: 0.616086, valid loss: 0.667748, valid acc: 0.619748



train minibatch loop: 100%|██████████| 357/357 [00:02<00:00, 141.79it/s, accuracy=0.548, cost=0.67] 
test minibatch loop: 100%|██████████| 90/90 [00:00<00:00, 389.77it/s, accuracy=0.625, cost=0.658]
train minibatch loop:   4%|▍         | 15/357 [00:00<00:02, 142.68it/s, accuracy=0.5, cost=0.69]   

epoch: 5, pass acc: 0.619748, current acc: 0.628501
time taken: 2.750880002975464
epoch: 5, training loss: 0.653339, training acc: 0.629655, valid loss: 0.661933, valid acc: 0.628501



train minibatch loop: 100%|██████████| 357/357 [00:02<00:00, 141.63it/s, accuracy=0.548, cost=0.662]
test minibatch loop: 100%|██████████| 90/90 [00:00<00:00, 392.35it/s, accuracy=0.625, cost=0.656]
train minibatch loop:   4%|▍         | 15/357 [00:00<00:02, 142.49it/s, accuracy=0.5, cost=0.68]   

epoch: 6, pass acc: 0.628501, current acc: 0.632703
time taken: 2.7522051334381104
epoch: 6, training loss: 0.645093, training acc: 0.645850, valid loss: 0.656033, valid acc: 0.632703



train minibatch loop: 100%|██████████| 357/357 [00:02<00:00, 141.72it/s, accuracy=0.548, cost=0.654]
test minibatch loop: 100%|██████████| 90/90 [00:00<00:00, 389.13it/s, accuracy=0.625, cost=0.653]
train minibatch loop:   4%|▍         | 15/357 [00:00<00:02, 140.33it/s, accuracy=0.531, cost=0.67] 

epoch: 7, pass acc: 0.632703, current acc: 0.642507
time taken: 2.7521347999572754
epoch: 7, training loss: 0.636568, training acc: 0.657231, valid loss: 0.650113, valid acc: 0.642507



train minibatch loop: 100%|██████████| 357/357 [00:02<00:00, 141.12it/s, accuracy=0.613, cost=0.645]
test minibatch loop: 100%|██████████| 90/90 [00:00<00:00, 387.28it/s, accuracy=0.75, cost=0.65]  
train minibatch loop:   4%|▍         | 15/357 [00:00<00:02, 142.51it/s, accuracy=0.562, cost=0.659]

epoch: 8, pass acc: 0.642507, current acc: 0.653011
time taken: 2.7645280361175537
epoch: 8, training loss: 0.627843, training acc: 0.670543, valid loss: 0.644252, valid acc: 0.653011



train minibatch loop: 100%|██████████| 357/357 [00:02<00:00, 141.51it/s, accuracy=0.613, cost=0.636]
test minibatch loop: 100%|██████████| 90/90 [00:00<00:00, 390.22it/s, accuracy=0.75, cost=0.647] 
train minibatch loop:   4%|▍         | 15/357 [00:00<00:02, 139.74it/s, accuracy=0.562, cost=0.649]

epoch: 9, pass acc: 0.653011, current acc: 0.660014
time taken: 2.7556698322296143
epoch: 9, training loss: 0.619011, training acc: 0.680173, valid loss: 0.638526, valid acc: 0.660014



train minibatch loop: 100%|██████████| 357/357 [00:02<00:00, 141.83it/s, accuracy=0.645, cost=0.627]
test minibatch loop: 100%|██████████| 90/90 [00:00<00:00, 388.00it/s, accuracy=0.75, cost=0.644] 
train minibatch loop:   4%|▍         | 15/357 [00:00<00:02, 142.79it/s, accuracy=0.594, cost=0.639]

epoch: 10, pass acc: 0.660014, current acc: 0.662115
time taken: 2.7511253356933594
epoch: 10, training loss: 0.610162, training acc: 0.691556, valid loss: 0.633000, valid acc: 0.662115



train minibatch loop: 100%|██████████| 357/357 [00:02<00:00, 141.08it/s, accuracy=0.677, cost=0.617]
test minibatch loop: 100%|██████████| 90/90 [00:00<00:00, 390.23it/s, accuracy=0.75, cost=0.641] 
train minibatch loop:   4%|▍         | 15/357 [00:00<00:02, 140.96it/s, accuracy=0.594, cost=0.629]

epoch: 11, pass acc: 0.662115, current acc: 0.669118
time taken: 2.762988328933716
epoch: 11, training loss: 0.601373, training acc: 0.701014, valid loss: 0.627722, valid acc: 0.669118



train minibatch loop: 100%|██████████| 357/357 [00:02<00:00, 141.05it/s, accuracy=0.645, cost=0.608]
test minibatch loop: 100%|██████████| 90/90 [00:00<00:00, 388.39it/s, accuracy=0.75, cost=0.638] 
train minibatch loop:   4%|▍         | 15/357 [00:00<00:02, 141.13it/s, accuracy=0.625, cost=0.619]

epoch: 12, pass acc: 0.669118, current acc: 0.673669
time taken: 2.7647182941436768
epoch: 12, training loss: 0.592705, training acc: 0.708890, valid loss: 0.622722, valid acc: 0.673669



train minibatch loop: 100%|██████████| 357/357 [00:02<00:00, 141.68it/s, accuracy=0.71, cost=0.598] 
test minibatch loop: 100%|██████████| 90/90 [00:00<00:00, 389.31it/s, accuracy=0.75, cost=0.634] 
train minibatch loop:   4%|▍         | 15/357 [00:00<00:02, 142.10it/s, accuracy=0.625, cost=0.61] 

epoch: 13, pass acc: 0.673669, current acc: 0.677521
time taken: 2.752776622772217
epoch: 13, training loss: 0.584202, training acc: 0.717912, valid loss: 0.618016, valid acc: 0.677521



train minibatch loop: 100%|██████████| 357/357 [00:02<00:00, 141.40it/s, accuracy=0.71, cost=0.589] 
test minibatch loop: 100%|██████████| 90/90 [00:00<00:00, 388.58it/s, accuracy=0.75, cost=0.631] 
train minibatch loop:   4%|▍         | 15/357 [00:00<00:02, 142.73it/s, accuracy=0.625, cost=0.601]

epoch: 14, pass acc: 0.677521, current acc: 0.683824
time taken: 2.758405923843384
epoch: 14, training loss: 0.575892, training acc: 0.726229, valid loss: 0.613612, valid acc: 0.683824



train minibatch loop: 100%|██████████| 357/357 [00:02<00:00, 141.67it/s, accuracy=0.71, cost=0.579] 
test minibatch loop: 100%|██████████| 90/90 [00:00<00:00, 390.11it/s, accuracy=0.75, cost=0.628] 
train minibatch loop:   4%|▍         | 15/357 [00:00<00:02, 141.13it/s, accuracy=0.625, cost=0.592]

epoch: 15, pass acc: 0.683824, current acc: 0.686625
time taken: 2.7528414726257324
epoch: 15, training loss: 0.567794, training acc: 0.729730, valid loss: 0.609508, valid acc: 0.686625



train minibatch loop: 100%|██████████| 357/357 [00:02<00:00, 141.28it/s, accuracy=0.71, cost=0.57]  
test minibatch loop: 100%|██████████| 90/90 [00:00<00:00, 392.20it/s, accuracy=0.75, cost=0.624] 
train minibatch loop:   4%|▍         | 15/357 [00:00<00:02, 140.84it/s, accuracy=0.625, cost=0.584]

epoch: 16, pass acc: 0.686625, current acc: 0.688025
time taken: 2.7586071491241455
epoch: 16, training loss: 0.559920, training acc: 0.736033, valid loss: 0.605698, valid acc: 0.688025



train minibatch loop: 100%|██████████| 357/357 [00:02<00:00, 141.84it/s, accuracy=0.71, cost=0.561] 
test minibatch loop: 100%|██████████| 90/90 [00:00<00:00, 387.14it/s, accuracy=0.75, cost=0.621] 
train minibatch loop:   4%|▍         | 15/357 [00:00<00:02, 141.56it/s, accuracy=0.625, cost=0.575]

epoch: 17, pass acc: 0.688025, current acc: 0.690476
time taken: 2.7624003887176514
epoch: 17, training loss: 0.552275, training acc: 0.740936, valid loss: 0.602174, valid acc: 0.690476



train minibatch loop: 100%|██████████| 357/357 [00:02<00:00, 141.35it/s, accuracy=0.71, cost=0.552] 
test minibatch loop: 100%|██████████| 90/90 [00:00<00:00, 388.68it/s, accuracy=0.75, cost=0.618] 
train minibatch loop:   4%|▍         | 14/357 [00:00<00:02, 139.90it/s, accuracy=0.719, cost=0.567]

epoch: 18, pass acc: 0.690476, current acc: 0.694328
time taken: 2.759275197982788
epoch: 18, training loss: 0.544860, training acc: 0.745838, valid loss: 0.598926, valid acc: 0.694328



train minibatch loop: 100%|██████████| 357/357 [00:02<00:00, 141.15it/s, accuracy=0.71, cost=0.543] 
test minibatch loop: 100%|██████████| 90/90 [00:00<00:00, 390.34it/s, accuracy=0.75, cost=0.615] 
train minibatch loop:   4%|▍         | 15/357 [00:00<00:02, 142.45it/s, accuracy=0.688, cost=0.56] 

epoch: 19, pass acc: 0.694328, current acc: 0.696078
time taken: 2.7616238594055176
epoch: 19, training loss: 0.537674, training acc: 0.751091, valid loss: 0.595943, valid acc: 0.696078



train minibatch loop: 100%|██████████| 357/357 [00:02<00:00, 141.05it/s, accuracy=0.71, cost=0.535] 
test minibatch loop: 100%|██████████| 90/90 [00:00<00:00, 389.80it/s, accuracy=0.875, cost=0.612]
train minibatch loop:   4%|▍         | 15/357 [00:00<00:02, 141.62it/s, accuracy=0.688, cost=0.552]

epoch: 20, pass acc: 0.696078, current acc: 0.699230
time taken: 2.7638111114501953
epoch: 20, training loss: 0.530713, training acc: 0.756168, valid loss: 0.593213, valid acc: 0.699230



train minibatch loop: 100%|██████████| 357/357 [00:02<00:00, 141.55it/s, accuracy=0.71, cost=0.527] 
test minibatch loop: 100%|██████████| 90/90 [00:00<00:00, 387.96it/s, accuracy=0.875, cost=0.609]
train minibatch loop:   4%|▍         | 15/357 [00:00<00:02, 141.29it/s, accuracy=0.688, cost=0.545]

epoch: 21, pass acc: 0.699230, current acc: 0.705882
time taken: 2.7564079761505127
epoch: 21, training loss: 0.523973, training acc: 0.760283, valid loss: 0.590724, valid acc: 0.705882



train minibatch loop: 100%|██████████| 357/357 [00:02<00:00, 141.72it/s, accuracy=0.71, cost=0.519] 
test minibatch loop: 100%|██████████| 90/90 [00:00<00:00, 390.19it/s, accuracy=0.75, cost=0.607] 
train minibatch loop:   4%|▍         | 15/357 [00:00<00:02, 140.85it/s, accuracy=0.688, cost=0.539]

epoch: 22, pass acc: 0.705882, current acc: 0.709034
time taken: 2.7519538402557373
epoch: 22, training loss: 0.517449, training acc: 0.764135, valid loss: 0.588464, valid acc: 0.709034



train minibatch loop: 100%|██████████| 357/357 [00:02<00:00, 139.98it/s, accuracy=0.742, cost=0.511]
test minibatch loop: 100%|██████████| 90/90 [00:00<00:00, 386.37it/s, accuracy=0.75, cost=0.604] 
train minibatch loop:   4%|▍         | 14/357 [00:00<00:02, 139.76it/s, accuracy=0.75, cost=0.546] 

epoch: 23, pass acc: 0.709034, current acc: 0.711134
time taken: 2.7852275371551514
epoch: 23, training loss: 0.511133, training acc: 0.766939, valid loss: 0.586422, valid acc: 0.711134



train minibatch loop: 100%|██████████| 357/357 [00:02<00:00, 140.79it/s, accuracy=0.742, cost=0.503]
test minibatch loop: 100%|██████████| 90/90 [00:00<00:00, 388.31it/s, accuracy=0.75, cost=0.602] 
train minibatch loop:   4%|▍         | 15/357 [00:00<00:02, 141.68it/s, accuracy=0.688, cost=0.526]

epoch: 24, pass acc: 0.711134, current acc: 0.714286
time taken: 2.769306182861328
epoch: 24, training loss: 0.505019, training acc: 0.771053, valid loss: 0.584587, valid acc: 0.714286



train minibatch loop: 100%|██████████| 357/357 [00:02<00:00, 141.56it/s, accuracy=0.774, cost=0.496]
test minibatch loop: 100%|██████████| 90/90 [00:00<00:00, 383.06it/s, accuracy=0.75, cost=0.6]   
train minibatch loop:   4%|▍         | 15/357 [00:00<00:02, 141.37it/s, accuracy=0.688, cost=0.52] 

time taken: 2.759004592895508
epoch: 25, training loss: 0.499100, training acc: 0.775433, valid loss: 0.582946, valid acc: 0.712885



train minibatch loop: 100%|██████████| 357/357 [00:02<00:00, 141.49it/s, accuracy=0.774, cost=0.489]
test minibatch loop: 100%|██████████| 90/90 [00:00<00:00, 391.37it/s, accuracy=0.75, cost=0.598] 
train minibatch loop:   4%|▍         | 14/357 [00:00<00:02, 139.37it/s, accuracy=0.688, cost=0.514]

time taken: 2.7552366256713867
epoch: 26, training loss: 0.493368, training acc: 0.779635, valid loss: 0.581490, valid acc: 0.713936



train minibatch loop: 100%|██████████| 357/357 [00:02<00:00, 141.22it/s, accuracy=0.774, cost=0.482]
test minibatch loop: 100%|██████████| 90/90 [00:00<00:00, 389.51it/s, accuracy=0.75, cost=0.596] 
train minibatch loop:   4%|▍         | 15/357 [00:00<00:02, 140.76it/s, accuracy=0.688, cost=0.509]

epoch: 27, pass acc: 0.714286, current acc: 0.715686
time taken: 2.7610113620758057
epoch: 27, training loss: 0.487816, training acc: 0.782962, valid loss: 0.580207, valid acc: 0.715686



train minibatch loop: 100%|██████████| 357/357 [00:02<00:00, 141.44it/s, accuracy=0.806, cost=0.475]
test minibatch loop: 100%|██████████| 90/90 [00:00<00:00, 387.98it/s, accuracy=0.75, cost=0.595] 
train minibatch loop:   4%|▍         | 15/357 [00:00<00:02, 142.14it/s, accuracy=0.688, cost=0.503]

time taken: 2.7584028244018555
epoch: 28, training loss: 0.482437, training acc: 0.785591, valid loss: 0.579088, valid acc: 0.715686



train minibatch loop: 100%|██████████| 357/357 [00:02<00:00, 141.29it/s, accuracy=0.806, cost=0.469]
test minibatch loop: 100%|██████████| 90/90 [00:00<00:00, 389.15it/s, accuracy=0.75, cost=0.594] 
train minibatch loop:   4%|▍         | 15/357 [00:00<00:02, 142.98it/s, accuracy=0.688, cost=0.498]

epoch: 29, pass acc: 0.715686, current acc: 0.717787
time taken: 2.75982666015625
epoch: 29, training loss: 0.477222, training acc: 0.789618, valid loss: 0.578122, valid acc: 0.717787



train minibatch loop: 100%|██████████| 357/357 [00:02<00:00, 141.47it/s, accuracy=0.806, cost=0.463]
test minibatch loop: 100%|██████████| 90/90 [00:00<00:00, 389.13it/s, accuracy=0.75, cost=0.593] 
train minibatch loop:   4%|▍         | 15/357 [00:00<00:02, 140.98it/s, accuracy=0.688, cost=0.493]

epoch: 30, pass acc: 0.717787, current acc: 0.719188
time taken: 2.756946563720703
epoch: 30, training loss: 0.472165, training acc: 0.793207, valid loss: 0.577300, valid acc: 0.719188



train minibatch loop: 100%|██████████| 357/357 [00:02<00:00, 141.24it/s, accuracy=0.806, cost=0.457]
test minibatch loop: 100%|██████████| 90/90 [00:00<00:00, 387.14it/s, accuracy=0.75, cost=0.592] 
train minibatch loop:   4%|▍         | 15/357 [00:00<00:02, 141.82it/s, accuracy=0.688, cost=0.489]

epoch: 31, pass acc: 0.719188, current acc: 0.720588
time taken: 2.7619266510009766
epoch: 31, training loss: 0.467257, training acc: 0.795221, valid loss: 0.576612, valid acc: 0.720588



train minibatch loop: 100%|██████████| 357/357 [00:02<00:00, 141.26it/s, accuracy=0.806, cost=0.451]
test minibatch loop: 100%|██████████| 90/90 [00:00<00:00, 389.60it/s, accuracy=0.75, cost=0.591] 
train minibatch loop:   4%|▍         | 15/357 [00:00<00:02, 142.78it/s, accuracy=0.688, cost=0.484]

epoch: 32, pass acc: 0.720588, current acc: 0.721289
time taken: 2.765531301498413
epoch: 32, training loss: 0.462491, training acc: 0.798110, valid loss: 0.576051, valid acc: 0.721289



train minibatch loop: 100%|██████████| 357/357 [00:02<00:00, 141.88it/s, accuracy=0.806, cost=0.445]
test minibatch loop: 100%|██████████| 90/90 [00:00<00:00, 387.49it/s, accuracy=0.75, cost=0.59]  
train minibatch loop:   4%|▍         | 15/357 [00:00<00:02, 141.26it/s, accuracy=0.719, cost=0.48] 

epoch: 33, pass acc: 0.721289, current acc: 0.722339
time taken: 2.7505366802215576
epoch: 33, training loss: 0.457860, training acc: 0.800386, valid loss: 0.575607, valid acc: 0.722339



train minibatch loop: 100%|██████████| 357/357 [00:02<00:00, 141.02it/s, accuracy=0.806, cost=0.44] 
test minibatch loop: 100%|██████████| 90/90 [00:00<00:00, 385.89it/s, accuracy=0.75, cost=0.59]  
train minibatch loop:   4%|▍         | 15/357 [00:00<00:02, 141.45it/s, accuracy=0.719, cost=0.476]

epoch: 34, pass acc: 0.722339, current acc: 0.722689
time taken: 2.766610622406006
epoch: 34, training loss: 0.453357, training acc: 0.801962, valid loss: 0.575274, valid acc: 0.722689



train minibatch loop: 100%|██████████| 357/357 [00:02<00:00, 141.22it/s, accuracy=0.806, cost=0.434]
test minibatch loop: 100%|██████████| 90/90 [00:00<00:00, 386.44it/s, accuracy=0.75, cost=0.59]  
train minibatch loop:   4%|▍         | 14/357 [00:00<00:02, 139.49it/s, accuracy=0.75, cost=0.472] 

time taken: 2.762693166732788
epoch: 35, training loss: 0.448976, training acc: 0.804675, valid loss: 0.575043, valid acc: 0.722689



train minibatch loop: 100%|██████████| 357/357 [00:02<00:00, 141.57it/s, accuracy=0.806, cost=0.429]
test minibatch loop: 100%|██████████| 90/90 [00:00<00:00, 389.69it/s, accuracy=0.75, cost=0.589] 
train minibatch loop:   4%|▍         | 15/357 [00:00<00:02, 142.45it/s, accuracy=0.781, cost=0.468]

time taken: 2.7545599937438965
epoch: 36, training loss: 0.444709, training acc: 0.807127, valid loss: 0.574909, valid acc: 0.721639



train minibatch loop: 100%|██████████| 357/357 [00:02<00:00, 141.58it/s, accuracy=0.806, cost=0.424]
test minibatch loop: 100%|██████████| 90/90 [00:00<00:00, 386.84it/s, accuracy=0.75, cost=0.589] 

time taken: 2.7560172080993652
epoch: 37, training loss: 0.440552, training acc: 0.809665, valid loss: 0.574864, valid acc: 0.720238

break epoch:38






In [14]:
real_Y, predict_Y = [], []

pbar = tqdm(
    range(0, test_X.shape[0], batch_size), desc = 'validation minibatch loop'
)
for i in pbar:
    batch_x = convert_sparse_matrix_to_sparse_tensor(test_X[i : min(i + batch_size, test_X.shape[0])])
    batch_y = test_Y[i : min(i + batch_size, test_X.shape[0])]
    predict_Y += np.argmax(
        sess.run(
            model.logits, feed_dict = {model.X: batch_x[0], model.W: batch_x[1], model.Y: batch_y}
        ),
        1,
    ).tolist()
    real_Y += batch_y

validation minibatch loop: 100%|██████████| 90/90 [00:00<00:00, 655.27it/s]


In [15]:
print(
    metrics.classification_report(
        real_Y, predict_Y, target_names = ['negative','positive']
    )
)

             precision    recall  f1-score   support

   negative       0.71      0.64      0.67      1303
   positive       0.72      0.78      0.75      1553

avg / total       0.71      0.71      0.71      2856



In [16]:
text = classification_textcleaning('kerajaan sebenarnya sangat sayangkan rakyatnya')
transformed = bow_chars.transform([text[0]])
batch_x = convert_sparse_matrix_to_sparse_tensor(transformed)
sess.run(model.logits, feed_dict = {model.X: batch_x[0], model.W: batch_x[1]})

array([[-0.8593565,  0.9225268]], dtype=float32)

In [17]:
saver.save(sess, 'fast-text-char/model.ckpt')

'fast-text-char/model.ckpt'

In [18]:
import pickle
with open('vectorizer-sparse-sentiment.pkl','wb') as fopen:
    pickle.dump(bow_chars, fopen)