In [1]:
import re
import numpy as np
import pandas as pd
import collections
from sklearn import metrics
from sklearn.cross_validation import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.preprocessing import LabelEncoder
from unidecode import unidecode
import tensorflow as tf
import pandas as pd
from tqdm import tqdm
import time



In [2]:
permulaan = [
    'bel',
    'se',
    'ter',
    'men',
    'meng',
    'mem',
    'memper',
    'di',
    'pe',
    'me',
    'ke',
    'ber',
    'pen',
    'per',
]

hujung = ['kan', 'kah', 'lah', 'tah', 'nya', 'an', 'wan', 'wati', 'ita']

def naive_stemmer(word):
    assert isinstance(word, str), 'input must be a string'
    hujung_result = re.findall(r'^(.*?)(%s)$' % ('|'.join(hujung)), word)
    word = hujung_result[0][0] if len(hujung_result) else word
    permulaan_result = re.findall(r'^(.*?)(%s)' % ('|'.join(permulaan[::-1])), word)
    permulaan_result.extend(re.findall(r'^(.*?)(%s)' % ('|'.join(permulaan)), word))
    mula = permulaan_result if len(permulaan_result) else ''
    if len(mula):
        mula = mula[1][1] if len(mula[1][1]) > len(mula[0][1]) else mula[0][1]
    return word.replace(mula, '')

In [3]:
def classification_textcleaning(string):
    string = re.sub(
        'http\S+|www.\S+',
        '',
        ' '.join(
            [i for i in string.split() if i.find('#') < 0 and i.find('@') < 0]
        ),
    )
    string = unidecode(string).replace('.', ' . ').replace(',', ' , ')
    string = re.sub('[^A-Za-z ]+', ' ', string)
    string = re.sub(r'[ ]+', ' ', string).strip()
    string = ' '.join(
        [i for i in re.findall('[\\w\']+|[;:\-\(\)&.,!?"]', string) if len(i)]
    )
    string = string.lower().split()
    string = [(naive_stemmer(word), word) for word in string]
    return (
        ' '.join([word[0] for word in string if len(word[0]) > 1]),
        ' '.join([word[1] for word in string if len(word[0]) > 1]),
    )

def convert_sparse_matrix_to_sparse_tensor(X, limit = 5):
    coo = X.tocoo()
    indices = np.mat([coo.row, coo.col]).transpose()
    coo.data[coo.data > limit] = limit
    return tf.SparseTensorValue(indices, coo.col, coo.shape), tf.SparseTensorValue(indices, coo.data, coo.shape)

In [4]:
import os
emotion_files = [f for f in os.listdir(os.getcwd()) if 'translated-' in f]
emotion_files

['translated-joy',
 'translated-love',
 'translated-fear',
 'translated-sadness',
 'translated-surprise',
 'translated-anger']

In [5]:
texts, labels = [], []
for f in emotion_files:
    with open(f) as fopen:
        dataset = list(filter(None, fopen.read().split('\n')))
        labels.extend([f.split('-')[1]] * len(dataset))
        texts.extend(dataset)

In [6]:
for i in range(len(texts)):
    texts[i] = classification_textcleaning(texts[i])[0]

In [7]:
unique_labels = np.unique(labels).tolist()
labels = LabelEncoder().fit_transform(labels)
unique_labels

['anger', 'fear', 'joy', 'love', 'sadness', 'surprise']

In [8]:
for i in range(len(texts)):
    texts[i] = classification_textcleaning(texts[i])[0]

In [9]:
bow_chars = CountVectorizer(ngram_range=(3, 5), analyzer='char_wb', max_features=300000).fit(texts)
delattr(bow_chars, 'stop_words_')

In [10]:
feature_shape = bow_chars.transform(texts[:1]).shape[1]
feature_shape

67557

In [11]:
class Model:
    def __init__(self, output_size, vocab_size, learning_rate):
        self.X = tf.sparse_placeholder(tf.int32)
        self.W = tf.sparse_placeholder(tf.int32)
        self.Y = tf.placeholder(tf.int32, [None])
        embeddings = tf.Variable(tf.truncated_normal([vocab_size,64]))
        embed = tf.nn.embedding_lookup_sparse(embeddings, self.X, self.W, combiner='mean')
        self.logits = tf.layers.dense(embed, output_size)
        self.cost = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits = self.logits, labels = self.Y))
        self.optimizer = tf.train.AdamOptimizer(learning_rate = learning_rate).minimize(self.cost)
        correct_pred = tf.equal(tf.argmax(self.logits, 1,output_type=tf.int32), self.Y)
        self.accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

In [12]:
sess = tf.InteractiveSession()
model = Model(len(unique_labels), feature_shape, 1e-4)
sess.run(tf.global_variables_initializer())

  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


In [13]:
saver = tf.train.Saver(tf.trainable_variables())
saver.save(sess, 'fast-text-char/model.ckpt')

'fast-text-char/model.ckpt'

In [14]:
vectors = bow_chars.transform(texts)
train_X, test_X, train_Y, test_Y = train_test_split(
    vectors, labels, test_size = 0.2
)

In [15]:
from tqdm import tqdm
import time

batch_size = 32
EARLY_STOPPING, CURRENT_CHECKPOINT, CURRENT_ACC, EPOCH = 3, 0, 0, 0

while True:
    lasttime = time.time()
    if CURRENT_CHECKPOINT == EARLY_STOPPING:
        print('break epoch:%d\n' % (EPOCH))
        break

    train_acc, train_loss, test_acc, test_loss = 0, 0, 0, 0
    pbar = tqdm(
        range(0, train_X.shape[0], batch_size), desc = 'train minibatch loop'
    )
    for i in pbar:
        batch_x = convert_sparse_matrix_to_sparse_tensor(train_X[i : min(i + batch_size, train_X.shape[0])])
        batch_y = train_Y[i : min(i + batch_size, train_X.shape[0])]
        batch_x_expand = np.expand_dims(batch_x,axis = 1)
        acc, cost, _ = sess.run(
            [model.accuracy, model.cost, model.optimizer],
            feed_dict = {
                model.Y: batch_y,
                model.X: batch_x[0],
                model.W: batch_x[1],
            },
        )
        assert not np.isnan(cost)
        train_loss += cost
        train_acc += acc
        pbar.set_postfix(cost = cost, accuracy = acc)

    pbar = tqdm(range(0, test_X.shape[0], batch_size), desc = 'test minibatch loop')
    for i in pbar:
        batch_x = convert_sparse_matrix_to_sparse_tensor(test_X[i : min(i + batch_size, test_X.shape[0])])
        batch_y = test_Y[i : min(i + batch_size, test_X.shape[0])]
        batch_x_expand = np.expand_dims(batch_x,axis = 1)
        acc, cost = sess.run(
            [model.accuracy, model.cost],
            feed_dict = {
                model.Y: batch_y,
                model.X: batch_x[0],
                model.W: batch_x[1],
            },
        )
        test_loss += cost
        test_acc += acc
        pbar.set_postfix(cost = cost, accuracy = acc)

    train_loss /= train_X.shape[0] / batch_size
    train_acc /= train_X.shape[0] / batch_size
    test_loss /= test_X.shape[0] / batch_size
    test_acc /= test_X.shape[0] / batch_size

    if test_acc > CURRENT_ACC:
        print(
            'epoch: %d, pass acc: %f, current acc: %f'
            % (EPOCH, CURRENT_ACC, test_acc)
        )
        CURRENT_ACC = test_acc
        CURRENT_CHECKPOINT = 0
    else:
        CURRENT_CHECKPOINT += 1

    print('time taken:', time.time() - lasttime)
    print(
        'epoch: %d, training loss: %f, training acc: %f, valid loss: %f, valid acc: %f\n'
        % (EPOCH, train_loss, train_acc, test_loss, test_acc)
    )
    EPOCH += 1

train minibatch loop: 100%|██████████| 2463/2463 [00:25<00:00, 94.81it/s, accuracy=0.5, cost=1.64]   
test minibatch loop: 100%|██████████| 616/616 [00:02<00:00, 219.05it/s, accuracy=0.261, cost=1.71]
train minibatch loop:   0%|          | 10/2463 [00:00<00:26, 93.47it/s, accuracy=0.281, cost=1.71]

epoch: 0, pass acc: 0.000000, current acc: 0.367424
time taken: 28.793192148208618
epoch: 0, training loss: 1.737624, training acc: 0.286365, valid loss: 1.683998, valid acc: 0.367424



train minibatch loop: 100%|██████████| 2463/2463 [00:25<00:00, 95.42it/s, accuracy=0.5, cost=1.47]  
test minibatch loop: 100%|██████████| 616/616 [00:02<00:00, 217.81it/s, accuracy=0.522, cost=1.58]
train minibatch loop:   0%|          | 10/2463 [00:00<00:27, 90.54it/s, accuracy=0.344, cost=1.59]

epoch: 1, pass acc: 0.367424, current acc: 0.477628
time taken: 28.64283514022827
epoch: 1, training loss: 1.616371, training acc: 0.432574, valid loss: 1.552411, valid acc: 0.477628



train minibatch loop: 100%|██████████| 2463/2463 [00:25<00:00, 94.91it/s, accuracy=0.643, cost=1.27] 
test minibatch loop: 100%|██████████| 616/616 [00:02<00:00, 218.93it/s, accuracy=0.652, cost=1.42]
train minibatch loop:   0%|          | 10/2463 [00:00<00:26, 93.13it/s, accuracy=0.531, cost=1.41]

epoch: 2, pass acc: 0.477628, current acc: 0.551381
time taken: 28.76784896850586
epoch: 2, training loss: 1.464915, training acc: 0.528620, valid loss: 1.396159, valid acc: 0.551381



train minibatch loop: 100%|██████████| 2463/2463 [00:25<00:00, 95.33it/s, accuracy=0.714, cost=1.1]   
test minibatch loop: 100%|██████████| 616/616 [00:02<00:00, 214.44it/s, accuracy=0.609, cost=1.27] 
train minibatch loop:   0%|          | 10/2463 [00:00<00:26, 91.84it/s, accuracy=0.625, cost=1.25]

epoch: 3, pass acc: 0.551381, current acc: 0.598512
time taken: 28.711954355239868
epoch: 3, training loss: 1.307954, training acc: 0.589959, valid loss: 1.253290, valid acc: 0.598512



train minibatch loop: 100%|██████████| 2463/2463 [00:25<00:00, 95.20it/s, accuracy=0.786, cost=0.973]
test minibatch loop: 100%|██████████| 616/616 [00:02<00:00, 213.55it/s, accuracy=0.609, cost=1.16] 
train minibatch loop:   0%|          | 10/2463 [00:00<00:26, 93.12it/s, accuracy=0.562, cost=1.22]

epoch: 4, pass acc: 0.598512, current acc: 0.637288
time taken: 28.757939338684082
epoch: 4, training loss: 1.174731, training acc: 0.633383, valid loss: 1.140401, valid acc: 0.637288



train minibatch loop: 100%|██████████| 2463/2463 [00:25<00:00, 95.30it/s, accuracy=0.75, cost=0.877] 
test minibatch loop: 100%|██████████| 616/616 [00:02<00:00, 218.63it/s, accuracy=0.565, cost=1.07] 
train minibatch loop:   0%|          | 10/2463 [00:00<00:25, 95.99it/s, accuracy=0.594, cost=1.15] 

epoch: 5, pass acc: 0.637288, current acc: 0.661630
time taken: 28.666513681411743
epoch: 5, training loss: 1.072006, training acc: 0.662767, valid loss: 1.056140, valid acc: 0.661630



train minibatch loop: 100%|██████████| 2463/2463 [00:25<00:00, 95.55it/s, accuracy=0.786, cost=0.805]
test minibatch loop: 100%|██████████| 616/616 [00:02<00:00, 217.30it/s, accuracy=0.609, cost=1]    
train minibatch loop:   0%|          | 10/2463 [00:00<00:27, 90.84it/s, accuracy=0.594, cost=1.09] 

epoch: 6, pass acc: 0.661630, current acc: 0.680987
time taken: 28.615931510925293
epoch: 6, training loss: 0.995213, training acc: 0.683210, valid loss: 0.994008, valid acc: 0.680987



train minibatch loop: 100%|██████████| 2463/2463 [00:26<00:00, 95.48it/s, accuracy=0.786, cost=0.75] 
test minibatch loop: 100%|██████████| 616/616 [00:02<00:00, 216.61it/s, accuracy=0.652, cost=0.945]
train minibatch loop:   0%|          | 9/2463 [00:00<00:27, 89.52it/s, accuracy=0.594, cost=1.05] 

epoch: 7, pass acc: 0.680987, current acc: 0.693441
time taken: 28.90414547920227
epoch: 7, training loss: 0.937690, training acc: 0.698893, valid loss: 0.947770, valid acc: 0.693441



train minibatch loop: 100%|██████████| 2463/2463 [00:25<00:00, 95.71it/s, accuracy=0.786, cost=0.707] 
test minibatch loop: 100%|██████████| 616/616 [00:02<00:00, 211.98it/s, accuracy=0.652, cost=0.901]
train minibatch loop:   0%|          | 10/2463 [00:00<00:26, 93.23it/s, accuracy=0.719, cost=0.828]

epoch: 8, pass acc: 0.693441, current acc: 0.702374
time taken: 28.642375230789185
epoch: 8, training loss: 0.893843, training acc: 0.710655, valid loss: 0.912714, valid acc: 0.702374



train minibatch loop: 100%|██████████| 2463/2463 [00:26<00:00, 94.62it/s, accuracy=0.786, cost=0.673]
test minibatch loop: 100%|██████████| 616/616 [00:02<00:00, 216.21it/s, accuracy=0.696, cost=0.866]
train minibatch loop:   0%|          | 9/2463 [00:00<00:27, 88.15it/s, accuracy=0.656, cost=0.979]

epoch: 9, pass acc: 0.702374, current acc: 0.710565
time taken: 28.882978916168213
epoch: 9, training loss: 0.859628, training acc: 0.720387, valid loss: 0.885560, valid acc: 0.710565



train minibatch loop: 100%|██████████| 2463/2463 [00:25<00:00, 95.47it/s, accuracy=0.786, cost=0.646]
test minibatch loop: 100%|██████████| 616/616 [00:02<00:00, 217.88it/s, accuracy=0.739, cost=0.836]
train minibatch loop:   0%|          | 11/2463 [00:00<00:24, 100.15it/s, accuracy=0.656, cost=0.954]

epoch: 10, pass acc: 0.710565, current acc: 0.715406
time taken: 28.62755060195923
epoch: 10, training loss: 0.832274, training acc: 0.727937, valid loss: 0.864087, valid acc: 0.715406



train minibatch loop: 100%|██████████| 2463/2463 [00:25<00:00, 94.80it/s, accuracy=0.786, cost=0.624]
test minibatch loop: 100%|██████████| 616/616 [00:02<00:00, 218.43it/s, accuracy=0.739, cost=0.81] 
train minibatch loop:   0%|          | 10/2463 [00:00<00:26, 92.49it/s, accuracy=0.656, cost=0.932]

epoch: 11, pass acc: 0.715406, current acc: 0.719619
time taken: 28.80274724960327
epoch: 11, training loss: 0.809904, training acc: 0.734103, valid loss: 0.846787, valid acc: 0.719619



train minibatch loop: 100%|██████████| 2463/2463 [00:24<00:00, 101.79it/s, accuracy=0.786, cost=0.606]
test minibatch loop: 100%|██████████| 616/616 [00:02<00:00, 234.45it/s, accuracy=0.739, cost=0.788]
train minibatch loop:   0%|          | 10/2463 [00:00<00:24, 98.90it/s, accuracy=0.719, cost=0.778]

epoch: 12, pass acc: 0.719619, current acc: 0.724288
time taken: 26.893165588378906
epoch: 12, training loss: 0.791237, training acc: 0.739750, valid loss: 0.832623, valid acc: 0.724288



train minibatch loop: 100%|██████████| 2463/2463 [00:23<00:00, 105.10it/s, accuracy=0.821, cost=0.591]
test minibatch loop: 100%|██████████| 616/616 [00:02<00:00, 232.67it/s, accuracy=0.739, cost=0.769]
train minibatch loop:   0%|          | 11/2463 [00:00<00:23, 105.56it/s, accuracy=0.719, cost=0.768]

epoch: 13, pass acc: 0.724288, current acc: 0.726826
time taken: 26.086301565170288
epoch: 13, training loss: 0.775384, training acc: 0.744370, valid loss: 0.820864, valid acc: 0.726826



train minibatch loop: 100%|██████████| 2463/2463 [00:23<00:00, 105.78it/s, accuracy=0.821, cost=0.578]
test minibatch loop: 100%|██████████| 616/616 [00:02<00:00, 229.20it/s, accuracy=0.696, cost=0.752]
train minibatch loop:   0%|          | 10/2463 [00:00<00:24, 98.49it/s, accuracy=0.781, cost=0.687]

epoch: 14, pass acc: 0.726826, current acc: 0.728532
time taken: 25.974788427352905
epoch: 14, training loss: 0.761716, training acc: 0.748430, valid loss: 0.810987, valid acc: 0.728532



train minibatch loop: 100%|██████████| 2463/2463 [00:23<00:00, 104.50it/s, accuracy=0.821, cost=0.567]
test minibatch loop: 100%|██████████| 616/616 [00:02<00:00, 217.30it/s, accuracy=0.696, cost=0.736]
train minibatch loop:   0%|          | 10/2463 [00:00<00:27, 90.61it/s, accuracy=0.719, cost=0.868]

epoch: 15, pass acc: 0.728532, current acc: 0.731678
time taken: 26.40718150138855
epoch: 15, training loss: 0.749777, training acc: 0.752008, valid loss: 0.802608, valid acc: 0.731678



train minibatch loop: 100%|██████████| 2463/2463 [00:25<00:00, 94.84it/s, accuracy=0.821, cost=0.558]
test minibatch loop: 100%|██████████| 616/616 [00:02<00:00, 218.78it/s, accuracy=0.696, cost=0.722]
train minibatch loop:   0%|          | 11/2463 [00:00<00:24, 100.87it/s, accuracy=0.719, cost=0.856]

epoch: 16, pass acc: 0.731678, current acc: 0.734521
time taken: 28.788124084472656
epoch: 16, training loss: 0.739230, training acc: 0.754571, valid loss: 0.795440, valid acc: 0.734521



train minibatch loop: 100%|██████████| 2463/2463 [00:25<00:00, 95.32it/s, accuracy=0.821, cost=0.55] 
test minibatch loop: 100%|██████████| 616/616 [00:02<00:00, 212.67it/s, accuracy=0.696, cost=0.709]
train minibatch loop:   0%|          | 9/2463 [00:00<00:28, 86.72it/s, accuracy=0.781, cost=0.786]

epoch: 17, pass acc: 0.734521, current acc: 0.735688
time taken: 28.74174976348877
epoch: 17, training loss: 0.729820, training acc: 0.757147, valid loss: 0.789265, valid acc: 0.735688



train minibatch loop: 100%|██████████| 2463/2463 [00:25<00:00, 95.41it/s, accuracy=0.821, cost=0.543]
test minibatch loop: 100%|██████████| 616/616 [00:02<00:00, 217.65it/s, accuracy=0.696, cost=0.697]
train minibatch loop:   0%|          | 10/2463 [00:00<00:26, 94.22it/s, accuracy=0.781, cost=0.644]

epoch: 18, pass acc: 0.735688, current acc: 0.736906
time taken: 28.64875364303589
epoch: 18, training loss: 0.721354, training acc: 0.759698, valid loss: 0.783914, valid acc: 0.736906



train minibatch loop: 100%|██████████| 2463/2463 [00:25<00:00, 94.92it/s, accuracy=0.857, cost=0.536]
test minibatch loop: 100%|██████████| 616/616 [00:02<00:00, 218.38it/s, accuracy=0.696, cost=0.687]
train minibatch loop:   0%|          | 10/2463 [00:00<00:25, 96.02it/s, accuracy=0.719, cost=0.827]

epoch: 19, pass acc: 0.736906, current acc: 0.737718
time taken: 28.772202253341675
epoch: 19, training loss: 0.713681, training acc: 0.762326, valid loss: 0.779254, valid acc: 0.737718



train minibatch loop: 100%|██████████| 2463/2463 [00:25<00:00, 95.52it/s, accuracy=0.857, cost=0.531] 
test minibatch loop: 100%|██████████| 616/616 [00:02<00:00, 216.14it/s, accuracy=0.696, cost=0.676]
train minibatch loop:   0%|          | 10/2463 [00:00<00:26, 91.18it/s, accuracy=0.719, cost=0.819]

epoch: 20, pass acc: 0.737718, current acc: 0.739342
time taken: 28.638182640075684
epoch: 20, training loss: 0.706681, training acc: 0.763887, valid loss: 0.775181, valid acc: 0.739342



train minibatch loop: 100%|██████████| 2463/2463 [00:25<00:00, 95.13it/s, accuracy=0.857, cost=0.526]
test minibatch loop: 100%|██████████| 616/616 [00:02<00:00, 217.17it/s, accuracy=0.739, cost=0.667]
train minibatch loop:   0%|          | 9/2463 [00:00<00:27, 88.63it/s, accuracy=0.719, cost=0.811]

epoch: 21, pass acc: 0.739342, current acc: 0.741189
time taken: 28.732747316360474
epoch: 21, training loss: 0.700259, training acc: 0.765790, valid loss: 0.771611, valid acc: 0.741189



train minibatch loop: 100%|██████████| 2463/2463 [00:25<00:00, 96.55it/s, accuracy=0.857, cost=0.522] 
test minibatch loop: 100%|██████████| 616/616 [00:02<00:00, 214.11it/s, accuracy=0.783, cost=0.658]
train minibatch loop:   0%|          | 10/2463 [00:00<00:26, 93.60it/s, accuracy=0.812, cost=0.617]

epoch: 22, pass acc: 0.741189, current acc: 0.742021
time taken: 28.73519778251648
epoch: 22, training loss: 0.694336, training acc: 0.767490, valid loss: 0.768473, valid acc: 0.742021



train minibatch loop: 100%|██████████| 2463/2463 [00:25<00:00, 95.43it/s, accuracy=0.857, cost=0.518]
test minibatch loop: 100%|██████████| 616/616 [00:02<00:00, 216.38it/s, accuracy=0.783, cost=0.65] 
train minibatch loop:   0%|          | 9/2463 [00:00<00:27, 89.91it/s, accuracy=0.719, cost=0.798]

time taken: 28.658127546310425
epoch: 23, training loss: 0.688850, training acc: 0.769089, valid loss: 0.765713, valid acc: 0.741970



train minibatch loop: 100%|██████████| 2463/2463 [00:25<00:00, 95.30it/s, accuracy=0.857, cost=0.515]
test minibatch loop: 100%|██████████| 616/616 [00:02<00:00, 219.95it/s, accuracy=0.783, cost=0.643]
train minibatch loop:   0%|          | 10/2463 [00:00<00:26, 92.47it/s, accuracy=0.719, cost=0.792]

epoch: 24, pass acc: 0.742021, current acc: 0.742681
time taken: 28.648456811904907
epoch: 24, training loss: 0.683748, training acc: 0.770751, valid loss: 0.763283, valid acc: 0.742681



train minibatch loop: 100%|██████████| 2463/2463 [00:25<00:00, 95.32it/s, accuracy=0.857, cost=0.512] 
test minibatch loop: 100%|██████████| 616/616 [00:02<00:00, 217.69it/s, accuracy=0.783, cost=0.635]
train minibatch loop:   0%|          | 11/2463 [00:00<00:24, 100.85it/s, accuracy=0.75, cost=0.716] 

time taken: 28.670026540756226
epoch: 25, training loss: 0.678984, training acc: 0.772210, valid loss: 0.761142, valid acc: 0.742630



train minibatch loop: 100%|██████████| 2463/2463 [00:25<00:00, 95.12it/s, accuracy=0.857, cost=0.509]
test minibatch loop: 100%|██████████| 616/616 [00:02<00:00, 217.15it/s, accuracy=0.783, cost=0.629]
train minibatch loop:   0%|          | 10/2463 [00:00<00:25, 95.63it/s, accuracy=0.75, cost=0.714] 

epoch: 26, pass acc: 0.742681, current acc: 0.743544
time taken: 28.733546257019043
epoch: 26, training loss: 0.674522, training acc: 0.773581, valid loss: 0.759257, valid acc: 0.743544



train minibatch loop: 100%|██████████| 2463/2463 [00:25<00:00, 95.22it/s, accuracy=0.857, cost=0.507]
test minibatch loop: 100%|██████████| 616/616 [00:02<00:00, 217.35it/s, accuracy=0.783, cost=0.622]
train minibatch loop:   0%|          | 10/2463 [00:00<00:24, 98.61it/s, accuracy=0.812, cost=0.595]

epoch: 27, pass acc: 0.743544, current acc: 0.743950
time taken: 28.70233988761902
epoch: 27, training loss: 0.670330, training acc: 0.774989, valid loss: 0.757598, valid acc: 0.743950



train minibatch loop: 100%|██████████| 2463/2463 [00:25<00:00, 95.47it/s, accuracy=0.857, cost=0.504]
test minibatch loop: 100%|██████████| 616/616 [00:02<00:00, 214.83it/s, accuracy=0.783, cost=0.617]
train minibatch loop:   0%|          | 9/2463 [00:00<00:28, 87.45it/s, accuracy=0.719, cost=0.773]

time taken: 28.67311692237854
epoch: 28, training loss: 0.666380, training acc: 0.776372, valid loss: 0.756140, valid acc: 0.743798



train minibatch loop: 100%|██████████| 2463/2463 [00:25<00:00, 95.22it/s, accuracy=0.857, cost=0.502]
test minibatch loop: 100%|██████████| 616/616 [00:02<00:00, 215.74it/s, accuracy=0.783, cost=0.611]
train minibatch loop:   0%|          | 9/2463 [00:00<00:27, 88.99it/s, accuracy=0.719, cost=0.768]

epoch: 29, pass acc: 0.743950, current acc: 0.744153
time taken: 28.724653244018555
epoch: 29, training loss: 0.662648, training acc: 0.777514, valid loss: 0.754863, valid acc: 0.744153



train minibatch loop: 100%|██████████| 2463/2463 [00:25<00:00, 95.14it/s, accuracy=0.857, cost=0.5]   
test minibatch loop: 100%|██████████| 616/616 [00:02<00:00, 213.82it/s, accuracy=0.783, cost=0.606]
train minibatch loop:   0%|          | 10/2463 [00:00<00:26, 94.11it/s, accuracy=0.812, cost=0.585]

epoch: 30, pass acc: 0.744153, current acc: 0.744457
time taken: 28.77310824394226
epoch: 30, training loss: 0.659113, training acc: 0.778504, valid loss: 0.753746, valid acc: 0.744457



train minibatch loop: 100%|██████████| 2463/2463 [00:25<00:00, 95.35it/s, accuracy=0.857, cost=0.498]
test minibatch loop: 100%|██████████| 616/616 [00:02<00:00, 215.26it/s, accuracy=0.783, cost=0.601]
train minibatch loop:   0%|          | 10/2463 [00:00<00:25, 96.22it/s, accuracy=0.719, cost=0.761]

epoch: 31, pass acc: 0.744457, current acc: 0.744762
time taken: 28.694873571395874
epoch: 31, training loss: 0.655758, training acc: 0.779557, valid loss: 0.752773, valid acc: 0.744762



train minibatch loop: 100%|██████████| 2463/2463 [00:25<00:00, 95.08it/s, accuracy=0.857, cost=0.497]
test minibatch loop: 100%|██████████| 616/616 [00:02<00:00, 215.59it/s, accuracy=0.783, cost=0.596]
train minibatch loop:   0%|          | 9/2463 [00:00<00:28, 86.68it/s, accuracy=0.812, cost=0.661]

epoch: 32, pass acc: 0.744762, current acc: 0.745016
time taken: 28.76305365562439
epoch: 32, training loss: 0.652567, training acc: 0.780508, valid loss: 0.751931, valid acc: 0.745016



train minibatch loop: 100%|██████████| 2463/2463 [00:25<00:00, 95.54it/s, accuracy=0.857, cost=0.495]
test minibatch loop: 100%|██████████| 616/616 [00:02<00:00, 213.01it/s, accuracy=0.783, cost=0.592]
train minibatch loop:   0%|          | 10/2463 [00:00<00:24, 98.70it/s, accuracy=0.812, cost=0.577]

epoch: 33, pass acc: 0.745016, current acc: 0.745219
time taken: 28.673179626464844
epoch: 33, training loss: 0.649525, training acc: 0.781460, valid loss: 0.751206, valid acc: 0.745219



train minibatch loop: 100%|██████████| 2463/2463 [00:25<00:00, 95.28it/s, accuracy=0.857, cost=0.494]
test minibatch loop: 100%|██████████| 616/616 [00:02<00:00, 217.01it/s, accuracy=0.783, cost=0.588]
train minibatch loop:   0%|          | 10/2463 [00:00<00:26, 92.33it/s, accuracy=0.719, cost=0.751]

epoch: 34, pass acc: 0.745219, current acc: 0.745472
time taken: 28.691425800323486
epoch: 34, training loss: 0.646621, training acc: 0.782729, valid loss: 0.750589, valid acc: 0.745472



train minibatch loop: 100%|██████████| 2463/2463 [00:25<00:00, 95.21it/s, accuracy=0.857, cost=0.492]
test minibatch loop: 100%|██████████| 616/616 [00:02<00:00, 219.42it/s, accuracy=0.783, cost=0.584]
train minibatch loop:   0%|          | 9/2463 [00:00<00:28, 86.17it/s, accuracy=0.812, cost=0.652]

epoch: 35, pass acc: 0.745472, current acc: 0.745777
time taken: 28.678489685058594
epoch: 35, training loss: 0.643844, training acc: 0.783516, valid loss: 0.750068, valid acc: 0.745777



train minibatch loop: 100%|██████████| 2463/2463 [00:25<00:00, 95.54it/s, accuracy=0.857, cost=0.491]
test minibatch loop: 100%|██████████| 616/616 [00:02<00:00, 214.17it/s, accuracy=0.783, cost=0.581]
train minibatch loop:   0%|          | 10/2463 [00:00<00:25, 94.90it/s, accuracy=0.719, cost=0.746]

time taken: 28.657948970794678
epoch: 36, training loss: 0.641184, training acc: 0.784607, valid loss: 0.749637, valid acc: 0.745574



train minibatch loop: 100%|██████████| 2463/2463 [00:25<00:00, 95.24it/s, accuracy=0.857, cost=0.489] 
test minibatch loop: 100%|██████████| 616/616 [00:02<00:00, 215.19it/s, accuracy=0.783, cost=0.578]
train minibatch loop:   0%|          | 10/2463 [00:00<00:24, 98.59it/s, accuracy=0.719, cost=0.743]

time taken: 28.724912405014038
epoch: 37, training loss: 0.638633, training acc: 0.785609, valid loss: 0.749287, valid acc: 0.745574



train minibatch loop: 100%|██████████| 2463/2463 [00:25<00:00, 94.77it/s, accuracy=0.857, cost=0.488]
test minibatch loop: 100%|██████████| 616/616 [00:02<00:00, 217.93it/s, accuracy=0.783, cost=0.575]

time taken: 28.819119691848755
epoch: 38, training loss: 0.636182, training acc: 0.786624, valid loss: 0.749011, valid acc: 0.745269

break epoch:39






In [16]:
real_Y, predict_Y = [], []

pbar = tqdm(
    range(0, test_X.shape[0], batch_size), desc = 'validation minibatch loop'
)
for i in pbar:
    batch_x = convert_sparse_matrix_to_sparse_tensor(test_X[i : min(i + batch_size, test_X.shape[0])])
    batch_y = test_Y[i : min(i + batch_size, test_X.shape[0])]
    predict_Y += np.argmax(
        sess.run(
            model.logits, feed_dict = {model.X: batch_x[0], model.W: batch_x[1], model.Y: batch_y}
        ),
        1,
    ).tolist()
    real_Y += batch_y.tolist()

validation minibatch loop: 100%|██████████| 616/616 [00:01<00:00, 420.11it/s]


In [17]:
print(
    metrics.classification_report(
        real_Y, predict_Y, target_names = unique_labels
    )
)

             precision    recall  f1-score   support

      anger       0.79      0.75      0.77      3803
       fear       0.73      0.73      0.73      3784
        joy       0.71      0.77      0.74      3872
       love       0.81      0.80      0.80      3052
    sadness       0.72      0.70      0.71      3205
   surprise       0.73      0.70      0.72      1987

avg / total       0.75      0.74      0.75     19703



In [18]:
text = classification_textcleaning('kerajaan sebenarnya sangat sayangkan rakyatnya')
transformed = bow_chars.transform([text[0]])
batch_x = convert_sparse_matrix_to_sparse_tensor(transformed)
sess.run(tf.nn.softmax(model.logits), feed_dict = {model.X: batch_x[0], model.W: batch_x[1]})

array([[4.2693354e-03, 3.0179410e-03, 5.0122179e-03, 9.8567480e-01,
        1.7875731e-03, 2.3805207e-04]], dtype=float32)

In [19]:
saver.save(sess, 'fast-text-char/model.ckpt')

'fast-text-char/model.ckpt'

In [20]:
import pickle
with open('vectorizer-sparse-emotion.pkl','wb') as fopen:
    pickle.dump(bow_chars, fopen)