In [1]:
import re
import numpy as np
import pandas as pd
import collections
from sklearn import metrics
from sklearn.cross_validation import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.preprocessing import LabelEncoder
from unidecode import unidecode
import tensorflow as tf
import pandas as pd
from tqdm import tqdm
import time



In [2]:
permulaan = [
    'bel',
    'se',
    'ter',
    'men',
    'meng',
    'mem',
    'memper',
    'di',
    'pe',
    'me',
    'ke',
    'ber',
    'pen',
    'per',
]

hujung = ['kan', 'kah', 'lah', 'tah', 'nya', 'an', 'wan', 'wati', 'ita']

def naive_stemmer(word):
    assert isinstance(word, str), 'input must be a string'
    hujung_result = [e for e in hujung if word.endswith(e)]
    if len(hujung_result):
        hujung_result = max(hujung_result, key = len)
        if len(hujung_result):
            word = word[: -len(hujung_result)]
    permulaan_result = [e for e in permulaan if word.startswith(e)]
    if len(permulaan_result):
        permulaan_result = max(permulaan_result, key = len)
        if len(permulaan_result):
            word = word[len(permulaan_result) :]
    return word

In [3]:
def classification_textcleaning(string):
    string = re.sub(
        'http\S+|www.\S+',
        '',
        ' '.join(
            [i for i in string.split() if i.find('#') < 0 and i.find('@') < 0]
        ),
    )
    string = unidecode(string).replace('.', ' . ').replace(',', ' , ')
    string = re.sub('[^A-Za-z ]+', ' ', string)
    string = re.sub(r'[ ]+', ' ', string).strip()
    string = ' '.join(
        [i for i in re.findall('[\\w\']+|[;:\-\(\)&.,!?"]', string) if len(i)]
    )
    string = string.lower().split()
    string = [naive_stemmer(word) for word in string]
    return ' '.join([word for word in string if len(word) > 1])

def convert_sparse_matrix_to_sparse_tensor(X):
    coo = X.tocoo()
    indices = np.mat([coo.row, coo.col]).transpose()
    return tf.SparseTensorValue(indices, coo.col, coo.shape), tf.SparseTensorValue(indices, coo.data, coo.shape)

In [4]:
import os
emotion_files = [f for f in os.listdir(os.getcwd()) if 'translated-' in f]
emotion_files

['translated-joy',
 'translated-love',
 'translated-fear',
 'translated-sadness',
 'translated-surprise',
 'translated-anger']

In [5]:
texts, labels = [], []
for f in emotion_files:
    with open(f) as fopen:
        dataset = list(filter(None, fopen.read().split('\n')))
        labels.extend([f.split('-')[1]] * len(dataset))
        texts.extend(dataset)

In [6]:
unique_labels = np.unique(labels).tolist()
labels = LabelEncoder().fit_transform(labels)
unique_labels

['anger', 'fear', 'joy', 'love', 'sadness', 'surprise']

In [7]:
x, y = [], []
for i in range(len(texts)):
    s = classification_textcleaning(texts[i])
    if len(s) > 5:
        x.append(s)
        y.append(labels[i])

In [8]:
bow_chars = CountVectorizer(ngram_range=(3, 5), analyzer='char_wb', max_features=300000).fit(texts)
delattr(bow_chars, 'stop_words_')

In [9]:
feature_shape = bow_chars.transform(texts[:1]).shape[1]
feature_shape

86078

In [10]:
class Model:
    def __init__(self, output_size, vocab_size, learning_rate):
        self.X = tf.sparse_placeholder(tf.int32)
        self.W = tf.sparse_placeholder(tf.int32)
        self.Y = tf.placeholder(tf.int32, [None])
        embeddings = tf.Variable(tf.truncated_normal([vocab_size,128]))
        embed = tf.nn.embedding_lookup_sparse(embeddings, self.X, self.W, combiner='mean')
        self.logits = tf.layers.dense(embed, output_size)
        self.cost = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits = self.logits, labels = self.Y))
        self.optimizer = tf.train.AdamOptimizer(learning_rate = learning_rate).minimize(self.cost)
        correct_pred = tf.equal(tf.argmax(self.logits, 1,output_type=tf.int32), self.Y)
        self.accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

In [11]:
sess = tf.InteractiveSession()
model = Model(len(unique_labels), feature_shape, 1e-4)
sess.run(tf.global_variables_initializer())

  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


In [12]:
saver = tf.train.Saver(tf.trainable_variables())
saver.save(sess, 'fast-text-char/model.ckpt')

'fast-text-char/model.ckpt'

In [13]:
vectors = bow_chars.transform(texts)
train_X, test_X, train_Y, test_Y = train_test_split(
    vectors, labels, test_size = 0.2
)

In [14]:
from tqdm import tqdm
import time

batch_size = 32
EARLY_STOPPING, CURRENT_CHECKPOINT, CURRENT_ACC, EPOCH = 3, 0, 0, 0

while True:
    lasttime = time.time()
    if CURRENT_CHECKPOINT == EARLY_STOPPING:
        print('break epoch:%d\n' % (EPOCH))
        break

    train_acc, train_loss, test_acc, test_loss = 0, 0, 0, 0
    pbar = tqdm(
        range(0, train_X.shape[0], batch_size), desc = 'train minibatch loop'
    )
    for i in pbar:
        batch_x = convert_sparse_matrix_to_sparse_tensor(train_X[i : min(i + batch_size, train_X.shape[0])])
        batch_y = train_Y[i : min(i + batch_size, train_X.shape[0])]
        batch_x_expand = np.expand_dims(batch_x,axis = 1)
        acc, cost, _ = sess.run(
            [model.accuracy, model.cost, model.optimizer],
            feed_dict = {
                model.Y: batch_y,
                model.X: batch_x[0],
                model.W: batch_x[1],
            },
        )
        assert not np.isnan(cost)
        train_loss += cost
        train_acc += acc
        pbar.set_postfix(cost = cost, accuracy = acc)

    pbar = tqdm(range(0, test_X.shape[0], batch_size), desc = 'test minibatch loop')
    for i in pbar:
        batch_x = convert_sparse_matrix_to_sparse_tensor(test_X[i : min(i + batch_size, test_X.shape[0])])
        batch_y = test_Y[i : min(i + batch_size, test_X.shape[0])]
        batch_x_expand = np.expand_dims(batch_x,axis = 1)
        acc, cost = sess.run(
            [model.accuracy, model.cost],
            feed_dict = {
                model.Y: batch_y,
                model.X: batch_x[0],
                model.W: batch_x[1],
            },
        )
        test_loss += cost
        test_acc += acc
        pbar.set_postfix(cost = cost, accuracy = acc)

    train_loss /= train_X.shape[0] / batch_size
    train_acc /= train_X.shape[0] / batch_size
    test_loss /= test_X.shape[0] / batch_size
    test_acc /= test_X.shape[0] / batch_size

    if test_acc > CURRENT_ACC:
        print(
            'epoch: %d, pass acc: %f, current acc: %f'
            % (EPOCH, CURRENT_ACC, test_acc)
        )
        CURRENT_ACC = test_acc
        CURRENT_CHECKPOINT = 0
    else:
        CURRENT_CHECKPOINT += 1

    print('time taken:', time.time() - lasttime)
    print(
        'epoch: %d, training loss: %f, training acc: %f, valid loss: %f, valid acc: %f\n'
        % (EPOCH, train_loss, train_acc, test_loss, test_acc)
    )
    EPOCH += 1

train minibatch loop: 100%|██████████| 2463/2463 [00:34<00:00, 70.78it/s, accuracy=0.357, cost=1.7]  
test minibatch loop: 100%|██████████| 616/616 [00:01<00:00, 380.79it/s, accuracy=0.435, cost=1.58]
train minibatch loop:   0%|          | 8/2463 [00:00<00:34, 70.91it/s, accuracy=0.406, cost=1.63]

epoch: 0, pass acc: 0.000000, current acc: 0.435818
time taken: 36.526482820510864
epoch: 0, training loss: 1.719506, training acc: 0.311557, valid loss: 1.642083, valid acc: 0.435818



train minibatch loop: 100%|██████████| 2463/2463 [00:34<00:00, 70.89it/s, accuracy=0.429, cost=1.55]
test minibatch loop: 100%|██████████| 616/616 [00:01<00:00, 387.49it/s, accuracy=0.696, cost=1.39]
train minibatch loop:   0%|          | 8/2463 [00:00<00:34, 71.40it/s, accuracy=0.406, cost=1.46]

epoch: 1, pass acc: 0.435818, current acc: 0.559877
time taken: 36.337278842926025
epoch: 1, training loss: 1.541226, training acc: 0.506874, valid loss: 1.439651, valid acc: 0.559877



train minibatch loop: 100%|██████████| 2463/2463 [00:34<00:00, 70.93it/s, accuracy=0.536, cost=1.39]
test minibatch loop: 100%|██████████| 616/616 [00:01<00:00, 381.69it/s, accuracy=0.652, cost=1.2] 
train minibatch loop:   0%|          | 8/2463 [00:00<00:34, 71.39it/s, accuracy=0.438, cost=1.3] 

epoch: 2, pass acc: 0.559877, current acc: 0.626852
time taken: 36.342402935028076
epoch: 2, training loss: 1.323602, training acc: 0.601750, valid loss: 1.231924, valid acc: 0.626852



train minibatch loop: 100%|██████████| 2463/2463 [00:34<00:00, 70.88it/s, accuracy=0.571, cost=1.27] 
test minibatch loop: 100%|██████████| 616/616 [00:01<00:00, 387.22it/s, accuracy=0.696, cost=1.07] 
train minibatch loop:   0%|          | 8/2463 [00:00<00:34, 71.34it/s, accuracy=0.469, cost=1.17] 

epoch: 3, pass acc: 0.626852, current acc: 0.669962
time taken: 36.342238664627075
epoch: 3, training loss: 1.134289, training acc: 0.655741, valid loss: 1.075691, valid acc: 0.669962



train minibatch loop: 100%|██████████| 2463/2463 [00:34<00:00, 70.57it/s, accuracy=0.643, cost=1.19] 
test minibatch loop: 100%|██████████| 616/616 [00:01<00:00, 388.18it/s, accuracy=0.652, cost=0.989]
train minibatch loop:   0%|          | 8/2463 [00:00<00:34, 71.03it/s, accuracy=0.531, cost=1.06] 

epoch: 4, pass acc: 0.669962, current acc: 0.698719
time taken: 36.333343744277954
epoch: 4, training loss: 0.998491, training acc: 0.694736, valid loss: 0.969251, valid acc: 0.698719



train minibatch loop: 100%|██████████| 2463/2463 [00:34<00:00, 70.90it/s, accuracy=0.679, cost=1.15] 
test minibatch loop: 100%|██████████| 616/616 [00:01<00:00, 386.50it/s, accuracy=0.609, cost=0.931]
train minibatch loop:   0%|          | 8/2463 [00:00<00:34, 71.71it/s, accuracy=0.531, cost=0.984]

epoch: 5, pass acc: 0.698719, current acc: 0.717174
time taken: 36.339839696884155
epoch: 5, training loss: 0.905001, training acc: 0.720179, valid loss: 0.897002, valid acc: 0.717174



train minibatch loop: 100%|██████████| 2463/2463 [00:34<00:00, 71.06it/s, accuracy=0.679, cost=1.12] 
test minibatch loop: 100%|██████████| 616/616 [00:01<00:00, 386.56it/s, accuracy=0.609, cost=0.889]
train minibatch loop:   0%|          | 8/2463 [00:00<00:34, 70.95it/s, accuracy=0.531, cost=0.924]

epoch: 6, pass acc: 0.717174, current acc: 0.729609
time taken: 36.34914755821228
epoch: 6, training loss: 0.839532, training acc: 0.738272, valid loss: 0.846647, valid acc: 0.729609



train minibatch loop: 100%|██████████| 2463/2463 [00:34<00:00, 70.81it/s, accuracy=0.714, cost=1.09] 
test minibatch loop: 100%|██████████| 616/616 [00:01<00:00, 387.07it/s, accuracy=0.609, cost=0.857]
train minibatch loop:   0%|          | 8/2463 [00:00<00:34, 70.70it/s, accuracy=0.625, cost=0.878]

epoch: 7, pass acc: 0.729609, current acc: 0.737577
time taken: 36.355093002319336
epoch: 7, training loss: 0.792104, training acc: 0.749782, valid loss: 0.810350, valid acc: 0.737577



train minibatch loop: 100%|██████████| 2463/2463 [00:34<00:00, 70.80it/s, accuracy=0.714, cost=1.07] 
test minibatch loop: 100%|██████████| 616/616 [00:01<00:00, 387.46it/s, accuracy=0.652, cost=0.831]
train minibatch loop:   0%|          | 8/2463 [00:00<00:34, 71.31it/s, accuracy=0.656, cost=0.843]

epoch: 8, pass acc: 0.737577, current acc: 0.743738
time taken: 36.382598638534546
epoch: 8, training loss: 0.756453, training acc: 0.758982, valid loss: 0.783319, valid acc: 0.743738



train minibatch loop: 100%|██████████| 2463/2463 [00:34<00:00, 70.96it/s, accuracy=0.714, cost=1.05] 
test minibatch loop: 100%|██████████| 616/616 [00:01<00:00, 388.64it/s, accuracy=0.652, cost=0.81] 
train minibatch loop:   0%|          | 8/2463 [00:00<00:34, 70.95it/s, accuracy=0.688, cost=0.814]

epoch: 9, pass acc: 0.743738, current acc: 0.747748
time taken: 36.29483675956726
epoch: 9, training loss: 0.728712, training acc: 0.765998, valid loss: 0.762610, valid acc: 0.747748



train minibatch loop: 100%|██████████| 2463/2463 [00:34<00:00, 70.91it/s, accuracy=0.714, cost=1.03] 
test minibatch loop: 100%|██████████| 616/616 [00:01<00:00, 389.73it/s, accuracy=0.652, cost=0.793]
train minibatch loop:   0%|          | 8/2463 [00:00<00:34, 70.73it/s, accuracy=0.688, cost=0.792]

epoch: 10, pass acc: 0.747748, current acc: 0.752315
time taken: 36.31716275215149
epoch: 10, training loss: 0.706465, training acc: 0.771442, valid loss: 0.746365, valid acc: 0.752315



train minibatch loop: 100%|██████████| 2463/2463 [00:34<00:00, 70.87it/s, accuracy=0.714, cost=1.02] 
test minibatch loop: 100%|██████████| 616/616 [00:01<00:00, 374.06it/s, accuracy=0.696, cost=0.778]
train minibatch loop:   0%|          | 8/2463 [00:00<00:35, 70.09it/s, accuracy=0.688, cost=0.773]

epoch: 11, pass acc: 0.752315, current acc: 0.756446
time taken: 36.397101402282715
epoch: 11, training loss: 0.688158, training acc: 0.776771, valid loss: 0.733372, valid acc: 0.756446



train minibatch loop: 100%|██████████| 2463/2463 [00:34<00:00, 70.60it/s, accuracy=0.714, cost=1]    
test minibatch loop: 100%|██████████| 616/616 [00:01<00:00, 383.09it/s, accuracy=0.696, cost=0.765]
train minibatch loop:   0%|          | 8/2463 [00:00<00:34, 70.79it/s, accuracy=0.688, cost=0.757]

epoch: 12, pass acc: 0.756446, current acc: 0.759898
time taken: 36.49841284751892
epoch: 12, training loss: 0.672764, training acc: 0.780780, valid loss: 0.722814, valid acc: 0.759898



train minibatch loop: 100%|██████████| 2463/2463 [00:34<00:00, 70.98it/s, accuracy=0.75, cost=0.99]  
test minibatch loop: 100%|██████████| 616/616 [00:01<00:00, 382.64it/s, accuracy=0.696, cost=0.753]
train minibatch loop:   0%|          | 8/2463 [00:00<00:34, 71.26it/s, accuracy=0.688, cost=0.743]

epoch: 13, pass acc: 0.759898, current acc: 0.761623
time taken: 36.41802668571472
epoch: 13, training loss: 0.659580, training acc: 0.784284, valid loss: 0.714125, valid acc: 0.761623



train minibatch loop: 100%|██████████| 2463/2463 [00:34<00:00, 70.92it/s, accuracy=0.75, cost=0.977] 
test minibatch loop: 100%|██████████| 616/616 [00:01<00:00, 387.85it/s, accuracy=0.696, cost=0.743]
train minibatch loop:   0%|          | 8/2463 [00:00<00:34, 71.36it/s, accuracy=0.719, cost=0.732]

epoch: 14, pass acc: 0.761623, current acc: 0.762689
time taken: 36.31924271583557
epoch: 14, training loss: 0.648110, training acc: 0.787101, valid loss: 0.706897, valid acc: 0.762689



train minibatch loop: 100%|██████████| 2463/2463 [00:34<00:00, 70.86it/s, accuracy=0.75, cost=0.964] 
test minibatch loop: 100%|██████████| 616/616 [00:01<00:00, 390.25it/s, accuracy=0.696, cost=0.734]
train minibatch loop:   0%|          | 8/2463 [00:00<00:34, 70.38it/s, accuracy=0.719, cost=0.721]

epoch: 15, pass acc: 0.762689, current acc: 0.764973
time taken: 36.338618755340576
epoch: 15, training loss: 0.638001, training acc: 0.790336, valid loss: 0.700835, valid acc: 0.764973



train minibatch loop: 100%|██████████| 2463/2463 [00:34<00:00, 70.95it/s, accuracy=0.75, cost=0.952] 
test minibatch loop: 100%|██████████| 616/616 [00:01<00:00, 388.74it/s, accuracy=0.696, cost=0.725]
train minibatch loop:   0%|          | 8/2463 [00:00<00:34, 71.17it/s, accuracy=0.719, cost=0.712]

epoch: 16, pass acc: 0.764973, current acc: 0.765125
time taken: 36.30199193954468
epoch: 16, training loss: 0.628989, training acc: 0.792976, valid loss: 0.695717, valid acc: 0.765125



train minibatch loop: 100%|██████████| 2463/2463 [00:34<00:00, 71.06it/s, accuracy=0.75, cost=0.941] 
test minibatch loop: 100%|██████████| 616/616 [00:01<00:00, 387.32it/s, accuracy=0.739, cost=0.718]
train minibatch loop:   0%|          | 8/2463 [00:00<00:34, 71.01it/s, accuracy=0.75, cost=0.704] 

epoch: 17, pass acc: 0.765125, current acc: 0.766718
time taken: 36.254037380218506
epoch: 17, training loss: 0.620877, training acc: 0.795336, valid loss: 0.691374, valid acc: 0.766718



train minibatch loop: 100%|██████████| 2463/2463 [00:34<00:00, 70.98it/s, accuracy=0.75, cost=0.93]  
test minibatch loop: 100%|██████████| 616/616 [00:01<00:00, 389.20it/s, accuracy=0.739, cost=0.711]
train minibatch loop:   0%|          | 8/2463 [00:00<00:34, 70.85it/s, accuracy=0.75, cost=0.697] 

epoch: 18, pass acc: 0.766718, current acc: 0.768038
time taken: 36.285399198532104
epoch: 18, training loss: 0.613515, training acc: 0.797455, valid loss: 0.687674, valid acc: 0.768038



train minibatch loop: 100%|██████████| 2463/2463 [00:34<00:00, 70.93it/s, accuracy=0.75, cost=0.92]  
test minibatch loop: 100%|██████████| 616/616 [00:01<00:00, 389.41it/s, accuracy=0.739, cost=0.704]
train minibatch loop:   0%|          | 8/2463 [00:00<00:34, 70.80it/s, accuracy=0.75, cost=0.69]  

epoch: 19, pass acc: 0.768038, current acc: 0.768749
time taken: 36.307868242263794
epoch: 19, training loss: 0.606783, training acc: 0.799028, valid loss: 0.684515, valid acc: 0.768749



train minibatch loop: 100%|██████████| 2463/2463 [00:34<00:00, 70.91it/s, accuracy=0.75, cost=0.91]  
test minibatch loop: 100%|██████████| 616/616 [00:01<00:00, 390.15it/s, accuracy=0.739, cost=0.698]
train minibatch loop:   0%|          | 8/2463 [00:00<00:34, 71.83it/s, accuracy=0.75, cost=0.684] 

epoch: 20, pass acc: 0.768749, current acc: 0.769307
time taken: 36.31364560127258
epoch: 20, training loss: 0.600590, training acc: 0.800957, valid loss: 0.681815, valid acc: 0.769307



train minibatch loop: 100%|██████████| 2463/2463 [00:34<00:00, 71.23it/s, accuracy=0.75, cost=0.901] 
test minibatch loop: 100%|██████████| 616/616 [00:01<00:00, 391.62it/s, accuracy=0.739, cost=0.693]
train minibatch loop:   0%|          | 8/2463 [00:00<00:34, 71.49it/s, accuracy=0.75, cost=0.678] 

epoch: 21, pass acc: 0.769307, current acc: 0.769611
time taken: 36.282639265060425
epoch: 21, training loss: 0.594859, training acc: 0.802632, valid loss: 0.679508, valid acc: 0.769611



train minibatch loop: 100%|██████████| 2463/2463 [00:34<00:00, 71.02it/s, accuracy=0.75, cost=0.892] 
test minibatch loop: 100%|██████████| 616/616 [00:01<00:00, 385.48it/s, accuracy=0.696, cost=0.688]
train minibatch loop:   0%|          | 8/2463 [00:00<00:34, 70.45it/s, accuracy=0.75, cost=0.673] 

epoch: 22, pass acc: 0.769611, current acc: 0.769744
time taken: 36.27955889701843
epoch: 22, training loss: 0.589530, training acc: 0.804091, valid loss: 0.677539, valid acc: 0.769744



train minibatch loop: 100%|██████████| 2463/2463 [00:34<00:00, 70.99it/s, accuracy=0.75, cost=0.883] 
test minibatch loop: 100%|██████████| 616/616 [00:01<00:00, 389.81it/s, accuracy=0.696, cost=0.684]
train minibatch loop:   0%|          | 8/2463 [00:00<00:34, 71.44it/s, accuracy=0.75, cost=0.669] 

epoch: 23, pass acc: 0.769744, current acc: 0.770759
time taken: 36.27450656890869
epoch: 23, training loss: 0.584552, training acc: 0.805537, valid loss: 0.675864, valid acc: 0.770759



train minibatch loop: 100%|██████████| 2463/2463 [00:34<00:00, 71.02it/s, accuracy=0.75, cost=0.875] 
test minibatch loop: 100%|██████████| 616/616 [00:01<00:00, 384.88it/s, accuracy=0.739, cost=0.68] 
train minibatch loop:   0%|          | 8/2463 [00:00<00:34, 71.30it/s, accuracy=0.75, cost=0.664] 

epoch: 24, pass acc: 0.770759, current acc: 0.771286
time taken: 36.28367853164673
epoch: 24, training loss: 0.579884, training acc: 0.807111, valid loss: 0.674445, valid acc: 0.771286



train minibatch loop: 100%|██████████| 2463/2463 [00:34<00:00, 71.01it/s, accuracy=0.75, cost=0.868] 
test minibatch loop: 100%|██████████| 616/616 [00:01<00:00, 391.23it/s, accuracy=0.739, cost=0.676]
train minibatch loop:   0%|          | 8/2463 [00:00<00:34, 71.26it/s, accuracy=0.75, cost=0.66]  

epoch: 25, pass acc: 0.771286, current acc: 0.771540
time taken: 36.26240372657776
epoch: 25, training loss: 0.575489, training acc: 0.808709, valid loss: 0.673251, valid acc: 0.771540



train minibatch loop: 100%|██████████| 2463/2463 [00:34<00:00, 71.02it/s, accuracy=0.75, cost=0.861] 
test minibatch loop: 100%|██████████| 616/616 [00:01<00:00, 388.60it/s, accuracy=0.739, cost=0.673]
train minibatch loop:   0%|          | 8/2463 [00:00<00:34, 70.21it/s, accuracy=0.75, cost=0.657] 

epoch: 26, pass acc: 0.771540, current acc: 0.771844
time taken: 36.26729607582092
epoch: 26, training loss: 0.571340, training acc: 0.810156, valid loss: 0.672255, valid acc: 0.771844



train minibatch loop: 100%|██████████| 2463/2463 [00:34<00:00, 71.04it/s, accuracy=0.75, cost=0.854] 
test minibatch loop: 100%|██████████| 616/616 [00:01<00:00, 388.18it/s, accuracy=0.739, cost=0.67] 
train minibatch loop:   0%|          | 8/2463 [00:00<00:34, 70.72it/s, accuracy=0.75, cost=0.653] 

time taken: 36.260812520980835
epoch: 27, training loss: 0.567410, training acc: 0.811412, valid loss: 0.671436, valid acc: 0.771540



train minibatch loop: 100%|██████████| 2463/2463 [00:34<00:00, 71.20it/s, accuracy=0.75, cost=0.847] 
test minibatch loop: 100%|██████████| 616/616 [00:01<00:00, 389.44it/s, accuracy=0.739, cost=0.668]
train minibatch loop:   0%|          | 8/2463 [00:00<00:34, 70.53it/s, accuracy=0.75, cost=0.65]  

time taken: 36.282947301864624
epoch: 28, training loss: 0.563677, training acc: 0.812757, valid loss: 0.670772, valid acc: 0.771844



train minibatch loop: 100%|██████████| 2463/2463 [00:34<00:00, 71.00it/s, accuracy=0.75, cost=0.841] 
test minibatch loop: 100%|██████████| 616/616 [00:01<00:00, 382.91it/s, accuracy=0.739, cost=0.665]
train minibatch loop:   0%|          | 8/2463 [00:00<00:34, 71.19it/s, accuracy=0.75, cost=0.647] 

epoch: 29, pass acc: 0.771844, current acc: 0.772149
time taken: 36.29914617538452
epoch: 29, training loss: 0.560123, training acc: 0.813747, valid loss: 0.670249, valid acc: 0.772149



train minibatch loop: 100%|██████████| 2463/2463 [00:34<00:00, 71.03it/s, accuracy=0.786, cost=0.835]
test minibatch loop: 100%|██████████| 616/616 [00:01<00:00, 389.13it/s, accuracy=0.739, cost=0.663]
train minibatch loop:   0%|          | 8/2463 [00:00<00:34, 70.54it/s, accuracy=0.75, cost=0.644] 

time taken: 36.25847387313843
epoch: 30, training loss: 0.556732, training acc: 0.814865, valid loss: 0.669850, valid acc: 0.771997



train minibatch loop: 100%|██████████| 2463/2463 [00:34<00:00, 70.98it/s, accuracy=0.786, cost=0.83] 
test minibatch loop: 100%|██████████| 616/616 [00:01<00:00, 388.67it/s, accuracy=0.739, cost=0.661]
train minibatch loop:   0%|          | 8/2463 [00:00<00:34, 71.39it/s, accuracy=0.75, cost=0.641] 

epoch: 31, pass acc: 0.772149, current acc: 0.772504
time taken: 36.28441309928894
epoch: 31, training loss: 0.553490, training acc: 0.815652, valid loss: 0.669565, valid acc: 0.772504



train minibatch loop: 100%|██████████| 2463/2463 [00:34<00:00, 70.85it/s, accuracy=0.786, cost=0.825]
test minibatch loop: 100%|██████████| 616/616 [00:01<00:00, 388.94it/s, accuracy=0.739, cost=0.659]
train minibatch loop:   0%|          | 8/2463 [00:00<00:34, 71.56it/s, accuracy=0.75, cost=0.638] 

epoch: 32, pass acc: 0.772504, current acc: 0.772860
time taken: 36.26200866699219
epoch: 32, training loss: 0.550383, training acc: 0.816794, valid loss: 0.669381, valid acc: 0.772860



train minibatch loop: 100%|██████████| 2463/2463 [00:34<00:00, 70.99it/s, accuracy=0.786, cost=0.82] 
test minibatch loop: 100%|██████████| 616/616 [00:01<00:00, 389.30it/s, accuracy=0.739, cost=0.658]
train minibatch loop:   0%|          | 8/2463 [00:00<00:34, 70.23it/s, accuracy=0.75, cost=0.636] 

epoch: 33, pass acc: 0.772860, current acc: 0.772910
time taken: 36.27719235420227
epoch: 33, training loss: 0.547402, training acc: 0.817809, valid loss: 0.669289, valid acc: 0.772910



train minibatch loop: 100%|██████████| 2463/2463 [00:34<00:00, 70.96it/s, accuracy=0.786, cost=0.815]
test minibatch loop: 100%|██████████| 616/616 [00:01<00:00, 384.71it/s, accuracy=0.739, cost=0.657]
train minibatch loop:   0%|          | 8/2463 [00:00<00:34, 71.33it/s, accuracy=0.75, cost=0.633] 

time taken: 36.311283111572266
epoch: 34, training loss: 0.544536, training acc: 0.818837, valid loss: 0.669282, valid acc: 0.772301



train minibatch loop: 100%|██████████| 2463/2463 [00:34<00:00, 71.12it/s, accuracy=0.786, cost=0.811]
test minibatch loop: 100%|██████████| 616/616 [00:01<00:00, 385.35it/s, accuracy=0.739, cost=0.655]
train minibatch loop:   0%|          | 8/2463 [00:00<00:34, 70.68it/s, accuracy=0.75, cost=0.631] 

time taken: 36.267308950424194
epoch: 35, training loss: 0.541777, training acc: 0.819725, valid loss: 0.669350, valid acc: 0.772301



train minibatch loop: 100%|██████████| 2463/2463 [00:34<00:00, 70.97it/s, accuracy=0.786, cost=0.806]
test minibatch loop: 100%|██████████| 616/616 [00:01<00:00, 389.88it/s, accuracy=0.739, cost=0.654]

time taken: 36.284964084625244
epoch: 36, training loss: 0.539117, training acc: 0.820575, valid loss: 0.669489, valid acc: 0.772251

break epoch:37






In [15]:
real_Y, predict_Y = [], []

pbar = tqdm(
    range(0, test_X.shape[0], batch_size), desc = 'validation minibatch loop'
)
for i in pbar:
    batch_x = convert_sparse_matrix_to_sparse_tensor(test_X[i : min(i + batch_size, test_X.shape[0])])
    batch_y = test_Y[i : min(i + batch_size, test_X.shape[0])]
    predict_Y += np.argmax(
        sess.run(
            model.logits, feed_dict = {model.X: batch_x[0], model.W: batch_x[1], model.Y: batch_y}
        ),
        1,
    ).tolist()
    real_Y += batch_y.tolist()

validation minibatch loop: 100%|██████████| 616/616 [00:00<00:00, 721.06it/s]


In [16]:
print(
    metrics.classification_report(
        real_Y, predict_Y, target_names = unique_labels
    )
)

             precision    recall  f1-score   support

      anger       0.79      0.79      0.79      3698
       fear       0.75      0.75      0.75      3808
        joy       0.76      0.77      0.77      3945
       love       0.84      0.83      0.84      3049
    sadness       0.75      0.72      0.74      3251
   surprise       0.74      0.76      0.75      1952

avg / total       0.77      0.77      0.77     19703



In [17]:
text = classification_textcleaning('kerajaan sebenarnya sangat sayangkan rakyatnya')
transformed = bow_chars.transform([text[0]])
batch_x = convert_sparse_matrix_to_sparse_tensor(transformed)
sess.run(tf.nn.softmax(model.logits), feed_dict = {model.X: batch_x[0], model.W: batch_x[1]})

array([[1.1725525e-07, 2.0598983e-07, 2.3001657e-04, 9.8360693e-01,
        3.8486030e-14, 1.6162727e-02]], dtype=float32)

In [18]:
saver.save(sess, 'fast-text-char/model.ckpt')

'fast-text-char/model.ckpt'

In [19]:
import pickle
with open('vectorizer-sparse-emotion.pkl','wb') as fopen:
    pickle.dump(bow_chars, fopen)