In [1]:
import re
import numpy as np
import collections
from sklearn import metrics
from sklearn.cross_validation import train_test_split
import tensorflow as tf
import pandas as pd
from unidecode import unidecode
from sklearn.preprocessing import LabelEncoder
from tqdm import tqdm
import time



In [5]:
import json

with open('tokenized.json') as fopen:
    dataset = json.load(fopen)
texts = dataset['x']
labels = dataset['y']
del dataset

In [6]:
x, y = [], []
for i in tqdm(range(len(texts))):
    s = ' '.join(texts[i])
    if len(s) > 5:
        x.append(s)
        y.append(labels[i])

100%|██████████| 192029/192029 [00:00<00:00, 582639.90it/s]


In [7]:
def convert_sparse_matrix_to_sparse_tensor(X):
    coo = X.tocoo()
    indices = np.mat([coo.row, coo.col]).transpose()
    return tf.SparseTensorValue(indices, coo.col, coo.shape), tf.SparseTensorValue(indices, coo.data, coo.shape)

In [8]:
from sklearn.feature_extraction.text import CountVectorizer

In [10]:
bow_chars = CountVectorizer(ngram_range=(3, 5), analyzer='char_wb', max_features=300000).fit(x)
delattr(bow_chars, 'stop_words_')

In [11]:
vectors = bow_chars.transform(x)
feature_shape = vectors.shape[1]
feature_shape

300000

In [12]:
class Model:
    def __init__(self, output_size, vocab_size, learning_rate):
        self.X = tf.sparse_placeholder(tf.int32)
        self.W = tf.sparse_placeholder(tf.int32)
        self.Y = tf.placeholder(tf.float32, [None, output_size])
        embeddings = tf.Variable(tf.truncated_normal([vocab_size,128]))
        embed = tf.nn.embedding_lookup_sparse(embeddings, self.X, self.W, combiner='mean')
        self.logits = tf.layers.dense(embed, output_size)
        self.cost = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(
                logits = self.logits, labels = self.Y
            )
        )
        self.optimizer = tf.train.AdamOptimizer(learning_rate = learning_rate).minimize(self.cost)
        correct_prediction = tf.equal(tf.round(tf.nn.sigmoid(self.logits)), tf.round(self.Y))
        all_labels_true = tf.reduce_min(tf.cast(correct_prediction, tf.float32), 1)
        self.accuracy = tf.reduce_mean(all_labels_true)

In [13]:
sess = tf.InteractiveSession()
model = Model(6, feature_shape, 1e-4)
sess.run(tf.global_variables_initializer())

  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


In [14]:
saver = tf.train.Saver(tf.trainable_variables())
saver.save(sess, 'fast-text-char/model.ckpt')

'fast-text-char/model.ckpt'

In [15]:
train_X, test_X, train_Y, test_Y = train_test_split(
    vectors, y, test_size = 0.2
)

In [16]:
from tqdm import tqdm
import time

batch_size = 60
EARLY_STOPPING, CURRENT_CHECKPOINT, CURRENT_ACC, EPOCH = 3, 0, 0, 0

while True:
    lasttime = time.time()
    if CURRENT_CHECKPOINT == EARLY_STOPPING:
        print('break epoch:%d\n' % (EPOCH))
        break

    train_acc, train_loss, test_acc, test_loss = 0, 0, 0, 0
    pbar = tqdm(
        range(0, train_X.shape[0], batch_size), desc = 'train minibatch loop'
    )
    for i in pbar:
        batch_x = convert_sparse_matrix_to_sparse_tensor(train_X[i : min(i + batch_size, train_X.shape[0])])
        batch_y = train_Y[i : min(i + batch_size, train_X.shape[0])]
        acc, cost, _ = sess.run(
            [model.accuracy, model.cost, model.optimizer],
            feed_dict = {
                model.Y: batch_y,
                model.X: batch_x[0],
                model.W: batch_x[1],
            },
        )
        assert not np.isnan(cost)
        train_loss += cost
        train_acc += acc
        pbar.set_postfix(cost = cost, accuracy = acc)

    pbar = tqdm(range(0, test_X.shape[0], batch_size), desc = 'test minibatch loop')
    for i in pbar:
        batch_x = convert_sparse_matrix_to_sparse_tensor(test_X[i : min(i + batch_size, test_X.shape[0])])
        batch_y = test_Y[i : min(i + batch_size, test_X.shape[0])]
        acc, cost = sess.run(
            [model.accuracy, model.cost],
            feed_dict = {
                model.Y: batch_y,
                model.X: batch_x[0],
                model.W: batch_x[1],
            },
        )
        test_loss += cost
        test_acc += acc
        pbar.set_postfix(cost = cost, accuracy = acc)

    train_loss /= train_X.shape[0] / batch_size
    train_acc /= train_X.shape[0] / batch_size
    test_loss /= test_X.shape[0] / batch_size
    test_acc /= test_X.shape[0] / batch_size

    if test_acc > CURRENT_ACC:
        print(
            'epoch: %d, pass acc: %f, current acc: %f'
            % (EPOCH, CURRENT_ACC, test_acc)
        )
        CURRENT_ACC = test_acc
        CURRENT_CHECKPOINT = 0
    else:
        CURRENT_CHECKPOINT += 1

    print('time taken:', time.time() - lasttime)
    print(
        'epoch: %d, training loss: %f, training acc: %f, valid loss: %f, valid acc: %f\n'
        % (EPOCH, train_loss, train_acc, test_loss, test_acc)
    )
    EPOCH += 1

train minibatch loop: 100%|██████████| 2556/2556 [01:52<00:00, 22.79it/s, accuracy=0.857, cost=0.172] 
test minibatch loop: 100%|██████████| 639/639 [00:03<00:00, 210.43it/s, accuracy=0.816, cost=0.244] 
train minibatch loop:   0%|          | 3/2556 [00:00<01:52, 22.65it/s, accuracy=0.817, cost=0.204]

epoch: 0, pass acc: 0.000000, current acc: 0.899579
time taken: 115.20369625091553
epoch: 0, training loss: 0.286168, training acc: 0.839196, valid loss: 0.127799, valid acc: 0.899579



train minibatch loop: 100%|██████████| 2556/2556 [01:52<00:00, 22.89it/s, accuracy=0.857, cost=0.144] 
test minibatch loop: 100%|██████████| 639/639 [00:03<00:00, 209.97it/s, accuracy=0.816, cost=0.214] 
train minibatch loop:   0%|          | 3/2556 [00:00<01:52, 22.65it/s, accuracy=0.833, cost=0.165]

epoch: 1, pass acc: 0.899579, current acc: 0.900780
time taken: 115.12048959732056
epoch: 1, training loss: 0.116865, training acc: 0.897905, valid loss: 0.105463, valid acc: 0.900780



train minibatch loop: 100%|██████████| 2556/2556 [01:52<00:00, 22.85it/s, accuracy=0.857, cost=0.126] 
test minibatch loop: 100%|██████████| 639/639 [00:03<00:00, 209.74it/s, accuracy=0.837, cost=0.194] 
train minibatch loop:   0%|          | 3/2556 [00:00<01:52, 22.73it/s, accuracy=0.85, cost=0.137] 

epoch: 2, pass acc: 0.900780, current acc: 0.902038
time taken: 115.09034776687622
epoch: 2, training loss: 0.100738, training acc: 0.898857, valid loss: 0.094101, valid acc: 0.902038



train minibatch loop: 100%|██████████| 2556/2556 [01:52<00:00, 22.91it/s, accuracy=0.857, cost=0.114] 
test minibatch loop: 100%|██████████| 639/639 [00:03<00:00, 211.45it/s, accuracy=0.816, cost=0.186] 
train minibatch loop:   0%|          | 3/2556 [00:00<01:53, 22.55it/s, accuracy=0.833, cost=0.124]

epoch: 3, pass acc: 0.902038, current acc: 0.902188
time taken: 115.06237173080444
epoch: 3, training loss: 0.092209, training acc: 0.899340, valid loss: 0.088464, valid acc: 0.902188



train minibatch loop: 100%|██████████| 2556/2556 [01:52<00:00, 22.91it/s, accuracy=0.857, cost=0.107] 
test minibatch loop: 100%|██████████| 639/639 [00:03<00:00, 211.99it/s, accuracy=0.816, cost=0.18]  
train minibatch loop:   0%|          | 3/2556 [00:00<01:53, 22.54it/s, accuracy=0.833, cost=0.116]

epoch: 4, pass acc: 0.902188, current acc: 0.902762
time taken: 115.03546023368835
epoch: 4, training loss: 0.087553, training acc: 0.900325, valid loss: 0.085175, valid acc: 0.902762



train minibatch loop: 100%|██████████| 2556/2556 [01:52<00:00, 22.88it/s, accuracy=0.857, cost=0.102] 
test minibatch loop: 100%|██████████| 639/639 [00:03<00:00, 211.46it/s, accuracy=0.816, cost=0.174] 
train minibatch loop:   0%|          | 3/2556 [00:00<01:52, 22.66it/s, accuracy=0.833, cost=0.112]

epoch: 5, pass acc: 0.902762, current acc: 0.903519
time taken: 115.05824279785156
epoch: 5, training loss: 0.084437, training acc: 0.901349, valid loss: 0.082866, valid acc: 0.903519



train minibatch loop: 100%|██████████| 2556/2556 [01:51<00:00, 22.93it/s, accuracy=0.857, cost=0.0971]
test minibatch loop: 100%|██████████| 639/639 [00:03<00:00, 208.86it/s, accuracy=0.816, cost=0.169] 
train minibatch loop:   0%|          | 3/2556 [00:00<01:53, 22.42it/s, accuracy=0.833, cost=0.109]

epoch: 6, pass acc: 0.903519, current acc: 0.903780
time taken: 115.00309157371521
epoch: 6, training loss: 0.082053, training acc: 0.902412, valid loss: 0.081093, valid acc: 0.903780



train minibatch loop: 100%|██████████| 2556/2556 [01:51<00:00, 22.92it/s, accuracy=0.857, cost=0.0929]
test minibatch loop: 100%|██████████| 639/639 [00:03<00:00, 211.56it/s, accuracy=0.816, cost=0.165] 
train minibatch loop:   0%|          | 3/2556 [00:00<01:53, 22.49it/s, accuracy=0.85, cost=0.107] 

epoch: 7, pass acc: 0.903780, current acc: 0.904354
time taken: 115.0012514591217
epoch: 7, training loss: 0.080113, training acc: 0.903397, valid loss: 0.079674, valid acc: 0.904354



train minibatch loop: 100%|██████████| 2556/2556 [01:52<00:00, 22.90it/s, accuracy=0.857, cost=0.0892]
test minibatch loop: 100%|██████████| 639/639 [00:03<00:00, 210.63it/s, accuracy=0.816, cost=0.162] 
train minibatch loop:   0%|          | 3/2556 [00:00<01:52, 22.60it/s, accuracy=0.85, cost=0.106] 

epoch: 8, pass acc: 0.904354, current acc: 0.905215
time taken: 115.04232907295227
epoch: 8, training loss: 0.078480, training acc: 0.904584, valid loss: 0.078509, valid acc: 0.905215



train minibatch loop: 100%|██████████| 2556/2556 [01:52<00:00, 22.89it/s, accuracy=0.857, cost=0.0858]
test minibatch loop: 100%|██████████| 639/639 [00:03<00:00, 211.56it/s, accuracy=0.816, cost=0.16]  
train minibatch loop:   0%|          | 3/2556 [00:00<01:53, 22.55it/s, accuracy=0.85, cost=0.106] 

epoch: 9, pass acc: 0.905215, current acc: 0.905789
time taken: 115.02357625961304
epoch: 9, training loss: 0.077073, training acc: 0.905230, valid loss: 0.077537, valid acc: 0.905789



train minibatch loop: 100%|██████████| 2556/2556 [01:52<00:00, 22.89it/s, accuracy=0.857, cost=0.0828]
test minibatch loop: 100%|██████████| 639/639 [00:03<00:00, 211.09it/s, accuracy=0.816, cost=0.158] 
train minibatch loop:   0%|          | 3/2556 [00:00<01:53, 22.58it/s, accuracy=0.867, cost=0.106]

epoch: 10, pass acc: 0.905789, current acc: 0.906232
time taken: 115.06719374656677
epoch: 10, training loss: 0.075837, training acc: 0.905771, valid loss: 0.076715, valid acc: 0.906232



train minibatch loop: 100%|██████████| 2556/2556 [01:52<00:00, 22.92it/s, accuracy=0.857, cost=0.0799]
test minibatch loop: 100%|██████████| 639/639 [00:03<00:00, 211.18it/s, accuracy=0.816, cost=0.156] 
train minibatch loop:   0%|          | 3/2556 [00:00<01:53, 22.55it/s, accuracy=0.883, cost=0.105]

epoch: 11, pass acc: 0.906232, current acc: 0.906598
time taken: 115.03527069091797
epoch: 11, training loss: 0.074735, training acc: 0.906443, valid loss: 0.076012, valid acc: 0.906598



train minibatch loop: 100%|██████████| 2556/2556 [01:52<00:00, 22.86it/s, accuracy=0.857, cost=0.0774]
test minibatch loop: 100%|██████████| 639/639 [00:03<00:00, 211.52it/s, accuracy=0.816, cost=0.156] 
train minibatch loop:   0%|          | 3/2556 [00:00<01:53, 22.51it/s, accuracy=0.883, cost=0.105]

epoch: 12, pass acc: 0.906598, current acc: 0.907119
time taken: 115.02859568595886
epoch: 12, training loss: 0.073739, training acc: 0.907304, valid loss: 0.075406, valid acc: 0.907119



train minibatch loop: 100%|██████████| 2556/2556 [01:51<00:00, 22.88it/s, accuracy=0.857, cost=0.075] 
test minibatch loop: 100%|██████████| 639/639 [00:03<00:00, 211.98it/s, accuracy=0.816, cost=0.155] 
train minibatch loop:   0%|          | 3/2556 [00:00<01:53, 22.58it/s, accuracy=0.883, cost=0.105]

epoch: 13, pass acc: 0.907119, current acc: 0.907772
time taken: 115.01431465148926
epoch: 13, training loss: 0.072828, training acc: 0.907904, valid loss: 0.074880, valid acc: 0.907772



train minibatch loop: 100%|██████████| 2556/2556 [01:51<00:00, 22.84it/s, accuracy=0.857, cost=0.0728]
test minibatch loop: 100%|██████████| 639/639 [00:03<00:00, 211.65it/s, accuracy=0.816, cost=0.154] 
train minibatch loop:   0%|          | 3/2556 [00:00<01:53, 22.59it/s, accuracy=0.883, cost=0.105]

epoch: 14, pass acc: 0.907772, current acc: 0.907850
time taken: 115.0006947517395
epoch: 14, training loss: 0.071987, training acc: 0.908276, valid loss: 0.074420, valid acc: 0.907850



train minibatch loop: 100%|██████████| 2556/2556 [01:51<00:00, 22.90it/s, accuracy=0.857, cost=0.0708]
test minibatch loop: 100%|██████████| 639/639 [00:03<00:00, 211.34it/s, accuracy=0.816, cost=0.154] 
train minibatch loop:   0%|          | 3/2556 [00:00<01:53, 22.55it/s, accuracy=0.883, cost=0.105]

epoch: 15, pass acc: 0.907850, current acc: 0.908293
time taken: 114.9916934967041
epoch: 15, training loss: 0.071204, training acc: 0.908733, valid loss: 0.074015, valid acc: 0.908293



train minibatch loop: 100%|██████████| 2556/2556 [01:51<00:00, 22.89it/s, accuracy=0.857, cost=0.0688]
test minibatch loop: 100%|██████████| 639/639 [00:03<00:00, 211.81it/s, accuracy=0.816, cost=0.154] 
train minibatch loop:   0%|          | 3/2556 [00:00<01:52, 22.61it/s, accuracy=0.883, cost=0.105]

time taken: 115.00036025047302
epoch: 16, training loss: 0.070469, training acc: 0.909294, valid loss: 0.073658, valid acc: 0.908111



train minibatch loop: 100%|██████████| 2556/2556 [01:51<00:00, 22.91it/s, accuracy=0.857, cost=0.067] 
test minibatch loop: 100%|██████████| 639/639 [00:03<00:00, 211.62it/s, accuracy=0.816, cost=0.154] 
train minibatch loop:   0%|          | 3/2556 [00:00<01:53, 22.46it/s, accuracy=0.883, cost=0.105]

epoch: 17, pass acc: 0.908293, current acc: 0.908424
time taken: 114.99447965621948
epoch: 17, training loss: 0.069776, training acc: 0.909724, valid loss: 0.073342, valid acc: 0.908424



train minibatch loop: 100%|██████████| 2556/2556 [01:51<00:00, 22.93it/s, accuracy=0.857, cost=0.0653]
test minibatch loop: 100%|██████████| 639/639 [00:03<00:00, 212.75it/s, accuracy=0.816, cost=0.154] 
train minibatch loop:   0%|          | 3/2556 [00:00<01:53, 22.51it/s, accuracy=0.883, cost=0.105]

epoch: 18, pass acc: 0.908424, current acc: 0.908815
time taken: 114.97312664985657
epoch: 18, training loss: 0.069118, training acc: 0.910272, valid loss: 0.073061, valid acc: 0.908815



train minibatch loop: 100%|██████████| 2556/2556 [01:51<00:00, 22.88it/s, accuracy=0.857, cost=0.0637]
test minibatch loop: 100%|██████████| 639/639 [00:03<00:00, 211.68it/s, accuracy=0.816, cost=0.154] 
train minibatch loop:   0%|          | 3/2556 [00:00<01:53, 22.48it/s, accuracy=0.867, cost=0.105]

epoch: 19, pass acc: 0.908815, current acc: 0.909128
time taken: 114.9917516708374
epoch: 19, training loss: 0.068492, training acc: 0.910709, valid loss: 0.072811, valid acc: 0.909128



train minibatch loop: 100%|██████████| 2556/2556 [01:51<00:00, 22.91it/s, accuracy=0.857, cost=0.0621]
test minibatch loop: 100%|██████████| 639/639 [00:03<00:00, 212.24it/s, accuracy=0.816, cost=0.154] 
train minibatch loop:   0%|          | 3/2556 [00:00<01:52, 22.68it/s, accuracy=0.867, cost=0.105]

time taken: 114.97773814201355
epoch: 20, training loss: 0.067893, training acc: 0.911081, valid loss: 0.072588, valid acc: 0.909076



train minibatch loop: 100%|██████████| 2556/2556 [01:51<00:00, 22.92it/s, accuracy=0.857, cost=0.0606]
test minibatch loop: 100%|██████████| 639/639 [00:03<00:00, 212.98it/s, accuracy=0.816, cost=0.154] 
train minibatch loop:   0%|          | 3/2556 [00:00<01:53, 22.57it/s, accuracy=0.867, cost=0.105]

epoch: 21, pass acc: 0.909128, current acc: 0.909337
time taken: 114.9418580532074
epoch: 21, training loss: 0.067318, training acc: 0.911531, valid loss: 0.072389, valid acc: 0.909337



train minibatch loop: 100%|██████████| 2556/2556 [01:51<00:00, 22.90it/s, accuracy=0.857, cost=0.0592]
test minibatch loop: 100%|██████████| 639/639 [00:02<00:00, 213.09it/s, accuracy=0.816, cost=0.154] 
train minibatch loop:   0%|          | 3/2556 [00:00<01:53, 22.59it/s, accuracy=0.867, cost=0.105]

epoch: 22, pass acc: 0.909337, current acc: 0.909363
time taken: 114.90017485618591
epoch: 22, training loss: 0.066765, training acc: 0.912007, valid loss: 0.072212, valid acc: 0.909363



train minibatch loop: 100%|██████████| 2556/2556 [01:51<00:00, 22.88it/s, accuracy=0.857, cost=0.0579]
test minibatch loop: 100%|██████████| 639/639 [00:02<00:00, 213.97it/s, accuracy=0.837, cost=0.154] 
train minibatch loop:   0%|          | 3/2556 [00:00<01:53, 22.53it/s, accuracy=0.867, cost=0.105]

epoch: 23, pass acc: 0.909363, current acc: 0.909526
time taken: 114.87630128860474
epoch: 23, training loss: 0.066233, training acc: 0.912561, valid loss: 0.072055, valid acc: 0.909526



train minibatch loop: 100%|██████████| 2556/2556 [01:51<00:00, 22.91it/s, accuracy=0.857, cost=0.0566]
test minibatch loop: 100%|██████████| 639/639 [00:03<00:00, 210.42it/s, accuracy=0.837, cost=0.154] 
train minibatch loop:   0%|          | 3/2556 [00:00<01:52, 22.69it/s, accuracy=0.867, cost=0.105]

epoch: 24, pass acc: 0.909526, current acc: 0.909578
time taken: 114.91472482681274
epoch: 24, training loss: 0.065718, training acc: 0.913005, valid loss: 0.071916, valid acc: 0.909578



train minibatch loop: 100%|██████████| 2556/2556 [01:51<00:00, 22.92it/s, accuracy=0.857, cost=0.0554]
test minibatch loop: 100%|██████████| 639/639 [00:02<00:00, 214.08it/s, accuracy=0.837, cost=0.154] 
train minibatch loop:   0%|          | 3/2556 [00:00<01:52, 22.60it/s, accuracy=0.867, cost=0.106]

time taken: 114.86278223991394
epoch: 25, training loss: 0.065221, training acc: 0.913501, valid loss: 0.071793, valid acc: 0.909552



train minibatch loop: 100%|██████████| 2556/2556 [01:51<00:00, 22.93it/s, accuracy=0.857, cost=0.0542]
test minibatch loop: 100%|██████████| 639/639 [00:02<00:00, 213.23it/s, accuracy=0.837, cost=0.153] 
train minibatch loop:   0%|          | 3/2556 [00:00<01:53, 22.52it/s, accuracy=0.867, cost=0.106]

epoch: 26, pass acc: 0.909578, current acc: 0.909604
time taken: 114.87152290344238
epoch: 26, training loss: 0.064740, training acc: 0.913964, valid loss: 0.071685, valid acc: 0.909604



train minibatch loop: 100%|██████████| 2556/2556 [01:51<00:00, 22.92it/s, accuracy=0.857, cost=0.0531]
test minibatch loop: 100%|██████████| 639/639 [00:02<00:00, 213.16it/s, accuracy=0.837, cost=0.153] 
train minibatch loop:   0%|          | 3/2556 [00:00<01:53, 22.59it/s, accuracy=0.867, cost=0.106]

epoch: 27, pass acc: 0.909604, current acc: 0.909682
time taken: 114.88721418380737
epoch: 27, training loss: 0.064273, training acc: 0.914394, valid loss: 0.071592, valid acc: 0.909682



train minibatch loop: 100%|██████████| 2556/2556 [01:51<00:00, 22.90it/s, accuracy=0.857, cost=0.0521]
test minibatch loop: 100%|██████████| 639/639 [00:02<00:00, 213.40it/s, accuracy=0.837, cost=0.153] 
train minibatch loop:   0%|          | 3/2556 [00:00<01:52, 22.65it/s, accuracy=0.867, cost=0.106]

epoch: 28, pass acc: 0.909682, current acc: 0.909682
time taken: 114.87823390960693
epoch: 28, training loss: 0.063820, training acc: 0.914831, valid loss: 0.071512, valid acc: 0.909682



train minibatch loop: 100%|██████████| 2556/2556 [01:51<00:00, 22.77it/s, accuracy=0.857, cost=0.0511]
test minibatch loop: 100%|██████████| 639/639 [00:03<00:00, 212.63it/s, accuracy=0.837, cost=0.153] 
train minibatch loop:   0%|          | 3/2556 [00:00<01:52, 22.62it/s, accuracy=0.867, cost=0.107]

epoch: 29, pass acc: 0.909682, current acc: 0.909865
time taken: 114.88942337036133
epoch: 29, training loss: 0.063380, training acc: 0.915157, valid loss: 0.071444, valid acc: 0.909865



train minibatch loop: 100%|██████████| 2556/2556 [01:51<00:00, 22.92it/s, accuracy=0.857, cost=0.0501]
test minibatch loop: 100%|██████████| 639/639 [00:02<00:00, 213.10it/s, accuracy=0.837, cost=0.153] 
train minibatch loop:   0%|          | 3/2556 [00:00<01:53, 22.58it/s, accuracy=0.867, cost=0.107]

epoch: 30, pass acc: 0.909865, current acc: 0.910021
time taken: 114.85861921310425
epoch: 30, training loss: 0.062953, training acc: 0.915412, valid loss: 0.071387, valid acc: 0.910021



train minibatch loop: 100%|██████████| 2556/2556 [01:51<00:00, 22.90it/s, accuracy=0.857, cost=0.0492] 
test minibatch loop: 100%|██████████| 639/639 [00:02<00:00, 213.16it/s, accuracy=0.837, cost=0.153] 
train minibatch loop:   0%|          | 3/2556 [00:00<01:52, 22.66it/s, accuracy=0.867, cost=0.107]

epoch: 31, pass acc: 0.910021, current acc: 0.910073
time taken: 114.88888716697693
epoch: 31, training loss: 0.062537, training acc: 0.915757, valid loss: 0.071342, valid acc: 0.910073



train minibatch loop: 100%|██████████| 2556/2556 [01:51<00:00, 22.90it/s, accuracy=0.857, cost=0.0483] 
test minibatch loop: 100%|██████████| 639/639 [00:02<00:00, 213.59it/s, accuracy=0.837, cost=0.153] 
train minibatch loop:   0%|          | 3/2556 [00:00<01:53, 22.55it/s, accuracy=0.867, cost=0.108]

epoch: 32, pass acc: 0.910073, current acc: 0.910152
time taken: 114.89193224906921
epoch: 32, training loss: 0.062132, training acc: 0.916103, valid loss: 0.071306, valid acc: 0.910152



train minibatch loop: 100%|██████████| 2556/2556 [01:51<00:00, 22.91it/s, accuracy=0.857, cost=0.0475] 
test minibatch loop: 100%|██████████| 639/639 [00:02<00:00, 214.16it/s, accuracy=0.837, cost=0.153] 
train minibatch loop:   0%|          | 3/2556 [00:00<01:53, 22.59it/s, accuracy=0.867, cost=0.108]

epoch: 33, pass acc: 0.910152, current acc: 0.910178
time taken: 114.85901284217834
epoch: 33, training loss: 0.061737, training acc: 0.916521, valid loss: 0.071281, valid acc: 0.910178



train minibatch loop: 100%|██████████| 2556/2556 [01:51<00:00, 22.89it/s, accuracy=0.857, cost=0.0467] 
test minibatch loop: 100%|██████████| 639/639 [00:02<00:00, 213.23it/s, accuracy=0.837, cost=0.153] 
train minibatch loop:   0%|          | 3/2556 [00:00<01:52, 22.62it/s, accuracy=0.867, cost=0.108]

epoch: 34, pass acc: 0.910178, current acc: 0.910308
time taken: 114.90892553329468
epoch: 34, training loss: 0.061352, training acc: 0.916899, valid loss: 0.071264, valid acc: 0.910308



train minibatch loop: 100%|██████████| 2556/2556 [01:51<00:00, 22.93it/s, accuracy=0.857, cost=0.0459]
test minibatch loop: 100%|██████████| 639/639 [00:03<00:00, 212.81it/s, accuracy=0.837, cost=0.153] 
train minibatch loop:   0%|          | 3/2556 [00:00<01:52, 22.74it/s, accuracy=0.867, cost=0.109]

time taken: 114.88584637641907
epoch: 35, training loss: 0.060976, training acc: 0.917310, valid loss: 0.071256, valid acc: 0.910256



train minibatch loop: 100%|██████████| 2556/2556 [01:51<00:00, 22.92it/s, accuracy=0.857, cost=0.0452]
test minibatch loop: 100%|██████████| 639/639 [00:03<00:00, 212.99it/s, accuracy=0.837, cost=0.153] 
train minibatch loop:   0%|          | 3/2556 [00:00<01:52, 22.60it/s, accuracy=0.867, cost=0.109]

epoch: 36, pass acc: 0.910308, current acc: 0.910595
time taken: 114.8835813999176
epoch: 36, training loss: 0.060609, training acc: 0.917766, valid loss: 0.071256, valid acc: 0.910595



train minibatch loop: 100%|██████████| 2556/2556 [01:51<00:00, 22.95it/s, accuracy=0.857, cost=0.0444]
test minibatch loop: 100%|██████████| 639/639 [00:02<00:00, 214.09it/s, accuracy=0.837, cost=0.153] 
train minibatch loop:   0%|          | 3/2556 [00:00<01:52, 22.62it/s, accuracy=0.867, cost=0.109]

epoch: 37, pass acc: 0.910595, current acc: 0.910595
time taken: 114.82986950874329
epoch: 37, training loss: 0.060251, training acc: 0.918301, valid loss: 0.071264, valid acc: 0.910595



train minibatch loop: 100%|██████████| 2556/2556 [01:51<00:00, 22.95it/s, accuracy=0.857, cost=0.0437]
test minibatch loop: 100%|██████████| 639/639 [00:03<00:00, 211.85it/s, accuracy=0.837, cost=0.153] 
train minibatch loop:   0%|          | 3/2556 [00:00<01:52, 22.69it/s, accuracy=0.867, cost=0.109]

time taken: 114.87326407432556
epoch: 38, training loss: 0.059900, training acc: 0.918582, valid loss: 0.071280, valid acc: 0.910386



train minibatch loop: 100%|██████████| 2556/2556 [01:51<00:00, 22.91it/s, accuracy=0.857, cost=0.0431]
test minibatch loop: 100%|██████████| 639/639 [00:03<00:00, 212.95it/s, accuracy=0.837, cost=0.153] 
train minibatch loop:   0%|          | 3/2556 [00:00<01:52, 22.63it/s, accuracy=0.867, cost=0.11]

time taken: 114.86839842796326
epoch: 39, training loss: 0.059557, training acc: 0.919038, valid loss: 0.071303, valid acc: 0.910491



train minibatch loop: 100%|██████████| 2556/2556 [01:51<00:00, 22.91it/s, accuracy=0.857, cost=0.0424]
test minibatch loop: 100%|██████████| 639/639 [00:03<00:00, 212.99it/s, accuracy=0.837, cost=0.153] 

time taken: 114.87353873252869
epoch: 40, training loss: 0.059221, training acc: 0.919567, valid loss: 0.071332, valid acc: 0.910386

break epoch:41






In [17]:
stack = []

pbar = tqdm(
    range(0, test_X.shape[0], batch_size), desc = 'validation minibatch loop'
)
for i in pbar:
    batch_x = convert_sparse_matrix_to_sparse_tensor(test_X[i : min(i + batch_size, test_X.shape[0])])
    batch_y = test_Y[i : min(i + batch_size, test_X.shape[0])]
    stack.append(sess.run(
        tf.nn.sigmoid(model.logits), 
        feed_dict = {model.X: batch_x[0], model.W: batch_x[1], model.Y: batch_y}
    ))

validation minibatch loop: 100%|██████████| 639/639 [00:07<00:00, 84.25it/s]


In [18]:
print(metrics.classification_report(np.array(test_Y),np.around(np.concatenate(stack,axis=0)),
                                    target_names=["toxic", "severe_toxic", "obscene", 
                                            "threat", "insult", "identity_hate"]))

               precision    recall  f1-score   support

        toxic       0.83      0.56      0.67      3654
 severe_toxic       0.47      0.22      0.30       387
      obscene       0.84      0.54      0.65      1985
       threat       0.48      0.17      0.25       120
       insult       0.73      0.44      0.55      1862
identity_hate       0.44      0.18      0.26       326

  avg / total       0.77      0.49      0.60      8334



In [19]:
saver.save(sess, 'fast-text-char/model.ckpt')

'fast-text-char/model.ckpt'

In [20]:
import pickle
with open('vectorizer-sparse-toxicity.pkl','wb') as fopen:
    pickle.dump(bow_chars, fopen)