In [1]:
import re
import numpy as np
import collections
from sklearn import metrics
from sklearn.cross_validation import train_test_split
import tensorflow as tf
import pandas as pd
from unidecode import unidecode
from sklearn.preprocessing import LabelEncoder
from tqdm import tqdm
import time



In [2]:
import json

with open('tokenization.json') as fopen:
    dataset = json.load(fopen)
texts = dataset['texts']
labels = dataset['labels']
del dataset

In [3]:
x, y = [], []
for i in tqdm(range(len(texts))):
    s = ' '.join(texts[i])
    if len(s) > 5:
        x.append(s)
        y.append(labels[i])

100%|██████████| 9962/9962 [00:00<00:00, 310529.86it/s]


In [4]:
def convert_sparse_matrix_to_sparse_tensor(X):
    coo = X.tocoo()
    indices = np.mat([coo.row, coo.col]).transpose()
    return tf.SparseTensorValue(indices, coo.col, coo.shape), tf.SparseTensorValue(indices, coo.data, coo.shape)

In [5]:
from sklearn.feature_extraction.text import CountVectorizer

In [6]:
bow_chars = CountVectorizer(ngram_range=(3, 5), analyzer='char_wb', max_features=300000).fit(x)
delattr(bow_chars, 'stop_words_')

In [7]:
vectors = bow_chars.transform(x)
feature_shape = vectors.shape[1]
feature_shape

65672

In [8]:
class Model:
    def __init__(self, output_size, vocab_size, learning_rate):
        self.X = tf.sparse_placeholder(tf.int32)
        self.W = tf.sparse_placeholder(tf.int32)
        self.Y = tf.placeholder(tf.int32, [None])
        embeddings = tf.Variable(tf.truncated_normal([vocab_size,128]))
        embed = tf.nn.embedding_lookup_sparse(embeddings, self.X, self.W, combiner='mean')
        self.logits = tf.layers.dense(embed, output_size)
        self.cost = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits = self.logits, labels = self.Y))
        self.optimizer = tf.train.AdamOptimizer(learning_rate = learning_rate).minimize(self.cost)
        correct_pred = tf.equal(tf.argmax(self.logits, 1,output_type=tf.int32), self.Y)
        self.accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

In [9]:
sess = tf.InteractiveSession()
model = Model(2, feature_shape, 1e-4)
sess.run(tf.global_variables_initializer())

  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


In [10]:
saver = tf.train.Saver(tf.trainable_variables())
saver.save(sess, 'fast-text-char/model.ckpt')

'fast-text-char/model.ckpt'

In [11]:
train_X, test_X, train_Y, test_Y = train_test_split(
    vectors, y, test_size = 0.2
)

In [12]:
from tqdm import tqdm
import time

batch_size = 60
EARLY_STOPPING, CURRENT_CHECKPOINT, CURRENT_ACC, EPOCH = 3, 0, 0, 0

while True:
    lasttime = time.time()
    if CURRENT_CHECKPOINT == EARLY_STOPPING:
        print('break epoch:%d\n' % (EPOCH))
        break

    train_acc, train_loss, test_acc, test_loss = 0, 0, 0, 0
    pbar = tqdm(
        range(0, train_X.shape[0], batch_size), desc = 'train minibatch loop'
    )
    for i in pbar:
        batch_x = convert_sparse_matrix_to_sparse_tensor(train_X[i : min(i + batch_size, train_X.shape[0])])
        batch_y = train_Y[i : min(i + batch_size, train_X.shape[0])]
        acc, cost, _ = sess.run(
            [model.accuracy, model.cost, model.optimizer],
            feed_dict = {
                model.Y: batch_y,
                model.X: batch_x[0],
                model.W: batch_x[1],
            },
        )
        assert not np.isnan(cost)
        train_loss += cost
        train_acc += acc
        pbar.set_postfix(cost = cost, accuracy = acc)

    pbar = tqdm(range(0, test_X.shape[0], batch_size), desc = 'test minibatch loop')
    for i in pbar:
        batch_x = convert_sparse_matrix_to_sparse_tensor(test_X[i : min(i + batch_size, test_X.shape[0])])
        batch_y = test_Y[i : min(i + batch_size, test_X.shape[0])]
        acc, cost = sess.run(
            [model.accuracy, model.cost],
            feed_dict = {
                model.Y: batch_y,
                model.X: batch_x[0],
                model.W: batch_x[1],
            },
        )
        test_loss += cost
        test_acc += acc
        pbar.set_postfix(cost = cost, accuracy = acc)

    train_loss /= train_X.shape[0] / batch_size
    train_acc /= train_X.shape[0] / batch_size
    test_loss /= test_X.shape[0] / batch_size
    test_acc /= test_X.shape[0] / batch_size

    if test_acc > CURRENT_ACC:
        print(
            'epoch: %d, pass acc: %f, current acc: %f'
            % (EPOCH, CURRENT_ACC, test_acc)
        )
        CURRENT_ACC = test_acc
        CURRENT_CHECKPOINT = 0
    else:
        CURRENT_CHECKPOINT += 1

    print('time taken:', time.time() - lasttime)
    print(
        'epoch: %d, training loss: %f, training acc: %f, valid loss: %f, valid acc: %f\n'
        % (EPOCH, train_loss, train_acc, test_loss, test_acc)
    )
    EPOCH += 1

train minibatch loop: 100%|██████████| 133/133 [00:01<00:00, 70.98it/s, accuracy=0.592, cost=0.683]
test minibatch loop: 100%|██████████| 34/34 [00:00<00:00, 277.33it/s, accuracy=0.462, cost=0.681]
train minibatch loop:   6%|▌         | 8/133 [00:00<00:01, 77.28it/s, accuracy=0.5, cost=0.681]  

epoch: 0, pass acc: 0.000000, current acc: 0.568335
time taken: 2.00079607963562
epoch: 0, training loss: 0.693529, training acc: 0.525099, valid loss: 0.701461, valid acc: 0.568335



train minibatch loop: 100%|██████████| 133/133 [00:01<00:00, 77.73it/s, accuracy=0.673, cost=0.671]
test minibatch loop: 100%|██████████| 34/34 [00:00<00:00, 288.63it/s, accuracy=0.462, cost=0.674]
train minibatch loop:   6%|▌         | 8/133 [00:00<00:01, 77.47it/s, accuracy=0.717, cost=0.663]

epoch: 1, pass acc: 0.568335, current acc: 0.665174
time taken: 1.8305857181549072
epoch: 1, training loss: 0.676979, training acc: 0.610793, valid loss: 0.686072, valid acc: 0.665174



train minibatch loop: 100%|██████████| 133/133 [00:01<00:00, 77.83it/s, accuracy=0.714, cost=0.658]
test minibatch loop: 100%|██████████| 34/34 [00:00<00:00, 290.84it/s, accuracy=0.615, cost=0.667]
train minibatch loop:   6%|▌         | 8/133 [00:00<00:01, 78.02it/s, accuracy=0.817, cost=0.645]

epoch: 2, pass acc: 0.665174, current acc: 0.740553
time taken: 1.8272716999053955
epoch: 2, training loss: 0.660672, training acc: 0.704587, valid loss: 0.670402, valid acc: 0.740553



train minibatch loop: 100%|██████████| 133/133 [00:01<00:00, 77.93it/s, accuracy=0.755, cost=0.646]
test minibatch loop: 100%|██████████| 34/34 [00:00<00:00, 288.84it/s, accuracy=0.538, cost=0.659]
train minibatch loop:   6%|▌         | 8/133 [00:00<00:01, 77.57it/s, accuracy=0.8, cost=0.626]  

epoch: 3, pass acc: 0.740553, current acc: 0.791926
time taken: 1.826045274734497
epoch: 3, training loss: 0.643512, training acc: 0.767387, valid loss: 0.653684, valid acc: 0.791926



train minibatch loop: 100%|██████████| 133/133 [00:01<00:00, 77.90it/s, accuracy=0.755, cost=0.632]
test minibatch loop: 100%|██████████| 34/34 [00:00<00:00, 292.15it/s, accuracy=0.615, cost=0.65] 
train minibatch loop:   6%|▌         | 8/133 [00:00<00:01, 78.10it/s, accuracy=0.85, cost=0.605] 

epoch: 4, pass acc: 0.791926, current acc: 0.821336
time taken: 1.8259706497192383
epoch: 4, training loss: 0.624956, training acc: 0.796625, valid loss: 0.635584, valid acc: 0.821336



train minibatch loop: 100%|██████████| 133/133 [00:01<00:00, 77.87it/s, accuracy=0.755, cost=0.617]
test minibatch loop: 100%|██████████| 34/34 [00:00<00:00, 289.86it/s, accuracy=0.692, cost=0.64] 
train minibatch loop:   6%|▌         | 8/133 [00:00<00:01, 77.77it/s, accuracy=0.867, cost=0.583]

epoch: 5, pass acc: 0.821336, current acc: 0.840210
time taken: 1.8267972469329834
epoch: 5, training loss: 0.604842, training acc: 0.815574, valid loss: 0.616117, valid acc: 0.840210



train minibatch loop: 100%|██████████| 133/133 [00:01<00:00, 77.41it/s, accuracy=0.776, cost=0.602]
test minibatch loop: 100%|██████████| 34/34 [00:00<00:00, 261.05it/s, accuracy=0.692, cost=0.629]
train minibatch loop:   6%|▌         | 8/133 [00:00<00:01, 77.35it/s, accuracy=0.883, cost=0.56] 

epoch: 6, pass acc: 0.840210, current acc: 0.848238
time taken: 1.8498857021331787
epoch: 6, training loss: 0.583328, training acc: 0.832542, valid loss: 0.595559, valid acc: 0.848238



train minibatch loop: 100%|██████████| 133/133 [00:01<00:00, 77.78it/s, accuracy=0.796, cost=0.585]
test minibatch loop: 100%|██████████| 34/34 [00:00<00:00, 289.52it/s, accuracy=0.692, cost=0.618]
train minibatch loop:   6%|▌         | 8/133 [00:00<00:01, 78.24it/s, accuracy=0.883, cost=0.535]

epoch: 7, pass acc: 0.848238, current acc: 0.855764
time taken: 1.8299586772918701
epoch: 7, training loss: 0.560791, training acc: 0.842609, valid loss: 0.574360, valid acc: 0.855764



train minibatch loop: 100%|██████████| 133/133 [00:01<00:00, 77.96it/s, accuracy=0.796, cost=0.569]
test minibatch loop: 100%|██████████| 34/34 [00:00<00:00, 286.56it/s, accuracy=0.692, cost=0.605]
train minibatch loop:   6%|▌         | 8/133 [00:00<00:01, 77.43it/s, accuracy=0.883, cost=0.51] 

epoch: 8, pass acc: 0.855764, current acc: 0.861284
time taken: 1.8261587619781494
epoch: 8, training loss: 0.537749, training acc: 0.853527, valid loss: 0.553053, valid acc: 0.861284



train minibatch loop: 100%|██████████| 133/133 [00:01<00:00, 77.57it/s, accuracy=0.796, cost=0.552]
test minibatch loop: 100%|██████████| 34/34 [00:00<00:00, 288.88it/s, accuracy=0.692, cost=0.593]
train minibatch loop:   6%|▌         | 8/133 [00:00<00:01, 77.90it/s, accuracy=0.883, cost=0.485]

epoch: 9, pass acc: 0.861284, current acc: 0.864294
time taken: 1.8338537216186523
epoch: 9, training loss: 0.514761, training acc: 0.858295, valid loss: 0.532173, valid acc: 0.864294



train minibatch loop: 100%|██████████| 133/133 [00:01<00:00, 77.71it/s, accuracy=0.816, cost=0.535]
test minibatch loop: 100%|██████████| 34/34 [00:00<00:00, 292.23it/s, accuracy=0.692, cost=0.581]
train minibatch loop:   6%|▌         | 8/133 [00:00<00:01, 77.13it/s, accuracy=0.9, cost=0.46]   

epoch: 10, pass acc: 0.864294, current acc: 0.867807
time taken: 1.829629898071289
epoch: 10, training loss: 0.492348, training acc: 0.862088, valid loss: 0.512176, valid acc: 0.867807



train minibatch loop: 100%|██████████| 133/133 [00:01<00:00, 77.70it/s, accuracy=0.837, cost=0.519]
test minibatch loop: 100%|██████████| 34/34 [00:00<00:00, 288.81it/s, accuracy=0.692, cost=0.569]
train minibatch loop:   6%|▌         | 8/133 [00:00<00:01, 77.38it/s, accuracy=0.917, cost=0.437]

epoch: 11, pass acc: 0.867807, current acc: 0.868810
time taken: 1.8309235572814941
epoch: 11, training loss: 0.470930, training acc: 0.866006, valid loss: 0.493399, valid acc: 0.868810



train minibatch loop: 100%|██████████| 133/133 [00:01<00:00, 77.85it/s, accuracy=0.857, cost=0.504]
test minibatch loop: 100%|██████████| 34/34 [00:00<00:00, 288.94it/s, accuracy=0.692, cost=0.558]
train minibatch loop:   6%|▌         | 8/133 [00:00<00:01, 77.30it/s, accuracy=0.917, cost=0.414]

epoch: 12, pass acc: 0.868810, current acc: 0.870315
time taken: 1.827784538269043
epoch: 12, training loss: 0.450793, training acc: 0.870301, valid loss: 0.476047, valid acc: 0.870315



train minibatch loop: 100%|██████████| 133/133 [00:01<00:00, 77.71it/s, accuracy=0.857, cost=0.489]
test minibatch loop: 100%|██████████| 34/34 [00:00<00:00, 289.06it/s, accuracy=0.692, cost=0.547]
train minibatch loop:   6%|▌         | 8/133 [00:00<00:01, 77.66it/s, accuracy=0.917, cost=0.393]

epoch: 13, pass acc: 0.870315, current acc: 0.873326
time taken: 1.8306820392608643
epoch: 13, training loss: 0.432091, training acc: 0.873062, valid loss: 0.460203, valid acc: 0.873326



train minibatch loop: 100%|██████████| 133/133 [00:01<00:00, 77.88it/s, accuracy=0.878, cost=0.475]
test minibatch loop: 100%|██████████| 34/34 [00:00<00:00, 287.43it/s, accuracy=0.692, cost=0.538]
train minibatch loop:   6%|▌         | 8/133 [00:00<00:01, 77.76it/s, accuracy=0.917, cost=0.374]

epoch: 14, pass acc: 0.873326, current acc: 0.875835
time taken: 1.828165054321289
epoch: 14, training loss: 0.414872, training acc: 0.877105, valid loss: 0.445862, valid acc: 0.875835



train minibatch loop: 100%|██████████| 133/133 [00:01<00:00, 77.71it/s, accuracy=0.878, cost=0.462]
test minibatch loop: 100%|██████████| 34/34 [00:00<00:00, 287.89it/s, accuracy=0.692, cost=0.529]
train minibatch loop:   6%|▌         | 8/133 [00:00<00:01, 77.92it/s, accuracy=0.917, cost=0.356]

epoch: 15, pass acc: 0.875835, current acc: 0.876336
time taken: 1.8311221599578857
epoch: 15, training loss: 0.399102, training acc: 0.878737, valid loss: 0.432956, valid acc: 0.876336



train minibatch loop: 100%|██████████| 133/133 [00:01<00:00, 77.83it/s, accuracy=0.878, cost=0.449]
test minibatch loop: 100%|██████████| 34/34 [00:00<00:00, 288.81it/s, accuracy=0.692, cost=0.521]
train minibatch loop:   6%|▌         | 8/133 [00:00<00:01, 77.29it/s, accuracy=0.917, cost=0.339]

epoch: 16, pass acc: 0.876336, current acc: 0.876838
time taken: 1.82830810546875
epoch: 16, training loss: 0.384696, training acc: 0.881874, valid loss: 0.421380, valid acc: 0.876838



train minibatch loop: 100%|██████████| 133/133 [00:01<00:00, 77.57it/s, accuracy=0.878, cost=0.438]
test minibatch loop: 100%|██████████| 34/34 [00:00<00:00, 288.39it/s, accuracy=0.692, cost=0.514]
train minibatch loop:   6%|▌         | 8/133 [00:00<00:01, 77.61it/s, accuracy=0.9, cost=0.324]  

epoch: 17, pass acc: 0.876838, current acc: 0.877340
time taken: 1.8340368270874023
epoch: 17, training loss: 0.371541, training acc: 0.885137, valid loss: 0.411012, valid acc: 0.877340



train minibatch loop: 100%|██████████| 133/133 [00:01<00:00, 77.98it/s, accuracy=0.878, cost=0.427]
test minibatch loop: 100%|██████████| 34/34 [00:00<00:00, 288.45it/s, accuracy=0.692, cost=0.508]
train minibatch loop:   6%|▌         | 8/133 [00:00<00:01, 77.36it/s, accuracy=0.9, cost=0.31]   

epoch: 18, pass acc: 0.877340, current acc: 0.877842
time taken: 1.8251845836639404
epoch: 18, training loss: 0.359514, training acc: 0.887270, valid loss: 0.401725, valid acc: 0.877842



train minibatch loop: 100%|██████████| 133/133 [00:01<00:00, 77.91it/s, accuracy=0.878, cost=0.416]
test minibatch loop: 100%|██████████| 34/34 [00:00<00:00, 289.14it/s, accuracy=0.692, cost=0.502]
train minibatch loop:   6%|▌         | 8/133 [00:00<00:01, 77.39it/s, accuracy=0.9, cost=0.298]  

epoch: 19, pass acc: 0.877842, current acc: 0.881856
time taken: 1.8262813091278076
epoch: 19, training loss: 0.348492, training acc: 0.889027, valid loss: 0.393399, valid acc: 0.881856



train minibatch loop: 100%|██████████| 133/133 [00:01<00:00, 77.97it/s, accuracy=0.878, cost=0.406]
test minibatch loop: 100%|██████████| 34/34 [00:00<00:00, 287.91it/s, accuracy=0.692, cost=0.496]
train minibatch loop:   6%|▌         | 8/133 [00:00<00:01, 77.27it/s, accuracy=0.9, cost=0.286]  

epoch: 20, pass acc: 0.881856, current acc: 0.882859
time taken: 1.8260180950164795
epoch: 20, training loss: 0.338358, training acc: 0.891536, valid loss: 0.385921, valid acc: 0.882859



train minibatch loop: 100%|██████████| 133/133 [00:01<00:00, 77.67it/s, accuracy=0.898, cost=0.396]
test minibatch loop: 100%|██████████| 34/34 [00:00<00:00, 289.60it/s, accuracy=0.692, cost=0.492]
train minibatch loop:   6%|▌         | 8/133 [00:00<00:01, 77.21it/s, accuracy=0.9, cost=0.275]  

epoch: 21, pass acc: 0.882859, current acc: 0.884365
time taken: 1.831261157989502
epoch: 21, training loss: 0.329004, training acc: 0.894702, valid loss: 0.379188, valid acc: 0.884365



train minibatch loop: 100%|██████████| 133/133 [00:01<00:00, 77.68it/s, accuracy=0.898, cost=0.387]
test minibatch loop: 100%|██████████| 34/34 [00:00<00:00, 261.10it/s, accuracy=0.692, cost=0.487]
train minibatch loop:   6%|▌         | 8/133 [00:00<00:01, 77.79it/s, accuracy=0.9, cost=0.265]  

epoch: 22, pass acc: 0.884365, current acc: 0.884866
time taken: 1.8438198566436768
epoch: 22, training loss: 0.320334, training acc: 0.896207, valid loss: 0.373110, valid acc: 0.884866



train minibatch loop: 100%|██████████| 133/133 [00:01<00:00, 78.05it/s, accuracy=0.898, cost=0.378]
test minibatch loop: 100%|██████████| 34/34 [00:00<00:00, 289.57it/s, accuracy=0.692, cost=0.483]
train minibatch loop:   6%|▌         | 8/133 [00:00<00:01, 77.45it/s, accuracy=0.9, cost=0.256]  

epoch: 23, pass acc: 0.884866, current acc: 0.885368
time taken: 1.823049545288086
epoch: 23, training loss: 0.312266, training acc: 0.897964, valid loss: 0.367608, valid acc: 0.885368



train minibatch loop: 100%|██████████| 133/133 [00:01<00:00, 77.74it/s, accuracy=0.898, cost=0.37] 
test minibatch loop: 100%|██████████| 34/34 [00:00<00:00, 290.19it/s, accuracy=0.692, cost=0.48] 
train minibatch loop:   6%|▌         | 8/133 [00:00<00:01, 77.61it/s, accuracy=0.9, cost=0.247]  

epoch: 24, pass acc: 0.885368, current acc: 0.886873
time taken: 1.8294336795806885
epoch: 24, training loss: 0.304724, training acc: 0.899470, valid loss: 0.362613, valid acc: 0.886873



train minibatch loop: 100%|██████████| 133/133 [00:01<00:00, 77.66it/s, accuracy=0.898, cost=0.361]
test minibatch loop: 100%|██████████| 34/34 [00:00<00:00, 290.41it/s, accuracy=0.692, cost=0.476]
train minibatch loop:   6%|▌         | 8/133 [00:00<00:01, 78.22it/s, accuracy=0.9, cost=0.239]  

epoch: 25, pass acc: 0.886873, current acc: 0.889382
time taken: 1.831205129623413
epoch: 25, training loss: 0.297645, training acc: 0.901729, valid loss: 0.358065, valid acc: 0.889382



train minibatch loop: 100%|██████████| 133/133 [00:01<00:00, 77.94it/s, accuracy=0.898, cost=0.353]
test minibatch loop: 100%|██████████| 34/34 [00:00<00:00, 289.24it/s, accuracy=0.692, cost=0.473]
train minibatch loop:   6%|▌         | 8/133 [00:00<00:01, 76.86it/s, accuracy=0.917, cost=0.232]

epoch: 26, pass acc: 0.889382, current acc: 0.890887
time taken: 1.8260059356689453
epoch: 26, training loss: 0.290973, training acc: 0.902858, valid loss: 0.353911, valid acc: 0.890887



train minibatch loop: 100%|██████████| 133/133 [00:01<00:00, 77.87it/s, accuracy=0.898, cost=0.346]
test minibatch loop: 100%|██████████| 34/34 [00:00<00:00, 287.59it/s, accuracy=0.692, cost=0.47] 
train minibatch loop:   6%|▌         | 8/133 [00:00<00:01, 77.57it/s, accuracy=0.917, cost=0.225]

epoch: 27, pass acc: 0.890887, current acc: 0.891389
time taken: 1.8283772468566895
epoch: 27, training loss: 0.284663, training acc: 0.904364, valid loss: 0.350109, valid acc: 0.891389



train minibatch loop: 100%|██████████| 133/133 [00:01<00:00, 77.68it/s, accuracy=0.898, cost=0.338]
test minibatch loop: 100%|██████████| 34/34 [00:00<00:00, 285.83it/s, accuracy=0.692, cost=0.468]
train minibatch loop:   6%|▌         | 8/133 [00:00<00:01, 78.54it/s, accuracy=0.917, cost=0.218]

epoch: 28, pass acc: 0.891389, current acc: 0.892393
time taken: 1.8324975967407227
epoch: 28, training loss: 0.278672, training acc: 0.906246, valid loss: 0.346618, valid acc: 0.892393



train minibatch loop: 100%|██████████| 133/133 [00:01<00:00, 77.85it/s, accuracy=0.898, cost=0.331]
test minibatch loop: 100%|██████████| 34/34 [00:00<00:00, 286.97it/s, accuracy=0.692, cost=0.465]
train minibatch loop:   6%|▌         | 8/133 [00:00<00:01, 77.09it/s, accuracy=0.917, cost=0.212]

epoch: 29, pass acc: 0.892393, current acc: 0.893396
time taken: 1.8290534019470215
epoch: 29, training loss: 0.272967, training acc: 0.907878, valid loss: 0.343407, valid acc: 0.893396



train minibatch loop: 100%|██████████| 133/133 [00:01<00:00, 77.81it/s, accuracy=0.898, cost=0.324]
test minibatch loop: 100%|██████████| 34/34 [00:00<00:00, 286.78it/s, accuracy=0.769, cost=0.463]
train minibatch loop:   6%|▌         | 8/133 [00:00<00:01, 77.16it/s, accuracy=0.917, cost=0.206]

epoch: 30, pass acc: 0.893396, current acc: 0.896715
time taken: 1.829481840133667
epoch: 30, training loss: 0.267517, training acc: 0.909133, valid loss: 0.340446, valid acc: 0.896715



train minibatch loop: 100%|██████████| 133/133 [00:01<00:00, 77.74it/s, accuracy=0.898, cost=0.317]
test minibatch loop: 100%|██████████| 34/34 [00:00<00:00, 289.63it/s, accuracy=0.769, cost=0.46] 
train minibatch loop:   6%|▌         | 8/133 [00:00<00:01, 77.88it/s, accuracy=0.917, cost=0.201]

epoch: 31, pass acc: 0.896715, current acc: 0.898221
time taken: 1.8297929763793945
epoch: 31, training loss: 0.262297, training acc: 0.910764, valid loss: 0.337711, valid acc: 0.898221



train minibatch loop: 100%|██████████| 133/133 [00:01<00:00, 77.72it/s, accuracy=0.898, cost=0.31] 
test minibatch loop: 100%|██████████| 34/34 [00:00<00:00, 289.40it/s, accuracy=0.769, cost=0.458]
train minibatch loop:   6%|▌         | 8/133 [00:00<00:01, 77.83it/s, accuracy=0.917, cost=0.196]

epoch: 32, pass acc: 0.898221, current acc: 0.899224
time taken: 1.8309762477874756
epoch: 32, training loss: 0.257285, training acc: 0.912144, valid loss: 0.335181, valid acc: 0.899224



train minibatch loop: 100%|██████████| 133/133 [00:01<00:00, 77.87it/s, accuracy=0.898, cost=0.304]
test minibatch loop: 100%|██████████| 34/34 [00:00<00:00, 289.64it/s, accuracy=0.769, cost=0.456]
train minibatch loop:   6%|▌         | 8/133 [00:00<00:01, 77.99it/s, accuracy=0.917, cost=0.191]

epoch: 33, pass acc: 0.899224, current acc: 0.900228
time taken: 1.8269217014312744
epoch: 33, training loss: 0.252461, training acc: 0.913023, valid loss: 0.332838, valid acc: 0.900228



train minibatch loop: 100%|██████████| 133/133 [00:01<00:00, 77.77it/s, accuracy=0.918, cost=0.298]
test minibatch loop: 100%|██████████| 34/34 [00:00<00:00, 290.09it/s, accuracy=0.769, cost=0.455]
train minibatch loop:   6%|▌         | 8/133 [00:00<00:01, 77.72it/s, accuracy=0.917, cost=0.186]

time taken: 1.8288168907165527
epoch: 34, training loss: 0.247810, training acc: 0.914808, valid loss: 0.330664, valid acc: 0.900228



train minibatch loop: 100%|██████████| 133/133 [00:01<00:00, 77.80it/s, accuracy=0.918, cost=0.291]
test minibatch loop: 100%|██████████| 34/34 [00:00<00:00, 290.40it/s, accuracy=0.769, cost=0.453]
train minibatch loop:   6%|▌         | 8/133 [00:00<00:01, 77.79it/s, accuracy=0.917, cost=0.182]

epoch: 35, pass acc: 0.900228, current acc: 0.900228
time taken: 1.8282558917999268
epoch: 35, training loss: 0.243318, training acc: 0.916690, valid loss: 0.328646, valid acc: 0.900228



train minibatch loop: 100%|██████████| 133/133 [00:01<00:00, 77.84it/s, accuracy=0.918, cost=0.286]
test minibatch loop: 100%|██████████| 34/34 [00:00<00:00, 286.88it/s, accuracy=0.769, cost=0.451]
train minibatch loop:   6%|▌         | 8/133 [00:00<00:01, 77.47it/s, accuracy=0.917, cost=0.177]

epoch: 36, pass acc: 0.900228, current acc: 0.901231
time taken: 1.8287444114685059
epoch: 36, training loss: 0.238971, training acc: 0.918196, valid loss: 0.326773, valid acc: 0.901231



train minibatch loop: 100%|██████████| 133/133 [00:01<00:00, 77.69it/s, accuracy=0.918, cost=0.28] 
test minibatch loop: 100%|██████████| 34/34 [00:00<00:00, 284.26it/s, accuracy=0.769, cost=0.449]
train minibatch loop:   6%|▌         | 8/133 [00:00<00:01, 78.00it/s, accuracy=0.917, cost=0.173]

time taken: 1.83321213722229
epoch: 37, training loss: 0.234759, training acc: 0.919576, valid loss: 0.325032, valid acc: 0.901231



train minibatch loop: 100%|██████████| 133/133 [00:01<00:00, 77.85it/s, accuracy=0.918, cost=0.274]
test minibatch loop: 100%|██████████| 34/34 [00:00<00:00, 288.58it/s, accuracy=0.769, cost=0.448]
train minibatch loop:   6%|▌         | 8/133 [00:00<00:01, 76.75it/s, accuracy=0.917, cost=0.169]

time taken: 1.827665090560913
epoch: 38, training loss: 0.230674, training acc: 0.920705, valid loss: 0.323414, valid acc: 0.901231



train minibatch loop: 100%|██████████| 133/133 [00:01<00:00, 77.22it/s, accuracy=0.918, cost=0.269]
test minibatch loop: 100%|██████████| 34/34 [00:00<00:00, 289.38it/s, accuracy=0.769, cost=0.446]
train minibatch loop:   6%|▌         | 8/133 [00:00<00:01, 77.75it/s, accuracy=0.917, cost=0.166]

epoch: 39, pass acc: 0.901231, current acc: 0.901733
time taken: 1.841341257095337
epoch: 39, training loss: 0.226706, training acc: 0.922337, valid loss: 0.321912, valid acc: 0.901733



train minibatch loop: 100%|██████████| 133/133 [00:01<00:00, 77.64it/s, accuracy=0.918, cost=0.263]
test minibatch loop: 100%|██████████| 34/34 [00:00<00:00, 287.91it/s, accuracy=0.769, cost=0.445]
train minibatch loop:   6%|▌         | 8/133 [00:00<00:01, 77.31it/s, accuracy=0.917, cost=0.162]

epoch: 40, pass acc: 0.901733, current acc: 0.902235
time taken: 1.8328146934509277
epoch: 40, training loss: 0.222848, training acc: 0.924596, valid loss: 0.320517, valid acc: 0.902235



train minibatch loop: 100%|██████████| 133/133 [00:01<00:00, 77.73it/s, accuracy=0.918, cost=0.258]
test minibatch loop: 100%|██████████| 34/34 [00:00<00:00, 287.24it/s, accuracy=0.769, cost=0.443]
train minibatch loop:   6%|▌         | 8/133 [00:00<00:01, 77.57it/s, accuracy=0.917, cost=0.158]

time taken: 1.830817461013794
epoch: 41, training loss: 0.219093, training acc: 0.925976, valid loss: 0.319223, valid acc: 0.902235



train minibatch loop: 100%|██████████| 133/133 [00:01<00:00, 77.76it/s, accuracy=0.918, cost=0.253]
test minibatch loop: 100%|██████████| 34/34 [00:00<00:00, 289.74it/s, accuracy=0.769, cost=0.442]
train minibatch loop:   6%|▌         | 8/133 [00:00<00:01, 77.20it/s, accuracy=0.917, cost=0.155]

time taken: 1.8297176361083984
epoch: 42, training loss: 0.215437, training acc: 0.926854, valid loss: 0.318023, valid acc: 0.902235



train minibatch loop: 100%|██████████| 133/133 [00:01<00:00, 77.77it/s, accuracy=0.918, cost=0.248]
test minibatch loop: 100%|██████████| 34/34 [00:00<00:00, 284.63it/s, accuracy=0.769, cost=0.44] 
train minibatch loop:   6%|▌         | 8/133 [00:00<00:01, 77.36it/s, accuracy=0.933, cost=0.152]

epoch: 43, pass acc: 0.902235, current acc: 0.902736
time taken: 1.8312606811523438
epoch: 43, training loss: 0.211873, training acc: 0.927984, valid loss: 0.316912, valid acc: 0.902736



train minibatch loop: 100%|██████████| 133/133 [00:01<00:00, 77.95it/s, accuracy=0.939, cost=0.244]
test minibatch loop: 100%|██████████| 34/34 [00:00<00:00, 287.25it/s, accuracy=0.769, cost=0.439]
train minibatch loop:   6%|▌         | 8/133 [00:00<00:01, 77.67it/s, accuracy=0.933, cost=0.149]

epoch: 44, pass acc: 0.902736, current acc: 0.903238
time taken: 1.8262388706207275
epoch: 44, training loss: 0.208397, training acc: 0.929267, valid loss: 0.315885, valid acc: 0.903238



train minibatch loop: 100%|██████████| 133/133 [00:01<00:00, 77.71it/s, accuracy=0.939, cost=0.239]
test minibatch loop: 100%|██████████| 34/34 [00:00<00:00, 287.73it/s, accuracy=0.769, cost=0.437]
train minibatch loop:   6%|▌         | 8/133 [00:00<00:01, 77.28it/s, accuracy=0.933, cost=0.146]

epoch: 45, pass acc: 0.903238, current acc: 0.903740
time taken: 1.8311009407043457
epoch: 45, training loss: 0.205005, training acc: 0.930898, valid loss: 0.314937, valid acc: 0.903740



train minibatch loop: 100%|██████████| 133/133 [00:01<00:00, 77.85it/s, accuracy=0.939, cost=0.235]
test minibatch loop: 100%|██████████| 34/34 [00:00<00:00, 290.25it/s, accuracy=0.769, cost=0.436]
train minibatch loop:   6%|▌         | 8/133 [00:00<00:01, 78.07it/s, accuracy=0.933, cost=0.143]

time taken: 1.827251672744751
epoch: 46, training loss: 0.201693, training acc: 0.931525, valid loss: 0.314065, valid acc: 0.903740



train minibatch loop: 100%|██████████| 133/133 [00:01<00:00, 77.81it/s, accuracy=0.939, cost=0.23] 
test minibatch loop: 100%|██████████| 34/34 [00:00<00:00, 289.69it/s, accuracy=0.769, cost=0.435]
train minibatch loop:   6%|▌         | 8/133 [00:00<00:01, 77.65it/s, accuracy=0.933, cost=0.14] 

time taken: 1.828348159790039
epoch: 47, training loss: 0.198457, training acc: 0.932404, valid loss: 0.313265, valid acc: 0.903238



train minibatch loop: 100%|██████████| 133/133 [00:01<00:00, 77.69it/s, accuracy=0.939, cost=0.226]
test minibatch loop: 100%|██████████| 34/34 [00:00<00:00, 287.38it/s, accuracy=0.769, cost=0.433]

time taken: 1.8323161602020264
epoch: 48, training loss: 0.195295, training acc: 0.933157, valid loss: 0.312532, valid acc: 0.903740

break epoch:49






In [13]:
real_Y, predict_Y = [], []

pbar = tqdm(
    range(0, test_X.shape[0], batch_size), desc = 'validation minibatch loop'
)
for i in pbar:
    batch_x = convert_sparse_matrix_to_sparse_tensor(test_X[i : min(i + batch_size, test_X.shape[0])])
    batch_y = test_Y[i : min(i + batch_size, test_X.shape[0])]
    predict_Y += np.argmax(
        sess.run(
            model.logits, feed_dict = {model.X: batch_x[0], model.W: batch_x[1], model.Y: batch_y}
        ),
        1,
    ).tolist()
    real_Y += batch_y

validation minibatch loop: 100%|██████████| 34/34 [00:00<00:00, 265.50it/s]


In [14]:
print(
    metrics.classification_report(
        real_Y, predict_Y, target_names = ['negative','positive']
    )
)

             precision    recall  f1-score   support

   negative       0.89      0.88      0.88       992
   positive       0.88      0.90      0.89      1001

avg / total       0.89      0.89      0.89      1993



In [15]:
saver.save(sess, 'fast-text-char/model.ckpt')

'fast-text-char/model.ckpt'

In [16]:
import pickle
with open('vectorizer-sparse-subjectivity.pkl','wb') as fopen:
    pickle.dump(bow_chars, fopen)