# Final Experiments

In [1]:
# 0. Some initial set-up.
from collections import Counter
import numpy as np
import os
import pandas as pd
import random
from tf_rnn_classifier import TfRNNClassifier
from collections import defaultdict, Counter
from sklearn.tree import DecisionTreeClassifier
from sklearn.multiclass import OneVsRestClassifier
from sklearn.feature_extraction import DictVectorizer
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from sklearn.model_selection import cross_val_score
from scipy.sparse import hstack
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import precision_recall_fscore_support, classification_report, roc_auc_score
import tensorflow as tf
import sst
from utils import evaluate, build_rnn_dataset
import utils

  from ._conv import register_converters as _register_converters


In [2]:
vsmdata_home = 'vsmdata'

glove_home = os.path.join(vsmdata_home, 'glove.6B')

data_dir = "./data/"

In [3]:
train = pd.read_csv(data_dir + "train.csv").fillna(' ')
test = pd.read_csv(data_dir + "test.csv").fillna(' ')
test_labels = pd.read_csv(data_dir + "test_labels.csv")

## Dataset Set-up

In [4]:
X_rnn, Y_rnn = build_rnn_dataset(train, 0.9)

## Baseline Features

In [5]:
label_cols = ['toxic', 'severe_toxic', 'obscene', 'threat', 'insult', 'identity_hate']
COMMENT = 'comment_text'
train_examples = train[COMMENT]
train_labels = train[label_cols]

In [31]:
print(np.sum(np.array(Y_rnn['train']), axis=0))

[13662  1437  7565   421  7066  1265]


In [16]:
train_text = [" ".join(ex) for ex in X_rnn['train']]
dev_text = [" ".join(ex) for ex in X_rnn['dev']]
all_text = train_text + dev_text
print(len(train_text), len(dev_text), len(all_text))

143613 15958 159571


In [17]:
all_text = [" ".join(ex) for ex in X_rnn]

In [18]:
# may take several minutes...
word_vectorizer = TfidfVectorizer(
    sublinear_tf=True,
    strip_accents='unicode',
    analyzer='word',
    token_pattern=r'\w{1,}',
    stop_words='english',
    ngram_range=(1, 1),
    max_features=10000)
word_vectorizer.fit(all_text)
train_word_features = word_vectorizer.transform(train_text)
dev_word_features = word_vectorizer.transform(dev_text)

In [19]:
train_features = hstack([train_word_features])
dev_features = hstack([dev_word_features])

## Glove

In [6]:
glove_lookup = utils.glove2dict(
    os.path.join(vsmdata_home, 'glove.6B.100d.txt'))

In [7]:
full_train_vocab = sst.get_vocab(X_rnn['train'])

In [8]:
glove_vocab = sorted(set(glove_lookup) & set(full_train_vocab))
print("Embedding matrix contains %d words." % len(glove_vocab))

Embedding matrix contains 55422 words.


In [9]:
glove_embedding = np.array([glove_lookup[w] for w in glove_vocab])

In [10]:
glove_vocab.append("$UNK")
glove_embedding = np.vstack(
    (glove_embedding, utils.randvec(glove_embedding.shape[1])))

In [11]:
full_glove_vocab = sorted(set(sst.get_vocab(X_rnn['train'])))
print("Embedding matrix contains %d words." % len(full_glove_vocab))

Embedding matrix contains 494751 words.


In [12]:
full_glove_embedding = np.array([
    glove_lookup[w] 
    if w in glove_lookup else utils.randvec(len(glove_lookup["hello"])) 
    for w in full_glove_vocab
])

In [13]:
full_glove_vocab.append("$UNK")
full_glove_embedding = np.vstack(
    (full_glove_embedding, utils.randvec(full_glove_embedding.shape[1])))

## 1. Baseline

In [64]:

classifier = LogisticRegression(C=0.1, solver='sag')
classifier.fit(train_word_features, Y_rnn_binary)

LogisticRegression(C=0.1, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='ovr', n_jobs=1,
          penalty='l2', random_state=None, solver='sag', tol=0.0001,
          verbose=0, warm_start=False)

In [65]:
preds = classifier.predict_proba(train_features)[:, 1]
preds = np.expand_dims(preds, 1)

In [66]:
preds

array([[0.1061184 ],
       [0.1061184 ],
       [0.09453445],
       ...,
       [0.1061184 ],
       [0.1061184 ],
       [0.11739597]])

In [67]:
evaluate(Y_rnn_binary, preds)

p, r, f1: 0.0000, 0.0000, 0.0000

average F1 score: 0.000000


In [25]:
scores = []
submission = pd.DataFrame.from_dict({'id': test['id']})
for i,class_name in enumerate(label_cols):
    train_target = train[class_name]
    test_target = test_labels[class_name]
    classifier = LogisticRegression(C=0.1, solver='sag')

#     cv_score = np.mean(cross_val_score(classifier, train_features, train_target, cv=3, scoring='roc_auc'))
#     scores.append(cv_score)
#     print('CV score for class {} is {}'.format(class_name, cv_score))

    classifier.fit(train_features, np.array(Y_rnn['train'])[:,i])
    preds = classifier.predict_proba(dev_features)[:, 1]
    preds = np.expand_dims(preds, 1)
    evaluate(np.expand_dims(np.array(Y_rnn['dev'])[:,i],1), preds)
    

print('Total CV score is {}'.format(np.mean(scores)))

p, r, f1: 0.0000, 0.0000, 0.0000

average F1 score: 0.000000
p, r, f1: 0.0000, 0.0000, 0.0000

average F1 score: 0.000000
p, r, f1: 0.0000, 0.0000, 0.0000

average F1 score: 0.000000
p, r, f1: 0.0000, 0.0000, 0.0000

average F1 score: 0.000000
p, r, f1: 0.0000, 0.0000, 0.0000

average F1 score: 0.000000
p, r, f1: 0.0000, 0.0000, 0.0000

average F1 score: 0.000000
Total CV score is nan


  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)


## 2. Deep

In [18]:
deep1 = TfRNNClassifier(
    full_glove_vocab,
    embedding=full_glove_embedding,
    embed_dim=100,
    hidden_dim=50,
    max_length=100,
    hidden_activation=tf.nn.tanh,
    cell_class=tf.nn.rnn_cell.LSTMCell, # LSTM
    train_embedding=True,
    max_iter=3,
    word_length=12,
    bidir_rnn=True, # Bidirectional RNN!
    dropout=0.2,
    eta=0.01,
    eval_every=1,
    experiment_name="MULTI_deep")

In [21]:
deep1.fit(X_rnn['train'][:], 
          Y_rnn['train'], 
          X_dev=X_rnn['dev'],
          y_dev=Y_rnn['dev'])

./logs/MULTI_deep
epoch 1, iter 1, loss 0.69646, batch_time 0.756
epoch 1, iter 2, loss 0.43689, batch_time 0.684
epoch 1, iter 3, loss 0.23577, batch_time 0.681
epoch 1, iter 4, loss 0.16694, batch_time 0.583
epoch 1, iter 5, loss 0.15759, batch_time 0.627
epoch 1, iter 6, loss 0.14166, batch_time 0.576
epoch 1, iter 7, loss 0.16641, batch_time 0.575
epoch 1, iter 8, loss 0.13442, batch_time 0.562
epoch 1, iter 9, loss 0.15260, batch_time 0.637
epoch 1, iter 10, loss 0.14046, batch_time 0.653
epoch 1, iter 11, loss 0.12294, batch_time 0.645
epoch 1, iter 12, loss 0.14425, batch_time 0.650
epoch 1, iter 13, loss 0.15000, batch_time 0.651
epoch 1, iter 14, loss 0.13755, batch_time 0.576
epoch 1, iter 15, loss 0.16407, batch_time 0.568
epoch 1, iter 16, loss 0.13681, batch_time 0.566
epoch 1, iter 17, loss 0.14641, batch_time 0.622
epoch 1, iter 18, loss 0.15108, batch_time 0.636
epoch 1, iter 19, loss 0.14275, batch_time 0.631
epoch 1, iter 20, loss 0.13697, batch_time 0.578
epoch 1, it

epoch 2, iter 166, loss 0.06244, batch_time 0.635
epoch 2, iter 167, loss 0.06390, batch_time 0.650
epoch 2, iter 168, loss 0.06049, batch_time 0.643
epoch 2, iter 169, loss 0.05783, batch_time 0.642
epoch 2, iter 170, loss 0.06624, batch_time 0.664
epoch 2, iter 171, loss 0.05505, batch_time 0.635
epoch 2, iter 172, loss 0.06634, batch_time 0.601
epoch 2, iter 173, loss 0.05507, batch_time 0.637
epoch 2, iter 174, loss 0.05356, batch_time 0.581
epoch 2, iter 175, loss 0.05772, batch_time 0.580
epoch 2, iter 176, loss 0.06841, batch_time 0.576
epoch 2, iter 177, loss 0.05270, batch_time 0.573
epoch 2, iter 178, loss 0.05547, batch_time 0.640
epoch 2, iter 179, loss 0.06229, batch_time 0.572
epoch 2, iter 180, loss 0.05469, batch_time 0.567
epoch 2, iter 181, loss 0.05285, batch_time 0.587
epoch 2, iter 182, loss 0.06065, batch_time 0.579
epoch 2, iter 183, loss 0.05205, batch_time 0.588
epoch 2, iter 184, loss 0.05824, batch_time 0.641
epoch 2, iter 185, loss 0.06225, batch_time 0.641


epoch 3, iter 329, loss 0.04510, batch_time 0.632
epoch 3, iter 330, loss 0.04821, batch_time 0.642
epoch 3, iter 331, loss 0.05041, batch_time 0.628
epoch 3, iter 332, loss 0.03961, batch_time 0.591
epoch 3, iter 333, loss 0.04669, batch_time 0.585
epoch 3, iter 334, loss 0.04791, batch_time 0.578
epoch 3, iter 335, loss 0.03797, batch_time 0.588
epoch 3, iter 336, loss 0.04885, batch_time 0.579
epoch 3, iter 337, loss 0.03703, batch_time 0.628
epoch 3, iter 338, loss 0.05454, batch_time 0.665
epoch 3, iter 339, loss 0.05722, batch_time 0.598
epoch 3, iter 340, loss 0.04354, batch_time 0.585
epoch 3, iter 341, loss 0.04932, batch_time 0.576
epoch 3, iter 342, loss 0.03871, batch_time 0.615
epoch 3, iter 343, loss 0.04255, batch_time 0.631
epoch 3, iter 344, loss 0.05357, batch_time 0.642
epoch 3, iter 345, loss 0.04153, batch_time 0.581
epoch 3, iter 346, loss 0.04520, batch_time 0.584
epoch 3, iter 347, loss 0.05445, batch_time 0.573
epoch 3, iter 348, loss 0.04043, batch_time 0.586


<tf_rnn_classifier.TfRNNClassifier at 0x7f17b6a15518>

In [24]:
preds = deep1.predict(X_rnn['train'])

In [25]:
evaluate(Y_rnn['train'], preds)

CLASS: toxic
p, r, f1: 0.9046, 0.8510, 0.8770

CLASS: severe_toxic
p, r, f1: 0.8889, 0.0056, 0.0111

CLASS: obscene
p, r, f1: 0.8433, 0.8008, 0.8215

CLASS: threat
p, r, f1: 0.0000, 0.0000, 0.0000

CLASS: insult
p, r, f1: 0.7771, 0.7070, 0.7404

CLASS: identity_hate
p, r, f1: 1.0000, 0.0008, 0.0016

average F1 score: 0.408593
weighted avg. F1 scored: 0.746307
macro-averaged ROC-AUC score: 0.984355


In [22]:
preds = deep1.predict(X_rnn['dev'][:])

In [23]:
evaluate(Y_rnn['dev'], preds)

CLASS: toxic
p, r, f1: 0.7976, 0.7004, 0.7458

CLASS: severe_toxic
p, r, f1: 0.5000, 0.0253, 0.0482

CLASS: obscene
p, r, f1: 0.8376, 0.6708, 0.7450

CLASS: threat
p, r, f1: 0.0000, 0.0000, 0.0000

CLASS: insult
p, r, f1: 0.7532, 0.5795, 0.6551

CLASS: identity_hate
p, r, f1: 0.0000, 0.0000, 0.0000

average F1 score: 0.365677
weighted avg. F1 scored: 0.655793
macro-averaged ROC-AUC score: 0.961122


## 3. Deep + CE

In [18]:
deep_ce1 = TfRNNClassifier(
    full_glove_vocab,
    embedding=full_glove_embedding,
    embed_dim=100,
    hidden_dim=50,
    max_length=100,
    hidden_activation=tf.nn.tanh,
    cell_class=tf.nn.rnn_cell.LSTMCell, # LSTM
    train_embedding=True,
    max_iter=3,
    word_length=12,
    bidir_rnn=True, # Bidirectional RNN!
    char_embed=True, # Character Embeddings!
    dropout=0.2,
    eta=0.01,
    eval_every=1,
    experiment_name="MULTI_deep_ce")

In [19]:
deep_ce1.fit(X_rnn['train'][:], 
             Y_rnn['train'], 
             X_dev=X_rnn['dev'],
             y_dev=Y_rnn['dev'])

./logs/MULTI_deep_ce
epoch 1, iter 1, loss 0.70904, batch_time 2.132
epoch 1, iter 2, loss 0.23591, batch_time 1.135
epoch 1, iter 3, loss 0.14409, batch_time 1.170
epoch 1, iter 4, loss 0.15776, batch_time 1.126
epoch 1, iter 5, loss 0.13168, batch_time 1.137
epoch 1, iter 6, loss 0.15272, batch_time 1.174
epoch 1, iter 7, loss 0.12483, batch_time 1.155
epoch 1, iter 8, loss 0.15855, batch_time 1.214
epoch 1, iter 9, loss 0.14092, batch_time 1.154
epoch 1, iter 10, loss 0.13549, batch_time 1.149
epoch 1, iter 11, loss 0.11809, batch_time 1.134
epoch 1, iter 12, loss 0.13125, batch_time 1.149
epoch 1, iter 13, loss 0.14065, batch_time 1.200
epoch 1, iter 14, loss 0.15252, batch_time 1.141
epoch 1, iter 15, loss 0.14418, batch_time 1.134
epoch 1, iter 16, loss 0.14706, batch_time 1.135
epoch 1, iter 17, loss 0.16138, batch_time 1.151
epoch 1, iter 18, loss 0.13948, batch_time 1.250
epoch 1, iter 19, loss 0.15966, batch_time 1.145
epoch 1, iter 20, loss 0.14846, batch_time 1.219
epoch 1,

epoch 2, iter 166, loss 0.08197, batch_time 1.221
epoch 2, iter 167, loss 0.06622, batch_time 1.195
epoch 2, iter 168, loss 0.07686, batch_time 1.206
epoch 2, iter 169, loss 0.06777, batch_time 1.194
epoch 2, iter 170, loss 0.07034, batch_time 1.213
epoch 2, iter 171, loss 0.05663, batch_time 1.151
epoch 2, iter 172, loss 0.06941, batch_time 1.215
epoch 2, iter 173, loss 0.06714, batch_time 1.207
epoch 2, iter 174, loss 0.07808, batch_time 1.174
epoch 2, iter 175, loss 0.06683, batch_time 1.151
epoch 2, iter 176, loss 0.06979, batch_time 1.173
epoch 2, iter 177, loss 0.06832, batch_time 1.159
epoch 2, iter 178, loss 0.06266, batch_time 1.167
epoch 2, iter 179, loss 0.06760, batch_time 1.154
epoch 2, iter 180, loss 0.06129, batch_time 1.193
epoch 2, iter 181, loss 0.06585, batch_time 1.140
epoch 2, iter 182, loss 0.05124, batch_time 1.142
epoch 2, iter 183, loss 0.05934, batch_time 1.189
epoch 2, iter 184, loss 0.07571, batch_time 1.145
epoch 2, iter 185, loss 0.06484, batch_time 1.148


epoch 3, iter 329, loss 0.04641, batch_time 1.226
epoch 3, iter 330, loss 0.04617, batch_time 1.209
epoch 3, iter 331, loss 0.04284, batch_time 1.157
epoch 3, iter 332, loss 0.04733, batch_time 1.129
epoch 3, iter 333, loss 0.05200, batch_time 1.133
epoch 3, iter 334, loss 0.04373, batch_time 1.171
epoch 3, iter 335, loss 0.03862, batch_time 1.193
epoch 3, iter 336, loss 0.04658, batch_time 1.204
epoch 3, iter 337, loss 0.05447, batch_time 1.279
epoch 3, iter 338, loss 0.04527, batch_time 1.252
epoch 3, iter 339, loss 0.04301, batch_time 1.178
epoch 3, iter 340, loss 0.05371, batch_time 1.236
epoch 3, iter 341, loss 0.04798, batch_time 1.170
epoch 3, iter 342, loss 0.04885, batch_time 1.162
epoch 3, iter 343, loss 0.04302, batch_time 1.232
epoch 3, iter 344, loss 0.04845, batch_time 1.189
epoch 3, iter 345, loss 0.05485, batch_time 1.147
epoch 3, iter 346, loss 0.05461, batch_time 1.212
epoch 3, iter 347, loss 0.04020, batch_time 1.263
epoch 3, iter 348, loss 0.05148, batch_time 1.213


<tf_rnn_classifier.TfRNNClassifier at 0x7f7d06a51518>

In [20]:
preds = deep_ce1.predict(X_rnn['train'])

In [21]:
evaluate(Y_rnn['train'], preds)

CLASS: toxic
p, r, f1: 0.8918, 0.8521, 0.8715

CLASS: severe_toxic
p, r, f1: 0.6448, 0.2122, 0.3194

CLASS: obscene
p, r, f1: 0.7876, 0.8387, 0.8124

CLASS: threat
p, r, f1: 0.0000, 0.0000, 0.0000

CLASS: insult
p, r, f1: 0.7684, 0.7174, 0.7420

CLASS: identity_hate
p, r, f1: 0.0000, 0.0000, 0.0000

average F1 score: 0.457541
weighted avg. F1 scored: 0.756112
macro-averaged ROC-AUC score: 0.982130


In [22]:
preds = deep_ce1.predict(X_rnn['dev'])

In [23]:
evaluate(Y_rnn['dev'], preds)

CLASS: toxic
p, r, f1: 0.7598, 0.7096, 0.7338

CLASS: severe_toxic
p, r, f1: 0.6038, 0.2025, 0.3033

CLASS: obscene
p, r, f1: 0.7708, 0.7534, 0.7620

CLASS: threat
p, r, f1: 0.0000, 0.0000, 0.0000

CLASS: insult
p, r, f1: 0.7139, 0.6091, 0.6574

CLASS: identity_hate
p, r, f1: 0.0000, 0.0000, 0.0000

average F1 score: 0.409421
weighted avg. F1 scored: 0.666020
macro-averaged ROC-AUC score: 0.961995


## 4. Deep + Self

In [24]:
deep_self1 = TfRNNClassifier(
    full_glove_vocab,
    embedding=full_glove_embedding,
    embed_dim=100,
    hidden_dim=50,
    max_length=100,
    hidden_activation=tf.nn.tanh,
    cell_class=tf.nn.rnn_cell.LSTMCell, # LSTM
    train_embedding=True,
    max_iter=3,
    word_length=12,
    bidir_rnn=True, # Bidirectional RNN!
    self_attend=True, # Self-Attention!
    dropout=0.2,
    eta=0.01,
    eval_every=1,
    experiment_name="MULTI_deep_self")

In [25]:
deep_self1.fit(X_rnn['train'][:], 
               Y_rnn['train'], 
               X_dev=X_rnn['dev'],
               y_dev=Y_rnn['dev'])  

Tensor("concat_1:0", shape=(?, 200, 50), dtype=float32)
./logs/MULTI_deep_self
epoch 1, iter 1, loss 0.68526, batch_time 0.706
epoch 1, iter 2, loss 0.43877, batch_time 0.616
epoch 1, iter 3, loss 0.25484, batch_time 0.591
epoch 1, iter 4, loss 0.19027, batch_time 0.597
epoch 1, iter 5, loss 0.15480, batch_time 0.667
epoch 1, iter 6, loss 0.14428, batch_time 0.576
epoch 1, iter 7, loss 0.11870, batch_time 0.593
epoch 1, iter 8, loss 0.16814, batch_time 0.587
epoch 1, iter 9, loss 0.14066, batch_time 0.578
epoch 1, iter 10, loss 0.13662, batch_time 0.593
epoch 1, iter 11, loss 0.12841, batch_time 0.732
epoch 1, iter 12, loss 0.14061, batch_time 0.606
epoch 1, iter 13, loss 0.11936, batch_time 0.596
epoch 1, iter 14, loss 0.14160, batch_time 0.643
epoch 1, iter 15, loss 0.12360, batch_time 0.663
epoch 1, iter 16, loss 0.15761, batch_time 0.583
epoch 1, iter 17, loss 0.16609, batch_time 0.589
epoch 1, iter 18, loss 0.15035, batch_time 0.599
epoch 1, iter 19, loss 0.13179, batch_time 0.595

epoch 2, iter 165, loss 0.05993, batch_time 0.652
epoch 2, iter 166, loss 0.05310, batch_time 0.658
epoch 2, iter 167, loss 0.06729, batch_time 0.664
epoch 2, iter 168, loss 0.05068, batch_time 0.667
epoch 2, iter 169, loss 0.05370, batch_time 0.611
epoch 2, iter 170, loss 0.04687, batch_time 0.624
epoch 2, iter 171, loss 0.04969, batch_time 0.666
epoch 2, iter 172, loss 0.05903, batch_time 0.656
epoch 2, iter 173, loss 0.06482, batch_time 0.610
epoch 2, iter 174, loss 0.07087, batch_time 0.617
epoch 2, iter 175, loss 0.04653, batch_time 0.610
epoch 2, iter 176, loss 0.07639, batch_time 0.604
epoch 2, iter 177, loss 0.05631, batch_time 0.696
epoch 2, iter 178, loss 0.06119, batch_time 0.601
epoch 2, iter 179, loss 0.05509, batch_time 0.631
epoch 2, iter 180, loss 0.06627, batch_time 0.663
epoch 2, iter 181, loss 0.04604, batch_time 0.665
epoch 2, iter 182, loss 0.04286, batch_time 0.656
epoch 2, iter 183, loss 0.05181, batch_time 0.598
epoch 2, iter 184, loss 0.06720, batch_time 0.643


epoch 3, iter 328, loss 0.03911, batch_time 0.671
epoch 3, iter 329, loss 0.05100, batch_time 0.664
epoch 3, iter 330, loss 0.04375, batch_time 0.675
epoch 3, iter 331, loss 0.05486, batch_time 0.676
epoch 3, iter 332, loss 0.05016, batch_time 0.609
epoch 3, iter 333, loss 0.05409, batch_time 0.658
epoch 3, iter 334, loss 0.04487, batch_time 0.653
epoch 3, iter 335, loss 0.04778, batch_time 0.686
epoch 3, iter 336, loss 0.04997, batch_time 0.665
epoch 3, iter 337, loss 0.04094, batch_time 0.599
epoch 3, iter 338, loss 0.04659, batch_time 0.615
epoch 3, iter 339, loss 0.03874, batch_time 0.662
epoch 3, iter 340, loss 0.05241, batch_time 0.666
epoch 3, iter 341, loss 0.04303, batch_time 0.663
epoch 3, iter 342, loss 0.04387, batch_time 0.614
epoch 3, iter 343, loss 0.04687, batch_time 0.623
epoch 3, iter 344, loss 0.03755, batch_time 0.613
epoch 3, iter 345, loss 0.04204, batch_time 0.658
epoch 3, iter 346, loss 0.04489, batch_time 0.634
epoch 3, iter 347, loss 0.04526, batch_time 0.672


<tf_rnn_classifier.TfRNNClassifier at 0x7f7c6b659048>

In [26]:
preds = deep_self1.predict(X_rnn['train'])

In [27]:
evaluate(Y_rnn['train'], preds)

CLASS: toxic
p, r, f1: 0.9156, 0.8303, 0.8708

CLASS: severe_toxic
p, r, f1: 0.6869, 0.2046, 0.3153

CLASS: obscene
p, r, f1: 0.8638, 0.7803, 0.8199

CLASS: threat
p, r, f1: 0.0000, 0.0000, 0.0000

CLASS: insult
p, r, f1: 0.7716, 0.7383, 0.7546

CLASS: identity_hate
p, r, f1: 0.0000, 0.0000, 0.0000

average F1 score: 0.460107
weighted avg. F1 scored: 0.760285
macro-averaged ROC-AUC score: 0.984704


In [28]:
preds = deep_self1.predict(X_rnn['dev'][:])

In [29]:
evaluate(Y_rnn['dev'], preds)

CLASS: toxic
p, r, f1: 0.8328, 0.6624, 0.7379

CLASS: severe_toxic
p, r, f1: 0.5526, 0.1329, 0.2143

CLASS: obscene
p, r, f1: 0.8499, 0.6663, 0.7470

CLASS: threat
p, r, f1: 0.0000, 0.0000, 0.0000

CLASS: insult
p, r, f1: 0.7301, 0.6005, 0.6590

CLASS: identity_hate
p, r, f1: 0.0000, 0.0000, 0.0000

average F1 score: 0.393026
weighted avg. F1 scored: 0.660747
macro-averaged ROC-AUC score: 0.962749


## 5. Deep + CE + Attention

In [50]:
deep_ce_self1 = TfRNNClassifier(
    full_glove_vocab,
    embedding=full_glove_embedding,
    embed_dim=100,
    hidden_dim=50,
    max_length=100,
    hidden_activation=tf.nn.tanh,
    cell_class=tf.nn.rnn_cell.LSTMCell, # LSTM
    train_embedding=True,
    max_iter=5,
    word_length=12,
    bidir_rnn=True, # Bidirectional RNN!
    char_embed=True, # Character Embeddings!
    self_attend=True, # Self-Attention
    dropout=0.2,
    eta=0.01,
    eval_every=1,
    experiment_name="MULTI_deep_ce_self")

In [51]:
deep_ce_self1.fit(X_rnn['train'][:], 
                      Y_rnn['train'], 
                      X_dev=X_rnn['dev'],
                      y_dev=Y_rnn['dev'])

Tensor("concat_2:0", shape=(?, 200, 50), dtype=float32)
./logs/MULTI_deep_ce_self
epoch 1, iter 1, loss 0.74207, batch_time 1.285
epoch 1, iter 2, loss 0.28805, batch_time 1.191
epoch 1, iter 3, loss 0.17152, batch_time 1.221
epoch 1, iter 4, loss 0.14659, batch_time 1.207
epoch 1, iter 5, loss 0.14584, batch_time 1.197
epoch 1, iter 6, loss 0.13039, batch_time 1.194
epoch 1, iter 7, loss 0.16606, batch_time 1.200
epoch 1, iter 8, loss 0.14210, batch_time 1.152
epoch 1, iter 9, loss 0.14152, batch_time 1.142
epoch 1, iter 10, loss 0.13235, batch_time 1.159
epoch 1, iter 11, loss 0.13778, batch_time 1.159
epoch 1, iter 12, loss 0.14550, batch_time 1.176
epoch 1, iter 13, loss 0.13562, batch_time 1.160
epoch 1, iter 14, loss 0.15890, batch_time 1.195
epoch 1, iter 15, loss 0.16046, batch_time 1.159
epoch 1, iter 16, loss 0.17019, batch_time 1.188
epoch 1, iter 17, loss 0.15985, batch_time 1.209
epoch 1, iter 18, loss 0.13755, batch_time 1.203
epoch 1, iter 19, loss 0.13340, batch_time 1.

epoch 2, iter 165, loss 0.06468, batch_time 1.148
epoch 2, iter 166, loss 0.06609, batch_time 1.142
epoch 2, iter 167, loss 0.06570, batch_time 1.143
epoch 2, iter 168, loss 0.06906, batch_time 1.162
epoch 2, iter 169, loss 0.07266, batch_time 1.149
epoch 2, iter 170, loss 0.06448, batch_time 1.273
epoch 2, iter 171, loss 0.06197, batch_time 1.249
epoch 2, iter 172, loss 0.06788, batch_time 1.148
epoch 2, iter 173, loss 0.06947, batch_time 1.136
epoch 2, iter 174, loss 0.06021, batch_time 1.137
epoch 2, iter 175, loss 0.06128, batch_time 1.150
epoch 2, iter 176, loss 0.06866, batch_time 1.156
epoch 2, iter 177, loss 0.07685, batch_time 1.155
epoch 2, iter 178, loss 0.06738, batch_time 1.236
epoch 2, iter 179, loss 0.06488, batch_time 1.171
epoch 2, iter 180, loss 0.07789, batch_time 1.178
epoch 2, iter 181, loss 0.05990, batch_time 1.156
epoch 2, iter 182, loss 0.06410, batch_time 1.150
epoch 2, iter 183, loss 0.05459, batch_time 1.154
epoch 2, iter 184, loss 0.07044, batch_time 1.121


epoch 3, iter 328, loss 0.04870, batch_time 1.231
epoch 3, iter 329, loss 0.04524, batch_time 1.153
epoch 3, iter 330, loss 0.05541, batch_time 1.145
epoch 3, iter 331, loss 0.04031, batch_time 1.177
epoch 3, iter 332, loss 0.03979, batch_time 1.161
epoch 3, iter 333, loss 0.04072, batch_time 1.162
epoch 3, iter 334, loss 0.04038, batch_time 1.160
epoch 3, iter 335, loss 0.03771, batch_time 1.181
epoch 3, iter 336, loss 0.04747, batch_time 1.140
epoch 3, iter 337, loss 0.04769, batch_time 1.153
epoch 3, iter 338, loss 0.04115, batch_time 1.149
epoch 3, iter 339, loss 0.05419, batch_time 1.175
epoch 3, iter 340, loss 0.04241, batch_time 1.149
epoch 3, iter 341, loss 0.04738, batch_time 1.142
epoch 3, iter 342, loss 0.04710, batch_time 1.227
epoch 3, iter 343, loss 0.04104, batch_time 1.172
epoch 3, iter 344, loss 0.04722, batch_time 1.167
epoch 3, iter 345, loss 0.05496, batch_time 1.225
epoch 3, iter 346, loss 0.05489, batch_time 1.222
epoch 3, iter 347, loss 0.04103, batch_time 1.219


epoch 4, iter 491, loss 0.03183, batch_time 1.232
epoch 4, iter 492, loss 0.03886, batch_time 1.147
epoch 4, iter 493, loss 0.03929, batch_time 1.210
epoch 4, iter 494, loss 0.03608, batch_time 1.213
epoch 4, iter 495, loss 0.03925, batch_time 1.181
epoch 4, iter 496, loss 0.04539, batch_time 1.232
epoch 4, iter 497, loss 0.04054, batch_time 1.227
epoch 4, iter 498, loss 0.04259, batch_time 1.194
epoch 4, iter 499, loss 0.03628, batch_time 1.209
epoch 4, iter 500, loss 0.04412, batch_time 1.209
epoch 4, iter 501, loss 0.03889, batch_time 1.224
epoch 4, iter 502, loss 0.04532, batch_time 1.216
epoch 4, iter 503, loss 0.04158, batch_time 1.177
epoch 4, iter 504, loss 0.04093, batch_time 1.242
epoch 4, iter 505, loss 0.04109, batch_time 1.209
epoch 4, iter 506, loss 0.03656, batch_time 1.222
epoch 4, iter 507, loss 0.03623, batch_time 1.204
epoch 4, iter 508, loss 0.03583, batch_time 1.161
epoch 4, iter 509, loss 0.03790, batch_time 1.277
epoch 4, iter 510, loss 0.03138, batch_time 1.242


epoch 5, iter 654, loss 0.03934, batch_time 1.149
epoch 5, iter 655, loss 0.03536, batch_time 1.139
epoch 5, iter 656, loss 0.03179, batch_time 1.213
epoch 5, iter 657, loss 0.03323, batch_time 1.163
epoch 5, iter 658, loss 0.02782, batch_time 1.142
epoch 5, iter 659, loss 0.03463, batch_time 1.250
epoch 5, iter 660, loss 0.03198, batch_time 1.258
epoch 5, iter 661, loss 0.03477, batch_time 1.239
epoch 5, iter 662, loss 0.02954, batch_time 1.239
epoch 5, iter 663, loss 0.03280, batch_time 1.238
epoch 5, iter 664, loss 0.03055, batch_time 1.242
epoch 5, iter 665, loss 0.03022, batch_time 1.232
epoch 5, iter 666, loss 0.02995, batch_time 1.167
epoch 5, iter 667, loss 0.04037, batch_time 1.225
epoch 5, iter 668, loss 0.03503, batch_time 1.216
epoch 5, iter 669, loss 0.03292, batch_time 1.246
epoch 5, iter 670, loss 0.03567, batch_time 1.225
epoch 5, iter 671, loss 0.03128, batch_time 1.204
epoch 5, iter 672, loss 0.03388, batch_time 1.215
epoch 5, iter 673, loss 0.03676, batch_time 1.155


<tf_rnn_classifier.TfRNNClassifier at 0x7f7ba90e7860>

In [52]:
preds = deep_ce_self1.predict(X_rnn['train'])

In [53]:
evaluate(Y_rnn['train'], preds)

CLASS: toxic
p, r, f1: 0.9203, 0.9094, 0.9148

CLASS: severe_toxic
p, r, f1: 0.6409, 0.4161, 0.5046

CLASS: obscene
p, r, f1: 0.8631, 0.8558, 0.8594

CLASS: threat
p, r, f1: 0.0000, 0.0000, 0.0000

CLASS: insult
p, r, f1: 0.7922, 0.7955, 0.7939

CLASS: identity_hate
p, r, f1: 0.9167, 0.0087, 0.0172

average F1 score: 0.514994
weighted avg. F1 scored: 0.807106
macro-averaged ROC-AUC score: 0.988520


In [54]:
preds = deep_ce_self1.predict(X_rnn['dev'])

In [55]:
evaluate(Y_rnn['dev'], preds)

CLASS: toxic
p, r, f1: 0.7346, 0.7292, 0.7319

CLASS: severe_toxic
p, r, f1: 0.5116, 0.2785, 0.3607

CLASS: obscene
p, r, f1: 0.8290, 0.7240, 0.7729

CLASS: threat
p, r, f1: 0.0000, 0.0000, 0.0000

CLASS: insult
p, r, f1: 0.7260, 0.6338, 0.6768

CLASS: identity_hate
p, r, f1: 0.0000, 0.0000, 0.0000

average F1 score: 0.423703
weighted avg. F1 scored: 0.674501
macro-averaged ROC-AUC score: 0.961947


In [64]:
print(np.sum(train['obscene']))

8449


In [None]:
print(train.loc[train['threat'] == 1])

In [None]:
## Special test
print(Y_rnn['train'].shape)

In [14]:
for i,label in enumerate(label_cols):
    print("-------")
    print("Training classifier for class %s..." % label)
    Y_binary = np.expand_dims(np.array(Y_rnn['train'])[:, i], 1)
    Y_binary_dev = np.expand_dims(np.array(Y_rnn['dev'])[:, 1], 1)
    print(Y_binary.shape, Y_binary_dev.shape)
    
    classifier = TfRNNClassifier(
        full_glove_vocab,
        embedding=full_glove_embedding,
        embed_dim=100,
        hidden_dim=50,
        max_length=100,
        hidden_activation=tf.nn.tanh,
        cell_class=tf.nn.rnn_cell.LSTMCell, # LSTM
        train_embedding=True,
        max_iter=1,
        word_length=12,
        bidir_rnn=True, # Bidirectional RNN!
        char_embed=True, # Character Embeddings!
        self_attend=True, # Self-Attention
        dropout=0.2,
        eta=0.01,
        eval_every=1,
        experiment_name="deep_ce_self_"+label)
    
    classifier.fit(X_rnn['train'][:], 
                   Y_binary, 
                   X_dev=X_rnn['dev'],
                   y_dev=Y_binary_dev)
    
    preds = classifier.predict(X_rnn['train'])
    evaluate(Y_binary, preds)
    
    preds = classifier.predict(X_rnn['dev'])
    evaluate(Y_binary_dev, preds)
    
    print("-------")
    

-------
Training classifier for class toxic...
(143613, 1) (15958, 1)
Tensor("concat_2:0", shape=(?, 200, 50), dtype=float32)
./logs/deep_ce_self_toxic
epoch 1, iter 1, loss 0.72481, batch_time 2.183
epoch 1, iter 2, loss 0.42541, batch_time 1.170
epoch 1, iter 3, loss 0.36020, batch_time 1.144
epoch 1, iter 4, loss 0.32809, batch_time 1.226
epoch 1, iter 5, loss 0.32928, batch_time 1.208
epoch 1, iter 6, loss 0.30154, batch_time 1.207
epoch 1, iter 7, loss 0.27918, batch_time 1.196
epoch 1, iter 8, loss 0.28452, batch_time 1.184
epoch 1, iter 9, loss 0.33001, batch_time 1.210
epoch 1, iter 10, loss 0.35148, batch_time 1.193
epoch 1, iter 11, loss 0.30278, batch_time 1.208
epoch 1, iter 12, loss 0.35312, batch_time 1.194
epoch 1, iter 13, loss 0.29213, batch_time 1.201
epoch 1, iter 14, loss 0.32602, batch_time 1.192
epoch 1, iter 15, loss 0.32027, batch_time 1.162
epoch 1, iter 16, loss 0.31253, batch_time 1.237
epoch 1, iter 17, loss 0.27858, batch_time 1.212
epoch 1, iter 18, loss 0

epoch 1, iter 18, loss 0.06867, batch_time 1.146
epoch 1, iter 19, loss 0.04866, batch_time 1.120
epoch 1, iter 20, loss 0.05549, batch_time 1.121
epoch 1, iter 21, loss 0.04353, batch_time 1.132
epoch 1, iter 22, loss 0.04805, batch_time 1.116
epoch 1, iter 23, loss 0.05883, batch_time 1.117
epoch 1, iter 24, loss 0.06946, batch_time 1.209
epoch 1, iter 25, loss 0.05533, batch_time 1.125
epoch 1, iter 26, loss 0.03143, batch_time 1.148
epoch 1, iter 27, loss 0.05030, batch_time 1.153
epoch 1, iter 28, loss 0.06135, batch_time 1.168
epoch 1, iter 29, loss 0.04820, batch_time 1.144
epoch 1, iter 30, loss 0.03524, batch_time 1.218
epoch 1, iter 31, loss 0.08036, batch_time 1.232
epoch 1, iter 32, loss 0.02260, batch_time 1.140
epoch 1, iter 33, loss 0.05997, batch_time 1.141
epoch 1, iter 34, loss 0.05989, batch_time 1.131
epoch 1, iter 35, loss 0.05774, batch_time 1.120
epoch 1, iter 36, loss 0.05504, batch_time 1.137
epoch 1, iter 37, loss 0.04609, batch_time 1.131
epoch 1, iter 38, lo

epoch 1, iter 38, loss 0.17880, batch_time 1.241
epoch 1, iter 39, loss 0.20240, batch_time 1.236
epoch 1, iter 40, loss 0.16227, batch_time 1.210
epoch 1, iter 41, loss 0.20552, batch_time 1.189
epoch 1, iter 42, loss 0.16695, batch_time 1.218
epoch 1, iter 43, loss 0.20012, batch_time 1.213
epoch 1, iter 44, loss 0.18914, batch_time 1.205
epoch 1, iter 45, loss 0.16676, batch_time 1.215
epoch 1, iter 46, loss 0.16612, batch_time 1.215
epoch 1, iter 47, loss 0.16082, batch_time 1.157
epoch 1, iter 48, loss 0.11552, batch_time 1.164
epoch 1, iter 49, loss 0.19528, batch_time 1.138
epoch 1, iter 50, loss 0.19322, batch_time 1.149
epoch 1, iter 51, loss 0.16747, batch_time 1.235
epoch 1, iter 52, loss 0.16052, batch_time 1.216
epoch 1, iter 53, loss 0.18411, batch_time 1.218
epoch 1, iter 54, loss 0.17006, batch_time 1.205
epoch 1, iter 55, loss 0.17214, batch_time 1.198
epoch 1, iter 56, loss 0.17489, batch_time 1.137
epoch 1, iter 57, loss 0.15486, batch_time 1.205
epoch 1, iter 58, lo

epoch 1, iter 58, loss 0.01955, batch_time 1.138
epoch 1, iter 59, loss 0.02613, batch_time 1.135
epoch 1, iter 60, loss 0.01397, batch_time 1.134
epoch 1, iter 61, loss 0.01447, batch_time 1.150
epoch 1, iter 62, loss 0.00789, batch_time 1.135
epoch 1, iter 63, loss 0.03243, batch_time 1.122
epoch 1, iter 64, loss 0.02545, batch_time 1.125
epoch 1, iter 65, loss 0.01976, batch_time 1.182
epoch 1, iter 66, loss 0.03204, batch_time 1.132
epoch 1, iter 67, loss 0.03781, batch_time 1.154
epoch 1, iter 68, loss 0.02644, batch_time 1.144
epoch 1, iter 69, loss 0.01958, batch_time 1.140
epoch 1, iter 70, loss 0.02079, batch_time 1.128
epoch 1, iter 71, loss 0.02006, batch_time 1.146
epoch 1, iter 72, loss 0.01425, batch_time 1.137
epoch 1, iter 73, loss 0.01463, batch_time 1.132
epoch 1, iter 74, loss 0.02436, batch_time 1.150
epoch 1, iter 75, loss 0.00347, batch_time 1.130
epoch 1, iter 76, loss 0.01416, batch_time 1.156
epoch 1, iter 77, loss 0.01406, batch_time 1.236
epoch 1, iter 78, lo

epoch 1, iter 78, loss 0.11099, batch_time 1.143
epoch 1, iter 79, loss 0.11476, batch_time 1.157
epoch 1, iter 80, loss 0.11142, batch_time 1.233
epoch 1, iter 81, loss 0.10356, batch_time 1.206
epoch 1, iter 82, loss 0.10867, batch_time 1.227
epoch 1, iter 83, loss 0.11063, batch_time 1.225
epoch 1, iter 84, loss 0.10562, batch_time 1.214
epoch 1, iter 85, loss 0.13210, batch_time 1.230
epoch 1, iter 86, loss 0.14038, batch_time 1.316
epoch 1, iter 87, loss 0.11739, batch_time 1.159
epoch 1, iter 88, loss 0.13086, batch_time 1.183
epoch 1, iter 89, loss 0.12767, batch_time 1.147
epoch 1, iter 90, loss 0.12793, batch_time 1.147
epoch 1, iter 91, loss 0.12252, batch_time 1.247
epoch 1, iter 92, loss 0.12951, batch_time 1.234
epoch 1, iter 93, loss 0.08866, batch_time 1.236
epoch 1, iter 94, loss 0.12784, batch_time 1.249
epoch 1, iter 95, loss 0.12291, batch_time 1.224
epoch 1, iter 96, loss 0.10713, batch_time 1.241
epoch 1, iter 97, loss 0.10966, batch_time 1.212
epoch 1, iter 98, lo

epoch 1, iter 98, loss 0.03790, batch_time 1.200
epoch 1, iter 99, loss 0.05363, batch_time 1.225
epoch 1, iter 100, loss 0.05078, batch_time 1.141
epoch 1, iter 101, loss 0.07488, batch_time 1.221
epoch 1, iter 102, loss 0.02319, batch_time 1.168
epoch 1, iter 103, loss 0.04932, batch_time 1.204
epoch 1, iter 104, loss 0.04860, batch_time 1.214
epoch 1, iter 105, loss 0.05537, batch_time 1.214
epoch 1, iter 106, loss 0.05864, batch_time 1.146
epoch 1, iter 107, loss 0.04923, batch_time 1.161
epoch 1, iter 108, loss 0.04314, batch_time 1.179
epoch 1, iter 109, loss 0.05196, batch_time 1.223
epoch 1, iter 110, loss 0.04880, batch_time 1.191
epoch 1, iter 111, loss 0.06252, batch_time 1.162
epoch 1, iter 112, loss 0.07077, batch_time 1.153
epoch 1, iter 113, loss 0.04318, batch_time 1.212
epoch 1, iter 114, loss 0.03869, batch_time 1.225
epoch 1, iter 115, loss 0.05601, batch_time 1.158
epoch 1, iter 116, loss 0.04797, batch_time 1.232
epoch 1, iter 117, loss 0.04396, batch_time 1.211
ep

In [None]:
# toxic 
# p, r, f1: 0.7841, 0.5894, 0.6730
# average F1 score: 0.672990
# p, r, f1: 0.1268, 0.9304, 0.2232
# average F1 score: 0.223235

# severe toxic
# p, r, f1: 0.6084, 0.1113, 0.1882
# average F1 score: 0.188235
# p, r, f1: 0.6071, 0.1076, 0.1828
# average F1 score: 0.182796

# obscene
# p, r, f1: 0.8277, 0.6130, 0.7043
# average F1 score: 0.704337
# p, r, f1: 0.2124, 0.8481, 0.3397
# average F1 score: 0.339670

# threat 
# all 0s

# insult
# p, r, f1: 0.8037, 0.4890, 0.6080
# average F1 score: 0.608007
# p, r, f1: 0.2595, 0.7785, 0.3892
# average F1 score: 0.389241

# identity_hate
# all 0s
