# Final Experiments

In [2]:
# 0. Some initial set-up.
from collections import Counter
import numpy as np
import os
import pandas as pd
import random
from tf_rnn_classifier import TfRNNClassifier
from collections import defaultdict, Counter
from sklearn.tree import DecisionTreeClassifier
from sklearn.multiclass import OneVsRestClassifier
from sklearn.feature_extraction import DictVectorizer
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from sklearn.model_selection import cross_val_score
from scipy.sparse import hstack
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import precision_recall_fscore_support, classification_report, roc_auc_score
import tensorflow as tf
import sst
from utils import evaluate, build_rnn_dataset
import utils

  from ._conv import register_converters as _register_converters


In [3]:
vsmdata_home = 'vsmdata'

glove_home = os.path.join(vsmdata_home, 'glove.6B')

data_dir = "./data/"

In [4]:
train = pd.read_csv(data_dir + "train.csv").fillna(' ')
test = pd.read_csv(data_dir + "test.csv").fillna(' ')
test_labels = pd.read_csv(data_dir + "test_labels.csv")

## Dataset Set-up

In [4]:
X_rnn, Y_rnn = build_rnn_dataset(train, 0.9)

In [1]:
X_test, Y_test = build_rnn_dataset(test, 1)
X_test, Y_test = X_test['train'], Y_test['train']

NameError: name 'build_rnn_dataset' is not defined

## Baseline Features

In [5]:
label_cols = ['toxic', 'severe_toxic', 'obscene', 'threat', 'insult', 'identity_hate']
COMMENT = 'comment_text'
train_examples = train[COMMENT]
train_labels = train[label_cols]

In [6]:
train_text = [" ".join(ex) for ex in X_rnn['train']]
dev_text = [" ".join(ex) for ex in X_rnn['dev']]
all_text = train_text + dev_text
print(len(train_text), len(dev_text), len(all_text))

143613 15958 159571


In [7]:
all_text = [" ".join(ex) for ex in X_rnn]

In [8]:
# may take several minutes...
word_vectorizer = TfidfVectorizer(
    sublinear_tf=True,
    strip_accents='unicode',
    analyzer='word',
    token_pattern=r'\w{1,}',
    stop_words='english',
    ngram_range=(1, 1),
    max_features=10000)
word_vectorizer.fit(all_text)
train_word_features = word_vectorizer.transform(train_text)
dev_word_features = word_vectorizer.transform(dev_text)

In [9]:
train_features = hstack([train_word_features])
dev_features = hstack([dev_word_features])

## Glove

In [10]:
glove_lookup = utils.glove2dict(
    os.path.join(vsmdata_home, 'glove.6B.100d.txt'))

In [11]:
full_train_vocab = sst.get_vocab(X_rnn['train'])

In [12]:
glove_vocab = sorted(set(glove_lookup) & set(full_train_vocab))
print("Embedding matrix contains %d words." % len(glove_vocab))

Embedding matrix contains 55422 words.


In [13]:
glove_embedding = np.array([glove_lookup[w] for w in glove_vocab])

In [14]:
glove_vocab.append("$UNK")
glove_embedding = np.vstack(
    (glove_embedding, utils.randvec(glove_embedding.shape[1])))

In [15]:
full_glove_vocab = sorted(set(sst.get_vocab(X_rnn['train'])))
print("Embedding matrix contains %d words." % len(full_glove_vocab))

Embedding matrix contains 494751 words.


In [16]:
full_glove_embedding = np.array([
    glove_lookup[w] 
    if w in glove_lookup else utils.randvec(len(glove_lookup["hello"])) 
    for w in full_glove_vocab
])

In [17]:
full_glove_vocab.append("$UNK")
full_glove_embedding = np.vstack(
    (full_glove_embedding, utils.randvec(full_glove_embedding.shape[1])))

# Binary ALL

In [18]:
Y_rnn_binary = np.max(Y_rnn['train'], axis=1).reshape((-1, 1))
Y_rnn_binary_dev = np.max(Y_rnn['dev'], axis=1).reshape((-1, 1))
print(Y_rnn_binary.shape, Y_rnn_binary_dev.shape)

(143613, 1) (15958, 1)


## 1. Baseline

In [40]:
classifier = LogisticRegression(C=0.1, solver='sag')
classifier.fit(train_word_features, Y_rnn_binary)

LogisticRegression(C=0.1, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='ovr', n_jobs=1,
          penalty='l2', random_state=None, solver='sag', tol=0.0001,
          verbose=0, warm_start=False)

In [41]:
preds = classifier.predict_proba(train_features)[:, 1]
preds = np.expand_dims(preds, 1)

In [42]:
preds

array([[0.10612625],
       [0.10612625],
       [0.0945392 ],
       ...,
       [0.10612625],
       [0.10612625],
       [0.11740118]])

In [43]:
evaluate(Y_rnn_binary, preds)

p, r, f1: 0.0000, 0.0000, 0.0000

average F1 score: 0.000000


## 2. Deep

In [73]:
deep1 = TfRNNClassifier(
    full_glove_vocab,
    embedding=full_glove_embedding,
    embed_dim=100,
    hidden_dim=50,
    max_length=100,
    hidden_activation=tf.nn.tanh,
    cell_class=tf.nn.rnn_cell.LSTMCell, # LSTM
    train_embedding=True,
    max_iter=3,
    word_length=12,
    bidir_rnn=True, # Bidirectional RNN!
    dropout=0.2,
    eta=0.01,
    eval_every=1,
    experiment_name="BINARY_ALL_deep")

In [74]:
deep1.fit(X_rnn['train'][:], 
          Y_rnn_binary[:], 
          X_dev=X_rnn['dev'],
          y_dev=Y_rnn_binary_dev)

./logs/BINARY_ALL_deep
epoch 1, iter 1, loss 0.70816, batch_time 0.632
epoch 1, iter 2, loss 0.34461, batch_time 0.586
epoch 1, iter 3, loss 0.33011, batch_time 0.625
epoch 1, iter 4, loss 0.30604, batch_time 0.617
epoch 1, iter 5, loss 0.34285, batch_time 0.624
epoch 1, iter 6, loss 0.31578, batch_time 0.577
epoch 1, iter 7, loss 0.31118, batch_time 0.599
epoch 1, iter 8, loss 0.31245, batch_time 0.672
epoch 1, iter 9, loss 0.28501, batch_time 0.598
epoch 1, iter 10, loss 0.31354, batch_time 0.661
epoch 1, iter 11, loss 0.28506, batch_time 0.574
epoch 1, iter 12, loss 0.26809, batch_time 0.583
epoch 1, iter 13, loss 0.27459, batch_time 0.560
epoch 1, iter 14, loss 0.27408, batch_time 0.562
epoch 1, iter 15, loss 0.31646, batch_time 0.567
epoch 1, iter 16, loss 0.27334, batch_time 0.583
epoch 1, iter 17, loss 0.27119, batch_time 0.587
epoch 1, iter 18, loss 0.25123, batch_time 0.590
epoch 1, iter 19, loss 0.28185, batch_time 0.578
epoch 1, iter 20, loss 0.25312, batch_time 0.562
epoch 

epoch 2, iter 166, loss 0.11190, batch_time 0.662
epoch 2, iter 167, loss 0.10880, batch_time 0.633
epoch 2, iter 168, loss 0.14018, batch_time 0.638
epoch 2, iter 169, loss 0.10264, batch_time 0.580
epoch 2, iter 170, loss 0.11059, batch_time 0.582
epoch 2, iter 171, loss 0.12230, batch_time 0.607
epoch 2, iter 172, loss 0.12169, batch_time 0.653
epoch 2, iter 173, loss 0.11703, batch_time 0.647
epoch 2, iter 174, loss 0.09927, batch_time 0.590
epoch 2, iter 175, loss 0.11001, batch_time 0.662
epoch 2, iter 176, loss 0.10315, batch_time 0.590
epoch 2, iter 177, loss 0.13853, batch_time 0.597
epoch 2, iter 178, loss 0.10734, batch_time 0.585
epoch 2, iter 179, loss 0.13717, batch_time 0.605
epoch 2, iter 180, loss 0.10436, batch_time 0.599
epoch 2, iter 181, loss 0.10636, batch_time 0.581
epoch 2, iter 182, loss 0.11975, batch_time 0.587
epoch 2, iter 183, loss 0.11283, batch_time 0.668
epoch 2, iter 184, loss 0.10715, batch_time 0.584
epoch 2, iter 185, loss 0.11744, batch_time 0.585


epoch 3, iter 329, loss 0.05403, batch_time 0.650
epoch 3, iter 330, loss 0.05441, batch_time 0.645
epoch 3, iter 331, loss 0.04931, batch_time 0.634
epoch 3, iter 332, loss 0.07060, batch_time 0.615
epoch 3, iter 333, loss 0.07602, batch_time 0.637
epoch 3, iter 334, loss 0.07135, batch_time 0.595
epoch 3, iter 335, loss 0.07262, batch_time 0.605
epoch 3, iter 336, loss 0.05329, batch_time 0.643
epoch 3, iter 337, loss 0.07015, batch_time 0.653
epoch 3, iter 338, loss 0.05336, batch_time 0.658
epoch 3, iter 339, loss 0.05645, batch_time 0.589
epoch 3, iter 340, loss 0.07509, batch_time 0.666
epoch 3, iter 341, loss 0.04719, batch_time 0.658
epoch 3, iter 342, loss 0.07220, batch_time 0.674
epoch 3, iter 343, loss 0.05246, batch_time 0.606
epoch 3, iter 344, loss 0.06793, batch_time 0.663
epoch 3, iter 345, loss 0.06383, batch_time 0.603
epoch 3, iter 346, loss 0.05827, batch_time 0.619
epoch 3, iter 347, loss 0.06992, batch_time 0.609
epoch 3, iter 348, loss 0.06331, batch_time 0.600


<tf_rnn_classifier.TfRNNClassifier at 0x7f6a4ca15e80>

In [77]:
preds = deep1.predict(X_rnn['train'][:])

In [78]:
evaluate(Y_rnn_binary, preds)

p, r, f1: 0.9454, 0.9209, 0.9330

average F1 score: 0.932970


In [75]:
preds = deep1.predict(X_rnn['dev'][:])

In [76]:
evaluate(Y_rnn_binary_dev, preds)

p, r, f1: 0.8148, 0.6890, 0.7466

average F1 score: 0.746633


## 3. Deep + CE

In [81]:
deep_ce1 = TfRNNClassifier(
    full_glove_vocab,
    embedding=full_glove_embedding,
    embed_dim=100,
    hidden_dim=50,
    max_length=100,
    hidden_activation=tf.nn.tanh,
    cell_class=tf.nn.rnn_cell.LSTMCell, # LSTM
    train_embedding=True,
    max_iter=2,
    word_length=12,
    bidir_rnn=True, # Bidirectional RNN!
    char_embed=True, # Character Embeddings!
    dropout=0.2,
    eta=0.01,
    eval_every=1,
    experiment_name="BINARY_ALL_deep_ce")

In [82]:
deep_ce1.fit(X_rnn['train'][:], 
             Y_rnn_binary[:], 
             X_dev=X_rnn['dev'],
             y_dev=Y_rnn_binary_dev)

./logs/BINARY_ALL_deep_ce
epoch 1, iter 1, loss 0.79662, batch_time 1.320
epoch 1, iter 2, loss 0.31445, batch_time 1.151
epoch 1, iter 3, loss 0.34568, batch_time 1.222
epoch 1, iter 4, loss 0.32165, batch_time 1.207
epoch 1, iter 5, loss 0.31944, batch_time 1.237
epoch 1, iter 6, loss 0.36194, batch_time 1.221
epoch 1, iter 7, loss 0.30870, batch_time 1.247
epoch 1, iter 8, loss 0.31016, batch_time 1.200
epoch 1, iter 9, loss 0.33310, batch_time 1.203
epoch 1, iter 10, loss 0.32175, batch_time 1.208
epoch 1, iter 11, loss 0.32617, batch_time 1.146
epoch 1, iter 12, loss 0.33288, batch_time 1.146
epoch 1, iter 13, loss 0.28945, batch_time 1.148
epoch 1, iter 14, loss 0.28806, batch_time 1.157
epoch 1, iter 15, loss 0.32562, batch_time 1.133
epoch 1, iter 16, loss 0.29511, batch_time 1.156
epoch 1, iter 17, loss 0.28357, batch_time 1.181
epoch 1, iter 18, loss 0.35015, batch_time 1.219
epoch 1, iter 19, loss 0.29404, batch_time 1.246
epoch 1, iter 20, loss 0.32139, batch_time 1.235
epo

epoch 2, iter 166, loss 0.10369, batch_time 1.206
epoch 2, iter 167, loss 0.13131, batch_time 1.143
epoch 2, iter 168, loss 0.13076, batch_time 1.144
epoch 2, iter 169, loss 0.14296, batch_time 1.228
epoch 2, iter 170, loss 0.09770, batch_time 1.254
epoch 2, iter 171, loss 0.11405, batch_time 1.136
epoch 2, iter 172, loss 0.14052, batch_time 1.157
epoch 2, iter 173, loss 0.12201, batch_time 1.215
epoch 2, iter 174, loss 0.10230, batch_time 1.255
epoch 2, iter 175, loss 0.12177, batch_time 1.199
epoch 2, iter 176, loss 0.13643, batch_time 1.147
epoch 2, iter 177, loss 0.11264, batch_time 1.149
epoch 2, iter 178, loss 0.11135, batch_time 1.135
epoch 2, iter 179, loss 0.10971, batch_time 1.170
epoch 2, iter 180, loss 0.14657, batch_time 1.232
epoch 2, iter 181, loss 0.11642, batch_time 1.142
epoch 2, iter 182, loss 0.09775, batch_time 1.141
epoch 2, iter 183, loss 0.12032, batch_time 1.161
epoch 2, iter 184, loss 0.12640, batch_time 1.146
epoch 2, iter 185, loss 0.10305, batch_time 1.172


<tf_rnn_classifier.TfRNNClassifier at 0x7f6a1d2aaf98>

In [83]:
preds = deep_ce1.predict(X_rnn['train'][:])

In [84]:
evaluate(Y_rnn_binary, preds)

p, r, f1: 0.9254, 0.8070, 0.8622

average F1 score: 0.862176


In [85]:
preds = deep_ce1.predict(X_rnn['dev'][:])

In [86]:
evaluate(Y_rnn_binary_dev, preds)

p, r, f1: 0.8723, 0.6595, 0.7512

average F1 score: 0.751152


## 4. Deep + Self

In [109]:
deep_self1 = TfRNNClassifier(
    full_glove_vocab,
    embedding=full_glove_embedding,
    embed_dim=100,
    hidden_dim=50,
    max_length=100,
    hidden_activation=tf.nn.tanh,
    cell_class=tf.nn.rnn_cell.LSTMCell, # LSTM
    train_embedding=True,
    max_iter=2,
    word_length=12,
    bidir_rnn=True, # Bidirectional RNN!
    self_attend=True, # Self-Attention!
    dropout=0.2,
    eta=0.01,
    eval_every=1,
    experiment_name="BINARY_ALL_deep_self")

In [110]:
deep_self1.fit(X_rnn['train'][:], 
               Y_rnn_binary[:], 
               X_dev=X_rnn['dev'],
               y_dev=Y_rnn_binary_dev)  

Tensor("concat_1:0", shape=(?, 200, 50), dtype=float32)
./logs/BINARY_ALL_deep_self
epoch 1, iter 1, loss 0.70161, batch_time 0.655
epoch 1, iter 2, loss 0.37226, batch_time 0.616
epoch 1, iter 3, loss 0.33144, batch_time 0.607
epoch 1, iter 4, loss 0.28658, batch_time 0.599
epoch 1, iter 5, loss 0.37834, batch_time 0.592
epoch 1, iter 6, loss 0.30250, batch_time 0.587
epoch 1, iter 7, loss 0.34485, batch_time 0.611
epoch 1, iter 8, loss 0.29862, batch_time 0.600
epoch 1, iter 9, loss 0.31589, batch_time 0.662
epoch 1, iter 10, loss 0.29424, batch_time 0.670
epoch 1, iter 11, loss 0.30892, batch_time 0.657
epoch 1, iter 12, loss 0.28761, batch_time 0.667
epoch 1, iter 13, loss 0.31388, batch_time 0.647
epoch 1, iter 14, loss 0.26492, batch_time 0.592
epoch 1, iter 15, loss 0.25105, batch_time 0.659
epoch 1, iter 16, loss 0.26089, batch_time 0.672
epoch 1, iter 17, loss 0.27550, batch_time 0.606
epoch 1, iter 18, loss 0.23450, batch_time 0.607
epoch 1, iter 19, loss 0.29858, batch_time 

epoch 2, iter 165, loss 0.12036, batch_time 0.595
epoch 2, iter 166, loss 0.10712, batch_time 0.605
epoch 2, iter 167, loss 0.09346, batch_time 0.609
epoch 2, iter 168, loss 0.10070, batch_time 0.610
epoch 2, iter 169, loss 0.09405, batch_time 0.612
epoch 2, iter 170, loss 0.10252, batch_time 0.633
epoch 2, iter 171, loss 0.09604, batch_time 0.665
epoch 2, iter 172, loss 0.09954, batch_time 0.675
epoch 2, iter 173, loss 0.10889, batch_time 0.663
epoch 2, iter 174, loss 0.09229, batch_time 0.678
epoch 2, iter 175, loss 0.10316, batch_time 0.627
epoch 2, iter 176, loss 0.10481, batch_time 0.617
epoch 2, iter 177, loss 0.08980, batch_time 0.616
epoch 2, iter 178, loss 0.10258, batch_time 0.673
epoch 2, iter 179, loss 0.09741, batch_time 0.664
epoch 2, iter 180, loss 0.09519, batch_time 0.615
epoch 2, iter 181, loss 0.11558, batch_time 0.667
epoch 2, iter 182, loss 0.12764, batch_time 0.597
epoch 2, iter 183, loss 0.11279, batch_time 0.667
epoch 2, iter 184, loss 0.14068, batch_time 0.597


<tf_rnn_classifier.TfRNNClassifier at 0x7f69ae869860>

In [111]:
preds = deep_self1.predict(X_rnn['train'][:])

In [112]:
evaluate(Y_rnn_binary, preds)

p, r, f1: 0.9175, 0.8366, 0.8752

average F1 score: 0.875185


In [113]:
preds = deep_self1.predict(X_rnn['dev'][:])

In [114]:
evaluate(Y_rnn_binary_dev, preds)

p, r, f1: 0.8463, 0.6746, 0.7507

average F1 score: 0.750724


## 5. Deep + CE + Attention

In [36]:
deep_ce_self1 = TfRNNClassifier(
    full_glove_vocab,
    embedding=full_glove_embedding,
    embed_dim=100,
    hidden_dim=50,
    max_length=100,
    hidden_activation=tf.nn.tanh,
    cell_class=tf.nn.rnn_cell.LSTMCell, # LSTM
    train_embedding=True,
    max_iter=2,
    word_length=12,
    bidir_rnn=True, # Bidirectional RNN
    char_embed=True, # Character Embeddings
    self_attend=True, # Self-Attention
    dropout=0.2,
    eta=0.01,
    eval_every=1,
    experiment_name="BINARY_ALL_deep_ce_self")

In [37]:
deep_ce_self1.fit(X_rnn['train'][:], 
                      Y_rnn_binary[:], 
                      X_dev=X_rnn['dev'],
                      y_dev=Y_rnn_binary_dev)

Tensor("concat_2:0", shape=(?, 200, 50), dtype=float32)
./logs/BINARY_ALL_deep_ce_self
epoch 1, iter 1, loss 0.86763, batch_time 1.142
epoch 1, iter 2, loss 0.37050, batch_time 1.135
epoch 1, iter 3, loss 0.34039, batch_time 1.155
epoch 1, iter 4, loss 0.34038, batch_time 1.178
epoch 1, iter 5, loss 0.35133, batch_time 1.157
epoch 1, iter 6, loss 0.34788, batch_time 1.170
epoch 1, iter 7, loss 0.33005, batch_time 1.176
epoch 1, iter 8, loss 0.34250, batch_time 1.244
epoch 1, iter 9, loss 0.30581, batch_time 1.228
epoch 1, iter 10, loss 0.36925, batch_time 1.238
epoch 1, iter 11, loss 0.34285, batch_time 1.179
epoch 1, iter 12, loss 0.35470, batch_time 1.170
epoch 1, iter 13, loss 0.34715, batch_time 1.174
epoch 1, iter 14, loss 0.34091, batch_time 1.221
epoch 1, iter 15, loss 0.34537, batch_time 1.237
epoch 1, iter 16, loss 0.36096, batch_time 1.155
epoch 1, iter 17, loss 0.33024, batch_time 1.163
epoch 1, iter 18, loss 0.30658, batch_time 1.162
epoch 1, iter 19, loss 0.34225, batch_ti

epoch 2, iter 165, loss 0.13064, batch_time 1.159
epoch 2, iter 166, loss 0.15081, batch_time 1.155
epoch 2, iter 167, loss 0.12281, batch_time 1.241
epoch 2, iter 168, loss 0.13374, batch_time 1.156
epoch 2, iter 169, loss 0.16226, batch_time 1.147
epoch 2, iter 170, loss 0.14264, batch_time 1.145
epoch 2, iter 171, loss 0.12870, batch_time 1.166
epoch 2, iter 172, loss 0.12534, batch_time 1.181
epoch 2, iter 173, loss 0.16791, batch_time 1.143
epoch 2, iter 174, loss 0.12241, batch_time 1.146
epoch 2, iter 175, loss 0.13729, batch_time 1.158
epoch 2, iter 176, loss 0.12849, batch_time 1.148
epoch 2, iter 177, loss 0.11850, batch_time 1.133
epoch 2, iter 178, loss 0.13081, batch_time 1.149
epoch 2, iter 179, loss 0.12812, batch_time 1.136
epoch 2, iter 180, loss 0.12427, batch_time 1.142
epoch 2, iter 181, loss 0.13106, batch_time 1.144
epoch 2, iter 182, loss 0.11053, batch_time 1.217
epoch 2, iter 183, loss 0.13336, batch_time 1.245
epoch 2, iter 184, loss 0.13398, batch_time 1.192


epoch 3, iter 328, loss 0.05966, batch_time 1.152
epoch 3, iter 329, loss 0.06050, batch_time 1.163
epoch 3, iter 330, loss 0.08882, batch_time 1.183
epoch 3, iter 331, loss 0.07094, batch_time 1.279
epoch 3, iter 332, loss 0.07992, batch_time 1.278
epoch 3, iter 333, loss 0.06362, batch_time 1.178
epoch 3, iter 334, loss 0.09334, batch_time 1.200
epoch 3, iter 335, loss 0.10283, batch_time 1.254
epoch 3, iter 336, loss 0.08398, batch_time 1.163
epoch 3, iter 337, loss 0.07533, batch_time 1.159
epoch 3, iter 338, loss 0.07066, batch_time 1.152
epoch 3, iter 339, loss 0.07658, batch_time 1.153
epoch 3, iter 340, loss 0.07173, batch_time 1.203
epoch 3, iter 341, loss 0.06944, batch_time 1.144
epoch 3, iter 342, loss 0.08691, batch_time 1.176
epoch 3, iter 343, loss 0.06734, batch_time 1.219
epoch 3, iter 344, loss 0.08833, batch_time 1.253
epoch 3, iter 345, loss 0.07606, batch_time 1.159
epoch 3, iter 346, loss 0.06661, batch_time 1.167
epoch 3, iter 347, loss 0.08259, batch_time 1.156


<tf_rnn_classifier.TfRNNClassifier at 0x7f4b3dc3dba8>

In [38]:
preds = deep_ce_self1.predict(X_rnn['dev'])

In [39]:
evaluate(Y_rnn_binary_dev, preds)

p, r, f1: 0.6331, 0.7711, 0.6953

average F1 score: 0.695335
