In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf
import utils
import time

from keras.models import Model, Input
from keras.layers import Dense, Conv1D, BatchNormalization, GlobalMaxPooling1D, Dropout, \
    Embedding, Concatenate, SpatialDropout1D, MaxPooling1D

from utils.preprocessing_utils import tokenize_sentences, convert_tokens_to_padded_sequence
from utils.dataset_utils import load_data_from_csv
from utils.embedding_utils import load_word2vec_embeddings, create_initial_embedding_matrix
from utils.training_utils import train_model, train_and_evaluate_model

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [20]:
import importlib
import utils
importlib.reload(utils.embedding_utils)
importlib.reload(utils.dataset_utils)
importlib.reload(utils.preprocessing_utils)
importlib.reload(utils.training_utils)
importlib.reload(utils.keras_utils)

<module 'utils.keras_utils' from '/home/philipp/work/gitprojects/toxic-comment-experiments/utils/keras_utils.py'>

Global parameters which hold for all models

In [2]:
random_seed = 2018
classes = ['toxic','severe_toxic','obscene','threat','insult','identity_hate']
features = 'comment_text'
np.random.seed(random_seed)
path_train_data = 'data/kaggle/train.csv'
path_test_data = 'data/kaggle/test_complete.csv'
path_tokenizer = 'data/models/word_tokenizer.pickle'

embedding_length = 300
path_embeddings = 'data/embeddings/GoogleNews-vectors-negative300.bin.gz'

Load train and test data and pretrained word2vec embeddings

In [3]:
X_train, Y_train = load_data_from_csv(path_train_data, features, classes)
X_test, Y_test = load_data_from_csv(path_test_data, features, classes)

emb_idx, emb_mean, emb_std = load_word2vec_embeddings(path_embeddings)

Preprocessing and tokenizatin of train and test data

In [4]:
X_train_tok = tokenize_sentences(X_train)
del X_train
X_test_tok = tokenize_sentences(X_test)
del X_test

Create initial embedding matrix for neural network and word -> idx mapping

In [5]:
embedding_matrix, word_embedding_mapping = create_initial_embedding_matrix(X_train_tok, X_test_tok, emb_idx, emb_mean, emb_std, embedding_length, debug=True)
del emb_idx

Number of unique tokens: 326175
Number of tokens found in pretrained embeddings: 74211


Transform comments in train and test data to padded matrices

In [6]:
max_len_train = X_train_tok.apply(lambda x: len(x)).max()
max_len_test = X_test_tok.apply(lambda x: len(x)).max()
# limit length to 2000, otherwise we get a MemoryError
max_comment_length = 2000
X_train_input = convert_tokens_to_padded_sequence(X_train_tok, word_embedding_mapping, max_comment_length)
del X_train_tok
X_test_input = convert_tokens_to_padded_sequence(X_test_tok, word_embedding_mapping, max_comment_length)
del X_test_tok

### Singlelayer CNN with a single window size

This simple CNN consists of an embedding layer, a single convolution layer with a fixed window size and a fully connected hidden layer.

In [7]:
m1_kernel_size = 3
m1_hidden_dim = 100
m1_num_filters = 150
m1_dropout = 0.4
m1_spatial_dropout = 0.2
m1_batch_size = 64
m1_epochs = 5

m1_weights_path = 'data/models/cnn_simple/model{}.hdf5'
m1_scores_path = 'data/scores/cnn_simple/scores_{}'

The network architecture

In [8]:
m1_input = Input((max_comment_length,))
m1_word_emb = Embedding(input_dim=len(embedding_matrix), output_dim=embedding_length, input_length=max_comment_length, weights=[embedding_matrix])(m1_input)
m1_word_emb = SpatialDropout1D(m1_spatial_dropout)(m1_word_emb)

m1_conv1 = Conv1D(kernel_size=m1_kernel_size, filters=m1_num_filters, padding='same')(m1_word_emb)
m1_conv1 = GlobalMaxPooling1D()(m1_conv1)

m1_fc2 = Dense(m1_hidden_dim, activation='relu')(m1_conv1)
m1_dropout2 = Dropout(m1_dropout)(m1_fc2)
m1_output = Dense(len(classes), activation='sigmoid')(m1_dropout2)

m1_model = Model(inputs=[m1_input], outputs=[m1_output])
m1_model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, 2000)              0         
_________________________________________________________________
embedding_1 (Embedding)      (None, 2000, 300)         97852800  
_________________________________________________________________
spatial_dropout1d_1 (Spatial (None, 2000, 300)         0         
_________________________________________________________________
conv1d_1 (Conv1D)            (None, 2000, 150)         135150    
_________________________________________________________________
global_max_pooling1d_1 (Glob (None, 150)               0         
_________________________________________________________________
dense_1 (Dense)              (None, 100)               15100     
_________________________________________________________________
dropout_1 (Dropout)          (None, 100)               0         
__________

Train model

In [9]:
m1_scores = train_and_evaluate_model(m1_model, X_train_input, Y_train, (X_test_input, Y_test), \
                                    m1_epochs, m1_batch_size, 'adam', 'binary_crossentropy', ['accuracy'], \
                                    random_seed, runs=5)
np.save(m1_scores_path.format(time.time()), m1_scores)

RUN 1/5
Train on 159571 samples, validate on 63978 samples
Epoch 1/5

  'precision', 'predicted', average, warn_for)



 train: ROC-AUC - epoch: 1 - score: 0.99328
 Tox: 0.99078 - STox: 0.99115 - Obs: 0.99486 - Thr: 0.98640 - Ins: 0.99088 - IdH: 0.98296
 train: F1 Score - epoch: 1 - score: 0.78430
 Tox: 0.84888 - STox: 0.22837 - Obs: 0.84299 - Thr: 0.00000 - Ins: 0.75970 - IdH: 0.08625

 val: ROC-AUC - epoch: 1 - score: 0.97821
 Tox: 0.96405 - STox: 0.98478 - Obs: 0.97667 - Thr: 0.97369 - Ins: 0.97125 - IdH: 0.96928
 val: F1 Score - epoch: 1 - score: 0.64034
 Tox: 0.66049 - STox: 0.26038 - Obs: 0.68823 - Thr: 0.00000 - Ins: 0.64561 - IdH: 0.11068
Epoch 2/5
 train: ROC-AUC - epoch: 2 - score: 0.99671
 Tox: 0.99578 - STox: 0.99362 - Obs: 0.99700 - Thr: 0.99628 - Ins: 0.99451 - IdH: 0.99456
 train: F1 Score - epoch: 2 - score: 0.80602
 Tox: 0.87107 - STox: 0.03075 - Obs: 0.85524 - Thr: 0.09109 - Ins: 0.78690 - IdH: 0.40217

 val: ROC-AUC - epoch: 2 - score: 0.97932
 Tox: 0.96315 - STox: 0.98674 - Obs: 0.97454 - Thr: 0.98653 - Ins: 0.97109 - IdH: 0.97744
 val: F1 Score - epoch: 2 - score: 0.64637
 Tox: 0.6

Epoch 2/5
 train: ROC-AUC - epoch: 2 - score: 0.99651
 Tox: 0.99586 - STox: 0.99360 - Obs: 0.99710 - Thr: 0.99400 - Ins: 0.99402 - IdH: 0.99304
 train: F1 Score - epoch: 2 - score: 0.82295
 Tox: 0.87953 - STox: 0.26795 - Obs: 0.88185 - Thr: 0.00000 - Ins: 0.80129 - IdH: 0.41236

 val: ROC-AUC - epoch: 2 - score: 0.97878
 Tox: 0.96097 - STox: 0.98564 - Obs: 0.97598 - Thr: 0.98296 - Ins: 0.96953 - IdH: 0.97527
 val: F1 Score - epoch: 2 - score: 0.63269
 Tox: 0.64135 - STox: 0.15656 - Obs: 0.68198 - Thr: 0.00000 - Ins: 0.64233 - IdH: 0.41020
Epoch 3/5
 train: ROC-AUC - epoch: 3 - score: 0.99812
 Tox: 0.99801 - STox: 0.99593 - Obs: 0.99840 - Thr: 0.99717 - Ins: 0.99664 - IdH: 0.99741
 train: F1 Score - epoch: 3 - score: 0.88145
 Tox: 0.93316 - STox: 0.60984 - Obs: 0.90794 - Thr: 0.42643 - Ins: 0.86412 - IdH: 0.54081

 val: ROC-AUC - epoch: 3 - score: 0.97815
 Tox: 0.95803 - STox: 0.98562 - Obs: 0.97481 - Thr: 0.98490 - Ins: 0.97088 - IdH: 0.97388
 val: F1 Score - epoch: 3 - score: 0.61095


Epoch 3/5
 train: ROC-AUC - epoch: 3 - score: 0.99817
 Tox: 0.99813 - STox: 0.99575 - Obs: 0.99825 - Thr: 0.99831 - Ins: 0.99644 - IdH: 0.99763
 train: F1 Score - epoch: 3 - score: 0.87156
 Tox: 0.93459 - STox: 0.42344 - Obs: 0.90537 - Thr: 0.52705 - Ins: 0.84549 - IdH: 0.39220

 val: ROC-AUC - epoch: 3 - score: 0.97795
 Tox: 0.96117 - STox: 0.98534 - Obs: 0.97475 - Thr: 0.98850 - Ins: 0.97026 - IdH: 0.97484
 val: F1 Score - epoch: 3 - score: 0.61327
 Tox: 0.60004 - STox: 0.26230 - Obs: 0.67140 - Thr: 0.44444 - Ins: 0.65475 - IdH: 0.36522
Epoch 4/5
 train: ROC-AUC - epoch: 4 - score: 0.99884
 Tox: 0.99918 - STox: 0.99617 - Obs: 0.99905 - Thr: 0.99872 - Ins: 0.99791 - IdH: 0.99824
 train: F1 Score - epoch: 4 - score: 0.90354
 Tox: 0.95317 - STox: 0.02225 - Obs: 0.93319 - Thr: 0.68293 - Ins: 0.89171 - IdH: 0.79609

 val: ROC-AUC - epoch: 4 - score: 0.97594
 Tox: 0.95678 - STox: 0.98007 - Obs: 0.97278 - Thr: 0.98190 - Ins: 0.96794 - IdH: 0.97398
 val: F1 Score - epoch: 4 - score: 0.59196


### Singlelayer CNN with multiple window sizes
This CNN consists of an embedding layer, a convolution layer with multiple window sizes which get concatenated afterwards. On top of that there is a fully connected hidden layer.

In [7]:
m2_kernel_sizes = [3, 4, 5]
m2_hidden_dim = 100
m2_num_filters = [100, 100, 100]
m2_dropout = 0.4
m2_spatial_dropout = 0.2
m2_batch_size = 64
m2_epochs = 5

m2_weights_path = 'data/models/cnn_multiwindowsizes/model{}.hdf5'
m2_scores_path = 'data/scores/cnn_multiwindowsizes/scores_{}'

The network architecture

In [8]:
m2_input = Input((max_comment_length,))
m2_word_emb = Embedding(input_dim=len(embedding_matrix), output_dim=embedding_length, input_length=max_comment_length, weights=[embedding_matrix])(m2_input)
m2_word_emb = SpatialDropout1D(m2_spatial_dropout)(m2_word_emb)

m2_conv1 = Conv1D(kernel_size=m2_kernel_sizes[0], filters=m2_num_filters[0], padding='same')(m2_word_emb)
m2_conv1 = GlobalMaxPooling1D()(m2_conv1)

m2_conv2 = Conv1D(kernel_size=m2_kernel_sizes[1], filters=m2_num_filters[1], padding='same')(m2_word_emb)
m2_conv2 = GlobalMaxPooling1D()(m2_conv2)

m2_conv3 = Conv1D(kernel_size=m2_kernel_sizes[2], filters=m2_num_filters[2], padding='same')(m2_word_emb)
m2_conv3 = GlobalMaxPooling1D()(m2_conv3)

m2_concat4 = Concatenate()([m2_conv1, m2_conv2, m2_conv3])

m2_fc5 = Dense(m2_hidden_dim, activation='relu')(m2_concat4)
m2_fc5 = Dropout(m2_dropout)(m2_fc5)
m2_output = Dense(len(classes), activation='sigmoid')(m2_fc5)

m2_model = Model(inputs=[m2_input], outputs=[m2_output])
m2_model.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            (None, 2000)         0                                            
__________________________________________________________________________________________________
embedding_1 (Embedding)         (None, 2000, 300)    97852800    input_1[0][0]                    
__________________________________________________________________________________________________
spatial_dropout1d_1 (SpatialDro (None, 2000, 300)    0           embedding_1[0][0]                
__________________________________________________________________________________________________
conv1d_1 (Conv1D)               (None, 2000, 100)    90100       spatial_dropout1d_1[0][0]        
__________________________________________________________________________________________________
conv1d_2 (

Train model

In [9]:
m2_scores = train_and_evaluate_model(m2_model, X_train_input, Y_train, (X_test_input, Y_test), \
                                    m2_epochs, m2_batch_size, 'adam', 'binary_crossentropy', ['accuracy'], \
                                    random_seed, runs=5)
np.save(m2_scores_path.format(time.time()), m2_scores)

RUN 1/5
Train on 159571 samples, validate on 63978 samples
Epoch 1/5

  'precision', 'predicted', average, warn_for)



 train: ROC-AUC - epoch: 1 - score: 0.99353
 Tox: 0.99127 - STox: 0.99205 - Obs: 0.99533 - Thr: 0.98707 - Ins: 0.99064 - IdH: 0.98487
 train: F1 Score - epoch: 1 - score: 0.79021
 Tox: 0.85094 - STox: 0.21281 - Obs: 0.85314 - Thr: 0.00000 - Ins: 0.76805 - IdH: 0.04167

 val: ROC-AUC - epoch: 1 - score: 0.97798
 Tox: 0.96347 - STox: 0.98536 - Obs: 0.97531 - Thr: 0.97731 - Ins: 0.96978 - IdH: 0.96900
 val: F1 Score - epoch: 1 - score: 0.62728
 Tox: 0.65258 - STox: 0.19394 - Obs: 0.66379 - Thr: 0.00000 - Ins: 0.63306 - IdH: 0.05398
Epoch 2/5
 train: ROC-AUC - epoch: 2 - score: 0.99698
 Tox: 0.99667 - STox: 0.99428 - Obs: 0.99772 - Thr: 0.99640 - Ins: 0.99480 - IdH: 0.99572
 train: F1 Score - epoch: 2 - score: 0.83364
 Tox: 0.91403 - STox: 0.15359 - Obs: 0.87631 - Thr: 0.18622 - Ins: 0.77807 - IdH: 0.34357

 val: ROC-AUC - epoch: 2 - score: 0.97662
 Tox: 0.95969 - STox: 0.98590 - Obs: 0.97388 - Thr: 0.98460 - Ins: 0.96895 - IdH: 0.97439
 val: F1 Score - epoch: 2 - score: 0.61320
 Tox: 0.6

Epoch 2/5
 train: ROC-AUC - epoch: 2 - score: 0.99720
 Tox: 0.99682 - STox: 0.99401 - Obs: 0.99773 - Thr: 0.99653 - Ins: 0.99496 - IdH: 0.99521
 train: F1 Score - epoch: 2 - score: 0.84621
 Tox: 0.90084 - STox: 0.34190 - Obs: 0.89389 - Thr: 0.24014 - Ins: 0.83407 - IdH: 0.38871

 val: ROC-AUC - epoch: 2 - score: 0.97951
 Tox: 0.96289 - STox: 0.98643 - Obs: 0.97525 - Thr: 0.98598 - Ins: 0.97111 - IdH: 0.97724
 val: F1 Score - epoch: 2 - score: 0.63889
 Tox: 0.64838 - STox: 0.29927 - Obs: 0.67501 - Thr: 0.23938 - Ins: 0.64918 - IdH: 0.37225
Epoch 3/5
 train: ROC-AUC - epoch: 3 - score: 0.99843
 Tox: 0.99832 - STox: 0.99602 - Obs: 0.99883 - Thr: 0.99890 - Ins: 0.99699 - IdH: 0.99796
 train: F1 Score - epoch: 3 - score: 0.89397
 Tox: 0.93507 - STox: 0.65296 - Obs: 0.92579 - Thr: 0.59438 - Ins: 0.87503 - IdH: 0.64611

 val: ROC-AUC - epoch: 3 - score: 0.97817
 Tox: 0.96190 - STox: 0.98677 - Obs: 0.97474 - Thr: 0.98437 - Ins: 0.97096 - IdH: 0.97404
 val: F1 Score - epoch: 3 - score: 0.61089


Epoch 3/5
 train: ROC-AUC - epoch: 3 - score: 0.99847
 Tox: 0.99835 - STox: 0.99577 - Obs: 0.99865 - Thr: 0.99833 - Ins: 0.99722 - IdH: 0.99804
 train: F1 Score - epoch: 3 - score: 0.89103
 Tox: 0.94264 - STox: 0.34416 - Obs: 0.91221 - Thr: 0.50930 - Ins: 0.88330 - IdH: 0.72706

 val: ROC-AUC - epoch: 3 - score: 0.97814
 Tox: 0.95932 - STox: 0.98336 - Obs: 0.97311 - Thr: 0.98568 - Ins: 0.97067 - IdH: 0.97628
 val: F1 Score - epoch: 3 - score: 0.61915
 Tox: 0.60135 - STox: 0.15058 - Obs: 0.68044 - Thr: 0.35294 - Ins: 0.65628 - IdH: 0.54122
Epoch 4/5
 train: ROC-AUC - epoch: 4 - score: 0.99917
 Tox: 0.99932 - STox: 0.99722 - Obs: 0.99934 - Thr: 0.99921 - Ins: 0.99852 - IdH: 0.99879
 train: F1 Score - epoch: 4 - score: 0.91971
 Tox: 0.95970 - STox: 0.57323 - Obs: 0.94083 - Thr: 0.59697 - Ins: 0.91100 - IdH: 0.78070

 val: ROC-AUC - epoch: 4 - score: 0.97594
 Tox: 0.95497 - STox: 0.98534 - Obs: 0.97315 - Thr: 0.98131 - Ins: 0.96697 - IdH: 0.97139
 val: F1 Score - epoch: 4 - score: 0.60462


### Multilayer CNN
This architecture consists of multiple convolutional layers with a fully connected hidden layer on top of it.

In [7]:
m3_kernel_sizes = [3, 5]
m3_hidden_dim = 100
m3_num_filters = [150, 150]
m3_dropout = 0.4
m3_spatial_dropout = 0.2
m3_batch_size = 64
m3_epochs = 5

m3_weights_path = 'data/models/cnn_multilayer/model{}.hdf5'
m3_scores_path = 'data/scores/cnn_multilayer/scores_{}'

The model architecture

In [8]:
m3_input = Input((max_comment_length,))
m3_word_emb = Embedding(input_dim=len(embedding_matrix), output_dim=embedding_length, input_length=max_comment_length, weights=[embedding_matrix])(m3_input)
m3_word_emb = SpatialDropout1D(m3_spatial_dropout)(m3_word_emb)

m3_conv1 = Conv1D(kernel_size=m3_kernel_sizes[0], filters=m3_num_filters[0], padding='same')(m3_word_emb)
#m3_conv1 = MaxPooling1D(2, strides=2)(m3_conv1)

m3_conv2 = Conv1D(kernel_size=m3_kernel_sizes[1], filters=m3_num_filters[1], padding='same')(m3_conv1)
m3_conv2 = GlobalMaxPooling1D()(m3_conv2)

m3_fc3 = Dense(m3_hidden_dim, activation='relu')(m3_conv2)
m3_fc3 = Dropout(m3_dropout)(m3_fc3)
m3_output = Dense(len(classes), activation='sigmoid')(m3_fc3)

m3_model = Model(inputs=[m3_input], outputs=[m3_output])
m3_model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, 2000)              0         
_________________________________________________________________
embedding_1 (Embedding)      (None, 2000, 300)         97852800  
_________________________________________________________________
spatial_dropout1d_1 (Spatial (None, 2000, 300)         0         
_________________________________________________________________
conv1d_1 (Conv1D)            (None, 2000, 150)         135150    
_________________________________________________________________
conv1d_2 (Conv1D)            (None, 2000, 150)         112650    
_________________________________________________________________
global_max_pooling1d_1 (Glob (None, 150)               0         
_________________________________________________________________
dense_1 (Dense)              (None, 100)               15100     
__________

Train model

In [9]:
m3_scores = train_and_evaluate_model(m3_model, X_train_input, Y_train, (X_test_input, Y_test), \
                                    m3_epochs, m3_batch_size, 'adam', 'binary_crossentropy', ['accuracy'], \
                                    random_seed, runs=5)
np.save(m3_scores_path.format(time.time()), m3_scores)

RUN 1/5
Train on 159571 samples, validate on 63978 samples
Epoch 1/5

  'precision', 'predicted', average, warn_for)



 train: ROC-AUC - epoch: 1 - score: 0.99240
 Tox: 0.98926 - STox: 0.99139 - Obs: 0.99420 - Thr: 0.97973 - Ins: 0.98992 - IdH: 0.98433
 train: F1 Score - epoch: 1 - score: 0.78217
 Tox: 0.84286 - STox: 0.35544 - Obs: 0.83047 - Thr: 0.00000 - Ins: 0.76162 - IdH: 0.20185

 val: ROC-AUC - epoch: 1 - score: 0.97636
 Tox: 0.96117 - STox: 0.98580 - Obs: 0.97423 - Thr: 0.96775 - Ins: 0.96697 - IdH: 0.96714
 val: F1 Score - epoch: 1 - score: 0.62857
 Tox: 0.64768 - STox: 0.30376 - Obs: 0.67920 - Thr: 0.00000 - Ins: 0.62210 - IdH: 0.25606
Epoch 2/5
 train: ROC-AUC - epoch: 2 - score: 0.99598
 Tox: 0.99486 - STox: 0.99250 - Obs: 0.99685 - Thr: 0.99540 - Ins: 0.99330 - IdH: 0.99182
 train: F1 Score - epoch: 2 - score: 0.82128
 Tox: 0.88285 - STox: 0.19094 - Obs: 0.86961 - Thr: 0.14829 - Ins: 0.80673 - IdH: 0.31561

 val: ROC-AUC - epoch: 2 - score: 0.97699
 Tox: 0.95829 - STox: 0.98639 - Obs: 0.97377 - Thr: 0.98439 - Ins: 0.96858 - IdH: 0.96712
 val: F1 Score - epoch: 2 - score: 0.62067
 Tox: 0.6

Epoch 2/5
 train: ROC-AUC - epoch: 2 - score: 0.99571
 Tox: 0.99492 - STox: 0.99268 - Obs: 0.99706 - Thr: 0.99267 - Ins: 0.99265 - IdH: 0.99086
 train: F1 Score - epoch: 2 - score: 0.79764
 Tox: 0.83975 - STox: 0.28093 - Obs: 0.87763 - Thr: 0.00000 - Ins: 0.78681 - IdH: 0.23472

 val: ROC-AUC - epoch: 2 - score: 0.97834
 Tox: 0.96097 - STox: 0.98801 - Obs: 0.97551 - Thr: 0.97564 - Ins: 0.96727 - IdH: 0.97163
 val: F1 Score - epoch: 2 - score: 0.63445
 Tox: 0.66313 - STox: 0.28016 - Obs: 0.67377 - Thr: 0.00943 - Ins: 0.62580 - IdH: 0.19036
Epoch 3/5
 train: ROC-AUC - epoch: 3 - score: 0.99704
 Tox: 0.99672 - STox: 0.99381 - Obs: 0.99778 - Thr: 0.99663 - Ins: 0.99500 - IdH: 0.99509
 train: F1 Score - epoch: 3 - score: 0.85589
 Tox: 0.91195 - STox: 0.54369 - Obs: 0.89281 - Thr: 0.22540 - Ins: 0.83184 - IdH: 0.51335

 val: ROC-AUC - epoch: 3 - score: 0.97714
 Tox: 0.96128 - STox: 0.98692 - Obs: 0.97377 - Thr: 0.97864 - Ins: 0.96963 - IdH: 0.97107
 val: F1 Score - epoch: 3 - score: 0.61328


Epoch 3/5
 train: ROC-AUC - epoch: 3 - score: 0.99707
 Tox: 0.99653 - STox: 0.99351 - Obs: 0.99792 - Thr: 0.99654 - Ins: 0.99455 - IdH: 0.99446
 train: F1 Score - epoch: 3 - score: 0.85553
 Tox: 0.91268 - STox: 0.50830 - Obs: 0.89645 - Thr: 0.14149 - Ins: 0.82648 - IdH: 0.56120

 val: ROC-AUC - epoch: 3 - score: 0.97625
 Tox: 0.95892 - STox: 0.98721 - Obs: 0.97303 - Thr: 0.98283 - Ins: 0.96708 - IdH: 0.96936
 val: F1 Score - epoch: 3 - score: 0.60615
 Tox: 0.59311 - STox: 0.40409 - Obs: 0.66059 - Thr: 0.14407 - Ins: 0.62889 - IdH: 0.48436
Epoch 4/5
 train: ROC-AUC - epoch: 4 - score: 0.99778
 Tox: 0.99763 - STox: 0.99426 - Obs: 0.99839 - Thr: 0.99757 - Ins: 0.99636 - IdH: 0.99647
 train: F1 Score - epoch: 4 - score: 0.86228
 Tox: 0.90590 - STox: 0.52818 - Obs: 0.90700 - Thr: 0.28814 - Ins: 0.86173 - IdH: 0.52240

 val: ROC-AUC - epoch: 4 - score: 0.97564
 Tox: 0.95536 - STox: 0.98643 - Obs: 0.97314 - Thr: 0.98078 - Ins: 0.96618 - IdH: 0.97090
 val: F1 Score - epoch: 4 - score: 0.62387


### Dilated CNN
This architecture consists of multiple convolutional layers with a fully connected hidden layer on top of it.
The first convolutional is a non-dilated layer (dilation rate = 1) whereas layer 2 and 3 specify a gradually growing dilation rate.

In [7]:
m4_kernel_sizes = [3, 7, 14]
m4_dilation_rates = [1, 2, 4]
m4_hidden_dim = 100
m4_num_filters = [150, 150, 150]
m4_dropout = 0.4
m4_spatial_dropout = 0.2
m4_batch_size = 64
m4_epochs = 5

m4_weights_path = 'data/models/cnn_dilated/model{}.hdf5'
m4_scores_path = 'data/scores/cnn_dilated/scores_{}'

The model architecture

In [8]:
m4_input = Input((max_comment_length,))
m4_word_emb = Embedding(input_dim=len(embedding_matrix), output_dim=embedding_length, input_length=max_comment_length, weights=[embedding_matrix])(m4_input)
m4_word_emb = SpatialDropout1D(m4_spatial_dropout)(m4_word_emb)

m4_conv1 = Conv1D(kernel_size=m4_kernel_sizes[0], dilation_rate= m4_dilation_rates[0], filters=m4_num_filters[0], padding='same')(m4_word_emb)
m4_conv2 = Conv1D(kernel_size=m4_kernel_sizes[1], dilation_rate= m4_dilation_rates[1], filters=m4_num_filters[1], padding='same')(m4_conv1)
m4_conv3 = Conv1D(kernel_size=m4_kernel_sizes[2], dilation_rate= m4_dilation_rates[2], filters=m4_num_filters[2], padding='same')(m4_conv2)
m4_conv3 = GlobalMaxPooling1D()(m4_conv3)

m4_fc4 = Dense(m4_hidden_dim, activation='relu')(m4_conv3)
m4_fc4 = Dropout(m4_dropout)(m4_fc4)
m4_output = Dense(len(classes), activation='sigmoid')(m4_fc4)

m4_model = Model(inputs=[m4_input], outputs=[m4_output])
m4_model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, 2000)              0         
_________________________________________________________________
embedding_1 (Embedding)      (None, 2000, 300)         97852800  
_________________________________________________________________
spatial_dropout1d_1 (Spatial (None, 2000, 300)         0         
_________________________________________________________________
conv1d_1 (Conv1D)            (None, 2000, 150)         135150    
_________________________________________________________________
conv1d_2 (Conv1D)            (None, 2000, 150)         157650    
_________________________________________________________________
conv1d_3 (Conv1D)            (None, 2000, 150)         315150    
_________________________________________________________________
global_max_pooling1d_1 (Glob (None, 150)               0         
__________

Train model

In [9]:
m4_scores = train_and_evaluate_model(m4_model, X_train_input, Y_train, (X_test_input, Y_test), \
                                    m4_epochs, m4_batch_size, 'adam', 'binary_crossentropy', ['accuracy'], \
                                    random_seed, runs=5)
np.save(m4_scores_path.format(time.time()), m4_scores)

RUN 1/5
Train on 159571 samples, validate on 63978 samples
Epoch 1/5

  'precision', 'predicted', average, warn_for)



 train: ROC-AUC - epoch: 1 - score: 0.99097
 Tox: 0.98833 - STox: 0.99083 - Obs: 0.99381 - Thr: 0.97564 - Ins: 0.98778 - IdH: 0.97343
 train: F1 Score - epoch: 1 - score: 0.75775
 Tox: 0.82424 - STox: 0.06467 - Obs: 0.82935 - Thr: 0.00000 - Ins: 0.72261 - IdH: 0.00000

 val: ROC-AUC - epoch: 1 - score: 0.97462
 Tox: 0.96074 - STox: 0.98676 - Obs: 0.97505 - Thr: 0.96214 - Ins: 0.96480 - IdH: 0.94791
 val: F1 Score - epoch: 1 - score: 0.63069
 Tox: 0.66764 - STox: 0.08633 - Obs: 0.68299 - Thr: 0.00000 - Ins: 0.60542 - IdH: 0.00000
Epoch 2/5
 train: ROC-AUC - epoch: 2 - score: 0.99455
 Tox: 0.99424 - STox: 0.99274 - Obs: 0.99587 - Thr: 0.98637 - Ins: 0.99121 - IdH: 0.98079
 train: F1 Score - epoch: 2 - score: 0.77467
 Tox: 0.85604 - STox: 0.35463 - Obs: 0.84910 - Thr: 0.00000 - Ins: 0.69206 - IdH: 0.00000

 val: ROC-AUC - epoch: 2 - score: 0.97491
 Tox: 0.95950 - STox: 0.98779 - Obs: 0.97521 - Thr: 0.96809 - Ins: 0.96387 - IdH: 0.94794
 val: F1 Score - epoch: 2 - score: 0.63089
 Tox: 0.6

Epoch 2/5
 train: ROC-AUC - epoch: 2 - score: 0.99338
 Tox: 0.99345 - STox: 0.99153 - Obs: 0.99563 - Thr: 0.98911 - Ins: 0.99055 - IdH: 0.98311
 train: F1 Score - epoch: 2 - score: 0.76973
 Tox: 0.82560 - STox: 0.00000 - Obs: 0.85289 - Thr: 0.00000 - Ins: 0.75106 - IdH: 0.00000

 val: ROC-AUC - epoch: 2 - score: 0.97491
 Tox: 0.96071 - STox: 0.98474 - Obs: 0.97543 - Thr: 0.97121 - Ins: 0.96355 - IdH: 0.95352
 val: F1 Score - epoch: 2 - score: 0.62954
 Tox: 0.67184 - STox: 0.00000 - Obs: 0.68535 - Thr: 0.00000 - Ins: 0.59697 - IdH: 0.00000
Epoch 3/5
 train: ROC-AUC - epoch: 3 - score: 0.99627
 Tox: 0.99658 - STox: 0.99330 - Obs: 0.99758 - Thr: 0.99243 - Ins: 0.99330 - IdH: 0.98955
 train: F1 Score - epoch: 3 - score: 0.82756
 Tox: 0.89226 - STox: 0.52691 - Obs: 0.89072 - Thr: 0.00000 - Ins: 0.79525 - IdH: 0.00000

 val: ROC-AUC - epoch: 3 - score: 0.97649
 Tox: 0.96123 - STox: 0.98670 - Obs: 0.97585 - Thr: 0.96976 - Ins: 0.96629 - IdH: 0.96037
 val: F1 Score - epoch: 3 - score: 0.63117


Epoch 3/5
 train: ROC-AUC - epoch: 3 - score: 0.99651
 Tox: 0.99682 - STox: 0.99329 - Obs: 0.99790 - Thr: 0.99483 - Ins: 0.99375 - IdH: 0.98887
 train: F1 Score - epoch: 3 - score: 0.83767
 Tox: 0.91531 - STox: 0.01858 - Obs: 0.89089 - Thr: 0.00000 - Ins: 0.81147 - IdH: 0.00000

 val: ROC-AUC - epoch: 3 - score: 0.97680
 Tox: 0.96130 - STox: 0.98726 - Obs: 0.97549 - Thr: 0.97716 - Ins: 0.96785 - IdH: 0.95973
 val: F1 Score - epoch: 3 - score: 0.61604
 Tox: 0.63317 - STox: 0.01579 - Obs: 0.65873 - Thr: 0.00000 - Ins: 0.63185 - IdH: 0.00279
Epoch 4/5
 train: ROC-AUC - epoch: 4 - score: 0.99715
 Tox: 0.99738 - STox: 0.99421 - Obs: 0.99839 - Thr: 0.99644 - Ins: 0.99522 - IdH: 0.99190
 train: F1 Score - epoch: 4 - score: 0.85957
 Tox: 0.92681 - STox: 0.46376 - Obs: 0.91251 - Thr: 0.05668 - Ins: 0.83236 - IdH: 0.02099

 val: ROC-AUC - epoch: 4 - score: 0.97577
 Tox: 0.96038 - STox: 0.98696 - Obs: 0.97451 - Thr: 0.97848 - Ins: 0.96760 - IdH: 0.96011
 val: F1 Score - epoch: 4 - score: 0.60309
