In [None]:
import os
os.environ['CUDA_DEVICE_ORDER']='PCI_BUS_ID'
os.environ['CUDA_VISIBLE_DEVICES']='0'
import keras.backend as K
from keras.models import Model
from keras.layers import Input, Dense, Flatten, Embedding, Dropout, PReLU
from keras.layers import Bidirectional, SpatialDropout1D, CuDNNGRU, Conv1D
from keras.layers import GlobalAvgPool1D, GlobalMaxPool1D, concatenate
from keras.regularizers import l2
from toxic.keras_utils import AttentionWithContext
from toxic.NN import NN

In [None]:
class RnnVersion2(NN):
    """Recurrent multi-label classifier with two parallel BiGRU branches and a merge BiGRU.

    Architecture built by ``get_model``::

        Input -> frozen Embedding
              -> 2 x (SpatialDropout1D -> BiGRU(2*n_recurrent) -> pooling)
              -> concatenate(both BiGRU sequences) -> BiGRU(n_recurrent) -> pooling
              -> concatenate(three pooled vectors) -> Dense(6, sigmoid)

    where "pooling" is the concatenation of ``AttentionWithContext``,
    ``GlobalAvgPool1D`` and ``GlobalMaxPool1D`` applied to the same sequence.
    """

    def __init__(self, model_name, embedding_file, max_seq_len=250):
        """Initialise the ``NN`` base class and load labels, sequences and word vectors.

        Parameters
        ----------
        model_name : str
            Forwarded to ``NN.__init__`` as the model name.
        embedding_file : str
            Forwarded to ``NN.__init__`` as ``word_embedding_file``.
        max_seq_len : int, default 250
            Forwarded to ``NN.__init__``; ``get_model`` later reads
            ``self.max_seq_len`` (presumably set by the base class -- confirm
            in ``toxic.NN``).
        """
        super().__init__(model_name, max_seq_len, file_dir='../../input',
                         word_embedding_file=embedding_file)
        # The loaders are defined on the NN base class. ``get_model`` relies on
        # ``self.word_embedding_matrix``, presumably populated by
        # ``load_word_vectors`` -- TODO confirm in ``toxic.NN``.
        self.load_labels()
        self.load_word_sequences()
        self.load_word_vectors()

    def get_model(self, n_recurrent=50, n_dense=50, dropout_rate=0.2, l2_penalty=0.0001):
        """Build and compile the Keras model.

        Parameters
        ----------
        n_recurrent : int, default 50
            Units of the merge GRU; each of the two branch GRUs uses
            ``2 * n_recurrent`` units (per direction).
        n_dense : int, default 50
            Not used by this architecture; kept so that callers passing a
            shared hyperparameter dict keep working.
        dropout_rate : float, default 0.2
            Rate of the ``SpatialDropout1D`` applied to the embeddings at the
            start of each branch.
        l2_penalty : float, default 0.0001
            L2 factor for the kernel and recurrent weights of every GRU.

        Returns
        -------
        keras.models.Model
            Model compiled with ``binary_crossentropy`` loss and the ``nadam``
            optimizer, producing 6 sigmoid outputs.
        """
        # Drop any graph left over from a previously built model.
        K.clear_session()

        def att_max_avg_pooling(x):
            """Concatenate attention, average and max pooling of a sequence."""
            x_att = AttentionWithContext()(x)
            x_avg = GlobalAvgPool1D()(x)
            x_max = GlobalMaxPool1D()(x)
            return concatenate([x_att, x_avg, x_max])

        def bidirectional_gru(units, sequence):
            """Apply an L2-regularised, sequence-returning BiGRU to ``sequence``."""
            return Bidirectional(
                CuDNNGRU(units, return_sequences=True,
                         kernel_regularizer=l2(l2_penalty),
                         recurrent_regularizer=l2(l2_penalty)))(sequence)

        inputs = Input(shape=(self.max_seq_len,))
        # Take both embedding dimensions from the matrix itself so that
        # embedding files of any vector width work (previously fixed to 300).
        vocab_size = self.word_embedding_matrix.shape[0]
        embedding_dim = self.word_embedding_matrix.shape[1]
        emb = Embedding(vocab_size, embedding_dim,
                        weights=[self.word_embedding_matrix], trainable=False)(inputs)

        # model 0
        x0 = SpatialDropout1D(dropout_rate)(emb)
        s0 = bidirectional_gru(2*n_recurrent, x0)
        x0 = att_max_avg_pooling(s0)

        # model 1
        x1 = SpatialDropout1D(dropout_rate)(emb)
        s1 = bidirectional_gru(2*n_recurrent, x1)
        x1 = att_max_avg_pooling(s1)

        # combine sequence output
        x = concatenate([s0, s1])
        x = bidirectional_gru(n_recurrent, x)
        x = att_max_avg_pooling(x)

        # combine it all
        x = concatenate([x, x0, x1])
        outputs = Dense(6, activation='sigmoid')(x)
        model = Model(inputs=inputs, outputs=outputs)
        model.compile(loss='binary_crossentropy', optimizer='nadam')
        return model

In [None]:
# Hyperparameters were never tuned, so every embedding gets the same values.
embedding_names = ('toxic', 'fasttext', 'glove', 'lexvec')

# One independent dict per embedding, so a later per-embedding tweak stays local.
param_lims = {
    name: {
        'n_dense':      50,
        'n_recurrent':  50,
        'dropout_rate': 0.15,
        'l2_penalty':   1.5e-05,
    }
    for name in embedding_names
}


# For each embedding, build the model wrapper once and run the 10-fold
# prediction routine with seeds 0, 1 and 2.
for emb in embedding_names:
    nn = RnnVersion2(
        model_name = f'RnnVersion2_{emb}',
        embedding_file = f'embeddings/{emb}_embeddings.txt')

    fold_params = param_lims[emb]
    for seed in range(3):
        nn.predict_kfold(fold_params, n_folds=10, seed=seed,
                         monitor_training=False, save_oof=True)