In [None]:
import os
os.environ['CUDA_DEVICE_ORDER']='PCI_BUS_ID'
os.environ['CUDA_VISIBLE_DEVICES']='1'
import numpy as np
import keras.backend as K
from keras.models import Model
from keras.layers import Dense, Input, Embedding, PReLU, SpatialDropout1D, Conv1D
from keras.layers import MaxPool1D, AvgPool1D, GlobalMaxPool1D, GlobalAvgPool1D, concatenate
from keras.regularizers import l2
from toxic.keras_utils import AttentionWithContext
from toxic.NN import NN

In [None]:
class Cnn2Layer(NN):
    """Input -> Embedding -> SpatialDropout -> 3 x (Conv1D -> Conv1D -> pooling) -> Dense.

    Three parallel two-layer convolutional branches (kernel sizes 2, 3 and 4)
    read the same frozen word embeddings. Each branch is summarised by
    attention, global average and global max pooling; the summaries are
    concatenated and fed to a 6-unit sigmoid output layer.
    """

    def __init__(self, model_name, embedding_file, max_seq_len=250):
        """Initialise the base class, then load labels, sequences and vectors.

        Args:
            model_name: Forwarded to the ``NN`` base-class constructor.
            embedding_file: Forwarded to ``NN`` as ``word_embedding_file``.
            max_seq_len: Forwarded to ``NN``; also used as the model's input
                length in ``get_model``.
        """
        super().__init__(model_name, max_seq_len, file_dir='../../input',
                         word_embedding_file=embedding_file)
        # Loader methods are not defined in this class; presumably inherited
        # from NN -- confirm against toxic.NN.
        self.load_labels()
        self.load_word_sequences()
        self.load_word_vectors()

    def get_model(self, n_recurrent=50, n_filters=50, dropout_rate=0.2):
        """Build and compile the Keras model.

        Args:
            n_recurrent: Unused by this architecture; kept so the signature
                stays compatible with existing callers.
            n_filters: Filter count of the second convolution in every branch.
                The first convolution uses 4x, 3x and 2x this value for
                kernel sizes 2, 3 and 4 respectively.
            dropout_rate: Rate of the spatial dropout applied to the embeddings.

        Returns:
            A compiled ``keras.models.Model`` (binary cross-entropy, nadam).
        """
        K.clear_session()

        def conv_block(x, n, kernel_size):
            """Two stacked ReLU convolutions followed by three pooled summaries."""
            x = Conv1D(n, kernel_size, activation='relu')(x)
            x = Conv1D(n_filters, kernel_size, activation='relu')(x)
            x_att = AttentionWithContext()(x)
            x_avg = GlobalAvgPool1D()(x)
            x_max = GlobalMaxPool1D()(x)
            return concatenate([x_att, x_avg, x_max])

        # Take both embedding dimensions from the matrix itself rather than
        # hard-coding a width of 300, so embeddings of any size can be loaded.
        vocab_size = self.word_embedding_matrix.shape[0]
        embedding_dim = self.word_embedding_matrix.shape[1]

        inputs = Input(shape=(self.max_seq_len,))
        x = Embedding(vocab_size, embedding_dim,
                      weights=[self.word_embedding_matrix], trainable=False)(inputs)
        x = SpatialDropout1D(dropout_rate)(x)
        x1 = conv_block(x, 4*n_filters, 2)
        x2 = conv_block(x, 3*n_filters, 3)
        x3 = conv_block(x, 2*n_filters, 4)
        x = concatenate([x1, x2, x3])
        # Six independent sigmoid outputs; presumably one per label loaded by
        # load_labels() -- confirm against the NN base class.
        outputs = Dense(6, activation='sigmoid')(x)
        model = Model(inputs=inputs, outputs=outputs)
        model.compile(loss='binary_crossentropy', optimizer='nadam')
        return model

In [None]:
# Fixed hyper-parameters passed to predict_kfold, keyed by embedding name.
# Insertion order of this dict is also the order in which models are trained.
param_lims = {
    'toxic': {'n_filters': 65, 'dropout_rate': 0.043626},
    'fasttext': {'n_filters': 75, 'dropout_rate': 0.078908},
    'glove': {'n_filters': 125, 'dropout_rate': 0.067983},
    'lexvec': {'n_filters': 100, 'dropout_rate': 0.085463},
}

# One Cnn2Layer instance per embedding; each runs 10-fold prediction under
# seeds 0, 1 and 2 with out-of-fold saving enabled.
for emb, params in param_lims.items():
    nn = Cnn2Layer(model_name=f'Cnn2Layer_{emb}',
                   embedding_file=f'embeddings/{emb}_embeddings.txt')

    for seed in range(3):
        nn.predict_kfold(
            params,
            n_folds=10,
            seed=seed,
            monitor_training=False,
            save_oof=True,
        )