In [None]:
import logging
import warnings
import os
warnings.filterwarnings('ignore')
os.environ['OMP_NUM_THREADS'] = '4'

import numpy as np, pandas as pd, random
np.random.seed(42)

from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score

from keras.preprocessing.sequence import pad_sequences
from keras.preprocessing.text import Tokenizer
from keras import regularizers
from keras.constraints import maxnorm
from keras.models import Model, load_model
from keras.engine import Layer
from keras.layers import K, Activation, Average, Maximum
from keras.layers import Input, Embedding, Dense, Conv2D, MaxPool2D,GlobalMaxPooling2D
from keras.layers import Bidirectional, GlobalMaxPool1D, MaxPooling1D, Add, Flatten
from keras.layers import GlobalAveragePooling1D, GlobalMaxPooling1D, concatenate, SpatialDropout1D
from keras.layers import Reshape, Flatten, Concatenate, Dropout, SpatialDropout1D
from keras.preprocessing import text, sequence
from keras.callbacks import Callback
from keras.optimizers import Adam, RMSprop
from keras.callbacks import EarlyStopping, ModelCheckpoint, LearningRateScheduler, TerminateOnNaN
from keras.layers import Bidirectional
from keras.layers import GRU, BatchNormalization, Conv1D, MaxPooling1D
from sklearn.preprocessing import MinMaxScaler, StandardScaler

In [None]:
# ---- Paths and raw data -------------------------------------------------
data_dir="data/"
train = pd.read_csv(data_dir+"train.csv")
test = pd.read_csv(data_dir+"test.csv")
submission = pd.read_csv(data_dir+"sample_submission.csv")

# Alternative pre-trained vectors (swap the two lines to use fastText):
#embedding_path = data_dir+"fasttext-crawl-300d-2m/crawl-300d-2M.vec"
embedding_path = data_dir+"glove840b300dtxt/glove.840B.300d.txt"

# ---- Text hyper-parameters ----------------------------------------------
max_features = 30000   # vocabulary cap handed to the Tokenizer / Embedding
max_len = 150          # every comment is padded / truncated to this many tokens
embed_size = 300       # dimensionality of the pre-trained word vectors

# ---- Tokenise and pad the comments --------------------------------------
list_sentences_train = train["comment_text"].fillna("").values
list_sentences_test = test["comment_text"].fillna("").values
list_classes = ["toxic", "severe_toxic", "obscene", "threat", "insult", "identity_hate"]
y = train[list_classes].values
tokenizer = Tokenizer(num_words=max_features)
tokenizer.fit_on_texts(list(list_sentences_train))
list_tokenized_train = tokenizer.texts_to_sequences(list_sentences_train)
list_tokenized_test = tokenizer.texts_to_sequences(list_sentences_test)
X_train = {}
X_test = {}
X_train['text'] = sequence.pad_sequences(list_tokenized_train, maxlen=max_len, padding='post', truncating='post')
X_test['text'] = sequence.pad_sequences(list_tokenized_test, maxlen=max_len, padding='post', truncating='post')

# ---- Hand-crafted numeric features --------------------------------------
num_var_cols = ['num_words','num_comas','num_bangs','num_quotas','avg_word']


def add_text_stats(frame):
    """Add the count-based features listed in ``num_var_cols`` to ``frame`` in place.

    Missing comments are treated as empty strings, matching what is fed to the
    tokenizer, so no NaN reaches the scaler.
    """
    comments = frame["comment_text"].fillna("")
    frame['num_words'] = comments.str.count(r'\S+')
    # NOTE: despite the column name this counts '.' characters, not commas.
    frame['num_comas'] = comments.str.count(r'\.')
    frame['num_bangs'] = comments.str.count(r'\!')
    frame['num_quotas'] = comments.str.count('"')
    # Characters per word; the +1 avoids dividing by zero on empty comments.
    frame['avg_word'] = comments.str.len() / (1 + frame['num_words'])


add_text_stats(train)
add_text_stats(test)

# The scaler is fitted on the full training frame, before the validation rows
# are split off below.
scaler = MinMaxScaler()
X_train['num_vars'] = scaler.fit_transform(train[num_var_cols])
X_test['num_vars'] = scaler.transform(test[num_var_cols])

# ---- Hold out 10% of the training rows for validation -------------------
# A dedicated, seeded generator makes the split reproducible without touching
# the global NumPy random state; sampling without replacement yields exactly
# N//10 distinct validation rows.
N = len(train)
split_rng = np.random.RandomState(42)
indexs_val = np.sort(split_rng.choice(N, size=N//10, replace=False))
is_val_row = np.zeros(N, dtype=bool)
is_val_row[indexs_val] = True
indexs_train = np.flatnonzero(~is_val_row)

X_valid={}
X_valid['text']=X_train['text'][indexs_val]
X_valid['num_vars']=X_train['num_vars'][indexs_val]
Y_valid=y[indexs_val]
X_train['text']=X_train['text'][indexs_train]
X_train['num_vars']=X_train['num_vars'][indexs_train]
Y_train=y[indexs_train]

In [None]:
def get_coefs(word,*arr):
    """Return ``(word, vector)`` where ``vector`` is ``arr`` converted to a float32 array."""
    return word, np.asarray(arr, dtype='float32')


def load_embedding_index(path, dim):
    """Read a text embedding file into a ``{word: float32 vector}`` dict.

    Each line is expected to be ``<token> <v1> ... <v_dim>`` separated by single
    spaces. The line is split from the right so that a token which itself
    contains spaces still yields exactly ``dim`` coefficients. Lines that do
    not carry ``dim`` coefficients (for example a "<count> <dim>" header) are
    skipped.
    """
    index = {}
    # The context manager closes the file; the explicit encoding avoids
    # depending on the platform default.
    with open(path, encoding="utf-8") as handle:
        for line in handle:
            fields = line.rstrip().rsplit(" ", dim)
            if len(fields) != dim + 1:
                continue
            word, coefs = get_coefs(*fields)
            index[word] = coefs
    return index


embedding_index = load_embedding_index(embedding_path, embed_size)

In [None]:
word_index = tokenizer.word_index
# Tokenizer ids start at 1 (0 is reserved for padding), so a vocabulary of
# len(word_index) words needs len(word_index) + 1 rows. nb_words is the number
# of rows that can actually be filled, capped at max_features.
nb_words = min(max_features, len(word_index) + 1)
# Always allocate max_features rows so the matrix matches an Embedding layer
# declared with input_dim=max_features; rows without a pre-trained vector
# (including the padding row 0) stay at zero.
embedding_matrix = np.zeros((max_features, embed_size))
for word, i in word_index.items():
    if i >= nb_words: continue
    embedding_vector = embedding_index.get(word)
    if embedding_vector is not None: embedding_matrix[i] = embedding_vector

In [None]:
def squash(x, axis=-1):
    """Scale ``x`` to (approximately) unit L2 norm along ``axis``.

    This is plain length normalisation rather than the capsule "squash"
    non-linearity ``sqrt(n) / (0.5 + n) * x`` (with ``n`` the squared norm),
    which an earlier version of this function used. ``K.epsilon()`` is added
    under the square root so an all-zero vector does not divide by zero.
    """
    norm = K.sqrt(K.sum(K.square(x), axis, keepdims=True) + K.epsilon())
    return x / norm


# A Capsule implementation in pure Keras
class Capsule(Layer):
    """Capsule layer with iterative routing over the input capsules.

    Axis 1 of the input indexes the input capsules and the last axis is their
    dimension. The output has shape ``(batch, num_capsule, dim_capsule)``.

    Arguments:
        num_capsule: number of output capsules.
        dim_capsule: length of each output capsule vector.
        routings: number of routing iterations; must be at least 1.
        kernel_size: stored on the layer and reported by ``get_config``; the
            computation below does not use it.
        share_weights: if True, one projection kernel is shared by every input
            capsule (applied with ``K.conv1d``); otherwise each input capsule
            has its own kernel (``K.local_conv1d``).
        activation: ``'default'`` applies ``squash``; any other value is
            wrapped in a Keras ``Activation`` layer.

    Raises:
        ValueError: if ``routings`` is smaller than 1.
    """

    def __init__(self, num_capsule, dim_capsule, routings=3, kernel_size=(9, 1), share_weights=True,
                 activation='default', **kwargs):
        super(Capsule, self).__init__(**kwargs)
        # With zero iterations ``call`` would never assign its result.
        if routings < 1:
            raise ValueError('routings must be >= 1, got {}'.format(routings))
        self.num_capsule = num_capsule
        self.dim_capsule = dim_capsule
        self.routings = routings
        self.kernel_size = kernel_size
        self.share_weights = share_weights
        # Keep the raw argument so get_config can report it.
        self._activation_arg = activation
        if activation == 'default':
            self.activation = squash
        else:
            self.activation = Activation(activation)

    def build(self, input_shape):
        """Create the projection kernel mapping input capsules to all output capsules."""
        super(Capsule, self).build(input_shape)
        input_dim_capsule = input_shape[-1]
        if self.share_weights:
            # A single kernel of width 1, shared across input positions.
            self.W = self.add_weight(name='capsule_kernel',
                                     shape=(1, input_dim_capsule,
                                            self.num_capsule * self.dim_capsule),
                                     initializer='glorot_uniform',
                                     trainable=True)
        else:
            # One kernel per input capsule.
            input_num_capsule = input_shape[-2]
            self.W = self.add_weight(name='capsule_kernel',
                                     shape=(input_num_capsule,
                                            input_dim_capsule,
                                            self.num_capsule * self.dim_capsule),
                                     initializer='glorot_uniform',
                                     trainable=True)

    def call(self, u_vecs):
        """Project the input capsules and run ``self.routings`` routing iterations."""
        if self.share_weights:
            u_hat_vecs = K.conv1d(u_vecs, self.W)
        else:
            u_hat_vecs = K.local_conv1d(u_vecs, self.W, [1], [1])

        batch_size = K.shape(u_vecs)[0]
        input_num_capsule = K.shape(u_vecs)[1]
        u_hat_vecs = K.reshape(u_hat_vecs, (batch_size, input_num_capsule,
                                            self.num_capsule, self.dim_capsule))
        u_hat_vecs = K.permute_dimensions(u_hat_vecs, (0, 2, 1, 3))
        # final u_hat_vecs.shape = [None, num_capsule, input_num_capsule, dim_capsule]

        # Routing logits start at zero.
        b = K.zeros_like(u_hat_vecs[:, :, :, 0])  # shape = [None, num_capsule, input_num_capsule]
        for i in range(self.routings):
            # Move num_capsule to the last axis so the softmax normalises over
            # the output capsules, then restore the original layout.
            b = K.permute_dimensions(b, (0, 2, 1))  # shape = [None, input_num_capsule, num_capsule]
            c = K.softmax(b)
            c = K.permute_dimensions(c, (0, 2, 1))
            b = K.permute_dimensions(b, (0, 2, 1))
            # Weighted sum of the predictions, followed by the activation.
            outputs = self.activation(K.batch_dot(c, u_hat_vecs, [2, 2]))
            if i < self.routings - 1:
                # New logits: agreement between outputs and predictions.
                b = K.batch_dot(outputs, u_hat_vecs, [2, 3])

        return outputs

    def compute_output_shape(self, input_shape):
        """Return ``(None, num_capsule, dim_capsule)``."""
        return (None, self.num_capsule, self.dim_capsule)

    def get_config(self):
        """Return the constructor arguments so a saved model can rebuild this layer."""
        config = super(Capsule, self).get_config()
        config.update({
            'num_capsule': self.num_capsule,
            'dim_capsule': self.dim_capsule,
            'routings': self.routings,
            'kernel_size': self.kernel_size,
            'share_weights': self.share_weights,
            'activation': self._activation_arg,
        })
        return config

In [None]:
class RocAucEvaluation(Callback):
    """Keras callback that prints validation ROC-AUC at the end of selected epochs.

    Arguments:
        validation_data: ``(X_val, y_val)`` pair. It must be supplied; the
            empty default cannot be unpacked into the two attributes.
        interval: scores are computed when ``epoch % interval == 0``.

    The per-class report reads class names from the module-level
    ``list_classes``.
    """

    def __init__(self, validation_data=(), interval=1):
        # Run Callback.__init__ itself; super(Callback, self) would skip it.
        super(RocAucEvaluation, self).__init__()
        self.interval = interval
        self.X_val, self.y_val = validation_data

    def on_epoch_end(self, epoch, logs=None):
        """Predict on the validation data and print overall and per-class ROC-AUC."""
        if epoch % self.interval == 0:
            y_pred = self.model.predict(self.X_val, verbose=0)
            score = roc_auc_score(self.y_val, y_pred)
            print("\nROC-AUC - epoch: {:d} - score: {:.6f}".format(epoch+1, score))
            for i in range(len(list_classes)):
                score2 = roc_auc_score(self.y_val[:,i], y_pred[:,i])
                print("ROC-AUC of class {}- epoch: {:d} - score: {:.6f}".format(list_classes[i],epoch+1, score2))

In [None]:
# ---- Callbacks ----------------------------------------------------------
file_path = "best_model.hdf5"
# Save the model whenever the validation loss improves.
check_point = ModelCheckpoint(file_path, monitor = "val_loss", verbose = 1,save_best_only = True, mode = "min")
ra_val = RocAucEvaluation(validation_data=(X_valid, Y_valid), interval = 1)
# Stop after two epochs without a validation-loss improvement.
early_stop = EarlyStopping(monitor = "val_loss", mode = "min", patience = 2)
# Halve the learning rate every epoch, starting from init_lr. The schedule
# previously read ``self.init_lr``, which does not exist at module level and
# would raise NameError on first use. This scheduler is not in the callback
# list that build_model passes to fit.
init_lr = 1e-3
lr_scheduler = LearningRateScheduler(schedule=lambda epoch_n: init_lr / (2**(epoch_n)), verbose = 1)
TON = TerminateOnNaN()

# ---- Model hyper-parameters ---------------------------------------------
Routings = 5            # routing iterations of the Capsule layer
Num_capsule = 8         # number of output capsules
Dim_capsule = 16        # length of each output capsule
dropout_p = 0.25        # input and recurrent dropout of the GRU
rate_drop_dense = 0.28
filter_sizes = [1,2,3,5]  # kernel heights of the 2-D convolution branches
num_filters = 32        # filters per 2-D convolution branch

def build_model(lr = 0.0):
    """Assemble, compile and fit the multi-branch classifier; return the fitted model.

    The embedded text feeds three branches: a 2-D CNN over the embedding
    matrix, a 1-D CNN on top of a bidirectional GRU, and a Capsule layer on the
    same GRU output. The branches are merged with an element-wise average and
    maximum, concatenated with a dense projection of the numeric features, and
    mapped to six sigmoid outputs.

    Arguments:
        lr: learning rate passed straight to Adam. The default is 0.0, so
            callers are expected to supply a real value.

    Returns:
        The Keras ``Model`` after ``fit`` has run, in its last-epoch state.
    """
    # Inputs
    num_vars = Input(shape=[X_train["num_vars"].shape[1]], name="num_vars")
    inp = Input(shape=(max_len, ), name="text")
    embedded = Embedding(max_features, embed_size, weights=[embedding_matrix],trainable=False)(inp)
    embedded = SpatialDropout1D(0.2)(embedded)

    # 2-D CNN branch: four kernel heights, each spanning the full embedding width
    as_image = Reshape((max_len, embed_size, 1))(embedded)
    branch_ids = range(4)
    conv_maps = [
        Conv2D(num_filters, kernel_size=(filter_sizes[k], embed_size),
               kernel_initializer='normal', activation='elu')(as_image)
        for k in branch_ids
    ]
    # Pool over every valid position of each convolution output.
    pooled_maps = [
        MaxPool2D(pool_size=(max_len - filter_sizes[k] + 1, 1))(conv_maps[k])
        for k in branch_ids
    ]
    cnn2d_feats = Concatenate(axis=1)(pooled_maps)
    cnn2d_feats = Flatten()(cnn2d_feats)

    # Bidirectional GRU shared by the next two branches
    bigru = Bidirectional(GRU(128, activation='relu', dropout=dropout_p,
                              recurrent_dropout=dropout_p, return_sequences=True))(embedded)

    # 1-D CNN branch
    conv1D = Conv1D(64, kernel_size = 2, padding = "valid", kernel_initializer = "he_uniform")(bigru)
    avg_pool = GlobalAveragePooling1D()(conv1D)
    max_pool = GlobalMaxPooling1D()(conv1D)
    cnn1d_feats = concatenate([avg_pool, max_pool])

    # Capsule branch
    capsules = Capsule(num_capsule=Num_capsule, dim_capsule=Dim_capsule,
                       routings=Routings, share_weights=True)(bigru)
    caps_feats = Flatten()(capsules)

    # Numeric features
    num_feats = Dense(16, activation = "relu", kernel_constraint=maxnorm(3))(num_vars)

    # Merge the branches and classify
    averaged = Average()([cnn2d_feats, cnn1d_feats, caps_feats])
    maxed = Maximum()([cnn2d_feats, cnn1d_feats, caps_feats])
    merged = concatenate([averaged, maxed, num_feats])
    probs = Dense(6, activation = "sigmoid")(merged)
    model = Model(inputs=[inp,num_vars], outputs=probs)

    # Compile and train
    model.compile(loss = "binary_crossentropy",
                  optimizer = Adam(lr = lr, clipvalue=0.2),
                  metrics = ["accuracy"])
    model.fit(X_train, Y_train,
              batch_size = 32,
              epochs = 4,
              validation_data = ([X_valid['text'],X_valid['num_vars']], Y_valid),
              verbose = 1,
              callbacks = [ra_val, check_point, early_stop, TON])
    return model

In [None]:
# Build, compile and fit the network, using 1e-3 as Adam's learning rate.
model = build_model(lr = 1e-3)

In [None]:
# Score the test set and write the predictions into the sample-submission frame.
# NOTE(review): assumes the columns of `pred` follow the order of list_classes — confirm.
pred = model.predict(X_test, batch_size = 512, verbose = 1)
submission[list_classes] = (pred)
# index=False keeps the row index out of the CSV.
submission.to_csv("submission.csv", index = False)

In [None]:
# Preview the first rows of the submission frame.
submission.head()

In [None]:
# Re-write the submission file. index=False is required here too: without it
# this call overwrites the file with an extra unnamed index column.
submission.to_csv('submission.csv', index=False)

In [None]:
# Draw the model graph, including layer names and tensor shapes.
from keras.utils.vis_utils import plot_model
plot_model(model, show_shapes=True, show_layer_names=True)