based on code from https://www.kaggle.com/brucedai003/no-cnn-no-rnn-pure-transformer-encoder

In this kernel, I have implemented the encoder part of the Transformer architecture described in the paper "Attention Is All You Need" (https://arxiv.org/abs/1706.03762).

Much of the remaining code is adapted from other kernels: for example, loading the embeddings, loading the training and test data, and preprocessing. I really appreciate their contributions.

p.s. When I run this locally, I get validation f1-score around 0.688.

Happy transforming!

## Imports

In [4]:
import os
import time
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
from tqdm import tqdm
import math
from sklearn.model_selection import train_test_split
from sklearn import metrics

from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
from keras.layers import Dense, Input, Embedding, Dropout, Activation, Conv1D
from keras.layers import Bidirectional, GlobalMaxPool1D, GlobalMaxPooling1D, GlobalAveragePooling1D
from keras.layers import Input, Embedding, Dense, Conv2D, MaxPool2D, concatenate
from keras.layers import Reshape, Flatten, Concatenate, Dropout, SpatialDropout1D
from keras.layers import BatchNormalization, InputSpec, add
from keras.optimizers import Adam
from keras.models import Model, load_model
from keras import backend as K
from keras.engine.topology import Layer
from keras import initializers, regularizers, constraints, optimizers, layers, activations
from keras.callbacks import Callback, EarlyStopping, ModelCheckpoint
from keras.utils import Sequence

from bpemb import BPEmb

In [37]:
# Global hyper-parameters read by the preprocessing and model-building cells.
embed_size = 300 # dimensionality of each embedding vector
max_features = 95000 # vocabulary size (rows of the embedding matrix); overwritten with the BPEmb vocabulary size in the embedding-loading cell
maxlen = 200 # length every tokenized question is padded/truncated to
n_heads = 4 # number of attention heads per encoder block (multi-head attention)

## Load Embeddings

In [38]:
# Load the English BPEmb subword model with embed_size-dimensional vectors.
bpemb = BPEmb(lang='en', dim=embed_size)
# The subword vocabulary size replaces the max_features value set earlier,
# so the Embedding layer has one row per BPEmb id.
vocab_size = bpemb.vs
max_features = vocab_size
print(bpemb)

BPEmb(lang=en, vs=10000, dim=300)


In [39]:
def load_and_prec():
    """Load the CSV data and convert the questions to padded BPE id arrays.

    Reads ``data/train.csv`` and ``data/test.csv``, holds out 0.1% of the
    training rows as a validation set, encodes ``question_text`` with the
    module-level ``bpemb`` model, pads each sequence with ``maxlen`` and
    shuffles the train and validation sets using a fixed NumPy seed.

    Returns:
        The tuple ``(train_X, val_X, test_X, train_y, val_y)``.
    """
    train_frame = pd.read_csv("data/train.csv")
    test_frame = pd.read_csv("data/test.csv")
    print("Train shape : ", train_frame.shape)
    print("Test shape : ", test_frame.shape)

    # Tiny hold-out split; the fixed random_state keeps it reproducible.
    train_frame, val_frame = train_test_split(
        train_frame, test_size=0.001, random_state=2018)

    def questions_to_ids(frame):
        # Missing questions become the placeholder "_##_" before encoding.
        texts = frame["question_text"].fillna("_##_").values
        return pad_sequences(bpemb.encode_ids(texts), maxlen=maxlen)

    train_X = questions_to_ids(train_frame)
    val_X = questions_to_ids(val_frame)
    test_X = questions_to_ids(test_frame)

    train_y = train_frame['target'].values
    val_y = val_frame['target'].values

    # Seed once, then draw the train permutation before the validation one.
    np.random.seed(2018)
    train_order = np.random.permutation(len(train_X))
    val_order = np.random.permutation(len(val_X))

    return (train_X[train_order], val_X[val_order], test_X,
            train_y[train_order], val_y[val_order])

In [40]:
def __load_and_prec():
    """Load the CSV data and tokenize the questions with a Keras ``Tokenizer``.

    Alternative to the BPEmb-based loader (it is not called in the cells
    shown here). The tokenizer is fitted on the training questions only,
    limited to ``max_features`` words; sequences are padded with ``maxlen``
    and the train/validation sets are shuffled with a fixed NumPy seed.

    Returns:
        The tuple ``(train_X, val_X, test_X, train_y, val_y, word_index)``
        where ``word_index`` is the fitted tokenizer's word-to-id mapping.
    """
    train_frame = pd.read_csv("data/train.csv")
    test_frame = pd.read_csv("data/test.csv")
    print("Train shape : ", train_frame.shape)
    print("Test shape : ", test_frame.shape)

    # Tiny hold-out split; the fixed random_state keeps it reproducible.
    train_frame, val_frame = train_test_split(
        train_frame, test_size=0.001, random_state=2018)

    # Missing questions become the placeholder "_##_".
    train_text, val_text, test_text = (
        frame["question_text"].fillna("_##_").values
        for frame in (train_frame, val_frame, test_frame)
    )

    tokenizer = Tokenizer(num_words=max_features)
    tokenizer.fit_on_texts(list(train_text))

    train_X, val_X, test_X = (
        pad_sequences(tokenizer.texts_to_sequences(text), maxlen=maxlen)
        for text in (train_text, val_text, test_text)
    )

    train_y = train_frame['target'].values
    val_y = val_frame['target'].values

    # Seed once, then draw the train permutation before the validation one.
    np.random.seed(2018)
    train_order = np.random.permutation(len(train_X))
    val_order = np.random.permutation(len(val_X))

    return (train_X[train_order], val_X[val_order], test_X,
            train_y[train_order], val_y[val_order], tokenizer.word_index)

## Scaled Dot-product attention

In [41]:
class DotProdSelfAttention(Layer):
    """Single-head scaled dot-product self-attention.

    Computes ``softmax(Q K^T / sqrt(units)) V`` as in "Attention Is All You
    Need" (https://arxiv.org/abs/1706.03762), where Q, K and V are three
    bias-free linear projections of the same input, each of width ``units``.

    # Arguments
        units: width of each of the Q, K and V projections (and of the output).
        activation: stored and serialized only; it is not applied in ``call``.
        use_bias: must be falsy; ``build`` raises ``NotImplementedError``
            otherwise.
        kernel_initializer: initializer for the three projection kernels.
        kernel_regularizer: regularizer applied to the three kernels.
        kernel_constraint: constraint applied to the three kernels.
        bias_initializer, bias_regularizer, bias_constraint: stored and
            serialized only, since no bias weights are created.
        activity_regularizer: stored on the layer as ``activity_regularizer``.

    # Input shape
        3D tensor; ``build`` asserts ``len(input_shape) == 3``.

    # Output shape
        Same as the input shape with the last dimension replaced by ``units``.
    """
    def __init__(self, units,
                 activation=None,
                 use_bias=False,
                 kernel_initializer='glorot_uniform',
                 bias_initializer='zeros',
                 kernel_regularizer=None,
                 bias_regularizer=None,
                 activity_regularizer=None,
                 kernel_constraint=None,
                 bias_constraint=None,
                 **kwargs):
        if 'input_shape' not in kwargs and 'input_dim' in kwargs:
            kwargs['input_shape'] = (kwargs.pop('input_dim'),)
        # Forward keyword arguments (name, trainable, ...) as keywords.
        # Unpacking with a single '*' would pass the dict KEYS positionally
        # and fail as soon as any keyword argument is supplied.
        super(DotProdSelfAttention, self).__init__(**kwargs)
        self.units = units
        self.activation = activations.get(activation)
        self.use_bias = use_bias
        self.kernel_initializer = initializers.get(kernel_initializer)
        self.bias_initializer = initializers.get(bias_initializer)
        self.kernel_regularizer = regularizers.get(kernel_regularizer)
        self.bias_regularizer = regularizers.get(bias_regularizer)
        self.activity_regularizer = regularizers.get(activity_regularizer)
        self.kernel_constraint = constraints.get(kernel_constraint)
        self.bias_constraint = constraints.get(bias_constraint)
        self.input_spec = InputSpec(min_ndim=2)
        self.supports_masking = True

    def build(self, input_shape):
        """Create the Q, K and V projection kernels of shape (input_dim, units)."""
        assert len(input_shape) == 3
        if self.use_bias:
            # Reject the unsupported option before allocating any weights.
            raise NotImplementedError
        input_dim = input_shape[-1]
        # We assume the output-dim of Q, K, V are the same
        self.kernels = {
            key: self.add_weight(shape=(input_dim, self.units),
                                 initializer=self.kernel_initializer,
                                 name='kernel_{}'.format(key),
                                 regularizer=self.kernel_regularizer,
                                 constraint=self.kernel_constraint)
            for key in ('Q', 'K', 'V')
        }
        super(DotProdSelfAttention, self).build(input_shape)

    def call(self, x):
        """Return the attention-weighted values for input tensor ``x``."""
        queries = K.dot(x, self.kernels['Q'])
        keys = K.dot(x, self.kernels['K'])
        values = K.dot(x, self.kernels['V'])
        # Pairwise query/key similarities, scaled by sqrt(units).
        scores = K.batch_dot(queries, K.permute_dimensions(keys, [0, 2, 1]))
        scores = scores / K.sqrt(K.constant(self.units, dtype=K.floatx()))
        # NOTE(review): no mask is applied here, so every position takes
        # part in the softmax; confirm this is intended for padded inputs.
        weights = K.softmax(scores, axis=-1)
        return K.batch_dot(weights, values)

    def compute_output_shape(self, input_shape):
        """Return ``input_shape`` with its last dimension set to ``units``."""
        assert input_shape and len(input_shape) >= 2
        assert input_shape[-1]
        output_shape = list(input_shape)
        output_shape[-1] = self.units
        return tuple(output_shape)

    def get_config(self):
        """Return the constructor arguments so the layer can be re-created."""
        config = {
            'units': self.units,
            'activation': activations.serialize(self.activation),
            'use_bias': self.use_bias,
            'kernel_initializer': initializers.serialize(self.kernel_initializer),
            'bias_initializer': initializers.serialize(self.bias_initializer),
            'kernel_regularizer': regularizers.serialize(self.kernel_regularizer),
            'bias_regularizer': regularizers.serialize(self.bias_regularizer),
            'activity_regularizer': regularizers.serialize(self.activity_regularizer),
            'kernel_constraint': constraints.serialize(self.kernel_constraint),
            'bias_constraint': constraints.serialize(self.bias_constraint),
        }
        base_config = super(DotProdSelfAttention, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))
      


## The Encoder Block

In [42]:
def encoder(input_tensor):
    """Apply one Transformer encoder block to ``input_tensor``.

    The block is multi-head self-attention followed by a two-layer
    feed-forward network, each wrapped in a residual connection. Batch
    normalization is used where the paper uses layer normalization.
    """
    # Sub-layer 1: n_heads independent attention heads, concatenated and
    # projected back to embed_size.
    head_width = embed_size // n_heads
    heads = [DotProdSelfAttention(head_width)(input_tensor)
             for _ in range(n_heads)]
    attended = concatenate(heads, axis=-1)
    attended = Dense(embed_size)(attended)
    attended = Dropout(0.1)(attended)

    # Residual connection + normalization around the attention sub-layer.
    attn_out = BatchNormalization(axis=-1)(add([input_tensor, attended]))

    # Sub-layer 2: position-wise feed-forward network (64 hidden units).
    hidden = Dense(64, activation='relu')(attn_out)
    projected = Dense(embed_size)(hidden)

    # Residual connection + normalization around the feed-forward sub-layer.
    return BatchNormalization(axis=-1)(add([attn_out, projected]))

## Positional Encoding

In [43]:
# https://github.com/kpot/keras-transformer/blob/master/keras_transformer/position.py
def positional_signal(hidden_size: int, length: int,
                      min_timescale: float = 1.0, max_timescale: float = 1e4):
    """
    Helper function, constructing basic positional encoding.
    The code is partially based on implementation from Tensor2Tensor library
    https://github.com/tensorflow/tensor2tensor/blob/master/tensor2tensor/layers/common_attention.py

    :param hidden_size: size of the last dimension of the signal; must be even.
    :param length: number of positions to encode.
    :param min_timescale: smallest timescale of the sinusoids.
    :param max_timescale: largest timescale of the sinusoids.
    :return: backend tensor of shape (1, length, hidden_size) whose first
        hidden_size // 2 channels are sines and the remaining channels
        are cosines of the scaled positions.
    :raises ValueError: if hidden_size is odd.
    """

    if hidden_size % 2 != 0:
        raise ValueError(
            f"The hidden dimension of the model must be divisible by 2. "
            f"Currently it is {hidden_size}")
    position = K.arange(0, length, dtype=K.floatx())
    num_timescales = hidden_size // 2
    # max(..., 1) avoids a division by zero (and a NaN signal) when
    # hidden_size == 2, mirroring the Tensor2Tensor implementation.
    log_timescale_increment = K.constant(
        (np.log(float(max_timescale) / float(min_timescale)) /
         max(num_timescales - 1, 1)),
        dtype=K.floatx())
    # Geometric progression of inverse timescales, from 1 / min_timescale
    # down to 1 / max_timescale.
    inv_timescales = (
            min_timescale *
            K.exp(K.arange(num_timescales, dtype=K.floatx()) *
                  -log_timescale_increment))
    # Outer product: (length, 1) * (1, num_timescales).
    scaled_time = K.expand_dims(position, 1) * K.expand_dims(inv_timescales, 0)
    signal = K.concatenate([K.sin(scaled_time), K.cos(scaled_time)], axis=1)
    # Leading axis of size 1 so the signal broadcasts over the batch.
    return K.expand_dims(signal, axis=0)

In [44]:
# https://github.com/kpot/keras-transformer/blob/master/keras_transformer/position.py
class AddPositionalEncoding(Layer):
    """
    Adds the sinusoidal positional signal from section 3.5 of
    "Attention is all you need" to its input. Also intended as a base
    class for the more complex coordinate encoding of
    "Universal Transformers".

    The signal is built once, in ``build``, from the input's sequence
    length and hidden size and then added to every call's inputs.
    """

    def __init__(self, min_timescale: float = 1.0,
                 max_timescale: float = 1.0e4, **kwargs):
        self.min_timescale = min_timescale
        self.max_timescale = max_timescale
        # Filled in by build() once the input shape is known.
        self.signal = None
        super().__init__(**kwargs)

    def get_config(self):
        """Return the base config extended with both timescale bounds."""
        config = super().get_config()
        config.update({
            'min_timescale': self.min_timescale,
            'max_timescale': self.max_timescale,
        })
        return config

    def build(self, input_shape):
        """Precompute the positional signal for a 3-element input shape."""
        _batch, seq_len, width = input_shape
        self.signal = positional_signal(
            width, seq_len, self.min_timescale, self.max_timescale)
        return super().build(input_shape)

    def call(self, inputs, **kwargs):
        """Return ``inputs`` with the precomputed signal added."""
        return inputs + self.signal

## Transformer Encoder model

In [45]:
def model_transformer(embedding_matrix, n_encoder=3):
    """Build and compile the Transformer-encoder binary classifier.

    Args:
        embedding_matrix: initial weights for the (trainable) embedding
            layer, which is declared with ``max_features`` rows and
            ``embed_size`` columns.
        n_encoder: number of stacked encoder blocks.

    Returns:
        A compiled Keras ``Model`` mapping ``maxlen`` token ids to a single
        sigmoid output (binary cross-entropy loss, Adam optimizer).
    """
    token_ids = Input(shape=(maxlen,))
    hidden = Embedding(max_features, embed_size,
                       weights=[embedding_matrix], trainable=True)(token_ids)
    # Inject position information before the attention stack.
    hidden = AddPositionalEncoding()(hidden)
    hidden = Dropout(0.1)(hidden)
    for _ in range(n_encoder):
        hidden = encoder(hidden)

    # Classification head: average- and max-pool over the sequence axis,
    # then a small dense layer.
    pooled = concatenate([GlobalAveragePooling1D()(hidden),
                          GlobalMaxPooling1D()(hidden)])
    features = Dense(64, activation="relu")(pooled)
    features = Dropout(0.1)(features)
    probability = Dense(1, activation="sigmoid")(features)

    net = Model(inputs=token_ids, outputs=probability)
    net.compile(loss='binary_crossentropy', optimizer='adam',
                metrics=['accuracy'])
    return net

In [46]:
# https://stanford.edu/~shervine/blog/keras-how-to-generate-data-on-the-fly
class BaseDataGenerator(Sequence):
    """Keras ``Sequence`` that serves batches of the global training arrays.

    Batches are drawn by row id from the module-level ``train_X`` and
    ``train_y``; the id order is rebuilt (and optionally shuffled) at the
    end of every epoch.
    """
    def __init__(self, list_IDs, batch_size=128, shuffle=True):
        self.list_IDs = list_IDs
        self.batch_size = batch_size
        self.shuffle = shuffle
        # Build the initial index order.
        self.on_epoch_end()

    def __len__(self):
        """Number of steps in one epoch."""
        # The trick: an epoch covers only a quarter of the available batches.
        return len(self.list_IDs) // (4 * self.batch_size)

    def __getitem__(self, index):
        """Return the ``(X, y)`` batch number ``index``."""
        start = index * self.batch_size
        positions = self.indexes[start:start + self.batch_size]
        batch_ids = [self.list_IDs[pos] for pos in positions]
        return self.__data_generation(batch_ids)

    def on_epoch_end(self):
        """Reset the index order, reshuffling it when ``shuffle`` is set."""
        self.indexes = np.arange(len(self.list_IDs))
        if self.shuffle:
            np.random.shuffle(self.indexes)

    def __data_generation(self, list_IDs_temp):
        """Select the rows ``list_IDs_temp`` from the global training arrays."""
        return train_X[list_IDs_temp, :], train_y[list_IDs_temp]

## Train and Predict
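Early stopping and model checkpointing (to reload the best-validation model) are only sketched in the commented-out code below; the active code simply calls `model.fit` for a fixed number of epochs.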

In [47]:
# https://www.kaggle.com/strideradu/word2vec-and-gensim-go-go-go
def train_pred(model, epochs=2):
    """Fit ``model`` on the global training data and predict on val/test.

    Reads the module-level ``train_X``, ``train_y``, ``val_X``, ``val_y``
    and ``test_X`` arrays.

    Args:
        model: compiled Keras model to train.
        epochs: number of training epochs.

    Returns:
        Tuple ``(pred_val_y, pred_test_y)`` with the model outputs for the
        validation and test sets.
    """
    # Disabled alternative, kept for reference: learning-rate schedule,
    # early stopping / checkpoint callbacks and generator-based training.
    #     loss_history = LossHistory()
    #     lrate = BatchLRScheduler(step_decay)
    #     callbacks_list = [loss_history, lrate]
    #     es = EarlyStopping(monitor='val_loss', min_delta=0, patience=5)
    #     model_path = 'keras_models.h5'
    #     mc = ModelCheckpoint(filepath=model_path, monitor='val_loss', save_best_only=True)
    #     callbacks = [es, mc]
    #     train_generator = BaseDataGenerator(list(np.arange(train_X.shape[0])), batch_size=512)
    #     model.fit_generator(train_generator,
    #                         epochs=epochs,
    #                         validation_data=(val_X, val_y),
    #                         callbacks=callbacks)
    #     model = load_model(model_path)
    model.fit(
        train_X,
        train_y,
        batch_size=512,
        epochs=epochs,
        validation_data=(val_X, val_y),
    )

    val_predictions = model.predict([val_X], batch_size=1024, verbose=0)
    test_predictions = model.predict([test_X], batch_size=1024, verbose=0)
    return val_predictions, test_predictions

Main part: load, train, pred and blend

In [48]:
# Load and preprocess the data. These module-level names are read as globals
# by train_pred and BaseDataGenerator.
train_X, val_X, test_X, train_y, val_y = load_and_prec()


Train shape :  (1306122, 3)
Test shape :  (375806, 2)


Create New Embeddings

In [49]:
# Use the BPEmb vectors as the initial embedding weights.
embedding_matrix = bpemb.vectors
# Display the matrix shape as the cell output.
np.shape(embedding_matrix)

(10000, 300)

## Train and Predict
Here I am experimenting with a single encoder block (`n_encoder = 1`); this is not guaranteed to be optimal, so you can try other values. Note that I used epochs = 3.

In [50]:
# Collects one [val_predictions, test_predictions, label] entry per trained model.
outputs = []

In [51]:
# Build a model with n_encoder encoder blocks, train it for 3 epochs and keep
# its validation/test predictions together with a label naming the setup.
n_encoder = 1
pred_val_y, pred_test_y = train_pred(model_transformer(embedding_matrix, n_encoder=n_encoder), epochs = 3)
outputs.append([pred_val_y, pred_test_y, 'transformer_enc{}'.format(n_encoder)])

Train on 1304815 samples, validate on 1307 samples
Epoch 1/3


 103424/1304815 [=>............................] - ETA: 2:00:05 - loss: 0.2653 - acc: 0.92 - ETA: 1:04:44 - loss: 0.3850 - acc: 0.93 - ETA: 46:18 - loss: 0.4184 - acc: 0.9362 - ETA: 37:06 - loss: 0.4107 - acc: 0.93 - ETA: 31:34 - loss: 0.4147 - acc: 0.93 - ETA: 27:52 - loss: 0.3921 - acc: 0.93 - ETA: 25:18 - loss: 0.3785 - acc: 0.92 - ETA: 23:24 - loss: 0.3667 - acc: 0.92 - ETA: 21:52 - loss: 0.3511 - acc: 0.92 - ETA: 20:38 - loss: 0.3379 - acc: 0.92 - ETA: 19:37 - loss: 0.3367 - acc: 0.92 - ETA: 18:47 - loss: 0.3320 - acc: 0.92 - ETA: 18:06 - loss: 0.3223 - acc: 0.92 - ETA: 17:31 - loss: 0.3199 - acc: 0.92 - ETA: 17:00 - loss: 0.3147 - acc: 0.92 - ETA: 16:34 - loss: 0.3121 - acc: 0.92 - ETA: 16:11 - loss: 0.3074 - acc: 0.92 - ETA: 15:49 - loss: 0.3013 - acc: 0.92 - ETA: 15:31 - loss: 0.2964 - acc: 0.92 - ETA: 15:13 - loss: 0.2933 - acc: 0.92 - ETA: 14:56 - loss: 0.2868 - acc: 0.92 - ETA: 14:41 - loss: 0.2822 - acc: 0.92 - ETA: 14:27 - loss: 0.2785 - acc: 0.92 - ETA: 14:14 - loss: 0.27

 207872/1304815 [===>..........................] - ETA: 9:20 - loss: 0.1715 - acc: 0.940 - ETA: 9:19 - loss: 0.1715 - acc: 0.940 - ETA: 9:19 - loss: 0.1715 - acc: 0.940 - ETA: 9:19 - loss: 0.1713 - acc: 0.940 - ETA: 9:18 - loss: 0.1710 - acc: 0.940 - ETA: 9:18 - loss: 0.1710 - acc: 0.940 - ETA: 9:18 - loss: 0.1711 - acc: 0.940 - ETA: 9:17 - loss: 0.1708 - acc: 0.940 - ETA: 9:17 - loss: 0.1706 - acc: 0.940 - ETA: 9:16 - loss: 0.1705 - acc: 0.940 - ETA: 9:16 - loss: 0.1704 - acc: 0.940 - ETA: 9:16 - loss: 0.1701 - acc: 0.940 - ETA: 9:15 - loss: 0.1701 - acc: 0.940 - ETA: 9:15 - loss: 0.1700 - acc: 0.940 - ETA: 9:14 - loss: 0.1699 - acc: 0.940 - ETA: 9:14 - loss: 0.1699 - acc: 0.940 - ETA: 9:14 - loss: 0.1698 - acc: 0.940 - ETA: 9:13 - loss: 0.1697 - acc: 0.940 - ETA: 9:13 - loss: 0.1695 - acc: 0.940 - ETA: 9:13 - loss: 0.1693 - acc: 0.940 - ETA: 9:12 - loss: 0.1691 - acc: 0.940 - ETA: 9:12 - loss: 0.1689 - acc: 0.940 - ETA: 9:12 - loss: 0.1688 - acc: 0.940 - ETA: 9:11 - loss: 0.1686 - ac





















Epoch 2/3


 104448/1304815 [=>............................] - ETA: 12:00 - loss: 0.0953 - acc: 0.96 - ETA: 10:57 - loss: 0.1025 - acc: 0.96 - ETA: 10:36 - loss: 0.0985 - acc: 0.95 - ETA: 10:26 - loss: 0.1046 - acc: 0.95 - ETA: 10:19 - loss: 0.1055 - acc: 0.95 - ETA: 10:14 - loss: 0.1103 - acc: 0.95 - ETA: 10:12 - loss: 0.1134 - acc: 0.95 - ETA: 10:12 - loss: 0.1162 - acc: 0.95 - ETA: 10:13 - loss: 0.1153 - acc: 0.95 - ETA: 10:12 - loss: 0.1149 - acc: 0.95 - ETA: 10:11 - loss: 0.1174 - acc: 0.95 - ETA: 10:10 - loss: 0.1178 - acc: 0.95 - ETA: 10:07 - loss: 0.1179 - acc: 0.95 - ETA: 10:05 - loss: 0.1162 - acc: 0.95 - ETA: 10:04 - loss: 0.1176 - acc: 0.95 - ETA: 10:03 - loss: 0.1173 - acc: 0.95 - ETA: 10:02 - loss: 0.1165 - acc: 0.95 - ETA: 10:01 - loss: 0.1153 - acc: 0.95 - ETA: 10:01 - loss: 0.1164 - acc: 0.95 - ETA: 10:00 - loss: 0.1172 - acc: 0.95 - ETA: 9:59 - loss: 0.1171 - acc: 0.9532 - ETA: 9:59 - loss: 0.1174 - acc: 0.953 - ETA: 9:58 - loss: 0.1174 - acc: 0.953 - ETA: 9:58 - loss: 0.1177 - a

 208896/1304815 [===>..........................] - ETA: 9:16 - loss: 0.1153 - acc: 0.953 - ETA: 9:16 - loss: 0.1151 - acc: 0.953 - ETA: 9:16 - loss: 0.1149 - acc: 0.953 - ETA: 9:16 - loss: 0.1149 - acc: 0.953 - ETA: 9:15 - loss: 0.1149 - acc: 0.953 - ETA: 9:15 - loss: 0.1149 - acc: 0.953 - ETA: 9:15 - loss: 0.1149 - acc: 0.954 - ETA: 9:14 - loss: 0.1149 - acc: 0.954 - ETA: 9:14 - loss: 0.1148 - acc: 0.954 - ETA: 9:14 - loss: 0.1149 - acc: 0.954 - ETA: 9:14 - loss: 0.1151 - acc: 0.953 - ETA: 9:14 - loss: 0.1150 - acc: 0.954 - ETA: 9:13 - loss: 0.1149 - acc: 0.954 - ETA: 9:13 - loss: 0.1149 - acc: 0.954 - ETA: 9:13 - loss: 0.1148 - acc: 0.954 - ETA: 9:13 - loss: 0.1148 - acc: 0.954 - ETA: 9:12 - loss: 0.1149 - acc: 0.954 - ETA: 9:12 - loss: 0.1150 - acc: 0.954 - ETA: 9:12 - loss: 0.1150 - acc: 0.954 - ETA: 9:12 - loss: 0.1150 - acc: 0.954 - ETA: 9:11 - loss: 0.1151 - acc: 0.954 - ETA: 9:11 - loss: 0.1151 - acc: 0.954 - ETA: 9:11 - loss: 0.1151 - acc: 0.954 - ETA: 9:11 - loss: 0.1150 - ac





















Epoch 3/3


 104448/1304815 [=>............................] - ETA: 11:42 - loss: 0.0670 - acc: 0.98 - ETA: 10:48 - loss: 0.0857 - acc: 0.97 - ETA: 10:30 - loss: 0.0905 - acc: 0.96 - ETA: 10:22 - loss: 0.0915 - acc: 0.96 - ETA: 10:16 - loss: 0.0947 - acc: 0.96 - ETA: 10:15 - loss: 0.0928 - acc: 0.96 - ETA: 10:16 - loss: 0.0928 - acc: 0.96 - ETA: 10:17 - loss: 0.0940 - acc: 0.96 - ETA: 10:15 - loss: 0.1008 - acc: 0.96 - ETA: 10:13 - loss: 0.0962 - acc: 0.96 - ETA: 10:11 - loss: 0.0995 - acc: 0.96 - ETA: 10:10 - loss: 0.1011 - acc: 0.96 - ETA: 10:19 - loss: 0.1015 - acc: 0.96 - ETA: 10:19 - loss: 0.1031 - acc: 0.96 - ETA: 10:18 - loss: 0.1016 - acc: 0.96 - ETA: 10:19 - loss: 0.1013 - acc: 0.95 - ETA: 10:18 - loss: 0.1015 - acc: 0.95 - ETA: 10:16 - loss: 0.1015 - acc: 0.95 - ETA: 10:14 - loss: 0.1021 - acc: 0.95 - ETA: 10:13 - loss: 0.1019 - acc: 0.95 - ETA: 10:11 - loss: 0.1013 - acc: 0.95 - ETA: 10:10 - loss: 0.1020 - acc: 0.95 - ETA: 10:10 - loss: 0.1028 - acc: 0.95 - ETA: 10:10 - loss: 0.1025 - a

 208896/1304815 [===>..........................] - ETA: 9:16 - loss: 0.1069 - acc: 0.957 - ETA: 9:16 - loss: 0.1067 - acc: 0.957 - ETA: 9:15 - loss: 0.1069 - acc: 0.957 - ETA: 9:15 - loss: 0.1071 - acc: 0.957 - ETA: 9:15 - loss: 0.1071 - acc: 0.957 - ETA: 9:15 - loss: 0.1071 - acc: 0.957 - ETA: 9:14 - loss: 0.1071 - acc: 0.957 - ETA: 9:14 - loss: 0.1072 - acc: 0.957 - ETA: 9:14 - loss: 0.1071 - acc: 0.957 - ETA: 9:13 - loss: 0.1071 - acc: 0.957 - ETA: 9:13 - loss: 0.1070 - acc: 0.957 - ETA: 9:13 - loss: 0.1070 - acc: 0.957 - ETA: 9:13 - loss: 0.1070 - acc: 0.957 - ETA: 9:12 - loss: 0.1070 - acc: 0.957 - ETA: 9:12 - loss: 0.1070 - acc: 0.957 - ETA: 9:12 - loss: 0.1071 - acc: 0.957 - ETA: 9:12 - loss: 0.1071 - acc: 0.957 - ETA: 9:11 - loss: 0.1072 - acc: 0.957 - ETA: 9:11 - loss: 0.1072 - acc: 0.957 - ETA: 9:11 - loss: 0.1073 - acc: 0.957 - ETA: 9:10 - loss: 0.1074 - acc: 0.957 - ETA: 9:10 - loss: 0.1072 - acc: 0.957 - ETA: 9:10 - loss: 0.1072 - acc: 0.957 - ETA: 9:10 - loss: 0.1072 - ac























In [52]:
# Sweep decision thresholds from 0.10 to 0.90 in steps of 0.01 and print the
# validation F1 score obtained at each one.
for thresh in np.arange(0.1, 0.91, 0.01):
    # Round to two decimals so the threshold is a clean value.
    thresh = np.round(thresh, 2)
    print("F1 score at threshold {0:.2f} is {1}".format(thresh, metrics.f1_score(val_y, (pred_val_y>thresh).astype(int))))

F1 score at threshold 0.10 is 0.5310344827586208
F1 score at threshold 0.11 is 0.5519713261648745
F1 score at threshold 0.12 is 0.5682656826568265
F1 score at threshold 0.13 is 0.5724907063197026
F1 score at threshold 0.14 is 0.5735849056603775
F1 score at threshold 0.15 is 0.5868725868725869
F1 score at threshold 0.16 is 0.5914396887159533
F1 score at threshold 0.17 is 0.5914396887159533
F1 score at threshold 0.18 is 0.5914396887159533
F1 score at threshold 0.19 is 0.5905511811023623
F1 score at threshold 0.20 is 0.5905511811023623
F1 score at threshold 0.21 is 0.596774193548387
F1 score at threshold 0.22 is 0.5999999999999999
F1 score at threshold 0.23 is 0.5991561181434599
F1 score at threshold 0.24 is 0.6068376068376068
F1 score at threshold 0.25 is 0.6060606060606061
F1 score at threshold 0.26 is 0.6140350877192982
F1 score at threshold 0.27 is 0.6098654708520179
F1 score at threshold 0.28 is 0.609090909090909
F1 score at threshold 0.29 is 0.609090909090909
F1 score at threshold 0

  'precision', 'predicted', average, warn_for)


F1 score at threshold 0.90 is 0.0


In [53]:
# Binarize the test predictions with a hard-coded threshold of 0.57.
# NOTE: this rebinds pred_test_y to the 0/1 labels, replacing the raw model outputs.
pred_test_y = (pred_test_y > 0.57).astype(int)
# Only the question ids are needed to build the submission file.
test_df = pd.read_csv("data/test.csv", usecols=["qid"])
out_df = pd.DataFrame({"qid":test_df["qid"].values})
out_df['prediction'] = pred_test_y
out_df.to_csv("submission.csv", index=False)

In [54]:
# Rebuild the qid/prediction frame without writing it to disk.
# NOTE(review): if the previous cell has already run, pred_test_y holds 0/1
# labels and this comparison leaves them unchanged.
idx = (pred_test_y > 0.57).astype(int)
test_df = pd.read_csv("data/test.csv", usecols=["qid"])
out_df = pd.DataFrame({"qid":test_df["qid"].values})
out_df['prediction'] = idx

In [55]:
# Print the row indices of all test questions predicted as class 1,
# comma-separated on a single line.
mylist = out_df[out_df.prediction == 1].index
for i in mylist:
    print(i, end=',')

0,16,21,27,42,58,93,99,111,120,153,184,201,211,223,272,273,276,281,289,294,298,327,358,375,433,434,467,478,517,524,525,541,544,552,570,599,639,652,673,682,732,743,773,779,786,790,812,841,849,917,923,927,938,940,950,968,995,1011,1012,1016,1028,1031,1081,1112,1132,1139,1158,1199,1201,1207,1236,1239,1258,1261,1273,1287,1298,1299,1315,1328,1331,1345,1349,1356,1381,1383,1441,1453,1455,1461,1474,1486,1529,1539,1561,1562,1567,1583,1598,1609,1659,1732,1759,1761,1792,1794,1805,1815,1826,1879,1882,1901,1979,1995,1997,2025,2030,2058,2062,2078,2082,2098,2099,2116,2129,2153,2158,2186,2204,2213,2227,2273,2282,2292,2302,2305,2323,2391,2421,2432,2437,2449,2452,2456,2469,2476,2485,2487,2489,2527,2552,2611,2613,2642,2666,2688,2700,2726,2738,2754,2768,2783,2803,2807,2828,2837,2851,2866,2879,2886,2894,2898,2914,2952,2980,3022,3023,3025,3048,3057,3068,3082,3086,3104,3115,3149,3180,3201,3207,3216,3218,3239,3241,3245,3272,3286,3319,3400,3405,3409,3427,3453,3461,3463,3474,3490,3505,3506,3514,3530,3533,3537,36

,28603,28609,28643,28654,28680,28681,28689,28720,28729,28739,28740,28749,28756,28815,28827,28838,28846,28864,28876,28898,28902,28906,28914,28935,28936,28944,28954,28959,29007,29035,29053,29059,29070,29100,29127,29129,29132,29135,29137,29144,29172,29179,29198,29207,29221,29282,29351,29352,29356,29383,29399,29416,29421,29427,29435,29441,29462,29472,29480,29517,29522,29529,29648,29697,29712,29754,29757,29765,29821,29847,29851,29865,29875,29889,29916,29918,29927,29972,29975,30012,30013,30023,30036,30071,30140,30159,30187,30198,30248,30348,30364,30447,30449,30478,30482,30485,30488,30499,30517,30518,30531,30552,30568,30576,30598,30617,30639,30657,30673,30678,30686,30689,30748,30751,30753,30839,30851,30856,30907,30917,30920,30921,30957,30963,30995,31008,31022,31047,31067,31070,31077,31092,31110,31114,31121,31123,31137,31139,31178,31248,31252,31274,31313,31323,31338,31346,31350,31352,31422,31433,31448,31476,31485,31493,31516,31547,31551,31557,31566,31568,31588,31613,31619,31622,31623,31633,316

,54770,54785,54799,54807,54809,54810,54842,54851,54877,54889,54891,54905,54932,54936,54993,54997,55003,55023,55070,55102,55145,55148,55151,55188,55199,55202,55203,55224,55231,55232,55244,55245,55253,55282,55312,55315,55321,55323,55331,55338,55353,55356,55363,55380,55381,55384,55403,55427,55437,55453,55463,55483,55504,55513,55537,55592,55632,55634,55655,55660,55720,55758,55770,55837,55875,55916,55994,56001,56008,56026,56027,56039,56040,56055,56084,56092,56129,56141,56143,56155,56186,56226,56256,56271,56297,56326,56362,56399,56438,56466,56470,56487,56505,56512,56539,56553,56575,56603,56632,56638,56642,56686,56690,56699,56707,56748,56774,56785,56804,56817,56844,56856,56862,56863,56871,56877,56933,56975,56978,56980,56981,57026,57046,57087,57104,57132,57139,57145,57155,57168,57177,57195,57210,57219,57226,57266,57276,57289,57298,57350,57365,57376,57395,57449,57454,57468,57589,57590,57595,57601,57637,57688,57694,57695,57713,57793,57806,57814,57844,57862,57868,57881,57895,57910,57973,58028,580

,82098,82130,82184,82201,82206,82237,82249,82267,82278,82279,82305,82315,82329,82354,82363,82370,82400,82406,82420,82425,82426,82427,82470,82473,82499,82516,82529,82546,82567,82578,82587,82592,82603,82615,82624,82665,82727,82732,82736,82744,82754,82757,82803,82806,82808,82813,82824,82833,82875,82878,82881,82890,82892,82903,82910,82916,82917,82922,82962,82975,82981,82982,83016,83061,83079,83154,83171,83252,83267,83299,83341,83347,83350,83355,83360,83407,83414,83421,83432,83439,83450,83453,83461,83464,83533,83541,83549,83592,83602,83603,83633,83693,83719,83733,83760,83782,83829,83843,83858,83893,83897,83905,83921,83930,83946,84043,84045,84066,84068,84072,84078,84082,84090,84134,84211,84214,84216,84223,84252,84261,84304,84341,84366,84413,84457,84491,84537,84590,84604,84611,84638,84686,84701,84744,84813,84824,84841,84844,84924,85003,85029,85030,85031,85085,85106,85107,85122,85127,85135,85144,85160,85174,85182,85201,85248,85292,85296,85329,85344,85381,85408,85423,85432,85443,85482,85486,855

,108552,108600,108601,108631,108669,108677,108701,108726,108731,108747,108762,108770,108774,108798,108829,108836,108900,108918,108935,108951,108964,108968,108976,109023,109029,109079,109105,109116,109129,109158,109259,109301,109306,109376,109392,109398,109424,109442,109459,109462,109525,109536,109569,109581,109587,109596,109604,109609,109647,109681,109687,109709,109724,109726,109750,109782,109799,109834,109838,109862,109877,109893,109909,109954,109991,110006,110035,110099,110102,110103,110142,110169,110172,110199,110224,110244,110250,110275,110281,110292,110296,110303,110331,110366,110374,110381,110403,110409,110419,110422,110427,110475,110494,110501,110539,110544,110565,110568,110577,110616,110617,110690,110695,110696,110708,110717,110736,110739,110758,110791,110814,110817,110828,110904,110959,110981,110982,110984,111011,111033,111103,111107,111110,111115,111116,111149,111153,111169,111171,111183,111185,111221,111276,111286,111300,111334,111335,111347,111352,111389,111392,111393,11141

,131606,131652,131666,131667,131695,131795,131797,131813,131837,131855,131864,131873,131874,131905,131917,131922,131937,131945,131964,131988,132008,132036,132072,132100,132119,132164,132195,132201,132219,132223,132227,132234,132236,132246,132254,132277,132281,132305,132320,132355,132356,132365,132386,132396,132399,132485,132522,132540,132544,132570,132596,132614,132619,132643,132645,132684,132688,132689,132692,132703,132706,132763,132770,132777,132788,132791,132793,132809,132826,132871,132879,132880,132883,132887,132890,132912,132940,132944,132953,132981,133001,133017,133025,133037,133039,133059,133108,133124,133131,133153,133158,133166,133181,133203,133209,133219,133220,133226,133228,133238,133257,133265,133274,133287,133294,133342,133355,133362,133392,133399,133411,133434,133443,133455,133456,133499,133530,133532,133535,133546,133551,133562,133577,133620,133642,133653,133672,133725,133762,133795,133831,133848,133897,133901,133932,133957,133990,134017,134034,134051,134056,134062,13407

,153852,153870,153894,153902,153925,153937,154002,154005,154023,154038,154039,154040,154064,154085,154088,154093,154098,154147,154197,154212,154232,154242,154270,154280,154284,154328,154336,154343,154344,154358,154391,154486,154493,154530,154538,154541,154553,154561,154563,154594,154611,154616,154655,154659,154677,154723,154724,154737,154746,154759,154765,154784,154787,154810,154814,154891,154938,154960,154963,154984,154987,154994,155011,155021,155032,155088,155090,155113,155119,155192,155206,155213,155251,155258,155278,155283,155302,155308,155314,155331,155338,155345,155352,155358,155472,155487,155499,155500,155533,155560,155609,155661,155724,155738,155741,155765,155800,155801,155806,155812,155830,155844,155848,155878,155911,155928,155950,155952,155958,155963,155978,155979,155993,155998,156039,156063,156077,156104,156146,156161,156168,156177,156190,156195,156203,156217,156220,156224,156271,156300,156348,156365,156386,156397,156407,156417,156426,156460,156488,156492,156501,156521,15653

,176539,176548,176660,176708,176733,176744,176753,176832,176844,176866,176897,176904,176917,176925,176928,176932,176934,176945,176953,177005,177031,177035,177046,177082,177102,177103,177105,177116,177133,177174,177191,177192,177201,177204,177214,177225,177230,177251,177264,177271,177275,177276,177287,177314,177336,177368,177376,177380,177399,177417,177517,177523,177554,177607,177624,177645,177660,177693,177718,177741,177770,177771,177785,177800,177821,177844,177853,177862,177888,177907,177909,177910,177983,177988,178028,178068,178119,178178,178200,178215,178261,178277,178297,178300,178301,178304,178383,178398,178404,178415,178419,178452,178522,178577,178578,178585,178613,178676,178719,178740,178770,178816,178831,178833,178849,178925,179020,179046,179058,179090,179141,179147,179165,179168,179217,179248,179254,179296,179299,179306,179320,179323,179324,179393,179407,179414,179416,179435,179444,179456,179457,179489,179503,179522,179564,179580,179608,179616,179620,179623,179626,179641,17964

,245819,245828,245834,245838,245877,245912,245917,245931,245933,245991,245992,246012,246023,246039,246050,246052,246055,246122,246125,246138,246153,246184,246189,246192,246203,246242,246247,246250,246267,246275,246306,246315,246326,246331,246349,246401,246403,246419,246438,246460,246465,246467,246491,246505,246508,246513,246514,246522,246532,246533,246577,246578,246620,246631,246648,246665,246669,246685,246686,246694,246713,246725,246740,246748,246829,246851,246857,246858,246867,246885,246917,246936,246961,246965,246990,247020,247023,247052,247060,247096,247106,247114,247117,247153,247159,247165,247175,247178,247179,247180,247206,247219,247242,247283,247321,247394,247396,247437,247457,247474,247496,247524,247527,247529,247565,247575,247619,247667,247677,247678,247687,247696,247729,247732,247739,247753,247799,247818,247846,247853,247862,247876,247886,247908,247925,247927,247934,247940,247950,247960,247968,247973,247976,248045,248055,248076,248124,248129,248130,248131,248140,248153,24818

,268970,269108,269129,269139,269162,269169,269180,269186,269195,269222,269261,269279,269294,269308,269309,269318,269334,269374,269380,269442,269467,269472,269490,269515,269548,269561,269565,269573,269589,269603,269665,269673,269709,269718,269738,269740,269747,269755,269765,269766,269770,269801,269811,269832,269853,269859,269883,269897,269901,269919,269926,269978,269995,269999,270001,270003,270067,270071,270079,270150,270196,270221,270230,270263,270264,270300,270313,270326,270333,270334,270351,270368,270378,270380,270391,270424,270427,270434,270461,270477,270514,270542,270590,270604,270683,270695,270708,270735,270777,270801,270803,270804,270807,270817,270841,270847,270883,270885,270937,270973,270996,271004,271007,271036,271065,271074,271075,271088,271092,271094,271155,271216,271237,271243,271281,271324,271331,271363,271367,271390,271406,271407,271421,271428,271433,271440,271454,271459,271462,271469,271489,271512,271534,271540,271552,271554,271578,271584,271600,271605,271625,271639,27165

,291038,291041,291053,291062,291064,291070,291103,291116,291117,291127,291158,291161,291234,291291,291295,291324,291341,291346,291349,291355,291378,291396,291414,291417,291459,291466,291500,291507,291536,291569,291601,291680,291716,291727,291737,291774,291775,291781,291802,291817,291871,291905,291910,291928,291960,291961,291964,292004,292006,292051,292054,292067,292069,292112,292121,292146,292163,292197,292267,292285,292292,292325,292333,292380,292384,292402,292432,292474,292501,292511,292564,292574,292591,292592,292628,292636,292651,292654,292661,292662,292674,292675,292755,292756,292875,292903,292905,292954,292958,292988,292995,293047,293083,293151,293170,293178,293200,293201,293242,293273,293337,293338,293359,293418,293513,293548,293586,293613,293614,293721,293841,293844,293848,293877,293931,293965,294001,294006,294031,294043,294048,294051,294055,294062,294065,294113,294136,294171,294206,294209,294244,294267,294272,294293,294361,294371,294372,294383,294441,294462,294484,294494,29449

,313352,313357,313362,313366,313388,313409,313431,313437,313498,313504,313507,313512,313520,313533,313536,313559,313576,313581,313588,313657,313672,313677,313679,313683,313712,313713,313743,313766,313777,313793,313813,313818,313825,313872,313924,313935,313957,313963,313981,313986,314002,314021,314025,314044,314056,314064,314066,314084,314085,314129,314140,314144,314148,314149,314187,314197,314208,314232,314239,314241,314248,314263,314297,314299,314316,314329,314330,314361,314382,314398,314416,314418,314454,314477,314523,314534,314544,314665,314679,314686,314692,314708,314739,314745,314756,314768,314777,314795,314803,314813,314840,314849,314863,314889,314895,314906,314929,314968,314976,314986,314994,315002,315012,315025,315027,315033,315060,315072,315086,315103,315146,315150,315189,315191,315225,315242,315246,315299,315363,315392,315405,315409,315415,315424,315436,315444,315454,315482,315521,315543,315597,315599,315630,315643,315725,315735,315737,315738,315740,315763,315764,315776,31580