# Quora Insincere Questions Classification
> 0.699 finally


In [None]:
import os
import sys
import gc
import glob
import time
import re
import random
import numpy as np
import pandas as pd
from string import punctuation
from collections import Counter, OrderedDict
from tqdm import tqdm
tqdm.pandas()

import tensorflow as tf
import keras
from keras import Model
from keras.layers import *
from keras.layers.merge import _Merge
from keras.models import *
from keras.initializers import *
from keras.optimizers import *
from keras.callbacks import *
from keras.regularizers import *
from keras import backend as K
from keras.legacy import interfaces
from keras.engine.topology import Layer
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
from keras.utils.generic_utils import serialize_keras_object
from keras.utils.generic_utils import deserialize_keras_object
from keras.utils import multi_gpu_model

from sklearn.metrics import f1_score, recall_score, precision_score, roc_auc_score
from sklearn.model_selection import train_test_split, KFold, StratifiedKFold, StratifiedShuffleSplit
from sklearn.preprocessing import StandardScaler

In [None]:
SEED = 2018
# Reproducibility: seed every random number generator this notebook touches.
# NOTE: PYTHONHASHSEED is read when the interpreter starts, so setting it here
# only affects child processes, not hash randomisation in this process.
os.environ['PYTHONHASHSEED'] = str(SEED)
# Python's built-in RNG (`random` is imported above but was never seeded).
random.seed(SEED)
# NumPy
np.random.seed(SEED)
# TensorFlow graph-level seed (TF 1.x API)
tf.set_random_seed(SEED)
# TF session limited to one intra-op and one inter-op thread, with
# allow_growth enabled on the GPU options, registered as the Keras session.
session_conf = tf.ConfigProto(intra_op_parallelism_threads=1, inter_op_parallelism_threads=1)
session_conf.gpu_options.allow_growth = True
sess = tf.Session(graph=tf.get_default_graph(), config=session_conf)
K.set_session(sess)
# data
max_features = 95000  # passed to Tokenizer(num_words=...) and used as the Embedding input size
maxlen = 72  # every question is padded/truncated to this many tokens

In [None]:
# Wall-clock start, reported at the very end of the notebook.
start_time = time.time()
train = pd.read_csv("../input/train.csv")
test = pd.read_csv("../input/test.csv")
print("Train shape: ", train.shape)
print("Test shape: ", test.shape)
# Submission frame. Take an explicit copy so that adding the 'prediction'
# column later writes to an independent frame instead of a slice of `test`
# (which would raise pandas' SettingWithCopyWarning).
sub = test[['qid']].copy()

In [None]:
def load_emb(filename):
    """Load a whitespace-separated word-vector text file into a dict.

    Each line is split on single spaces; the first field is the token and the
    remaining fields are parsed as a float32 vector.

    Args:
        filename: Path to the embedding file. If the path contains
            "wiki-news-300d-1M.vec", the file is opened with the platform
            default encoding and lines of 100 characters or fewer are skipped
            (this drops the short header line). Any other file is opened with
            the 'latin' encoding and every line is used.

    Returns:
        dict mapping token -> 1-D float32 numpy array.
    """
    def get_coefs(word, *arr):
        return word, np.asarray(arr, dtype='float32')

    # Context managers make sure the (multi-GB) files are closed as soon as
    # the dict has been built, instead of leaking the handle.
    if "wiki-news-300d-1M.vec" in filename:
        with open(filename) as f:
            return dict(get_coefs(*o.split(" ")) for o in f if len(o) > 100)
    with open(filename, encoding='latin') as f:
        return dict(get_coefs(*o.split(" ")) for o in f)

# Paths to the two pre-trained embedding files used by this notebook
# (GloVe 840B and Paragram SL999, 300-d per their file names).
glove = '../input/embeddings/glove.840B.300d/glove.840B.300d.txt'
para = "../input/embeddings/paragram_300_sl999/paragram_300_sl999.txt"
print("Extracting embedding")
# Each call returns a dict mapping token -> float32 vector; neither path
# contains "wiki-news-300d-1M.vec", so both files are read with 'latin' encoding.
embeddings_index_glove = load_emb(glove)
embeddings_index_para = load_emb(para)

In [None]:
def build_vocab(texts):
    """Count whitespace-separated tokens over a pandas Series of strings.

    Returns a plain dict mapping token -> number of occurrences, with keys in
    first-seen order.
    """
    counts = {}
    for tokens in texts.apply(lambda x: x.split()).values:
        for token in tokens:
            counts[token] = counts.get(token, 0) + 1
    return counts

def check_coverage(vocab, embeddings_index):
    """Report how much of a vocabulary is covered by an embedding index.

    Prints the share of distinct words and the share of all token occurrences
    that have an entry in ``embeddings_index``.

    Args:
        vocab: dict mapping word -> occurrence count.
        embeddings_index: mapping of word -> vector (only membership is used
            for the statistics).

    Returns:
        List of ``(word, count)`` tuples for the words missing from
        ``embeddings_index``, most frequent first.
    """
    # Imported locally because `operator` is not imported at file level.
    from operator import itemgetter

    known_words = {}
    unknown_words = {}
    nb_known_words = 0
    nb_unknown_words = 0
    for word, count in vocab.items():
        # Explicit membership test instead of a bare `except:` that would
        # also swallow unrelated errors.
        if word in embeddings_index:
            known_words[word] = embeddings_index[word]
            nb_known_words += count
        else:
            unknown_words[word] = count
            nb_unknown_words += count

    # Guard both ratios so an empty vocabulary reports 0% instead of raising.
    nb_total_words = nb_known_words + nb_unknown_words
    vocab_ratio = len(known_words) / len(vocab) if vocab else 0.0
    text_ratio = nb_known_words / nb_total_words if nb_total_words else 0.0
    print('Found embeddings for {:.3%} of vocab'.format(vocab_ratio))
    print('Found embeddings for  {:.3%} of all text'.format(text_ratio))

    # Ascending stable sort followed by a reversal keeps the original tie order.
    return sorted(unknown_words.items(), key=itemgetter(1))[::-1]

def add_lower(embedding, vocab):
    """Give lowercase aliases to embedded words whose lowercase form is missing.

    For each word in ``vocab`` that has a vector in ``embedding`` while its
    ``.lower()`` form does not, the vector is stored under the lowercase key
    as well. ``embedding`` is modified in place; the number of added keys is
    printed and nothing is returned.
    """
    added = 0
    for word in vocab:
        lowered = word.lower()
        if word not in embedding or lowered in embedding:
            continue
        embedding[lowered] = embedding[word]
        added += 1
    print(f"Added {added} words to embedding")

In [None]:
# Collect every non-alphanumeric character that occurs in the train and test
# questions, then derive two sets from it:
#   puncs  - all such characters except the plain space
#   unpunc - the subset of puncs that is not in string.punctuation
tmp = []
for x in train.question_text:
    for c in x:
        if c.isalnum():
            continue
        tmp.append(c)
for x in test.question_text:
    for c in x:
        if c.isalnum():
            continue
        tmp.append(c)
puncs = set(tmp)
puncs.discard(' ')
unpunc = puncs.difference(punctuation)

In [None]:
# Lowercase English contractions -> expanded form. Keys use the ASCII
# apostrophe, so typographic quotes must be normalised before lookup.
# Fixes relative to the previous literal:
#   * `"haven ' t""he'd"` was an implicit string concatenation that produced
#     the junk key "haven ' the'd" and left "he'd" unmapped;
#   * the block of "i'd" ... "i've" entries was listed twice.
contraction = {
    "ain't": "is not", "aren't": "are not", "can't": "cannot", "'cause": "because",
    "could've": "could have", "couldn't": "could not", "didn't": "did not",
    "doesn't": "does not", "don't": "do not", "hadn't": "had not", "hasn't": "has not",
    "haven't": "have not", "he'd": "he would", "he'll": "he will", "he's": "he is",
    "how'd": "how did", "how'd'y": "how do you", "how'll": "how will", "how's": "how is",
    "i'd": "i would", "i'd've": "i would have", "i'll": "i will", "i'll've": "i will have",
    "i'm": "i am", "i've": "i have",
    "isn't": "is not", "it'd": "it would", "it'd've": "it would have", "it'll": "it will",
    "it'll've": "it will have", "it's": "it is", "let's": "let us", "ma'am": "madam",
    "mayn't": "may not", "might've": "might have", "mightn't": "might not",
    "mightn't've": "might not have",
    "must've": "must have", "mustn't": "must not", "mustn't've": "must not have",
    "needn't": "need not",
    "needn't've": "need not have", "o'clock": "of the clock", "oughtn't": "ought not",
    "oughtn't've": "ought not have", "shan't": "shall not", "sha'n't": "shall not",
    "shan't've": "shall not have", "she'd": "she would", "she'd've": "she would have",
    "she'll": "she will", "she'll've": "she will have", "she's": "she is",
    "should've": "should have",
    "shouldn't": "should not", "shouldn't've": "should not have", "so've": "so have",
    "so's": "so as",
    "this's": "this is", "that'd": "that would", "that'd've": "that would have",
    "that's": "that is",
    "there'd": "there would", "there'd've": "there would have", "there's": "there is",
    "here's": "here is",
    "they'd": "they would", "they'd've": "they would have", "they'll": "they will",
    "they'll've": "they will have", "they're": "they are", "they've": "they have",
    "to've": "to have",
    "wasn't": "was not", "we'd": "we would", "we'd've": "we would have", "we'll": "we will",
    "we'll've": "we will have", "we're": "we are", "we've": "we have", "weren't": "were not",
    "what'll": "what will", "what'll've": "what will have", "what're": "what are",
    "what's": "what is",
    "what've": "what have", "when's": "when is", "when've": "when have", "where'd": "where did",
    "where's": "where is", "where've": "where have", "who'll": "who will",
    "who'll've": "who will have",
    "who's": "who is", "who've": "who have", "why's": "why is", "why've": "why have",
    "will've": "will have", "won't": "will not", "won't've": "will not have",
    "would've": "would have",
    "wouldn't": "would not", "wouldn't've": "would not have", "y'all": "you all",
    "y'all'd": "you all would", "y'all'd've": "you all would have", "y'all're": "you all are",
    "y'all've": "you all have", "you'd": "you would", "you'd've": "you would have",
    "you'll": "you will",
    "you'll've": "you will have", "you're": "you are", "you've": "you have",
}

# Misspellings, British spellings and glued-together / out-of-vocabulary
# terms -> replacement text. Keys are matched against text that has already
# been lowercased, so every key must itself be lowercase.
# Fixes relative to the previous literal:
#   * removed the empty-string key ('': ''): in a regex alternation an empty
#     alternative always matches, so every key listed after it never matched.
#     NOTE(review): the original key may have been an invisible character
#     (e.g. a zero-width space) lost in export - re-add it explicitly as an
#     escape such as '\u200b' if that was the intent;
#   * 'GDPR' / 'DCEU' lowercased so they can match the lowercased text;
#   * 'cryto currencies' / 'cyrpto currency' replacement values corrected.
mispell = {
    'colour': 'color', 'centre': 'center', 'favourite': 'favorite', 'travelling': 'traveling',
    'counselling': 'counseling', 'theatre': 'theater', 'cancelled': 'canceled',
    'labour': 'labor',
    'organisation': 'organization', 'wwii': 'world war 2', 'citicise': 'criticize',
    'youtu': 'youtube ',
    'qoura': 'quora', 'quorans': 'quora users', 'quoran': 'quora user', 'sallary': 'salary',
    'whta': 'what',
    'narcisist': 'narcissist', 'howdo': 'how do', 'whatare': 'what are', 'howcan': 'how can',
    'howmuch': 'how much',
    'howmany': 'how many', 'whydo': 'why do', 'doi': 'do i', 'thebest': 'the best',
    'howdoes': 'how does',
    'mastrubation': 'masturbation', 'mastrubate': 'masturbate', "mastrubating": 'masturbating',
    'pennis': 'penis', 'etherium': 'ethereum', 'narcissit': 'narcissist', 'bigdata': 'big data',
    '2k15': '2015', '2k16': '2016', '2k17': '2017', '2k18': '2018', 'qouta': 'quota',
    'exboyfriend': 'ex boyfriend',
    'airhostess': 'air hostess', "whst": 'what', 'watsapp': 'whatsapp',
    'demonitisation': 'demonetization',
    'demonitization': 'demonetization', 'demonetisation': 'demonetization',
    'pokémon': 'pokemon',
    'nanodegree': 'nano degree', 'brexit': 'british exit',
    'cryptocurrencies': 'crypto currencies',
    'coinbase': 'coin base', 'oneplus': 'one plus', 'redmi': 'red mi',
    'gdpr': 'general data protection regulation',
    'dceu': 'dc extended universe', 'litecoin': 'lite coin', 'unacademy': 'non academy',
    'altcoin': 'bitcoin alternative',
    'altcoins': 'bitcoin alternative', 'sjw': 'social justice warriors',
    'sjws': 'social justice warriors',
    'fiancé': 'fiance', 'microservices': 'micro services', 'bitconnect': 'bit connect',
    'codeforces': 'code forces',
    'wannacry': 'wanna cry', 'onedrive': 'one drive', 'airpods': 'air pods',
    'twinflame': 'twin flame',
    'undergraduation': 'under graduation', 'cos2x': 'cos 2 x', 'yourquote': 'your quote',
    'xiomi': 'xiaomi',
    'undertale': 'under tale', 'genderfluid': 'gender fluid', 'são': 'sao',
    'chapterwise': 'chapter wise',
    'deepmind': 'deep mind', 'arrowverse': 'arrow verse', 'overbrace': ' ',
    'tensorflow': 'tensor flow',
    'hackerrank': 'hacker rank', 'microservice': 'micro service', 'reactjs': 'react js',
    'hackerearth': 'hacker earth',
    'fiancée': 'fiance', 'blockchains': 'block chains', 'beyoncé': 'beyonce',
    'neuralink': 'neura link',
    'openai': 'open ai', 'zoomcar': 'zoom car', 'hyperconjugation': 'hyper conjugation',
    'autoencoder': 'auto encoder',
    'webassembly': 'web assembly', 'quoras': 'quora', 'digilocker': 'digi locker',
    'oversmart': 'over smart',
    'cryptocoins': 'crypto coins', 'crytocurrencies': 'crypto currencies',
    'cyrptocurrency': 'crypto currency',
    'café': 'cafe', 'whatapp': 'whatsapp', 'gaslighter': 'gas lighter', 'darkweb': 'dark web',
    'webnovel': 'web novel',
}

In [None]:
def replace_quote(text):
    """Return ``text`` with apostrophe look-alikes replaced by ASCII "'".

    The acute accent, both typographic single quotes and the backtick are
    each mapped to a plain apostrophe.
    """
    table = str.maketrans({'´': "'", '‘': "'", '’': "'", "`": "'"})
    return text.translate(table)
                      
def re_mapping(mapping, whole_words=False):
    """Compile one regex that matches any key of ``mapping``.

    The pattern has a single capture group, so ``match.group(0)`` is always
    one of the (non-empty) keys and can be looked up in ``mapping`` directly.

    Args:
        mapping: dict whose keys are the literal strings to search for.
        whole_words: When True, a key only matches if it is not directly
            preceded or followed by a word character. The default (False)
            keeps plain substring matching.
            NOTE(review): with substring matching, short keys also fire
            inside longer words (e.g. 'doi' inside 'doing').

    Returns:
        A compiled ``re.Pattern``. If ``mapping`` has no usable key the
        pattern never matches.
    """
    # Empty keys are dropped: an empty alternative matches everywhere and
    # prevents every alternative after it from being tried.
    # Longest keys go first so "i'd've" wins over its prefix "i'd".
    keys = sorted((key for key in mapping if key), key=len, reverse=True)
    if not keys:
        return re.compile(r'(?!)')
    # Keys are literals, not patterns, so regex metacharacters are escaped.
    body = '(%s)' % '|'.join(re.escape(key) for key in keys)
    if whole_words:
        body = r'(?<!\w)' + body + r'(?!\w)'
    return re.compile(body)

# Combined lookup table: contractions first, then spelling fixes.
# Merging the dicts directly (instead of going through a set of items) keeps
# a deterministic key order. The regex alternation is built from that order,
# and a hash-seed-dependent order made overlapping keys resolve differently
# from one run to the next.
mapping = {**contraction, **mispell}
re_map = re_mapping(mapping)


def replace_mapping(text):
    """Replace every occurrence of a ``mapping`` key in ``text`` by its value.

    Uses the pre-compiled module-level ``re_map`` pattern; each match is
    looked up in ``mapping`` via ``match.group(0)``.
    """
    return re_map.sub(lambda match: mapping[match.group(0)], text)

def sep_punc(x):
    """Surround each entry of the global ``puncs`` found in ``x`` with spaces.

    Every occurrence of a punctuation mark ``p`` becomes ``' p '``.
    """
    padded = x
    for mark in puncs:
        padded = padded.replace(mark, ' ' + mark + ' ')
    return padded

def replace_numbers(x):
    """Mask digit runs in ``x`` with '#' placeholders.

    Runs of five or more digits become '#####'; remaining runs of four, three
    and two digits become '####', '###' and '##'. Single digits are kept.
    The substitutions are applied in that order.
    """
    masks = (
        ('[0-9]{5,}', '#####'),
        ('[0-9]{4}', '####'),
        ('[0-9]{3}', '###'),
        ('[0-9]{2}', '##'),
    )
    for pattern, placeholder in masks:
        x = re.sub(pattern, placeholder, x)
    return x

def add_features(df):
    """Add hand-crafted text statistics of ``question_text`` to ``df``.

    ``df`` is modified in place and also returned. ``progress_apply`` is used
    throughout, so ``tqdm.pandas()`` must have been called beforehand.

    Columns written: question_text (cast to str), num_chars, num_words,
    num_capital, capital_rate, num_uniquewords, num_exc, num_q, "num_,",
    "num_.", mean_word_len, max_word_len, num_unpunc (characters from the
    global ``unpunc`` set) and num_punc (characters from string.punctuation).

    Note: capital_rate is num_capital / num_words and is therefore NaN for a
    question without any word.
    """
    def _word_lengths(text):
        # Lengths of the whitespace-separated tokens of ``text``.
        return [len(w) for w in text.split()]

    def _mean_word_len(text):
        # 0.0 for empty or whitespace-only text, instead of np.mean([]) -> NaN
        # plus a RuntimeWarning.
        lengths = _word_lengths(text)
        return np.mean(lengths) if lengths else 0.0

    def _max_word_len(text):
        # default=0 keeps max() from raising ValueError when there is no token.
        return max(_word_lengths(text), default=0)

    df['question_text'] = df['question_text'].progress_apply(str)
    df['num_chars'] = df['question_text'].progress_apply(len)
    # Raw string: '\S' in a normal string literal is an invalid escape sequence.
    df['num_words'] = df.question_text.str.count(r'\S+')

    df['num_capital'] = df['question_text'].progress_apply(lambda x: sum(1 for c in x if c.isupper()))
    df['capital_rate'] = df['num_capital'] / df['num_words']

    df['num_uniquewords'] = df['question_text'].progress_apply(lambda x: len(set(x.split())))
    df["num_exc"] = df["question_text"].progress_apply(lambda x: x.count("!")).astype('uint16')
    df["num_q"] = df['question_text'].progress_apply(lambda x: x.count("?")).astype('uint16')
    df["num_,"] = df['question_text'].progress_apply(lambda x: x.count(",")).astype('uint16')
    df["num_."] = df['question_text'].progress_apply(lambda x: x.count(".")).astype('uint16')
    df["mean_word_len"] = df["question_text"].progress_apply(_mean_word_len)
    df["max_word_len"] = df['question_text'].progress_apply(_max_word_len)

    df["num_unpunc"] = df["question_text"].progress_apply(lambda x: sum(x.count(p) for p in unpunc)).astype('uint16')
    df["num_punc"] = df["question_text"].progress_apply(lambda x: sum(x.count(p) for p in punctuation)).astype('uint16')

    return df

In [None]:
# Columns produced by add_features that are fed to the model as dense
# side-features; the order defines the column order of the feature matrix.
feature_cols = [
    'capital_rate',
    'num_chars',
    'num_words',
    "max_word_len",
    "mean_word_len",
    'num_capital',
    "num_punc",
    'num_uniquewords',
    "num_q",
    "num_unpunc",
    "num_exc",
]

In [None]:
# Hand-crafted statistics, computed on the text before any of the cleaning
# steps that follow.
train = add_features(train)
test = add_features(test)

# Standardise the selected columns (missing values -> 0). The scaler is
# fitted on train and test stacked together.
features = train[feature_cols].fillna(0)
test_features = test[feature_cols].fillna(0)
ss = StandardScaler().fit(np.vstack((features, test_features)))
features = ss.transform(features)
test_features = ss.transform(test_features)
print("Add features done")

# Token counts of the training text, taken before lowercasing so that
# add_lower below still sees the original casing.
vocab = build_vocab(train['question_text'])

# Lower
train["question_text"] = train["question_text"].str.lower()
test["question_text"] = test["question_text"].str.lower()
print("Lower done")

# Give lowercase keys to embedded words that only exist in cased form
add_lower(embeddings_index_glove, vocab)
add_lower(embeddings_index_para, vocab)

# Normalise apostrophe variants
train['question_text'] = train['question_text'].progress_apply(replace_quote)
test['question_text'] = test['question_text'].progress_apply(replace_quote)
print("Replace quote done")

# Expand contractions and fix known misspellings
train['question_text'] = train['question_text'].progress_apply(replace_mapping)
test['question_text'] = test['question_text'].progress_apply(replace_mapping)
print("Replace mapping done")

# Put spaces around punctuation marks
train['question_text'] = train['question_text'].progress_apply(sep_punc)
test['question_text'] = test['question_text'].progress_apply(sep_punc)
print("Sep punc done")

# Mask multi-digit numbers with '#'
train['question_text'] = train['question_text'].progress_apply(replace_numbers)
test['question_text'] = test['question_text'].progress_apply(replace_numbers)
print("Replace numbers done")

In [None]:
# Raw cleaned strings and labels.
Y = train['target'].values
X = train["question_text"].fillna("_na_").values
T_X = test["question_text"].fillna("_na_").values

# One tokenizer fitted on train and test together; filters='' disables the
# tokenizer's own character filtering.
tokenizer = Tokenizer(num_words=max_features, filters='')
tokenizer.fit_on_texts(X.tolist() + T_X.tolist())

# Texts -> integer sequences padded to maxlen.
X = pad_sequences(tokenizer.texts_to_sequences(X), maxlen=maxlen)
T_X = pad_sequences(tokenizer.texts_to_sequences(T_X), maxlen=maxlen)

word_index = tokenizer.word_index
print(len(word_index))

In [None]:
# The raw frames are no longer referenced below; drop them and collect.
del train
del test
gc.collect()

In [None]:
def get_coefs(word, *arr):
    """Return ``(word, vector)`` with the remaining arguments as a float32 array."""
    vector = np.asarray(arr, dtype='float32')
    return word, vector

def build_emb(embeddings_index, max_features, word_index):
    """Build the embedding matrix for a tokenizer vocabulary.

    Rows without a pre-trained vector are drawn from a normal distribution
    with the mean and standard deviation of all pre-trained values (uses the
    global NumPy RNG).

    Args:
        embeddings_index: dict mapping word -> 1-D vector (all of equal length).
        max_features: upper bound on the number of rows; words whose index is
            at or above the row count are skipped.
        word_index: dict mapping word -> integer row index. Row 0 is expected
            to be unused - Keras' Tokenizer starts at index 1.

    Returns:
        Array of shape ``(min(max_features, len(word_index) + 1), emb_size)``.
    """
    # np.stack needs a real sequence; a dict_values view is rejected by
    # current NumPy releases.
    all_embs = np.stack(list(embeddings_index.values()))
    emb_mean, emb_std = all_embs.mean(), all_embs.std()
    emb_size = all_embs.shape[1]

    # Indices are 1-based, so a vocabulary of N words needs N + 1 rows.
    # Without the +1 the word with the highest index fell outside the matrix
    # whenever the vocabulary was smaller than max_features.
    nb_words = min(max_features, len(word_index) + 1)
    embedding_matrix = np.random.normal(emb_mean, emb_std, (nb_words, emb_size))
    for word, i in word_index.items():
        if i >= nb_words:
            continue
        embedding_vector = embeddings_index.get(word)
        if embedding_vector is not None:
            embedding_matrix[i] = embedding_vector
    return embedding_matrix

In [None]:
# One matrix per embedding source, then their element-wise average.
emb_glove = build_emb(embeddings_index_glove, max_features, word_index)
emb_para = build_emb(embeddings_index_para, max_features, word_index)
emb = np.asarray([emb_glove, emb_para]).mean(axis=0)
print(emb.shape)

In [None]:
class Attention(Layer):
    """Attention pooling over the step axis of a 3-D input.

    Each timestep is scored by a dot product with a learned weight vector
    ``W`` (plus an optional per-step bias ``b``), passed through ``tanh`` and
    ``exp``, and normalised over the steps. The output is the weighted sum of
    the input over axis 1, with shape ``(batch, features)``.
    """

    def __init__(self, step_dim,
                 W_regularizer=None, b_regularizer=None,
                 W_constraint=None, b_constraint=None,
                 bias=True, **kwargs):
        """Store the configuration.

        Args:
            step_dim: number of timesteps used to reshape the scores in
                ``call``. It is expected to equal ``input_shape[1]``, since
                the bias is created with that length and added to the
                reshaped scores.
            W_regularizer, b_regularizer: regularizer identifiers for W and b.
            W_constraint, b_constraint: constraint identifiers for W and b.
            bias: whether to add the per-step bias ``b``.
            **kwargs: forwarded to ``Layer.__init__``.
        """
        self.supports_masking = True
        self.init = initializers.get('glorot_uniform')

        self.W_regularizer = regularizers.get(W_regularizer)
        self.b_regularizer = regularizers.get(b_regularizer)

        self.W_constraint = constraints.get(W_constraint)
        self.b_constraint = constraints.get(b_constraint)

        self.bias = bias
        self.step_dim = step_dim
        # Placeholder; set to the real feature size in build().
        self.features_dim = 0
        super(Attention, self).__init__(**kwargs)

    def build(self, input_shape):
        """Create W (one weight per feature) and, if enabled, b (one per step)."""
        assert len(input_shape) == 3

        # NOTE(review): the shape is passed as the first positional argument
        # together with a `name=` keyword - presumably relies on Keras'
        # legacy add_weight argument handling; confirm before upgrading Keras.
        self.W = self.add_weight((input_shape[-1],),
                                 initializer=self.init,
                                 name='{}_W'.format(self.name),
                                 regularizer=self.W_regularizer,
                                 constraint=self.W_constraint)
        self.features_dim = input_shape[-1]

        if self.bias:
            self.b = self.add_weight((input_shape[1],),
                                     initializer='zero',
                                     name='{}_b'.format(self.name),
                                     regularizer=self.b_regularizer,
                                     constraint=self.b_constraint)
        else:
            self.b = None

        self.built = True

    def compute_mask(self, input, input_mask=None):
        """Return None: no mask is passed on to the following layers."""
        return None

    def call(self, x, mask=None):
        """Return the attention-weighted sum of ``x`` over axis 1."""
        features_dim = self.features_dim
        step_dim = self.step_dim

        # Score every timestep: flatten to (-1, features), multiply by W as a
        # column vector, then fold back to (-1, step_dim).
        eij = K.reshape(K.dot(K.reshape(x, (-1, features_dim)),
                        K.reshape(self.W, (features_dim, 1))), (-1, step_dim))

        if self.bias:
            eij += self.b

        eij = K.tanh(eij)

        # Unnormalised weights.
        a = K.exp(eij)

        # Zero out masked timesteps before normalising.
        if mask is not None:
            a *= K.cast(mask, K.floatx())

        # Normalise over the step axis; epsilon guards against a zero sum.
        a /= K.cast(K.sum(a, axis=1, keepdims=True) + K.epsilon(), K.floatx())

        # Add a trailing axis so the weights broadcast over the features.
        a = K.expand_dims(a)
        weighted_input = x * a
        return K.sum(weighted_input, axis=1)

    def compute_output_shape(self, input_shape):
        """Output shape is (batch, features_dim)."""
        return input_shape[0],  self.features_dim


class AttentivePooling(Layer):
    """Softmax-weighted pooling over axis 1 of a 3-D input.

    Every step is scored with ``tanh(x . W + b)`` where ``W`` has shape
    ``(features, 1)``; the scores are turned into weights with a softmax and
    the input is summed over axis 1 with those weights. The output has shape
    ``(batch, features)``. Masks are neither used nor propagated.
    """

    def __init__(self, W_regularizer=None, b_regularizer=None, **kwargs):
        """Store the optional regularizers for W and b.

        Args:
            W_regularizer: regularizer identifier for the weight matrix.
            b_regularizer: regularizer identifier for the bias.
            **kwargs: forwarded to ``Layer.__init__``.
        """
        self.supports_masking = False
        self.W_regularizer = regularizers.get(W_regularizer)
        self.b_regularizer = regularizers.get(b_regularizer)
        super(AttentivePooling, self).__init__(**kwargs)

    def build(self, input_shape):
        """Create W and b and register their regularisation losses."""
        n_in = input_shape[2]
        n_out = 1
        # Uniform bound sqrt(6 / (n_in + n_out)); the original author labels
        # this a Xavier initializer for tanh.
        lim = np.sqrt(6. / (n_in + n_out))
        self.W = K.random_uniform_variable((n_in, n_out), -lim, lim,
                                           name='{}_W'.format(self.name))
        self.b = K.zeros((n_out,), name='{}_b'.format(self.name))
        self.trainable_weights = [self.W, self.b]
        self.regularizer = []
        if self.W_regularizer is not None:
            self.add_loss(self.W_regularizer(self.W))
        if self.b_regularizer is not None:
            self.add_loss(self.b_regularizer(self.b))
        # Mark the layer as built. The previous `self.build = True` replaced
        # the build *method* with a boolean instead of setting this flag.
        self.built = True

    def call(self, inputs, mask=None):
        """Return the attention-pooled input; the weights are kept in ``self.alfa``."""
        memory = inputs
        print('memory shape', K.int_shape(memory))
        gi = K.tanh(K.dot(memory, self.W) + self.b)  # (batch, steps, 1)
        gi = K.sum(gi, axis=-1)  # drop the size-1 axis -> (batch, steps)
        alfa = K.softmax(gi)
        self.alfa = alfa
        # Weighted sum over the step axis -> (batch, features)
        output = K.sum(memory * K.expand_dims(alfa, axis=-1), axis=1)
        print('output shape', K.int_shape(output))
        return output

    def compute_output_shape(self, input_shape):
        """Output shape is (batch, features)."""
        shape = input_shape
        shape = list(shape)

        return (shape[0], shape[2])

    def compute_mask(self, inputs, mask=None):
        """Return None: no mask is passed on to the following layers."""
        return None

In [None]:
def f1_smart(y_true, y_pred):
    """Find the F1-maximising cut between consecutive sorted predictions.

    Args:
        y_true: 1-D numpy array of 0/1 labels.
        y_pred: 1-D numpy array of scores, same length as ``y_true``.

    Returns:
        ``(best_f1, threshold)`` where ``threshold`` is the midpoint between
        the two neighbouring sorted predictions at the best cut.
    """
    order = np.argsort(y_pred)
    positives = y_true.sum()
    n_samples = y_true.shape[0]
    # Cutting after the k-th smallest prediction leaves these true positives
    # above the cut ...
    tp_above = positives - np.cumsum(y_true[order[:-1]])
    # ... and (predicted positives + actual positives) equals n + P - 1 - k.
    denom = np.arange(n_samples + positives - 1, positives, -1)
    half_f1 = tp_above / denom
    best = np.argmax(half_f1)
    lower = y_pred[order[best]]
    upper = y_pred[order[best + 1]]
    return 2 * half_f1[best], (lower + upper) / 2

def threshold_search(y_true, y_pred):
    """Grid-search the decision threshold that maximises F1.

    Tries the thresholds 0.00, 0.01, ..., 0.99 and labels a sample positive
    when its score is strictly greater than the threshold.

    Returns:
        ``(best_score, best_threshold)``; both stay 0 if no threshold yields
        a positive F1.
    """
    best_score, best_threshold = 0, 0
    for step in range(100):
        threshold = step * 0.01
        score = f1_score(y_true=y_true, y_pred=(y_pred > threshold).astype(int))
        if score > best_score:
            best_score, best_threshold = score, threshold
    return best_score, best_threshold

In [None]:
class LstmAtn():
    """Builder for the sequence-only BiLSTM -> BiGRU attention classifier."""

    def model(self, embedding_matrix, maxlen, max_features):
        """Build and compile the network.

        Args:
            embedding_matrix: pre-trained weights for the frozen Embedding layer.
            maxlen: length of the padded token sequences.
            max_features: vocabulary size given to the Embedding layer.

        Returns:
            A compiled Keras ``Model`` with a single sigmoid output.
        """
        tokens = Input(shape=(maxlen,))
        embedded = Embedding(max_features, embedding_matrix.shape[1],
                             weights=[embedding_matrix], trainable=False)(tokens)
        dropped = SpatialDropout1D(0.2)(embedded)
        lstm_seq = Bidirectional(CuDNNLSTM(64, return_sequences=True))(dropped)
        gru_seq = Bidirectional(CuDNNGRU(64, return_sequences=True))(lstm_seq)

        # Four pooled views: attention over each recurrent output, plus
        # average and max pooling of the GRU output.
        pooled = concatenate([
            Attention(maxlen)(lstm_seq),
            Attention(maxlen)(gru_seq),
            GlobalAveragePooling1D()(gru_seq),
            GlobalMaxPooling1D()(gru_seq),
        ])
        hidden = Dense(16, activation="relu")(pooled)
        hidden = Dropout(0.1)(hidden)
        output = Dense(1, activation="sigmoid")(hidden)

        model = Model(inputs=tokens, outputs=output)
        model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
        return model

class LstmFAtn():
    """Builder for the BiLSTM -> BiGRU attention classifier with side-features."""

    def model(self, embedding_matrix, maxlen, max_features):
        """Build and compile the two-input network.

        Inputs are the padded token sequence ('seq') and the dense feature
        vector ('feature'), whose width is ``len(feature_cols)``.

        Args:
            embedding_matrix: pre-trained weights for the frozen Embedding layer.
            maxlen: length of the padded token sequences.
            max_features: vocabulary size given to the Embedding layer.

        Returns:
            A compiled Keras ``Model`` with a single sigmoid output.
        """
        inp_seq = Input(shape=(maxlen,), name='seq')
        inp_feature = Input(shape=(len(feature_cols),), name='feature')
        embedded = Embedding(max_features, embedding_matrix.shape[1],
                             weights=[embedding_matrix], trainable=False)(inp_seq)
        dropped = SpatialDropout1D(0.2)(embedded)
        lstm_seq = Bidirectional(CuDNNLSTM(64, return_sequences=True))(dropped)
        gru_seq = Bidirectional(CuDNNGRU(64, return_sequences=True))(lstm_seq)

        # Pooled sequence views followed by the hand-crafted features.
        merged = concatenate([
            Attention(maxlen)(lstm_seq),
            Attention(maxlen)(gru_seq),
            GlobalAveragePooling1D()(gru_seq),
            GlobalMaxPooling1D()(gru_seq),
            inp_feature,
        ])
        hidden = Dense(32, activation='relu', kernel_initializer=glorot_normal(seed=SEED))(merged)
        hidden = Dropout(0.1)(hidden)

        output = Dense(1, activation="sigmoid")(hidden)
        model = Model(inputs=[inp_seq, inp_feature], outputs=output)
        model.compile(loss='binary_crossentropy', optimizer='adam')
        return model

In [None]:
# Registry for trained models (nothing is appended to it in this part of the file).
models = list()
# When True, train_pred adds a WarmUp() callback; no WarmUp class is defined
# in the visible source, so keep this off unless one is provided.
warmup = False

In [None]:
def train_pred(model, epochs, X_train, X_val, T_X, Y_train, Y_val, mm=False):
    """Fit ``model``, restore its best checkpoint and predict val/test.

    The weights with the lowest validation loss are written to
    "best_weights.h5" and reloaded before predicting. Afterwards the Keras
    session and the default TF graph are reset.

    Args:
        model: compiled Keras model.
        epochs: number of training epochs.
        X_train, Y_train: training inputs and labels.
        X_val, Y_val: validation inputs and labels.
        T_X: test inputs.
        mm: unused.

    Returns:
        ``(pred_val_y, pred_test_y, best_score, best_thresh, best_loss)``:
        squeezed validation and test predictions, the best validation F1 with
        its threshold from ``f1_smart``, and the minimum validation loss.
    """
    filepath = "best_weights.h5"
    reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=1, min_lr=0.0001, verbose=2)
    logloss = ModelCheckpoint(filepath, monitor='val_loss', verbose=2, save_best_only=True, mode='min')

    # Checkpoint first, optional warm-up (global `warmup` flag) in the middle,
    # learning-rate reduction last.
    callbacks = [logloss]
    if warmup:
        callbacks.append(WarmUp())
    callbacks.append(reduce_lr)

    history = model.fit(
        X_train, Y_train,
        batch_size=512,
        epochs=epochs,
        validation_data=(X_val, Y_val),
        verbose=2,
        callbacks=callbacks,
    )
    best_loss = np.min(history.history['val_loss'])

    # Predict with the best checkpoint rather than the last-epoch weights.
    model.load_weights(filepath)
    pred_val_y = np.squeeze(model.predict(X_val, batch_size=1024, verbose=2))
    pred_test_y = np.squeeze(model.predict(T_X, batch_size=1024, verbose=2))
    best_score, best_thresh = f1_smart(Y_val, pred_val_y)
    print('Optimal F1: {:.4f} at threshold: {:.4f}'.format(best_score, best_thresh))

    # Drop the local reference and reset the backend state.
    del model
    gc.collect()
    K.clear_session()
    tf.reset_default_graph()
    return pred_val_y, pred_test_y, best_score, best_thresh, best_loss

# Stratified k-fold training: every fold trains a fresh LstmFAtn model,
# fills its slice of the out-of-fold predictions and adds 1/kfolds of its
# test prediction to the running test average.
kfolds = 5
epochs = 5
kf = StratifiedKFold(n_splits=kfolds, random_state=26, shuffle=True).split(X, Y)
loss, thresh = [], []
train_meta = np.zeros(Y.shape)
test_meta = np.zeros(T_X.shape[0])
x_test = [T_X, test_features]

for i, (train_idx, valid_idx) in enumerate(kf):
    X_train, Y_train = X[train_idx], Y[train_idx]
    X_val, Y_val = X[valid_idx], Y[valid_idx]
    features_train, features_val = features[train_idx], features[valid_idx]
    x_train = [X_train, features_train]
    x_val = [X_val, features_val]

    model = LstmFAtn().model(emb, maxlen, max_features)
    if i == 0:
        print(model.summary())
    pred_val_y, pred_test_y, best_score, best_thresh, best_loss = train_pred(
        model, epochs, x_train, x_val, x_test, Y_train, Y_val)
    loss.append(best_loss)
    thresh.append(best_thresh)
    train_meta[valid_idx] = pred_val_y
    test_meta += pred_test_y / kfolds

# The final threshold is tuned on the out-of-fold predictions.
best_score, best_thresh = f1_smart(np.squeeze(Y), train_meta)
print('Optimal F1: {:.4f} at threshold: {:.4f}'.format(best_score, best_thresh))
print('mean_thresh: {:.4f} and mean_loss: {:.4f}'.format(np.mean(thresh), np.mean(loss)))
test_meta = test_meta.reshape(-1, 1)
pred_test_y = (test_meta > best_thresh).astype(int)

sub['prediction'] = pred_test_y
sub.to_csv("submission.csv", index=False)

In [None]:
# Wall-clock duration since start_time was taken.
total_time = time.time() - start_time
print("Took {:.2f} seconds".format(total_time))