In [1]:
%%time

import numpy as np
import pandas as pd
import pickle

# Arrays for the three splits, read from .npy files
# (presumably the model's input features -- confirm against the preprocessing step).
X_train, X_val, X_test = (
    np.load('X_train.npy'),
    np.load('X_val.npy'),
    np.load('X_test.npy'),
)

# Pickled label objects for the train and validation splits.
# NOTE(review): unpickling can execute arbitrary code -- only load files produced by a trusted pipeline.
y_train, y_val = pd.read_pickle('y_train.pkl'), pd.read_pickle('y_val.pkl')

# Pickled test frame; its "qid" and "question_text" columns are used when building the submission.
test_df = pd.read_pickle('test_df.pkl')

CPU times: user 36 ms, sys: 1.14 s, total: 1.17 s
Wall time: 1.17 s


In [2]:
from keras.models import load_model

# Restore the saved model from 'model.h5'.
model = load_model('model.h5')
# Then replace its weights with the ones stored in 'weights.h5'. Order matters:
# the weights file is applied on top of whatever load_model() restored.
# NOTE(review): presumably a separately saved checkpoint (e.g. best epoch) -- confirm.
model.load_weights('weights.h5')

Using TensorFlow backend.


In [3]:
# Configure the restored model with the Adam optimizer, binary cross-entropy loss
# and an accuracy metric.
# NOTE(review): the visible cells only call predict() on this model -- confirm
# whether this compile step is still required.
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

# Predict validation labels

In [4]:
%%time

# Score the validation inputs in batches of 1024 (with a progress bar);
# pred_val is later handed to bestThreshold() together with y_val.
pred_val = model.predict(
    [X_val],
    batch_size=1024,
    verbose=1,
)

CPU times: user 28.4 s, sys: 6.83 s, total: 35.2 s
Wall time: 31.5 s


# Find best threshold

In [5]:
from sklearn.metrics import f1_score

def bestThreshold(y_true, y_pred, low=0.1, high=0.5, step=0.01):
    """Grid-search the decision threshold that maximises the F1 score.

    Every candidate ``t`` in ``low, low + step, ..., high`` is scored with
    ``f1_score(y_true, y_pred > t)``. The first candidate that reaches the
    highest score wins; the result is printed as well as returned.

    Parameters
    ----------
    y_true : array-like
        Ground-truth labels, passed unchanged to ``f1_score``.
    y_pred : array-like
        Predicted scores; each one is compared (strictly greater than)
        against the candidate threshold.
    low, high : float, optional
        Inclusive bounds of the search grid. Defaults (0.1 and 0.5)
        reproduce the original hard-coded range.
    step : float, optional
        Spacing between candidate thresholds. Default 0.01.

    Returns
    -------
    float
        The best threshold found, or ``0`` when no candidate achieves an
        F1 score above zero.

    Raises
    ------
    ValueError
        If ``step`` is not positive.
    """
    if step <= 0:
        raise ValueError('step must be positive, got {}'.format(step))

    # Convert once up front: the scores do not change between candidates.
    scores = np.asarray(y_pred)

    max_f1 = 0
    thres = 0
    # The stop value is nudged a fraction of a step past `high` so that the
    # upper bound itself is part of the grid despite floating-point rounding.
    for candidate in np.arange(low, high + step / 10.0, step):
        cur_f1 = f1_score(y_true, scores > candidate)
        # Strict comparison keeps the lowest threshold among ties.
        if cur_f1 > max_f1:
            max_f1 = cur_f1
            thres = candidate
    print('best threshold is {:.4f} with F1 score: {:.4f}'.format(thres, max_f1))
    return thres
# Pick the cut-off on the validation split (labels y_val vs. model scores pred_val);
# the resulting value is applied to the test predictions when building the submission.
threshold = bestThreshold(y_val,pred_val)

best threshold is 0.3100 with F1 score: 0.6713


# Predict test labels

In [6]:
%%time

# Score the test inputs with the same batch size used for validation;
# pred_test is thresholded into 0/1 labels in the submission cell.
pred_test = model.predict(
    [X_test],
    batch_size=1024,
    verbose=1,
)

CPU times: user 5.76 s, sys: 1.28 s, total: 7.04 s
Wall time: 6.3 s


# Prepare submission

In [7]:
# Carry the id and text columns over as raw values (so the new frame gets a
# fresh default index), then attach the thresholded scores as a 0/1 column.
pred_df = pd.DataFrame(
    {column: test_df[column].values for column in ("qid", "question_text")}
)
pred_df['prediction'] = np.greater(pred_test, threshold).astype(int)

# Analyze train and test data

In [8]:
# Show every column and every row, and never truncate cell contents, so the
# full question text is readable in the samples displayed below.
pd.options.display.max_columns = None
pd.options.display.max_rows = None
# `None` is the supported way to disable truncation; the former `-1` sentinel
# was deprecated in pandas 1.0 and is rejected by newer releases.
pd.options.display.max_colwidth = None

In [9]:
# Show ten randomly chosen test questions that were labelled 1.
# No random_state is given, so a different sample appears on every run.
print("Sample insincere questions (predicted)")
pred_df[pred_df['prediction'].eq(1)].sample(10)

Sample insincere questions (predicted)


Unnamed: 0,qid,question_text,prediction
40712,b7fa8035853f137af2c0,Isn't it likely that Trump will do something with horrible consequences as a distraction if Mueller starts getting too close?,1
44223,c835db1cd1983406723e,Why do people throughout history misinterpret their Holy Books and are lead to religious hate and wars?,1
11387,33d79802c92a819675d2,"Why do mormons think their church is the only way to get to heaven when common intelligence tells us there is no god, no heaven and no hell?",1
1991,09316d9ebd493451db82,I can not stand white women does that make me racist?,1
39,003069ba70645b15c3ba,Why don't India start a War with Pakistan ? They Kill our Soldiers.,1
37685,aa0fea0bb59410c82638,Why do most Bengali Hindus (including Brahmins) support Mamata Banerjee (TMC) and hate the BJP? Is this because BJP wants to destroy the glorious Bengali culture?,1
11279,336df06f09f4906990b1,Why does rape and go up in countries that are accepting more Muslim immigrants?,1
39039,b0324bd8dc0a4165f578,What’s the difference between an American and a racist white person?,1
13462,3ce4d1e1b7b22ad0fb69,"Is it petty to kick my teenage daughter's boyfriend out the house, but let my son invite his boyfriend over for dinner? Is it me or does anyone else prefer gay male dating their son than a straight male dating their daughter?",1
27879,7dc1ab9c9094fd21f771,Jallikattu and why Tamilians should not seek Independence from India?,1


In [10]:
# Show ten randomly chosen test questions that were labelled 0.
# No random_state is given, so a different sample appears on every run.
print("Sample sincere questions (predicted)")
pred_df[pred_df['prediction'].eq(0)].sample(10)

Sample sincere questions (predicted)


Unnamed: 0,qid,question_text,prediction
48152,da10c033ce1b37a70057,"Before jumping into HC Verma, should I use another book for class 11?",0
21250,6076ad117f8a220a1e6e,What are the risks associated with fixed deposits?,0
16650,4b5246be944279abff15,Has anyone got his/her profile accepted in upwork in 2017? Anyone from India got his/her profile accepted?,0
13868,3f0319a8307b05b6ed6a,What was a typical workout for you when you were training for the Olympics?,0
23059,683dcd1debd30ea91c63,Can I apply for Ireland student visa before 6months of course start?,0
13878,3f10bd80ddf46e175dff,"One of my classmates who bullied me is an undercover agent. I am 100% confirm about this, it was crosschecked and I have evidence. How should I expose this information to hurt him the most professionally?",0
8104,25307509b3f3a66ae8ad,Have you ever seen your parents crying?,0
36476,a46c8a3bf3f161d5a2f3,What does the abbreviation I.M.H.O. stand for?,0
51337,e8b24e1184e9d992deba,"An 8 year old kid just served me dinner in a restaurant, what do I do?",0
35797,a1429c928412d19eba54,How can I make homemade T-shirts?,0
