In [1]:
%%time

import numpy as np
import pandas as pd
import pickle

# NOTE: pd.read_pickle unpickles arbitrary objects; only point it at files
# produced by a trusted pipeline.

# Test-set frame; later cells read its 'qid' and 'question_text' columns.
test_df = pd.read_pickle('test_df.pkl')

# Feature arrays for the three splits, loaded in train / val / test order.
X_train, X_val, X_test = (
    np.load(npy_path)
    for npy_path in ('X_train.npy', 'X_val.npy', 'X_test.npy')
)

# Pickled targets for the two labelled splits.
y_train, y_val = (
    pd.read_pickle(pkl_path)
    for pkl_path in ('y_train.pkl', 'y_val.pkl')
)

CPU times: user 32 ms, sys: 1.06 s, total: 1.09 s
Wall time: 1.09 s


In [2]:
from keras.models import load_model

# Restore the network saved in 'model.h5', then load the parameters stored
# separately in 'weights.h5' into it.
# NOTE(review): presumably 'weights.h5' holds a different (e.g. best) checkpoint
# than the one embedded in 'model.h5' -- confirm, otherwise the second call is redundant.
model = load_model('model.h5')
model.load_weights('weights.h5')

Using TensorFlow backend.


In [3]:
# Configure the restored model with binary cross-entropy loss, the Adam
# optimizer and accuracy as the reported metric.
# NOTE(review): the rest of this notebook only calls model.predict(); confirm
# whether compiling is actually needed for this inference-only use.
model.compile(loss='binary_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])

# Predict validation labels

In [4]:
%%time

# Score the validation features in batches of 1024. The array is passed as a
# single-element list of inputs; the result feeds the threshold search below.
pred_val = model.predict([X_val], batch_size=1024, verbose=1)

CPU times: user 28.7 s, sys: 6.94 s, total: 35.6 s
Wall time: 32 s


# Find best threshold

In [5]:
from sklearn.metrics import f1_score

def bestThreshold(y_true, y_pred, thresholds=None, verbose=True):
    """Pick the decision threshold that maximises the F1 score.

    Parameters
    ----------
    y_true : array-like
        Ground-truth labels, forwarded to ``f1_score`` unchanged.
    y_pred : array-like
        Predicted scores. For each candidate ``t`` they are binarised with
        the strict comparison ``y_pred > t``.
    thresholds : iterable of float, optional
        Candidate thresholds to evaluate, in order. Defaults to
        ``np.arange(0.1, 0.501, 0.01)``.
    verbose : bool, optional
        When true (the default) print the F1 score of every candidate and
        the final winner.

    Returns
    -------
    float
        The first candidate reaching the highest F1 score, or ``0`` when no
        candidate scores above zero.
    """
    if thresholds is None:
        thresholds = np.arange(0.1, 0.501, 0.01)

    # Convert once; the scores do not change between candidates.
    scores = np.asarray(y_pred)

    max_f1 = 0
    thres = 0
    for candidate in thresholds:
        cur_f1 = f1_score(y_true, scores > candidate)
        if verbose:
            print('Current threshold is {:.4f} with F1 score: {:.4f}'.format(candidate, cur_f1))
        # Strict '>' keeps the earliest candidate when several tie.
        if cur_f1 > max_f1:
            max_f1 = cur_f1
            thres = candidate
    if verbose:
        print('best threshold is {:.4f} with F1 score: {:.4f}'.format(thres, max_f1))
    return thres


# Tune the threshold on the validation split only; it is reused for the test set.
threshold = bestThreshold(y_val, pred_val)

best threshold is 0.3100 with F1 score: 0.6713


# Predict test labels

In [6]:
%%time

# Score the test features with the same batch size used for validation; the
# array is again passed as a single-element list of inputs.
pred_test = model.predict([X_test], batch_size=1024, verbose=1)

CPU times: user 5.75 s, sys: 1.36 s, total: 7.12 s
Wall time: 6.36 s


# Post-Analysis

In [7]:
# Copy the identifying columns out of test_df as plain arrays, so pred_df gets
# a fresh default index rather than inheriting test_df's.
pred_df = pd.DataFrame(
    {column: test_df[column].values for column in ("qid", "question_text")}
)
# Binarise the test scores with the threshold tuned on the validation split:
# 1 where the score is strictly above it, 0 otherwise.
pred_df['prediction'] = (pred_test > threshold).astype(int)

## Predicted sincere and insincere samples

In [8]:
# Show every column and row, and never truncate cell text, so full question
# strings are readable in the previews below.
pd.options.display.max_columns = None
pd.options.display.max_rows = None
# None is the supported "no limit" value; the legacy -1 was deprecated in
# pandas 1.0 and is rejected by newer releases.
pd.options.display.max_colwidth = None

In [18]:
# Export a fixed-size, reproducible sample of each predicted class for manual review.
SAMPLE_SIZE = 100   # rows requested per class
SAMPLE_SEED = 42    # fixed seed so re-running the notebook writes the same CSV

pd_sincere = pred_df.loc[pred_df['prediction'] == 0]
pd_insincere = pred_df.loc[pred_df['prediction'] == 1]

# DataFrame.sample raises ValueError when asked for more rows than exist
# (sampling without replacement), so cap the request at the class size.
pd_sincere = pd_sincere.sample(min(SAMPLE_SIZE, len(pd_sincere)), random_state=SAMPLE_SEED)
pd_insincere = pd_insincere.sample(min(SAMPLE_SIZE, len(pd_insincere)), random_state=SAMPLE_SEED)

# Sincere rows first, then insincere; the index is dropped from the file.
df = pd.concat([pd_sincere, pd_insincere])
df.to_csv("predictions.csv", index=False)

In [15]:
print("Sample insincere questions (predicted)")
# Cap the sample at the number of available rows (sample() raises ValueError
# otherwise) and seed it so the displayed rows are stable across re-runs.
predicted_insincere = pred_df.loc[pred_df['prediction'] == 1]
predicted_insincere.sample(min(100, len(predicted_insincere)), random_state=42)

Sample insincere questions (predicted)


Unnamed: 0,qid,question_text,prediction
45002,cbaac582ed4a38948782,How to I convence my mom for sex and how i talk about my sexual feeling to her?,1
51870,eb3751d6bbadc8863548,"Why don't more Americans understand that 'collusion' has no legal meaning, and is certainly not a crime?",1
33907,98e438d314fb5cb169c4,Why is LGBT reject by Indonesian people?,1
23153,68beffc5bc4dc08c8fb7,"Why, after all the questions about atheists, from people that believe in a god, didn't your god get rid of all the atheists?",1
9775,2ce5cd98998a0f94ff9e,"I'm Muslim, and when I went into my local cafe earlier with my wife's son to get my soy latte, I was outraged there wasn't an open window pointing East, so I could pray to Mecca. Should all Starbucks be rebuilt so all Muslim can worship in peace?",1
12068,36ada268d439c2fb5c18,How can I tell if my daughter is a slut?,1
4216,1356833262208b31607b,Why must compliments from men about a woman’s figure be considered as harassment when the very purpose of wearing tight clothing is to show off that figure?,1
39968,b4a9f5c47ed696a32ebf,"If the Nazis were responsible for huge amounts of grave war crimes, why do producers of fiction depict them in extremely silly ways?",1
21203,6048f8874369c49b15d7,Is there any concrete evidence that transgender individuals have sexually abused more children than priests have?,1
9275,2abc1b3a09116ca39114,"If a policeman see a guy raping a woman, can we put the guy in jail without the authorization of the woman if she doesn't want to bring the guy in jail, could it be like a murder?",1


In [16]:
print("Sample sincere questions (predicted)")
# Cap the sample at the number of available rows (sample() raises ValueError
# otherwise) and seed it so the displayed rows are stable across re-runs.
predicted_sincere = pred_df.loc[pred_df['prediction'] == 0]
predicted_sincere.sample(min(100, len(predicted_sincere)), random_state=42)

Sample sincere questions (predicted)


Unnamed: 0,qid,question_text,prediction
15502,461d1da9be337b5127e7,What makes a man unworthy of love?,0
37994,ab78767b12ad1b566c1f,Which subject are needed in college for architecture?,0
21069,5fa1f0bdae6b682ec99a,Has The Returned (French version) Season 2 been released on DVD in the US yet?,0
44516,c97fe962efad04978523,What invoicing/accounting tools do freelance writers use?,0
49593,e0d6516d2b58dfe45982,Would you dump a girl because she already talks about her pooping habits.?,0
40989,b9417c18379a5b1fb2c5,Can we plea personally to CBSE to give another chance in the same year before the final result/DMC exam because of sudden health issue on exam day?,0
229,012044f1825624a6044c,Is Athena Walker Christian?,0
1678,07d457014294c6378c39,"When people from China come to Scandinavia as tourists, what do they seek and want?",0
500,025cc251234471387bac,What are the specific bills governing all medical and recreational marijuana use in Massachusetts?,0
27990,7e460b3ae877800cd0c5,Where can I get DARPA_2009 DDOS dataset?,0
