In [2]:
import numpy as np
np.random.seed(42)
import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score

from keras.models import Model
from keras.layers import Input, Dense, Dropout, Conv1D, Embedding, SpatialDropout1D, concatenate
from keras.layers import GRU, Bidirectional, GlobalAveragePooling1D, GlobalMaxPooling1D
from keras.preprocessing import text, sequence
from keras.callbacks import Callback

import warnings
warnings.filterwarnings('ignore')

import os
import pathlib
# Set OMP_NUM_THREADS to '4' in this process's environment.
# NOTE(review): this assignment runs after numpy and keras have already been
# imported above — confirm the setting still takes effect at this point.
os.environ['OMP_NUM_THREADS'] = '4'
# The data-file paths in later cells are built from the current working directory.
# NOTE(review): `dir` shadows the Python builtin of the same name; the name is
# kept because later cells reference it.
dir = os.getcwd()
print(dir)

Using TensorFlow backend.


C:\Users\marti\Documents\Python_Scripts


In [18]:
# Tokenizer / padding / embedding sizes used throughout the notebook:
#   max_features -> passed to the tokenizer as num_words
#   maxlen       -> length every sequence is padded/truncated to
#   embed_size   -> width of each embedding vector
max_features = 30000
maxlen = 100
embed_size = 300

# All input files sit in one folder below the working directory.
data_dir = os.path.join(dir, 'toxic_comments_data')
EMBEDDING_FILE = os.path.join(data_dir, 'crawl-300d-2M.vec')

train = pd.read_csv(os.path.join(data_dir, 'train.csv'))
test = pd.read_csv(os.path.join(data_dir, 'test.csv'))
submission = pd.read_csv(os.path.join(data_dir, 'sample_submission.csv'))

# Raw comment strings (missing comments replaced by the literal "fillna")
# and the six binary label columns.
label_columns = ["toxic", "severe_toxic", "obscene", "threat", "insult", "identity_hate"]
X_train = train["comment_text"].fillna("fillna").values
y_train = train[label_columns].values
X_test = test["comment_text"].fillna("fillna").values

# Fit one tokenizer on train and test text together, then turn each comment
# into a list of word indices.  Note that X_train / X_test are rebound here
# from raw strings to index sequences.
tokenizer = text.Tokenizer(num_words=max_features)
tokenizer.fit_on_texts([*X_train, *X_test])
X_train = tokenizer.texts_to_sequences(X_train)
X_test = tokenizer.texts_to_sequences(X_test)

# Fixed-length integer matrices that are fed to the model.
x_train = sequence.pad_sequences(X_train, maxlen=maxlen)
x_test = sequence.pad_sequences(X_test, maxlen=maxlen)

In [3]:
def get_coefs(word, *arr):
    """Return ``(word, vector)`` where ``vector`` is ``arr`` parsed as a float32 array."""
    return word, np.asarray(arr, dtype='float32')


# Read the pretrained vectors into a {word: vector} dict.
# - The file is opened in a `with` block so the handle is always closed.
# - Splitting from the right at most `embed_size` times keeps a token that
#   itself contains spaces intact as the word.
# - Lines that do not carry exactly `embed_size` components are skipped, such
#   as the "<count> <dim>" header that .vec files normally begin with
#   (NOTE(review): confirm for this file).  Without the filter such a line is
#   stored as a short vector that could later be broadcast into a matrix row.
embeddings_index = {}
with open(EMBEDDING_FILE, encoding='utf-8') as embedding_file:
    for line in embedding_file:
        fields = line.rstrip().rsplit(' ', embed_size)
        if len(fields) != embed_size + 1:
            continue
        word, vector = get_coefs(*fields)
        embeddings_index[word] = vector

word_index = tokenizer.word_index
# Number of distinct token ids that can occur in the padded sequences:
# index 0 (padding) plus the tokenizer's word indices, capped at max_features.
nb_words = min(max_features, len(word_index) + 1)
# The matrix always has max_features rows so that it matches
# Embedding(max_features, embed_size) in get_model(), even when the vocabulary
# is smaller than max_features.  Sizing it by len(word_index) would raise an
# IndexError for the highest word index in that case.  Rows for padding and
# for words without a pretrained vector stay all-zero.
embedding_matrix = np.zeros((max_features, embed_size))
for word, i in word_index.items():
    if i >= max_features:
        continue
    embedding_vector = embeddings_index.get(word)
    if embedding_vector is not None:
        embedding_matrix[i] = embedding_vector


class RocAucEvaluation(Callback):
    """Keras callback that prints the validation ROC-AUC after selected epochs.

    Parameters
    ----------
    validation_data : tuple
        ``(X_val, y_val)`` pair; ``X_val`` is passed to ``model.predict`` and
        ``y_val`` is compared against the predictions with ``roc_auc_score``.
        Anything that does not unpack into exactly two items raises
        ``ValueError`` (this includes the default ``()``).
    interval : int
        The score is computed when ``epoch % interval == 0``, where ``epoch``
        is the zero-based epoch index Keras passes to ``on_epoch_end``.
    """

    def __init__(self, validation_data=(), interval=1):
        # Name this class (not Callback) in super() so that Callback.__init__
        # itself runs; super(Callback, self) would skip it.
        super(RocAucEvaluation, self).__init__()

        self.interval = interval
        self.X_val, self.y_val = validation_data

    def on_epoch_end(self, epoch, logs=None):
        """Predict on the held-out data and print its ROC-AUC for this epoch.

        ``logs`` is accepted for API compatibility and is not used; it
        defaults to ``None`` rather than a shared mutable dict.
        """
        if epoch % self.interval == 0:
            y_pred = self.model.predict(self.X_val, verbose=0)
            score = roc_auc_score(self.y_val, y_pred)
            # Report the epoch one-based, like Keras' own progress output.
            print("\n ROC-AUC - epoch: %d - score: %.6f \n" % (epoch+1, score))


def get_model():
    """Build and compile the two-branch bidirectional GRU classifier.

    Architecture, in order:
      * integer input of length ``maxlen``
      * frozen embedding initialised from ``embedding_matrix``
      * spatial dropout (0.2)
      * two parallel bidirectional GRUs (128 and 64 units) over the same
        embedded sequence, concatenated
      * global average pooling and global max pooling, concatenated
      * Dense(64, relu) -> Dropout(0.2) -> Dense(6, sigmoid)

    Returns the model compiled with binary cross-entropy loss, the Adam
    optimizer and the accuracy metric.
    """
    tokens_in = Input(shape=(maxlen, ))

    embedded = Embedding(max_features, embed_size,
                         weights=[embedding_matrix], trainable=False)(tokens_in)
    embedded = SpatialDropout1D(0.2)(embedded)

    # Both recurrent branches read the same embedded sequence.
    wide_branch = Bidirectional(GRU(128, return_sequences=True))(embedded)
    narrow_branch = Bidirectional(GRU(64, return_sequences=True))(embedded)
    sequence_features = concatenate([wide_branch, narrow_branch])

    # Collapse the time axis two ways and keep both summaries.
    pooled_mean = GlobalAveragePooling1D()(sequence_features)
    pooled_max = GlobalMaxPooling1D()(sequence_features)
    pooled = concatenate([pooled_mean, pooled_max])

    hidden = Dense(64, activation='relu')(pooled)
    hidden = Dropout(0.2)(hidden)
    label_probs = Dense(6, activation="sigmoid")(hidden)

    classifier = Model(inputs=tokens_in, outputs=label_probs)
    classifier.compile(loss='binary_crossentropy',
                       optimizer='adam',
                       metrics=['accuracy'])
    return classifier

In [4]:
# Fresh, untrained network.
model = get_model()

batch_size = 32
epochs = 4

# Keep 95% of the padded training data for fitting; the remainder is the
# validation split (fixed random_state so the split is repeatable).
X_tra, X_val, y_tra, y_val = train_test_split(
    x_train, y_train, train_size=0.95, random_state=233)

# Print the validation ROC-AUC after every epoch.
RocAuc = RocAucEvaluation(validation_data=(X_val, y_val), interval=1)

fit_options = dict(
    batch_size=batch_size,
    epochs=epochs,
    validation_data=(X_val, y_val),
    callbacks=[RocAuc],
    verbose=2,
)
hist = model.fit(X_tra, y_tra, **fit_options)

Train on 151592 samples, validate on 7979 samples
Epoch 1/4
 - 3061s - loss: 0.0534 - acc: 0.9808 - val_loss: 0.0473 - val_acc: 0.9815

 ROC-AUC - epoch: 1 - score: 0.984913 

Epoch 2/4
 - 2360s - loss: 0.0442 - acc: 0.9832 - val_loss: 0.0477 - val_acc: 0.9824

 ROC-AUC - epoch: 2 - score: 0.986456 

Epoch 3/4
 - 2521s - loss: 0.0414 - acc: 0.9839 - val_loss: 0.0434 - val_acc: 0.9834

 ROC-AUC - epoch: 3 - score: 0.987820 

Epoch 4/4
 - 2793s - loss: 0.0391 - acc: 0.9846 - val_loss: 0.0442 - val_acc: 0.9836

 ROC-AUC - epoch: 4 - score: 0.987418 



In [4]:
# keras library import  for Saving and loading model and weights
from keras.models import model_from_json
from keras.models import load_model

# Serialize the model architecture to JSON (written to model_num.json in the
# current working directory).
# `model` must be the Keras model built and fitted in the training cell.
# NOTE(review): the recorded output of this cell is
# "NameError: name 'model' is not defined", i.e. it was last run in a kernel
# where the training cell had not been executed.
model_json = model.to_json()
with open("model_num.json", "w") as json_file:
    json_file.write(model_json)

# Serialize the weights to HDF5 (model_num.h5); the architecture and the
# weights are stored in two separate files.
model.save_weights("model_num.h5")
print("Saved model to disk")

NameError: name 'model' is not defined

In [12]:
from keras.models import model_from_json
from keras.models import load_model
# Rebuild the network from the files written by the save cell:
# architecture from model_num.json, weights from model_num.h5.
# The JSON file is read inside a `with` block so the handle is closed even if
# reading fails.
with open('model_num.json', 'r') as json_file:
    loaded_model_json = json_file.read()
loaded_model = model_from_json(loaded_model_json)

# Load the trained weights into the freshly built model.
loaded_model.load_weights("model_num.h5")
print("Loaded model from disk")

# submission[["toxic", "severe_toxic", "obscene", "threat", "insult", "identity_hate"]] = y_pred
# submission.to_csv('submission.csv', index=False)

Loaded model from disk


In [34]:
# Show the first row of the most recent prediction array.
# NOTE(review): `y_pred` is only assigned in cells that appear further down in
# this notebook (the single-comment and batch scoring cells), so this cell
# fails on a fresh kernel run top to bottom.
print(y_pred[0])

[0.9992951  0.49124226 0.9698933  0.11353455 0.9058463  0.54024   ]


In [37]:
# Inspect the first padded test sequence produced by pad_sequences(..., maxlen=maxlen).
x_test[0]

array([    0,     0,     0,     0,     0,     0,     0,     0,     0,
           0,     0,     0,     0,     0,     0,     0,     0,     0,
           0,     0,     0,     0,     0,     0,     0,     0,     0,
           0,     0,  2030,   378,  4878,   723,     8,    58, 20911,
          84,   888,   349,    16,  3439,    73,    21,     6,     5,
        6226,     6,  1555,     7,    56,   378,  5462,  1488,   578,
        5869,     5,    94,     6,     2,  3771,    30,   340,     6,
         742,    37,  4878,   723,     8,    35,  4222,    10,  1205,
         653,   400,   476, 17214,     9,   227,    15,   154,     5,
       20074,     8,   247, 23545,    48,  4329,    52,    24,     4,
        2108,   155,  2432,   578,  2428,    94,   218,   143,   490,
          85])

In [24]:
# Rebuild the raw text arrays and the tokenizer from the loaded frames.
# This lets the scoring cells below tokenize new comments with the same
# vocabulary settings without re-running training.
label_columns = ["toxic", "severe_toxic", "obscene", "threat", "insult", "identity_hate"]
X_train = train["comment_text"].fillna("fillna").values
y_train = train[label_columns].values
X_test = test["comment_text"].fillna("fillna").values

tokenizer = text.Tokenizer(num_words=max_features)
tokenizer.fit_on_texts([*X_train, *X_test])

In [69]:
# Score one hand-written comment with the reloaded model.
sample_comment = ' The full podcast is basically Joe slowly realising how stupid she is.'
test_text = np.asarray([sample_comment])

# Same preprocessing as the training data: word indices, then padding to maxlen.
test_text = sequence.pad_sequences(tokenizer.texts_to_sequences(test_text), maxlen=maxlen)

y_pred = loaded_model.predict(test_text, batch_size=1)
print(y_pred)

[[8.9901292e-01 4.6842294e-03 4.4172296e-01 1.1461467e-04 7.0217258e-01
  4.6742605e-03]]


In [28]:
import time
# Same values as in the data-preparation cell; repeated so this cell does not
# depend on that cell for `maxlen`.
max_features = 30000
maxlen = 100

# All ad-hoc evaluation CSVs live in one folder; build its path once.
test_comments_dir = os.path.join(dir, 'toxic_comments_data', 'Test_comments')
comments_1 = pd.read_csv(os.path.join(test_comments_dir, 'comments_1.csv'))
comments_2 = pd.read_csv(os.path.join(test_comments_dir, 'comments_2.csv'))
comments_3 = pd.read_csv(os.path.join(test_comments_dir, 'comments_3.csv'))
comments_6 = pd.read_csv(os.path.join(test_comments_dir, 'comments_6.csv'))
comments_7 = pd.read_csv(os.path.join(test_comments_dir, 'comments_7.csv'))
test_set = pd.read_csv(os.path.join(test_comments_dir, 'test_set.csv'))

# A comment is printed when any of its label probabilities exceeds this value.
toxicity_threshold = 0.7

comments = list(comments_7.commentText.dropna())
# comments = list(test_set.response_text.dropna())

if comments:
    # Tokenize, pad and score every comment in a single predict() call
    # instead of one model call per comment.
    batch_text = sequence.pad_sequences(tokenizer.texts_to_sequences(comments), maxlen=maxlen)
    batch_pred = loaded_model.predict(batch_text)

    for row, comment in enumerate(comments):
        # One-row slices keep the 2-D shape, so the printed prediction has
        # the same [[...]] form as a single-comment predict() result.
        test_text = batch_text[row:row + 1]
        y_pred = batch_pred[row:row + 1]
        if np.max(y_pred) > toxicity_threshold:
            print(comment)
            print(y_pred)

If you want peace, kill Muslims.
That's why she deserves Nobel Peace Prize.
[[0.7944584  0.00676924 0.00956635 0.13648553 0.02499313 0.09236971]]
Fuck you suu kyi..😈
[[9.9988544e-01 4.1168723e-01 9.9604321e-01 9.2781533e-04 8.6239576e-01
  7.3795612e-03]]
I hate this lady so much liar,evil lady wht world understand u dnt even understand ur people kill her
[[0.9611193  0.07327862 0.16908044 0.49127504 0.2956517  0.09923927]]
She is a criminal
[[8.3324951e-01 7.3963245e-03 2.1604773e-01 4.4844084e-04 6.5641588e-01
  2.7586030e-02]]
she go to hell
[[0.88119465 0.03954444 0.45534056 0.01832457 0.2528188  0.00821172]]
Fuck this lady
[[9.9884903e-01 1.5832728e-01 9.8931038e-01 2.9136700e-04 4.1622603e-01
  7.9854028e-03]]
ethnic cleansing supporter worst then jews disgusting to hear this
[[0.8504329  0.00650707 0.05890111 0.00193571 0.1727419  0.49509707]]
You can tells a lie but the truth will come soon. IMAM MAHDI will come soon. at that time MUSLIM WILL CONQUER THE WORLD we will win. and 

It's nothing to do with ethnic cleansing. Its Muslims attacking innocent Buddhists. There is no evidence that the majority attacked were Muslims.Wherever you go in this world Muslims cause trouble, hatred, violence, rape and warfare.
BBC News stands for Bullsh!t, Bollox and Crap.-BBC!
[[0.9830993  0.11204119 0.78876686 0.02183425 0.6276446  0.5178848 ]]
muslim is the evil person not she
[[8.987336e-01 6.562608e-03 6.740591e-02 8.093695e-04 2.736241e-01
  5.563846e-01]]
this woman is a crazy b a danger to alot of people  she has hate in her heart..
[[0.7892826  0.02105279 0.21182166 0.0024215  0.4534782  0.15193698]]
Dictatorship ur ASS evil, lying, shameless, ignorant old lady. Go to HELL!
[[0.9993261  0.14428481 0.95708656 0.00221294 0.9033021  0.02195533]]
This is terrible....but Muslims can turn even Buddhists evil
[[7.1782309e-01 6.2300911e-04 1.2110421e-02 2.3169791e-04 6.9393173e-02
  3.3875728e-01]]
shame on you you have to b hanged till death
[[0.9480783  0.11408016 0.35414025 

fuck you suki kutiya
[[9.9988544e-01 4.1168723e-01 9.9604321e-01 9.2781533e-04 8.6239576e-01
  7.3795612e-03]]
Is funny how people's forget she a hero 
" she live too long so now people's start saying shit " 
You peoples are full of shit 
. What the hell you peoples did in your life time ?
[[0.9483623  0.05360865 0.7966768  0.00310505 0.589695   0.00294379]]
BBC your full of shit
[[9.9663287e-01 1.8362920e-01 9.6279979e-01 8.3260966e-04 6.0418952e-01
  1.0127333e-02]]
Mentally sick person same as hitter, KILLING INNOCENT PEOPLE, SHAM ON YOU :(
[[0.96691126 0.06388285 0.3007158  0.09935297 0.5398196  0.04709036]]
So if she has a nobel peace prize she shouldn't stand up for her nation and what she feels is right to do. She is not for violence and yet you all make her responsible for it. Since when have you all become Myanmar experts? Ya'll can't locate it on a map and your leaders have done FAR worse, regardless which country you come from. Don't believe mainstream media you idiots!
[[7.

  2.6540931e-03]]
This WOMEN is equal to PIG
[[9.2249602e-01 1.8179946e-02 2.8493482e-01 7.1460049e-04 4.9590448e-01
  2.8705302e-01]]
damn on you
[[0.9165237  0.05972848 0.72145855 0.00683948 0.31731564 0.00364652]]
God will burn u inshAllah soon
[[0.87908757 0.07677148 0.14222799 0.5968305  0.18116291 0.03481374]]
Fuck you
[[9.9988544e-01 4.1168723e-01 9.9604321e-01 9.2781533e-04 8.6239576e-01
  7.3795612e-03]]
Narendr modi come and fuck u
[[0.999551   0.37152275 0.99058527 0.0020544  0.8215328  0.02311196]]
Modi will fuck u
[[0.9991203  0.39308226 0.98441213 0.00748854 0.77181864 0.03027426]]
Fuck shut down your dirty ass stupid president
[[0.9999938  0.5620904  0.9982668  0.00364978 0.9773846  0.05904443]]
I want to killing you
[[0.9166844  0.13466349 0.24734895 0.67557955 0.2348302  0.04016709]]
Kill her that's all I want to say
[[0.8291163  0.05490987 0.07077903 0.5789595  0.08301718 0.01978594]]
Very much cheating talking from ASS....Aung. Sung. Suki. Got it?
[[8.01066339e-01 5.

BBC = BULLSHIT BROADCASTING SERVICE remote controlled by muslims in UK
[[0.9788765  0.03923975 0.8032874  0.00245399 0.31096268 0.33559972]]
this woman she is evil. kelling muslim with her barbarik army in then she say boudist fraid ...  you having laugh u need to get executed you in how give the nobel peace for you crimes evil 😈😈😈
[[0.9026582  0.12106474 0.16033217 0.17943433 0.27149934 0.37412953]]
Fuk you
[[0.99631834 0.25524443 0.9604416  0.00314591 0.7514892  0.01576443]]
you are right mom.
[[0.72263396 0.03859231 0.28843233 0.01017126 0.44469452 0.00761688]]
if moslems not the main target.  then why dont u feel ashamed they left ur country? because u wanted their bloods.. fake kyi.. shame on u
[[0.7510962  0.01444393 0.06809282 0.00915139 0.19840027 0.2714454 ]]
she is racist very cruel woman of world go to hell hitler woman😡
[[0.9953701  0.02462328 0.4054978  0.0021533  0.74117243 0.6312183 ]]
Fuckin hore
[[0.99923134 0.39473024 0.98089546 0.00467346 0.7915476  0.23024088]]
Budd

F*************🔫.....................K
[[0.95213073 0.10407079 0.85949117 0.00101225 0.27849957 0.01226303]]
Fuck
[[9.9911982e-01 1.6554387e-01 9.9380738e-01 2.0298212e-04 3.2275587e-01
  3.1347675e-03]]
Fuck
[[9.9911982e-01 1.6554387e-01 9.9380738e-01 2.0298212e-04 3.2275587e-01
  3.1347675e-03]]
Fuck
[[9.9911982e-01 1.6554387e-01 9.9380738e-01 2.0298212e-04 3.2275587e-01
  3.1347675e-03]]
Fuck
[[9.9911982e-01 1.6554387e-01 9.9380738e-01 2.0298212e-04 3.2275587e-01
  3.1347675e-03]]
Fuck u without condom
[[9.9982411e-01 4.2628688e-01 9.9462563e-01 6.7310844e-04 7.4161083e-01
  3.6900762e-02]]
Fuck u without condom
[[9.9982411e-01 4.2628688e-01 9.9462563e-01 6.7310844e-04 7.4161083e-01
  3.6900762e-02]]
Fuck u without condom
[[9.9982411e-01 4.2628688e-01 9.9462563e-01 6.7310844e-04 7.4161083e-01
  3.6900762e-02]]
Fuck u
[[0.99980456 0.43162653 0.9951982  0.00108273 0.8218396  0.02122294]]
Fuck fuck fuck uuuuuuuuiuu
[[9.9988461e-01 4.0557566e-01 9.9780637e-01 6.4504275e-04 6.1717790e-01


stupid slut twisting the question
[[9.9766040e-01 8.9290731e-02 9.5565134e-01 4.1947435e-04 8.6603963e-01
  4.9539503e-02]]
Hey BITCH....Here is the definition for ETHNIC CLEANSING: "the practice of removing or killing people who belong to an ethnic group that is different from the ruling group in a country or region." 
What a coward!!....My apologies to the poor people of Burma.
[[0.9886669  0.18368587 0.90190977 0.03651769 0.83790016 0.2343775 ]]
Bullshit.
[[9.79853511e-01 1.75788589e-02 9.32486475e-01 6.86069252e-05
  1.13192506e-01 6.48815185e-04]]
she is duff
she is old
she is racist.

so go to HELL.
F**k You.
[[0.9974613  0.14569457 0.92330587 0.00333305 0.8874762  0.08566389]]
she is fake bitch
[[9.9918920e-01 1.7883857e-01 9.7961539e-01 5.9212925e-04 8.9898866e-01
  2.0300711e-02]]
Ass kyi looks like an old haggerd lady boy... should be locked up in her cottage for anothe 15 years
[[9.4983613e-01 2.2616748e-02 8.4126288e-01 4.0247355e-04 4.7246519e-01
  2.1771705e-03]]
u bitch 

In [10]:
# Re-extract the raw comment strings from the training frame (the tokenization
# cell rebinds X_train to index sequences) and show the first comment.
X_train = train["comment_text"].fillna("fillna").values
print(X_train[0])

Explanation
Why the edits made under my username Hardcore Metallica Fan were reverted? They weren't vandalisms, just closure on some GAs after I voted at New York Dolls FAC. And please don't remove the template from the talk page since I'm retired now.89.205.38.27
