In [3]:
import os                                   #imports
import tensorflow as tf
import time
import pandas as pd
import gensim.downloader as api
import numpy as np
from sklearn.model_selection import train_test_split
from tensorflow import keras
from tensorflow.keras.metrics import Precision,Recall,CategoricalAccuracy
from tensorflow.keras.callbacks import ModelCheckpoint,EarlyStopping
from tensorflow.keras.layers import Dropout,Dense,Embedding,LSTM,TextVectorization,Bidirectional,GRU
from tensorflow.keras.models import Sequential

In [4]:
df = pd.read_csv("train.csv") # reading the training dataset

In [5]:
df

Unnamed: 0,id,comment_text,toxic,severe_toxic,obscene,threat,insult,identity_hate
0,0000997932d777bf,Explanation\nWhy the edits made under my usern...,0,0,0,0,0,0
1,000103f0d9cfb60f,D'aww! He matches this background colour I'm s...,0,0,0,0,0,0
2,000113f07ec002fd,"Hey man, I'm really not trying to edit war. It...",0,0,0,0,0,0
3,0001b41b1c6bb37e,"""\nMore\nI can't make any real suggestions on ...",0,0,0,0,0,0
4,0001d958c54c6e35,"You, sir, are my hero. Any chance you remember...",0,0,0,0,0,0
...,...,...,...,...,...,...,...,...
159566,ffe987279560d7ff,""":::::And for the second time of asking, when ...",0,0,0,0,0,0
159567,ffea4adeee384e90,You should be ashamed of yourself \n\nThat is ...,0,0,0,0,0,0
159568,ffee36eab5c267c9,"Spitzer \n\nUmm, theres no actual article for ...",0,0,0,0,0,0
159569,fff125370e4aaaf3,And it looks like it was actually you who put ...,0,0,0,0,0,0


In [6]:
df.drop_duplicates(inplace = True)
df.drop("id",axis=1,inplace = True)

In [7]:
X = df["comment_text"]
y = df.iloc[:,1:].values

In [8]:
x_train,x_temp,y_train,y_temp = train_test_split(X,y,test_size = 0.3,random_state = 42)
x_val,x_test,y_val,y_test = train_test_split(x_temp,y_temp,test_size = 0.4,random_state = 42)

In [9]:
y

array([[0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0],
       ...,
       [0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0]])

In [10]:
max_features = 100000 # this is the number of words in the vocab
vectorizer = TextVectorization(max_tokens = max_features,
                               output_sequence_length=200,
                               output_mode = 'int')

In [11]:
vectorizer.adapt(x_train.values) # we are making the vectorizer learn the text

In [12]:
train_vectorized_text = vectorizer(x_train.values) #converting all the comments into vectors
val_vectorized_text = vectorizer(x_val.values)
test_vectorized_text = vectorizer(x_test.values)

In [13]:
train_dataset = tf.data.Dataset.from_tensor_slices((train_vectorized_text,y_train))
val_dataset = tf.data.Dataset.from_tensor_slices((val_vectorized_text,y_val))
test_dataset = tf.data.Dataset.from_tensor_slices((test_vectorized_text,y_test))

train_dataset = train_dataset.cache()
train_dataset = train_dataset.shuffle(160000)
train_dataset = train_dataset.batch(64)
train_dataset = train_dataset.prefetch(tf.data.AUTOTUNE)

val_dataset = val_dataset.batch(64)
test_dataset = test_dataset.batch(64)

val_dataset = val_dataset.prefetch(tf.data.AUTOTUNE)
test_dataset = test_dataset.prefetch(tf.data.AUTOTUNE)

In [14]:
glove = api.load("glove-wiki-gigaword-100") #GloVe integration
vocab = vectorizer.get_vocabulary()
embedding_matrix = []
embedding_dim = glove.vector_size
embedding_matrix = np.zeros((len(vocab),embedding_dim))
for idx,word in enumerate(vocab):
  if word in glove:
    embedding_matrix[idx] = glove[word]



In [15]:
hits = 0
for word in vocab:
    if word in glove:
        hits += 1

print("Coverage:", hits / len(vocab))
#as we can see the coverage is only 58% so we must keep the trainable parameter set to True during training

Coverage: 0.58313


In [16]:
def build_model(rnn_layer):
    tf.keras.backend.clear_session() # to clear gpu memory after each training
    model = Sequential([
        Embedding(input_dim = len(vocab),output_dim = embedding_dim,weights = [embedding_matrix],mask_zero = True,trainable = True), #using pretrained vectors for better results
        Bidirectional(rnn_layer(64)),
        Dense(128, activation='relu'),
        Dropout(0.2),
        Dense(128, activation='relu'),
        Dense(6, activation='sigmoid')
    ])
    model.compile(
        optimizer='adam',
        loss='binary_crossentropy',
        metrics=[tf.keras.metrics.AUC(name = "auc",multi_label=True),
                 tf.keras.metrics.Precision(name = "precision"),
                 tf.keras.metrics.Recall(name = "recall")]
    )

    return model

In [17]:
#for experimentation we do trainable false
def build_model_trainable_false(rnn_layer):
    tf.keras.backend.clear_session()
    model = Sequential([
        Embedding(input_dim = len(vocab),output_dim = embedding_dim,weights = [embedding_matrix],mask_zero = True,trainable = False),
        Bidirectional(rnn_layer(64)),
        Dense(128, activation='relu'),
        Dropout(0.2),
        Dense(128, activation='relu'),
        Dense(6, activation='sigmoid')
    ])
    model.compile(
        optimizer='adam',
        loss='binary_crossentropy',
        metrics=[tf.keras.metrics.AUC(name = "auc",multi_label=True),
                 tf.keras.metrics.Precision(name = "precision"),
                 tf.keras.metrics.Recall(name = "recall")]
    )

    return model

In [18]:
model_lstm = build_model(LSTM)
model_gru  = build_model(GRU)
model_lstm_trainable_false = build_model_trainable_false(LSTM)
model_gru_trainable_false  = build_model_trainable_false(GRU)

In [19]:
callbacks = [
    EarlyStopping(
        monitor="val_auc",
        patience=2,
        mode="max",
        restore_best_weights=True
    ),
    ModelCheckpoint(
        "best_model.keras",
        monitor="val_auc",
        mode="max",
        save_best_only=True
    )
]

In [20]:
start = time.time()
history_gru = model_gru.fit(train_dataset,epochs = 5, validation_data=val_dataset,callbacks = callbacks)
print("Time taken for training by GRU:", time.time() - start)

Epoch 1/5
[1m1746/1746[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m71s[0m 35ms/step - auc: 0.8859 - loss: 0.0889 - precision: 0.6794 - recall: 0.4774 - val_auc: 0.9732 - val_loss: 0.0454 - val_precision: 0.8183 - val_recall: 0.6788
Epoch 2/5
[1m1746/1746[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m51s[0m 29ms/step - auc: 0.9786 - loss: 0.0401 - precision: 0.8277 - recall: 0.7231 - val_auc: 0.9713 - val_loss: 0.0461 - val_precision: 0.7764 - val_recall: 0.7468
Epoch 3/5
[1m1746/1746[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m50s[0m 29ms/step - auc: 0.9884 - loss: 0.0307 - precision: 0.8471 - recall: 0.7970 - val_auc: 0.9626 - val_loss: 0.0484 - val_precision: 0.7986 - val_recall: 0.6936
Time taken for training by GRU: 172.68598008155823


In [21]:
start = time.time()
history_gru_trainable_false = model_gru_trainable_false.fit(train_dataset,epochs = 5, validation_data=val_dataset,callbacks = callbacks)
print("Time taken for training by GRU:", time.time() - start)

Epoch 1/5
[1m1746/1746[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m45s[0m 24ms/step - auc: 0.8916 - loss: 0.0907 - precision: 0.6825 - recall: 0.4435 - val_auc: 0.9687 - val_loss: 0.0520 - val_precision: 0.7977 - val_recall: 0.6464
Epoch 2/5
[1m1746/1746[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 23ms/step - auc: 0.9662 - loss: 0.0504 - precision: 0.8196 - recall: 0.6324 - val_auc: 0.9696 - val_loss: 0.0481 - val_precision: 0.8392 - val_recall: 0.6318
Epoch 3/5
[1m1746/1746[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 23ms/step - auc: 0.9766 - loss: 0.0451 - precision: 0.8292 - recall: 0.6690 - val_auc: 0.9720 - val_loss: 0.0470 - val_precision: 0.7976 - val_recall: 0.6965
Epoch 4/5
[1m1746/1746[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m42s[0m 24ms/step - auc: 0.9797 - loss: 0.0428 - precision: 0.8292 - recall: 0.6976 - val_auc: 0.9702 - val_loss: 0.0465 - val_precision: 0.8445 - val_recall: 0.6471
Epoch 5/5
[1m1746/1746[0m [32m━━━━━━━━━━━━━━━━━━━

In [22]:
start = time.time()
history_lstm = model_lstm.fit(train_dataset,epochs = 5, validation_data=val_dataset,callbacks = callbacks)
print("Time taken for training by LSTM:", time.time() - start)

Epoch 1/5
[1m1746/1746[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m57s[0m 31ms/step - auc: 0.8937 - loss: 0.0894 - precision: 0.6956 - recall: 0.4509 - val_auc: 0.9717 - val_loss: 0.0470 - val_precision: 0.8552 - val_recall: 0.6237
Epoch 2/5
[1m1746/1746[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m54s[0m 31ms/step - auc: 0.9761 - loss: 0.0413 - precision: 0.8275 - recall: 0.7018 - val_auc: 0.9699 - val_loss: 0.0455 - val_precision: 0.8008 - val_recall: 0.7144
Epoch 3/5
[1m1746/1746[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m52s[0m 30ms/step - auc: 0.9853 - loss: 0.0325 - precision: 0.8439 - recall: 0.7862 - val_auc: 0.9653 - val_loss: 0.0486 - val_precision: 0.7833 - val_recall: 0.7216
Time taken for training by LSTM: 162.50041127204895


In [23]:
start = time.time()
history_lstm_trainable_false = model_lstm_trainable_false.fit(train_dataset,epochs = 5, validation_data=val_dataset,callbacks = callbacks)
print("Time taken for training by LSTM:", time.time() - start)

Epoch 1/5
[1m1746/1746[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m46s[0m 25ms/step - auc: 0.8953 - loss: 0.0929 - precision: 0.6645 - recall: 0.4139 - val_auc: 0.9603 - val_loss: 0.0542 - val_precision: 0.8202 - val_recall: 0.5834
Epoch 2/5
[1m1746/1746[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m42s[0m 24ms/step - auc: 0.9611 - loss: 0.0530 - precision: 0.8127 - recall: 0.6124 - val_auc: 0.9699 - val_loss: 0.0493 - val_precision: 0.8302 - val_recall: 0.6221
Epoch 3/5
[1m1746/1746[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m82s[0m 24ms/step - auc: 0.9700 - loss: 0.0472 - precision: 0.8281 - recall: 0.6598 - val_auc: 0.9656 - val_loss: 0.0497 - val_precision: 0.8698 - val_recall: 0.5940
Epoch 4/5
[1m1746/1746[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m42s[0m 24ms/step - auc: 0.9756 - loss: 0.0440 - precision: 0.8339 - recall: 0.6803 - val_auc: 0.9716 - val_loss: 0.0471 - val_precision: 0.7805 - val_recall: 0.7122
Epoch 5/5
[1m1746/1746[0m [32m━━━━━━━━━━━━━━━━━━━

In [24]:
print(f"Max AUC for GRU: {max(history_gru.history['val_auc'])}")
print(f"Max AUC for LSTM: {max(history_lstm.history['val_auc'])}")

Max AUC for GRU: 0.9731629490852356
Max AUC for LSTM: 0.971712589263916


In [25]:
print(f"Max AUC for GRU [Frozen]: {max(history_gru_trainable_false.history['val_auc'])}")
print(f"Max AUC for LSTM [Frozen]: {max(history_lstm_trainable_false.history['val_auc'])}")

Max AUC for GRU [Frozen]: 0.972033679485321
Max AUC for LSTM [Frozen]: 0.9716079235076904


In [26]:
print(f"Max Precision for GRU: {max(history_gru.history['val_precision'])}")
print(f"Max Precision for LSTM: {max(history_lstm.history['val_precision'])}")

Max Precision for GRU: 0.818303644657135
Max Precision for LSTM: 0.855182945728302


In [27]:
print(f"Max Precision for GRU [Frozen]: {max(history_gru_trainable_false.history['val_precision'])}")
print(f"Max Precision for LSTM [Frozen]: {max(history_lstm_trainable_false.history['val_precision'])}")

Max Precision for GRU [Frozen]: 0.8445273637771606
Max Precision for LSTM [Frozen]: 0.869767427444458


In [28]:
print(f"Max Recall for GRU: {max(history_gru.history['val_recall'])}")
print(f"Max Recall for LSTM: {max(history_lstm.history['val_recall'])}")

Max Recall for GRU: 0.7468233704566956
Max Recall for LSTM: 0.7215692400932312


In [29]:
print(f"Max Recall for GRU [Frozen]: {max(history_gru_trainable_false.history['val_recall'])}")
print(f"Max Recall for LSTM [Frozen]: {max(history_lstm_trainable_false.history['val_recall'])}")

Max Recall for GRU [Frozen]: 0.6964739561080933
Max Recall for LSTM [Frozen]: 0.7121981978416443


In [30]:
def predictor(ip_text,model_name):
  pred = model_name.predict(np.expand_dims(ip_text,0))
  labels = ["toxic",'severe_toxic',"obscene","threat","insult","identity_hate"]
  threshold = 0.5
  pred_labels = (pred > threshold).astype(int)
  flagged = False
  print("This comment is: ")
  for i in range(len(labels)):
    if pred_labels[0][i] == 1:
      print(labels[i])
      flagged = True
  if flagged == False:
    print("safe")

In [31]:
ip_text = vectorizer("I absolutely hate you!")
predictor(ip_text,model_lstm)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 270ms/step
This comment is: 
safe


In [32]:
ip_text = vectorizer("I absolutely hate you!")
predictor(ip_text,model_gru)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 310ms/step
This comment is: 
toxic


In [33]:
ip3_text = vectorizer("I will kill you!")
predictor(ip3_text,model_lstm)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 48ms/step
This comment is: 
toxic


In [34]:
ip3_text = vectorizer("I will kill you!")
predictor(ip3_text,model_gru)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 49ms/step
This comment is: 
toxic


In [35]:
ip2_text = vectorizer("I love you!")
predictor(ip2_text,model_lstm)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 48ms/step
This comment is: 
safe


In [36]:
ip3_text = vectorizer("I love you!")
predictor(ip3_text,model_gru)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 47ms/step
This comment is: 
safe


In [37]:
pre = Precision()
rec = Recall()

In [38]:
def evaluation_of_model(model_name):
    precision = Precision()
    recall = Recall()
    auc = tf.keras.metrics.AUC(multi_label=True)

    for batch in test_dataset:
        x_true, y_true = batch
        yhat = model_name.predict(x_true)

        precision.update_state(y_true, (yhat>0.3))
        recall.update_state(y_true, (yhat>0.3))
        auc.update_state(y_true, yhat)

    print("Precision:", precision.result().numpy())
    print("Recall:", recall.result().numpy())
    print("AUC:", auc.result().numpy())

In [39]:
evaluation_of_model(model_lstm)

[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 35ms/step 
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 38ms/step
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 47ms/step




[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 34ms/step
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27ms/step
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 28

In [40]:
evaluation_of_model(model_gru)

[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step 
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2

In [41]:
evaluation_of_model(model_lstm_trainable_false)

[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step 
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27ms/step
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 34ms/step
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27ms/step
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27ms/step
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27ms/step
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2

In [42]:
evaluation_of_model(model_gru_trainable_false)

[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27ms/step 
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 38ms/step
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3

In [43]:
def score_comment(comment,model_name):
  flagged = False
  input_str = vectorizer(comment)
  res = model_name.predict(np.expand_dims(input_str,0))
  text = ' '
  for idx,cols in enumerate(df.columns[1:]):
    text += '{}: {}  '.format(cols,res[0][idx]>0.3)
    flagged = True
  if flagged == False:
    print("\n\n\n Safe")

  return text