# Setup

In [1]:
import tensorflow as tf
# Ask TensorFlow to grow GPU memory on demand instead of claiming it up front.
# Iterating over the detected devices (rather than indexing [0]) keeps this
# cell from raising IndexError on CPU-only machines and applies the same
# setting to every GPU on multi-GPU hosts.
physical_devices = tf.config.list_physical_devices('GPU')
for gpu_device in physical_devices:
    tf.config.experimental.set_memory_growth(gpu_device, enable=True)

import pickle

import numpy as np
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.sequence import pad_sequences

In [2]:
# Load the pickled dataset; the displayed frame has a `title` column of
# integer token sequences and a `check` label column.
# NOTE(security): pickle.load can execute arbitrary code embedded in the file,
# so only point this at pickles you produced or otherwise trust.
# pick_in = open('pick/title.pickle', 'rb')
with open('pick/pickled_dataset.pickle', 'rb') as pick_in:
    # The context manager closes the handle even if unpickling raises.
    df_cop = pickle.load(pick_in)
df_cop

Unnamed: 0,title,check
0,"[71, 3, 337, 47, 1120, 36, 105, 4, 2601, 455, ...",0
1,"[2545, 8291, 3, 1426, 360, 109, 1330, 399]",0
2,"[884, 1487, 2602, 509, 171, 605, 1106, 9, 229,...",0
3,"[3, 8, 238, 4103, 18, 453, 24, 17, 4, 424, 260...",0
4,"[730, 2930, 42, 41, 47, 71, 3, 145, 34, 859, 136]",0
...,...,...
49948,"[2, 9433, 1881, 9434, 39, 513, 174, 2, 3, 4, 5]",1
49949,"[136, 2591, 270, 57, 884, 116, 7, 806, 72, 943...",0
49950,"[184, 284, 35, 9437, 10, 7, 48, 2329, 2, 3, 4, 5]",1
49951,"[43, 2621, 6, 10, 1379, 6, 91, 162, 7, 1834]",0


In [3]:
# Hold out 20,000 shuffled rows for evaluation; the fixed random_state makes
# the split repeatable across runs.
title_sequences = df_cop['title']
check_labels = df_cop['check']
X_train, X_test, y_train, y_test = train_test_split(
    title_sequences,
    check_labels,
    test_size=20000,
    shuffle=True,
    random_state=43,
)

# Sanity check: sizes of the four partitions, in the order they were returned.
print(*(len(part) for part in (X_train, X_test, y_train, y_test)))

29953 20000 29953 20000


In [4]:
# Fixed sequence length fed to the models (previously an inline magic number).
MAX_LEN = 40

# Bring every title to MAX_LEN tokens: shorter ones are padded at the end
# ('post'); longer ones are cut using pad_sequences' default truncating mode.
x_train = pad_sequences(X_train, padding='post', maxlen=MAX_LEN)
x_test = pad_sequences(X_test, padding='post', maxlen=MAX_LEN)


# Learn the label mapping from the training labels only, then reuse that same
# mapping for the test labels. Re-fitting on the test set (fit_transform) could
# silently produce a different class-to-integer mapping if the two splits do
# not contain exactly the same set of labels; transform() raises instead.
le = LabelEncoder()
y_train = le.fit_transform(y_train)
y_test = le.transform(y_test)


x_train.shape, x_test.shape

((29953, 40), (20000, 40))

# ANN

In [5]:
# Restore the saved ANN model from its .h5 file on disk.
ANN = load_model(filepath='models/ANN.h5')



In [6]:
# Score the ANN on the held-out split; the returned list (loss first, then the
# model's compiled metrics) is shown as the cell output.
ANN.evaluate(x=x_test, y=y_test)



[0.524923775100708, 0.7412]

In [7]:
# Threshold the ANN's predicted scores at 0.5 to obtain hard 0/1 labels.
y_pred = (ANN.predict(x_test) > 0.5).astype("int32")

# confusion_matrix is imported once in the setup cell rather than re-imported
# here. Rows correspond to true labels, columns to predicted labels.
confusion_matrix(y_test, y_pred)

array([[7016, 3450],
       [1726, 7808]], dtype=int64)

In [8]:
# Per-class precision / recall / F1 for the ANN, using the `y_pred` computed
# in the preceding prediction cell.
ann_report = classification_report(y_true=y_test, y_pred=y_pred)
print(ann_report)

              precision    recall  f1-score   support

           0       0.80      0.67      0.73     10466
           1       0.69      0.82      0.75      9534

    accuracy                           0.74     20000
   macro avg       0.75      0.74      0.74     20000
weighted avg       0.75      0.74      0.74     20000



# CNN

In [9]:
# Restore the saved CNN model from its .h5 file on disk.
CNN = load_model(filepath='models/CNN.h5')

In [10]:
# Score the CNN on the held-out split; the returned list (loss first, then the
# model's compiled metrics) is shown as the cell output.
CNN.evaluate(x=x_test, y=y_test)



[0.4623451286581578, 0.92715]

In [11]:
# Threshold the CNN's predicted scores at 0.5 to obtain hard 0/1 labels.
y_pred = (CNN.predict(x_test) > 0.5).astype("int32")

# confusion_matrix is imported once in the setup cell rather than re-imported
# here. Rows correspond to true labels, columns to predicted labels.
confusion_matrix(y_test, y_pred)

array([[9947,  519],
       [ 938, 8596]], dtype=int64)

In [12]:
# Per-class precision / recall / F1 for the CNN, using the `y_pred` computed
# in the preceding prediction cell.
cnn_report = classification_report(y_true=y_test, y_pred=y_pred)
print(cnn_report)

              precision    recall  f1-score   support

           0       0.91      0.95      0.93     10466
           1       0.94      0.90      0.92      9534

    accuracy                           0.93     20000
   macro avg       0.93      0.93      0.93     20000
weighted avg       0.93      0.93      0.93     20000



# LSTM

In [13]:
# Restore the saved LSTM model from its .h5 file on disk.
LSTM = load_model(filepath='models/LSTM.h5')

In [14]:
# Score the LSTM on the held-out split; the returned list (loss first, then the
# model's compiled metrics) is shown as the cell output.
LSTM.evaluate(x=x_test, y=y_test)



[0.5056305188378785, 0.926]

In [15]:
# Threshold the LSTM's predicted scores at 0.5 to obtain hard 0/1 labels.
y_pred = (LSTM.predict(x_test) > 0.5).astype("int32")

# confusion_matrix is imported once in the setup cell rather than re-imported
# here. Rows correspond to true labels, columns to predicted labels.
confusion_matrix(y_test, y_pred)

array([[9686,  780],
       [ 700, 8834]], dtype=int64)

In [16]:
# Per-class precision / recall / F1 for the LSTM, using the `y_pred` computed
# in the preceding prediction cell.
lstm_report = classification_report(y_true=y_test, y_pred=y_pred)
print(lstm_report)

              precision    recall  f1-score   support

           0       0.93      0.93      0.93     10466
           1       0.92      0.93      0.92      9534

    accuracy                           0.93     20000
   macro avg       0.93      0.93      0.93     20000
weighted avg       0.93      0.93      0.93     20000



# Bidirectional LSTM

In [17]:
# Restore the saved bidirectional LSTM model from its .h5 file on disk.
Bi_LSTM = load_model(filepath='models/Bi-LSTM.h5')

In [18]:
# Score the bidirectional LSTM on the held-out split; the returned list (loss
# first, then the model's compiled metrics) is shown as the cell output.
Bi_LSTM.evaluate(x=x_test, y=y_test)



[0.4397370012011379, 0.92685]

In [19]:
# Threshold the bidirectional LSTM's predicted scores at 0.5 to obtain hard
# 0/1 labels.
y_pred = (Bi_LSTM.predict(x_test) > 0.5).astype("int32")

# confusion_matrix is imported once in the setup cell rather than re-imported
# here. Rows correspond to true labels, columns to predicted labels.
confusion_matrix(y_test, y_pred)

array([[9723,  743],
       [ 720, 8814]], dtype=int64)

In [20]:
# Per-class precision / recall / F1 for the bidirectional LSTM, using the
# `y_pred` computed in the preceding prediction cell.
bi_lstm_report = classification_report(y_true=y_test, y_pred=y_pred)
print(bi_lstm_report)

              precision    recall  f1-score   support

           0       0.93      0.93      0.93     10466
           1       0.92      0.92      0.92      9534

    accuracy                           0.93     20000
   macro avg       0.93      0.93      0.93     20000
weighted avg       0.93      0.93      0.93     20000



# GRU

In [21]:
# Restore the saved GRU model from its .h5 file on disk.
GRU = load_model(filepath='models/GRU.h5')

In [22]:
# Score the GRU on the held-out split; the returned list (loss first, then the
# model's compiled metrics) is shown as the cell output.
GRU.evaluate(x=x_test, y=y_test)



[0.3806882171727717, 0.9309]

In [23]:
# Threshold the GRU's predicted scores at 0.5 to obtain hard 0/1 labels.
y_pred = (GRU.predict(x_test) > 0.5).astype("int32")

# confusion_matrix is imported once in the setup cell rather than re-imported
# here. Rows correspond to true labels, columns to predicted labels.
confusion_matrix(y_test, y_pred)

array([[9899,  567],
       [ 815, 8719]], dtype=int64)

In [24]:
# Per-class precision / recall / F1 for the GRU, using the `y_pred` computed
# in the preceding prediction cell.
gru_report = classification_report(y_true=y_test, y_pred=y_pred)
print(gru_report)

              precision    recall  f1-score   support

           0       0.92      0.95      0.93     10466
           1       0.94      0.91      0.93      9534

    accuracy                           0.93     20000
   macro avg       0.93      0.93      0.93     20000
weighted avg       0.93      0.93      0.93     20000



# Bidirectional GRU

In [25]:
# Restore the saved bidirectional GRU model from its .h5 file on disk.
Bi_GRU = load_model(filepath='models/Bi-GRU.h5')

In [26]:
# Score the bidirectional GRU on the held-out split; the returned list (loss
# first, then the model's compiled metrics) is shown as the cell output.
Bi_GRU.evaluate(x=x_test, y=y_test)



[0.5034468637532089, 0.92375]

In [27]:
# Threshold the bidirectional GRU's predicted scores at 0.5 to obtain hard
# 0/1 labels.
y_pred = (Bi_GRU.predict(x_test) > 0.5).astype("int32")

# confusion_matrix is imported once in the setup cell rather than re-imported
# here. Rows correspond to true labels, columns to predicted labels.
confusion_matrix(y_test, y_pred)

array([[9518,  948],
       [ 577, 8957]], dtype=int64)

In [28]:
# Per-class precision / recall / F1 for the bidirectional GRU, using the
# `y_pred` computed in the preceding prediction cell.
bi_gru_report = classification_report(y_true=y_test, y_pred=y_pred)
print(bi_gru_report)

              precision    recall  f1-score   support

           0       0.94      0.91      0.93     10466
           1       0.90      0.94      0.92      9534

    accuracy                           0.92     20000
   macro avg       0.92      0.92      0.92     20000
weighted avg       0.92      0.92      0.92     20000



# Hybrid (CNN + LSTM)

In [29]:
# Restore the saved hybrid (CNN + LSTM) model from its .h5 file on disk.
Hybrid = load_model(filepath='models/Hybrid.h5')

In [30]:
# Score the hybrid model on the held-out split; the returned list (loss first,
# then the model's compiled metrics) is shown as the cell output.
Hybrid.evaluate(x=x_test, y=y_test)



[0.3860637866353383, 0.93045]

In [31]:
# Threshold the hybrid model's predicted scores at 0.5 to obtain hard 0/1
# labels.
y_pred = (Hybrid.predict(x_test) > 0.5).astype("int32")

# confusion_matrix is imported once in the setup cell rather than re-imported
# here. Rows correspond to true labels, columns to predicted labels.
confusion_matrix(y_test, y_pred)

array([[9717,  749],
       [ 642, 8892]], dtype=int64)

In [32]:
# Per-class precision / recall / F1 for the hybrid model, using the `y_pred`
# computed in the preceding prediction cell.
hybrid_report = classification_report(y_true=y_test, y_pred=y_pred)
print(hybrid_report)

              precision    recall  f1-score   support

           0       0.94      0.93      0.93     10466
           1       0.92      0.93      0.93      9534

    accuracy                           0.93     20000
   macro avg       0.93      0.93      0.93     20000
weighted avg       0.93      0.93      0.93     20000

