## Step 3: Apply neural networks

In [1]:
from config import models_isot_path
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, GRU, Dense, LSTM, Bidirectional
from sklearn.metrics import classification_report, confusion_matrix
import pandas as pd


## GRU

In [2]:
news = pd.read_csv("data/isot_news.csv")

max_words = 20000
max_len = 200

tokenizer = Tokenizer(num_words=max_words)
tokenizer.fit_on_texts(news['text'])

sequences = tokenizer.texts_to_sequences(news['text'])
X = pad_sequences(sequences, maxlen=max_len)

y = news['label'].values

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [3]:
gru = Sequential([
    Embedding(input_dim=max_words, output_dim=128, input_length=max_len),
    GRU(64, return_sequences=False),
    Dense(1, activation='sigmoid')
])

gru.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
gru.summary()


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       (None, 200, 128)          2560000   
                                                                 
 gru (GRU)                   (None, 64)                37248     
                                                                 
 dense (Dense)               (None, 1)                 65        
                                                                 
Total params: 2597313 (9.91 MB)
Trainable params: 2597313 (9.91 MB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [4]:
gru.fit(X_train, y_train, epochs=5, batch_size=64, validation_split=0.1)

loss, accuracy = gru.evaluate(X_test, y_test)
print(f"Test Accuracy: {accuracy:.5f}")

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Test Accuracy: 0.99187


In [5]:
y_pred_probs = gru.predict(X_test)
y_pred = (y_pred_probs > 0.5).astype(int).flatten()

print("Classification Report:")
print(classification_report(y_test, y_pred, digits=5))

print("Confusion Matrix:")
print(confusion_matrix(y_test, y_pred))

Classification Report:
              precision    recall  f1-score   support

           0    0.99382   0.99032   0.99207      4546
           1    0.98982   0.99350   0.99166      4308

    accuracy                        0.99187      8854
   macro avg    0.99182   0.99191   0.99186      8854
weighted avg    0.99188   0.99187   0.99187      8854

Confusion Matrix:
[[4502   44]
 [  28 4280]]


In [6]:
gru.save(f"{models_isot_path}/gru.h5")


  saving_api.save_model(


## LSTM

In [7]:
lstm = Sequential([
    Embedding(input_dim=max_words, output_dim=128, input_length=max_len),
    LSTM(64, return_sequences=False),
    Dense(1, activation='sigmoid')
])


In [8]:
lstm.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
lstm.summary()


Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_1 (Embedding)     (None, 200, 128)          2560000   
                                                                 
 lstm (LSTM)                 (None, 64)                49408     
                                                                 
 dense_1 (Dense)             (None, 1)                 65        
                                                                 
Total params: 2609473 (9.95 MB)
Trainable params: 2609473 (9.95 MB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [9]:
lstm.fit(X_train, y_train, epochs=3, batch_size=64, validation_split=0.1)

loss, accuracy = lstm.evaluate(X_test, y_test)
print(f"Test Accuracy: {accuracy:.5f}")


Epoch 1/3
Epoch 2/3
Epoch 3/3
Test Accuracy: 0.98950


In [10]:
y_pred_probs = lstm.predict(X_test)
y_pred = (y_pred_probs > 0.5).astype(int).flatten()

print("Classification Report:")
print(classification_report(y_test, y_pred, digits=5))

print("Confusion Matrix:")
print(confusion_matrix(y_test, y_pred))

Classification Report:
              precision    recall  f1-score   support

           0    0.98645   0.99318   0.98981      4546
           1    0.99275   0.98561   0.98917      4308

    accuracy                        0.98950      8854
   macro avg    0.98960   0.98939   0.98949      8854
weighted avg    0.98952   0.98950   0.98950      8854

Confusion Matrix:
[[4515   31]
 [  62 4246]]


In [11]:
lstm.save(f"{models_isot_path}/lstm.h5")

  saving_api.save_model(


## Bi-LSTM

In [12]:
bi_lstm = Sequential([
    Embedding(input_dim=max_words, output_dim=128, input_length=max_len),
    Bidirectional(LSTM(64, return_sequences=False)),
    Dense(1, activation='sigmoid')
])


In [13]:
bi_lstm.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
bi_lstm.summary()


Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_2 (Embedding)     (None, 200, 128)          2560000   
                                                                 
 bidirectional (Bidirection  (None, 128)               98816     
 al)                                                             
                                                                 
 dense_2 (Dense)             (None, 1)                 129       
                                                                 
Total params: 2658945 (10.14 MB)
Trainable params: 2658945 (10.14 MB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [14]:
bi_lstm.fit(X_train, y_train, epochs=5, batch_size=64, validation_split=0.1)

loss, accuracy = bi_lstm.evaluate(X_test, y_test)
print(f"Test Accuracy: {accuracy:.5f}")


Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Test Accuracy: 0.98667


In [15]:
y_pred_probs = bi_lstm.predict(X_test)
y_pred = (y_pred_probs > 0.5).astype(int).flatten()

print("Classification Report:")
print(classification_report(y_test, y_pred, digits=5))

print("Confusion Matrix:")
print(confusion_matrix(y_test, y_pred))

Classification Report:
              precision    recall  f1-score   support

           0    0.98446   0.98966   0.98706      4546
           1    0.98903   0.98352   0.98627      4308

    accuracy                        0.98667      8854
   macro avg    0.98675   0.98659   0.98666      8854
weighted avg    0.98669   0.98667   0.98667      8854

Confusion Matrix:
[[4499   47]
 [  71 4237]]


In [16]:
bi_lstm.save(f"{models_isot_path}/bi_lstm.h5")


  saving_api.save_model(
