In [159]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer,TfidfTransformer,TfidfVectorizer
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder
from tensorflow import keras
from tensorflow.keras import layers
import nltk
from nltk.corpus import stopwords

In [160]:
df = pd.read_csv('english_fake_news_2212.csv')
df.head()

Unnamed: 0,news_id,headline,body_text,source,label
0,1,Global economy faces recession,The Prime Minister attended the inauguration c...,CNN,Real
1,2,"Earthquake hits Chittagong, panic spreads",Government sources state that an official anno...,Daily Star,Real
2,3,Breakthrough in Bangladesh's tech industry,Local administration has visited the spot for ...,BBC,Fake
3,4,Dengue situation worsens in the country,The Prime Minister attended the inauguration c...,BBC,Fake
4,5,New virus outbreak sparks fear,Government sources state that an official anno...,Reuters,Real


In [161]:
df['label'] = df['label'].map({'Real':1,'Fake':0})
df.drop(columns=['news_id'],inplace=True)
df.head()

Unnamed: 0,headline,body_text,source,label
0,Global economy faces recession,The Prime Minister attended the inauguration c...,CNN,1
1,"Earthquake hits Chittagong, panic spreads",Government sources state that an official anno...,Daily Star,1
2,Breakthrough in Bangladesh's tech industry,Local administration has visited the spot for ...,BBC,0
3,Dengue situation worsens in the country,The Prime Minister attended the inauguration c...,BBC,0
4,New virus outbreak sparks fear,Government sources state that an official anno...,Reuters,1


In [162]:
nltk.download('stopwords')
stopwords_en = stopwords.words("english")

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [163]:
X = df[["headline", "body_text", "source"]]
y = df['label']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

preprocessor = ColumnTransformer(
    transformers=[
        ("tfidf_headline", TfidfVectorizer(stop_words=stopwords_en), "headline"),
        ("tfidf_body_text", TfidfVectorizer(stop_words=stopwords_en), "body_text"),
        ("onehot_source", OneHotEncoder(handle_unknown="ignore"), ["source"])
    ]
)
preprocessor.fit(X_train)
X_train = preprocessor.transform(X_train)
X_test = preprocessor.transform(X_test)

In [164]:
y_train = keras.utils.to_categorical(y_train,num_classes=2)
y_test = keras.utils.to_categorical(y_test,num_classes=2)

In [165]:
input_dim = X_train.shape[1]

model = keras.Sequential([
     keras.Input(shape=(input_dim,)),
    layers.Dense(128, activation="relu"),
     layers.Dropout(0.3),
    layers.Dense(128, activation="relu"),
     layers.Dropout(0.3),
     layers.Dense(128, activation="relu"),
     layers.Dropout(0.3),
      layers.Dense(128, activation="relu"),
     layers.Dropout(0.3),
      layers.Dense(128, activation="relu"),
     layers.Dropout(0.3),
      layers.Dense(128, activation="relu"),
     layers.Dropout(0.3),
      layers.Dense(128, activation="relu"),
     layers.Dropout(0.3),
      layers.Dense(128, activation="relu"),
     layers.Dropout(0.3),
      layers.Dense(128, activation="relu"),
     layers.Dropout(0.3),
      layers.Dense(128, activation="relu"),
     layers.Dropout(0.3),
      layers.Dense(128, activation="relu"),
     layers.Dropout(0.3),
      layers.Dense(128, activation="relu"),
     layers.Dropout(0.3),
      layers.Dense(128, activation="relu"),
     layers.Dropout(0.3),
      layers.Dense(128, activation="relu"),
     layers.Dropout(0.3),
      layers.Dense(128, activation="relu"),
     layers.Dropout(0.3),
      layers.Dense(128, activation="relu"),
     layers.Dropout(0.3),
      layers.Dense(128, activation="relu"),
     layers.Dropout(0.3),
      layers.Dense(128, activation="relu"),
     layers.Dropout(0.3), layers.Dense(128, activation="relu"),
     layers.Dropout(0.3),
    layers.Dense(2, activation="softmax")
])

In [166]:
from tensorflow.keras.callbacks import EarlyStopping

early_stop = EarlyStopping(monitor="val_loss",patience=10,restore_best_weights=True)
optimizer = keras.optimizers.Adam(learning_rate=0.001)
model.compile(optimizer=optimizer,loss="categorical_crossentropy",metrics=["accuracy"])

model.fit(X_train,y_train,epochs=500,batch_size=5,validation_data=(X_test,y_test),callbacks=[early_stop],verbose=1)

Epoch 1/500
[1m354/354[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 8ms/step - accuracy: 0.4860 - loss: 0.6959 - val_accuracy: 0.4921 - val_loss: 0.6962
Epoch 2/500
[1m354/354[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 7ms/step - accuracy: 0.4766 - loss: 0.6962 - val_accuracy: 0.4921 - val_loss: 0.6931
Epoch 3/500
[1m354/354[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 11ms/step - accuracy: 0.4830 - loss: 0.6943 - val_accuracy: 0.4921 - val_loss: 0.6932
Epoch 4/500
[1m354/354[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 7ms/step - accuracy: 0.5245 - loss: 0.6926 - val_accuracy: 0.4921 - val_loss: 0.6932
Epoch 5/500
[1m354/354[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 7ms/step - accuracy: 0.5042 - loss: 0.6932 - val_accuracy: 0.4921 - val_loss: 0.6933
Epoch 6/500
[1m354/354[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 7ms/step - accuracy: 0.5051 - loss: 0.6941 - val_accuracy: 0.5079 - val_loss: 0.6931
Epoch 7/500
[1m354/

<keras.src.callbacks.history.History at 0x791f8fdf0e60>

In [167]:
loss, acc = model.evaluate(X_test,y_test)
print(f"Acurácia no teste: {acc*100:.2f}%")

[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.4941 - loss: 0.6932 
Acurácia no teste: 50.79%
