<a href="https://colab.research.google.com/github/joshuabenedict-665/Fake_News_Detector/blob/feature%2Ffake-news-model/fake_news_model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
!pip install -q kaggle


In [4]:
import os
from google.colab import files

files.upload()  # Upload kaggle.json

os.environ['KAGGLE_CONFIG_DIR'] = "/content"


Saving kaggle.json to kaggle.json


In [5]:
!kaggle datasets download -d clmentbisaillon/fake-and-real-news-dataset
!unzip fake-and-real-news-dataset.zip


Dataset URL: https://www.kaggle.com/datasets/clmentbisaillon/fake-and-real-news-dataset
License(s): CC-BY-NC-SA-4.0
Downloading fake-and-real-news-dataset.zip to /content
  0% 0.00/41.0M [00:00<?, ?B/s]
100% 41.0M/41.0M [00:00<00:00, 1.31GB/s]
Archive:  fake-and-real-news-dataset.zip
  inflating: Fake.csv                
  inflating: True.csv                


In [6]:
import pandas as pd
df_fake = pd.read_csv("Fake.csv")
df_real = pd.read_csv("True.csv")

df_fake["label"] = 0
df_real["label"] = 1

df = pd.concat([df_fake, df_real])
df = df.sample(frac=1).reset_index(drop=True)  # Shuffle

texts = df["text"]
labels = df["label"]


In [7]:
import re

def clean(text):
    text = text.lower()
    text = re.sub(r"http\S+|www\S+|https\S+", '', text)
    text = re.sub(r"[^a-zA-Z0-9\s]", '', text)
    return text

texts = texts.apply(clean)


In [8]:
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

tokenizer = Tokenizer(num_words=5000, oov_token="<OOV>")
tokenizer.fit_on_texts(texts)
sequences = tokenizer.texts_to_sequences(texts)
padded = pad_sequences(sequences, maxlen=300, padding='post', truncating='post')


In [9]:
from sklearn.model_selection import train_test_split
import numpy as np

X = np.array(padded)
y = np.array(labels)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [10]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, GRU, Dense, Dropout
vocab_size = 5000
embedding_dim = 64
max_length = 300
model=Sequential([Embedding(input_dim=vocab_size, output_dim=embedding_dim, input_length=max_length),
    GRU(64, return_sequences=False),
    Dropout(0.5),
    Dense(1, activation='sigmoid')
])
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
model.summary()



In [11]:
history = model.fit(
    X_train, y_train,
    epochs=5,
    batch_size=64,
    validation_data=(X_test, y_test),
    verbose=1
)


Epoch 1/5
[1m562/562[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 16ms/step - accuracy: 0.6418 - loss: 0.6075 - val_accuracy: 0.9478 - val_loss: 0.2033
Epoch 2/5
[1m562/562[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 15ms/step - accuracy: 0.9466 - loss: 0.1938 - val_accuracy: 0.9866 - val_loss: 0.0584
Epoch 3/5
[1m562/562[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 13ms/step - accuracy: 0.9361 - loss: 0.1730 - val_accuracy: 0.8127 - val_loss: 0.3871
Epoch 4/5
[1m562/562[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 14ms/step - accuracy: 0.9236 - loss: 0.1972 - val_accuracy: 0.9963 - val_loss: 0.0138
Epoch 5/5
[1m562/562[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 15ms/step - accuracy: 0.9980 - loss: 0.0128 - val_accuracy: 0.9980 - val_loss: 0.0097


In [12]:
loss, accuracy = model.evaluate(X_test, y_test)
print(f"Test Accuracy: {accuracy:.4f}")


[1m281/281[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 6ms/step - accuracy: 0.9984 - loss: 0.0085
Test Accuracy: 0.9980


In [13]:
model.save("fake_news_model.h5")

import pickle
with open("tokenizer.pkl", "wb") as f:
    pickle.dump(tokenizer, f)




In [14]:
from google.colab import files
files.download("fake_news_model.h5")
files.download("tokenizer.pkl")


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>