In [2]:
import numpy as np 
import pandas as pd 
import json 
import csv 
import random 

from tensorflow.keras.preprocessing.text import Tokenizer 
from tensorflow.keras.preprocessing.sequence import pad_sequences 
from tensorflow.keras.utils import to_categorical 
from tensorflow.keras import regularizers 

import pprint 
import tensorflow.compat.v1 as tf 
from tensorflow.python.framework import ops 
from sklearn.model_selection import train_test_split 
from sklearn import preprocessing 
tf.disable_eager_execution() 

## Reading the data 

In [3]:

# Read the news dataset from disk and preview its first rows.
news_csv_path = "./data/news.csv"
data = pd.read_csv(news_csv_path)
data.head()


Unnamed: 0.1,Unnamed: 0,title,text,label
0,8476,You Can Smell Hillary’s Fear,"Daniel Greenfield, a Shillman Journalism Fello...",FAKE
1,10294,Watch The Exact Moment Paul Ryan Committed Pol...,Google Pinterest Digg Linkedin Reddit Stumbleu...,FAKE
2,3608,Kerry to go to Paris in gesture of sympathy,U.S. Secretary of State John F. Kerry said Mon...,REAL
3,10142,Bernie supporters on Twitter erupt in anger ag...,"— Kaydee King (@KaydeeKing) November 9, 2016 T...",FAKE
4,875,The Battle of New York: Why This Primary Matters,It's primary day in New York and front-runners...,REAL


## Data processing

In [4]:
# Remove the "Unnamed: 0" column (presumably a leftover row index from the
# CSV export; the preview shows values such as 8476 and 10294 in it).
# errors="ignore" keeps this cell idempotent: re-running it once the column
# is already gone no longer raises KeyError.
data = data.drop(columns=["Unnamed: 0"], errors="ignore")

## Checking null data

In [5]:
# Count missing values per column (isna is the same check as isnull).
data.isna().sum()

title    0
text     0
label    0
dtype: int64

## Encoding label values

In [6]:
# Replace the string labels with integer codes in a single fit+transform step.
# The fitted encoder stays available as `le` (the displayed frame shows
# FAKE -> 0 and REAL -> 1).
le = preprocessing.LabelEncoder()
data["label"] = le.fit_transform(data["label"])

In [7]:
# Display the frame to confirm that the label column now holds integer codes.
data

Unnamed: 0,title,text,label
0,You Can Smell Hillary’s Fear,"Daniel Greenfield, a Shillman Journalism Fello...",0
1,Watch The Exact Moment Paul Ryan Committed Pol...,Google Pinterest Digg Linkedin Reddit Stumbleu...,0
2,Kerry to go to Paris in gesture of sympathy,U.S. Secretary of State John F. Kerry said Mon...,1
3,Bernie supporters on Twitter erupt in anger ag...,"— Kaydee King (@KaydeeKing) November 9, 2016 T...",0
4,The Battle of New York: Why This Primary Matters,It's primary day in New York and front-runners...,1
...,...,...,...
6330,State Department says it can't find emails fro...,The State Department told the Republican Natio...,1
6331,The ‘P’ in PBS Should Stand for ‘Plutocratic’ ...,The ‘P’ in PBS Should Stand for ‘Plutocratic’ ...,0
6332,Anti-Trump Protesters Are Tools of the Oligarc...,Anti-Trump Protesters Are Tools of the Oligar...,0
6333,"In Ethiopia, Obama seeks progress on peace, se...","ADDIS ABABA, Ethiopia —President Obama convene...",1


## Variables

In [8]:
# Width of each word vector; must match the GloVe file loaded later
# (glove.6B.50d.txt) and is used as the Embedding layer's output dimension.
embedding_dim = 50
# Sequence length the Embedding layer is built for (its input_length).
max_length = 54
# Passed to pad_sequences as `truncating`: cut overly long sequences at the end.
trunc_type = 'post'
# Passed to pad_sequences as `padding`: append zeros after the tokens.
padding_type = 'post'
# NOTE(review): this token is not referenced by any visible cell; the
# Tokenizer below is created without an oov_token - confirm whether intended.
oov_tok = "<OOV>"
# Number of rows taken from the start of `data` for training + validation.
training_size = 3000
# Fraction of those rows held out as the validation/test split.
test_portion = .1


## Tokenization

In [9]:
# Collect the first `training_size` rows as plain Python lists.
# Positional slicing with .iloc replaces the row-by-row label lookups
# (data['title'][x]), which only worked while the index was the default
# 0..n-1 range and would break after any filtering or shuffling of `data`.
if len(data) < training_size:
    # Fail early with a clear message instead of silently using fewer rows;
    # the split arithmetic below assumes exactly `training_size` samples.
    raise ValueError(
        f"training_size={training_size} exceeds the {len(data)} available rows")

subset = data.iloc[:training_size]
title = subset['title'].tolist()
text = subset['text'].tolist()
labels = subset['label'].tolist()


In [10]:
# Build the vocabulary from the selected titles and convert every title into
# a sequence of word indices.
# NOTE(review): `oov_tok` is defined in the variables cell but not passed to
# the Tokenizer, so words unseen during fitting are dropped at prediction
# time - confirm whether that is intended.
tokenizer1 = Tokenizer()
tokenizer1.fit_on_texts(title)
word_index1 = tokenizer1.word_index
vocab_size1 = len(word_index1)
sequences1 = tokenizer1.texts_to_sequences(title)

# Pad/truncate to exactly `max_length` tokens. Without maxlen the width would
# be the length of the longest title, which only matches the Embedding
# layer's input_length (and the prediction cell) by coincidence.
padded1 = pad_sequences(
    sequences1, maxlen=max_length,
    padding=padding_type, truncating=trunc_type)

# The first `test_portion` of the rows is held out; the rest is used for training.
split = int(test_portion * training_size)
test_sequences1 = padded1[:split]
training_sequences1 = padded1[split:training_size]
test_labels = labels[:split]
training_labels = labels[split:training_size]


## Embedding

In [13]:
# Load the pre-trained GloVe vectors into a word -> vector lookup.
# The encoding is given explicitly because the GloVe file is UTF-8 text;
# relying on the platform default can raise UnicodeDecodeError (e.g. on
# systems whose default codec is not UTF-8).
embeddings_index = {}
with open('./data/glove.6B.50d.txt', encoding='utf-8') as f:
    for line in f:
        values = line.split()
        if not values:
            # Skip blank lines instead of failing on values[0].
            continue
        word = values[0]
        coefs = np.asarray(values[1:], dtype='float32')
        embeddings_index[word] = coefs

# Generating embeddings
# One row per vocabulary index; the extra row accounts for index 0, which the
# tokenizer never assigns to a word. Words missing from GloVe keep a zero row.
embeddings_matrix = np.zeros((vocab_size1 + 1, embedding_dim))
for word, i in word_index1.items():
    embedding_vector = embeddings_index.get(word)
    if embedding_vector is not None:
        embeddings_matrix[i] = embedding_vector


## Model architecture

In [15]:
# Architecture: frozen pre-trained embedding -> dropout -> 1D convolution ->
# max pooling -> LSTM -> single sigmoid unit for the binary label.
embedding_layer = tf.keras.layers.Embedding(
    vocab_size1 + 1,
    embedding_dim,
    input_length=max_length,
    weights=[embeddings_matrix],
    trainable=False,  # keep the pre-trained vectors fixed during training
)

model = tf.keras.Sequential([
    embedding_layer,
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.Conv1D(64, 5, activation='relu'),
    tf.keras.layers.MaxPooling1D(pool_size=4),
    tf.keras.layers.LSTM(64),
    tf.keras.layers.Dense(1, activation='sigmoid'),
])

model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)
model.summary()


2024-04-09 17:26:05.988156: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:887] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2024-04-09 17:26:06.007125: W tensorflow/core/common_runtime/gpu/gpu_device.cc:2256] Cannot dlopen some GPU libraries. Please make sure the missing libraries mentioned above are installed properly if you would like to use GPU. Follow the guide at https://www.tensorflow.org/install/gpu for how to download and setup the required libraries for your platform.
Skipping registering GPU devices...
2024-04-09 17:26:06.016582: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:388] MLIR V1 optimization pass is not enabled
2024-04-09 17:26:06.036397: W tensorflow/c/c_api.cc:305] Operation '{name:'embedding/embeddings/Assign' id:15 op device:{requested: '', assigned: ''} def:{{{node embedding/embeddings/Assign}} = AssignVariableOp[_has_manual_control_dependenc

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       (None, 54, 50)            377600    
                                                                 
 dropout (Dropout)           (None, 54, 50)            0         
                                                                 
 conv1d (Conv1D)             (None, 50, 64)            16064     
                                                                 
 max_pooling1d (MaxPooling1  (None, 12, 64)            0         
 D)                                                              
                                                                 
 lstm (LSTM)                 (None, 64)                33024     
                                                                 
 dense (Dense)               (None, 1)                 65        
                                                        

## Training

In [16]:
num_epochs = 50

# Materialise both splits as NumPy arrays before handing them to fit().
testing_labels = np.array(test_labels)
testing_padded = np.array(test_sequences1)
training_labels = np.array(training_labels)
training_padded = np.array(training_sequences1)

# The held-out split is evaluated after every epoch.
validation_pair = (testing_padded, testing_labels)
history = model.fit(
    training_padded,
    training_labels,
    validation_data=validation_pair,
    epochs=num_epochs,
    verbose=2,
)


Train on 2700 samples, validate on 300 samples
Epoch 1/50


2024-04-09 17:26:51.385345: W tensorflow/c/c_api.cc:305] Operation '{name:'training/Adam/lstm/lstm_cell/bias/v/Assign' id:1042 op device:{requested: '', assigned: ''} def:{{{node training/Adam/lstm/lstm_cell/bias/v/Assign}} = AssignVariableOp[_has_manual_control_dependencies=true, dtype=DT_FLOAT, validate_shape=false](training/Adam/lstm/lstm_cell/bias/v, training/Adam/lstm/lstm_cell/bias/v/Initializer/zeros)}}' was changed by setting attribute after it was run by a session. This mutation will have no effect, and will trigger an error in the future. Either don't modify nodes after running them or create a new session.


2700/2700 - 1s - loss: 0.6572 - accuracy: 0.5930 - val_loss: 0.5911 - val_accuracy: 0.6833 - 1s/epoch - 397us/sample
Epoch 2/50


  updates = self.state_updates
2024-04-09 17:26:52.405757: W tensorflow/c/c_api.cc:305] Operation '{name:'loss/mul' id:685 op device:{requested: '', assigned: ''} def:{{{node loss/mul}} = Mul[T=DT_FLOAT, _has_manual_control_dependencies=true](loss/mul/x, loss/dense_loss/value)}}' was changed by setting attribute after it was run by a session. This mutation will have no effect, and will trigger an error in the future. Either don't modify nodes after running them or create a new session.


2700/2700 - 1s - loss: 0.5822 - accuracy: 0.6930 - val_loss: 0.5493 - val_accuracy: 0.7133 - 707ms/epoch - 262us/sample
Epoch 3/50
2700/2700 - 1s - loss: 0.5435 - accuracy: 0.7315 - val_loss: 0.5155 - val_accuracy: 0.7167 - 694ms/epoch - 257us/sample
Epoch 4/50
2700/2700 - 1s - loss: 0.4992 - accuracy: 0.7533 - val_loss: 0.5102 - val_accuracy: 0.7333 - 754ms/epoch - 279us/sample
Epoch 5/50
2700/2700 - 1s - loss: 0.4518 - accuracy: 0.7900 - val_loss: 0.5066 - val_accuracy: 0.7300 - 680ms/epoch - 252us/sample
Epoch 6/50
2700/2700 - 1s - loss: 0.4158 - accuracy: 0.8141 - val_loss: 0.5151 - val_accuracy: 0.7433 - 690ms/epoch - 256us/sample
Epoch 7/50
2700/2700 - 1s - loss: 0.3517 - accuracy: 0.8526 - val_loss: 0.5325 - val_accuracy: 0.7433 - 734ms/epoch - 272us/sample
Epoch 8/50
2700/2700 - 1s - loss: 0.3181 - accuracy: 0.8626 - val_loss: 0.5292 - val_accuracy: 0.7367 - 690ms/epoch - 256us/sample
Epoch 9/50
2700/2700 - 1s - loss: 0.2889 - accuracy: 0.8770 - val_loss: 0.5247 - val_accuracy:

## Model Evaluation and Prediction

In [20]:
# Sample headline to classify as real or fake.
X = "Shakira and Piqué are together again"

# Encode the headline with the tokenizer fitted on the training titles and
# pad it to `max_length`, the same width the model was built with. Using the
# shared constant (instead of a hard-coded 54) keeps this cell in sync if the
# sequence length is ever changed.
sequences = pad_sequences(
    tokenizer1.texts_to_sequences([X]),
    maxlen=max_length,
    padding=padding_type,
    truncating=trunc_type)

# The sigmoid output is the score for encoded label 1 (REAL in the data
# preview); 0.5 is the decision threshold.
probability = model.predict(sequences, verbose=0)[0][0]
if probability >= 0.5:
    print("This news is True")
else:
    print("This news is false")


This news is false
