In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime

import warnings
warnings.filterwarnings('ignore')

import tensorflow as tf


In [2]:
fake_news = pd.read_csv("../input/fake-and-real-news-dataset/Fake.csv")
fake_news.head()

Unnamed: 0,title,text,subject,date
0,Donald Trump Sends Out Embarrassing New Year’...,Donald Trump just couldn t wish all Americans ...,News,"December 31, 2017"
1,Drunk Bragging Trump Staffer Started Russian ...,House Intelligence Committee Chairman Devin Nu...,News,"December 31, 2017"
2,Sheriff David Clarke Becomes An Internet Joke...,"On Friday, it was revealed that former Milwauk...",News,"December 30, 2017"
3,Trump Is So Obsessed He Even Has Obama’s Name...,"On Christmas day, Donald Trump announced that ...",News,"December 29, 2017"
4,Pope Francis Just Called Out Donald Trump Dur...,Pope Francis used his annual Christmas Day mes...,News,"December 25, 2017"


In [3]:
true_news = pd.read_csv("../input/fake-and-real-news-dataset/True.csv")
true_news.head()

Unnamed: 0,title,text,subject,date
0,"As U.S. budget fight looms, Republicans flip t...",WASHINGTON (Reuters) - The head of a conservat...,politicsNews,"December 31, 2017"
1,U.S. military to accept transgender recruits o...,WASHINGTON (Reuters) - Transgender people will...,politicsNews,"December 29, 2017"
2,Senior U.S. Republican senator: 'Let Mr. Muell...,WASHINGTON (Reuters) - The special counsel inv...,politicsNews,"December 31, 2017"
3,FBI Russia probe helped by Australian diplomat...,WASHINGTON (Reuters) - Trump campaign adviser ...,politicsNews,"December 30, 2017"
4,Trump wants Postal Service to charge 'much mor...,SEATTLE/WASHINGTON (Reuters) - President Donal...,politicsNews,"December 29, 2017"


In [4]:
fake_news['fake']=1
fake_news.head()

Unnamed: 0,title,text,subject,date,fake
0,Donald Trump Sends Out Embarrassing New Year’...,Donald Trump just couldn t wish all Americans ...,News,"December 31, 2017",1
1,Drunk Bragging Trump Staffer Started Russian ...,House Intelligence Committee Chairman Devin Nu...,News,"December 31, 2017",1
2,Sheriff David Clarke Becomes An Internet Joke...,"On Friday, it was revealed that former Milwauk...",News,"December 30, 2017",1
3,Trump Is So Obsessed He Even Has Obama’s Name...,"On Christmas day, Donald Trump announced that ...",News,"December 29, 2017",1
4,Pope Francis Just Called Out Donald Trump Dur...,Pope Francis used his annual Christmas Day mes...,News,"December 25, 2017",1


In [5]:
true_news['fake'] = 0
true_news.head()

Unnamed: 0,title,text,subject,date,fake
0,"As U.S. budget fight looms, Republicans flip t...",WASHINGTON (Reuters) - The head of a conservat...,politicsNews,"December 31, 2017",0
1,U.S. military to accept transgender recruits o...,WASHINGTON (Reuters) - Transgender people will...,politicsNews,"December 29, 2017",0
2,Senior U.S. Republican senator: 'Let Mr. Muell...,WASHINGTON (Reuters) - The special counsel inv...,politicsNews,"December 31, 2017",0
3,FBI Russia probe helped by Australian diplomat...,WASHINGTON (Reuters) - Trump campaign adviser ...,politicsNews,"December 30, 2017",0
4,Trump wants Postal Service to charge 'much mor...,SEATTLE/WASHINGTON (Reuters) - President Donal...,politicsNews,"December 29, 2017",0


In [6]:
news = pd.concat([fake_news, true_news])
news.sample(5)

Unnamed: 0,title,text,subject,date,fake
10740,RUSH LIMBAUGH: This Group is “Roadblocking” Tr...,Rush Limbaugh nails it in the transcript below...,politics,"May 31, 2017",1
7608,Gun control groups spend millions on state bal...,NEW YORK (Reuters) - Gun safety advocates are ...,politicsNews,"November 2, 2016",0
20903,THE VIDEO HILLARY CLINTON Does NOT Want You To...,Spread this EVERYWHERE!,left-news,"Mar 9, 2016",1
6626,"In Trump cabinet, Commerce Secretary will run ...","WEST PALM BEACH, Fla./WASHINGTON (Reuters) - U...",politicsNews,"December 20, 2016",0
3748,"Senate Judiciary Committee seeks Comey memos, ...",WASHINGTON (Reuters) - The Senate Judiciary Co...,politicsNews,"May 17, 2017",0


In [7]:
news.isna().sum()

title      0
text       0
subject    0
date       0
fake       0
dtype: int64

In [8]:
news['date'] = pd.to_datetime(news['date'], errors='coerce')
news['Year'] = news['date'].dt.year
news['Month'] = news['date'].dt.month

news.head()

Unnamed: 0,title,text,subject,date,fake,Year,Month
0,Donald Trump Sends Out Embarrassing New Year’...,Donald Trump just couldn t wish all Americans ...,News,2017-12-31,1,2017.0,12.0
1,Drunk Bragging Trump Staffer Started Russian ...,House Intelligence Committee Chairman Devin Nu...,News,2017-12-31,1,2017.0,12.0
2,Sheriff David Clarke Becomes An Internet Joke...,"On Friday, it was revealed that former Milwauk...",News,2017-12-30,1,2017.0,12.0
3,Trump Is So Obsessed He Even Has Obama’s Name...,"On Christmas day, Donald Trump announced that ...",News,2017-12-29,1,2017.0,12.0
4,Pope Francis Just Called Out Donald Trump Dur...,Pope Francis used his annual Christmas Day mes...,News,2017-12-25,1,2017.0,12.0


In [9]:
news['text'] = news['title'] + news['text']
news.drop(labels=['title'], axis=1,inplace=True)
news.head()

Unnamed: 0,text,subject,date,fake,Year,Month
0,Donald Trump Sends Out Embarrassing New Year’...,News,2017-12-31,1,2017.0,12.0
1,Drunk Bragging Trump Staffer Started Russian ...,News,2017-12-31,1,2017.0,12.0
2,Sheriff David Clarke Becomes An Internet Joke...,News,2017-12-30,1,2017.0,12.0
3,Trump Is So Obsessed He Even Has Obama’s Name...,News,2017-12-29,1,2017.0,12.0
4,Pope Francis Just Called Out Donald Trump Dur...,News,2017-12-25,1,2017.0,12.0


In [10]:
news.drop(labels=['subject','date', 'Year','Month'], axis=1, inplace=True)
news.head()

Unnamed: 0,text,fake
0,Donald Trump Sends Out Embarrassing New Year’...,1
1,Drunk Bragging Trump Staffer Started Russian ...,1
2,Sheriff David Clarke Becomes An Internet Joke...,1
3,Trump Is So Obsessed He Even Has Obama’s Name...,1
4,Pope Francis Just Called Out Donald Trump Dur...,1


In [11]:
news = news.sample(frac=1)
news.head()

Unnamed: 0,text,fake
7726,Trump will accept election results if it's fai...,0
18738,POPULAR YOUTUBE PERSONALITY Goes Undercover Wi...,1
12116,Argentina fires head of navy over submarine tr...,0
10853,House seen passing North Korea sanctions bill ...,0
14340,HILLARY TO OBAMA: “call off your f–king dogs”A...,1


In [12]:
from sklearn.model_selection import train_test_split

train_sentences, val_sentences, train_labels, val_labels=train_test_split(news['text'].to_numpy(),
                                                                            news['fake'].to_numpy(),
                                                                            test_size=0.25,
                                                                            random_state=42)

In [13]:
max_vocab_length = 10000
max_length = 418


from tensorflow.keras.layers.experimental.preprocessing import TextVectorization
text_vectorizer = TextVectorization(max_tokens=max_vocab_length,
                                   output_mode='int',
                                   output_sequence_length=max_length)

2022-01-14 12:12:56.125777: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-01-14 12:12:56.238545: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-01-14 12:12:56.239258: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-01-14 12:12:56.240337: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compil

In [14]:
text_vectorizer.adapt(train_sentences)

2022-01-14 12:12:58.765860: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:185] None of the MLIR Optimization Passes are enabled (registered 2)


In [15]:
import random
random_sentence = random.choice(train_sentences)
print(f"Original text;\n{random_sentence}\
\n\n Vectorized Version:")
text_vectorizer([random_sentence])

Original text;
Walmart, Target join call for 'Dreamer' legislation(Reuters) - Wal-Mart Stores Inc, Target Corp and PepsiCo Inc on Wednesday joined an expanded group of nearly 800 companies calling in a letter for U.S. legislation to protect immigrants brought into the country illegally by their parents from deportation, according to organizer FWD.us. The retailers and other companies asking Congress to pass a permanent replacement for Deferred Action for Childhood Arrivals, or DACA, reflect broadening pressure on political leaders to find a solution for the roughly 800,000 immigrants known as “Dreamers”.  A group of businesses less than half the size and weighted toward technology companies sent a similar letter in August, before President Donald Trump said he would end the program. The new letter reflects Trump’s decision and asks for a permanent solution to let DACA recipients remain in the United States. Trump said that DACA immigrants who have work permits that expire before March 

<tf.Tensor: shape=(1, 418), dtype=int64, numpy=
array([[6424, 1150, 1316,  301,   10,    1,    1, 6424, 4408, 1289, 1150,
        2144,    5,    1, 1289,    9,  202, 1488,   32, 3078,  157,    4,
         589, 4576,  435,  544,    7,    6,  532,   10,   31,  469,    3,
         713,  663,  876,   85,    2,  114, 1818,   21,   41, 1177,   26,
        3187,  167,    3, 5139,    1,    2, 7251,    5,   81,  435, 1027,
         195,    3,  939,    6, 2337, 2808,   10, 6362,  443,   10, 4872,
        4404,   48, 2941, 3636,    1,  839,    9,  125,  283,    3,  536,
           6, 1700,   10,    2, 2438, 7439,  663,  418,   19,    1,    6,
         157,    4, 1110,  541,   78,  950,    2, 2828,    5,    1,  885,
        1466,  435,  584,    6,  995,  532,    7,  643,  113,   39,   69,
          15,   14,   13,   35,  260,    2,  339,    2,   60,  532, 4906,
         153,  284,    5, 2430,   10,    6, 2337, 1700,    3,  406, 2941,
        4959,  979,    7,    2,   87,   76,   15,   14,    8, 29

In [16]:
words = text_vectorizer.get_vocabulary()
len(words)

10000

In [17]:
from tensorflow.keras import layers

embedding = layers.Embedding(input_dim=max_vocab_length,
                            output_dim=128,
                            embeddings_initializer='uniform',
                            input_length=max_length)
embedding

<keras.layers.embeddings.Embedding at 0x7fb72c25a2d0>

In [18]:
random_sentenc = random.choice(train_sentences)
print(f"Original text:\n{random_sentence}\
      \n\nEmbedd version: ")
embedding(text_vectorizer([random_sentence]))

Original text:
Walmart, Target join call for 'Dreamer' legislation(Reuters) - Wal-Mart Stores Inc, Target Corp and PepsiCo Inc on Wednesday joined an expanded group of nearly 800 companies calling in a letter for U.S. legislation to protect immigrants brought into the country illegally by their parents from deportation, according to organizer FWD.us. The retailers and other companies asking Congress to pass a permanent replacement for Deferred Action for Childhood Arrivals, or DACA, reflect broadening pressure on political leaders to find a solution for the roughly 800,000 immigrants known as “Dreamers”.  A group of businesses less than half the size and weighted toward technology companies sent a similar letter in August, before President Donald Trump said he would end the program. The new letter reflects Trump’s decision and asks for a permanent solution to let DACA recipients remain in the United States. Trump said that DACA immigrants who have work permits that expire before March 

<tf.Tensor: shape=(1, 418, 128), dtype=float32, numpy=
array([[[ 0.0240123 , -0.01046357, -0.02817433, ...,  0.00261321,
         -0.02424902, -0.02458638],
        [-0.01099094,  0.00658823, -0.0295269 , ...,  0.02844239,
          0.03787437, -0.03429838],
        [-0.0233611 ,  0.01126381, -0.04519541, ...,  0.03115543,
         -0.01237054, -0.00267513],
        ...,
        [ 0.00505142, -0.02706517, -0.01072867, ...,  0.00451959,
         -0.04512727,  0.0248014 ],
        [-0.0247488 , -0.00183231, -0.04264076, ...,  0.00315108,
          0.0139557 ,  0.00665152],
        [-0.04524728,  0.01013379, -0.04451755, ..., -0.03693455,
         -0.03847367, -0.04969149]]], dtype=float32)>

In [19]:
# Create an LSTM model
from tensorflow.keras import layers
inputs = layers.Input(shape=(1,),dtype='string')

# Pass inputs to text_vectorizer(convert text into numbers)
x = text_vectorizer(inputs) 

# Convert text_vectorizer layer into embedding layer
x = embedding(x)

# Model 
x = layers.LSTM(64)(x)
# output
outputs = layers.Dense(1, activation='sigmoid')(x)

# Pass inputs and outputs to our model
model = tf.keras.Model(inputs, outputs, name='model_LSTM')

In [20]:
# Get a summary
model.summary()

Model: "model_LSTM"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 1)]               0         
_________________________________________________________________
text_vectorization (TextVect (None, 418)               0         
_________________________________________________________________
embedding (Embedding)        (None, 418, 128)          1280000   
_________________________________________________________________
lstm (LSTM)                  (None, 64)                49408     
_________________________________________________________________
dense (Dense)                (None, 1)                 65        
Total params: 1,329,473
Trainable params: 1,329,473
Non-trainable params: 0
_________________________________________________________________


In [21]:
# Compile the model
model.compile(loss='binary_crossentropy',
             optimizer=tf.keras.optimizers.Adam(),
             metrics=['accuracy'])

In [22]:
model_history = model.fit(train_sentences,
                         train_labels,
                          epochs=5,
                         validation_data=(val_sentences, val_labels))

Epoch 1/5


2022-01-14 12:13:07.349321: I tensorflow/stream_executor/cuda/cuda_dnn.cc:369] Loaded cuDNN version 8005


Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [23]:
from textblob import TextBlob
import re
def clean_text(text):
    return ' '.join(re.sub('(@[A-Za-z0-9]+)|([^0-9A-Za-z \t])|(\w+:\/\/\S+)', ' ', text).split())
def analyze_sentiment(text):
    analysis = TextBlob(text)
    if analysis.sentiment.polarity > 0:
        return 'Positive'
    elif analysis.sentiment.polarity == 0:
        return 'Neutral'
    else:
        return 'Negative'

In [24]:
df=pd.DataFrame(news)
df

Unnamed: 0,text,fake
7726,Trump will accept election results if it's fai...,0
18738,POPULAR YOUTUBE PERSONALITY Goes Undercover Wi...,1
12116,Argentina fires head of navy over submarine tr...,0
10853,House seen passing North Korea sanctions bill ...,0
14340,HILLARY TO OBAMA: “call off your f–king dogs”A...,1
...,...,...
11057,Congress to probe Juniper 'back door' exposure...,0
16690,MAKE $250K A YEAR? You Could Qualify For Subsi...,1
6187,Japan PM aims to strengthen U.S. alliance unde...,0
2290,Saturday Night Live Takes On GOP Cowardice In...,1


In [25]:
df['clean_text'] = df['text'].apply(lambda x : clean_text(x))

In [26]:
df['Sentiment'] = df['clean_text'].apply(lambda x : analyze_sentiment(x))

In [27]:
def result(text):
    x=(np.array(text)).reshape(1,)
    cltxt=clean_text(text)
    x=model.predict(x)
    x=np.array(x).reshape(1)
    x=x[0]
    x=round(x)
    if x==0:
        y='Real'
    else:
        y='Fake'
    return analyze_sentiment(cltxt),y

In [28]:
result(news['text'].iloc[5])

('Positive', 'Fake')

In [29]:
x=(np.array(news['text'].iloc[5])).reshape(1,)
x=model.predict(x)
x=np.array(x).reshape(1)
x=x[0]
x=round(x)
x

1

In [30]:
np.floor(0.9)

0.0