In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime

import warnings
warnings.filterwarnings('ignore')

import tensorflow as tf


In [2]:
fake_news = pd.read_csv("../input/fake-and-real-news-dataset/Fake.csv")
fake_news.head()

Unnamed: 0,title,text,subject,date
0,Donald Trump Sends Out Embarrassing New Year’...,Donald Trump just couldn t wish all Americans ...,News,"December 31, 2017"
1,Drunk Bragging Trump Staffer Started Russian ...,House Intelligence Committee Chairman Devin Nu...,News,"December 31, 2017"
2,Sheriff David Clarke Becomes An Internet Joke...,"On Friday, it was revealed that former Milwauk...",News,"December 30, 2017"
3,Trump Is So Obsessed He Even Has Obama’s Name...,"On Christmas day, Donald Trump announced that ...",News,"December 29, 2017"
4,Pope Francis Just Called Out Donald Trump Dur...,Pope Francis used his annual Christmas Day mes...,News,"December 25, 2017"


In [3]:
true_news = pd.read_csv("../input/fake-and-real-news-dataset/True.csv")
true_news.head()

Unnamed: 0,title,text,subject,date
0,"As U.S. budget fight looms, Republicans flip t...",WASHINGTON (Reuters) - The head of a conservat...,politicsNews,"December 31, 2017"
1,U.S. military to accept transgender recruits o...,WASHINGTON (Reuters) - Transgender people will...,politicsNews,"December 29, 2017"
2,Senior U.S. Republican senator: 'Let Mr. Muell...,WASHINGTON (Reuters) - The special counsel inv...,politicsNews,"December 31, 2017"
3,FBI Russia probe helped by Australian diplomat...,WASHINGTON (Reuters) - Trump campaign adviser ...,politicsNews,"December 30, 2017"
4,Trump wants Postal Service to charge 'much mor...,SEATTLE/WASHINGTON (Reuters) - President Donal...,politicsNews,"December 29, 2017"


In [4]:
fake_news['fake']=1
fake_news.head()

Unnamed: 0,title,text,subject,date,fake
0,Donald Trump Sends Out Embarrassing New Year’...,Donald Trump just couldn t wish all Americans ...,News,"December 31, 2017",1
1,Drunk Bragging Trump Staffer Started Russian ...,House Intelligence Committee Chairman Devin Nu...,News,"December 31, 2017",1
2,Sheriff David Clarke Becomes An Internet Joke...,"On Friday, it was revealed that former Milwauk...",News,"December 30, 2017",1
3,Trump Is So Obsessed He Even Has Obama’s Name...,"On Christmas day, Donald Trump announced that ...",News,"December 29, 2017",1
4,Pope Francis Just Called Out Donald Trump Dur...,Pope Francis used his annual Christmas Day mes...,News,"December 25, 2017",1


In [5]:
true_news['fake'] = 0
true_news.head()

Unnamed: 0,title,text,subject,date,fake
0,"As U.S. budget fight looms, Republicans flip t...",WASHINGTON (Reuters) - The head of a conservat...,politicsNews,"December 31, 2017",0
1,U.S. military to accept transgender recruits o...,WASHINGTON (Reuters) - Transgender people will...,politicsNews,"December 29, 2017",0
2,Senior U.S. Republican senator: 'Let Mr. Muell...,WASHINGTON (Reuters) - The special counsel inv...,politicsNews,"December 31, 2017",0
3,FBI Russia probe helped by Australian diplomat...,WASHINGTON (Reuters) - Trump campaign adviser ...,politicsNews,"December 30, 2017",0
4,Trump wants Postal Service to charge 'much mor...,SEATTLE/WASHINGTON (Reuters) - President Donal...,politicsNews,"December 29, 2017",0


In [6]:
news = pd.concat([fake_news, true_news])
news.sample(5)

Unnamed: 0,title,text,subject,date,fake
2914,Trump considering lawyer for White House Russi...,(Reuters) - President Donald Trump is consider...,politicsNews,"July 3, 2017",0
2210,Sean Spicer Just Gave Trump Credit For Someth...,Donald Trump has basically built his life arou...,News,"March 10, 2017",1
10603,"CONSERVATIVE ACTIVIST, Author Mike Chernovich ...","The Washington Post [Compost] reports: So far,...",politics,"Jun 16, 2017",1
8926,New rules give protesters more leeway at Repub...,CLEVELAND (Reuters) - A federal judge on Wedne...,politicsNews,"June 29, 2016",0
17,"Trump on Twitter (Dec 26) - Hillary Clinton, T...",The following statements were posted to the ve...,politicsNews,"December 26, 2017",0


In [7]:
news.isna().sum()

title      0
text       0
subject    0
date       0
fake       0
dtype: int64

In [8]:
news['date'] = pd.to_datetime(news['date'], errors='coerce')
news['Year'] = news['date'].dt.year
news['Month'] = news['date'].dt.month

news.head()

Unnamed: 0,title,text,subject,date,fake,Year,Month
0,Donald Trump Sends Out Embarrassing New Year’...,Donald Trump just couldn t wish all Americans ...,News,2017-12-31,1,2017.0,12.0
1,Drunk Bragging Trump Staffer Started Russian ...,House Intelligence Committee Chairman Devin Nu...,News,2017-12-31,1,2017.0,12.0
2,Sheriff David Clarke Becomes An Internet Joke...,"On Friday, it was revealed that former Milwauk...",News,2017-12-30,1,2017.0,12.0
3,Trump Is So Obsessed He Even Has Obama’s Name...,"On Christmas day, Donald Trump announced that ...",News,2017-12-29,1,2017.0,12.0
4,Pope Francis Just Called Out Donald Trump Dur...,Pope Francis used his annual Christmas Day mes...,News,2017-12-25,1,2017.0,12.0


In [9]:
news['text'] = news['title'] + news['text']
news.drop(labels=['title'], axis=1,inplace=True)
news.head()

Unnamed: 0,text,subject,date,fake,Year,Month
0,Donald Trump Sends Out Embarrassing New Year’...,News,2017-12-31,1,2017.0,12.0
1,Drunk Bragging Trump Staffer Started Russian ...,News,2017-12-31,1,2017.0,12.0
2,Sheriff David Clarke Becomes An Internet Joke...,News,2017-12-30,1,2017.0,12.0
3,Trump Is So Obsessed He Even Has Obama’s Name...,News,2017-12-29,1,2017.0,12.0
4,Pope Francis Just Called Out Donald Trump Dur...,News,2017-12-25,1,2017.0,12.0


In [10]:
news.drop(labels=['subject','date', 'Year','Month'], axis=1, inplace=True)
news.head()

Unnamed: 0,text,fake
0,Donald Trump Sends Out Embarrassing New Year’...,1
1,Drunk Bragging Trump Staffer Started Russian ...,1
2,Sheriff David Clarke Becomes An Internet Joke...,1
3,Trump Is So Obsessed He Even Has Obama’s Name...,1
4,Pope Francis Just Called Out Donald Trump Dur...,1


In [11]:
news = news.sample(frac=1)
news.head()

Unnamed: 0,text,fake
14306,Unclear if anyone will attend Mugabe cabinet m...,0
18455,Factbox: Main elements in France's counter-ter...,0
13102,Turkish academics to be tried in April over Ku...,0
3473,Republicans To Give MASSIVE Tax Cuts To The W...,1
10285,SCARAMUCCI’S WIFE FILES FOR DIVORCE…Why She’s ...,1


In [12]:
from sklearn.model_selection import train_test_split

train_sentences, val_sentences, train_labels, val_labels=train_test_split(news['text'].to_numpy(),
                                                                            news['fake'].to_numpy(),
                                                                            test_size=0.25,
                                                                            random_state=42)

In [13]:
max_vocab_length = 10000
max_length = 418


from tensorflow.keras.layers.experimental.preprocessing import TextVectorization
text_vectorizer = TextVectorization(max_tokens=max_vocab_length,
                                   output_mode='int',
                                   output_sequence_length=max_length)

2022-01-14 12:31:06.071147: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-01-14 12:31:06.180947: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-01-14 12:31:06.181679: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-01-14 12:31:06.182784: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compil

In [14]:
text_vectorizer.adapt(train_sentences)

2022-01-14 12:31:08.613727: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:185] None of the MLIR Optimization Passes are enabled (registered 2)


In [15]:
import random
random_sentence = random.choice(train_sentences)
print(f"Original text;\n{random_sentence}\
\n\n Vectorized Version:")
text_vectorizer([random_sentence])

Original text;
TOP TEN Clinton Scandals That Wikileaks Exposed And That YOU Need To Know About1. Mrs. Clinton had cozy and improper relationship with the mainstream media.2. The State Department paid special attention to  Friends of Bill. 3. Mrs. Clinton argued for  a hemispheric common market with open trade and open borders. 4. The Clinton campaign was in touch with Department of Justice officials regarding the release of her emails.5. The Clinton camp was tipped off to the release of the Benghazi emails.6. Mrs. Clinton admitted sometimes her public and private positions differ.7. Mrs. Clinton s spokesman mocked Catholics and evangelicals as  severely backwards. 8. Mrs. Clinton admitted she has a hard time relating to the struggles of the middle class.9. Mrs. Clinton campaign used Benghazi as a distraction from the email scandal.10. The Clinton team strategized on how to delay releasing emails, knowing it was against the law.Via: WT

 Vectorized Version:


<tf.Tensor: shape=(1, 418), dtype=int64, numpy=
array([[ 262, 2890,   73, 3660,    8, 1577, 2463,    6,    8,   43,  233,
           3,  155,    1, 2751,   73,   42, 9364,    6, 6588,  975,   17,
           2, 1125,    1,    2,   59,  178,  822,  481, 1152,    3,  959,
           4,  143,  767, 2751,   73, 1622,   10,    5,    1, 1251,  827,
          17,  485,  283,    6,  485, 1627,  847,    2,   73,   93,   18,
           7, 3473,   17,  178,    4,  265,  161, 1304,    2,  659,    4,
          53,    1,    2,   73, 1887,   18,    1,  220,    3,    2,  659,
           4,    2, 2086,    1, 2751,   73, 1651, 1827,   53,  166,    6,
         502, 1530,    1, 2751,   73,   11,  366, 3867, 7185,    6, 8564,
          19, 5365, 9844,  584, 2751,   73, 1651,   52,   22,    5,  562,
         100, 6883,    3,    2, 5880,    4,    2,  620,    1, 2751,   73,
          93,  258, 2086,   19,    5, 7425,   26,    2,  508,    1,    2,
          73,  450,    1,    9,  101,    3, 2383, 3778,  600, 34

In [16]:
words = text_vectorizer.get_vocabulary()
len(words)

10000

In [17]:
from tensorflow.keras import layers

embedding = layers.Embedding(input_dim=max_vocab_length,
                            output_dim=128,
                            embeddings_initializer='uniform',
                            input_length=max_length)
embedding

<keras.layers.embeddings.Embedding at 0x7f0f0c826510>

In [18]:
random_sentenc = random.choice(train_sentences)
print(f"Original text:\n{random_sentence}\
      \n\nEmbedd version: ")
embedding(text_vectorizer([random_sentence]))

Original text:
TOP TEN Clinton Scandals That Wikileaks Exposed And That YOU Need To Know About1. Mrs. Clinton had cozy and improper relationship with the mainstream media.2. The State Department paid special attention to  Friends of Bill. 3. Mrs. Clinton argued for  a hemispheric common market with open trade and open borders. 4. The Clinton campaign was in touch with Department of Justice officials regarding the release of her emails.5. The Clinton camp was tipped off to the release of the Benghazi emails.6. Mrs. Clinton admitted sometimes her public and private positions differ.7. Mrs. Clinton s spokesman mocked Catholics and evangelicals as  severely backwards. 8. Mrs. Clinton admitted she has a hard time relating to the struggles of the middle class.9. Mrs. Clinton campaign used Benghazi as a distraction from the email scandal.10. The Clinton team strategized on how to delay releasing emails, knowing it was against the law.Via: WT      

Embedd version: 


<tf.Tensor: shape=(1, 418, 128), dtype=float32, numpy=
array([[[-0.00305661, -0.0307065 ,  0.04842762, ...,  0.01771382,
         -0.02969207,  0.04334847],
        [ 0.0218651 ,  0.00444781,  0.03491278, ...,  0.04229176,
         -0.03736008, -0.00869061],
        [ 0.01990941, -0.01489741, -0.01931634, ..., -0.01029264,
         -0.02559783, -0.02871361],
        ...,
        [-0.03686148,  0.02837714, -0.00417056, ..., -0.00316422,
         -0.01192106, -0.00202546],
        [-0.03686148,  0.02837714, -0.00417056, ..., -0.00316422,
         -0.01192106, -0.00202546],
        [-0.03686148,  0.02837714, -0.00417056, ..., -0.00316422,
         -0.01192106, -0.00202546]]], dtype=float32)>

In [19]:
# Create an LSTM model
from tensorflow.keras import layers
inputs = layers.Input(shape=(1,),dtype='string')

# Pass inputs to text_vectorizer(convert text into numbers)
x = text_vectorizer(inputs) 

# Convert text_vectorizer layer into embedding layer
x = embedding(x)

# Model 
x = layers.LSTM(64)(x)
# output
outputs = layers.Dense(1, activation='sigmoid')(x)

# Pass inputs and outputs to our model
model = tf.keras.Model(inputs, outputs, name='model_LSTM')

In [20]:
# Get a summary
model.summary()

Model: "model_LSTM"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 1)]               0         
_________________________________________________________________
text_vectorization (TextVect (None, 418)               0         
_________________________________________________________________
embedding (Embedding)        (None, 418, 128)          1280000   
_________________________________________________________________
lstm (LSTM)                  (None, 64)                49408     
_________________________________________________________________
dense (Dense)                (None, 1)                 65        
Total params: 1,329,473
Trainable params: 1,329,473
Non-trainable params: 0
_________________________________________________________________


In [21]:
# Compile the model
model.compile(loss='binary_crossentropy',
             optimizer=tf.keras.optimizers.Adam(),
             metrics=['accuracy'])

In [22]:
model_history = model.fit(train_sentences,
                         train_labels,
                          epochs=5,
                         validation_data=(val_sentences, val_labels))

Epoch 1/5


2022-01-14 12:31:17.300430: I tensorflow/stream_executor/cuda/cuda_dnn.cc:369] Loaded cuDNN version 8005


Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [23]:
from textblob import TextBlob
import re
def clean_text(text):
    return ' '.join(re.sub('(@[A-Za-z0-9]+)|([^0-9A-Za-z \t])|(\w+:\/\/\S+)', ' ', text).split())
def analyze_sentiment(text):
    analysis = TextBlob(text)
    if analysis.sentiment.polarity > 0:
        return 'Positive'
    elif analysis.sentiment.polarity == 0:
        return 'Neutral'
    else:
        return 'Negative'

In [24]:
df=pd.DataFrame(news)
df

Unnamed: 0,text,fake
14306,Unclear if anyone will attend Mugabe cabinet m...,0
18455,Factbox: Main elements in France's counter-ter...,0
13102,Turkish academics to be tried in April over Ku...,0
3473,Republicans To Give MASSIVE Tax Cuts To The W...,1
10285,SCARAMUCCI’S WIFE FILES FOR DIVORCE…Why She’s ...,1
...,...,...
22659,"SHOUT POLL: Will Donald Trump hold his lead, o...",1
12204,U.N. appalled at 30-year sentence for woman un...,0
10691,“DRIVE THEM OUT OF YOUR PLACES OF WORSHIP!” Lo...,1
18088,"London museum says serious incident outside, w...",0


In [25]:
df['clean_text'] = df['text'].apply(lambda x : clean_text(x))

In [26]:
df['Sentiment'] = df['clean_text'].apply(lambda x : analyze_sentiment(x))

In [27]:
def result(text):
    x=(np.array(text)).reshape(1,)
    cltxt=clean_text(text)
    x=model.predict(x)
    x=np.array(x).reshape(1)
    x=x[0]
    x=round(x)
    if x==0:
        y='Real'
    else:
        y='Fake'
    return analyze_sentiment(cltxt),y

In [28]:
result(news['text'].iloc[5])

('Positive', 'Real')

In [29]:
x=(np.array(news['text'].iloc[5])).reshape(1,)
x=model.predict(x)
x=np.array(x).reshape(1)
x=x[0]
x=round(x)
x

0

In [30]:
np.floor(0.9)

0.0