In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime

import warnings
warnings.filterwarnings('ignore')

import tensorflow as tf


In [2]:
fake_news = pd.read_csv("../input/fake-and-real-news-dataset/Fake.csv")
fake_news.head()

Unnamed: 0,title,text,subject,date
0,Donald Trump Sends Out Embarrassing New Year’...,Donald Trump just couldn t wish all Americans ...,News,"December 31, 2017"
1,Drunk Bragging Trump Staffer Started Russian ...,House Intelligence Committee Chairman Devin Nu...,News,"December 31, 2017"
2,Sheriff David Clarke Becomes An Internet Joke...,"On Friday, it was revealed that former Milwauk...",News,"December 30, 2017"
3,Trump Is So Obsessed He Even Has Obama’s Name...,"On Christmas day, Donald Trump announced that ...",News,"December 29, 2017"
4,Pope Francis Just Called Out Donald Trump Dur...,Pope Francis used his annual Christmas Day mes...,News,"December 25, 2017"


In [3]:
true_news = pd.read_csv("../input/fake-and-real-news-dataset/True.csv")
true_news.head()

Unnamed: 0,title,text,subject,date
0,"As U.S. budget fight looms, Republicans flip t...",WASHINGTON (Reuters) - The head of a conservat...,politicsNews,"December 31, 2017"
1,U.S. military to accept transgender recruits o...,WASHINGTON (Reuters) - Transgender people will...,politicsNews,"December 29, 2017"
2,Senior U.S. Republican senator: 'Let Mr. Muell...,WASHINGTON (Reuters) - The special counsel inv...,politicsNews,"December 31, 2017"
3,FBI Russia probe helped by Australian diplomat...,WASHINGTON (Reuters) - Trump campaign adviser ...,politicsNews,"December 30, 2017"
4,Trump wants Postal Service to charge 'much mor...,SEATTLE/WASHINGTON (Reuters) - President Donal...,politicsNews,"December 29, 2017"


In [4]:
fake_news['fake']=1
fake_news.head()

Unnamed: 0,title,text,subject,date,fake
0,Donald Trump Sends Out Embarrassing New Year’...,Donald Trump just couldn t wish all Americans ...,News,"December 31, 2017",1
1,Drunk Bragging Trump Staffer Started Russian ...,House Intelligence Committee Chairman Devin Nu...,News,"December 31, 2017",1
2,Sheriff David Clarke Becomes An Internet Joke...,"On Friday, it was revealed that former Milwauk...",News,"December 30, 2017",1
3,Trump Is So Obsessed He Even Has Obama’s Name...,"On Christmas day, Donald Trump announced that ...",News,"December 29, 2017",1
4,Pope Francis Just Called Out Donald Trump Dur...,Pope Francis used his annual Christmas Day mes...,News,"December 25, 2017",1


In [5]:
true_news['fake'] = 0
true_news.head()

Unnamed: 0,title,text,subject,date,fake
0,"As U.S. budget fight looms, Republicans flip t...",WASHINGTON (Reuters) - The head of a conservat...,politicsNews,"December 31, 2017",0
1,U.S. military to accept transgender recruits o...,WASHINGTON (Reuters) - Transgender people will...,politicsNews,"December 29, 2017",0
2,Senior U.S. Republican senator: 'Let Mr. Muell...,WASHINGTON (Reuters) - The special counsel inv...,politicsNews,"December 31, 2017",0
3,FBI Russia probe helped by Australian diplomat...,WASHINGTON (Reuters) - Trump campaign adviser ...,politicsNews,"December 30, 2017",0
4,Trump wants Postal Service to charge 'much mor...,SEATTLE/WASHINGTON (Reuters) - President Donal...,politicsNews,"December 29, 2017",0


In [6]:
news = pd.concat([fake_news, true_news])
news.sample(5)

Unnamed: 0,title,text,subject,date,fake
11694,Philippine police chief defends deadly drug wa...,MANILA (Reuters) - The police chief of the Phi...,worldnews,"December 20, 2017",0
10911,Pennsylvania $2 bln budget gap is 'time bomb':...,NEW YORK (Reuters) - Pennsylvania’s $2 billion...,politicsNews,"February 9, 2016",0
7805,Trump pledges strong U.S. ties with India if e...,"EDISON, N.J. (Reuters) - U.S. Republican presi...",politicsNews,"October 16, 2016",0
2650,CNN Just Went After Trump For His Latest Melt...,Donald Trump has been on an insane Twitter ram...,News,"February 6, 2017",1
2435,Trump says Senate should have passed healthcar...,"BRENTWOOD, N.Y. (Reuters) - U.S. President Don...",politicsNews,"July 28, 2017",0


In [7]:
news.isna().sum()

title      0
text       0
subject    0
date       0
fake       0
dtype: int64

In [8]:
news['date'] = pd.to_datetime(news['date'], errors='coerce')
news['Year'] = news['date'].dt.year
news['Month'] = news['date'].dt.month

news.head()

Unnamed: 0,title,text,subject,date,fake,Year,Month
0,Donald Trump Sends Out Embarrassing New Year’...,Donald Trump just couldn t wish all Americans ...,News,2017-12-31,1,2017.0,12.0
1,Drunk Bragging Trump Staffer Started Russian ...,House Intelligence Committee Chairman Devin Nu...,News,2017-12-31,1,2017.0,12.0
2,Sheriff David Clarke Becomes An Internet Joke...,"On Friday, it was revealed that former Milwauk...",News,2017-12-30,1,2017.0,12.0
3,Trump Is So Obsessed He Even Has Obama’s Name...,"On Christmas day, Donald Trump announced that ...",News,2017-12-29,1,2017.0,12.0
4,Pope Francis Just Called Out Donald Trump Dur...,Pope Francis used his annual Christmas Day mes...,News,2017-12-25,1,2017.0,12.0


In [9]:
news['text'] = news['title'] + news['text']
news.drop(labels=['title'], axis=1,inplace=True)
news.head()

Unnamed: 0,text,subject,date,fake,Year,Month
0,Donald Trump Sends Out Embarrassing New Year’...,News,2017-12-31,1,2017.0,12.0
1,Drunk Bragging Trump Staffer Started Russian ...,News,2017-12-31,1,2017.0,12.0
2,Sheriff David Clarke Becomes An Internet Joke...,News,2017-12-30,1,2017.0,12.0
3,Trump Is So Obsessed He Even Has Obama’s Name...,News,2017-12-29,1,2017.0,12.0
4,Pope Francis Just Called Out Donald Trump Dur...,News,2017-12-25,1,2017.0,12.0


In [10]:
news.drop(labels=['subject','date', 'Year','Month'], axis=1, inplace=True)
news.head()

Unnamed: 0,text,fake
0,Donald Trump Sends Out Embarrassing New Year’...,1
1,Drunk Bragging Trump Staffer Started Russian ...,1
2,Sheriff David Clarke Becomes An Internet Joke...,1
3,Trump Is So Obsessed He Even Has Obama’s Name...,1
4,Pope Francis Just Called Out Donald Trump Dur...,1


In [11]:
news = news.sample(frac=1)
news.head()

Unnamed: 0,text,fake
16655,Poland's PM Szydlo to reshuffle cabinet soonWA...,0
19321,NICOLE KIDMAN BREAKS RANKS With Hollywood Left...,1
17069,BREAKING: Putin Tramples Obama’s Imaginary Red...,1
8270,Clinton voices concern about Russian interfere...,0
2329,Factbox: Highlights of Trump's calls with lead...,0


In [12]:
from sklearn.model_selection import train_test_split

train_sentences, val_sentences, train_labels, val_labels=train_test_split(news['text'].to_numpy(),
                                                                            news['fake'].to_numpy(),
                                                                            test_size=0.25,
                                                                            random_state=42)

In [13]:
max_vocab_length = 10000
max_length = 418


from tensorflow.keras.layers.experimental.preprocessing import TextVectorization
text_vectorizer = TextVectorization(max_tokens=max_vocab_length,
                                   output_mode='int',
                                   output_sequence_length=max_length)

2022-01-14 12:38:02.602188: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-01-14 12:38:02.707704: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-01-14 12:38:02.708428: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-01-14 12:38:02.709620: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compil

In [14]:
text_vectorizer.adapt(train_sentences)

2022-01-14 12:38:05.088727: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:185] None of the MLIR Optimization Passes are enabled (registered 2)


In [15]:
import random
random_sentence = random.choice(train_sentences)
print(f"Original text;\n{random_sentence}\
\n\n Vectorized Version:")
text_vectorizer([random_sentence])

Original text;
Melania's Slovenian hometown eyes Trump win as boon for tourismSEVNICA, Slovenia (Reuters) - The small Slovenian town of Sevnica, Melania Trump’s birthplace, savored her husband’s shock win in the U.S. election as a likely boon for tourism on Wednesday, while a former schoolmate remembered her as “creative and innovative”. Until about a year ago, the town of fewer than 5,000 people, nestled in the foothills of the Alps, was scarcely known in Slovenia, let alone abroad.   But on Monday, Sevnica was clearly looking forward to a financial spin-off from being the hometown of the 46-year-old future U.S. First Lady. “Sometimes the pressure of the media was too hard. The people of Sevnica are not used to it. On the other hand, the global attention is positive because Sevnica is developing into a tourist destination,” mayor Srecko Ocvirk said. Speaking to Reuters shortly after 7 a.m. (0600 GMT) in Sevnica’s Central cafe, the smiling mayor said local residents had supported Trump

<tf.Tensor: shape=(1, 418), dtype=int64, numpy=
array([[   1,    1, 7004, 2534,   15,  365,   19,    1,   10,    1, 9737,
          70,    2,  754,    1,  921,    4,    1, 2066,  152,    1,    1,
          53,    1, 3855,  365,    7,    2,   32,   99,   19,    5,  305,
           1,   10, 4639,    9,  204,  113,    5,  106,    1, 8805,   53,
          19,    1,    6,    1,  385,   39,    5,  111,  530,    2,  921,
           4, 3188,   78, 4398,   46,    1,    7,    2,    1,    4,    2,
           1,   18,    1,  427,    7, 9737,  395, 1558, 2118,   34,    9,
         219,    1,   18,  875,  771,  867,    3,    5,  460,    1,   26,
         108,    2, 7004,    4,    2,    1,  545,   32,  105, 1495,    1,
           2,  840,    4,    2,  120,   18,  301,  558,    2,   46,    4,
           1,   29,   25,  256,    3,   16,    9,    2,   80,  974,    2,
         578, 1155,   12, 1701,   92,    1,   12, 3196,   85,    5, 5951,
           1,  894,    1,    1,   14,  671,    3,   70, 2236,   

In [16]:
words = text_vectorizer.get_vocabulary()
len(words)

10000

In [17]:
from tensorflow.keras import layers

embedding = layers.Embedding(input_dim=max_vocab_length,
                            output_dim=128,
                            embeddings_initializer='uniform',
                            input_length=max_length)
embedding

<keras.layers.embeddings.Embedding at 0x7fe8ae1ba850>

In [18]:
random_sentenc = random.choice(train_sentences)
print(f"Original text:\n{random_sentence}\
      \n\nEmbedd version: ")
embedding(text_vectorizer([random_sentence]))

Original text:
Melania's Slovenian hometown eyes Trump win as boon for tourismSEVNICA, Slovenia (Reuters) - The small Slovenian town of Sevnica, Melania Trump’s birthplace, savored her husband’s shock win in the U.S. election as a likely boon for tourism on Wednesday, while a former schoolmate remembered her as “creative and innovative”. Until about a year ago, the town of fewer than 5,000 people, nestled in the foothills of the Alps, was scarcely known in Slovenia, let alone abroad.   But on Monday, Sevnica was clearly looking forward to a financial spin-off from being the hometown of the 46-year-old future U.S. First Lady. “Sometimes the pressure of the media was too hard. The people of Sevnica are not used to it. On the other hand, the global attention is positive because Sevnica is developing into a tourist destination,” mayor Srecko Ocvirk said. Speaking to Reuters shortly after 7 a.m. (0600 GMT) in Sevnica’s Central cafe, the smiling mayor said local residents had supported Trump

<tf.Tensor: shape=(1, 418, 128), dtype=float32, numpy=
array([[[ 0.02650726,  0.04713878,  0.04025898, ..., -0.03885808,
          0.04728569,  0.0098158 ],
        [ 0.02650726,  0.04713878,  0.04025898, ..., -0.03885808,
          0.04728569,  0.0098158 ],
        [-0.0198065 , -0.04703027, -0.02483473, ...,  0.03968923,
          0.04142232, -0.0137097 ],
        ...,
        [-0.00219069,  0.01425601, -0.04276626, ...,  0.04618606,
          0.03161791, -0.01626281],
        [-0.03293516,  0.0091668 ,  0.04454314, ...,  0.01265756,
         -0.02992531,  0.02483901],
        [ 0.02183039, -0.00647342, -0.0165597 , ..., -0.00869855,
         -0.00888197, -0.01361972]]], dtype=float32)>

In [19]:
# Create an LSTM model
from tensorflow.keras import layers
inputs = layers.Input(shape=(1,),dtype='string')

# Pass inputs to text_vectorizer(convert text into numbers)
x = text_vectorizer(inputs) 

# Convert text_vectorizer layer into embedding layer
x = embedding(x)

# Model 
x = layers.LSTM(64)(x)
# output
outputs = layers.Dense(1, activation='sigmoid')(x)

# Pass inputs and outputs to our model
model = tf.keras.Model(inputs, outputs, name='model_LSTM')

In [20]:
# Get a summary
model.summary()

Model: "model_LSTM"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 1)]               0         
_________________________________________________________________
text_vectorization (TextVect (None, 418)               0         
_________________________________________________________________
embedding (Embedding)        (None, 418, 128)          1280000   
_________________________________________________________________
lstm (LSTM)                  (None, 64)                49408     
_________________________________________________________________
dense (Dense)                (None, 1)                 65        
Total params: 1,329,473
Trainable params: 1,329,473
Non-trainable params: 0
_________________________________________________________________


In [21]:
# Compile the model
model.compile(loss='binary_crossentropy',
             optimizer=tf.keras.optimizers.Adam(),
             metrics=['accuracy'])

In [22]:
model_history = model.fit(train_sentences,
                         train_labels,
                          epochs=5,
                         validation_data=(val_sentences, val_labels))

Epoch 1/5


2022-01-14 12:38:13.782567: I tensorflow/stream_executor/cuda/cuda_dnn.cc:369] Loaded cuDNN version 8005


Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [23]:
from textblob import TextBlob
import re
def clean_text(text):
    return ' '.join(re.sub('(@[A-Za-z0-9]+)|([^0-9A-Za-z \t])|(\w+:\/\/\S+)', ' ', text).split())
def analyze_sentiment(text):
    analysis = TextBlob(text)
    if analysis.sentiment.polarity > 0:
        return 'Positive'
    elif analysis.sentiment.polarity == 0:
        return 'Neutral'
    else:
        return 'Negative'

In [24]:
df=pd.DataFrame(news)
df

Unnamed: 0,text,fake
16655,Poland's PM Szydlo to reshuffle cabinet soonWA...,0
19321,NICOLE KIDMAN BREAKS RANKS With Hollywood Left...,1
17069,BREAKING: Putin Tramples Obama’s Imaginary Red...,1
8270,Clinton voices concern about Russian interfere...,0
2329,Factbox: Highlights of Trump's calls with lead...,0
...,...,...
6542,U.S. Republican senator introduces Obamacare r...,0
11732,WOW! Kellyanne Conway’s “MISTAKE” Forced Lefti...,1
11494,U.N. appoints American to lead Children's Fund...,0
1222,U.S. officials try to ease concerns Trump may ...,0


In [25]:
df['clean_text'] = df['text'].apply(lambda x : clean_text(x))

In [26]:
df['Sentiment'] = df['clean_text'].apply(lambda x : analyze_sentiment(x))

In [27]:
def result(text):
    x=(np.array(text)).reshape(1,)
    cltxt=clean_text(text)
    x=model.predict(x)
    x=np.array(x).reshape(1)
    x=x[0]
    x=round(x)
    if x==0:
        y='Real'
    else:
        y='Fake'
    return analyze_sentiment(cltxt),y

In [28]:
result(news['text'].iloc[5])

('Positive', 'Real')

In [29]:
x=(np.array(news['text'].iloc[5])).reshape(1,)
x=model.predict(x)
x=np.array(x).reshape(1)
x=x[0]
x=round(x)
x

0

In [30]:
np.floor(0.9)

0.0