In [4]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, SimpleRNN, Input, Embedding, Bidirectional
import warnings
warnings.filterwarnings("ignore")

#### data collection

In [6]:
df=pd.read_csv(r"E:\RNN\fake_and_real_news.csv")
df

Unnamed: 0,Text,label
0,Top Trump Surrogate BRUTALLY Stabs Him In The...,Fake
1,U.S. conservative leader optimistic of common ...,Real
2,"Trump proposes U.S. tax overhaul, stirs concer...",Real
3,Court Forces Ohio To Allow Millions Of Illega...,Fake
4,Democrats say Trump agrees to work on immigrat...,Real
...,...,...
9895,Wikileaks Admits To Screwing Up IMMENSELY Wit...,Fake
9896,Trump consults Republican senators on Fed chie...,Real
9897,Trump lawyers say judge lacks jurisdiction for...,Real
9898,WATCH: Right-Wing Pastor Falsely Credits Trum...,Fake


In [17]:
df.label.value_counts()

label
Fake    5000
Real    4900
Name: count, dtype: int64

In [7]:
df['label_enc'] = df['label'].replace({"Real":1, "Fake":0})
df

Unnamed: 0,Text,label,label_enc
0,Top Trump Surrogate BRUTALLY Stabs Him In The...,Fake,0
1,U.S. conservative leader optimistic of common ...,Real,1
2,"Trump proposes U.S. tax overhaul, stirs concer...",Real,1
3,Court Forces Ohio To Allow Millions Of Illega...,Fake,0
4,Democrats say Trump agrees to work on immigrat...,Real,1
...,...,...,...
9895,Wikileaks Admits To Screwing Up IMMENSELY Wit...,Fake,0
9896,Trump consults Republican senators on Fed chie...,Real,1
9897,Trump lawyers say judge lacks jurisdiction for...,Real,1
9898,WATCH: Right-Wing Pastor Falsely Credits Trum...,Fake,0


### Text Preprocessing

In [8]:
vocab_size=5000
max_len=100

tokenizer = Tokenizer(num_words = vocab_size, lower=True, oov_token = "<OOV>") # Top 5000 most frequent words
tokenizer.fit_on_texts(df['Text'])
sequence=tokenizer.texts_to_sequences(df['Text'])

In [9]:
tokenizer.num_words

5000

In [31]:
tokenizer.word_counts

OrderedDict([('top', 2077),
             ('trump', 55818),
             ('surrogate', 68),
             ('brutally', 53),
             ('stabs', 4),
             ('him', 8204),
             ('in', 70543),
             ('the', 212622),
             ('back', 3272),
             ('‘he’s', 6),
             ('pathetic’', 1),
             ('video', 2673),
             ('it', 32196),
             ('s', 48841),
             ('looking', 811),
             ('as', 22988),
             ('though', 1694),
             ('republican', 8297),
             ('presidential', 2568),
             ('candidate', 1220),
             ('donald', 11814),
             ('is', 41263),
             ('losing', 405),
             ('support', 2735),
             ('even', 5394),
             ('from', 16971),
             ('within', 731),
             ('his', 27792),
             ('own', 2608),
             ('ranks', 112),
             ('you', 12571),
             ('know', 3607),
             ('things', 1877),
           

In [10]:
x=dict(tokenizer.word_counts)
x

{'top': 2077,
 'trump': 55818,
 'surrogate': 68,
 'brutally': 53,
 'stabs': 4,
 'him': 8204,
 'in': 70543,
 'the': 212622,
 'back': 3272,
 '‘he’s': 6,
 'pathetic’': 1,
 'video': 2673,
 'it': 32196,
 's': 48841,
 'looking': 811,
 'as': 22988,
 'though': 1694,
 'republican': 8297,
 'presidential': 2568,
 'candidate': 1220,
 'donald': 11814,
 'is': 41263,
 'losing': 405,
 'support': 2735,
 'even': 5394,
 'from': 16971,
 'within': 731,
 'his': 27792,
 'own': 2608,
 'ranks': 112,
 'you': 12571,
 'know': 3607,
 'things': 1877,
 'are': 16919,
 'getting': 1255,
 'bad': 1161,
 'when': 7634,
 'your': 2616,
 'surrogates': 79,
 'start': 822,
 'turning': 201,
 'against': 4638,
 'which': 8108,
 'exactly': 693,
 'what': 8181,
 'just': 7831,
 'happened': 716,
 'on': 45605,
 'fox': 1701,
 'news': 4423,
 'newt': 87,
 'gingrich': 189,
 'called': 3096,
 'pathetic': 284,
 'knows': 685,
 'that': 62391,
 'needs': 936,
 'to': 127925,
 'keep': 1484,
 'focus': 582,
 'hillary': 3046,
 'clinton': 3847,
 'if': 909

In [12]:
sorted(x.items(),key=lambda x:x [1],reverse=False)

[('pathetic’', 1),
 ('meadows’', 1),
 ('stirs', 1),
 ('lcp', 1),
 ('0711', 1),
 ('0717', 1),
 ('failingnewyorktimes', 1),
 ('1410', 1),
 ('worse”', 1),
 ('l2n1my1md', 1),
 ("heart'", 1),
 ('rampaging', 1),
 ('sanjosepd', 1),
 ('99wgi2gilf', 1),
 ('trujillo', 1),
 ('newsdamian', 1),
 ('2016san', 1),
 ('evergreen', 1),
 ('yazadi', 1),
 ('wwiii’', 1),
 ('2017senator', 1),
 ('“dossier”', 1),
 ('ohr’s', 1),
 ('subcontractor', 1),
 ('ohrs', 1),
 ('british\xa0intelligence', 1),
 ('indoctrinate', 1),
 ('amarillo', 1),
 ('evangelism', 1),
 ('workshop', 1),
 ('nikolaos', 1),
 ('myra', 1),
 ('folktales', 1),
 ('chimneys', 1),
 ('noem\xa0told', 1),
 ('abetter', 1),
 ('‘trumpian’', 1),
 ('insiders”', 1),
 ('“bend', 1),
 ('‘big’', 1),
 ('“reds”', 1),
 ('“yellows”', 1),
 ('“moore’s', 1),
 ('‘small', 1),
 ('reignites', 1),
 ('vallarta', 1),
 ('camo', 1),
 ('oozing', 1),
 ('1352', 1),
 ('opacity', 1),
 ('‘follow', 1),
 ('money’', 1),
 ('unintelligently', 1),
 ('twittereither', 1),
 ('beet', 1),
 ('rati

In [15]:
x=pad_sequences(sequence,maxlen=max_len,padding="post")
x

array([[ 439,  381,  329, ..., 2927,  165,  160],
       [   5,  373,   81, ...,    6,  212,  151],
       [3609,   97,   72, ..., 1987,   72,  435],
       ...,
       [   6, 1665,    7, ...,  176,    4, 4460],
       [   1, 2530,    3, ...,  108,  165,  160],
       [4295,   38, 2928, ...,   27, 3973,  193]])

In [16]:
y=df["label_enc"]
y

0       0
1       1
2       1
3       0
4       1
       ..
9895    0
9896    1
9897    1
9898    0
9899    0
Name: label_enc, Length: 9900, dtype: int64

In [18]:
x_train,x_test,y_train,y_test=train_test_split(x,y,test_size=0.2,random_state=1,stratify=y)

In [19]:
x_train

array([[ 461,    9,   10, ..., 1943,  165,  160],
       [2219,   27, 1292, ...,  108,  107,   87],
       [   1,   16,    8, ...,    1,  165,  160],
       ...,
       [  64,    1,   71, ..., 1608,  165,  160],
       [ 399,    2,   95, ...,  107,  165,  160],
       [  16,  697,   46, ...,    7,  119, 2939]])

In [20]:
y_train

5471    0
9886    0
3396    0
1192    0
1029    0
       ..
5109    0
4150    1
5884    0
4832    0
5593    1
Name: label_enc, Length: 7920, dtype: int64

### Model Building

In [28]:
model = Sequential()
model.add(Input(shape = (max_len,)))
model.add(Embedding(input_dim = vocab_size, output_dim = 64))
model.add(SimpleRNN(units = 32, return_sequences=False))
model.add(Dense(units = 1, activation="sigmoid"))
model.summary()

Model: "sequential_6"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_4 (Embedding)     (None, 100, 64)           320000    
                                                                 
 simple_rnn_3 (SimpleRNN)    (None, 32)                3104      
                                                                 
 dense_3 (Dense)             (None, 1)                 33        
                                                                 
Total params: 323,137
Trainable params: 323,137
Non-trainable params: 0
_________________________________________________________________


### model_training

In [31]:
model.compile(optimizer="adam",loss="binary_crossentropy",metrics=['accuracy'])
model.fit(x_train,y_train,validation_data=(x_test,y_test),epochs=10,batch_size=32,verbose=2)

Epoch 1/10
248/248 - 17s - loss: 0.1197 - accuracy: 0.9697 - val_loss: 0.0148 - val_accuracy: 0.9975 - 17s/epoch - 68ms/step
Epoch 2/10
248/248 - 16s - loss: 0.0088 - accuracy: 0.9989 - val_loss: 0.0130 - val_accuracy: 0.9975 - 16s/epoch - 64ms/step
Epoch 3/10
248/248 - 16s - loss: 0.0152 - accuracy: 0.9972 - val_loss: 0.0060 - val_accuracy: 0.9990 - 16s/epoch - 64ms/step
Epoch 4/10
248/248 - 16s - loss: 0.0037 - accuracy: 0.9996 - val_loss: 0.0076 - val_accuracy: 0.9980 - 16s/epoch - 63ms/step
Epoch 5/10
248/248 - 15s - loss: 0.0027 - accuracy: 0.9997 - val_loss: 0.0133 - val_accuracy: 0.9970 - 15s/epoch - 61ms/step
Epoch 6/10
248/248 - 15s - loss: 0.0024 - accuracy: 0.9997 - val_loss: 0.0134 - val_accuracy: 0.9975 - 15s/epoch - 61ms/step
Epoch 7/10
248/248 - 15s - loss: 0.0079 - accuracy: 0.9986 - val_loss: 0.0132 - val_accuracy: 0.9970 - 15s/epoch - 61ms/step
Epoch 8/10
248/248 - 15s - loss: 0.0045 - accuracy: 0.9990 - val_loss: 0.0107 - val_accuracy: 0.9980 - 15s/epoch - 61ms/step


<keras.callbacks.History at 0x1bf4453ebf0>

### Evaluation

In [32]:
loss,accuracy=model.evaluate(x_test,y_test)



In [33]:
loss,accuracy=model.evaluate(x_train,y_train)



### testing on single text`

In [48]:
text = " Sean Spicer HILARIOUSLY Branded As Chickensh*t After He Bolts From Briefing (TWEETS) (VIDEO) Oh, Mr. Spicey. Our favorite press secretary because we love to hate him. His gymnastics around Donald Trump s antics and gaffes (along with his own) are already the stuff of legend, and not in a good way. It makes his relationship with the press acrimonious, and he may well have just about had it with that. Today, he turned chickenshit, tucked his tail between his legs, and ran away from his own press briefing, leaving the White House Press Corps absolutely stunned.Cries of,  Sean! Hey Sean! Seeeeeean! Ah come on, Sean! He walked away!  are among just a few of the loud and disappointed protests coming from a press corps that undoubtedly had a squillion uncomfortable questions for him.Watch below:.@PressSec literally fled press questions today pic.twitter.com/S9uIapiBn4  American Bridge (@American_Bridge) May 2, 2017Near the end of the video, you can hear someone say,  Really?  Yes, really, apparently. And people on Twitter are going at Spicey for being a coward with gleeful abandon:@American_Bridge @PressSec Live feed on sean spicer running out the WH.. pic.twitter.com/wt4J2Q45WE  diane1261 (@diane1261) May 2, 2017@American_Bridge @PressSec pic.twitter.com/pqz03w8WhL  Jas (@barteej) May 2, 2017@American_Bridge @glasgowdick @PressSec @PressSec  Quick, the press are here. Run! Hide!  pic.twitter.com/y2Jib6YrZB  Sean Callaghan (@keanespirit) May 2, 2017@American_Bridge @PressSec Moments before: pic.twitter.com/U2XzJwewlF  Judge Steve Reinhold (@Galileo908) May 2, 2017@American_Bridge @wycam1 @PressSec Maybe Spicey had to GO POTTY ??? pic.twitter.com/KeSHWDnYsM  NY Justice Seeker (@NYJusticeSeeker) May 2, 2017Just in, footage of Sean Spicer at today s presser  pic.twitter.com/BNVX2m9CmA  Tynisa Walker (@Kalarigamerchic) May 2, 2017@American_Bridge @PressSec pic.twitter.com/lClJnDpGBt  L M J (@ViewFrom14thFlr) May 2, 2017@American_Bridge @PressSec Damn Sean. At least do it with a little style. pic.twitter.com/YTB8hEt05Z  Bob Parker (@BobusParkorum) May 2, 2017@American_Bridge @Marengel4 @PressSec I wouldn t put it pass you @seanspicer smh  pic.twitter.com/6K8jCnqKgl   R?G  (@lastbreath8) May 2, 2017@American_Bridge @PressSec pic.twitter.com/6DMtkNi0W9  Allbizness (@TerniValentine) May 2, 2017@American_Bridge @PressSec A coward dies a thousand deaths  pic.twitter.com/X0hCvrpzkn  Erin Larkin Foster (@larkster_Erin) May 2, 2017Live look at Sean Spicer ghosting the WH press pic.twitter.com/W9vmc2Ef4g  Andi Zeisler (@andizeisler) May 2, 2017@JakeTurx The childish nature of this Administration never ceases to amaze me. I honestly wish they would stop. pic.twitter.com/Z7uCwa8aaJ  ACTIndivisible (@ACTIndivisible) May 2, 2017Scaredy Spice @PressSec is too much of a coward to face the free press reps in White House press briefing! @maddow #Spicer #100DaysOfShame pic.twitter.com/xNTmw4xYeQ  Daniel Quinn (@MightyQuinnUSA) May 2, 2017Actual pics of Sean Spicer fleeing the press briefing today pic.twitter.com/NnogAnWC5w  Prubjot Kaur (@prubjotkaur) May 2, 2017Sean Spicer fleeing the WH press corps like pic.twitter.com/S1jns97dfN  Jenn Lerner (@JenniferLerner1) May 2, 2017Poor @seanspicer he s such a huge pussy that @realDonaldTrump is going to be grabbing him all day long. https://t.co/pJ9Ej1TMw1  Pretty Hat Machine? (@PrettyHatMech) May 2, 2017What s the rush? Sean Spicer leaves WH press briefing without fielding questions  https://t.co/U1rd4B6cZ2  All American Girl (@AIIAmericanGirI) May 2, 2017Sean Spicer leaving that press conference was not shocking to me. That looks me putting my kids to bed everynight pic.twitter.com/BHwAJhOYGw  Lulu Chatter (@LuluChatter1) May 2, 2017Sean Spicer leaves White House briefing without taking questions from the press Where s Spicer? pic.twitter.com/afi8EMgIqf  Mangoman (@69mangoman) May 2, 2017Actual footage of Sean Spicer at today s press briefing. pic.twitter.com/2xHZdKhTkm        (@slickvandal) May 2, 2017Some of the media are unhappy with this development, too, and really not afraid to say so. Mother Jones called it  the soundtrack for a presidency.  The Hill said that Spicer  stunned reporters. Market Watch noted that the press was left  volleying questions toward an empty podium: Sean? Sean? Hey, Sean, come on! What about the Putin call, Sean?  The caption under their photo reads,  It just like concluded. The Root had its own take: Perhaps Spicer needed some time to compose the  alternative facts  he knew he d have to present after Salmon-Skin Stalin said Tuesday that the new budget has enough money for a down payment on the wall, and Sen. Patrick Leahy (D-Vt.), the Senate Appropriations Committee s ranking Democratic member, responded on Twitter with,  To be clear, there is NO funding in this bill for ANY kind of down payment on construction of a new border wall. Or, maybe Spicer was nervous about responding to Trump s tweets from earlier Tuesday insisting that the government needed a  good shutdown.' While some blindly loyal Trump supporters are saying that he just owned the press, unfortunately for their narrative and for Spicey here, answering questions he doesn t necessarily like or want to answer is part and parcel of his job. He doesn t get to run away when he doesn t want to talk, or without explaining that his time is getting cut short for something else. Really, Spicer just got owned here.Featured image via screen capture from embedded video"
seq = tokenizer.texts_to_sequences([text])
padded = pad_sequences(seq, maxlen=max_len, padding="post")
# padded


In [50]:
prob=model.predict(padded)[0,0]
prob



0.00010104472

In [53]:
prediction="Real" if prob>0.5 else "False"
print(f"news is {prediction}")

news is False


In [59]:
import pickle

model.save("train_model.keras")

print( "Model saved as fake_news_model.keras")

with open("tokenizer.pkl", "wb") as f:
    pickle.dump(tokenizer, f)

Model saved as fake_news_model.keras


In [60]:
import tensorflow as tf
print(tf.__version__)


2.10.1
