### Import sections

In [1]:
import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import train_test_split
from sklearn import metrics
from sklearn.metrics import roc_curve
from sklearn.naive_bayes import MultinomialNB
import matplotlib.pyplot as plt
import pickle 
import numpy as np
import time

#### Reading the Data 

In [2]:
fake_data = pd.read_csv('https://s3.amazonaws.com/assets.datacamp.com/production/course_3629/fake_or_real_news.csv',
            low_memory=True,
            nrows = 100
           )

In [3]:
fake_data.head()

Unnamed: 0.1,Unnamed: 0,title,text,label
0,8476,You Can Smell Hillary‚Äôs Fear,"Daniel Greenfield, a Shillman Journalism Fello...",FAKE
1,10294,Watch The Exact Moment Paul Ryan Committed Pol...,Google Pinterest Digg Linkedin Reddit Stumbleu...,FAKE
2,3608,Kerry to go to Paris in gesture of sympathy,U.S. Secretary of State John F. Kerry said Mon...,REAL
3,10142,Bernie supporters on Twitter erupt in anger ag...,"‚Äî Kaydee King (@KaydeeKing) November 9, 2016 T...",FAKE
4,875,The Battle of New York: Why This Primary Matters,It's primary day in New York and front-runners...,REAL


In [4]:
response = fake_data.label

In [5]:
count_vectorizer = CountVectorizer(stop_words="english",
                                   ngram_range=(1,4))

#### Train test split

In [6]:
X_train, X_test, Y_train, Y_test = train_test_split(fake_data["text"],
                                                   response,
                                                   test_size=0.33,
                                                   random_state=53)

In [7]:
X_train[1]

'Google Pinterest Digg Linkedin Reddit Stumbleupon Print Delicious Pocket Tumblr \nThere are two fundamental truths in this world: Paul Ryan desperately wants to be president. And Paul Ryan will never be president. Today proved it. \nIn a particularly staggering example of political cowardice, Paul Ryan re-re-re-reversed course and announced that he was back on the Trump Train after all. This was an aboutface from where he was a few weeks ago. He had previously declared he would not be supporting or defending Trump after a tape was made public in which Trump bragged about assaulting women. Suddenly, Ryan was appearing at a pro-Trump rally and boldly declaring that he already sent in his vote to make him President of the United States. It was a surreal moment. The figurehead of the Republican Party dosed himself in gasoline, got up on a stage on a chilly afternoon in Wisconsin, and lit a match. . @SpeakerRyan says he voted for @realDonaldTrump : ‚ÄúRepublicans, it is time to come home‚Ä

In [8]:
count_train = count_vectorizer.fit_transform(X_train)

In [9]:
count_train

<67x83297 sparse matrix of type '<class 'numpy.int64'>'
	with 94064 stored elements in Compressed Sparse Row format>

In [None]:
count_test = count_vectorizer.transform(X_test)

In [None]:
count_test

#### Naiive Bayes Training

In [None]:
nb_classifier = MultinomialNB()

In [None]:
nb_classifier.fit(count_train, Y_train)

In [None]:
pred = nb_classifier.predict(count_test)

In [None]:
score = metrics.accuracy_score(Y_test, pred)

In [None]:
score

In [None]:
cm = metrics.confusion_matrix(Y_test,
                              pred,
                              labels=["FAKE",  "REAL"]
                             )

In [None]:
cm

In [None]:
pickle.dump(nb_classifier, open('naiiveBayes.pkl', 'wb'))

In [None]:
vect = CountVectorizer(stop_words="english",
                                   ngram_range=(1,4))

#### Load the model

In [10]:
model = pickle.load(open('naiiveBayes.pkl', 'rb'))

#### Streaming

In [11]:
from pyspark import SparkContext,SparkConf
from pyspark.streaming import StreamingContext
from pyspark.sql import SQLContext
from pyspark.sql.functions import desc
import pickle 
import time
from sklearn.naive_bayes import MultinomialNB

#### Set spark configurations

In [12]:
conf = SparkConf()
conf.set("spark.app.name", "MyApp")
conf.set("spark.ui.port", "36000")

<pyspark.conf.SparkConf at 0x116b78400>

#### Create spark context

In [13]:
sc = SparkContext("local[2]", "Simple App", conf)

In [14]:
ssc = StreamingContext(sc, 10)

In [15]:
tweets = ssc.socketTextStream("127.0.0.1", 5555)

#### Do prediction on stream of tweets 

In [17]:
transformed = tweets.map(lambda x : (x, model.predict(count_vectorizer.transform(pd.Series(x)))))

In [18]:
transformed.pprint()
tweets.pprint()

#### Start of the stream

In [19]:
ssc.start()
ssc.awaitTermination()

-------------------------------------------
Time: 2017-12-11 16:14:30
-------------------------------------------
('RT @SonjaPlomp: Dit is James. James kon kennelijk de kerstafdeling van de IKEA niet vinden en in plaats van de weg te vragen, geeft‚Ä¶ RT @lopezgovlaw: #News "New York\'s Port Authority evacuated after explosion" via #USAToday | https://t.co/91VtvprOaS https://t.co/e2MipytWcURT @hasan_can: √úmraniye\'de Ya≈ülƒ±lar da Yalnƒ±z Deƒüil! https://t.co/KUz5ijnREN @umraniyebeltr aracƒ±lƒ±ƒüƒ±ylaRT @Phire2Fire: @leahmcelrath Every single one of our media outlets conveniently left out the news about Nigel Farage campaigning f‚Ä¶ https://t.co/SfW1wdQR3U', array(['FAKE'],
      dtype='<U4'))
('https://t.co/YlJnJxFcyb', array(['FAKE'],
      dtype='<U4'))
('https://t.co/SfW1wdQR3U', array(['FAKE'],
      dtype='<U4'))
('https://t.co/YlJnJxFcyb', array(['FAKE'],
      dtype='<U4'))
('', array(['REAL'],
      dtype='<U4'))
('Sir,', array(['REAL'],
      dtype='<U4'))
("Please Visit‚Ä¶ h

-------------------------------------------
Time: 2017-12-11 16:15:00
-------------------------------------------
("Why @JPMorgan, @AmericanExpress , @HSBC are backing ‚Äòisolation‚Äô web browsing :‚Ä¶ Saudi Arabia will allow public movie theaters for the first time in more than 35 years https://t.co/c4X2TiWukQ via @bpoliticsMSNBC NEWS Today 6:00 AM ET 12/11/17 | MSNBC NEWS December 11, 2017 https://t.co/8LTV0kdHAN via @YouTubeRT @NipoSen: Genitals cut, hot oil poured on eyes and face, head split into two, condition of Hindu-Dalit activist Paresh Mesta,‚Ä¶ RT @beckettmedia: Shohei Ohtani has lots of cool Japanese baseball cards. Here's a look at his earliest: https://t.co/vZtlEyKpyA‚Ä¶ RT @pamfoundation: https://t.co/K9YwJMZRN0RT @RVAwonk: NEW: Women who have accused Trump of sexual harassment and/or assault will speak at a news conference, hosted by‚Ä¶ RT @nhk_kabun: „Äê„Éé„Éº„Éô„É´Âπ≥ÂíåË≥ûÊéàË≥ûÂºè Ë¢´ÁàÜËÄÖ„ÇÇÊºîË™¨„Äë„Éé„Éº„Éô„É´Âπ≥ÂíåË≥û„ÅÆÊéàË≥ûÂºè„Åå„Éé„É´„Ç¶„Çß„Éº„ÅßË°å„Çè„Çå„

-------------------------------------------
Time: 2017-12-11 16:15:20
-------------------------------------------
('https://t.co/bZ7IF2Em31RT @DineshDSouza: DOGCATCHER CALLS IT QUITS: Her complete obscurity shows how little fallout this is actually causing at the GOP https://t.‚Ä¶RT @okaguchik: Ë°åÊîøÊõ∏Â£´Ë©¶È®ì„Åß„ÄÅ‰∫å‰∫∫ÂàÜ„ÅÆËß£Á≠îÊ∏à„ÅøÁ≠îÊ°à„ÇíÁ¥õÂ§±„Åï„Åõ„ÅüË≤¨‰ªªËÄÖ„ÅåÔºåÂçò„Å™„ÇãÂé≥ÈáçÊ≥®ÊÑè„ÅßÁµÇ„Çè„Çã(^_^)', array(['REAL'],
      dtype='<U4'))
('', array(['REAL'],
      dtype='<U4'))
('https://t.co/lhwkjoPbSI', array(['FAKE'],
      dtype='<U4'))
('', array(['REAL'],
      dtype='<U4'))
('‰ø∫„ÅØ„ÄÅ„Éó„É©„Ç§„Éô„Éº„Éà„ÅÆ„Å§„Å∂„ÇÑ„Åç„Å†„Åë„ÅßÔºåÂé≥ÈáçÊ≥®ÊÑè(^_^)RT @mariko_dayo: Ê°êË∞∑ÂÅ•Â§™„Å®ÂÖ•Ê±üÊÇ†„ÅÆ‚Äú‰ª£Ë°®‰Ωú‚Äù„Äå„Éì„Ç∏„É©„É≥„ÉÜ„ÄçÊú¨Êó•ÂÖ¨Èñã„ÄÅÂ§ßÊ£ÆÂçóÊúã„ÅåÊíÆÂΩ±ÂõûÊÉ≥ - Êò†Áîª„Éä„Çø„É™„Éº https://t.co/iD2UzMxxUe–ö–ª—É–±–∞–º #–†–§ –ø–æ–≤–µ–∑–ª–æ —Å –∂–µ—Ä–µ–±—å–µ–≤–∫–æ–π –õ–∏–≥–∏ –ï–≤—Ä–æ–ø—ã #–£–ï–§–ê #–∂–µ—Ä–µ–±—å–µ–≤–∫–∞ #—Ñ—É—Ç–±–æ–ª https://t.co/O1ZeEYSwQ6NFL

-------------------------------------------
Time: 2017-12-11 16:15:50
-------------------------------------------
('Take one of their beers and y‚Ä¶ https://t.co/bBixTfTkg3masdrnews ŸÑÿ∑ŸäŸÅÿ© ÿ∂ŸäŸÅÿ© ÿ®ÿ±ŸÜÿßŸÖÿ¨ "ŸÉÿ®ÿ± ÿØŸÖÿßÿ∫ŸÉ".. ÿßŸÑÿ£ÿ±ÿ®ÿπÿßÿ° https://t.co/QvhF1plo52RT @gekisaka: „É¨„Ç¢„É´„ÅØ„Éë„É™SG„Å®ÊøÄÁ™Å!!‚ÄúELÊñ∞ÊóßÁéãËÄÖÂØæÊ±∫‚Äù„ÇÇÂÆüÁèæ‚Ä¶CL„Éô„Çπ„Éà16„ÅÆÁµÑ„ÅøÂêà„Çè„ÅõÊ±∫ÂÆö https://t.co/kVTWhqccNI #gekisaka https://t.co/onQVHI3HRTRT @JohnJHarwood: new Fox News Poll of AL Senate: Doug Jones 50%, Roy Moore 40%RT @JPhlps: @Cernovich @kincsi_chris Correction @CNN are clinton news network is comprised of lying leftist activists who spew hate.RT @LoveLive_staff: „Äê„É©„Éñ„É©„Ç§„ÉñÔºÅËôπ„É∂Âí≤Â≠¶Âúí„Çπ„ÇØ„Éº„É´„Ç¢„Ç§„Éâ„É´ÂêåÂ•Ω‰ºö „ÅØ„Åò„ÇÅ„Åæ„Åó„Å¶ÔºÅÂÖ¨ÈñãÁîüÊîæÈÄÅ Á¨¨ÔºìÂõû_ÂëäÁü•ÊÉÖÂ†±„Äë„Äå„Çπ„ÇØ„Éï„Çß„Çπ5th„Ç¢„Éã„Éê„Éº„Çµ„É™„Éº„Ç≠„É£„É≥„Éö„Éº„É≥„Ç¨„Éº„É´27‰∫∫Á∑èÈÅ∏Êåô„ÄçÁµêÊûúÁô∫Ë°®ÔºÅ„ÄåŒº\'sÔºöÁü¢Êæ§„Å´„Åì AqoursÔºöÊ¥•Â≥∂ÂñÑÂ≠ê Ëôπ„É∂Âí≤Â≠¶Âúí„Çπ„ÇØ„Éº„É´„Ç¢‚Ä¶ R

-------------------------------------------
Time: 2017-12-11 16:16:10
-------------------------------------------

-------------------------------------------
Time: 2017-12-11 16:16:20
-------------------------------------------

-------------------------------------------
Time: 2017-12-11 16:16:30
-------------------------------------------

-------------------------------------------
Time: 2017-12-11 16:16:40
-------------------------------------------

-------------------------------------------
Time: 2017-12-11 16:16:50
-------------------------------------------

-------------------------------------------
Time: 2017-12-11 16:17:00
-------------------------------------------

-------------------------------------------
Time: 2017-12-11 16:17:10
-------------------------------------------

-------------------------------------------
Time: 2017-12-11 16:17:20
-------------------------------------------

-------------------------------------------
Time: 2017-12-11 16:17:30
----------

KeyboardInterrupt: 

-------------------------------------------
Time: 2017-12-11 16:18:20
-------------------------------------------

-------------------------------------------
Time: 2017-12-11 16:18:30
-------------------------------------------

-------------------------------------------
Time: 2017-12-11 16:18:40
-------------------------------------------

-------------------------------------------
Time: 2017-12-11 16:18:50
-------------------------------------------

-------------------------------------------
Time: 2017-12-11 16:19:00
-------------------------------------------

-------------------------------------------
Time: 2017-12-11 16:19:10
-------------------------------------------

-------------------------------------------
Time: 2017-12-11 16:19:20
-------------------------------------------

-------------------------------------------
Time: 2017-12-11 16:19:30
-------------------------------------------

-------------------------------------------
Time: 2017-12-11 16:19:40
----------