In [147]:
#For Streaming data

In [148]:
import tweepy
from tweepy import API 
from tweepy import Cursor
from tweepy.streaming import StreamListener
from tweepy import OAuthHandler
from tweepy import Stream
 
import twitter_credentials

In [149]:
# # # # TWITTER CLIENT # # # #
class TwitterClient():
    def __init__(self, twitter_user=None):
        self.auth = TwitterAuthenticator().authenticate_twitter_app()
        self.twitter_client = API(self.auth)

        self.twitter_user = twitter_user

    def get_user_timeline_tweets(self, num_tweets):
        tweets = []
        for tweet in Cursor(self.twitter_client.user_timeline, id=self.twitter_user).items(num_tweets):
            tweets.append(tweet)
        return tweets

    def get_friend_list(self, num_friends):
        friend_list = []
        for friend in Cursor(self.twitter_client.friends, id=self.twitter_user).items(num_friends):
            friend_list.append(friend)
        return friend_list

    def get_home_timeline_tweets(self, num_tweets):
        home_timeline_tweets = []
        for tweet in Cursor(self.twitter_client.home_timeline, id=self.twitter_user).items(num_tweets):
            home_timeline_tweets.append(tweet)
        return home_timeline_tweets


In [150]:
# # # # TWITTER AUTHENTICATER # # # #
class TwitterAuthenticator():

    def authenticate_twitter_app(self):
        auth = OAuthHandler(twitter_credentials.CONSUMER_KEY, twitter_credentials.CONSUMER_SECRET)
        auth.set_access_token(twitter_credentials.ACCESS_TOKEN, twitter_credentials.ACCESS_TOKEN_SECRET)
        return auth

In [151]:
# # # # TWITTER STREAMER # # # #
class TwitterStreamer():
    """
    Class for streaming and processing live tweets.
    """
    def __init__(self):
        self.twitter_autenticator = TwitterAuthenticator()    

    def stream_tweets(self, fetched_tweets_filename, hash_tag_list):
        # This handles Twitter authetification and the connection to Twitter Streaming API
        listener = TwitterListener(fetched_tweets_filename)
        auth = self.twitter_autenticator.authenticate_twitter_app() 
        stream = Stream(auth, listener)

        # This line filter Twitter Streams to capture data by the keywords: 
        stream.filter(track=hash_tag_list)


In [152]:
# # # # TWITTER STREAM LISTENER # # # #
class TwitterListener(StreamListener):
    """
    This is a basic listener that just prints received tweets to stdout.
    """
    def __init__(self, fetched_tweets_filename):
        self.fetched_tweets_filename = fetched_tweets_filename

    def on_data(self, data):
        try:
            print(data)
            with open(self.fetched_tweets_filename, 'a') as tf:
                tf.write(data)
            return True
        except BaseException as e:
            print("Error on_data %s" % str(e))
        return True
          
    def on_error(self, status):
        if status == 420:
            # Returning False on_data method in case rate limit occurs.
            return False
        print(status)


In [153]:
if __name__ == '__main__':
 
    # Authenticate using config.py and connect to Twitter Streaming API.
    hash_tag_list = ["Python", "JavaScript", "Ruby"]
    fetched_tweets_filename = "tweets_json.json"

    twitter_client = TwitterClient('vishu_tyagi_')
    print(twitter_client.get_user_timeline_tweets(1))

    twitter_streamer = TwitterStreamer()
    twitter_streamer.stream_tweets(fetched_tweets_filename, hash_tag_list)

[Status(_api=<tweepy.api.API object at 0x0000016073B07A20>, _json={'created_at': 'Fri May 01 12:32:01 +0000 2020', 'id': 1256199698727272449, 'id_str': '1256199698727272449', 'text': '@airindiain I booked two tickets from Delhi to San Francisco for Tuesday, June 2 (PNR JYVGD). Considering the prese… https://t.co/V4Lcegam94', 'truncated': True, 'entities': {'hashtags': [], 'symbols': [], 'user_mentions': [{'screen_name': 'airindiain', 'name': 'Air India', 'id': 387573617, 'id_str': '387573617', 'indices': [0, 11]}], 'urls': [{'url': 'https://t.co/V4Lcegam94', 'expanded_url': 'https://twitter.com/i/web/status/1256199698727272449', 'display_url': 'twitter.com/i/web/status/1…', 'indices': [117, 140]}]}, 'source': '<a href="http://twitter.com/download/android" rel="nofollow">Twitter for Android</a>', 'in_reply_to_status_id': None, 'in_reply_to_status_id_str': None, 'in_reply_to_user_id': 387573617, 'in_reply_to_user_id_str': '387573617', 'in_reply_to_screen_name': 'airindiain', 'user': {'id

{"created_at":"Sat May 02 18:43:16 +0000 2020","id":1256655514227798016,"id_str":"1256655514227798016","text":"RT @javarevisited: Top 10 Free TypeScript Courses to Learn Online\u200a\u2014\u200aBest of Lot by @javinpaul https:\/\/t.co\/lO89KL36RJ #TypeScript #javascri\u2026","source":"\u003ca href=\"http:\/\/twitter.com\/download\/android\" rel=\"nofollow\"\u003eTwitter for Android\u003c\/a\u003e","truncated":false,"in_reply_to_status_id":null,"in_reply_to_status_id_str":null,"in_reply_to_user_id":null,"in_reply_to_user_id_str":null,"in_reply_to_screen_name":null,"user":{"id":125107613,"id_str":"125107613","name":"sunnyesquire","screen_name":"sunny_esquire","location":null,"url":null,"description":"Shunya,  Retweets are not personal endorsements.","translator_type":"none","protected":false,"verified":false,"followers_count":181,"friends_count":333,"listed_count":8,"favourites_count":16511,"statuses_count":9982,"created_at":"Sun Mar 21 18:24:18 +0000 2010","utc_offset":null,"time_zone":

KeyboardInterrupt: 

In [154]:
import json
import pandas as pd
import re
import numpy as np

In [155]:
def word_in_text(word, text):
    word = word.lower()
    text = text.lower()
    match = re.search(word, text)
    if match:
        return True
    return False

In [None]:
#Sentimental Analysis from here onwards

In [156]:
auth = tweepy.OAuthHandler(twitter_credentials.CONSUMER_KEY, twitter_credentials.CONSUMER_SECRET)
auth.set_access_token(twitter_credentials.ACCESS_TOKEN, twitter_credentials.ACCESS_TOKEN_SECRET)
api = tweepy.API(auth)

In [157]:
def list_tweets(user_id, count, prt=False):
    tweets = api.user_timeline(
        "@" + user_id, count=count, tweet_mode='extended')
    tw = []
    for t in tweets:
        tw.append(t.full_text)
        if prt:
            print(t.full_text)
            print()
    return tw

In [158]:
user_id = "realDonaldTrump"
count=200
tw_trump = list_tweets(user_id, count)

In [159]:
print(len(tw_trump))

199


In [160]:
def remove_pattern(input_txt, pattern):
    r = re.findall(pattern, input_txt)
    #print(r)
    for i in r:
        input_txt = re.sub(i, '', input_txt)
    #print (input_txt)
    return input_txt
def clean_tweets(lst):
    # remove twitter Return handles (RT @xxx:)
    lst = np.vectorize(remove_pattern)(lst, "RT @[\w]*:")
    # remove twitter handles (@xxx)
    lst = np.vectorize(remove_pattern)(lst, "@[\w]*")
    # remove URL links (httpxxx)
    print("before++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++")
    print(lst)
    lst = np.vectorize(remove_pattern)(lst, "https?://[A-Za-z0-9./]*")
    print("After_______________________________________________________________________________________________________________")
    print(lst)
    #lst = re.sub(r"http\S+", "", lst)
    #clean_tweet = re.match('(.*?)http.*?\s?(.*?)', lst)
    #if clean_tweet: 
        #lst = clean_tweet.group(1)
        #lst = clean_tweet.group(2) # will print everything after the URL 
    # remove special characters, numbers, punctuations (except for #)
    lst = np.core.defchararray.replace(lst, "[^a-zA-Z#]", " ")
    return lst

In [161]:
clean_tw = []
clean_tw = clean_tweets(tw_trump)

before++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
['Thank you Michael. You also! https://t.co/VFi6Nhj3zG'
 ' The FBI found "no derogatory information" about . But they decided to crush him anyway.\n\n"There is nothing worse…'
 'The Russia Hoax is the biggest political scandal in American history. Treason!!! Lets see how it ends???? https://t.co/VRsVdiQrsf'
 ' Join me for a quick laugh about the funniest Trump kill shot on Biden. Ever.🤣🤣🤣🤣🤣🤣 https://t.co/xn6U5w5iZe'
 ' This kill shot is so perfectly executed it is painful to watch . . . and still funny. 🤣🤣🤣'
 ' Poll for Democrats only. Are you aware that most of the political news you consume is either misleading and out of cont…'
 " Greg Gutfeld on Biden response to Tara Reade claims: He 'was doing great in that interview until he confessed' https://…"
 "  It's an absolute disgrace what they've done to  and his family.\n\nThis is one of the most highly…"
 "   Why does MSM i

In [162]:
#VaderSentiment for sentimental analysis

In [163]:
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
analyser = SentimentIntensityAnalyzer()

In [164]:
sent = []
for t in clean_tw:
    sent.append(analyser.polarity_scores(t)['compound'])
    #print(analyser.polarity_scores(t))

In [165]:
sent

[0.4199,
 -0.2263,
 -0.8669,
 0.9716,
 0.8825,
 -0.4019,
 0.6249,
 -0.4939,
 -0.8002,
 0.0,
 0.4199,
 -0.6957,
 0.4019,
 0.4019,
 0.8268,
 -0.0387,
 0.0,
 -0.296,
 -0.296,
 -0.3869,
 -0.0258,
 0.5719,
 0.3818,
 -0.0516,
 0.0,
 0.0,
 0.5423,
 0.5106,
 0.0,
 0.0,
 0.0,
 0.6459,
 0.906,
 0.3664,
 0.8074,
 0.2473,
 -0.8111,
 0.6476,
 0.0,
 0.2023,
 -0.4926,
 0.0,
 0.5411,
 0.4767,
 0.3612,
 -0.012,
 0.4404,
 -0.3612,
 -0.5106,
 -0.1531,
 -0.8786,
 0.4019,
 -0.4168,
 0.6885,
 0.9439,
 0.0,
 0.0,
 0.6249,
 -0.1761,
 -0.631,
 -0.7783,
 0.0,
 0.2263,
 -0.128,
 -0.2263,
 0.0,
 0.0,
 -0.3506,
 -0.6486,
 0.9325,
 0.25,
 0.6486,
 -0.25,
 0.5267,
 0.0,
 0.4588,
 0.6597,
 0.3612,
 0.5859,
 -0.4215,
 -0.7777,
 0.3612,
 0.0,
 0.0,
 -0.73,
 -0.5255,
 0.6588,
 -0.5706,
 -0.4926,
 0.4404,
 0.6891,
 0.0,
 0.0,
 -0.4767,
 0.0,
 -0.5707,
 0.4391,
 0.5267,
 -0.6408,
 0.0,
 0.0,
 0.0343,
 -0.765,
 0.0,
 -0.7277,
 0.4199,
 -0.8858,
 -0.2003,
 0.0,
 -0.819,
 0.5848,
 -0.4981,
 -0.8676,
 0.0,
 0.0,
 0.6114,
 0.0

In [166]:
for (s, t) in zip(sent, clean_tw):
    print("Tweet is:")
    print(t)
    if(s < 0):
        print("negative")
    elif(s == 0):
        print("neutral")
    else:
        print("positive")
    print("-------------------------------------------------------------------------------------------------------------")

Tweet is:
Thank you Michael. You also! 
positive
-------------------------------------------------------------------------------------------------------------
Tweet is:
 The FBI found "no derogatory information" about . But they decided to crush him anyway.

"There is nothing worse…
negative
-------------------------------------------------------------------------------------------------------------
Tweet is:
The Russia Hoax is the biggest political scandal in American history. Treason!!! Lets see how it ends???? 
negative
-------------------------------------------------------------------------------------------------------------
Tweet is:
 Join me for a quick laugh about the funniest Trump kill shot on Biden. Ever.🤣🤣🤣🤣🤣🤣 
positive
-------------------------------------------------------------------------------------------------------------
Tweet is:
 This kill shot is so perfectly executed it is painful to watch . . . and still funny. 🤣🤣🤣
positive
-------------------------------------

In [167]:
from nltk.tokenize import TweetTokenizer

In [168]:
tweet_tokenizer = TweetTokenizer(preserve_case=False, strip_handles=True, reduce_len=True)
#tockenizer example
for tweet in clean_tw:
    print (tweet_tokenizer.tokenize(tweet))

['thank', 'you', 'michael', '.', 'you', 'also', '!']
['the', 'fbi', 'found', '"', 'no', 'derogatory', 'information', '"', 'about', '.', 'but', 'they', 'decided', 'to', 'crush', 'him', 'anyway', '.', '"', 'there', 'is', 'nothing', 'worse', '…']
['the', 'russia', 'hoax', 'is', 'the', 'biggest', 'political', 'scandal', 'in', 'american', 'history', '.', 'treason', '!', '!', '!', 'lets', 'see', 'how', 'it', 'ends', '?', '?', '?']
['join', 'me', 'for', 'a', 'quick', 'laugh', 'about', 'the', 'funniest', 'trump', 'kill', 'shot', 'on', 'biden', '.', 'ever', '.', '🤣', '🤣', '🤣']
['this', 'kill', 'shot', 'is', 'so', 'perfectly', 'executed', 'it', 'is', 'painful', 'to', 'watch', '. . .', 'and', 'still', 'funny', '.', '🤣', '🤣', '🤣']
['poll', 'for', 'democrats', 'only', '.', 'are', 'you', 'aware', 'that', 'most', 'of', 'the', 'political', 'news', 'you', 'consume', 'is', 'either', 'misleading', 'and', 'out', 'of', 'cont', '…']
['greg', 'gutfeld', 'on', 'biden', 'response', 'to', 'tara', 'reade', 'clai

In [169]:
import string
import re
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer

In [170]:
stopwords_english = stopwords.words('english')
stemmer = PorterStemmer()
emoticons_happy = set([
    ':-)', ':)', ';)', ':o)', ':]', ':3', ':c)', ':>', '=]', '8)', '=)', ':}',
    ':^)', ':-D', ':D', '8-D', '8D', 'x-D', 'xD', 'X-D', 'XD', '=-D', '=D',
    '=-3', '=3', ':-))', ":'-)", ":')", ':*', ':^*', '>:P', ':-P', ':P', 'X-P',
    'x-p', 'xp', 'XP', ':-p', ':p', '=p', ':-b', ':b', '>:)', '>;)', '>:-)',
    '<3'
    ])
emoticons_sad = set([
    ':L', ':-/', '>:/', ':S', '>:[', ':@', ':-(', ':[', ':-||', '=L', ':<',
    ':-[', ':-<', '=\\', '=/', '>:(', ':(', '>.<', ":'-(", ":'(", ':\\', ':-c',
    ':c', ':{', '>:\\', ';('
    ])
emoticons = emoticons_happy.union(emoticons_sad)

In [124]:
def cl_tweets(tweet):
    tweet = re.sub(r'\$\w*', '', tweet)
    tweet = re.sub(r'^RT[\s]+', '', tweet)
    tweet = re.sub(r'https?:\/\/.*[\r\n]*', '', tweet)
    tweet = re.sub(r'#', '', tweet)
    tokenizer = TweetTokenizer(preserve_case=False, strip_handles=True, reduce_len=True)
    tweet_tokens = tokenizer.tokenize(tweet)
 
    tweets_clean = []    
    for word in tweet_tokens:
        if (word not in stopwords_english and
              word not in emoticons and
                word not in string.punctuation):
            stem_word = stemmer.stem(word)
            tweets_clean.append(stem_word)
 
    return tweets_clean

In [125]:
cl_tw = []
for tweet in clean_tw:
    cl_tw.append(cl_tweets(tweet))

In [126]:
cl_tw

[['thank', 'michael', 'also'],
 ['fbi',
  'found',
  'derogatori',
  'inform',
  'decid',
  'crush',
  'anyway',
  'noth',
  'wors',
  '…'],
 ['russia',
  'hoax',
  'biggest',
  'polit',
  'scandal',
  'american',
  'histori',
  'treason',
  'let',
  'see',
  'end'],
 ['join',
  'quick',
  'laugh',
  'funniest',
  'trump',
  'kill',
  'shot',
  'biden',
  'ever',
  '🤣',
  '🤣',
  '🤣'],
 ['kill',
  'shot',
  'perfectli',
  'execut',
  'pain',
  'watch',
  '. . .',
  'still',
  'funni',
  '🤣',
  '🤣',
  '🤣'],
 ['poll',
  'democrat',
  'awar',
  'polit',
  'news',
  'consum',
  'either',
  'mislead',
  'cont',
  '…'],
 ['greg',
  'gutfeld',
  'biden',
  'respons',
  'tara',
  'read',
  'claim',
  'great',
  'interview',
  'confess',
  '…'],
 ['absolut', 'disgrac', "they'v", 'done', 'famili', 'one', 'highli', '…'],
 ['msm', 'ignor', 'peopl', 'ignor', 'care', 'american', 'worker', '…'],
 ['sleepi', 'joe', 'fell', 'asleep', 'last', 'town', 'hall'],
 ['thank'],
 ['rate', 'support', 'trump', 're

In [133]:
#feature extrction
def bag_of_words(tweet):
    words = cl_tweets(tweet)
    words_dictionary = dict([word, True] for word in words)    
    return words_dictionary

In [134]:
from nltk.corpus import twitter_samples

In [135]:
pos_tweets = twitter_samples.strings('positive_tweets.json')
print (len(pos_tweets)) # Output: 5000
 
neg_tweets = twitter_samples.strings('negative_tweets.json')
print (len(neg_tweets)) # Output: 5000
 
all_tweets = twitter_samples.strings('tweets.20150430-223406.json')
print (len(all_tweets)) # Output: 20000

5000
5000
20000


In [136]:
pos_tweets_set = []
for tweet in pos_tweets:
    pos_tweets_set.append((bag_of_words(tweet), 'pos'))

In [137]:
neg_tweets_set = []
for tweet in neg_tweets:
    neg_tweets_set.append((bag_of_words(tweet), 'neg'))

In [138]:
from random import shuffle 
shuffle(pos_tweets_set)
shuffle(neg_tweets_set)
 
test_set = pos_tweets_set[:1000] + neg_tweets_set[:1000]
train_set = pos_tweets_set[1000:] + neg_tweets_set[1000:]
 
print(len(test_set),  len(train_set))

2000 8000


In [139]:
from nltk import classify
from nltk import NaiveBayesClassifier

In [140]:
classifier = NaiveBayesClassifier.train(train_set)
 
accuracy = classify.accuracy(classifier, test_set)
print(accuracy)

0.743


In [141]:
print (classifier.show_most_informative_features(10))  

Most Informative Features
                     via = True              pos : neg    =     35.0 : 1.0
                     sad = True              neg : pos    =     26.7 : 1.0
                     bam = True              pos : neg    =     21.7 : 1.0
                    blog = True              pos : neg    =     16.3 : 1.0
                    glad = True              pos : neg    =     15.4 : 1.0
                   didnt = True              neg : pos    =     14.3 : 1.0
                      ff = True              pos : neg    =     14.2 : 1.0
                  welcom = True              pos : neg    =     13.9 : 1.0
                     bro = True              pos : neg    =     13.7 : 1.0
                    lost = True              neg : pos    =     13.0 : 1.0
None


In [143]:
clean_tw

array(['Thank you Michael. You also! ',
       ' The FBI found "no derogatory information" about . But they decided to crush him anyway.\n\n"There is nothing worse…',
       'The Russia Hoax is the biggest political scandal in American history. Treason!!! Lets see how it ends???? ',
       ' Join me for a quick laugh about the funniest Trump kill shot on Biden. Ever.🤣🤣🤣🤣🤣🤣 ',
       ' This kill shot is so perfectly executed it is painful to watch . . . and still funny. 🤣🤣🤣',
       ' Poll for Democrats only. Are you aware that most of the political news you consume is either misleading and out of cont…',
       " Greg Gutfeld on Biden response to Tara Reade claims: He 'was doing great in that interview until he confessed' …",
       "  It's an absolute disgrace what they've done to  and his family.\n\nThis is one of the most highly…",
       "   Why does MSM ignore this???\n\nWhy do people ignore that he doesn't care about the #American worker…",
       '  Sleepy Joe fell asleep during

In [144]:
for tweet in clean_tw:
    custom_tweet_set = bag_of_words(tweet)
    print (classifier.classify(custom_tweet_set))

pos
neg
pos
neg
neg
neg
pos
pos
neg
neg
pos
neg
pos
neg
pos
pos
pos
neg
pos
neg
pos
pos
pos
pos
pos
pos
pos
pos
neg
pos
pos
neg
pos
pos
pos
neg
pos
pos
neg
pos
neg
pos
pos
pos
pos
pos
pos
pos
pos
pos
neg
pos
pos
neg
pos
neg
neg
pos
neg
pos
neg
pos
pos
pos
pos
pos
pos
pos
neg
pos
pos
pos
pos
pos
neg
pos
pos
pos
pos
pos
pos
neg
pos
pos
pos
neg
pos
pos
neg
pos
pos
pos
pos
pos
neg
pos
pos
pos
pos
pos
neg
pos
pos
neg
neg
neg
neg
neg
pos
neg
pos
pos
pos
pos
pos
pos
pos
pos
pos
neg
pos
pos
neg
pos
neg
pos
pos
neg
pos
pos
pos
neg
pos
pos
neg
neg
neg
pos
pos
pos
pos
pos
pos
neg
pos
pos
neg
pos
pos
pos
pos
pos
neg
pos
pos
pos
neg
pos
pos
pos
neg
pos
pos
pos
neg
pos
neg
neg
neg
pos
pos
neg
pos
neg
pos
pos
pos
neg
pos
pos
pos
neg
neg
pos
pos
pos
pos
neg
pos
pos
pos
pos
neg
pos
pos
neg
neg
pos
pos
