In [None]:
from pymongo import MongoClient
from dateutil import parser
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from wordcloud import WordCloud, STOPWORDS
from textblob import TextBlob
import re
import tweepy
import json
import time
import pandas as pd
import nltk
import numpy as np
import matplotlib.pyplot as plt

In [None]:
#MongoDB connection URI; change twitterdb to the name of your database.
#NOTE: connect() and Streamlistener.on_data() select the database through
#the attribute client.twitterdb, so that attribute has to be changed to
#match whenever a different database name is used here.
MONGO_HOST= 'mongodb://localhost/twitterdb'

In [None]:
#Function that connects to MongoDB & converts text column to dataframe
def connect():
    """Load the text of every stored tweet into a DataFrame.

    Opens a client on MONGO_HOST, reads the ``tweets`` collection of the
    ``twitterdb`` database and keeps only the ``text`` field of each
    document (``_id`` is excluded by the projection).

    Returns:
        pandas.DataFrame built from the projected documents.
    """
    # The context manager closes the client once the cursor has been fully
    # consumed, so repeated calls do not pile up open connections.
    with MongoClient(MONGO_HOST) as client:
        cursor = client.twitterdb.tweets.find({}, {'_id': 0, 'text': 1})
        return pd.DataFrame(list(cursor))

#Function for cleaning tweets
def clean_tweets(inp_df):
    """Add a ``clean_tweets`` column holding a normalised copy of ``text``.

    For every value of ``inp_df['text']`` the function removes URLs, the
    stand-alone retweet marker ``rt``/``RT`` and every non-letter character,
    lower-cases the rest, drops English stop words and lemmatizes the
    remaining words. The words are joined back with single spaces.

    Args:
        inp_df: DataFrame with a ``text`` column. It is modified in place.

    Returns:
        The same DataFrame, now carrying the ``clean_tweets`` column.
    """
    # A set gives constant-time membership tests for the stop-word filter.
    stopword_set = set(stopwords.words('english'))
    lemmatizer = WordNetLemmatizer()
    # The URL alternative comes first so a whole link is consumed in one go.
    # The retweet marker is only matched when it is not surrounded by other
    # letters, so words that merely contain "rt" (e.g. "party") stay intact.
    noise = re.compile(r'http\S+|(?<![a-zA-Z])(?:rt|RT)(?![a-zA-Z])|[^a-zA-Z]')

    def _clean(raw):
        words = noise.sub(' ', str(raw)).lower().split()
        return ' '.join(
            lemmatizer.lemmatize(word)
            for word in words
            if word not in stopword_set
        )

    # Assigning the whole column at once avoids chained item assignment and
    # does not depend on the labels of the DataFrame index.
    inp_df['clean_tweets'] = [_clean(raw) for raw in inp_df['text']]
    return inp_df

#Function for counting sentiment value for each tweet
def sentiment(tweet):
    """Map the TextBlob polarity of ``tweet`` to a sentiment label.

    Returns:
        1 when the polarity is above zero, 0 when it is exactly zero and
        -1 otherwise.
    """
    polarity = TextBlob(tweet).sentiment.polarity
    if polarity > 0:
        return 1
    return 0 if polarity == 0 else -1
    
#Function for separating tweets
def separate(inp_df):
    """Split the ``Sentiment`` column into positive, neutral and negative.

    Note that the returned lists hold the sentiment values themselves (not
    the tweet text); their lengths give the number of tweets per class.

    Args:
        inp_df: Mapping-like object (typically a DataFrame) whose
            ``Sentiment`` entry is an iterable of numeric scores.

    Returns:
        Tuple ``(pos_tweets, neu_tweets, neg_tweets)`` of lists containing
        the scores that are > 0, == 0 and < 0 respectively, in input order.
    """
    # Iterate over the values directly: looking them up again through a
    # positional counter breaks as soon as the index is not 0..n-1.
    scores = list(inp_df['Sentiment'])
    pos_tweets = [score for score in scores if score > 0]
    neu_tweets = [score for score in scores if score == 0]
    neg_tweets = [score for score in scores if score < 0]
    return pos_tweets, neu_tweets, neg_tweets

#Function for creating wordcloud
def word_cloud(inp_df):
    """Draw a word cloud built from the ``clean_tweets`` column.

    All values of ``inp_df['clean_tweets']`` are joined with spaces and
    rendered as a 1000x800 word cloud on a white background, shown in a
    12x10 inch figure without axes.

    Args:
        inp_df: DataFrame with a ``clean_tweets`` column of strings.
    """
    # Build the cloud before any figure exists, so a failure while
    # generating it does not leave an empty figure behind.
    wordcloud = WordCloud(background_color = 'white',width = 1000,height = 800).generate(" ".join(inp_df['clean_tweets']))
    fig, _ = plt.subplots(figsize = (12,10))
    try:
        plt.imshow(wordcloud)
        plt.axis('off')
        plt.show()
    finally:
        # This function is called repeatedly while streaming; release the
        # figure so that figures do not accumulate in memory.
        plt.close(fig)

In [None]:
#Scrapes tweets about Donald Trump & prints sentiment percentages & a wordcloud after every 1000 stored tweets
class Streamlistener(tweepy.StreamListener):
    """Stream listener that stores every received message in MongoDB.

    Each message is inserted into the ``tweets`` collection of the
    ``twitterdb`` database. Whenever the number of stored documents is a
    multiple of REPORT_EVERY, all stored tweets are re-read, cleaned and
    scored, the sentiment percentages are printed and a word cloud is drawn.
    """

    #Number of stored documents between two sentiment reports
    REPORT_EVERY = 1000

    #MongoDB client shared by all on_data calls; created on first use
    _client = None

    def on_connect(self):
        """Announce that the connection to the streaming API is open."""
        print("You are connected to the Twitter API")

    def on_error(self, status_code):
        """Report a stream error.

        Args:
            status_code: Status code handed over by tweepy.

        Returns:
            False for any status other than 200, None otherwise.
        """
        if status_code != 200:
            print('error')
            return False

    def on_data(self,data):
        """Store one raw stream message and trigger the periodic report.

        Args:
            data: JSON text of a single stream message.

        Any exception raised while handling the message is printed and
        swallowed so that one bad message does not end the stream.
        """
        try:
            tweets = self._tweets_collection()
            datajson = json.loads(data)
            tweets.insert_one(datajson)
            created_at = parser.parse(datajson['created_at'])
            print("Tweet collected at: {}".format(str(created_at)))
            if tweets.count_documents({}) % self.REPORT_EVERY == 0:
                self._report()
                time.sleep(5)
        except Exception as e:
            print(e)

    def _tweets_collection(self):
        """Return the tweets collection, opening the client only once."""
        if self._client is None:
            # Reuse one client for the whole stream instead of opening a
            # new connection pool for every single tweet.
            self._client = MongoClient(MONGO_HOST)
        return self._client.twitterdb.tweets

    def _report(self):
        """Print sentiment percentages and show a word cloud of all tweets."""
        df = clean_tweets(connect())
        df['Sentiment'] = np.array([sentiment(x) for x in df['clean_tweets']])
        pos_tweets,neu_tweets,neg_tweets = separate(df)
        total = len(df['clean_tweets'])
        print("Positive tweets percentage: {}%".format(round(100*(len(pos_tweets)/total),2)))
        print("Negative tweets percentage: {}%".format(round(100*(len(neg_tweets)/total),2)))
        print("Neutral tweets percentage: {}%".format(round(100*(len(neu_tweets)/total),2)))
        word_cloud(df)
            
#Script entry point: authenticates against Twitter and starts the stream
if __name__ == '__main__':
    #NOTE(review): count is not referenced anywhere else in the visible code
    count = 0
    #Twitter API credentials; they are left blank in the source
    consumer_key = ''
    consumer_secret = ''
    access_key = ''
    access_secret = ''

    #Build the OAuth handler from the credentials above
    auth = tweepy.OAuthHandler(consumer_key,consumer_secret)
    auth.set_access_token(access_key,access_secret)
    api =tweepy.API(auth, wait_on_rate_limit=True)
    #The listener stores every received message and prints the reports
    listener = Streamlistener(api=api)
    stream = tweepy.Stream(auth,listener=listener)
    #Keywords handed to the stream filter; languages is restricted to 'en'
    track = ['trump','donald trump']
    stream.filter(track = track, languages = ['en'])