# TwitAnlysis-to-csv

### Automated Twitter Keyword Sentiment Analysis using Textblob Lexicon
By Chakrit Thong Ek

**How to use this file**<br>
1) Import the libraries<br>
2) Replace the Twitter authentication keys with your own<br>
3) Execute the cells<br>
4) Enter a keyword (any length)<br>
5) The scraped results are generated and exported to CSV format

### 1. Import Libraries 

In [None]:
import os
import re
import time
import warnings
from datetime import date

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly.graph_objects as go
from pylab import rcParams
from textblob import TextBlob
from tweepy import *

warnings.filterwarnings(action='ignore')

### 2. Twitter Keyword & Hashtag Search

In [None]:
################################################################
# Setting up Twitter Scrapper 
################################################################

#### Twitter Authentication Keys
class twitter_API_credentials():
    """Twitter API credentials used to authenticate the scraper.

    Each value is read from an environment variable so that real secrets do
    not have to be committed with the notebook. When a variable is not set,
    the placeholder string is used instead; replace the placeholder (or
    export the variable) with your own key before running.
    """
    # Key in your twitter credentials
    CONSUMER_KEY = os.environ.get(
        'TWITTER_CONSUMER_KEY', 'XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX')
    CONSUMER_SECRET = os.environ.get(
        'TWITTER_CONSUMER_SECRET', 'XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX')
    ACCESS_TOKEN = os.environ.get(
        'TWITTER_ACCESS_TOKEN', 'XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX')
    ACCESS_TOKEN_SECRET = os.environ.get(
        'TWITTER_ACCESS_TOKEN_SECRET', 'XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX')
    
#### Twitter Authenticator
class twitter_authenticator():
    """Build an authenticated tweepy API client from the stored credentials."""

    def authenticator(self):
        """Return an `API` client authorised with the keys in `twitter_API_credentials`."""
        creds = twitter_API_credentials

        # OAuth handshake: consumer key pair first, then the access token pair.
        handler = OAuthHandler(creds.CONSUMER_KEY, creds.CONSUMER_SECRET)
        handler.set_access_token(creds.ACCESS_TOKEN, creds.ACCESS_TOKEN_SECRET)

        return API(handler)
    
#### Twitter Cleaner and Processor
class twitter_cleaner():
    """Clean tweet text, label its sentiment and tabulate tweets in a DataFrame."""

    # Filters the sentences for words before sentiment analysis
    def clean_tweet(self, tweet):
        """Return `tweet` reduced to plain words separated by single spaces.

        Mentions (including underscores in the handle), links and every
        character that is not a letter, digit, space or tab are replaced by a
        space, then runs of whitespace are collapsed.
        """
        pattern = r"(@[A-Za-z0-9_]+)|([^0-9A-Za-z \t])|(\w+:\/\/\S+)|(https?://[^\s]+)"
        return ' '.join(re.sub(pattern, " ", tweet).split())

    # Perform sentiment analysis and display result
    def analyze_sentiment(self, tweet):
        """Return 'POSITIVE', 'NEUTRAL' or 'NEGATIVE' from the TextBlob polarity of `tweet`."""
        # Compute the polarity once instead of re-evaluating it per branch.
        polarity = TextBlob(self.clean_tweet(tweet)).sentiment.polarity

        if polarity > 0:
            return 'POSITIVE'
        elif polarity == 0:
            return 'NEUTRAL'
        else:
            return 'NEGATIVE'

    def tweets_to_data_frame(self, tweets, raw=False):
        """Tabulate tweet objects into a DataFrame.

        Parameters:
            tweets: iterable of objects exposing `text`, `id`, `created_at`,
                `source`, `favorite_count` and `retweet_count`.
            raw: when True, keep the tweet text untouched; by default the
                retweet label, @mentions, '#' signs and links are removed.

        Returns:
            DataFrame with the columns Tweets, ID, Length, Date, Upload by,
            Likes and Retweets. Length is measured on the original text.
        """
        tweets = list(tweets)  # allow one-shot iterables; we iterate several times
        tweets_df = pd.DataFrame({
            'Tweets': [tweet.text for tweet in tweets],
            'ID': [tweet.id for tweet in tweets],
            'Length': [len(tweet.text) for tweet in tweets],
            'Date': [tweet.created_at for tweet in tweets],
            'Upload by': [tweet.source for tweet in tweets],
            'Likes': [tweet.favorite_count for tweet in tweets],
            'Retweets': [tweet.retweet_count for tweet in tweets],
        })

        # Remove retweet label, @, # and links.
        # PASS raw=True TO GENERATE RAW TWEETS FOR CSV OUTPUT
        if not raw:
            # Assign the result back rather than using inplace=True on a
            # column selection, which may operate on a temporary copy.
            tweets_df['Tweets'] = tweets_df['Tweets'].replace(
                r'(RT @[A-Za-z0-9_:]+)|(@[A-Za-z0-9_]+)|(#)|(https?://[^\s]+)', '',
                regex=True)
        return tweets_df

    @staticmethod
    def tweets_post_process(tweets):
        """Return a de-duplicated DataFrame of `tweets` with a 'Sentiment' column.

        `tweets` may be None (treated as no tweets), in which case an empty
        DataFrame with the usual columns is returned.
        """
        cleaner = twitter_cleaner()
        tweet_dataframe = cleaner.tweets_to_data_frame([] if tweets is None else tweets)
        tweet_dataframe['Sentiment'] = [
            cleaner.analyze_sentiment(text) for text in tweet_dataframe['Tweets']
        ]
        # Identical text (e.g. repeated retweets) is counted only once.
        tweet_dataframe = tweet_dataframe.drop_duplicates(subset='Tweets')
        return tweet_dataframe.reset_index(drop=True)
        
#### Twitter Scrapper
class twitter_scrapper():
    """Fetch tweets matching a keyword through a tweepy API client."""

    @staticmethod
    def scrapper(search_hashtag, client=None):
        """Return up to 100 English tweets matching `search_hashtag`, or None on error.

        Parameters:
            search_hashtag: keyword or hashtag passed to the search endpoint.
            client: API client to query. When omitted, the module-level `api`
                object created in the `__main__` block is used.

        Any failure is reported on stdout together with its cause and None is
        returned. No rate-limit waiting is performed.
        """
        try:
            if client is None:
                client = api
            # tweepy 4 renamed API.search to API.search_tweets; support both.
            search = getattr(client, 'search_tweets', None) or client.search
            return search(search_hashtag, count=100, lang='en')
        except Exception as error:
            print('Sorry there was an error: ' + str(error))
            return None

#########################################################
# Plotting a bar graph with result
#########################################################

class plotting_barg():
    """Plotly charts summarising the 'Sentiment' column of the tweet DataFrame."""

    @staticmethod
    def _title_term(search_term):
        """Return the upper-cased search term to show in a figure title.

        Falls back to the module-level `search_hashtag` (set in the
        `__main__` block) when no term is passed, or '' if that is missing.
        """
        if search_term is None:
            search_term = globals().get('search_hashtag', '')
        return str(search_term).upper()

    @staticmethod
    def bar_graph(output_tweet, search_term=None):
        """Show a vertical bar chart of the number of tweets per sentiment.

        Parameters:
            output_tweet: DataFrame with a 'Sentiment' column.
            search_term: text appended to the figure title; defaults to the
                module-level `search_hashtag`.

        Nothing is drawn when there are no sentiment values.
        """
        # Work on the Series directly: the name of the value_counts() result
        # differs between pandas versions, so it must not be used as a key.
        counts = output_tweet['Sentiment'].value_counts()
        if counts.empty:
            print('No sentiment data available to plot')
            return

        # Highlight the most frequent sentiment, grey out the rest.
        colors = ['#1f77b4'] + ['lightslategray'] * (len(counts) - 1)

        fig = go.Figure(data=[go.Bar(x=counts.index.tolist(), y=counts.tolist(),
                                     marker_color=colors)])
        fig.update_layout(
            title={'text': 'Figure 2. SENTIMENT COUNT: ' + plotting_barg._title_term(search_term),
                   'y': 0.9, 'x': 0.5, 'xanchor': 'center', 'yanchor': 'top'},
            font=dict(family='Arial', size=18),
            template='simple_white',
            yaxis=dict(title=dict(text='COUNT', font=dict(family='Arial', size=16)),
                       tickfont=dict(size=16)),
        )

        fig.show()

    @staticmethod
    def percentage_horizontal(output_tweet, search_term=None):
        """Show a single stacked horizontal bar with the share of each sentiment.

        Parameters:
            output_tweet: DataFrame with a 'Sentiment' column.
            search_term: text appended to the figure title; defaults to the
                module-level `search_hashtag`.

        Percentages are rounded to whole numbers. Nothing is drawn when there
        are no sentiment values.
        """
        counts = output_tweet['Sentiment'].value_counts()
        if counts.empty:
            print('No sentiment data available to plot')
            return

        # Share of each sentiment in percent, most frequent first.
        percentages = (counts / counts.sum() * 100).round().astype(int)
        top_labels = percentages.index.tolist()
        values = percentages.tolist()
        row_name = 'SENTIMENT'

        colors = ['rgba(38, 24, 74, 0.8)', 'rgba(71, 58, 131, 0.8)',
                  'rgba(122, 120, 168, 0.8)']

        # One trace per sentiment so that barmode='stack' lays them end to end.
        fig = go.Figure()
        for position, value in enumerate(values):
            fig.add_trace(go.Bar(
                x=[value], y=[row_name],
                orientation='h',
                marker=dict(
                    color=colors[position % len(colors)],
                    line=dict(color='rgb(248, 248, 249)', width=1)
                )
            ))

        fig.update_layout(
            xaxis=dict(
                showgrid=False,
                showline=False,
                showticklabels=False,
                zeroline=False,
                domain=[0.15, 1]
            ),
            yaxis=dict(
                showgrid=False,
                showline=False,
                showticklabels=False,
                zeroline=False,
            ),
            barmode='stack',
            paper_bgcolor='rgb(255, 255, 255)',
            plot_bgcolor='rgb(255, 255, 255)',
            margin=dict(l=120, r=10, t=140, b=80),
            showlegend=False,
        )

        dark_font = dict(family='Arial', size=16, color='rgb(67, 67, 67)')
        light_font = dict(family='Arial', size=16, color='rgb(248, 248, 255)')

        # labeling the y-axis
        annotations = [dict(xref='paper', yref='y',
                            x=0.14, y=row_name,
                            xanchor='right',
                            text=row_name,
                            font=dark_font,
                            showarrow=False, align='right')]

        left_edge = 0  # running width of the segments already labelled
        for label, value in zip(top_labels, values):
            centre = left_edge + value / 2
            # percentage text in the middle of the segment
            annotations.append(dict(xref='x', yref='y',
                                    x=centre, y=row_name,
                                    text=str(value) + '%',
                                    font=light_font,
                                    showarrow=False))
            # sentiment name above the segment
            annotations.append(dict(xref='x', yref='paper',
                                    x=centre, y=1.1,
                                    text=label,
                                    font=dark_font,
                                    showarrow=False))
            left_edge += value

        fig.update_layout(
            title={'text': ('Figure 1. SENTIMENT (%) ON SEARCH TERM: '
                            + plotting_barg._title_term(search_term)),
                   'y': 0.9, 'x': 0.5, 'xanchor': 'center', 'yanchor': 'top'},
            font=dict(family='Arial', size=18), annotations=annotations)

        fig.show()
    
#################################################################
#################################################################

if __name__ == '__main__':
    # Script entry point: ask for a keyword, fetch matching tweets, label
    # their sentiment, save the table to CSV and plot the results.
    # `clean_twitter`, `search_hashtag` and `api` are deliberately kept as
    # module-level names because code elsewhere in this file may look them
    # up as globals.

    # Initialize class twitter cleaner
    clean_twitter = twitter_cleaner()

    # Request for keyword
    search_hashtag = input('Please insert Keyword? (i.e #GE2020): ')
    # Authenticate program
    api = twitter_authenticator().authenticator()

    # Scrape and process tweets into dataframe
    tweets = twitter_scrapper.scrapper(search_hashtag)
    if not tweets:
        # The scraper returns None on error and may return no results at all;
        # there is nothing to analyse, plot or save in either case.
        print('No tweets were fetched; nothing to analyse or save')
    else:
        output_tweet = twitter_cleaner.tweets_post_process(tweets)
        print('We have fetched '+ str(len(output_tweet)) + ' tweets!')

        # Extract dataframe to csv file first, so the data is kept even if
        # plotting fails, and only then report that the file was saved.
        output_tweet.to_csv('Twitter_Analysis.csv')
        print('File has been saved to local drive')

        # Plotting graphs
        plotting_barg.percentage_horizontal(output_tweet)
        plotting_barg.bar_graph(output_tweet)