# Sentiment Analysis Practice/Demo


### Objectives:
- Practice authenticating to the Twitter API using Python's Tweepy library.
- Analyze the sentiment of tweets (polarity and subjectivity scores) using the TextBlob library.




In [79]:
# Tweepy - Python library for accessing the Twitter API.
import tweepy

# TextBlob - Python library for processing textual data
from textblob import TextBlob

# Pandas - Data manipulation and analysis library
import pandas as pd

# NumPy - mathematical functions on multi-dimensional arrays and matrices
import numpy as np

# Regular Expression Python module
import re

In [80]:
# Twitter API credentials are kept out of the notebook in a CSV on the local machine.
# The path below is absolute and machine-specific; the cells that follow read the
# columns twitterApiKey, twitterApiSecret, twitterApiAccessToken and
# twitterApiAccessTokenSecret from the first row of this file.
CONFIG_CSV_PATH = "/users/tamer/desktop/big_data/config.csv"
config = pd.read_csv(CONFIG_CSV_PATH)

In [81]:
# Twitter API config: take each credential from the first row (label 0) of the
# matching column in the config frame loaded above.
(
    twitterApiKey,
    twitterApiSecret,
    twitterApiAccessToken,
    twitterApiAccessTokenSecret,
) = (
    config[column][0]
    for column in (
        'twitterApiKey',
        'twitterApiSecret',
        'twitterApiAccessToken',
        'twitterApiAccessTokenSecret',
    )
)

In [82]:
# Authenticate against the Twitter API with the four credentials read from the
# config file: the API key/secret identify the application, the access
# token/secret are then attached to the same handler.
auth = tweepy.OAuthHandler(twitterApiKey,twitterApiSecret)
auth.set_access_token(twitterApiAccessToken, twitterApiAccessTokenSecret)
# API client used by the cells below (note the spelling: "twetterApi").
# wait_on_rate_limit=True presumably makes Tweepy wait instead of failing when
# the rate limit is hit -- confirm against the installed Tweepy version.
twetterApi = tweepy.API(auth, wait_on_rate_limit = True)

In [84]:
# Helper functions
# Negative / neutral / positive label based on polarity
def getTextAnalysis(a):
    """Translate a polarity score into a sentiment label.

    Args:
        a: Numeric polarity score.

    Returns:
        "Negative" if ``a`` is below zero, "Neutral" if it equals zero,
        and "Positive" in every other case.
    """
    if a < 0:
        return "Negative"
    return "Neutral" if a == 0 else "Positive"
    
# Calculate subjectivity
def getTextSubjectivity(txt):
    """Return the subjectivity score TextBlob assigns to ``txt``.

    Args:
        txt: Text to analyse (a string).

    Returns:
        The ``subjectivity`` field of the text's TextBlob sentiment.
    """
    blob = TextBlob(txt)
    return blob.sentiment.subjectivity

# Calculate polarity
def getTextPolarity(txt):
    """Return the polarity score TextBlob assigns to ``txt``.

    Args:
        txt: Text to analyse (a string).

    Returns:
        The ``polarity`` field of the text's TextBlob sentiment.
    """
    blob = TextBlob(txt)
    return blob.sentiment.polarity

In [85]:
# Helper function to clean the tweets
def cleanUpTweet(txt):
    """Strip Twitter-specific noise from a tweet before sentiment analysis.

    Removes, in this order: URLs, @mentions, the '#' marker of hashtags (the
    tag word itself is kept) and the "RT : " prefix of retweets.

    Args:
        txt: Raw tweet text (a string).

    Returns:
        The cleaned text. Surrounding whitespace is not trimmed, so a tweet
        made only of mentions/links may come back as whitespace.
    """
    # Remove URLs first, while they are still intact: the later steps would
    # otherwise eat '@' and '#' parts of a link. \S+ consumes the whole link,
    # including '-', '_', '?', '=', '&' and '%', which a letters/digits/./-only
    # class would stop at and leave behind as residue.
    txt = re.sub(r'https?://\S+', '', txt)
    # Remove mentions
    txt = re.sub(r'@[A-Za-z0-9_]+', '', txt)
    # Remove the hashtag marker, keeping the tag word
    txt = re.sub(r'#', '', txt)
    # Remove the retweet prefix: "RT @user: " has become "RT : " once the
    # mention was stripped above, so this step must run after mention removal.
    txt = re.sub(r'RT : ', '', txt)
    return txt

In [86]:
# Choose the Twitter account (screen name) to analyze
twitterAccount = "berniesanders"

# Keyword arguments forwarded unchanged to the user_timeline endpoint.
timelineOptions = dict(
    screen_name=twitterAccount,
    count=None,
    since_id=None,
    max_id=None,
    trim_user=True,
    exclude_replies=True,
    contributor_details=False,
    include_entities=False,
)

# Let's get tweets! The cursor pages through the account's timeline and
# .items(100) caps the result at 100 tweets.
# NOTE(review): `tweets` looks like a single-pass iterator -- once a later cell
# has consumed it, this cell presumably has to be re-run to fetch again.
tweets = tweepy.Cursor(twetterApi.user_timeline, **timelineOptions).items(100)


In [87]:
# Build the data frame: one row per fetched tweet, raw text in the 'Tweet' column.
df = pd.DataFrame(data=[tweet.text for tweet in tweets], columns=['Tweet'])
# Clean up the column by applying cleanUpTweet to every tweet.
df['Tweet'] = df['Tweet'].apply(cleanUpTweet)
# Drop rows whose text is empty (or whitespace-only) after the cleanup.
# The filtered frame must replace `df` itself: assigning the result of a drop
# back into df['Tweet'] re-aligns on the index, so no row is removed and the
# "dropped" tweets turn into NaN, which TextBlob cannot process.
# .copy() gives an independent frame so the column assignments below do not
# trigger pandas' SettingWithCopyWarning.
df = df[df['Tweet'].str.strip() != ''].copy()
# Calculate subjectivity and polarity of each tweet with the helper functions.
df['Subjectivity'] = df['Tweet'].apply(getTextSubjectivity)
df['Polarity'] = df['Tweet'].apply(getTextPolarity)
# Overall score: Negative / Neutral / Positive label derived from the polarity.
df['score'] = df['Polarity'].apply(getTextAnalysis)

In [90]:
# Preview the first 20 analysed tweets with their scores.
df.head(n=20)

Unnamed: 0,Tweet,Subjectivity,Polarity,score
0,Interesting. The Republican Party claims to be...,0.375,0.25,Positive
1,"To the progressive movement, it is because of ...",0.6875,0.4375,Positive
2,"Under GOP control, Congress passed $1.9 trilli...",0.333333,0.166667,Positive
3,What we did is write and pass a bill to addres...,0.47619,0.238095,Positive
4,The American Rescue Plan addresses a crisis th...,0.2,-0.025,Negative
5,"In 2017, Republicans passed a tax bill that pr...",0.5,0.5,Positive
6,Right-wing Republicans in CA are trying to rec...,0.0,0.0,Neutral
7,"Today, 56 years on from Bloody Sunday, it is a...",0.858929,-0.253571,Negative
8,The American Rescue Plan is the most significa...,0.41875,0.26875,Positive
9,Where was Republicans' concern for the deficit...,0.75,0.375,Positive
