In [1]:
import re
import tweepy
from tweepy import OAuthHandler
from textblob import TextBlob
from pprint import pprint
import pandas as pd
import plotly.plotly as py
import plotly.graph_objs as go
import plotly.offline as offline
from plotly.offline import init_notebook_mode
init_notebook_mode(connected=True)

In [2]:
class TwitterClient(object):
    '''
    Generic Twitter Class for sentiment analysis.
    '''
    def __init__(self):
        '''
        Class constructor or initialization method.
        '''
        # keys and tokens from the Twitter Dev Console
        consumer_key = 'n9LMcL7CRMtaTY5TXMp1VfIKo'
        consumer_secret = 'G0ghn8E8TJPCKl29AfmA4019U1hq6NhGQFoMsJ05CARnmkeE7U'
        access_token = '1959972582-gfpDYaAbKj7c412HOalcL0jQv0QdhJtgwZguXjl'
        access_token_secret = 'nZJVEdDSHsCZvV8dvRtXBjOoDIzeKOSKyvtaavjeV5ARK'
 
        self.auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
        self.auth.set_access_token(access_token, access_token_secret)
        self.api  = tweepy.API(self.auth)
 
    def clean_tweet(self, tweet):
        '''
        Utility function to clean tweet text by removing links, special characters
        using simple regex statements.
        '''
        return ' '.join(re.sub("(@[A-Za-z0-9]+)|([^0-9A-Za-z \t])|(\w+:\/\/\S+)", " ", tweet).split())
 
    def get_tweet_sentiment(self, tweet):
        '''
        Utility function to classify sentiment of passed tweet
        using textblob's sentiment method
        '''
        # create TextBlob object of passed tweet text
        analysis = TextBlob(self.clean_tweet(tweet))
        # set sentiment
        if analysis.sentiment.polarity > 0:
            return 'positive'
        elif analysis.sentiment.polarity == 0:
            return 'neutral'
        else:
            return 'negative'
 
    def get_um_tweets(self, query, count = 200):
        '''
        Function to fetch user-mentioned tweets and parse them.
        '''
        # empty list to store parsed tweets
        tweets = []
 
        try:
            # call twitter api to fetch tweets
            fetched_tweets = self.api.search(q = query, count = count, tweet_mode = 'extended')
     
            # parsing tweets one by one
            for tweet in fetched_tweets:
                
                # empty dictionary to store required params of a tweet
                parsed_tweet = {}
 
                # saving text of tweet
                parsed_tweet['text'] = tweet.full_text
                
                # saving sentiment of tweet
                parsed_tweet['sentiment'] = self.get_tweet_sentiment(tweet.full_text)
                parsed_tweet['tweet_id'] = tweet.id
                parsed_tweet['num_reactions'] = tweet.favorite_count+tweet.retweet_count
                parsed_tweet['username'] = query
                
                
                if parsed_tweet['sentiment']=='negative':
                    parsed_tweet['net_sent'] = -1*parsed_tweet['num_reactions']
                elif parsed_tweet['sentiment']=='positive':
                    parsed_tweet['net_sent'] = parsed_tweet['num_reactions']
                else:
                    parsed_tweet['net_sent'] = 0
                
                # appending parsed tweet to tweets list
                if tweet.retweet_count > 0:
                    # if tweet has retweets, ensure that it is appended only once
                    if parsed_tweet not in tweets:
                        tweets.append(parsed_tweet)
                else:
                    tweets.append(parsed_tweet)
 
            # return parsed tweets
            return tweets
 
        except tweepy.TweepError as e:
            # print error (if any)
            print("Error : " + str(e))
    
    def get_own_tweets(self, screen_name, count = 200):
        '''
        Function to fetch own tweets and parse them.
        '''
        # empty list to store parsed tweets
        tweets = []
        
        try:
            # call twitter api to fetch tweets
            fetched_tweets = self.api.user_timeline(screen_name = screen_name)
     
            # parsing tweets one by one
            for tweet in fetched_tweets:
                
                # empty dictionary to store required params of a tweet
                parsed_tweet = {}
 
                # saving text/attributes of tweet
                parsed_tweet['text'] = tweet.text
                parsed_tweet['tweet_id'] = tweet.id
                parsed_tweet['num_favorites'] = tweet.favorite_count
                parsed_tweet['num_retweets'] = tweet.retweet_count
                parsed_tweet['reaction_follower_ratio'] = (tweet.favorite_count+tweet.retweet_count) / tweet.user.followers_count
                parsed_tweet['username'] = screen_name
        
                # appending parsed tweet to tweets list
                tweets.append(parsed_tweet)
 
            # return parsed tweets
            return tweets
    
        except tweepy.TweepError as e:
            # print error (if any)
            print("Error : " + str(e))

In [3]:
def get_row(screen_name):
    
    own_tweets = api.get_own_tweets(screen_name = screen_name)
    own_df = pd.DataFrame(own_tweets)
    own_df = own_df.set_index('tweet_id')
    
    ave_fav = own_df['num_favorites'].mean()
    ave_rt  = own_df['num_retweets'].mean()
    ave_rfr = own_df['reaction_follower_ratio'].mean()
    # print('nFav: {}\nnRT: {}\n(nFav+nRT)/nFollowers: {}'.format(ave_fav, ave_rt, ave_rfr))
    
    um_tweets = api.get_um_tweets(query = screen_name)
    
    sentiment_df = pd.DataFrame(um_tweets)
    sentiment_df = sentiment_df.set_index('tweet_id')
    
    ptweets = [tweet for tweet in um_tweets if tweet['sentiment'] == 'positive']
    ntweets = [tweet for tweet in um_tweets if tweet['sentiment'] == 'negative']
    
    pos_perc = len(ptweets)/len(um_tweets)
    neg_perc = len(ntweets)/len(um_tweets)
    
    if len(ntweets)==0:
        pos_to_neg = pos_perc
    else:
        pos_to_neg = pos_perc/neg_perc
    
    neu_perc = 1-pos_perc-neg_perc
    # print('Positive: {}\nNegative: {}\nNeutral: {}\nPos/Neg: {}'.format(pos_perc, neg_perc, neu_perc, pos_to_neg))
    
    print([ave_fav, ave_rt, ave_rfr, pos_to_neg, screen_name])
    return([ave_fav, ave_rt, ave_rfr, pos_to_neg, screen_name])

In [4]:
def makeAxis(title): 
    return {
      'title': title,
      'titlefont': { 'size': 20 },
      'tickfont': { 'size': 15 },
      'tickcolor': 'rgba(0,0,0,0)',
      'ticklen': 5,
      'showline': True,
      'showgrid': True
    }

def ternary_plot(df):

    data = [{ 
        'type': 'scatterternary',
        'mode': 'markers',
        'a': df['ave_rt'],
        'b': df['ave_rfr'],
        'c': df['ave_fav'],
        'text': df['username'],
        'marker': {
            'symbol': 'x',
            'color': df['pos_to_neg'],
            'autocolorscale': True,
            'size': 12
        }
    }]

    layout = {
        'ternary': {
            'sum': 1,
            'aaxis': makeAxis('Retweets'),
            'baxis': makeAxis('<br>RFR'),
            'caxis': makeAxis('<br>Likes')
        },
        'annotations': [{
          'showarrow': False,
          'text': 'Celebrities',
            'x': 0.5,
            'y': 1.3,
            'font': { 'size': 35 }
        }]
    }

    fig = {'data': data, 'layout': layout}
    offline.iplot(fig, validate=False)

## Main

In [5]:
api = TwitterClient()

In [6]:
get_row('TheRock')

[1570.3, 217.55000000000001, 0.00014092930336300665, 5.799999999999999, 'TheRock']


[1570.3,
 217.55000000000001,
 0.00014092930336300665,
 5.799999999999999,
 'TheRock']

In [15]:
# celeb_screen_names = ['TheRock','tomhanks','prattprattpratt','LeoDiCaprio','jimmyfallon']
celeb_screen_names = ['TheRock','tomhanks','prattprattpratt','LeoDiCaprio','jimmyfallon',
                      'HereIsGina','EmmaWatson','AnnaKendrick47','TheEllenShow','Oprah',
                      'Caitlyn_Jenner','DrOz','piersmorgan','charliesheen','KevinSpacey',
                      'chelseahandler','lindsaylohan','KimKardashian','GwynethPaltrow','_KrisJStewart']

In [16]:
celeb_list = []
for name in celeb_screen_names:
    celeb_list.append(get_row(name))
celeb_df = pd.DataFrame(celeb_list, columns=['ave_fav','ave_rt','ave_rfr','pos_to_neg','username'])
celeb_df

[1575.45, 217.84999999999999, 0.00014135873865092226, 5.2727272727272725, 'TheRock']
[14465.200000000001, 1077.55, 0.0010144731348272346, 1.95, 'tomhanks']
[19942.150000000001, 2759.8000000000002, 0.0043591077845633631, 4.1, 'prattprattpratt']
[1995.3499999999999, 744.35000000000002, 0.00014256486774333663, 5.1, 'LeoDiCaprio']
[4714.1000000000004, 559.0, 0.0001039735602596888, 1.2413793103448276, 'jimmyfallon']
[2565.8499999999999, 342.64999999999998, 0.0063344070354713735, 3.2222222222222223, 'HereIsGina']
[37142.199999999997, 6085.8000000000002, 0.0015008584071263486, 0.35714285714285715, 'EmmaWatson']
[26030.450000000001, 4954.8999999999996, 0.0043646360433306202, 10.285714285714285, 'AnnaKendrick47']
[6057.0, 770.89999999999998, 8.8396007901175643e-05, 14.0, 'TheEllenShow']
[5827.8000000000002, 890.14999999999998, 0.0001595959564764001, 3.1818181818181817, 'Oprah']
[3995.75, 1219.4000000000001, 0.0013505261721336588, 0.8620689655172414, 'Caitlyn_Jenner']
[87.099999999999994, 21.949

Unnamed: 0,ave_fav,ave_rt,ave_rfr,pos_to_neg,username
0,1575.45,217.85,0.000141,5.272727,TheRock
1,14465.2,1077.55,0.001014,1.95,tomhanks
2,19942.15,2759.8,0.004359,4.1,prattprattpratt
3,1995.35,744.35,0.000143,5.1,LeoDiCaprio
4,4714.1,559.0,0.000104,1.241379,jimmyfallon
5,2565.85,342.65,0.006334,3.222222,HereIsGina
6,37142.2,6085.8,0.001501,0.357143,EmmaWatson
7,26030.45,4954.9,0.004365,10.285714,AnnaKendrick47
8,6057.0,770.9,8.8e-05,14.0,TheEllenShow
9,5827.8,890.15,0.00016,3.181818,Oprah


In [17]:
tmp = celeb_df.drop('username', axis=1)
celeb_df_norm = (tmp - tmp.mean()) / (tmp.max() - tmp.min())
celeb_df_norm['username'] = celeb_df['username']
celeb_df_norm

Unnamed: 0,ave_fav,ave_rt,ave_rfr,pos_to_neg,username
0,-0.153223,-0.163288,-0.17541,0.091194,TheRock
1,0.19463,-0.021514,-0.037024,-0.152357,tomhanks
2,0.342436,0.255909,0.493094,0.005235,prattprattpratt
3,-0.141892,-0.076462,-0.175219,0.078533,LeoDiCaprio
4,-0.068521,-0.107029,-0.181336,-0.204297,jimmyfallon
5,-0.126496,-0.142707,0.806175,-0.059105,HereIsGina
6,0.806611,0.804405,0.040067,-0.269111,EmmaWatson
7,0.50674,0.617907,0.49397,0.458638,AnnaKendrick47
8,-0.03228,-0.072084,-0.183805,0.730889,TheEllenShow
9,-0.038466,-0.052418,-0.17252,-0.062066,Oprah


In [20]:
ternary_plot(celeb_df_norm)