In [1]:
import re
import tweepy
from tweepy import OAuthHandler

from textblob import TextBlob

from pprint import pprint

import pandas as pd

from sklearn.cluster import k_means

import plotly.plotly as py
import plotly.graph_objs as go
import plotly.offline as offline
from plotly.offline import init_notebook_mode
init_notebook_mode(connected=True)

In [2]:
class TwitterClient(object):
    '''
    Generic Twitter Class for sentiment analysis.
    '''
    def __init__(self):
        '''
        Class constructor or initialization method.
        '''
        # keys and tokens from the Twitter Dev Console
        consumer_key = 'n9LMcL7CRMtaTY5TXMp1VfIKo'
        consumer_secret = 'G0ghn8E8TJPCKl29AfmA4019U1hq6NhGQFoMsJ05CARnmkeE7U'
        access_token = '1959972582-gfpDYaAbKj7c412HOalcL0jQv0QdhJtgwZguXjl'
        access_token_secret = 'nZJVEdDSHsCZvV8dvRtXBjOoDIzeKOSKyvtaavjeV5ARK'
 
        self.auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
        self.auth.set_access_token(access_token, access_token_secret)
        self.api  = tweepy.API(self.auth)
 
    def clean_tweet(self, tweet):
        '''
        Utility function to clean tweet text by removing links, special characters
        using simple regex statements.
        '''
        return ' '.join(re.sub("(@[A-Za-z0-9]+)|([^0-9A-Za-z \t])|(\w+:\/\/\S+)", " ", tweet).split())
 
    def get_tweet_sentiment(self, tweet):
        '''
        Utility function to classify sentiment of passed tweet
        using textblob's sentiment method
        '''
        # create TextBlob object of passed tweet text
        analysis = TextBlob(self.clean_tweet(tweet))
        # set sentiment
        if analysis.sentiment.polarity > 0:
            return 'positive'
        elif analysis.sentiment.polarity == 0:
            return 'neutral'
        else:
            return 'negative'
 
    def get_um_tweets(self, query, count = 200):
        '''
        Function to fetch user-mentioned tweets and parse them.
        '''
        # empty list to store parsed tweets
        tweets = []
 
        try:
            # call twitter api to fetch tweets
            fetched_tweets = self.api.search(q = query, count = count, tweet_mode = 'extended')
     
            # parsing tweets one by one
            for tweet in fetched_tweets:
                
                # empty dictionary to store required params of a tweet
                parsed_tweet = {}
 
                # saving text of tweet
                parsed_tweet['text'] = tweet.full_text
                
                # saving sentiment of tweet
                parsed_tweet['sentiment'] = self.get_tweet_sentiment(tweet.full_text)
                parsed_tweet['tweet_id'] = tweet.id
                parsed_tweet['num_reactions'] = tweet.favorite_count+tweet.retweet_count
                parsed_tweet['username'] = query
                
                
                if parsed_tweet['sentiment']=='negative':
                    parsed_tweet['net_sent'] = -1*parsed_tweet['num_reactions']
                elif parsed_tweet['sentiment']=='positive':
                    parsed_tweet['net_sent'] = parsed_tweet['num_reactions']
                else:
                    parsed_tweet['net_sent'] = 0
                
                # appending parsed tweet to tweets list
                if tweet.retweet_count > 0:
                    # if tweet has retweets, ensure that it is appended only once
                    if parsed_tweet not in tweets:
                        tweets.append(parsed_tweet)
                else:
                    tweets.append(parsed_tweet)
 
            # return parsed tweets
            return tweets
 
        except tweepy.TweepError as e:
            # print error (if any)
            print("Error : " + str(e))
    
    def get_own_tweets(self, screen_name, count = 200):
        '''
        Function to fetch own tweets and parse them.
        '''
        # empty list to store parsed tweets
        tweets = []
        
        try:
            # call twitter api to fetch tweets
            fetched_tweets = self.api.user_timeline(screen_name = screen_name)
     
            # parsing tweets one by one
            for tweet in fetched_tweets:
                
                # empty dictionary to store required params of a tweet
                parsed_tweet = {}
 
                # saving text/attributes of tweet
                parsed_tweet['text'] = tweet.text
                parsed_tweet['tweet_id'] = tweet.id
                parsed_tweet['num_favorites'] = tweet.favorite_count
                parsed_tweet['num_retweets'] = tweet.retweet_count
                parsed_tweet['reaction_follower_ratio'] = (tweet.favorite_count+tweet.retweet_count) / tweet.user.followers_count
                parsed_tweet['username'] = screen_name
        
                # appending parsed tweet to tweets list
                tweets.append(parsed_tweet)
 
            # return parsed tweets
            return tweets
    
        except tweepy.TweepError as e:
            # print error (if any)
            print("Error : " + str(e))

In [3]:
def get_twitter_stats(screen_name):
    
    own_tweets = api.get_own_tweets(screen_name = screen_name)
    own_df = pd.DataFrame(own_tweets)
    own_df = own_df.set_index('tweet_id')
    
    ave_fav = own_df['num_favorites'].mean()
    ave_rt  = own_df['num_retweets'].mean()
    ave_rfr = own_df['reaction_follower_ratio'].mean()
    # print('nFav: {}\nnRT: {}\n(nFav+nRT)/nFollowers: {}'.format(ave_fav, ave_rt, ave_rfr))
    
    um_tweets = api.get_um_tweets(query = screen_name)
    
    sentiment_df = pd.DataFrame(um_tweets)
    sentiment_df = sentiment_df.set_index('tweet_id')
    
    ptweets = [tweet for tweet in um_tweets if tweet['sentiment'] == 'positive']
    ntweets = [tweet for tweet in um_tweets if tweet['sentiment'] == 'negative']
    
    pos_perc = len(ptweets)/len(um_tweets)
    neg_perc = len(ntweets)/len(um_tweets)
    
    if len(ntweets)==0:
        pos_to_neg = pos_perc
    else:
        pos_to_neg = pos_perc/neg_perc
    
    neu_perc = 1-pos_perc-neg_perc
    # print('Positive: {}\nNegative: {}\nNeutral: {}\nPos/Neg: {}'.format(pos_perc, neg_perc, neu_perc, pos_to_neg))
    
    print([ave_fav, ave_rt, ave_rfr, pos_to_neg, screen_name])
    return([ave_fav, ave_rt, ave_rfr, pos_to_neg, screen_name])

In [4]:
def makeAxis(title): 
    return {
      'title': title,
      'titlefont': { 'size': 20 },
      'tickfont': { 'size': 15 },
      'tickcolor': 'rgba(0,0,0,0)',
      'ticklen': 5,
      'showline': True,
      'showgrid': True
    }

def ternary_plot(df, category):

    data = [{ 
        'type': 'scatterternary',
        'mode': 'markers',
        'a': df['ave_rt'],
        'b': df['ave_rfr'],
        'c': df['ave_fav'],
        'text': df['username'],
        'marker': {
            'symbol': 'x',
            'color': df['pos_to_neg'],
            'autocolorscale': True,
            'size': 12
        }
    }]

    layout = {
        'ternary': {
            'sum': 1,
            'aaxis': makeAxis('Retweets'),
            'baxis': makeAxis('<br>RFR'),
            'caxis': makeAxis('<br>Likes')
        },
        'annotations': [{
          'showarrow': False,
          'text': category,
            'x': 0.5,
            'y': 1.3,
            'font': { 'size': 35 }
        }]
    }

    fig = {'data': data, 'layout': layout}
    offline.iplot(fig, validate=False)

## Main

In [5]:
api = TwitterClient()

In [6]:
get_twitter_stats('TheRock')

[1793.75, 240.09999999999999, 0.0001603023111560194, 4.0, 'TheRock']


[1793.75, 240.09999999999999, 0.0001603023111560194, 4.0, 'TheRock']

In [7]:
# celeb_screen_names = ['TheRock','tomhanks','prattprattpratt','LeoDiCaprio','jimmyfallon']
celeb_screen_names = ['TheRock','tomhanks','prattprattpratt','LeoDiCaprio','jimmyfallon',
                      'HereIsGina','EmmaWatson','AnnaKendrick47','TheEllenShow','Oprah',
                      'Caitlyn_Jenner','DrOz','piersmorgan','charliesheen','KevinSpacey',
                      'chelseahandler','lindsaylohan','KimKardashian','GwynethPaltrow','_KrisJStewart']

In [8]:
celeb_list = []
for name in celeb_screen_names:
    celeb_list.append(get_twitter_stats(name))
celeb_df = pd.DataFrame(celeb_list, columns=['ave_fav','ave_rt','ave_rfr','pos_to_neg','username'])
celeb_df

[1793.75, 240.09999999999999, 0.0001603023111560194, 4.454545454545454, 'TheRock']
[14467.299999999999, 1077.6500000000001, 0.0010146750751266666, 9.75, 'tomhanks']
[19963.299999999999, 2760.0999999999999, 0.0043623402834290625, 5.75, 'prattprattpratt']
[1998.5, 766.64999999999998, 0.00014387674287592503, 4.1, 'LeoDiCaprio']
[4688.8999999999996, 557.14999999999998, 0.000103432953539271, 1.352941176470588, 'jimmyfallon']
[2568.25, 342.69999999999999, 0.006338721302693839, 8.166666666666666, 'HereIsGina']
[37149.099999999999, 6086.4499999999998, 0.001500905322353981, 3.3333333333333335, 'EmmaWatson']
[25926.900000000001, 4934.3999999999996, 0.0043472467071275084, 0.3728813559322034, 'AnnaKendrick47']
[6233.9499999999998, 851.14999999999998, 9.1719203595466313e-05, 4.625, 'TheEllenShow']
[5837.3000000000002, 891.54999999999995, 0.00015983133558316768, 2.0, 'Oprah']
[3996.0, 1218.7, 0.0013504411133661485, 0.7058823529411764, 'Caitlyn_Jenner']
[72.5, 20.850000000000001, 2.1548301435229985e-

Unnamed: 0,ave_fav,ave_rt,ave_rfr,pos_to_neg,username
0,1793.75,240.1,0.00016,4.454545,TheRock
1,14467.3,1077.65,0.001015,9.75,tomhanks
2,19963.3,2760.1,0.004362,5.75,prattprattpratt
3,1998.5,766.65,0.000144,4.1,LeoDiCaprio
4,4688.9,557.15,0.000103,1.352941,jimmyfallon
5,2568.25,342.7,0.006339,8.166667,HereIsGina
6,37149.1,6086.45,0.001501,3.333333,EmmaWatson
7,25926.9,4934.4,0.004347,0.372881,AnnaKendrick47
8,6233.95,851.15,9.2e-05,4.625,TheEllenShow
9,5837.3,891.55,0.00016,2.0,Oprah


In [9]:
tmp = celeb_df.drop('username', axis=1)
celeb_df_norm = (tmp - tmp.mean()) / (tmp.max() - tmp.min())
celeb_df_norm['username'] = celeb_df['username']
celeb_df_norm

Unnamed: 0,ave_fav,ave_rt,ave_rfr,pos_to_neg,username
0,-0.147488,-0.160434,-0.172248,0.032506,TheRock
1,0.194333,-0.022353,-0.037001,0.597227,tomhanks
2,0.342567,0.255023,0.492929,0.170657,prattprattpratt
3,-0.141965,-0.073625,-0.174848,-0.005304,LeoDiCaprio
4,-0.069402,-0.108164,-0.18125,-0.298257,jimmyfallon
5,-0.126598,-0.143519,0.805788,0.428376,HereIsGina
6,0.806088,0.803419,0.039968,-0.087063,EmmaWatson
7,0.503412,0.613487,0.49054,-0.402773,AnnaKendrick47
8,-0.02773,-0.059694,-0.183104,0.050684,TheEllenShow
9,-0.038428,-0.053034,-0.172322,-0.229253,Oprah


In [10]:
ternary_plot(celeb_df_norm, 'Celebrities')

In [21]:
athlete_screen_names = []
athlete_list = []
for name in athlete_screen_names:
    athlete_list.append(get_twitter_stats(name))
athlete_df = pd.DataFrame(athlete_list, columns=['ave_fav','ave_rt','ave_rfr','pos_to_neg','username'])
tmp = athlete_df.drop('username', axis=1)
athlete_df_norm = (tmp - tmp.mean()) / (tmp.max() - tmp.min())
athlete_df_norm['username'] = athlete_df['username']
ternary_plot(athlete_df_norm, 'Athletes')

[5110.25, 1063.7, 0.0011052324426121219, 2.9999999999999996, 'russwest44']
[28925.450000000001, 5377.3000000000002, 0.00083567441460706082, 41.5, 'KingJames']
[19321.450000000001, 2362.3499999999999, 0.0033933348617356655, 31.5, 'TigerWoods']


In [20]:
musician_screen_names = ['JohnMayer','ACDC','BryanCranston']
musician_list = []
for name in musician_screen_names:
    musician_list.append(get_twitter_stats(name))
musician_df = pd.DataFrame(musician_list, columns=['ave_fav','ave_rt','ave_rfr','pos_to_neg','username'])
tmp = musician_df.drop('username', axis=1)
musician_df_norm = (tmp - tmp.mean()) / (tmp.max() - tmp.min())
musician_df_norm['username'] = musician_df['username']
ternary_plot(musician_df_norm, 'Musicians')

[18065.400000000001, 3005.1999999999998, 0.014430642771974256, 2.714285714285714, 'JohnMayer']
[1662.3499999999999, 631.5, 0.0075523165463836067, 1.3125, 'ACDC']
[10210.450000000001, 1732.5999999999999, 0.0055047040755749422, 2.3499999999999996, 'BryanCranston']
