In [1]:
import re
import tweepy
import pandas as pd
from textblob import TextBlob
from sklearn import preprocessing
import plotly.offline as offline
offline.init_notebook_mode(connected=True)

In [2]:
def write_FROM_user_tweets_to_file(screen_name):

    print('Writing to file for '+screen_name)
    
    # fetch tweets
    tweets = api.user_timeline(screen_name, count=200)
    
    try:
        
        # open file
        with open('from_user_tweets_celebrities.csv', 'a') as myfile:
                
            # loop through tweets
            for tweet in tweets:
                
                # check if tweet in file
                if str(tweet.id) not in open('from_user_tweets_celebrities.csv').read():
                                        
                    s = '{},{},{},{},{},{}\n'.format(tweet.id,
                                                     tweet.created_at,
                                                     tweet.favorite_count,
                                                     tweet.retweet_count,
                                                     tweet.user.screen_name, 
                                                     tweet.user.followers_count)        
                    # write to file
                    myfile.write(s)
                
    except tweepy.TweepError as e:
            # print error (if any)
            print("Error : " + str(e))
            
def get_df_FROM_user_tweets():
    
    # read in data
    from_user_df = pd.read_csv('from_user_tweets_celebrities.csv', header=None, engine='python')

    # name columns
    from_user_df.columns = ['tweet_id','created_at','favorite_count','retweet_count','screen_name','followers_count']

    # set tweet_id as index
    from_user_df.set_index('tweet_id', inplace=True)

    # remove duplicate tweets
    from_user_df = from_user_df[~from_user_df.index.duplicated(keep='first')]
    
    # RFR
    from_user_df['rfr'] = (from_user_df['favorite_count']+from_user_df['retweet_count'])/from_user_df['followers_count']
        
    # screen_name index for plotting colors
    le = preprocessing.LabelEncoder()
    le.fit(from_user_df['screen_name'])
    from_user_df['screen_name_ind'] = le.transform(from_user_df['screen_name'])
        
    # normalize
    tmp = from_user_df.drop(['created_at','screen_name','screen_name_ind'],axis=1)
    normalized = (tmp-tmp.min())/(tmp.max()-tmp.min())
    normalized['screen_name']=from_user_df['screen_name']
    normalized['screen_name_ind']=from_user_df['screen_name_ind']
    normalized['created_at']=from_user_df['created_at']
    
    return normalized

In [3]:
def clean_tweet(tweet):
    return ' '.join(re.sub("(@[A-Za-z0-9]+)|([^0-9A-Za-z \t])|(\w+:\/\/\S+)", " ", tweet).split())

def write_ABOUT_user_tweets_to_file(screen_name):
    
    print('Writing to file for '+screen_name)
    
    # fetch tweets
    tweets = api.search(screen_name, lang='en', count=50)
    
    try:
        
        # open file
        with open('about_user_tweets_celebrities.csv', 'a') as myfile:
                
            # loop through tweets
            for tweet in tweets:
                
                clean_text = clean_tweet(tweet.text)
                analysis = TextBlob(clean_text)
                polarity = analysis.sentiment.polarity
                
                # make sure tweet not already in file
                if str(tweet.id) not in open('about_user_tweets_celebrities.csv').read():
                                        
                    s = '{},{},{},{},{},{},{},{}\n'.format(tweet.id, 
                                                        tweet.created_at, 
                                                        clean_text, 
                                                        polarity, 
                                                        polarity*(0.5*tweet.favorite_count+tweet.retweet_count),
                                                        tweet.favorite_count, 
                                                        tweet.retweet_count, 
                                                        screen_name)        
                    # write to file
                    myfile.write(s)
                
    except tweepy.TweepError as e:
            # print error (if any)
            print("Error : " + str(e))
    
def get_df_ABOUT_user_tweets():
    
    # read in data
    about_user_df = pd.read_csv('about_user_tweets_celebrities.csv', header=None)

    # name columns
    about_user_df.columns = ['tweet_id','created_at','text','polarity_ind','polarity_agg','favorite_count','retweet_count','screen_name']

    # set tweet_id as index
    about_user_df.set_index('tweet_id', inplace=True)

    # remove duplicate tweets
    about_user_df = about_user_df[~about_user_df.index.duplicated(keep='first')]
    
    return about_user_df

In [4]:
def ternary_plot_all(df, title):

    data = [{ 
        'type': 'scatterternary',
        'mode': 'markers',
        'opacity': .5,
        'a': df['retweet_count'],
        'b': df['rfr'],
        'c': df['favorite_count'],
        'text': df['screen_name'],
        'marker': {
            'symbol': 'o',
            'color': df['screen_name_ind'],
            'autocolorscale': True,
            'size': 14
        }
    }]

    layout = {
        'ternary': {
            'sum': 1,
            'aaxis': makeAxis('Retweets'),
            'baxis': makeAxis('<br>RFR'),
            'caxis': makeAxis('<br>Favorites')
        },
        'annotations': [{
            'showarrow': False,
            'text': title,
            'x': 0.5,
            'y': 1.3,
            'font': { 'size': 35 }
        }],
    }

    fig = {'data': data, 'layout': layout}
    offline.iplot(fig, validate=False)
    
def ternary_plot_avg(df, title):

    data = [{ 
        'type': 'scatterternary',
        'mode': 'markers',
        'opacity': .5,
        'a': df['retweet_count'],
        'b': df['rfr'],
        'c': df['favorite_count'],
        'text': df.index,
        'marker': {
            'symbol': 'o',
            'color': df['screen_name_ind'],
            'autocolorscale': True,
            'size': 14
        }
    }]

    layout = {
        'ternary': {
            'sum': 1,
            'aaxis': makeAxis('Retweets'),
            'baxis': makeAxis('<br>RFR'),
            'caxis': makeAxis('<br>Favorites')
        },
        'annotations': [{
            'showarrow': False,
            'text': title,
            'x': 0.5,
            'y': 1.3,
            'font': { 'size': 35 }
        }],
    }

    fig = {'data': data, 'layout': layout}
    offline.iplot(fig, validate=False)
    
def makeAxis(title): 
    return {
      'title': title,
      'titlefont': { 'size': 20 },
      'tickfont': { 'size': 15 },
      'tickcolor': 'rgba(0,0,0,0)',
      'ticklen': 5,
      'showline': True,
      'showgrid': True
    }

### Tweepy Setup

In [5]:
consumer_key = 'n9LMcL7CRMtaTY5TXMp1VfIKo'
consumer_secret = 'G0ghn8E8TJPCKl29AfmA4019U1hq6NhGQFoMsJ05CARnmkeE7U'
access_token = '1959972582-gfpDYaAbKj7c412HOalcL0jQv0QdhJtgwZguXjl'
access_token_secret = 'nZJVEdDSHsCZvV8dvRtXBjOoDIzeKOSKyvtaavjeV5ARK'

auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
api  = tweepy.API(auth)

# Tweets FROM User

In [38]:
people = pd.read_csv('people_list.csv')

In [39]:
celeb_screen_names = list(people[people['industry']=='celebrity']['screen_name'])

In [34]:
[write_FROM_user_tweets_to_file(screen_name) for screen_name in celeb_screen_names]

Writing to file for Adele
Writing to file for BrunoMars
Writing to file for selenagomez
Writing to file for coldplay
Writing to file for ed sheeran
Writing to file for john legend
Writing to file for shakira
Writing to file for beyonce
Writing to file for nickjonas
Writing to file for jason_mraz
Writing to file for kanyaewest
Writing to file for johnmayer
Writing to file for taylorswift13
Writing to file for britneyspears
Writing to file for justinbieber
Writing to file for mileycyrus
Writing to file for marilynmanson
Writing to file for chrisbrown
Writing to file for eminem
Writing to file for fergie


[None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None]

In [35]:
from_user_df = get_df_FROM_user_tweets()

### Plotting

In [36]:
ternary_plot_all(from_user_df, 'Musicians')

In [37]:
from_user_df_ave = from_user_df.groupby('screen_name').mean()
ternary_plot_avg(from_user_df_ave, 'Musicians')

# Tweets ABOUT User

In [None]:
# [write_ABOUT_user_tweets_to_file(screen_name) for screen_name in celeb_screen_names]

In [None]:
# about_user_df = get_df_ABOUT_user_tweets()

In [None]:
# about_user_df.head(10)

# BREAK BREAK BREAK