In [43]:
# Tweepy
import tweepy
import pandas as pd
import nltk
from tweepy import OAuthHandler
from tweepy import Cursor

In [44]:
"""
Twitter Authentication Credentials
Please update with your own credentials
"""
# Placeholders: fill these in before running the cells below.
cons_key = ""     # consumer key, passed to tweepy.OAuthHandler
cons_secret = ""  # consumer secret, passed to tweepy.OAuthHandler
acc_token = ""    # access token, passed to set_access_token
acc_secret = ""   # access token secret, passed to set_access_token

In [49]:
def get_twitter_auth():
    """
    Build the OAuth handler used to authenticate against Twitter.

    Each credential is read from the notebook-level variable (cons_key,
    cons_secret, acc_token, acc_secret). When that variable is empty, the
    matching TWITTER_* environment variable is used instead, so secrets do
    not have to be written into the notebook.

    If any credential is still missing, a message is written to stderr and
    the function exits with status 1 (raises SystemExit).

    @return:
        - the authentication to Twitter (a tweepy.OAuthHandler with the
          access token already set)
    """
    # Local imports: the notebook's import cell does not provide os / sys.
    import os
    import sys

    # Environment variable name -> value configured in the notebook.
    configured = {
        "TWITTER_CONSUMER_KEY": cons_key,
        "TWITTER_CONSUMER_SECRET": cons_secret,
        "TWITTER_ACCESS_TOKEN": acc_token,
        "TWITTER_ACCESS_SECRET": acc_secret,
    }
    resolved = {
        env_name: value or os.environ.get(env_name, "")
        for env_name, value in configured.items()
    }

    # Fail early with a clear message instead of sending empty credentials.
    missing = [env_name for env_name, value in resolved.items() if not value]
    if missing:
        sys.stderr.write(
            "Twitter credentials not set: {}\n".format(", ".join(missing))
        )
        sys.exit(1)

    auth = tweepy.OAuthHandler(
        resolved["TWITTER_CONSUMER_KEY"], resolved["TWITTER_CONSUMER_SECRET"]
    )
    auth.set_access_token(
        resolved["TWITTER_ACCESS_TOKEN"], resolved["TWITTER_ACCESS_SECRET"]
    )

    return auth

def get_twitter_client():
    """
    Create the API client used by the rest of the notebook.

    @return:
        - a tweepy.API object built from get_twitter_auth() with
          wait_on_rate_limit=True
    """
    return tweepy.API(get_twitter_auth(), wait_on_rate_limit=True)


def get_tweets_from_user(twitter_user_name, page_limit=16, count_tweet=200):
    """
    Download a user's timeline and return it as a DataFrame.

    @params:
        - twitter_user_name: the twitter username of a user (company, etc.)
        - page_limit: the total number of pages (max=16)
        - count_tweet: maximum number to be retrieved from a page

    @return
        - a DataFrame with the columns date, author, twitter_name, text,
          number_of_likes and number_of_retweets, holding all the tweets
          from the user twitter_user_name. Rows sharing the same text are
          collapsed to their first occurrence. When no tweet is returned
          the frame is empty but still has these columns.
    """
    columns = [
        "date",
        "author",
        "twitter_name",
        "text",
        "number_of_likes",
        "number_of_retweets",
    ]

    client = get_twitter_client()
    timeline_pages = Cursor(
        client.user_timeline,
        screen_name=twitter_user_name,
        count=count_tweet,
    ).pages(page_limit)

    # Flatten every page into one list of plain records.
    all_tweets = [
        {
            "date": tweet.created_at,
            "author": tweet.user.name,
            "twitter_name": tweet.user.screen_name,
            "text": tweet.text,
            "number_of_likes": tweet.favorite_count,
            "number_of_retweets": tweet.retweet_count,
        }
        for page in timeline_pages
        for tweet in page
    ]

    # Explicit columns keep the schema stable when the timeline is empty;
    # without them drop_duplicates would raise KeyError on the missing
    # "text" column.
    df = pd.DataFrame(all_tweets, columns=columns)

    # Remove duplicates if there are any
    return df.drop_duplicates(subset="text", keep="first")

In [55]:
# Download the "GoogleAI" timeline using the default page_limit and count_tweet.
googleAI = get_tweets_from_user("GoogleAI") 

In [56]:
# Report the (rows, columns) shape of the downloaded frame.
print("Data Shape: {}".format(googleAI.shape))

Data Shape: (1743, 6)


In [59]:
# Preview the first 10 rows of the frame.
googleAI.head(10)

Unnamed: 0,date,author,twitter_name,text,number_of_likes,number_of_retweets
0,2021-11-19 23:29:13,Google AI,GoogleAI,It can be challenging for robots to imitate pr...,1181,215
1,2021-11-19 17:58:16,Google AI,GoogleAI,"RT @TensorFlow: 📣 Announcing TensorFlow GNN, a...",0,307
2,2021-11-18 20:40:54,Google AI,GoogleAI,#ReinforcementLearning agents typically perfor...,536,94
3,2021-11-18 17:58:40,Google AI,GoogleAI,We are pleased to announce the 2021 recipients...,61,10
4,2021-11-17 23:20:25,Google AI,GoogleAI,Today we present a new strategy for assessing ...,198,56
5,2021-11-17 19:52:28,Google AI,GoogleAI,"Introducing RLiable, an easy-to-use library fo...",400,90
6,2021-11-16 00:14:59,Google AI,GoogleAI,As part of Google Australia’s Digital Future I...,93,21
7,2021-11-15 22:06:09,Google AI,GoogleAI,"Continuing our work on #NeuralWeatherModels, w...",381,96
8,2021-11-12 19:16:21,Google AI,GoogleAI,"Announcing the release of VHP, a new open-sour...",129,39
9,2021-11-11 22:10:48,Google AI,GoogleAI,Making reasonable predictions about the future...,165,44
