## Download Twitter Data

<!--
import data_analytics.github as github
print(github.create_jupyter_notebook_header("markcrowe-com", "data-analytics-project-template", "notebooks/notebook-1-02-dc-twitter-api.ipynb", "master"))
-->
<table style="margin: auto;"><tr><td><a href="https://mybinder.org/v2/gh/markcrowe-com/data-analytics-project-template/master?filepath=notebooks/notebook-1-02-dc-twitter-api.ipynb" target="_parent"><img src="https://mybinder.org/badge_logo.svg" alt="Open In Binder"/></a></td><td>online editors</td><td><a href="https://colab.research.google.com/github/markcrowe-com/data-analytics-project-template/blob/master/notebooks/notebook-1-02-dc-twitter-api.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a></td></tr></table>

> "It took exhaustive research, sifting through teraquads of data, separating fact from rumor, but eventually I arrived at the truth." - Captain Kathryn Janeway, 2376

### Setup

Import the required third-party Python libraries, import supporting functions, and set up data source file paths.

In [1]:
# Local
#%pip install -r notebook-1-02-dc-twitter-api/requirements.txt
# Remote option
#%pip install -r https://raw.githubusercontent.com/markcrowe-com/data-analytics-project-template/notebook-1-02-dc-twitter-api/requirements.txt
#Options: --quiet --user

In [2]:
from configparser import ConfigParser
from IPython.display import clear_output
from pandas import DataFrame
import tweepy

#### Function

In [3]:
MAX_COUNT: int = 200  # 200 is the maximum allowed count
EXTENDED_MODE: str = "extended"  # to keep full_text


def download_user_tweets(o_auth_handler: tweepy.OAuthHandler,
                         screen_name: str,
                         include_re_tweets: bool = False,
                         tweet_count: int = MAX_COUNT,
                         tweet_mode: str = EXTENDED_MODE,
                         tweepy_api_wait_on_rate_limit: bool = True,
                         print_status_updates: bool = True) -> list[tweepy.models.Status]:
    """Download a user's timeline, paging backwards until no tweets are returned.

    Args:
        o_auth_handler: Authenticated tweepy OAuth handler used to build the API client.
        screen_name: Twitter handle (without the leading "@") whose timeline is fetched.
        include_re_tweets: Forwarded to the API as ``include_rts``.
        tweet_count: Page size per request; clamped to the range 1..MAX_COUNT.
        tweet_mode: Forwarded to the API as ``tweet_mode``; the default
            EXTENDED_MODE keeps ``full_text`` on each status.
        tweepy_api_wait_on_rate_limit: Forwarded to ``tweepy.API`` as
            ``wait_on_rate_limit``.
        print_status_updates: When True, replace the cell output with a running
            total after every page.

    Returns:
        All statuses collected across every page, in the order they were returned.
    """
    # Keep the page size inside the range the endpoint accepts; a zero or
    # negative value is as meaningless as one above MAX_COUNT.
    tweet_count = max(1, min(tweet_count, MAX_COUNT))

    tweepy_api: tweepy.API = tweepy.API(
        o_auth_handler, wait_on_rate_limit=tweepy_api_wait_on_rate_limit)

    tweets_list: list[tweepy.models.Status] = []
    # Paging cursor: None on the first request, afterwards one less than the id
    # of the last tweet of the previous page so that tweet is not fetched again.
    oldest_id: int | None = None
    while True:
        tweets_result_set = tweepy_api.user_timeline(screen_name=screen_name,
                                                     count=tweet_count,
                                                     include_rts=include_re_tweets,
                                                     max_id=oldest_id,
                                                     tweet_mode=tweet_mode)
        if not tweets_result_set:
            # An empty page means there is nothing further back to fetch.
            break
        oldest_id = tweets_result_set[-1].id - 1
        tweets_list.extend(tweets_result_set)
        if print_status_updates:
            # wait=True defers clearing until new output arrives, avoiding flicker.
            clear_output(wait=True)
            print(f'Number of tweets downloaded so far {len(tweets_list)}')
    return tweets_list

### Twitter config

<https://python-twitter.readthedocs.io/en/latest/getting_started.html>  


You need a Twitter developer account: <https://developer.twitter.com/en/portal/petition/essential/basic-info>

You must also apply for elevated access: <https://developer.twitter.com/en/portal/products/elevated>

See [twitter-config.ini.sample](twitter-config.ini.sample) for the ini file format.

In [4]:
# Credentials live in an ini file next to the notebook so they are never
# hardcoded in a cell.
config_filepath: str = "twitter-config.ini"
config_parser: ConfigParser = ConfigParser()
# ConfigParser.read() silently skips files it cannot open and returns the list
# of files it actually parsed, so an empty result means the ini file was not read.
if not config_parser.read(config_filepath):
    raise FileNotFoundError(
        f"Twitter config file not found or unreadable: {config_filepath!r}. "
        "See twitter-config.ini.sample for the expected format.")

twitter_config = config_parser["Twitter"]
access_token: str = twitter_config["AccessToken"]
access_token_secret: str = twitter_config["AccessTokenSecret"]
consumer_key: str = twitter_config["ApiKey"]
consumer_secret: str = twitter_config["ApiKeySecret"]

In [5]:
# Build the OAuth handler from the app's consumer credentials, then attach
# the access token pair read from the config file.
o_auth_handler: tweepy.OAuthHandler = tweepy.OAuthHandler(
    consumer_key=consumer_key,
    consumer_secret=consumer_secret,
)
o_auth_handler.set_access_token(access_token, access_token_secret)

### Download Irish Farming's Tweets

Dept of Agriculture, Food and the Marine:  <a href="https://twitter.com/agriculture_ie" target="_new">@agriculture_ie</a>

In [6]:
# Dept of Agriculture, Food and the Marine
screen_name: str = "agriculture_ie"
# Retweets are requested as well as the account's own tweets.
user_tweets: list[tweepy.models.Status] = download_user_tweets(
    o_auth_handler,
    screen_name=screen_name,
    include_re_tweets=True,
)

Number of tweets downloaded so far 3250


Print the first 3 downloaded tweets

In [7]:
# Preview the first three downloaded statuses: an id/timestamp header line,
# the tweet text without trailing whitespace, then a blank separator line.
tweet_status: tweepy.models.Status
for tweet_status in user_tweets[:3]:
    print(f"Id: {tweet_status.id}, Timestamp:{tweet_status.created_at}")
    print(tweet_status.full_text.rstrip(), end="\n\n")

Id: 1612874586684592135, Timestamp:2023-01-10 18:10:41+00:00
RT @teagasc: The Minister of State @agriculture_ie with special responsibility for Farm Safety, @martinheydonfg welcomed the recent re-publ…

Id: 1612854180372254721, Timestamp:2023-01-10 16:49:36+00:00
💬#HaveYourSay on a new public consultation to inform the next phase of the development of the Deer Management Strategy.🦌

🗓️Closing date for submissions is 5pm on Friday, February 10, 2023.

🗳️https://t.co/ZCtqaO2EcR https://t.co/E0OkBbaMRD

Id: 1612821982428659712, Timestamp:2023-01-10 14:41:40+00:00
Minister @McConalogue has met fishing industry representatives in the @MarineInst in Galway to discuss EU and Norway negotiations and the implementation of Sea Fisheries Taskforce proposal for voluntary decommissioning scheme.

📍 https://t.co/KWN0CHWIhK https://t.co/g5M5Q9L1Wt



### Extract fields of interest from Tweets

In [8]:
# One row per downloaded tweet. The field order here must match the column
# names supplied when the rows are loaded into a DataFrame.
# `object` is used as the element type because the rows mix several value
# types (the builtin `any` is a function, not a type).
selected_tweet_fields_list: list[list[object]] = [[
    tweet.id_str,
    tweet.user.screen_name,
    tweet.created_at,
    tweet.favorite_count,
    tweet.retweet_count,
    tweet.full_text,  # already a str; an encode/decode round trip adds nothing
] for tweet in user_tweets]

### Save Asset

In [9]:
# Column labels, listed in the same order as the fields of each extracted row.
tweet_column_names: list[str] = [
    "id", "screen_name", "created_at",
    "favorite_count", "retweet_count", "text",
]

# Tabulate the extracted rows and persist them as a CSV asset; the row index
# is left out of the file.
dataframe: DataFrame = DataFrame(
    data=selected_tweet_fields_list,
    columns=tweet_column_names,
)
dataframe.to_csv(path_or_buf='./../assets/twitter-agriculture-ie.csv', index=False)

# The last expression is rendered as the cell output: a three-row preview.
dataframe.head(n=3)

Unnamed: 0,id,screen_name,created_at,favorite_count,retweet_count,text
0,1612874586684592135,agriculture_ie,2023-01-10 18:10:41+00:00,0,5,RT @teagasc: The Minister of State @agricultur...
1,1612854180372254721,agriculture_ie,2023-01-10 16:49:36+00:00,4,2,💬#HaveYourSay on a new public consultation to ...
2,1612821982428659712,agriculture_ie,2023-01-10 14:41:40+00:00,4,3,Minister @McConalogue has met fishing industry...


Author &copy; 2022 <a href="https://github.com/markcrowe-com" target="_parent">Mark Crowe</a>. All rights reserved.