# Tweet acquisition
This Jupyter notebook presents example code for acquiring tweets from Twitter.

In [14]:
import os
import tweepy as tw
import pandas as pd
from datetime import datetime as dt

In [15]:
# Set Twitter API keys.
# Each credential is read from an environment variable so that secrets do not
# have to be stored in the notebook itself; when a variable is not set the
# value falls back to the empty string.
consumer_key = os.environ.get('TWITTER_CONSUMER_KEY', '')
consumer_secret = os.environ.get('TWITTER_CONSUMER_SECRET', '')
access_token = os.environ.get('TWITTER_ACCESS_TOKEN', '')
access_token_secret = os.environ.get('TWITTER_ACCESS_TOKEN_SECRET', '')

# Connect with Twitter API using Tweepy
auth = tw.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
# wait_on_rate_limit=True is passed so the client waits instead of failing
# when a rate limit is reached.
api = tw.API(auth, wait_on_rate_limit=True)

# Get tweets based on keywords

In [16]:
# Determine search_words and date
search_words = ["demonstratie"]
date_since = "2021-07-13"

# Create a tweet list to store data in
tweet_list = []

# Today's date formatted as dd-mm-yy
date_str = dt.now().strftime("%d-%m-%y")

# The `q` parameter of the search endpoint is a single query string, so the
# keyword list is combined into one string here instead of handing the Python
# list to the client. Keywords are joined with the OR operator so that a tweet
# matching any of them is returned; join with " " instead to require all.
search_query = " OR ".join(search_words)

# Get the tweets from Twitter API
tweets = tw.Cursor(api.search,
                   q=search_query,
                   tweet_mode='extended',
                   since=date_since).items()

In [17]:
# Consume the `tweets` iterator, adding every item it yields to tweet_list
tweet_list.extend(tweets)

In [18]:
# List that collects one dictionary of extracted fields per tweet
tweet_dict_list = []

# Function to get the attributes from tweets
def get_attrib_data(obj, attrib_list):
    """Follow a chain of attribute names starting at *obj*.

    Args:
        obj: Object to start the lookup from. May be None.
        attrib_list: Sequence of attribute names to follow in order, e.g.
            ["user", "id_str"] resolves ``obj.user.id_str``. An empty
            sequence returns *obj* itself.

    Returns:
        The value found at the end of the chain, or None as soon as any link
        in the chain is missing or is None.
    """
    current = obj
    for name in attrib_list:
        # Nothing can be looked up on a missing link; stop early.
        if current is None:
            break
        # The three-argument getattr never raises AttributeError, so a
        # missing attribute simply yields None.
        current = getattr(current, name, None)
    return current

# Store all the tweets with according attributes.
# Every tweet is flattened into one dictionary; fields that are absent on a
# tweet are stored as None.
for tweet in tweet_list:
    # Look these up once per tweet: several columns below are derived from them.
    metadata = get_attrib_data(tweet, ["metadata"])
    entities = get_attrib_data(tweet, ["entities"])
    # get_attrib_data returns None for a missing attribute; fall back to an
    # empty dict so the .get() calls below yield None instead of raising
    # AttributeError.
    metadata_fields = metadata or {}
    entity_fields = entities or {}

    tweet_dict_list.append({
        # Tweet object
        "created_at": get_attrib_data(tweet, ["created_at"]),
        "id": get_attrib_data(tweet, ["id"]),
        "id_str": get_attrib_data(tweet, ["id_str"]),
        "full_text": get_attrib_data(tweet, ["full_text"]),
        "truncated": get_attrib_data(tweet, ["truncated"]),
        "display_text_range": get_attrib_data(tweet, ["display_text_range"]),
        "coordinates": get_attrib_data(tweet, ["coordinates"]),
        "lang": get_attrib_data(tweet, ["lang"]),
        "source": get_attrib_data(tweet, ["source"]),
        "metadata": metadata,
        "tweet_iso_language_code": metadata_fields.get("iso_language_code"),

        "in_reply_to_status_id": get_attrib_data(tweet, ["in_reply_to_status_id"]),
        "in_reply_to_status_id_str": get_attrib_data(tweet, ["in_reply_to_status_id_str"]),
        "in_reply_to_user_id": get_attrib_data(tweet, ["in_reply_to_user_id"]),
        "in_reply_to_user_id_str": get_attrib_data(tweet, ["in_reply_to_user_id_str"]),
        "in_reply_to_screen_name": get_attrib_data(tweet, ["in_reply_to_screen_name"]),

        "retweeted": get_attrib_data(tweet, ["retweeted"]),
        "retweeted_status": get_attrib_data(tweet, ["retweeted_status"]),
        # NOTE(review): this is read from the retweeted status, not from the
        # tweet itself - confirm that is the intended source.
        "possibly_sensitive": get_attrib_data(tweet, ["retweeted_status", "possibly_sensitive"]),
        "org_tweet_created_at": get_attrib_data(tweet, ["retweeted_status", "created_at"]),
        "org_tweet_id": get_attrib_data(tweet, ["retweeted_status", "id"]),
        "org_tweet_user": get_attrib_data(tweet, ["retweeted_status", "user", "id_str"]),
        "retweet_count": get_attrib_data(tweet, ["retweet_count"]),

        "is_quote_status": get_attrib_data(tweet, ["is_quote_status"]),
        "quoted_status_id": get_attrib_data(tweet, ["quoted_status_id"]),
        "quoted_status_id_str": get_attrib_data(tweet, ["quoted_status_id_str"]),
        "quoted_status": get_attrib_data(tweet, ["quoted_status"]),

        "favorited": get_attrib_data(tweet, ["favorited"]),
        "favorite_count": get_attrib_data(tweet, ["favorite_count"]),


        # Place object
        "place": get_attrib_data(tweet, ["place"]),
        "place_id": get_attrib_data(tweet, ["place", "id"]),
        "place_url": get_attrib_data(tweet, ["place", "url"]),
        "place_type": get_attrib_data(tweet, ["place", "place_type"]),
        "place_name": get_attrib_data(tweet, ["place", "name"]),
        "place_full_name": get_attrib_data(tweet, ["place", "full_name"]),
        "place_country": get_attrib_data(tweet, ["place", "country"]),
        "place_country_code": get_attrib_data(tweet, ["place", "country_code"]),
        "place_bounding_box": get_attrib_data(tweet, ["place", "bounding_box"]),
        "place_bounding_box_coordinates": get_attrib_data(tweet, ["place", "bounding_box", "coordinates"]),
        "place_attributes": get_attrib_data(tweet, ["place", "attributes"]),


        # Entity Objects
        "entities": entities,
        "hashtags": entity_fields.get("hashtags"),
        "urls": entity_fields.get("urls"),
        "symbols": entity_fields.get("symbols"),
        "user_mentions": entity_fields.get("user_mentions"),
        "media": entity_fields.get("media"),
        "polls": entity_fields.get("polls"),


        # User Object
        "user": get_attrib_data(tweet, ["user"]),
        "user_id": get_attrib_data(tweet, ["user", "id"]),
        "user_id_str": get_attrib_data(tweet, ["user", "id_str"]),
        "user_name": get_attrib_data(tweet, ["user", "name"]),
        "user_screen_name": get_attrib_data(tweet, ["user", "screen_name"]),
        "user_location": get_attrib_data(tweet, ["user", "location"]),
        "user_derived": get_attrib_data(tweet, ["user", "derived"]),
        "user_url": get_attrib_data(tweet, ["user", "url"]),
        "user_description": get_attrib_data(tweet, ["user", "description"]),
        "user_protected": get_attrib_data(tweet, ["user", "protected"]),
        "user_verified": get_attrib_data(tweet, ["user", "verified"]),
        "user_followers_count": get_attrib_data(tweet, ["user", "followers_count"]),
        "user_friends_count": get_attrib_data(tweet, ["user", "friends_count"]),
        "user_listed_count": get_attrib_data(tweet, ["user", "listed_count"]),
        "user_favourites_count": get_attrib_data(tweet, ["user", "favourites_count"]),
        "user_statuses_count": get_attrib_data(tweet, ["user", "statuses_count"]),
        "user_created_at": get_attrib_data(tweet, ["user", "created_at"]),
    })

# Build a DataFrame with one row per collected tweet dictionary, then write
# it to tweets.csv in the current working directory.
tweets_df = pd.DataFrame(data=tweet_dict_list)
tweets_df.to_csv(path_or_buf='tweets.csv')