# Collect data from Twitter using the Twitter API

## import packages and configure twitter api

In [2]:
# !pip install tweepy
import tweepy as tw
print(tw.__version__)

4.10.0


In [3]:
# !pip install pandas
import pandas as pd
import json
import time
import csv
import requests
print(requests.__version__)

2.28.1


In [4]:
# import the API credentials from a local `keys` module (presumably keys.py, kept out of version control via .gitignore)

import keys

In [5]:
# Build the shared tweepy client from the credentials exposed by the local
# `keys` module. Every later API call in this notebook goes through `client`.
client = tw.Client(
    # application-level credential
    bearer_token=keys.bearer_token,
    # app (consumer) key pair
    consumer_key=keys.consumer_key,
    consumer_secret=keys.consumer_secret,
    # user access-token pair
    access_token=keys.access_token,
    access_token_secret=keys.access_token_secret,
    # let tweepy wait when a rate limit is hit instead of raising
    wait_on_rate_limit=True,
)

In [6]:
# Commented-out alternative: construct the client from a bearer token alone.
# client = tw.Client(bearer_token)
# Display the client object to confirm it was created.
client

<tweepy.client.Client at 0x7fcdb1c448e0>

## Get the list of news media Twitter usernames

In [7]:
# Load the table of selected news outlets and their Twitter usernames.
# NOTE(review): the path is absolute and specific to one machine; it must be
# adjusted before the notebook can run elsewhere.
news_media_basic_data = pd.read_csv(
    filepath_or_buffer=r'/Users/katherina/data_science_tweets_neutrality/news_media_twitter_username.csv'
)
news_media_basic_data

Unnamed: 0,news_media,twitter_username,us_or_foreign,general_or_political
0,ABC News,ABC,US,General
1,ABC News,ABCPolitics,US,Political
2,ABC News,ABCNewsLive,US,General
3,BBC News,BBCWorld,Foreign,General
4,BBC News,BBCBreaking,Foreign,General
5,BBC News,BBCNews,Foreign,General
6,BBC News,BBCNorthAmerica,Foreign,General
7,CBS News,CBSNews,US,General
8,CBS News,CBSPolitics,US,Political
9,CBS News,CBSNewsPoll,US,Political


In [8]:
# Every username listed in the CSV, kept under its own name so the full roster
# is not lost when the working list below is narrowed to a sample.
all_news_media_twitter_usernames = news_media_basic_data['twitter_username'].tolist()

# Working list used by the following cells: a hard-coded three-account sample.
# NOTE(review): this looks like a development shortcut; assign
# `all_news_media_twitter_usernames` here to process every outlet in the CSV.
news_media_twitter_username_list = ["ABC","ABCPolitics","ABCNewsLive"]
news_media_twitter_username_list

['ABC', 'ABCPolitics', 'ABCNewsLive']

## Call Twitter API v2

### get user details

In [9]:
# call Twitter API v2 to get the user id and other profile information for each user in the list

def get_users_details(usernames, api_client=None):
    """Look up Twitter accounts by username and return the raw API response.

    Parameters
    ----------
    usernames : list of str
        Twitter usernames (handles without the leading "@") to look up.
    api_client : optional
        Object exposing a tweepy-style ``get_users(usernames=..., user_fields=...)``
        method. Defaults to the notebook-level ``client``; passing a stand-in
        makes the function usable without network access.

    Returns
    -------
    Whatever ``api_client.get_users`` returns (a tweepy ``Response`` when the
    notebook-level client is used).

    Raises
    ------
    ValueError
        If ``usernames`` is ``None`` or empty, so that no request is sent
        without any username to look up.
    """
    # len() rather than truthiness, so array-like inputs are handled too.
    if usernames is None or len(usernames) == 0:
        raise ValueError("usernames must contain at least one Twitter username")

    if api_client is None:
        # Fall back to the client built earlier in the notebook.
        api_client = client

    # Profile fields requested in addition to the default id/name/username.
    return api_client.get_users(
        usernames=usernames,
        user_fields=["created_at","location","protected","public_metrics","verified"],
    )

# Look up the selected news accounts once; later cells read the users from
# this response's `.data`.
twitter_user_details = get_users_details(news_media_twitter_username_list)
twitter_user_details

Response(data=[<User id=28785486 name=ABC News username=ABC>, <User id=16815644 name=ABC News Politics username=ABCPolitics>, <User id=384438102 name=ABC News Live username=ABCNewsLive>], includes={}, errors=[], meta={})

In [40]:
# Column order of the flattened user table. Passing it to the DataFrame
# constructor keeps the schema stable even when the lookup returns no users.
user_detail_columns = [
    "user_id",
    "user_name",
    "user_username",
    "user_created_at",
    "user_location",
    "user_protected",
    "user_followers_count",
    "user_following_count",
    "user_tweet_count",
    "user_listed_count",
    "user_verified",
]

# Flatten each returned user object into one record (one dict per account).
# `.data` is None when the lookup matched no accounts, so fall back to an
# empty list instead of failing on iteration.
twitter_user_details_lst = [
    {
        "user_id": user.id,
        "user_name": user.name,
        "user_username": user.username,
        "user_created_at": user.created_at,
        "user_location": user.location,
        "user_protected": user.protected,
        # public_metrics is a dict of counters; spread it into flat columns
        "user_followers_count": user.public_metrics["followers_count"],
        "user_following_count": user.public_metrics["following_count"],
        "user_tweet_count": user.public_metrics["tweet_count"],
        "user_listed_count": user.public_metrics["listed_count"],
        "user_verified": user.verified,
    }
    for user in (twitter_user_details.data or [])
]

# Build the dataframe from the extracted records and display it.
twitter_user_details_df = pd.DataFrame(twitter_user_details_lst, columns=user_detail_columns)
twitter_user_details_df

Unnamed: 0,user_id,user_name,user_username,user_created_at,user_location,user_protected,user_followers_count,user_following_count,user_tweet_count,user_listed_count,user_verified
0,28785486,ABC News,ABC,2009-04-04 12:40:32+00:00,New York City / Worldwide,False,17625912,487,392310,66027,True
1,16815644,ABC News Politics,ABCPolitics,2008-10-16 22:43:16+00:00,"Washington, DC",False,1077940,474,192389,8485,True
2,384438102,ABC News Live,ABCNewsLive,2011-10-03 17:47:38+00:00,,False,95749,223,36587,2235,True


In [42]:
# Persist the flattened user table as CSV, with a header row and without the
# positional index column.
# NOTE(review): absolute, machine-specific output path.
twitter_user_details_df.to_csv(
    r'/Users/katherina/data_science_tweets_neutrality/twitter_user_details.csv',
    header=True,
    index=False,
)

In [None]:
# for user in users.data:
#     print(user["id"])
#     print(user["created_at"])

In [33]:
# twitter_user_details.data[0]["location"]

In [43]:
# Reload the saved user table from disk, so the following cells work from the
# CSV rather than from the live API response.
# NOTE(review): absolute, machine-specific input path.
twitter_user_details_data = pd.read_csv(
    filepath_or_buffer=r'/Users/katherina/data_science_tweets_neutrality/twitter_user_details.csv'
)
twitter_user_details_data

Unnamed: 0,user_id,user_name,user_username,user_created_at,user_location,user_protected,user_followers_count,user_following_count,user_tweet_count,user_listed_count,user_verified
0,28785486,ABC News,ABC,2009-04-04 12:40:32+00:00,New York City / Worldwide,False,17625912,487,392310,66027,True
1,16815644,ABC News Politics,ABCPolitics,2008-10-16 22:43:16+00:00,"Washington, DC",False,1077940,474,192389,8485,True
2,384438102,ABC News Live,ABCNewsLive,2011-10-03 17:47:38+00:00,,False,95749,223,36587,2235,True


In [44]:
# Pull the account ids out of the reloaded table as a plain Python list.
user_id_list = twitter_user_details_data.loc[:, "user_id"].to_list()
user_id_list

[28785486, 16815644, 384438102]

In [59]:
# Get User's Tweets

# This endpoint/method returns Tweets composed by a single user, specified by the requested user ID

# Account whose timeline is requested below (the id listed for @ABC in the
# user table earlier in this notebook).
user_id = 28785486
# UTC timestamps bounding the requested tweets; read by get_users_tweets.
start_time = "2022-06-01T00:00:00Z"
end_time = "2022-07-01T00:00:01Z"

def get_users_tweets(user_id, max_results=5, api_client=None):
    """Fetch original tweets (retweets excluded) composed by one user.

    The time window comes from the notebook-level ``start_time`` and
    ``end_time`` values.

    Parameters
    ----------
    user_id : int or str
        Numeric id of the account whose tweets are requested.
    max_results : int, default 5
        Number of tweets to request in this single call. The endpoint accepts
        values from 5 to 100. The default keeps the previous behavior.
    api_client : optional
        Object exposing a tweepy-style ``get_users_tweets(...)`` method.
        Defaults to the notebook-level ``client``; passing a stand-in makes
        the function usable without network access.

    Returns
    -------
    Whatever ``api_client.get_users_tweets`` returns (a tweepy ``Response``
    when the notebook-level client is used). Only one page is requested;
    no pagination is performed.

    Raises
    ------
    ValueError
        If ``max_results`` is outside the 5-100 range accepted by the endpoint.
    """
    if not 5 <= max_results <= 100:
        raise ValueError("max_results must be between 5 and 100")

    if api_client is None:
        # Fall back to the client built earlier in the notebook.
        api_client = client

    return api_client.get_users_tweets(
        id=user_id,
        end_time=end_time,
        exclude=["retweets"],
        # also return the tweets that the results reference (replies/quotes)
        expansions="referenced_tweets.id",
        max_results=max_results,
        start_time=start_time,
        tweet_fields=["author_id","context_annotations","created_at","entities","in_reply_to_user_id","lang","public_metrics","referenced_tweets"],
    )

# Fetch tweets for the single account id assigned to `user_id` in this cell
# and display the raw response.
users_tweets = get_users_tweets(user_id)
users_tweets

Response(data=[<Tweet id=1542653639482920961 text='The Supreme Court announced it will hear a case this fall that could upend state election laws across the country. https://t.co/sFiMDyVJdD'>, <Tweet id=1542647056254345221 text='The FDA says it has advised COVID-19 vaccine companies to produce an updated vaccine for this fall, an aim to give people broader and stronger immunity in an upcoming booster campaign ahead of winter.  https://t.co/M6C3HbmiGN'>, <Tweet id=1542641335378874368 text='Tear gas or pepper spray was used on patrons at a popular nightclub in South Africa when more than a dozen teenagers mysteriously died there, an eyewitness told @ABC News. https://t.co/HOQnzVFEC2'>, <Tweet id=1542634836518735872 text='OFF YOU GO: Alligator released into the river by a brave local police officer after being found outside a Georgia resident’s house. https://t.co/H26TVoE7xI https://t.co/70jk77bDfu'>, <Tweet id=1542633166896259074 text='Ruja Ignatova, the so-called Cryptoqueen, has now be

In [62]:
# Inspect the context annotations (domain/entity pairs) of the first returned tweet.
users_tweets.data[0]["context_annotations"]

[{'domain': {'id': '45',
   'name': 'Brand Vertical',
   'description': 'Top level entities that describe a Brands industry'},
  'entity': {'id': '781974597310615553', 'name': 'Entertainment'}},
 {'domain': {'id': '46',
   'name': 'Brand Category',
   'description': 'Categories within Brand Verticals that narrow down the scope of Brands'},
  'entity': {'id': '781974596157181956', 'name': 'Online Site'}},
 {'domain': {'id': '46',
   'name': 'Brand Category',
   'description': 'Categories within Brand Verticals that narrow down the scope of Brands'},
  'entity': {'id': '781974597105094656', 'name': 'TV/Movies Related'}},
 {'domain': {'id': '47',
   'name': 'Brand',
   'description': 'Brands and Companies'},
  'entity': {'id': '1065650820518051840',
   'name': 'ABC News',
   'description': 'ABC News'}},
 {'domain': {'id': '29',
   'name': 'Events [Entity Service]',
   'description': 'Entity Service related Events domain'},
  'entity': {'id': '1448744041105944600', 'name': 'Breaking News'}

In [13]:
# search_words = "Joe Biden" # This will be changed to Donald Trump when we retrieve tweets related to him.
# date_since = "2020-10-27"
# tweets = tw.Cursor(api.search_tweets, q=search_words, lang="en").items(10)
# tweets

In [14]:
# tweet_details = [[tweet.geo, tweet.text, tweet.user.screen_name, tweet.user.location] for tweet in tweets]
# tweet_df = pd.DataFrame(data=tweet_details, columns=["geo","text","user","location"])
# tweet_df.head()

In [None]:
# df.to_csv('data.csv')