In [81]:
#Import all required libraries here
import os
import tweepy
import pandas as pd
import requests
import csv
import torch
from transformers import pipeline
from dotenv import load_dotenv
from datetime import datetime

#I have stored all my API keys in a separate .env file and loaded here.
load_dotenv()

#Sentiment Analysis Model
distilbert_model = pipeline(model="bhadresh-savani/distilbert-base-uncased-emotion")

Downloading:   0%|          | 0.00/4.14k [00:00<?, ?B/s]

#### API Authentication

In [82]:
#Authentication - importing keys
consumer_key = os.environ['API_KEY']
consumer_secret = os.environ['API_KEY_SECRET']
access_token = os.environ['ACCESS_TOKEN']
access_token_secret = os.environ['ACCESS_TOKEN_SECRET']
bearer_token = os.environ['BEARER_TOKEN']

In [89]:
#Authentication using OAuth1
auth = tweepy.OAuth1UserHandler(
  consumer_key, 
  consumer_secret, 
  access_token, 
  access_token_secret
)

#Initiate API
api = tweepy.API(auth, wait_on_rate_limit=True)

#### User Identification and Tweet Extraction

In [90]:
#EDIT HERE ONLY
#add the twitter handle of an account for data extraction from timeline
user = "FoxNews"

In [91]:
def retrieveTweets(user):
    print ("Extracting records from ",user)
    
    #Start recording from the latest tweet
    tweets = api.user_timeline(screen_name=user, 
                           # 200 is the maximum allowed count
                           count=3000,
                           include_rts = False,
                           tweet_mode = 'extended',
                           exclude_replies = True
                           )
    
    """
    Use while True to keep extracting 200 records at a time
    After one extraction, use the ID earliest created record,
    and start the extraction process from the previous one for another 200 records.
    """
    all_tweets = []
    all_tweets.extend(tweets)
    oldest_id = tweets[-1].id
    while True:
        tweets = api.user_timeline(screen_name=user, 
                               # 200 is the maximum allowed count
                               count=200,
                               include_rts = False,
                               max_id = oldest_id - 1,
                               # Necessary to keep full_text 
                               # otherwise only the first 140 words are extracted
                               tweet_mode = 'extended'
                               )
        if len(tweets) == 0:
            break
        oldest_id = tweets[-1].id
        all_tweets.extend(tweets)
        print('N of tweets downloaded till now {}'.format(len(all_tweets)))
        
        #save each selected value of a response into a list.
    outtweets = [[tweet.id_str,
                  tweet.user.id_str, 
                  tweet.user.name, 
                  tweet.created_at,
                  tweet.favorite_count,
                  tweet.retweet_count,
                  tweet.full_text.encode("utf-8").decode("utf-8"),
                  distilbert_model([tweet.full_text])[0]["label"],
                 ] 
             for idx,tweet in enumerate(all_tweets)]
    return outtweets

#### Saving to a Pandas DataFrame and a CSV File

In [92]:
extracted_tweets = retrieveTweets(user)
print ("Extraction completed with ",len(extracted_tweets)," records.")

Extracting records from  FoxNews
N of tweets downloaded till now 400
N of tweets downloaded till now 600
N of tweets downloaded till now 800
N of tweets downloaded till now 1000
N of tweets downloaded till now 1200
N of tweets downloaded till now 1400
N of tweets downloaded till now 1600
N of tweets downloaded till now 1800
N of tweets downloaded till now 2000
N of tweets downloaded till now 2200
N of tweets downloaded till now 2400
N of tweets downloaded till now 2600
N of tweets downloaded till now 2800
N of tweets downloaded till now 3000
N of tweets downloaded till now 3200
N of tweets downloaded till now 3250
Extraction completed with  3250  records.


In [95]:
df = pd.DataFrame (extracted_tweets, columns = ["tweet_id",
                                                "user_id",
                                                "user_name",
                                                "created_at",
                                                "favorite_count",
                                                "retweet_count",
                                                "tweet",
                                                "sentiment"])
df.to_csv('%s_tweets.csv' % user,index=False)
df

Unnamed: 0,tweet_id,user_id,user_name,created_at,favorite_count,retweet_count,tweet,sentiment
0,1589658958725275648,1367531,Fox News,2022-11-07 16:40:04+00:00,63,31,Tennessee election officials vow to find cause...,anger
1,1589656706862161920,1367531,Fox News,2022-11-07 16:31:07+00:00,33,3,Bears' Justin Fields on refs potentially missi...,sadness
2,1589653935983394816,1367531,Fox News,2022-11-07 16:20:07+00:00,68,23,NPR's horrific recording won't 'normalize' abo...,sadness
3,1589651406910275584,1367531,Fox News,2022-11-07 16:10:04+00:00,66,23,Human remains found twice in Chicago neighborh...,fear
4,1589648952839790596,1367531,Fox News,2022-11-07 16:00:19+00:00,92,18,Human remains found twice in Chicago neighborh...,fear
...,...,...,...,...,...,...,...,...
3245,1583825522856931328,1367531,Fox News,2022-10-22 14:20:05+00:00,434,132,Arizona Gov. Doug Ducey suing Biden admin over...,anger
3246,1583824282756358144,1367531,Fox News,2022-10-22 14:15:09+00:00,52,17,Virginia congressional race in closely watched...,fear
3247,1583823011051814913,1367531,Fox News,2022-10-22 14:10:06+00:00,238,61,"OPINION: Biden's energy policies hurting US, E...",sadness
3248,1583821754186637312,1367531,Fox News,2022-10-22 14:05:06+00:00,58,15,CANDIDATE SPOTLIGHT: Arizona Democrat candidat...,anger


In [None]:
df.info()