In [81]:
#Import all required libraries here
import os
import tweepy
import pandas as pd
import requests
import csv
import torch
from transformers import pipeline
from dotenv import load_dotenv
from datetime import datetime

#I have stored all my API keys in a separate .env file and loaded here.
load_dotenv()

#Sentiment Analysis Model
distilbert_model = pipeline(model="bhadresh-savani/distilbert-base-uncased-emotion")

Downloading:   0%|          | 0.00/4.14k [00:00<?, ?B/s]

#### API Authentication

In [82]:
#Authentication - importing keys
consumer_key = os.environ['API_KEY']
consumer_secret = os.environ['API_KEY_SECRET']
access_token = os.environ['ACCESS_TOKEN']
access_token_secret = os.environ['ACCESS_TOKEN_SECRET']
bearer_token = os.environ['BEARER_TOKEN']

In [83]:
#Authentication using OAuth1
auth = tweepy.OAuth1UserHandler(
  consumer_key, 
  consumer_secret, 
  access_token, 
  access_token_secret
)

#Initiate API
api = tweepy.API(auth, wait_on_rate_limit=True)

#### User Identification and Tweet Extraction

In [84]:
#EDIT HERE ONLY
#add the twitter handle of an account for data extraction from timeline
user = "RayDalio"

In [85]:
def retrieveTweets(user):
    print ("Extracting records from ",user)
    
    #Start recording from the latest tweet
    tweets = api.user_timeline(screen_name=user, 
                           # 200 is the maximum allowed count
                           count=3000,
                           include_rts = False,
                           tweet_mode = 'extended',
                           exclude_replies = True
                           )
    
    """
    Use while True to keep extracting 200 records at a time
    After one extraction, use the ID earliest created record,
    and start the extraction process from the previous one for another 200 records.
    """
    all_tweets = []
    all_tweets.extend(tweets)
    oldest_id = tweets[-1].id
    while True:
        tweets = api.user_timeline(screen_name=user, 
                               # 200 is the maximum allowed count
                               count=200,
                               include_rts = False,
                               max_id = oldest_id - 1,
                               # Necessary to keep full_text 
                               # otherwise only the first 140 words are extracted
                               tweet_mode = 'extended'
                               )
        if len(tweets) == 0:
            break
        oldest_id = tweets[-1].id
        all_tweets.extend(tweets)
        print('N of tweets downloaded till now {}'.format(len(all_tweets)))
        
        #save each selected value of a response into a list.
    outtweets = [[tweet.id_str,
                  tweet.user.id_str, 
                  tweet.user.name, 
                  tweet.created_at,
                  tweet.favorite_count,
                  tweet.retweet_count,
                  tweet.full_text.encode("utf-8").decode("utf-8"),
                  distilbert_model([tweet.full_text])[0]["label"],
                 ] 
             for idx,tweet in enumerate(all_tweets)]
    return outtweets

#### Saving to a Pandas DataFrame and a CSV File

In [None]:
extracted_tweets = retrieveTweets(user)
df = pd.DataFrame (extracted_tweets, columns = ["tweet_id", "user_id","user_name","created_at", "favorite_count","retweet_count","tweet","sentiment"])

Extracting records from  RayDalio
N of tweets downloaded till now 254
N of tweets downloaded till now 454
N of tweets downloaded till now 654
N of tweets downloaded till now 854
N of tweets downloaded till now 1051
N of tweets downloaded till now 1250
N of tweets downloaded till now 1450
N of tweets downloaded till now 1650
N of tweets downloaded till now 1850
N of tweets downloaded till now 2050
N of tweets downloaded till now 2250
N of tweets downloaded till now 2450
N of tweets downloaded till now 2650
N of tweets downloaded till now 2849
N of tweets downloaded till now 3048
N of tweets downloaded till now 3098


In [None]:
df.to_csv('%s_tweets.csv' % user,index=False)
df

In [None]:
df.info()