In [1]:
import numpy as np
import pandas as pd
import tweepy
import requests
import time
from datetime import date
from datetime import timedelta

In [7]:
#import key json
file_name = "../keys.json"
with open(file_name, "r") as key_file:
    keys = json.load(key_file)

In [8]:
# asign keys
bearer_token = keys['bearer_token']
consumer_key = keys['consumer_key']
consumer_secret = keys['consumer_secret']
access_token = keys['access_token']
token_secret = keys['token_secret']

In [9]:
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, token_secret)
api = tweepy.API(auth)

### Get dates

In [10]:
# returns a dataframe with dates, having random hours to be used in retrieving tweets

def get_dates_df():
    
    ##### get number of days to loop through #####
    def get_days_delta():
        delta = date.today()-date(2016, 1, 1)
        return delta.days

    ##### generate creates a list of dates from today to the start day in get_days_delta() #####
    dates_to_check = [(date.today() - timedelta(i)).isoformat() for i in range(get_days_delta())] 

    ##### generate random times and concat to each date  #####
    random_time_dates = []
    for i in dates_to_check:
        h = '00' #random.randint(0, 24)
        m = '00' #random.randint(0, 60)
        s = '00' #random.randint(0, 60)

        i =  f'{i}T{h}:{m}:{s}.000Z'
        random_time_dates.append(i)
        
    return random_time_dates

In [11]:
tweet_dates = get_dates_df()[0:2]

# create clusters to split the job
# dates_clusters = [tweet_dates[n:n+100] for n in range(0, len(tweet_dates), 100)] 


### Authenticate and define func to connect to api

In [12]:
search_url = "https://api.twitter.com/2/tweets/search/all"
query_params = {'query': "us infrastructure","end_time": "2021-11-19T14:44:18.000Z", "max_results":10, "tweet.fields":"public_metrics"}

def bearer_oauth(r):
    """
    Method required by bearer token authentication.
    """

    r.headers["Authorization"] = f"Bearer {bearer_token}"
    r.headers["User-Agent"] = "CryptoTrading699"
    return r



def connect_to_endpoint(url, params):
    response = requests.request("GET", search_url, auth=bearer_oauth, params=params)
#     print(response.status_code)
    if response.status_code != 200:
        raise Exception(response.status_code, response.text)
    return response.json()



In [48]:
bearer_oauth

<function __main__.bearer_oauth(r)>

In [13]:

def get_tweets(api_response, max_results=51):
    list_tweets = []
    
    for i in range(0,max_results):
        text = api_response['data'][i]['text']
        list_tweets.append(text)
    return list_tweets 


def get_tweets_ids(api_response, max_results=51):
    list_tweets_ids = []
    
    for i in range(0,max_results):
        ids = api_response['data'][i]['id']
        list_tweets_ids.append(ids)
    return list_tweets_ids


def get_dates(tweet_date, max_results=51):
    list_dates = []

    for i in range(0,max_results):
        list_dates.append(tweet_date)
    return list_dates


def get_topic(topic, max_results=51):
    list_topics = [topic for i in range(0,max_results)]
    return list_topics


### pull data from api and asign to dict

In [16]:
# retrieve a dictionary of tweets(max_results) for each date, for each topic
def get_tweets_dict(dates, topics, max_results):
    tweeter_data = {
        'tweet':[],
        'tweet_date':[],
        'topic':[],
        'tweet_id':[],
    }
    
    # loop through dates
    for tweet_date in dates:
        # loop through topics
        for topic in topics:
            query_params = {'query':topic ,"end_time": tweet_date, "max_results":max_results, "tweet.fields":"public_metrics"}
            json_response = connect_to_endpoint(search_url, query_params)
            available_tweets= len(json_response['data'])-1 # get number of tweets returned by the request if 

            tweeter_data['tweet'] += get_tweets(json_response, max_results=available_tweets)
            tweeter_data['tweet_id'] += get_tweets_ids(json_response, max_results=available_tweets)
            tweeter_data['tweet_date'] += get_dates(tweet_date, max_results=available_tweets)
            tweeter_data['topic'] += get_topic(topic, max_results=available_tweets)

            time.sleep(5)
            
    return tweeter_data

In [17]:
tweet_dates

['2021-11-26T00:00:00.000Z', '2021-11-25T00:00:00.000Z']

In [35]:
topics = ["economy"]

In [43]:
tweet_dict = get_tweets_dict(tweet_dates, topics, 10)

In [44]:
tweeter_data = tweet_dict

In [45]:
tweeter_data

{'tweet': ['RT @BenOquist: This is outrageous \u2066▶️ Senator on the hook for $150,000 in government legal fees . \u2066@Senator_Patrick\u2069  https://t.co/cid3ry2M…',
  'RT @PalmerReport: So what’s our winning messaging?\n\n1) We’ve accomplished a lot this year (strong economy, vaccinations, build back better)…',
  'Premium Economy  @LATAM_BRA https://t.co/1ItlgCdYIo',
  'RT @thewirehindi: ममता बनर्जी से मुलाकात के बाद भाजपा नेता सुब्रमण्यम स्वामी मोदी सरकार को असफल बताया\n\nhttps://t.co/EjwM3xjq72\n\n#Subramaniy…',
  "RT @Madisontx76: What do you say to a person who truly believes the economy is much better off now than it was under Trump? \n\nI just don't…",
  'RT @Kristy91808800: #drbonniehenry @adriandix don’t  care about #BC residents health or death.  It’s all about optics and the economy',
  '@frankoz95967943 @FinanceLancelot @INArteCarloDoss Seriously though if they could somehow devalue by 50% without triggering a meltdown it would actually get them out of the bind and save

In [10]:
# query_params = {'query':topic ,"end_time": tweet_date, "max_results":20, "tweet.fields":"public_metrics"}
# json_response = connect_to_endpoint(search_url, query_params)
# json_response['data']

In [11]:
# df = pd.DataFrame(tweeter_data)

In [46]:
df =pd.DataFrame(tweeter_data)
len(df['tweet_id'].unique())

18

In [47]:
df

Unnamed: 0,tweet,tweet_date,topic,tweet_id
0,RT @BenOquist: This is outrageous ⁦▶️ Senator ...,2021-11-26T00:00:00.000Z,economy,1464021076845817860
1,RT @PalmerReport: So what’s our winning messag...,2021-11-26T00:00:00.000Z,economy,1464021064791506949
2,Premium Economy @LATAM_BRA https://t.co/1Itlg...,2021-11-26T00:00:00.000Z,economy,1464021057602527239
3,RT @thewirehindi: ममता बनर्जी से मुलाकात के बा...,2021-11-26T00:00:00.000Z,economy,1464021053936574464
4,RT @Madisontx76: What do you say to a person w...,2021-11-26T00:00:00.000Z,economy,1464021046667923458
5,RT @Kristy91808800: #drbonniehenry @adriandix ...,2021-11-26T00:00:00.000Z,economy,1464021042893000707
6,@frankoz95967943 @FinanceLancelot @INArteCarlo...,2021-11-26T00:00:00.000Z,economy,1464021041466937346
7,RT @tanniefm: the city of los angeles needs to...,2021-11-26T00:00:00.000Z,economy,1464021032617062409
8,RT @Dr_D_Robertson: Thank you to the scientist...,2021-11-26T00:00:00.000Z,economy,1464021023028879362
9,RT @robbystarbuck: If Biden goes forward with ...,2021-11-25T00:00:00.000Z,economy,1463658688510455811


In [16]:
df.to_csv('tweets_a person w... 	2021-11-26T00:00:00.000Z 	economy 	1464021046667923458
5 	RT @Kristy91808800: #drbonniehenry @adriandix ... 	2021-11-26T00:00:00.000Z 	economy 	1464021042893000707
6 	@frankoz95967943 @FinanceLancelot @INArteCarlo... 	2021-11-26T00:00:00.000Z 	economy 	1464021041466937346
7 	RT @tanniefm: the city of los angeles needs to... 	2021-11-26T00:00:00.000Z 	economy 	1464021032617062409
8 	RT @Dr_D_Robertson: Thank you to the scientist... 	2021-11-26T00:00:00.000Z 	economy 	1464021023028879362
9 	RT @robbystarbuck: If Biden goes forward with ... 	2021-11-25T00:00:00.000Z 	economy 	1463658688510455811
10 	RT @faizalhamssin: With or without EPF, it a p... 	2021-11-25T00:00:00.000Z 	economy 	1463658684517474307
11 	RT @Pat300000: Biden is a narcissistic,vicious... 	2021-11-25T00:00:00.000Z 	economy 	1463658682688983044
12 	RT @Aerycrow: https://t.co/QpAmhMelMb\n지난 50년간... 	2021-11-25T00:00:00.000Z 	economy 	1463658677412392962
13 	RT @POTUS: We are experiencing the strongest e... 	2021-11-25T00:00:00.000Z 	economy 	1463658676871483393
14 	How should the Business Development Bank of Ca... 	2021-11-25T00:00:00.000Z 	economy 	1463658665706246146
15 	RT @Jim_Jordan: Your Thanksgiving turkey costs... 	2021-11-25T00:00:00.000Z 	economy 	1463658665089671178
16 	RT @RepKatiePorter: The 2020 Silverado Fire th... 	2021-11-25T00:00:00.000Z 	economy 	1463658663852261380
17 	RT @DeepLearn007subset.csv',index=False)