In [2]:
#Import Various Libraries, including Tweepy, a Python library for the Twitter API.
import os
import requests
import pandas as pd
from dotenv import load_dotenv
#%matplotlib inline
import tweepy

load_dotenv()

In [3]:
#Pass API Keys to Twitter API and build Tweepy API handler object.
#The key pair is read from environment variables (load_dotenv() has been called in the import cell).
consumer_key = os.getenv('TWITTER_API_KEY')
consumer_secret = os.getenv('TWITTER_SECRET_KEY')
#os.getenv returns None for a missing variable; fail here with a readable message
#instead of sending empty credentials to Twitter and getting an opaque auth error back.
if not consumer_key or not consumer_secret:
    raise RuntimeError("TWITTER_API_KEY and TWITTER_SECRET_KEY must be set in the environment (or .env file).")
auth = tweepy.AppAuthHandler(consumer_key, consumer_secret)
#api is the shared handle used by every search/timeline call further down.
api = tweepy.API(auth)

# Defining the results DataFrame and Search Targets

In [62]:
#Creating a DataFrame for tweet search results data structure.
#Each key becomes a column; the placeholder values (scalars plus one empty list)
#produce a frame with zero rows, so only the column layout is defined here.
dict_tweet_structure = {
    "twitter_user": "",
    "category": [],
    "time": "",
    "sentiment": "",
    "text": "",
    "tweet_id": "",
    "tweet_source": "",
    "quote_count": 0,
    "reply_count": 0,
    "retweet_count": 0,
    "fav_count": 0,
}
df_tweets_found = pd.DataFrame(dict_tweet_structure)
df_tweets_found

Unnamed: 0,twitter_user,category,time,sentiment,text,tweet_id,tweet_url,quote_count,reply_count,retweet_count,fav_count


In [5]:
#Define Search Term Library
#Maps a category name to the keywords that are looked for in tweet text.
dict_search_terms = {
    'bitcoin': ['bitcoin', 'btc', '#bitcoin', '#btc'],
    'generic': ['cryptocurrency', 'crypto', 'blockchain', 'struggle'],
}
#Define Twitter User List
list_twitterers = [
    'joerogan',
    'elonmusk',
    'officialmcafee',
    'vitalikbuterin',
]

# Functions

In [6]:
#Function for creating search query string for tweepy.Cursor(api.search). Max query length is 128 for sandbox env.
#In its current form it supports a bunch of keywords joined by OR, grouped by (), AND from a single tweeter.
def query_creator(list_keywords, twitter_user = "!", mood = "!"):
    """Build a search query string from keywords and an optional author filter.

    The keywords are joined with " OR " and wrapped in one pair of parentheses;
    when a user is given, "from:<user>" is added after the group, e.g.
    "(crypto OR btc) from:joerogan".

    Args:
        list_keywords: Iterable of keyword strings. May be empty, in which case
            no keyword group is emitted.
        twitter_user: Screen name to restrict the search to. The default "!"
            is a sentinel meaning "no user filter".
        mood: Not used by this function; kept so existing calls keep working.

    Returns:
        The query string ("" when there are no keywords and no user).
    """
    keywords = list(list_keywords)
    clauses = []
    if keywords:
        # join() places the separator by position, so a keyword that happens to
        # equal the last one can no longer close the parenthesis early.
        clauses.append("(" + " OR ".join(keywords) + ")")
    if twitter_user != "!":
        clauses.append("from:" + twitter_user)
    return " ".join(clauses)

In [77]:
#This is the core function for tweet searching.
def tweet_search_full(string_query, date_from = "202001010000", date_to = "202101170000", number_tweets = 100):
    """Search the full-archive endpoint and yield one flat row per tweet found.

    Args:
        string_query: Query string passed to the endpoint as `query`.
        date_from: Lower time bound, passed as `fromDate` (same digit-string
            format as the default).
        date_to: Upper time bound, passed as `toDate`.
        number_tweets: Maximum number of tweets to take from the cursor.

    Yields:
        list: [screen_name, categories, created_at as str, "" (sentiment
        placeholder), text, tweet id, source, quote_count, reply_count,
        retweet_count, favorite_count] - the same order as the columns of
        df_tweets_found.
    """
    cursor = tweepy.Cursor(api.search_full_archive, query=string_query, environment_name='CryptoSentimentFullArc',
                           fromDate=date_from, toDate=date_to)
    # Honour the caller's limit; it used to be hard-coded to 100 regardless of number_tweets.
    for tweet in cursor.items(number_tweets):
        contents = tweet.text
        if tweet.truncated:
            # NOTE(review): premium search payloads are expected to carry the complete
            # text under extended_tweet["full_text"] rather than a top-level full_text
            # attribute - confirm. Both are tried, falling back to the short text so a
            # truncated tweet can no longer raise AttributeError.
            extended = getattr(tweet, "extended_tweet", None) or {}
            contents = extended.get("full_text") or getattr(tweet, "full_text", contents)
        category = category_key(contents,dict_search_terms)
        yield [tweet.user.screen_name, category, str(tweet.created_at), "", contents, tweet.id, tweet.source,
               tweet.quote_count, tweet.reply_count, tweet.retweet_count, tweet.favorite_count]

In [63]:
#Verifying DataFrame structure.
#The bare expression makes the notebook render df_tweets_found as this cell's output.
df_tweets_found

Unnamed: 0,twitter_user,category,time,sentiment,text,tweet_id,tweet_url,quote_count,reply_count,retweet_count,fav_count


In [78]:
#This function searches the last month, but uses the same arguments and formatting as full archive; useful for testing without using up queries.
def tweet_search_month(string_query, date_from = "202012190000", date_to = "202101170000", number_tweets = 100):
    """Search the 30-day endpoint and yield one flat row per tweet found.

    Takes the same arguments and yields the same row layout as the
    full-archive search, so it can stand in for it while testing.

    Args:
        string_query: Query string passed to the endpoint as `query`.
        date_from: Lower time bound, passed as `fromDate` (same digit-string
            format as the default).
        date_to: Upper time bound, passed as `toDate`.
        number_tweets: Maximum number of tweets to take from the cursor.

    Yields:
        list: [screen_name, categories, created_at as str, "" (sentiment
        placeholder), text, tweet id, source, quote_count, reply_count,
        retweet_count, favorite_count] - the same order as the columns of
        df_tweets_found.
    """
    cursor = tweepy.Cursor(api.search_30_day, query=string_query, environment_name='CryptoSentimentQueryTest',
                           fromDate=date_from, toDate=date_to)
    # Honour the caller's limit; number_tweets used to be ignored and the cursor
    # was drained without any bound.
    for tweet in cursor.items(number_tweets):
        contents = tweet.text
        if tweet.truncated:
            # NOTE(review): premium search payloads are expected to carry the complete
            # text under extended_tweet["full_text"] rather than a top-level full_text
            # attribute - confirm. Both are tried, falling back to the short text so a
            # truncated tweet can no longer raise AttributeError.
            extended = getattr(tweet, "extended_tweet", None) or {}
            contents = extended.get("full_text") or getattr(tweet, "full_text", contents)
        category = category_key(contents,dict_search_terms)
        yield [tweet.user.screen_name, category, str(tweet.created_at), "", contents, tweet.id, tweet.source,
               tweet.quote_count, tweet.reply_count, tweet.retweet_count, tweet.favorite_count]

In [9]:
#Function for generating a string list of categories from the presence of keywords in text.
def category_key(text_block,dict_keywords):
    """Return the categories whose keywords occur in a piece of text.

    Matching is a case-insensitive substring test: both the text and each
    keyword are lower-cased before comparison.

    Args:
        text_block: Text to scan. None or "" is treated as empty text.
        dict_keywords: Mapping of category name -> iterable of keyword strings.

    Returns:
        list: Category names with at least one matching keyword, in the
        mapping's iteration order. Each category appears at most once, even
        when several of its keywords match.
    """
    contents = (text_block or "").lower()
    # any() stops at the first hit, so overlapping keywords such as 'bitcoin'
    # and '#bitcoin' no longer add the same category more than once.
    return [key for key, keywords in dict_keywords.items()
            if any(keyword.lower() in contents for keyword in keywords)]

# Function usage and testing the DataFrame

In [65]:
#Testing Query Creator
#Redefines the search-term library with the same contents as the earlier definition cell.
dict_search_terms = {
    'bitcoin': ['bitcoin', 'btc', '#bitcoin', '#btc'],
    'generic': ['cryptocurrency', 'crypto', 'blockchain', 'struggle'],
}
#Generic terms first, then the bitcoin terms, restricted to a single account.
query = query_creator(
    dict_search_terms['generic'] + dict_search_terms['bitcoin'],
    'joerogan',
)
#query += " -is:retweet"
#These two date strings are only assigned here; nothing in the visible cells reads them.
date_since_pro = "202001140000"
date_from_pro = "202101170000"
#Show the query and its length on separate lines.
print(query, len(query), sep="\n")
#Query length is limited to 128 characters, max tweets per query is limited to 100.

(cryptocurrency OR crypto OR blockchain OR struggle OR bitcoin OR btc OR #bitcoin OR #btc) from:joerogan
104


In [70]:
#Testing primary search and DataFrame append.
search_results = tweet_search_month('(cryptocurrency OR crypto OR blockchain OR struggle OR bitcoin OR btc OR #bitcoin OR #btc) (from:joerogan OR from:elonmusk)')

#Collect the rows first and extend the DataFrame once afterwards: DataFrame.append is
#deprecated (removed in pandas 2.x) and appending row by row copies the whole frame
#on every iteration.
list_new_rows = []
for tweets in search_results:
    print (tweets)
    list_new_rows.append(tweets)

if list_new_rows:
    df_new_rows = pd.DataFrame(list_new_rows, columns=df_tweets_found.columns)
    if df_tweets_found.empty:
        #Nothing collected yet: the new rows are the whole table.
        df_tweets_found = df_new_rows
    else:
        df_tweets_found = pd.concat([df_tweets_found, df_new_rows], ignore_index=True)

['joerogan', 'generic', '2021-01-15 17:29:42+00:00', '', 'Self imposed struggle is essential for a clear mind. @onnit https://t.co/mvd8yseLc6', 1350133067193544712, 'Instagram', 8, 187, 133, 1637]
['elonmusk', 'bitcoin', '2020-12-20 09:24:37+00:00', '', 'Bitcoin is almost as bs as fiat money', 1340588909974200321, 'Twitter for iPhone', 3117, 8427, 10423, 142330]
['elonmusk', 'bitcoin', '2020-12-20 08:21:25+00:00', '', 'Bitcoin is my safe word', 1340573003579617280, 'Twitter Web App', 3190, 6871, 20557, 243156]


In [71]:
#Render df_tweets_found as this cell's output to inspect the rows collected so far.
df_tweets_found

Unnamed: 0,twitter_user,category,time,sentiment,text,tweet_id,tweet_url,quote_count,reply_count,retweet_count,fav_count
0,joerogan,generic,2021-01-15 17:29:42+00:00,,Self imposed struggle is essential for a clear...,1350133067193544712,Instagram,8,187,133,1637
1,elonmusk,bitcoin,2020-12-20 09:24:37+00:00,,Bitcoin is almost as bs as fiat money,1340588909974200321,Twitter for iPhone,3117,8427,10423,142330
2,elonmusk,bitcoin,2020-12-20 08:21:25+00:00,,Bitcoin is my safe word,1340573003579617280,Twitter Web App,3190,6871,20557,243156


In [74]:
#Text of the row labelled 2, selected with a single .loc[row, column] lookup instead of chained indexing.
df_tweets_found.loc[2, 'text']

# Old, Defunct, Or Reference Code Beyond This Point

In [31]:
#This is the user_timeline method, which seems to return around 500~1000 tweets tops.
#ticker ends up holding the number of tweets printed; it stays 0 when nothing comes back.
ticker = 0
timeline_cursor = tweepy.Cursor(api.user_timeline, id='elonmusk', trim_user=True, max_id=2237531699681981416,
                                count=10000, exclude_replies=True)
for ticker, tweet in enumerate(timeline_cursor.items(), start=1):
    #if 'bitcoin' in tweet.text:
    print(f"{tweet.text} {tweet.created_at} {tweet.id}\n")
print(ticker)

Battery cell production is the fundamental rate-limiter slowing down a sustainable energy future. Very important pr… https://t.co/9ybABEzxCV 2021-01-18 05:52:28+00:00 1351044768030142464

Monty Python is amazing
https://t.co/UJq94IWT88 2021-01-15 06:44:22+00:00 1349970666477527042

RT @SpaceX: Splashdown of Dragon confirmed, completing SpaceX’s 21st @Space_Station resupply mission and the first return of a cargo resupp… 2021-01-14 03:51:19+00:00 1349564729417756672

https://t.co/ho7yGXAS3a 2021-01-13 21:20:05+00:00 1349466269121179653

Today at SpaceX is about practicing Starship engine starts. Ship is held down by massive pins while engines are fir… https://t.co/QtIiWLxXPl 2021-01-13 19:06:00+00:00 1349432529657294848

Legalize comedy 2021-01-13 09:25:42+00:00 1349286488618491904

RT @SpaceX: Separation confirmed! Dragon performing 3 departure burns to move away from the @Space_Station https://t.co/G27ohWvnXj 2021-01-13 01:25:16+00:00 1349165584995512320

Hey you …
Yeah you Queen …
Yo

In [35]:
#Print full_text for up to 100 tweets from the elonmusk timeline, requested with tweet_mode="extended".
extended_timeline = tweepy.Cursor(api.user_timeline, id="elonmusk", tweet_mode="extended")
for tweet in extended_timeline.items(100):
    #if 'Python' in tweet.full_text:
    print(tweet.full_text)
    #print(tweet.user['id_str'])

In [None]:
#Defunct Old Search Method
def tweet_gather(keywords, user_id):
    """Yield (text, created_at) for timeline tweets that mention any keyword.

    Walks the user_timeline of `user_id` and compares each keyword against the
    tweet text as a case-insensitive substring.

    Args:
        keywords: Iterable of keyword strings.
        user_id: Passed to api.user_timeline as `id`.

    Yields:
        tuple: (tweet text, creation time as str). A tweet is yielded once,
        no matter how many of the keywords it contains.
    """
    #Lower-case the keywords once instead of once per tweet.
    keywords_lower = [word.lower() for word in keywords]
    for tweet in tweepy.Cursor(api.user_timeline, id=user_id).items():
        #Earlier draft, kept for reference:
        #if tweet.truncated == True:
        #    contents = tweet.full_text
        #else:
        #    contents = str(tweet.text).lower()
        contents = tweet.text.lower()
        #any() stops at the first hit; the old per-keyword loop yielded the same tweet
        #again for every additional keyword it matched.
        if any(word in contents for word in keywords_lower):
            #print(tweet.text + " " + str(tweet.created_at)+"\n")
            #Here we can append to a pd.DataFrame that collects the twitter handle, keywords used,
            #keyword category (for ex: 'bitcoin' for hits on 'bitcoin' or 'btc'), as well as any
            #sentiment rating we create. Ultimately we can then export that DataFrame to csv file.
            yield tweet.text, str(tweet.created_at)

In [None]:
#This is the basic api.search; it only returns tweets from the last 7 days.
recent_query = '(cryptocurrency OR crypto OR blockchain OR bitcoin OR struggle OR python) from:elonmusk'
for tweet in tweepy.Cursor(api.search, q=recent_query).items(1000):
    #One tweet per block: text, then creation time, followed by a blank line.
    print(f"{tweet.text} {tweet.created_at}\n")

In [None]:
#Function for creating search query string for tweepy.Cursor(api.search_full_archive).
def query_creator_full(list_keywords, twitter_user = "!", mood = "!"):
    """Build a space-separated query string with an optional author filter.

    Args:
        list_keywords: Iterable of keyword strings, joined with single spaces.
        twitter_user: Screen name appended as " from:<user>"; the default "!"
            is a sentinel meaning "no user filter".
        mood: Not used by this function.

    Returns:
        The joined keywords, followed by " from:<user>" when a user is given.
    """
    joined_keywords = " ".join(list_keywords)
    # Guard clause: without a user the joined keywords are the whole query.
    if twitter_user == "!":
        return joined_keywords
    return joined_keywords + " from:" + twitter_user