In [241]:
#Import Various Libraries, including Tweepy, a Python library for the Twitter API.
import os
import requests
import pandas as pd
from dotenv import load_dotenv
from textblob import TextBlob 
#%matplotlib inline
import tweepy

load_dotenv()

True

In [163]:
#Pass API Keys to Twitter API and build Tweepy API handler object.
consumer_key = os.getenv('ALT_TWITTER_API_KEY')
consumer_secret = os.getenv('ALT_TWITTER_SECRET_KEY')
auth = tweepy.AppAuthHandler(consumer_key, consumer_secret)
api = tweepy.API(auth)

# Defining the results DataFrame and Search Targets

In [260]:
#Creating a DataFrame for tweet search results data structure.
dict_tweet_structure = {"twitter_user":"","category":[],"time":"","sentiment":"","text":"","tweet_id":"",
                        "tweet_source":"","quote_count":0,"reply_count":0,"retweet_count":0,"fav_count":0,
                        "Polarity Rating": "", "Popularity Rating":""}

In [106]:
#Initializing primary DataFrame of tweets. DON'T RUN! Or you will lose your data. :)
df_tweets_found = pd.DataFrame(dict_tweet_structure)
df_tweets_found

Unnamed: 0,twitter_user,category,time,sentiment,text,tweet_id,tweet_source,quote_count,reply_count,retweet_count,fav_count


In [268]:
#Define Search Term Library
dict_search_terms = {'bitcoin':['bitcoin','btc','#bitcoin',],
                     'generic':['cryptocurrency','blockchain'],
                     'litecoin':['litecoin','ltc','#litecoin', '#ltc'],
                     'ethereum':['ethereum','eth','#ethereum','#eth']   }
#Define Twitter User List
list_twitterers = ['joerogan','elonmusk','officialmcafee','vitalikbuterin']

# Functions

In [6]:
#Function for creating search query string for tweepy.Cursor(api.search). Max query length is 128 for sandbox env.
#In it's current form it supports a bunch of keywords joined by OR, grouped by (), AND from a single tweeter.
def query_creator(list_keywords, twitter_user = "!", mood = "!"):
    query = "("
    for keyword in (list_keywords):
        query += keyword
        if keyword != list_keywords[-1]:
            query += " OR "
        else:
            query += ")"
    if twitter_user != "!":
        query += " from:" + twitter_user
    return query

In [184]:
#This is the core function for tweet searching.
def tweet_search_full(string_query, date_from = "200603210000", date_to = "201801010000", number_tweets = 100):
    for tweets in tweepy.Cursor(api.search_full_archive, query=string_query, environment_name='CryptoSentimentQueryProd',
                                fromDate=date_from, toDate=date_to).items():
        #Primary environment name = CryptoSentimentFullArc
        screen_name = tweets.user.screen_name
        contents = ""
        #if tweets.truncated == True:
        #    contents = tweets.full_text
        #else:
        contents = tweets.text
        category = category_key(contents,dict_search_terms)
        yield [screen_name, category, str(tweets.created_at), "", contents, tweets.id, tweets.source, tweets.quote_count,
               tweets.reply_count, tweets.retweet_count, tweets.favorite_count]

In [269]:
#Verifying DataFrame structure.
df_tweets_found.head()

Unnamed: 0,twitter_user,category,time,sentiment,text,tweet_id,tweet_source,quote_count,reply_count,retweet_count,fav_count,Polarity Rating,Popularity Rating
0,elonmusk,bitcoin,2020-12-20 09:24:37+00:00,,Bitcoin is almost as bs as fiat money,1340588909974200321,Twitter for iPhone,3117,8427,10418,142292,0.0,0.0
1,elonmusk,bitcoin,2020-12-20 08:21:25+00:00,,Bitcoin is my safe word,1340573003579617280,Twitter Web App,3191,6871,20548,243138,0.5,121569.0
2,elonmusk,,2020-11-17 06:54:03+00:00,,@trylolli @Maisie_Williams 👻 💵 \nhttps://t.co/...,1328592219956252673,Twitter for iPhone,31,85,57,969,0.0,0.0
3,elonmusk,bitcoin,2020-11-16 22:02:51+00:00,,@Maisie_Williams 🎶 Toss a bitcoin to ur Witcher 🎶,1328458535340949505,Twitter for iPhone,765,1138,3433,66880,0.0,0.0
4,elonmusk,bitcoin,2020-12-20 09:24:37+00:00,,Bitcoin is almost as bs as fiat money,1340588909974200321,Twitter for iPhone,3117,8426,10418,142292,0.0,0.0


In [204]:
#This function searches the last month, but uses the same arguments and formatting as full archive; useful for testing without using up queries.
def tweet_search_month(string_query, date_from = "200603210000", date_to = "202101170000", number_tweets = 100):
    for tweets in tweepy.Cursor(api.search_30_day, query=string_query, environment_name='CryptoSentimentQueryTest',
                                fromDate=date_from, toDate=date_to,).items():
        screen_name = tweets.user.screen_name
        contents = ""
        #if tweets.truncated == True:
        #    contents = tweets.full_text
        #else:
        contents = tweets.text
        
        category = category_key(contents,dict_search_terms)
        yield [screen_name, category, str(tweets.created_at), str(tweets.coordinates), contents, tweets.id, tweets.source, tweets.quote_count,
               tweets.reply_count, tweets.retweet_count, tweets.favorite_count]
        

In [283]:
def tweet_search_free(string_query, number_tweets = 100):
    for tweets in tweepy.Cursor(api.search, q=string_query).items(number_tweets):
        screen_name = tweets.user.screen_name
        contents = ""
        #if tweets.truncated == True:
        #    contents = tweets.full_text
        #else:
        contents = tweets.text
        polarity = sentiment_reader(contents)
        
        category = category_key(contents,dict_search_terms)
        yield [screen_name, category, str(tweets.created_at), str(tweets.coordinates), contents, tweets.id, tweets.source,"x",
               "x", tweets.retweet_count, tweets.favorite_count, polarity, (polarity * int(tweets.retweet_count))]

In [266]:
#Needs Testing, but here's a function for appending search results to the tweet df.
def tweet_dataframe_append(rows, target_dataframe):
    for row in rows:
        series_result = pd.Series(row, index=target_dataframe.columns)
        target_dataframe = target_dataframe.append(series_result, ignore_index=True)
        return target_dataframe

In [9]:
#Function for generating a list of categories from the presence of keywords in text.
def category_key(text_block,dict_keywords):
    category_list = []
    output = " "
    contents = text_block.lower()
    for key in dict_keywords:
        for keyword in dict_keywords[key]:
            if keyword in contents:
                category_list.append(key)
    #return output.join(category_list)
    return category_list

In [280]:
#Function for generating sentiment values using textblob library.
def sentiment_reader(text_block):
    sentiment_text = TextBlob(text_block)
    return sentiment_text.polarity

# Function usage and testing the DataFrame

In [245]:
#Testing Query Creator
query = query_creator(dict_search_terms['generic']+dict_search_terms['bitcoin']+dict_search_terms['litecoin'], 'satoshilite')
print(query)
print(len(query))
#Query length is limited to 128 characters, max tweets per query is limited to 100.

(cryptocurrency OR blockchain OR bitcoin OR btc OR #bitcoin OR litecoin OR ltc OR #litecoin OR #ltc) from:satoshilite
117


In [201]:
#Testing primary search and DataFrame append.
search_results = tweet_search_full(query)

for tweets in search_results:
    print (tweets)
    series_result = pd.Series(tweets, index=df_tweets_found.columns)
    df_tweets_found = df_tweets_found.append(series_result, ignore_index=True)

['SatoshiLite', 'litecoin', '2017-12-31 20:12:34+00:00', '', 'RT @TheRealXinxi: Most of you may not know. Charlie actually donated millions of dollars to the Litecoin development recently.', 947561135754043392, 'Twitter for Android', 0, 0, 0, 0]
['SatoshiLite', 'bitcoin litecoin litecoin litecoin litecoin', '2017-12-31 05:16:49+00:00', '', 'RT @YourBTCC: #Litecoin price increased by more than 7,705% from December 27 last year to December 26 this year 🕺🍻😀 #LTC! We created a seri…', 947335713766256640, 'Twitter for Android', 0, 0, 0, 0]
['SatoshiLite', 'litecoin', '2017-12-31 04:14:56+00:00', '', "Above all else, I'm a X of Litecoin.", 947320140789555200, 'Twitter for Android', 15, 424, 160, 735]
['SatoshiLite', 'litecoin', '2017-12-31 02:26:20+00:00', '', '@taymorawd Come on! LTC has gone up like 50x since then. Stop complaining.', 947292811262541824, 'Twitter for Android', 1, 12, 3, 173]
['SatoshiLite', 'litecoin', '2017-12-31 02:24:27+00:00', '', "@mflambert @whalepool People shouldn'

TweepError: {'message': 'Request exceeds account’s current package request limits. Please upgrade your package and retry or contact Twitter about enterprise access.', 'sent': '2021-01-22T03:50:47+00:00', 'transactionId': '00db3f4f002f314b'}

In [167]:
df_tweets_found

Unnamed: 0,twitter_user,category,time,sentiment,text,tweet_id,tweet_source,quote_count,reply_count,retweet_count,fav_count
0,elonmusk,bitcoin,2020-12-20 09:24:37+00:00,,Bitcoin is almost as bs as fiat money,1340588909974200321,Twitter for iPhone,3117,8427,10418,142292
1,elonmusk,bitcoin,2020-12-20 08:21:25+00:00,,Bitcoin is my safe word,1340573003579617280,Twitter Web App,3191,6871,20548,243138
2,elonmusk,,2020-11-17 06:54:03+00:00,,@trylolli @Maisie_Williams 👻 💵 \nhttps://t.co/...,1328592219956252673,Twitter for iPhone,31,85,57,969
3,elonmusk,bitcoin,2020-11-16 22:02:51+00:00,,@Maisie_Williams 🎶 Toss a bitcoin to ur Witcher 🎶,1328458535340949505,Twitter for iPhone,765,1138,3433,66880
4,elonmusk,bitcoin,2020-12-20 09:24:37+00:00,,Bitcoin is almost as bs as fiat money,1340588909974200321,Twitter for iPhone,3117,8426,10418,142292
5,elonmusk,bitcoin,2020-12-20 08:21:25+00:00,,Bitcoin is my safe word,1340573003579617280,Twitter Web App,3191,6871,20549,243138
6,elonmusk,,2020-11-17 06:54:03+00:00,,@trylolli @Maisie_Williams 👻 💵 \nhttps://t.co/...,1328592219956252673,Twitter for iPhone,31,85,57,969
7,elonmusk,bitcoin,2020-11-16 22:02:51+00:00,,@Maisie_Williams 🎶 Toss a bitcoin to ur Witcher 🎶,1328458535340949505,Twitter for iPhone,765,1138,3433,66879
8,elonmusk,bitcoin,2020-12-20 09:24:37+00:00,,Bitcoin is almost as bs as fiat money,1340588909974200321,Twitter for iPhone,3117,8426,10418,142292
9,elonmusk,bitcoin,2020-12-20 08:21:25+00:00,,Bitcoin is my safe word,1340573003579617280,Twitter Web App,3191,6871,20549,243140


In [94]:
df_tweets_found.loc[0]['text']


'Self imposed struggle is essential for a clear mind. @onnit https://t.co/mvd8yseLc6'

In [208]:
df_tweets_found.to_csv('../data/raw_data/raw_tweets_01.csv',index=False)

In [236]:
df_cleaning = pd.read_csv('../data/raw_data/raw_tweets_01_filter.csv')
#df_cleaning.drop_duplicates(inplace=True)
#df_cleaning.xs('text', axis=1)
df_cleaning.loc[13]['text']

'@techreview Just use this handy guide https://t.co/wWZGuNpe5f'

In [228]:
#Run several times to eliminate usernames containing crypto.
df_cleaning = df_cleaning[~df_cleaning.text.str.contains("_crypto")]

In [237]:
df_cleaning.to_csv('../data/raw_data/raw_tweets_01_filter.csv',index=False)
df_tweets_found
#df_cleaning.head(100)

Unnamed: 0,twitter_user,category,time,sentiment,text,tweet_id,tweet_source,quote_count,reply_count,retweet_count,fav_count
0,elonmusk,bitcoin,2020-12-20 09:24:37+00:00,,Bitcoin is almost as bs as fiat money,1340588909974200321,Twitter for iPhone,3117,8427,10418,142292
1,elonmusk,bitcoin,2020-12-20 08:21:25+00:00,,Bitcoin is my safe word,1340573003579617280,Twitter Web App,3191,6871,20548,243138
2,elonmusk,,2020-11-17 06:54:03+00:00,,@trylolli @Maisie_Williams 👻 💵 \nhttps://t.co/...,1328592219956252673,Twitter for iPhone,31,85,57,969
3,elonmusk,bitcoin,2020-11-16 22:02:51+00:00,,@Maisie_Williams 🎶 Toss a bitcoin to ur Witcher 🎶,1328458535340949505,Twitter for iPhone,765,1138,3433,66880
4,elonmusk,bitcoin,2020-12-20 09:24:37+00:00,,Bitcoin is almost as bs as fiat money,1340588909974200321,Twitter for iPhone,3117,8426,10418,142292
5,elonmusk,bitcoin,2020-12-20 08:21:25+00:00,,Bitcoin is my safe word,1340573003579617280,Twitter Web App,3191,6871,20549,243138
6,elonmusk,,2020-11-17 06:54:03+00:00,,@trylolli @Maisie_Williams 👻 💵 \nhttps://t.co/...,1328592219956252673,Twitter for iPhone,31,85,57,969
7,elonmusk,bitcoin,2020-11-16 22:02:51+00:00,,@Maisie_Williams 🎶 Toss a bitcoin to ur Witcher 🎶,1328458535340949505,Twitter for iPhone,765,1138,3433,66879
8,elonmusk,bitcoin,2020-12-20 09:24:37+00:00,,Bitcoin is almost as bs as fiat money,1340588909974200321,Twitter for iPhone,3117,8426,10418,142292
9,elonmusk,bitcoin,2020-12-20 08:21:25+00:00,,Bitcoin is my safe word,1340573003579617280,Twitter Web App,3191,6871,20549,243140


In [243]:
##trying to test sentiment analysis 
df_tweets_found = pd.read_csv('../data/raw_data/raw_tweets_01_filter.csv')

#pull the tweet text as a list from the data frame
tweet_text_list = df_tweets_found.text.tolist()


#create objects for sentiment

sentiment_text = [TextBlob(tweet) for tweet in tweet_text_list]

sentiment_text[0].polarity , sentiment_text[0]

#create sentiment list

sentiment_list = [[tweet.sentiment.polarity, str(tweet)] for tweet in sentiment_text]
sentiment_list[0]
#remove the tweet from the list of lists
for x in sentiment_list:
    del x[1]
# convert the list of lists into a DF column
df_polarity = pd.DataFrame(sentiment_list, columns = ['Polarity'])
#convert back to a single list
polarity_list = df_polarity["Polarity"].tolist()
#append to OG data frame

df_tweets_found['Polarity Rating'] = polarity_list

df_tweets_found["Popularity Rating"]= df_tweets_found["fav_count"] * df_tweets_found["Polarity Rating"]
df_tweets_found.to_csv('../data/raw_data/raw_tweets_01_filter_polarity.csv',index=False)

In [284]:
#Using recent search to generate recent tweets from anyone that include our keywords.
df_tweets_live = pd.DataFrame(dict_tweet_structure)
query = query_creator(dict_search_terms['generic']+dict_search_terms['bitcoin']+dict_search_terms['litecoin']+dict_search_terms['ethereum'])
print(query)
live_results = tweet_search_free(query, 100)
for tweets in live_results:
    print (tweets)
    df_tweets_live = tweet_dataframe_append(live_results, df_tweets_live)
    

(cryptocurrency OR blockchain OR bitcoin OR btc OR #bitcoin OR litecoin OR ltc OR #litecoin OR #ltc OR ethereum OR eth OR #ethereum OR #eth)
['kdotoa', 'bitcoin bitcoin', '2021-01-23 20:26:12+00:00', 'None', 'This crazy thought just passed through my head..\n\nWhat are the possibilities of @elonmusk starlinks mining #Bitcoin in orbit?', 1353076587382497286, 'Twitter for iPhone', 'x', 'x', 0, 0, -0.6, -0.0]
['PoolGrow', 'bitcoin ethereum', '2021-01-23 20:26:11+00:00', 'None', "@LiftStakePool @repsistance Let's see if we can duplicate this process on BTC and ETH chains and demonstrate the di… https://t.co/jWdYF6eebx", 1353076585927069696, 'Twitter for Android', 'x', 'x', 0, 0, 0.0, 0.0]
['bitcoin216', 'bitcoin', '2021-01-23 20:26:09+00:00', 'None', 'Hut 8 Seals $11.8 Million Loan to Procure Next-Gen Bitcoin\xa0Miners https://t.co/0PgN2phAmY', 1353076576670408705, 'WordPress.com', 'x', 'x', 0, 0, 0.0, 0.0]
['Kada_soulayman', '', '2021-01-23 20:26:08+00:00', 'None', 'Time : 2021-01-23T20:2

In [288]:
import hvplot.pandas
#df_tweets_live.hvplot()
df_tweets_live.head()

In [304]:
polarity_test = df_tweets_live.xs(['time','Polarity Rating'],axis=1)#.hvplot(x='time',rot=90)
polarity_test.loc['time'] = pd.to_datetime(polarity_test['time'])

TypeError: unhashable type: 'list'

# Old, Defunct, Or Reference Code Beyond This Point

In [31]:
#This is the user_timeline method, which seems to return around 500~1000 tweets tops.
ticker = 0
for tweet in tweepy.Cursor(api.user_timeline, id='elonmusk',trim_user=True, max_id=2237531699681981416, count =10000, exclude_replies = True).items():
    #if 'bitcoin' in tweet.text:
    ticker += 1
    print(tweet.text + " " + str(tweet.created_at)+" "+str(tweet.id)+"\n")
print(ticker)

Battery cell production is the fundamental rate-limiter slowing down a sustainable energy future. Very important pr… https://t.co/9ybABEzxCV 2021-01-18 05:52:28+00:00 1351044768030142464

Monty Python is amazing
https://t.co/UJq94IWT88 2021-01-15 06:44:22+00:00 1349970666477527042

RT @SpaceX: Splashdown of Dragon confirmed, completing SpaceX’s 21st @Space_Station resupply mission and the first return of a cargo resupp… 2021-01-14 03:51:19+00:00 1349564729417756672

https://t.co/ho7yGXAS3a 2021-01-13 21:20:05+00:00 1349466269121179653

Today at SpaceX is about practicing Starship engine starts. Ship is held down by massive pins while engines are fir… https://t.co/QtIiWLxXPl 2021-01-13 19:06:00+00:00 1349432529657294848

Legalize comedy 2021-01-13 09:25:42+00:00 1349286488618491904

RT @SpaceX: Separation confirmed! Dragon performing 3 departure burns to move away from the @Space_Station https://t.co/G27ohWvnXj 2021-01-13 01:25:16+00:00 1349165584995512320

Hey you …
Yeah you Queen …
Yo

In [35]:
for tweet in tweepy.Cursor(api.user_timeline, id="elonmusk",tweet_mode="extended").items(100):
    #if 'Python' in tweet.full_text:
        print(tweet.full_text)
        #print(tweet.user['id_str'])

In [None]:
#Defunct Old Search Method
def tweet_gather(keywords, user_id):
    #contents = ""
    for tweet in tweepy.Cursor(api.user_timeline, id=user_id).items():
        """if tweet.truncated == True:
            contents = tweet.full_text
        else:
            contents = str(tweet.text).lower()"""
        
        contents = tweet.text.lower()
        #contents = contents.lower() 
        for word in keywords:           
            if word.lower() in contents:
                #print(tweet.text + " " + str(tweet.created_at)+"\n")
                """Here we can append to a pd.DataFrame that collects the twitter handle, keywords used, keyword category
                   for ex:'bitcoin' for hits on 'bitcoin' or 'btc', as well as any sentiment rating we create.
                   Ulitmately we can then export that DataFrame to csv file."""
                yield tweet.text, str(tweet.created_at)

In [247]:
#This is the basic api.search; it only returns tweets from the last 7 days.
for tweet in tweepy.Cursor(api.search, q='(cryptocurrency OR crypto OR blockchain OR bitcoin OR struggle OR python)').items(1000):
    print(tweet.text + " " + str(tweet.created_at)+" "+str(tweet.place)+"\n")

RT @FactsOfSchool: The struggle is real https://t.co/vbFRNsj086 2021-01-23 19:54:08+00:00 None

RT @APompliano: “Wall Street is the only place that people drive to in a Rolls Royce to take advice from people who ride the subway.” - War… 2021-01-23 19:54:08+00:00 None

RT @BitcoinTre: The highest number this #Bitcoin OG user could fathom was just $10,000. https://t.co/1WKdDhf14e 2021-01-23 19:54:08+00:00 None

RT @George1Trader: Pure luck and all because of the news, right?

Morning! ☕

#bitcoin 2021-01-23 19:54:07+00:00 None

RT @glitchbotio: "Conscience is a dog that does not stop us from passing but that we cannot prevent from barking."- Nicolas Chamfort #softw… 2021-01-23 19:54:07+00:00 None

RT @chainyoda: Me sitting tight on #bitcoin #ethereum #aave and literally nothing else watching all manner of junk go to the moon https://t… 2021-01-23 19:54:06+00:00 None

@pupmochii it is a very big struggle, yes 😔

At least ppl who know who directly represents me tho, most people cant… https

In [None]:
#Function for creating search query string for tweepy.Cursor(api.search_full_archive).
def query_creator_full(list_keywords, twitter_user = "!", mood = "!"):
    query = " "
    query = query.join(list_keywords)
    #print(query)
    if twitter_user != "!":
        query += " from:" + twitter_user
        #print(query)
    return query