In [35]:
#Import Various Libraries, including Tweepy, a Python library for the Twitter API.
import os
import requests
import pandas as pd
from dotenv import load_dotenv
from textblob import TextBlob 
#%matplotlib inline
import tweepy

load_dotenv()

True

In [36]:
#Pass API Keys to Twitter API and build Tweepy API handler object.
consumer_key = os.getenv('TWITTER_API_KEY')
consumer_secret = os.getenv('TWITTER_SECRET_KEY')
auth = tweepy.AppAuthHandler(consumer_key, consumer_secret)
api = tweepy.API(auth)

# Defining the results DataFrame and Search Targets

In [22]:
#Creating a DataFrame for tweet search results data structure.
dict_tweet_structure = {"twitter_user":"","category":[],"time":"","sentiment":"","text":"","tweet_id":"",
                        "tweet_source":"","quote_count":0,"reply_count":0,"retweet_count":0,"fav_count":0,
                        "Polarity Rating": "", "Popularity Rating":""}

In [23]:
#Initializing primary DataFrame of tweets. DON'T RUN! Or you will lose your data. :)
df_tweets_found = pd.DataFrame(dict_tweet_structure)
df_tweets_found

Unnamed: 0,twitter_user,category,time,sentiment,text,tweet_id,tweet_source,quote_count,reply_count,retweet_count,fav_count,Polarity Rating,Popularity Rating


In [24]:
#Define Search Term Library
dict_search_terms = {'bitcoin':['bitcoin','btc','#bitcoin',],
                     'generic':['cryptocurrency','blockchain'],
                     'litecoin':['litecoin','ltc','#litecoin', '#ltc'],
                     'ethereum':['ethereum','eth','#ethereum','#eth']   }
#Define Twitter User List
list_twitterers = ['joerogan','elonmusk','officialmcafee','vitalikbuterin']

# Functions

In [25]:
#Function for creating search query string for tweepy.Cursor(api.search). Max query length is 128 for sandbox env.
#In it's current form it supports a bunch of keywords joined by OR, grouped by (), AND from a single tweeter.
def query_creator(list_keywords, twitter_user = "!", mood = "!"):
    query = "("
    for keyword in (list_keywords):
        query += keyword
        if keyword != list_keywords[-1]:
            query += " OR "
        else:
            query += ")"
    if twitter_user != "!":
        query += " from:" + twitter_user
    return query

In [26]:
#This is the core function for tweet searching.
def tweet_search_full(string_query, date_from = "200603210000", date_to = "201801010000", number_tweets = 500):
    for tweets in tweepy.Cursor(api.search_full_archive, query=string_query, environment_name='CryptoSentimentQueryProd',
                                fromDate=date_from, toDate=date_to).items():
        #Primary environment name = CryptoSentimentFullArc
        screen_name = tweets.user.screen_name
        contents = ""
        #if tweets.truncated == True:
        #    contents = tweets.full_text
        #else:
        contents = tweets.text
        category = category_key(contents,dict_search_terms)
        yield [screen_name, category, str(tweets.created_at), "", contents, tweets.id, tweets.source, tweets.quote_count,
               tweets.reply_count, tweets.retweet_count, tweets.favorite_count]

In [27]:
#This function searches the last month, but uses the same arguments and formatting as full archive; useful for testing without using up queries.
def tweet_search_month(string_query, date_from = "200603210000", date_to = "202101170000", number_tweets = 500):
    for tweets in tweepy.Cursor(api.search_30_day, query=string_query, environment_name='CryptoSentimentQueryTest',
                                fromDate=date_from, toDate=date_to,).items():
        screen_name = tweets.user.screen_name
        contents = ""
        #if tweets.truncated == True:
        #    contents = tweets.full_text
        #else:
        contents = tweets.text
        
        category = category_key(contents,dict_search_terms)
        yield [screen_name, category, str(tweets.created_at), str(tweets.coordinates), contents, tweets.id, tweets.source, tweets.quote_count,
               tweets.reply_count, tweets.retweet_count, tweets.favorite_count]
        

In [28]:
#This function searches the most recent tweets about a given search query. It is always free within the Twitter API. (default/max number = 500 tweets)
def tweet_search_free(string_query, number_tweets = 100):
    for tweets in tweepy.Cursor(api.search, q=string_query).items(number_tweets):
        screen_name = tweets.user.screen_name
        contents = ""
        #if tweets.truncated == True:
        #    contents = tweets.full_text
        #else:
        contents = tweets.text
        polarity = sentiment_reader(contents)
        
        category = category_key(contents,dict_search_terms)
        yield [screen_name, category, str(tweets.created_at), str(tweets.coordinates), contents, tweets.id, tweets.source,"x",
               "x", tweets.retweet_count, tweets.favorite_count, polarity, (polarity * int(tweets.retweet_count))]

In [29]:
#Function for appending search results rows to the tweet df.
def tweet_dataframe_append(rows, target_dataframe):
    for row in rows:
        series_result = pd.Series(row, index=target_dataframe.columns)
        target_dataframe = target_dataframe.append(series_result, ignore_index=True)
        return target_dataframe

In [30]:
#Function for generating a list of categories from the presence of keywords in text.
def category_key(text_block,dict_keywords):
    category_list = []
    output = " "
    contents = text_block.lower()
    for key in dict_keywords:
        for keyword in dict_keywords[key]:
            if keyword in contents:
                category_list.append(key)
    #return output.join(category_list)
    return category_list

In [31]:
#Function for generating sentiment values using textblob library.
def sentiment_reader(text_block):
    sentiment_text = TextBlob(text_block)
    return sentiment_text.polarity

In [32]:
#Function for fixing duplicate values in the category column.
def no_dupes(string):
    string =  str(string)
    if string[-1] != " ":
        string = string + " "
    wordlist = []
    output = ""
    word = ""
    for letter in string:
        if letter != " ":
            word = word + letter
        elif word not in wordlist:
            wordlist.append(word)
            output = output + word + " "
            word = ""
        else:
            word = ""
    return output

# Function usage and testing the DataFrame

In [33]:
#Testing Query Creator
query = query_creator(dict_search_terms['generic']+dict_search_terms['bitcoin']+dict_search_terms['litecoin'], 'satoshilite')
print(query)
print(len(query))
#Query length is limited to 128 characters, max tweets per query is limited to 100.

(cryptocurrency OR blockchain OR bitcoin OR btc OR #bitcoin OR litecoin OR ltc OR #litecoin OR #ltc) from:satoshilite
117


In [34]:
#Testing primary search and DataFrame append.
search_results = tweet_search_full(query)

for tweets in search_results:
    print (tweets)
    series_result = pd.Series(tweets, index=df_tweets_found.columns)
    df_tweets_found = df_tweets_found.append(series_result, ignore_index=True)

TweepError: {'message': 'Request exceeds account’s current package request limits. Please upgrade your package and retry or contact Twitter about enterprise access.', 'sent': '2021-01-27T02:40:41+00:00', 'transactionId': '00c586950012e3ac'}

In [167]:
df_tweets_found

Unnamed: 0,twitter_user,category,time,sentiment,text,tweet_id,tweet_source,quote_count,reply_count,retweet_count,fav_count
0,elonmusk,bitcoin,2020-12-20 09:24:37+00:00,,Bitcoin is almost as bs as fiat money,1340588909974200321,Twitter for iPhone,3117,8427,10418,142292
1,elonmusk,bitcoin,2020-12-20 08:21:25+00:00,,Bitcoin is my safe word,1340573003579617280,Twitter Web App,3191,6871,20548,243138
2,elonmusk,,2020-11-17 06:54:03+00:00,,@trylolli @Maisie_Williams 👻 💵 \nhttps://t.co/...,1328592219956252673,Twitter for iPhone,31,85,57,969
3,elonmusk,bitcoin,2020-11-16 22:02:51+00:00,,@Maisie_Williams 🎶 Toss a bitcoin to ur Witcher 🎶,1328458535340949505,Twitter for iPhone,765,1138,3433,66880
4,elonmusk,bitcoin,2020-12-20 09:24:37+00:00,,Bitcoin is almost as bs as fiat money,1340588909974200321,Twitter for iPhone,3117,8426,10418,142292
5,elonmusk,bitcoin,2020-12-20 08:21:25+00:00,,Bitcoin is my safe word,1340573003579617280,Twitter Web App,3191,6871,20549,243138
6,elonmusk,,2020-11-17 06:54:03+00:00,,@trylolli @Maisie_Williams 👻 💵 \nhttps://t.co/...,1328592219956252673,Twitter for iPhone,31,85,57,969
7,elonmusk,bitcoin,2020-11-16 22:02:51+00:00,,@Maisie_Williams 🎶 Toss a bitcoin to ur Witcher 🎶,1328458535340949505,Twitter for iPhone,765,1138,3433,66879
8,elonmusk,bitcoin,2020-12-20 09:24:37+00:00,,Bitcoin is almost as bs as fiat money,1340588909974200321,Twitter for iPhone,3117,8426,10418,142292
9,elonmusk,bitcoin,2020-12-20 08:21:25+00:00,,Bitcoin is my safe word,1340573003579617280,Twitter Web App,3191,6871,20549,243140


In [94]:
df_tweets_found.loc[0]['text']

'Self imposed struggle is essential for a clear mind. @onnit https://t.co/mvd8yseLc6'

In [208]:
df_tweets_found.to_csv('../data/raw_data/raw_tweets_01.csv',index=False)

# Data Cleaning (Data cleaning was performed in a couple different stages throughout the progress of the project, the steps used here on df_cleaning and df_graphing reflect that process.

In [41]:
#Read in and test data.
df_cleaning = pd.read_csv('../data/raw_data/raw_tweets_01_filter.csv')
df_cleaning.loc[13]['text']
df_graphing = pd.read_csv('../data/raw_data/raw_tweets_01_filter_polarity.csv')

'@techreview Just use this handy guide https://t.co/wWZGuNpe5f'

In [228]:
#Run several times to eliminate usernames containing crypto. For example, there were many false positives from retweets involving users with 'crypto' appearing in their username.
df_cleaning = df_cleaning[~df_cleaning.text.str.contains("_crypto")]

In [36]:
#Testing time based index.
df_graphing = df_graphing.set_index('time')
df_graphing.sort_index(inplace=True)
df_graphing

Unnamed: 0_level_0,twitter_user,category,sentiment,text,tweet_id,tweet_source,quote_count,reply_count,retweet_count,fav_count,Polarity Rating,Popularity Rating
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
2012-08-08 02:29:54+00:00,joerogan,,,RT @TG_2: @joerogan 2 million a month in drug ...,233027225165914112,Twitter Web Client,0,0,0,0,0.000000,0.000000
2012-08-31 14:28:56+00:00,VitalikButerin,bitcoin,,RT @FlipPro: What is Bitcoin? #tweetforum htt...,241543098390425600,UpTweet Inc.,0,0,0,0,0.000000,0.000000
2012-08-31 14:36:39+00:00,VitalikButerin,bitcoin,,@UzivajEstates Publicize it more perhaps. AFAI...,241545036863520768,Twitter Web Client,0,0,2,5,0.483333,2.416667
2012-08-31 14:39:10+00:00,VitalikButerin,bitcoin,,@UzivajEstates Publicize it more perhaps. AFAI...,241545670681575425,UpTweet Inc.,0,1,1,1,0.483333,0.483333
2013-01-01 12:05:57+00:00,VitalikButerin,bitcoin,,"@FlipPRO What does ""regulating"" bitcoin even m...",286080819506843648,UpTweet Inc.,0,0,2,2,0.093750,0.187500
2013-01-23 22:44:11+00:00,VitalikButerin,bitcoin,,Avalon Ships Bitcoin’s First Consumer ASICs (+...,294213970154364928,UpTweet Inc.,0,0,6,3,0.250000,0.750000
2013-02-07 14:21:57+00:00,VitalikButerin,bitcoin,,"Finally, Canada's getting another btc exchange...",299523397203935233,Twitter for Websites,0,0,2,3,0.000000,0.000000
2013-02-08 00:09:32+00:00,VitalikButerin,bitcoin,,January 2013: Bitcoin News Roundup #UT http://...,299671267752161280,UpTweet Inc.,0,0,4,4,0.000000,0.000000
2013-02-08 00:11:01+00:00,VitalikButerin,bitcoin,,Bitcoin Wallet Options #UT http://t.co/NRgRktH9,299671639438798848,UpTweet Inc.,0,0,6,5,0.000000,0.000000
2013-03-29 06:39:26+00:00,joerogan,bitcoin,,Future of currency? \nBitcoin: How An Unregul...,317526394575917056,Twitter Web Client,4,52,305,215,0.000000,0.000000


In [37]:
#Dropping the old sentiment column, which is made obsolete by the polarity and popularity rating columns.
df_graphing.drop('sentiment',axis = 1, inplace=True)

In [237]:
#Checking data
df_cleaning.head(100)

Unnamed: 0,twitter_user,category,time,sentiment,text,tweet_id,tweet_source,quote_count,reply_count,retweet_count,fav_count
0,elonmusk,bitcoin,2020-12-20 09:24:37+00:00,,Bitcoin is almost as bs as fiat money,1340588909974200321,Twitter for iPhone,3117,8427,10418,142292
1,elonmusk,bitcoin,2020-12-20 08:21:25+00:00,,Bitcoin is my safe word,1340573003579617280,Twitter Web App,3191,6871,20548,243138
2,elonmusk,,2020-11-17 06:54:03+00:00,,@trylolli @Maisie_Williams 👻 💵 \nhttps://t.co/...,1328592219956252673,Twitter for iPhone,31,85,57,969
3,elonmusk,bitcoin,2020-11-16 22:02:51+00:00,,@Maisie_Williams 🎶 Toss a bitcoin to ur Witcher 🎶,1328458535340949505,Twitter for iPhone,765,1138,3433,66880
4,elonmusk,bitcoin,2020-12-20 09:24:37+00:00,,Bitcoin is almost as bs as fiat money,1340588909974200321,Twitter for iPhone,3117,8426,10418,142292
5,elonmusk,bitcoin,2020-12-20 08:21:25+00:00,,Bitcoin is my safe word,1340573003579617280,Twitter Web App,3191,6871,20549,243138
6,elonmusk,,2020-11-17 06:54:03+00:00,,@trylolli @Maisie_Williams 👻 💵 \nhttps://t.co/...,1328592219956252673,Twitter for iPhone,31,85,57,969
7,elonmusk,bitcoin,2020-11-16 22:02:51+00:00,,@Maisie_Williams 🎶 Toss a bitcoin to ur Witcher 🎶,1328458535340949505,Twitter for iPhone,765,1138,3433,66879
8,elonmusk,bitcoin,2020-12-20 09:24:37+00:00,,Bitcoin is almost as bs as fiat money,1340588909974200321,Twitter for iPhone,3117,8426,10418,142292
9,elonmusk,bitcoin,2020-12-20 08:21:25+00:00,,Bitcoin is my safe word,1340573003579617280,Twitter Web App,3191,6871,20549,243140


In [311]:
#Using the no_dupes function to simplify the categories column down to unique category values instead of a instanced category values.
df_graphing = df_graphing.reset_index()
df_graphing['category'] = df_graphing.category.apply(no_dupes)
df_graphing.to_csv("../data/raw_data/raw_tweets_cleaning.csv",index=False)

In [311]:
#Tweaking the time column to only reflect 
df_graphing['time'] = pd.to_datetime(df_graphing['time'],format= '%b:%d:%Y',infer_datetime_format=True ).dt.date #
#df_graphing

In [311]:
#Saving Data
df_cleaning.to_csv('../data/raw_data/raw_tweets_01_filter.csv',index=False)
df_graphing.to_csv("../data/raw_data/raw_tweets_cleaning.csv",index=False)

# Using the free recent tweet search API method to capture the most recent 500 tweets from any user that features the keywords.

In [39]:
#Using recent search to generate recent tweets from any user that includes our keywords.
df_tweets_live = pd.DataFrame(dict_tweet_structure)
query = query_creator(dict_search_terms['generic']+dict_search_terms['bitcoin']+dict_search_terms['litecoin']+dict_search_terms['ethereum'])
print(query)
live_results = tweet_search_free(query, 1000)
for tweets in live_results:
    #print (tweets)
    df_tweets_live = tweet_dataframe_append(live_results, df_tweets_live)
    

(cryptocurrency OR blockchain OR bitcoin OR btc OR #bitcoin OR litecoin OR ltc OR #litecoin OR #ltc OR ethereum OR eth OR #ethereum OR #eth)


In [26]:
import hvplot.pandas
#df_tweets_live.hvplot()
df_tweets_live.head()

In [26]:

df_tweets_found

Unnamed: 0,twitter_user,category,time,sentiment,text,tweet_id,tweet_source,quote_count,reply_count,retweet_count,fav_count,Polarity Rating,Popularity Rating
0,cryptorizeapp,"[bitcoin, bitcoin, bitcoin, litecoin, litecoin...",2021-01-25 05:14:02+00:00,,"Hourly Price Alert: BTC rose by 2.47% to $33,3...",1353571810583977984,,x,x,0,0,0.675,0.0
1,Februar06652421,[],2021-01-25 05:14:02+00:00,,RT @enriquesouza_: #smartcash to 1 #USDT this ...,1353571809677860864,Twitter for Android,x,x,124,0,0.214286,26.571429
2,idunnnoU,[ethereum],2021-01-25 05:14:01+00:00,,The last time I felt the momentum that $LINK h...,1353571804846182400,Twitter Web App,x,x,0,0,0.0,0.0
3,renjie,[ethereum],2021-01-25 05:14:00+00:00,,"RT @jerallaire: New ETH ATH, entire DeFi lands...",1353571803222958081,Twitter Web App,x,x,217,0,0.068182,14.795455
4,nk062062,[],2021-01-25 05:14:00+00:00,,"We design, evaluate and justify\ntechnology so...",1353571802362994690,Twitter Web App,x,x,0,0,0.0,0.0


In [38]:
polarity_test = df_tweets_live#.filter(['time','Popularity Rating'] ,axis=1)#.hvplot(x='time',rot=90)
polarity_test

NameError: name 'df_tweets_live' is not defined

In [37]:
#Visualizing the recent tweet data.
polarity_test['time'] = pd.to_datetime(polarity_test['time'],format= '%M:%S',infer_datetime_format=True ).dt.time
rolling_poll = polarity_test['Polarity Rating'].rolling(window=20).mean()
end = pd.to_timedelta([str(polarity_test['time'].iloc[0])])
start = pd.to_timedelta([str(polarity_test['time'].iloc[-1])])
recent_tweets_duration = str((end-start)[0])
roll = rolling_poll.hvplot(x='time',y='Polarity Rating',rot=90, hover_cols = ['twitter_user','text'])
pol = polarity_test.hvplot(x='time',y='Polarity Rating',rot=90, hover_cols = ['twitter_user','text'],xticks=6,xlabel=f"Duration length = {recent_tweets_duration}")
polarity_test.mean()

In [37]:
pol

NameError: name 'polarity_test' is not defined

# Generating Visuals Based on the df of historic tweets from influential tweeters.

In [45]:
df_graphing = df_graphing.set_index('time')
df_graphing.sort_index(inplace=True)
df_bitcoin = df_graphing.loc[df_graphing.category.str.contains('bitcoin')]
df_ethereum = df_graphing.loc[df_graphing.category.str.contains('ethereum')]
df_litecoin = df_graphing.loc[df_graphing.category.str.contains('litecoin')]
df_generic = df_graphing.loc[df_graphing.category.str.contains('generic')]
df_bitcoin_rolling = df_bitcoin['Polarity Rating'].rolling(window=50).mean()
df_ethereum_rolling = df_ethereum['Polarity Rating'].rolling(window=50).mean()
df_litecoin_rolling = df_litecoin['Polarity Rating'].rolling(window=50).mean()
df_generic_rolling = df_generic['Polarity Rating'].rolling(window=50).mean()
#df_bitcoin_rolling#.hvplot(rot=90)
df_bitcoin_rolling

time
2012-08-31         NaN
2012-08-31         NaN
2012-08-31         NaN
2013-01-01         NaN
2013-01-23         NaN
2013-02-07         NaN
2013-02-08         NaN
2013-02-08         NaN
2013-03-29         NaN
2013-03-29         NaN
2013-04-11         NaN
2013-05-04         NaN
2013-05-18         NaN
2013-05-18         NaN
2013-08-13         NaN
2013-09-14         NaN
2013-10-23         NaN
2013-10-27         NaN
2013-11-22         NaN
2013-12-22         NaN
2014-01-16         NaN
2014-01-16         NaN
2014-01-27         NaN
2014-01-27         NaN
2014-01-28         NaN
2014-01-28         NaN
2014-01-28         NaN
2014-01-28         NaN
2014-01-28         NaN
2014-01-29         NaN
                ...   
2020-10-22    0.030403
2020-10-22    0.039403
2020-10-26    0.047403
2020-10-26    0.049069
2020-11-06    0.049236
2020-11-11    0.053236
2020-11-16    0.051236
2020-11-16    0.044569
2020-11-18    0.044569
2020-11-18    0.040569
2020-11-25    0.044653
2020-11-25    0.053653
2020-1

In [46]:
df_graphing.head()

Unnamed: 0_level_0,twitter_user,category,text,tweet_id,tweet_source,quote_count,reply_count,retweet_count,fav_count,Polarity Rating,Popularity Rating
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2012-08-08,joerogan,,RT @TG_2: @joerogan 2 million a month in drug ...,233027225165914112,Twitter Web Client,0,0,0,0,0.0,0.0
2012-08-31,VitalikButerin,bitcoin,RT @FlipPro: What is Bitcoin? #tweetforum htt...,241543098390425600,UpTweet Inc.,0,0,0,0,0.0,0.0
2012-08-31,VitalikButerin,bitcoin,@UzivajEstates Publicize it more perhaps. AFAI...,241545036863520768,Twitter Web Client,0,0,2,5,0.483333,2.416667
2012-08-31,VitalikButerin,bitcoin,@UzivajEstates Publicize it more perhaps. AFAI...,241545670681575425,UpTweet Inc.,0,1,1,1,0.483333,0.483333
2013-01-01,VitalikButerin,bitcoin,"@FlipPRO What does ""regulating"" bitcoin even m...",286080819506843648,UpTweet Inc.,0,0,2,2,0.09375,0.1875


In [47]:
df_graphing.to_csv("../data/raw_data/cleaner_tweets.csv")

In [48]:
from bokeh.models.formatters import DatetimeTickFormatter

formatter = DatetimeTickFormatter(months='%M%Y')
bitcoin_roller =df_bitcoin_rolling.hvplot(x='time',y=['Polarity Rating'],xticks=12,color='red',width=1000, height=500,)
ethereum_roller =df_ethereum_rolling.hvplot(x='time',y=['Polarity Rating'],xticks=12,color='blue',width=1000, height=500,)
generic_roller =df_generic_rolling.hvplot(x='time',y=['Polarity Rating'],xticks=12,color='black',width=1000, height=500,)
litecoin_roller =df_litecoin_rolling.hvplot(x='time',y=['Polarity Rating'],xticks=12,color='yellow',legend='top_left',width=1000, height=500,)
gene = df_bitcoin.hvplot(width=1000, height=500,x='time',y='Polarity Rating',rot=90,xticks=12,hover_cols = ['twitter_user','text'],kind='scatter',aggregator='mean',color='twitter_user')
gene * bitcoin_roller

In [32]:
bitcoin_roller * ethereum_roller * generic_roller * litecoin_roller

In [611]:
df_graphing.hvplot(groupby='category')

In [29]:
graph_bitcoin = df_bitcoin.hvplot(x='time',y='Popularity Rating',rot=90, hover_cols = ['twitter_user','text'])
graph_ethereum = df_ethereum.hvplot(x='time',y='Popularity Rating',rot=90, hover_cols = ['twitter_user','text'])
graph_litecoin = df_litecoin.hvplot(x='time',y='Popularity Rating',rot=90, hover_cols = ['twitter_user','text'])
graph_generic = df_generic.hvplot(x='time',y='Popularity Rating',rot=90, hover_cols = ['twitter_user','text'])
graph_etheruem + graph_bitcoin

NameError: name 'graph_etheruem' is not defined

# Old Or Reference Code Beyond This Point

In [31]:
#This is the user_timeline method, which seems to return around 500~1000 tweets tops.
ticker = 0
for tweet in tweepy.Cursor(api.user_timeline, id='elonmusk',trim_user=True, max_id=2237531699681981416, count =10000, exclude_replies = True).items():
    #if 'bitcoin' in tweet.text:
    ticker += 1
    print(tweet.text + " " + str(tweet.created_at)+" "+str(tweet.id)+"\n")
print(ticker)

Battery cell production is the fundamental rate-limiter slowing down a sustainable energy future. Very important pr… https://t.co/9ybABEzxCV 2021-01-18 05:52:28+00:00 1351044768030142464

Monty Python is amazing
https://t.co/UJq94IWT88 2021-01-15 06:44:22+00:00 1349970666477527042

RT @SpaceX: Splashdown of Dragon confirmed, completing SpaceX’s 21st @Space_Station resupply mission and the first return of a cargo resupp… 2021-01-14 03:51:19+00:00 1349564729417756672

https://t.co/ho7yGXAS3a 2021-01-13 21:20:05+00:00 1349466269121179653

Today at SpaceX is about practicing Starship engine starts. Ship is held down by massive pins while engines are fir… https://t.co/QtIiWLxXPl 2021-01-13 19:06:00+00:00 1349432529657294848

Legalize comedy 2021-01-13 09:25:42+00:00 1349286488618491904

RT @SpaceX: Separation confirmed! Dragon performing 3 departure burns to move away from the @Space_Station https://t.co/G27ohWvnXj 2021-01-13 01:25:16+00:00 1349165584995512320

Hey you …
Yeah you Queen …
Yo

In [35]:
#Testing the Tweepy API
for tweet in tweepy.Cursor(api.user_timeline, id="elonmusk",tweet_mode="extended").items(100):
    #if 'Python' in tweet.full_text:
        print(tweet.full_text)
        #print(tweet.user['id_str'])

In [None]:
#Defunct Old Search Method
def tweet_gather(keywords, user_id):
    #contents = ""
    for tweet in tweepy.Cursor(api.user_timeline, id=user_id).items():
        """if tweet.truncated == True:
            contents = tweet.full_text
        else:
            contents = str(tweet.text).lower()"""
        
        contents = tweet.text.lower()
        #contents = contents.lower() 
        for word in keywords:           
            if word.lower() in contents:
                #print(tweet.text + " " + str(tweet.created_at)+"\n")
                """Here we can append to a pd.DataFrame that collects the twitter handle, keywords used, keyword category
                   for ex:'bitcoin' for hits on 'bitcoin' or 'btc', as well as any sentiment rating we create.
                   Ulitmately we can then export that DataFrame to csv file."""
                yield tweet.text, str(tweet.created_at)

In [247]:
#This is the basic api.search; it only returns tweets from the last 7 days.
for tweet in tweepy.Cursor(api.search, q='(cryptocurrency OR crypto OR blockchain OR bitcoin OR struggle OR python)').items(1000):
    print(tweet.text + " " + str(tweet.created_at)+" "+str(tweet.place)+"\n")

RT @FactsOfSchool: The struggle is real https://t.co/vbFRNsj086 2021-01-23 19:54:08+00:00 None

RT @APompliano: “Wall Street is the only place that people drive to in a Rolls Royce to take advice from people who ride the subway.” - War… 2021-01-23 19:54:08+00:00 None

RT @BitcoinTre: The highest number this #Bitcoin OG user could fathom was just $10,000. https://t.co/1WKdDhf14e 2021-01-23 19:54:08+00:00 None

RT @George1Trader: Pure luck and all because of the news, right?

Morning! ☕

#bitcoin 2021-01-23 19:54:07+00:00 None

RT @glitchbotio: "Conscience is a dog that does not stop us from passing but that we cannot prevent from barking."- Nicolas Chamfort #softw… 2021-01-23 19:54:07+00:00 None

RT @chainyoda: Me sitting tight on #bitcoin #ethereum #aave and literally nothing else watching all manner of junk go to the moon https://t… 2021-01-23 19:54:06+00:00 None

@pupmochii it is a very big struggle, yes 😔

At least ppl who know who directly represents me tho, most people cant… https

In [None]:
#Function for creating search query string for tweepy.Cursor(api.search_full_archive).
def query_creator_full(list_keywords, twitter_user = "!", mood = "!"):
    query = " "
    query = query.join(list_keywords)
    #print(query)
    if twitter_user != "!":
        query += " from:" + twitter_user
        #print(query)
    return query

In [None]:
##trying to test sentiment analysis 
df_tweets_found = pd.read_csv('../data/raw_data/raw_tweets_01_filter.csv')

#pull the tweet text as a list from the data frame
tweet_text_list = df_tweets_found.text.tolist()


#create objects for sentiment

sentiment_text = [TextBlob(tweet) for tweet in tweet_text_list]

sentiment_text[0].polarity , sentiment_text[0]

#create sentiment list

sentiment_list = [[tweet.sentiment.polarity, str(tweet)] for tweet in sentiment_text]
sentiment_list[0]
#remove the tweet from the list of lists
for x in sentiment_list:
    del x[1]
# convert the list of lists into a DF column
df_polarity = pd.DataFrame(sentiment_list, columns = ['Polarity'])
#convert back to a single list
polarity_list = df_polarity["Polarity"].tolist()
#append to OG data frame

df_tweets_found['Polarity Rating'] = polarity_list

df_tweets_found["Popularity Rating"]= df_tweets_found["fav_count"] * df_tweets_found["Polarity Rating"]
df_tweets_found.to_csv('../data/raw_data/raw_tweets_01_filter_polarity.csv',index=False)