In [2]:
# Initial imports

import requests
import os
import json
from dotenv import load_dotenv
import pandas as pd

# tweepy for twitter data requests
import tweepy as tw


In [3]:
# Loading the twitter tokens and keys for twitter OAuth access

load_dotenv()

consumer_key = os.getenv("consumer_key")
consumer_secret = os.getenv("consumer_secret")
access_token = os.getenv("access_token")
access_token_secret = os.getenv("access_token_secret")
bearer_token = os.getenv("BEARER")



In [11]:
query = 'from:elonmusk -is:retweet'
client = tw.Client( bearer_token=bearer_token, 
                        consumer_key=consumer_key, 
                        consumer_secret=consumer_secret, 
                        access_token=access_token, 
                        access_token_secret=access_token_secret, 
                        return_type = requests.Response,
                        wait_on_rate_limit=True)
     

In [20]:
    
# start time of the data parameter
start_time = '2021-01-01T00:00:00Z'

# end_time of the data parameter
end_time = '2022-05-06T00:00:00Z'

userid="44196397"

# get maximun 100 tweets for last 7 days only
tweets = client.search_recent_tweets(query=query, 
                                     tweet_fields=['text','author_id', 'created_at'],
                                     max_results=100)


tweets_dict = tweets.json()
tweets_data = tweets_dict['data'] 
df = pd.json_normalize(tweets_data) 

df.tail(10)

Unnamed: 0,created_at,author_id,id,text
77,2022-05-02T18:53:29.000Z,44196397,1521201226834137088,@RenataKonkoly @ggreenwald Haha 💯
78,2022-05-02T18:48:23.000Z,44196397,1521199942626381826,@RenataKonkoly @ggreenwald All’s well that’s O...
79,2022-05-02T18:31:09.000Z,44196397,1521195604596113408,@ggreenwald 🤣🤣 https://t.co/56Rr6dWxLL
80,2022-05-02T18:12:19.000Z,44196397,1521190864592293891,the elusive beauty of imperfection\n\n侘寂
81,2022-05-02T18:07:27.000Z,44196397,1521189640312664065,@PPathole @kr0mb0pul0smike @RationalEtienne @t...
82,2022-05-02T17:58:59.000Z,44196397,1521187508645773312,@kr0mb0pul0smike @RationalEtienne @tomselliott...
83,2022-05-02T17:55:25.000Z,44196397,1521186612457132034,@RationalEtienne @tomselliott @mehdirhasan Whe...
84,2022-05-02T17:42:45.000Z,44196397,1521183425914417158,@tomselliott @mehdirhasan Same org that covere...
85,2022-05-02T17:36:17.000Z,44196397,1521181798067224578,@tomselliott @mehdirhasan NBC basically saying...
86,2022-05-02T16:04:34.000Z,44196397,1521158715193315328,@GerberKawasaki Will do our best


In [21]:
# due to the limitation on search_recent_tweets trying different api call to fetch full data

tweets = client.search_all_tweets(query=query, tweet_fields=['text','author_id', 'created_at'],
                                  start_time=start_time,
                                  end_time=end_time, 
                                  max_results=100)

# There is a limitation that search_all_tweets in API v2 is not available for the elevated access level. 
# only the 'Academic Research' level access has the access to all time data. 


Forbidden: 403 Forbidden
When authenticating requests to the Twitter API v2 endpoints, you must use keys and tokens from a Twitter developer App that is attached to a Project. You can create a project via the developer portal.

In [22]:
# Due to the twitter limiation on historical data use the kaggle to fetch the publicly available dataset
# https://www.kaggle.com/datasets/ayhmrba/elon-musk-tweets-2010-2021?select=2021.csv

tweet_path = "Resources/2021_ElonMusk_Tweets.csv"
tweet_df = pd.read_csv( tweet_path, index_col="id", infer_datetime_format=True, parse_dates=True)
tweet_df.sort_index()
tweet_df.head()

Unnamed: 0_level_0,date,name,tweet,replies_count,retweets_count,likes_count
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1.476662e+18,2021-12-31,Elon Musk,@roshanpateI 🤣 $7,793,364,13468
1.476656e+18,2021-12-31,Elon Musk,@tesla_raj Many UI improvements coming,1008,435,12209
1.476652e+18,2021-12-31,Elon Musk,@CSmithson80 @heydave7 @BLKMDL3 @mims This cha...,240,143,2529
1.47662e+18,2021-12-30,Elon Musk,@BLKMDL3 @mims Predicting macroeconomics is ch...,709,1235,5756
1.476618e+18,2021-12-30,Elon Musk,"@mims If history is any guide, not many will m...",370,549,5247


In [23]:
# cleanup the data 
tweet_df.isnull().sum()

date              0
name              0
tweet             0
replies_count     0
retweets_count    0
likes_count       0
dtype: int64

In [24]:
# Drop nulls
tweet_df = tweet_df.dropna().copy()
print("\033[1mCLEANED NULLS:\n\033[0m")
print( tweet_df.isnull().sum())
print("\n\033[1mCLEANED TWITTER DATA:\033[0m\n")
tweet_df.head()

[1mCLEANED NULLS:
[0m
date              0
name              0
tweet             0
replies_count     0
retweets_count    0
likes_count       0
dtype: int64

[1mCLEANED WHALE RETURNS:[0m



Unnamed: 0_level_0,date,name,tweet,replies_count,retweets_count,likes_count
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1.476662e+18,2021-12-31,Elon Musk,@roshanpateI 🤣 $7,793,364,13468
1.476656e+18,2021-12-31,Elon Musk,@tesla_raj Many UI improvements coming,1008,435,12209
1.476652e+18,2021-12-31,Elon Musk,@CSmithson80 @heydave7 @BLKMDL3 @mims This cha...,240,143,2529
1.47662e+18,2021-12-30,Elon Musk,@BLKMDL3 @mims Predicting macroeconomics is ch...,709,1235,5756
1.476618e+18,2021-12-30,Elon Musk,"@mims If history is any guide, not many will m...",370,549,5247


In [35]:
# Filter out the tweets that is not bitcoin or crypto related

df = tweet_df.loc[tweet_df['tweet'].str.contains("bitcoin|crypto", case=False)]
df.head(54)


Unnamed: 0_level_0,date,name,tweet,replies_count,retweets_count,likes_count
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1.469039e+18,2021-12-10,Elon Musk,@BillyM2k @TheCryptoCPA 😬,561,243,4506
1.461942e+18,2021-11-20,Elon Musk,@WSBChairman Bitcoin cures cancer,5002,5777,41849
1.455245e+18,2021-11-01,Elon Musk,@BillyM2k - Everything on the Internet is true...,2347,2908,19884
1.452539e+18,2021-10-25,Elon Musk,@CryptoWhale Sclerotic democracy,666,617,8184
1.452348e+18,2021-10-24,Elon Musk,@ProTheDoge Lots of people I talked to on the ...,4060,7162,33219
1.452334e+18,2021-10-24,Elon Musk,@Filasophical @ShibaInuHodler Out of curiosity...,5448,6230,36329
1.452332e+18,2021-10-24,Elon Musk,@itsALLrisky @TeslaGong @mishaboar @DogecoinFd...,739,760,5366
1.452186e+18,2021-10-24,Elon Musk,@BillyM2k Plus crypto scams haha,2067,1319,17163
1.423781e+18,2021-08-07,Elon Musk,"@brian_armstrong @MarkWarner Agreed, this is n...",1413,5351,33214
1.42053e+18,2021-07-29,Elon Musk,"@heydave7 We don’t have that many Bitcoin, but...",3319,1305,25526
