In [4]:
import numpy as np
import pandas as pd
import tweepy
import sys
import json
import time


class TwitterStreamListener(tweepy.StreamListener):
    '''
    This class receives messages from tweepy.Stream and route them to appropriately named methods 
    (on_status, on_error).
    '''
    
    def __init__(self, timeout=None):
        super(TwitterStreamListener, self).__init__()
        self.timeout = timeout
        self.start_time = time.time()
    
    def on_status(self, status):
        '''
        Receive streaming data and display it. If timeout parameter is specified in constructor, stop 
        displaying data after a certain time.
        '''
        if self.timeout:
            if time.time()-self.start_time < self.timeout:          
                print(f'Tweet id: ',status.id,'\n',
                     'Date: ',status.created_at,'\n',
                     'Text: ',status.text,'\n')
                return True
            else:
                return False
        else:
            print(f'Tweet id: ',status.id,'\n',
                     'Date: ',status.created_at,'\n',
                     'Text: ',status.text,'\n')
    
    def on_error(self, status_code):
        print(status_code)

        
class Twitter_crawler:
    '''
    Class to connect to Twitter API, stream tweets in real time, get last tweets for a certain topic, 
    get last tweets of a certain user
    '''
    
    def __init__(self, API_key, API_secret_key, access_token, access_token_secret):   
        
        self.consumer_key = API_key
        self.consumer_secret = API_secret_key
        self.access_token = access_token
        self.access_token_secret = access_token_secret
        self.auth = None
        self.api = None
        self.stream = None
        
    def connect(self):
        
        self.auth = tweepy.OAuthHandler(self.consumer_key, self.consumer_secret)
        self.auth.set_access_token(self.access_token, self.access_token_secret)
        self.api = tweepy.API(self.auth)
        
    def stream_tweets(self, topic, timeout=None):
        
        try:
            self.stream = tweepy.Stream(auth = self.api.auth, listener = TwitterStreamListener(timeout))
            self.stream.filter(track=[topic])
                
        except Exception as excp:
            print('Streaming failed, make sure that you are connected to the Twitter API and/or a stream listener is provided')
            raise excp
            
    def get_data(self, topic, nb_tweets, date_start='2018-11-16'):
        
        try:
            alltweets = {'tweet_id':[], 'date':[], 'text':[]}
            tweets = tweepy.Cursor(self.api.search, q=topic, lang="en", since=date_start).items(nb_tweets)
            for t in tweets:
                alltweets['tweet_id'].append(t._json['id_str'])
                alltweets['date'].append(t._json['created_at'])
                alltweets['text'].append(t._json['text'])
            df = pd.DataFrame(alltweets, columns=alltweets.keys())
            return df
            
        except Exception as excp:
            print('Data extraction failed, make sure that you are connected to the Twitter API')
            raise excp
                  
    def get_user_data(self, name, nb_tweets):
    
        try:
            alltweets = []
            new_tweet = self.api.user_timeline(screen_name=name, count=nb_tweets)
            alltweets.extend(new_tweet)
            outtweets = np.array([[tweet.id_str, tweet.created_at, tweet.text.encode("utf-8")] for tweet in alltweets])
            df = pd.DataFrame(outtweets, columns=['tweet_id', 'date', 'text'])
            return df
            
        except Exception as excp:
            print(f'data extraction of user "{name}" failed, make sure that you are connected to the Twitter API')
            raise excp
            
    
API_key = "YOUR-API-KEY"
API_secret_key = "YOUR-API-SECRET-KEY"
access_token = "YOUR-ACCESS-TOKEN"
access_token_secret = "YOUR-ACCESS-TOKEN-SECRET"

crawlerTwitter = Twitter_crawler(API_key , API_secret_key, access_token, access_token_secret)

crawlerTwitter.connect()

#df = crawlerTwitter.get_user_data('omaross', 1000)
#crawlerTwitter.stream_tweets('bitcoin')
#tweets_bitcoin = crawlerTwitter.get_data('bitcoin', 500)


In [5]:
df_tweets = crawlerTwitter.get_data('bitcoin', 500)
df_tweets

Unnamed: 0,tweet_id,date,text
0,1386783065813442562,Mon Apr 26 20:43:42 +0000 2021,RT @coinbase: In this week’s report: Understan...
1,1386783065322803201,Mon Apr 26 20:43:42 +0000 2021,RT @BeegSmokey: Can’t wait for Tesla to shove ...
2,1386783059748343808,Mon Apr 26 20:43:41 +0000 2021,RT @davidgerard: THEY LITERALLY DID THOUGH\n\n...
3,1386783059110936581,Mon Apr 26 20:43:41 +0000 2021,RT @ParikPatelCFA: Lord give me the confidence...
4,1386783058410590209,Mon Apr 26 20:43:40 +0000 2021,Eric Weinstein calls Bitcoin potential hedge a...
...,...,...,...
495,1386782553076551687,Mon Apr 26 20:41:40 +0000 2021,RT @scottmelker: People are mad that Tesla sol...
496,1386782551684038663,Mon Apr 26 20:41:40 +0000 2021,RT @Investments_CEO: Retweet this post if you ...
497,1386782549804941314,Mon Apr 26 20:41:39 +0000 2021,RT @BITCOIN_MODE: Tesla has made more profit b...
498,1386782548857081857,Mon Apr 26 20:41:39 +0000 2021,RT @michael_saylor: “Things may come to those ...


In [6]:
df_musk_tweets = crawlerTwitter.get_user_data('elonmusk', 200)
df_musk_tweets

Unnamed: 0,tweet_id,date,text
0,1386759755088211974,2021-04-26 19:11:05,b'@PPathole An advantage I did have is that my...
1,1386744515856322563,2021-04-26 18:10:31,b'@PPathole True. The opposite in fact \xe2\x8...
2,1386743561585102853,2021-04-26 18:06:44,b'RT @Space_Station: The @SpaceX Crew-1 astron...
3,1386563667706011653,2021-04-26 06:11:54,b'@BLKMDL3 @JohnnaCrider1 We just got approval...
4,1386562909833109505,2021-04-26 06:08:53,b'@JohnnaCrider1 https://t.co/xl441YNnOs'
...,...,...,...
195,1379018148570091522,2021-04-05 10:28:42,b'Godzilla vs Kong is so amaze much wow!\nMost...
196,1378900127402852352,2021-04-05 02:39:43,b'@TerminalCount The Starships feast in Valhal...
197,1378896089760337922,2021-04-05 02:23:41,b'@TerminalCount One of the greatest things I\...
198,1378838922244063237,2021-04-04 22:36:31,b'@floko12022021 @HamblinZeke @katlinegrey Ich...


In [7]:
crawlerTwitter.stream_tweets('bitcoin',4)

Tweet id:  1386783111413784577 
 Date:  2021-04-26 20:43:53 
 Text:  I wish I could go back to 2010 and tell myself to buy $2 worth of #Bitcoin (at 6 cents per token!) 

Tweet id:  1386783111489282057 
 Date:  2021-04-26 20:43:53 
 Text:  RT @CryptoMichNL: Peak high bull cycle prediction, it still stands. Expecting 2022/2023 to be the top, not this year. 

#Bitcoin $350,000-4… 

Tweet id:  1386783111694934028 
 Date:  2021-04-26 20:43:53 
 Text:  RT @BTC_Archive: Tesla buys $1.5b #Bitcoin in Feb
Sold $272m
Now holds $1.33b 

Tweet id:  1386783112365887491 
 Date:  2021-04-26 20:43:53 
 Text:  RT @AirdropDet: New #Airdrop: Gramswap (2nd Round)

💲Reward: Up to 150 GMS [~$27]+15 GMS [~$2.7] per referral

🔴 Start the airdrop bot: htt… 

Tweet id:  1386783114358370305 
 Date:  2021-04-26 20:43:54 
 Text:  Wow, $72~$1000 Airdrop, don't miss the @CryptoUltraman Airdrop. I'll receive 120+10*referrals $AMAN. If someone par… https://t.co/rHwtFCrYgR 

Tweet id:  1386783115130114050 
 Date:  2021-