In [1]:
import numpy as np
import pandas as pd
import tweepy
import sys
import json
import time


class TwitterStreamListener(tweepy.StreamListener):
    '''
    This class receives messages from tweepy.Stream and route them to appropriately named methods 
    (on_status, on_error).
    '''
    
    def __init__(self, timeout=None):
        super(TwitterStreamListener, self).__init__()
        self.timeout = timeout
        self.start_time = time.time()
    
    def on_status(self, status):
        '''
        Receive streaming data and display it. If timeout parameter is specified in constructor, stop 
        displaying data after a certain time.
        '''
        if self.timeout:
            if time.time()-self.start_time < self.timeout:          
                print(f'Tweet id: ',status.id,'\n',
                     'Date: ',status.created_at,'\n',
                     'Text: ',status.text,'\n')
                return True
            else:
                return False
        else:
            print(f'Tweet id: ',status.id,'\n',
                     'Date: ',status.created_at,'\n',
                     'Text: ',status.text,'\n')
    
    def on_error(self, status_code):
        print(status_code)

        
class Twitter_crawler:
    '''
    Class to connect to Twitter API, stream tweets in real time, get last tweets for a certain topic, 
    get last tweets of a certain user
    '''
    
    def __init__(self, API_key, API_secret_key, access_token, access_token_secret):   
        
        self.consumer_key = API_key
        self.consumer_secret = API_secret_key
        self.access_token = access_token
        self.access_token_secret = access_token_secret
        self.auth = None
        self.api = None
        self.stream = None
        
    def connect(self):
        
        self.auth = tweepy.OAuthHandler(self.consumer_key, self.consumer_secret)
        self.auth.set_access_token(self.access_token, self.access_token_secret)
        self.api = tweepy.API(self.auth)
        
    def stream_tweets(self, topic, timeout=None):
        
        try:
            self.stream = tweepy.Stream(auth = self.api.auth, listener = TwitterStreamListener(timeout))
            self.stream.filter(track=[topic])
                
        except Exception as excp:
            print('Streaming failed, make sure that you are connected to the Twitter API and/or a stream listener is provided')
            raise excp
            
    def get_data(self, topic, nb_tweets, date_start='2018-11-16'):
        
        try:
            start = time.time()
            print(f"Extracting last {nb_tweets} from #{topic}")
            alltweets = {'tweet_id':[], 'date':[], 'text':[]}
            tweets = tweepy.Cursor(self.api.search, q=topic, lang="en", since=date_start).items(nb_tweets)
            for t in tweets:
                alltweets['tweet_id'].append(t._json['id_str'])
                alltweets['date'].append(t._json['created_at'])
                alltweets['text'].append(t._json['text'])
            df = pd.DataFrame(alltweets, columns=alltweets.keys())
            print(f'processing time: {time.time() - start}')
            return df
            
        except Exception as excp:
            print(f"data extraction of user(s) failed, make sure that you are connected to the Twitter API or didn't reached limit extraction")
            raise excp
                  
    def get_users_data(self, list_names, nb_tweets=200):
        '''
        This method pick tweets from all twitter account in list_names. nb_tweets sets the total number of tweets to pick.
        Maximum is 200 tweets for each user.
        '''
        try:
            start = time.time()
            final_df = pd.DataFrame(None , columns=['tweet_id', 'date', 'text'])
            for name in list_names:
                alltweets = []
                new_tweet = self.api.user_timeline(screen_name=name, count=nb_tweets)
                alltweets.extend(new_tweet)
                outtweets = np.array([[tweet.id_str, tweet.created_at, tweet.text.encode("utf-8")] for tweet in alltweets])
                if outtweets.size:
                    print(f'Extraction of the {nb_tweets} last tweets from {name}')
                    df = pd.DataFrame(outtweets, columns=['tweet_id', 'date', 'text'])
                    final_df = pd.concat([final_df, df], ignore_index=True)
            print(f'processing time: {time.time() - start}')
            return final_df
            
        except Exception as excp:
            print(f"data extraction of user(s) failed, make sure that you are connected to the Twitter API or didn't reached limit extraction")
            raise excp
            
    
API_key = "YOUR-API-KEY"
API_secret_key = "YOUR-API-SECRET-KEY"
access_token = "YOUR-ACCESS-TOKEN"
access_token_secret = "YOUR-ACCESS-TOKEN-SECRET"

crawlerTwitter = Twitter_crawler(API_key , API_secret_key, access_token, access_token_secret)

crawlerTwitter.connect()

#df = crawlerTwitter.get_user_data('omaross', 1000)
#crawlerTwitter.stream_tweets('bitcoin')
#tweets_bitcoin = crawlerTwitter.get_data('bitcoin', 500)


In [2]:
df_tweets = crawlerTwitter.get_data('stockmarket', 1000)
df_tweets

Extracting last 1000 from #stockmarket
processing time: 25.98508381843567


Unnamed: 0,tweet_id,date,text
0,1391510369663741962,Sun May 09 21:48:19 +0000 2021,$DD enters an Uptrend because Momentum Indicat...
1,1391510110074064896,Sun May 09 21:47:17 +0000 2021,$CTXS's price moved below its 50-day Moving Av...
2,1391510069502746626,Sun May 09 21:47:08 +0000 2021,Beating the S&amp;P 500 – Long Term https://t....
3,1391509851029659650,Sun May 09 21:46:16 +0000 2021,$HUM enters an Uptrend because Momentum Indica...
4,1391509591519744000,Sun May 09 21:45:14 +0000 2021,$DISCK enters an Uptrend as Momentum Indicator...
...,...,...,...
995,1391425963834036224,Sun May 09 16:12:55 +0000 2021,MACD Indicator Explanation in 1min | What is M...
996,1391425870884007945,Sun May 09 16:12:33 +0000 2021,Pulse Biosciences Inc to report earnings on Ma...
997,1391425867318841349,Sun May 09 16:12:32 +0000 2021,"Kessler Topaz Meltzer &amp; Check, LLP: Remind..."
998,1391425866182193154,Sun May 09 16:12:32 +0000 2021,Hill Street Announces Stock Option Grants and ...


In [367]:

crypto_accounts = ['DocumentingBTC', 'scottmelker', 'APompliano', 'brian_armstrong', 'cameron', 'tyler', 'michael_saylor', 'BTCTN', 'APompliano','NickSzabo4', 'nic__carter','CarpeNoctom','MartyBent','100trillionUSD','MessariCrypto','MartyBent', 'aantonop']
df_tweets_users = crawlerTwitter.get_users_data(crypto_accounts, 200)
df_tweets_users

Extraction of the 200 last tweets from DocumentingBTC
Extraction of the 200 last tweets from scottmelker
Extraction of the 200 last tweets from APompliano
Extraction of the 200 last tweets from brian_armstrong
Extraction of the 200 last tweets from cameron
Extraction of the 200 last tweets from tyler
Extraction of the 200 last tweets from michael_saylor
Extraction of the 200 last tweets from BTCTN
Extraction of the 200 last tweets from APompliano
Extraction of the 200 last tweets from NickSzabo4
Extraction of the 200 last tweets from nic__carter
Extraction of the 200 last tweets from CarpeNoctom
Extraction of the 200 last tweets from MartyBent
Extraction of the 200 last tweets from 100trillionUSD
Extraction of the 200 last tweets from MessariCrypto
Extraction of the 200 last tweets from MartyBent
Extraction of the 200 last tweets from aantonop
processing time: 10.096443891525269


Unnamed: 0,tweet_id,date,text
0,1391409179731107842,2021-05-09 15:06:14,b'The recent #Bitcoin hash rate drop already l...
1,1391393073654116353,2021-05-09 14:02:14,"b'""THE GREAT MONETARY INFLATION""\n\nhttps://t...."
2,1391392901507305473,2021-05-09 14:01:33,"b'One year ago, Paul Tudor Jones wrote, ""At th..."
3,1391383258995339269,2021-05-09 13:23:14,"b""If you bought just $25 worth of #Bitcoin, as..."
4,1391104281244221442,2021-05-08 18:54:40,"b""@CoinMarketCap @cz_binance @binance This was..."
...,...,...,...
3395,1371186626173444097,2021-03-14 19:49:01,b'Someone made a deep fake of me singing. It w...
3396,1371144122707369989,2021-03-14 17:00:08,b'The #DeFi Q&amp;A livestream is starting NOW...
3397,1371136557734899719,2021-03-14 16:30:04,b'30MIN until the #DeFi Q&amp;A livestream sta...
3398,1371134753772998656,2021-03-14 16:22:54,b'@Andrea_1892 @sunnyokoroa @blockchain @APomp...


In [368]:
crawlerTwitter.stream_tweets('bitcoin',4)

Tweet id:  1391413518939738112 
 Date:  2021-05-09 15:23:28 
 Text:  Markets to date: Bitcoin doubles in price in 2021 and the bull cycle does not stop https://t.co/pKhbiTZ5O8 

Tweet id:  1391413519598370816 
 Date:  2021-05-09 15:23:28 
 Text:  @tansokumanchi https://t.co/0NKYIEQnSa You can buy here #SHIB #shiba #safemars #SAFEMOON #akita 🚀🚀 #elon #ElonMusk… https://t.co/b7k9G0wOm7 

Tweet id:  1391413520177246210 
 Date:  2021-05-09 15:23:29 
 Text:  O valor médio das criptomoedas é:
 Bitcoin(BTC) R$ 305868,13 
 Litecoin(LTC) R$ 1824,25 
 Bitcoin Cash(BCH) R$ 7153… https://t.co/LUaOOPrP11 

Tweet id:  1391413521569787906 
 Date:  2021-05-09 15:23:29 
 Text:  RT @furkanhabernet: Bitcoin 58 bin doların üzerinde tutundu
https://t.co/Uo8f5FabW8 https://t.co/Enn7cP60fN 

Tweet id:  1391413525378183174 
 Date:  2021-05-09 15:23:30 
 Text:  @elonmusk Tesla got $1.5bn in environmental subsidies in 2020, funded by the taxpayer."It turned around and spent $… https://t.co/ZZyfFhzleS 

Tweet id