# Streaming from Twitter to a Tinybird Data Source

Filter tweets using a list of tracking words. Stream the tweets to a Tinybird Data Source.

In [9]:
# Show the installed versions of jupyter and tweepy.
!pip show jupyter | grep Version
!pip show tweepy | grep Version

Version: 1.0.0
Version: 4.5.0


In [14]:
# Uncomment and run these lines to install the dependencies used by this notebook.
#!pip install jupyter==1.0.0
#!pip install tweepy==4.5.0
#!pip install git+https://github.com/tinybirdco/tinybird-python-sdk@master

In [15]:
import json
import logging
import os
import time
from email.utils import parsedate_to_datetime

import tweepy
from tb.datasource import Datasource

logging.getLogger().setLevel(logging.INFO)

Get the credentials you need (API key, API key secret, access token and access token secret) from your app on [Twitter's Developer Platform](https://developer.twitter.com/en), and the token from the [Tinybird](https://ui.tinybird.co/login) workspace where you want to store the Data Source of tweets.

In [20]:
# Credentials are read from environment variables so that secrets never have
# to be saved inside the notebook. Each value falls back to '' when its
# variable is not set: export the variables before starting Jupyter, or assign
# the values here for a throwaway run and clear them before sharing the file.
TWITTER_API_KEY = os.environ.get('TWITTER_API_KEY', '')
TWITTER_API_KEY_SECRET = os.environ.get('TWITTER_API_KEY_SECRET', '')
TWITTER_ACCESS_TOKEN = os.environ.get('TWITTER_ACCESS_TOKEN', '')
TWITTER_ACCESS_TOKEN_SECRET = os.environ.get('TWITTER_ACCESS_TOKEN_SECRET', '')
TB_TOKEN = os.environ.get('TB_TOKEN', '')

Set up your variables, including the words to look for in tweets. We use [tweepy.Stream](https://docs.tweepy.org/en/stable/stream.html?highlight=.Stream) to filter realtime tweets on the list of keywords.

In [17]:
# Base URL of the Tinybird API, passed to the Datasource client as api_url.
TB_HOST = 'https://api.tinybird.co'

# Name of the Data Source that receives the tweets.
datasource = 'tweets'

# Label stored in the "search_label" field of every row sent to Tinybird.
search_label = 'ESP'

# Keywords handed to the stream filter: each one is listed capitalised and
# lower-case, with and without the "ñ".
track = [
    'Madrid', 'madrid',
    'Spain', 'spain',
    'España', 'españa',
    'Espana', 'espana',
]

# Number of seconds the stream is allowed to run before it disconnects.
time_limit = 30

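Define a stream listener that converts each incoming tweet into a row and sends it to the Tinybird Data Source. The rows are flushed to Tinybird in chunks.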

In [18]:
class TwitterToTinybird(tweepy.Stream):
    """Stream filtered tweets into a Tinybird Data Source for a limited time.

    Every payload received from the stream is reduced to a row with the keys
    ``search_label``, ``tweet`` and ``date`` and pushed to the Data Source
    with ``<<``. Once the time limit has elapsed since construction, the next
    payload received disconnects the stream.

    Args:
        auth: The Twitter credentials, unpacked positionally into
            ``tweepy.Stream.__init__``.
        datasource: Name of the destination Tinybird Data Source.
        token: Tinybird token used to build the ``Datasource``.
        api_url: Base URL of the Tinybird API.
        search_label: Value stored in the ``search_label`` field of every row.
        **kwargs: Forwarded to ``tweepy.Stream`` (e.g. ``daemon``,
            ``chunk_size``). An optional ``time_limit`` (seconds) is consumed
            here and not forwarded; when it is omitted the notebook-level
            ``time_limit`` is used.
    """

    def __init__(self, auth, datasource, token, api_url, search_label, **kwargs):
        # tweepy.Stream.__init__ rejects unknown keywords, so `time_limit`
        # has to be taken out of kwargs before delegating to it.
        limit = kwargs.pop('time_limit', None)
        super().__init__(*auth, **kwargs)
        self.datasource = Datasource(datasource, token, api_url=api_url)
        self.search_label = search_label
        self.start_time = time.monotonic()
        # Only fall back to the notebook-level setting when no limit was given.
        self.limit = time_limit if limit is None else limit
        print('seconds to run:', self.limit)

    @staticmethod
    def _status_text(status):
        """Return the fullest text available in a status dict, or '' if none."""
        if not isinstance(status, dict):
            return ''
        if status.get('truncated'):
            full_text = (status.get('extended_tweet') or {}).get('full_text')
            if full_text:
                return full_text
        # Not truncated, or the extended payload is missing: use the short text.
        return status.get('text') or ''

    def on_data(self, raw_tweet):
        """Handle one raw payload received from the stream.

        Payloads that are not valid JSON, that lack ``created_at``, ``id`` or
        ``text``, or whose date cannot be parsed are skipped.

        Returns:
            False once the time limit has been exceeded (after disconnecting),
            None otherwise.
        """
        if (time.monotonic() - self.start_time) > self.limit:
            print('reached time limit')
            self.disconnect()
            return False

        try:
            tweet = json.loads(raw_tweet)
        except ValueError as e:
            # One malformed payload should not take the whole stream down.
            logging.warning('skipping payload that is not valid JSON: %s', e)
            return
        if not isinstance(tweet, dict):
            return
        if 'created_at' not in tweet or 'id' not in tweet or 'text' not in tweet:
            return

        try:
            created_at = parsedate_to_datetime(str(tweet['created_at']))
        except (TypeError, ValueError) as e:
            logging.warning('skipping tweet with unparseable date %r: %s',
                            tweet['created_at'], e)
            return

        # The tweet's own text, the retweeted text and the quoted text are
        # concatenated without a separator, in that order.
        statuses = (
            tweet,
            tweet.get('retweeted_status'),
            tweet.get('quoted_status'),
        )
        text = ''.join(self._status_text(status) for status in statuses)

        tw = {
            "search_label": self.search_label,
            "tweet": text,
            "date": created_at.strftime("%Y-%m-%d %H:%M:%S")
        }
        self.datasource << tw

Listen for your tweets

In [19]:
def connect():
    """Build the Tinybird-backed stream and start filtering on `track`.

    Returns:
        The started TwitterToTinybird instance. If anything raises while the
        stream is being built or started, the exception is printed and None
        is returned.
    """
    try:
        twitter_credentials = [
            TWITTER_API_KEY,
            TWITTER_API_KEY_SECRET,
            TWITTER_ACCESS_TOKEN,
            TWITTER_ACCESS_TOKEN_SECRET,
        ]
        listener = TwitterToTinybird(
            twitter_credentials,
            datasource,
            TB_TOKEN,
            TB_HOST,
            search_label,
            daemon=True,
            chunk_size=10 * 1024,
        )
        # threaded=True asks tweepy to run the stream in its own thread.
        listener.filter(track=track, threaded=True)
    except Exception as err:
        print(err)
        return None
    return listener

# Start streaming. `stream` is the TwitterToTinybird instance returned by
# connect(), or None when connect() printed an error instead.
print('connect')
stream = connect()

connect
seconds to run: 30


INFO:tweepy.streaming:Stream connected
INFO:root:Flushing 16 records and 5.2 K bytes to tweets
INFO:root:Waiting while flushing...
INFO:root:Flushing 11 records and 4.8 K bytes to tweets
INFO:root:Flushing 10 records and 4.0 K bytes to tweets
INFO:root:Waiting while flushing...
INFO:root:Flushing 14 records and 5.4 K bytes to tweets
INFO:root:Flushing 16 records and 5.3 K bytes to tweets
INFO:root:Waiting while flushing...
INFO:root:Flushing 9 records and 2.8 K bytes to tweets
INFO:root:Waiting while flushing...
INFO:root:Flushing 16 records and 5.2 K bytes to tweets
INFO:root:Flushing 11 records and 4.1 K bytes to tweets
INFO:root:Flushing 4 records and 1.4 K bytes to tweets
INFO:root:Flushing 9 records and 3.4 K bytes to tweets
INFO:root:Waiting while flushing...
INFO:root:Flushing 14 records and 4.9 K bytes to tweets
INFO:root:Flushing 8 records and 2.5 K bytes to tweets
INFO:root:Waiting while flushing...
INFO:root:Flushing 13 records and 4.9 K bytes to tweets
INFO:root:Waiting whi

reached time limit
