# Tweets producer

We will see how to use Spark Structured Streaming with Kafka through a hashtag-count example.

First, we must start the following code, which retrieves Tweets from a user and sends them to a Kafka topic.

In [None]:
import tweepy
import time
from kafka import KafkaProducer
from dateutil.parser import parse
import pytz
from datetime import datetime

# Define our Twitter credentials in file `creds.py`
from creds import *
    
# Authenticate against the Twitter API with the keys loaded from `creds.py`
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
api = tweepy.API(auth)

# Streaming configuration: display timezone, source account, target Kafka topic
athens = pytz.timezone("Europe/Athens")
userID = "@sgioldasis"
topic = "savas"

# Only tweets created after this instant are forwarded to Kafka.
# To replay older tweets, start from a fixed timestamp instead, e.g.:
#   last_processed_ts = parse('2021-11-16 07:33:00+02:00')
last_processed_ts = datetime.now(athens)

# Producer used to publish the tweet texts to the Kafka broker
producer = KafkaProducer(bootstrap_servers="broker:29092")

print(f'Reading {userID} tweets from {last_processed_ts}')

# Poll the user's timeline forever and forward every not-yet-seen tweet to Kafka.

# Number of tweets requested per poll (the API accepts at most 200).
# Asking for more than one matters for two reasons:
#   * several tweets may be posted between two polls, and
#   * with include_rts=False the response can contain fewer tweets than
#     requested -- possibly none -- because retweets are dropped from it.
fetch_count = 20

try:
    while True:

        # Get the most recent tweets from the Twitter API
        tweets = api.user_timeline(screen_name=userID,
                                   count=fetch_count,
                                   include_rts=False,
                                   # Necessary to keep full_text,
                                   # otherwise the text is truncated to 140 characters
                                   tweet_mode='extended'
                                   )

        # Keep only the tweets created after the last one we forwarded.
        # An empty response simply yields an empty list here instead of
        # failing with an IndexError.
        new_tweets = [tweet for tweet in tweets
                      if tweet.created_at > last_processed_ts]

        # Forward oldest first so Kafka receives the tweets in posting order
        # and last_processed_ts only ever moves forward.
        for info in sorted(new_tweets, key=lambda tweet: tweet.created_at):

            # Print Tweet (we can also print info.id)
            print(f"{info.created_at.astimezone(athens)} {info.full_text}")

            # Send Tweet text to Kafka
            producer.send(topic, info.full_text.encode('utf-8'))

            # Update last processed timestamp
            last_processed_ts = info.created_at

        # Sleep for a while before polling again
        time.sleep(10)
finally:
    # send() only queues records; make sure anything still buffered is
    # delivered and the connection is released when the loop is interrupted
    # (e.g. kernel interrupt) or fails.
    producer.flush()
    producer.close()
        


Now we can run `tweets-visualizer.ipynb`