# Load Followers

This notebook reads usernames from the 'users' Kafka topic, looks up each user's Twitter followers, and publishes the results to the 'followers' topic.

In [None]:
! pip install kafka-python lxml bs4 requests twint nest_asyncio

In [1]:
import json
import twint
from kafka import KafkaConsumer, KafkaProducer
from neo4j import GraphDatabase

In [2]:
def publish_message(producer_instance, topic_name, key, value):
    """Send one key/value record to a Kafka topic and flush it immediately.

    Args:
        producer_instance: Object exposing ``send(topic, key=..., value=...)``
            and ``flush()``.
        topic_name: Name of the topic to publish to.
        key: Record key as text; encoded to UTF-8 bytes before sending.
        value: Record value as text; encoded to UTF-8 bytes before sending.

    Returns:
        None. Any ``Exception`` raised while encoding, sending or flushing is
        caught and reported on stdout instead of propagating (best effort).
    """
    try:
        # Encode the key first, then the value, so an un-encodable key fails
        # before anything is handed to the producer.
        encoded_key, encoded_value = (
            bytes(text, encoding='utf-8') for text in (key, value)
        )
        producer_instance.send(
            topic_name,
            key=encoded_key,
            value=encoded_value,
        )
        producer_instance.flush()
        print('Message published successfully.')
    except Exception as error:
        print('Exception in publishing message')
        print(str(error))


def connect_kafka_producer(server):
    """Create a KafkaProducer connected to a single bootstrap server.

    Args:
        server: Broker address as a ``host:port`` string.

    Returns:
        The connected ``KafkaProducer``, or ``None`` if constructing it raised
        an ``Exception`` (the error is printed to stdout).

    Note:
        The return statement is deliberately outside any ``finally`` clause.
        Returning from ``finally`` discards whatever exception is in flight,
        which would silently swallow ``KeyboardInterrupt``/``SystemExit`` while
        the connection attempt is blocking.
    """
    try:
        return KafkaProducer(bootstrap_servers=[server], api_version=(0, 10))
    except Exception as ex:
        print('Exception while connecting Kafka')
        print(str(ex))
        return None

In [3]:
# Connection settings used by the cells below.
kafka_broker = 'localhost:9092'
users_topic = "users"
followers_topic = "followers"

# Consumer over the 'users' topic: starts from the earliest available offset
# and JSON-decodes each record value. With consumer_timeout_ms set, iterating
# the consumer ends once no new message arrives within that many milliseconds.
users_consumer = KafkaConsumer(
    users_topic,
    bootstrap_servers=[kafka_broker],
    api_version=(0, 10),
    auto_offset_reset='earliest',
    consumer_timeout_ms=1000,
    value_deserializer=json.loads,
)

In [4]:
# Drain the 'users' topic and keep only the username of each record.
users = [record.value["username"] for record in users_consumer]

# Preview the first few usernames as a sanity check.
print(users[:10])

['webmink', 'Synechron', 'flablog', 'didierdaglinckx', 'DailyPythonInfo', 'shobeirf', 'LitteraCarolina', 'continentspirit', 'jepsen_io', 'mvriel']


In [5]:
# NOTE(review): presumably applied so that the twint calls further down can run
# their own event loop inside the notebook's already-running one — confirm
# against the nest_asyncio/twint documentation.
import nest_asyncio
nest_asyncio.apply()

In [None]:
# For every username read from the 'users' topic, fetch that user's followers
# with twint and publish one {"username", "followers"} document to the
# 'followers' topic.
kafka_producer = connect_kafka_producer(kafka_broker)
if kafka_producer is None:
    # connect_kafka_producer returns None when the connection fails; stop here
    # rather than scraping followers that can never be published.
    raise RuntimeError("Could not connect to Kafka broker at " + kafka_broker)

# Iterate the usernames collected from the 'users' topic (not a hard-coded
# sample), as described at the top of the notebook.
for username in users:
    print(username)
    config = twint.Config()
    config.Username = username
    config.User_full = False
    config.Store_object = True
    config.Hide_output = True

    try:
        twint.run.Followers(config)
        # twint accumulates results in a module-level dict keyed by username;
        # a user with no scraped followers may be absent from it.
        scraped = twint.output.follow_object
        follower_names = scraped.get(username, {}).get("followers", [])
    finally:
        # Always reset the shared store so one user's results (or a failed
        # run's leftovers) never leak into the next iteration.
        twint.output.follow_object = {}

    document = {"username": username, "followers": follower_names}
    publish_message(kafka_producer, followers_topic, "follower", json.dumps(document))

webmink


In [None]:
# Read every document from the 'followers' topic and record FOLLOWS
# relationships in Neo4j between users that already exist in the graph.
followers_consumer = KafkaConsumer(
    followers_topic,
    bootstrap_servers=[kafka_broker],
    api_version=(0, 10),
    auto_offset_reset='earliest',
    consumer_timeout_ms=1000,
    value_deserializer=json.loads,
)
# NOTE(review): credentials are hard-coded here; consider supplying them via
# environment variables before sharing this notebook.
driver = GraphDatabase.driver("bolt://localhost", auth=("neo4j", "zeppelin"))

# Marks the user as imported, then links each follower that is already a User
# node. Followers with no matching node are skipped by the inner MATCH.
IMPORT_FOLLOWERS_QUERY = """
MATCH (u:User {username: $username})
SET u.followersImported = true
WITH u
UNWIND $followers AS follower
MATCH (f:User {username: follower})
MERGE (f)-[:FOLLOWS]->(u)
"""


def _follower_batches(document):
    """Yield ``(username, followers)`` pairs from one 'followers' message.

    Messages are published as ``{"username": <str>, "followers": [<str>, ...]}``.
    The per-user mapping shape ``{<username>: {"followers": [...]}}`` that the
    previous version of this loop expected is still accepted, so older records
    replayed from the start of the topic do not fail.
    """
    if isinstance(document.get("followers"), list):
        yield document["username"], document["followers"]
    else:
        for username, entry in document.items():
            yield username, entry["followers"]


with driver.session() as session:
    for message in followers_consumer:
        for username, follower_names in _follower_batches(message.value):
            params = {"followers": follower_names, "username": username}
            result = session.run(IMPORT_FOLLOWERS_QUERY, params)
            # consume() exhausts the result and returns its summary.
            print(result.consume().counters)

In [None]:
# Publish one hand-written document to the 'followers' topic.
kafka_producer = connect_kafka_producer(kafka_broker)
sample_document = {"username": "markhneedham", "followers": ["neo4j"]}
publish_message(
    kafka_producer,
    followers_topic,
    "follower",
    json.dumps(sample_document),
)