In [1]:
import multiprocessing
import tweepy
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm.notebook import tqdm
from config import *

In [2]:
def crawl_tweet_dataframe(hashtag, count, max_tweets=None):
    """
    Page backwards through Twitter search results and collect them in a DataFrame.

    Get KEY and ACCESS TOKEN from https://developer.twitter.com/en/apps

    Parameters:
        hashtag: the search query string of 500 characters maximum, including operators.
        count: the number of results to try and retrieve per page.
        max_tweets: stop requesting further pages once at least this many tweets
            have been collected. Whole pages are kept, so the result may exceed
            this number by up to one page. When None, the module-level
            ``maxTweets`` setting is used (backward-compatible behaviour).

    Returns:
        A DataFrame with the columns "user", "text", "date", "likes" and
        "location", one row per tweet. The frame is empty (but still has these
        columns) when the search returns nothing.
    """
    if max_tweets is None:
        # Backward compatibility: earlier versions always read this global.
        max_tweets = maxTweets

    authentication = tweepy.OAuthHandler(CONSUMER_KEY, CONSUMER_SECRET)
    authentication.set_access_token(ACCESS_TOKEN, ACCESS_TOKEN_SECRET)
    api = tweepy.API(authentication, wait_on_rate_limit=True, wait_on_rate_limit_notify=True)

    columns = ["user", "text", "date", "likes", "location"]
    rows = []
    fetched = 0
    last_seen_id = None  # id of the final tweet on the previous page

    while fetched < max_tweets:
        query = dict(q=hashtag, count=count, result_type="recent", tweet_mode="extended")
        if last_seen_id is not None:
            # Ask only for tweets with a smaller id than the last one already
            # collected, so consecutive pages do not overlap.
            query["max_id"] = str(last_seen_id - 1)

        page = api.search(**query)
        if not page:
            print("Tweet Habis")
            break

        for tweet in page:
            coordinates = tweet.coordinates
            rows.append([
                tweet.user.screen_name,
                tweet.full_text,
                tweet.created_at,
                tweet.favorite_count,
                coordinates["coordinates"] if coordinates is not None else None,
            ])

        fetched += len(page)
        last_seen_id = page[-1].id

    # Passing the column names to the constructor keeps the schema intact even
    # when no rows were collected; assigning .columns afterwards would raise.
    return pd.DataFrame(rows, columns=columns)

In [10]:
class MaxListener(tweepy.StreamListener):
    """Stream listener that echoes every raw payload it receives to stdout."""

    def on_error(self, status_code):
        """Return False for status 420; return None for any other status.

        NOTE(review): in Tweepy 3.x a False return from on_error disconnects
        the stream, and 420 is presumably Twitter's rate-limit status -
        confirm against the Tweepy streaming documentation.
        """
        return False if status_code == 420 else None

    def process_data(self, raw_data):
        """Print the raw payload exactly as it was delivered."""
        print(raw_data)

    def on_data(self, raw_data):
        """Hand the payload to process_data and return True."""
        self.process_data(raw_data)
        return True
        
class MaxStream():
    """Small wrapper that owns a tweepy.Stream and filters it by one keyword."""

    def __init__(self, auth, listener):
        """Build the underlying stream from an auth handler and a listener."""
        stream_options = {"auth": auth, "listener": listener}
        self.stream = tweepy.Stream(**stream_options)

    def start(self, keyword):
        """Start filtering the stream, tracking the single given keyword."""
        # The filter call takes a list of terms, so wrap the keyword in one.
        self.stream.filter(track=[keyword])
        
if __name__ == "__main__":
    # Authenticate with the credentials pulled in from the config module.
    auth = tweepy.OAuthHandler(CONSUMER_KEY, CONSUMER_SECRET)
    auth.set_access_token(ACCESS_TOKEN, ACCESS_TOKEN_SECRET)

    # Wire a printing listener into the stream wrapper.
    listener = MaxListener()
    stream = MaxStream(auth, listener)

    # Track the keyword "python"; every matching payload is printed by the listener.
    stream.start("python")

{"created_at":"Sat Jun 13 09:07:01 +0000 2020","id":1271730784211283968,"id_str":"1271730784211283968","text":"RT @cssiridna: building a web app with flask. it's hard for newbie like me but it's exciting \ud83d\ude48 #coding #flask #python #webapp #100daysofcode","source":"\u003ca href=\"https:\/\/nlogn.in\" rel=\"nofollow\"\u003enlognrobot\u003c\/a\u003e","truncated":false,"in_reply_to_status_id":null,"in_reply_to_status_id_str":null,"in_reply_to_user_id":null,"in_reply_to_user_id_str":null,"in_reply_to_screen_name":null,"user":{"id":1260062769543434240,"id_str":"1260062769543434240","name":"nlognbot","screen_name":"nlognbot","location":null,"url":"http:\/\/nlogn.in","description":"Hi, I'm a bot that like and retweets #100daysofcode #javascript #nodejs and #nlognco.\nYou can follow me.\ud83d\ude09\n\nP.S. I was created by @nlognco and they are awesome.","translator_type":"none","protected":false,"verified":false,"followers_count":530,"friends_count":1,"listed_count":23,"favourites_cou

{"created_at":"Sat Jun 13 09:07:05 +0000 2020","id":1271730802942816256,"id_str":"1271730802942816256","text":"RT @Inspire90434512: #javascript #Java #Jobs #Python #technology #CodeNewbie #code #100DaysOfCode #Eclipse #Developer #management #Software\u2026","source":"\u003ca href=\"https:\/\/twitter.com\/learn__together\" rel=\"nofollow\"\u003elearn__together\u003c\/a\u003e","truncated":false,"in_reply_to_status_id":null,"in_reply_to_status_id_str":null,"in_reply_to_user_id":null,"in_reply_to_user_id_str":null,"in_reply_to_screen_name":null,"user":{"id":1224163949957939201,"id_str":"1224163949957939201","name":"Let's learn together \ud83e\udd16","screen_name":"learn__together","location":"Japan","url":"https:\/\/toshiya-marukubo.github.io\/","description":"I am a bot \ud83e\udd16 I am created by @toshiyamarukubo to retweet specific hashtags, #100DaysOfCode #WomenWhoCode and so on. Let's learn together \ud83d\ude03","translator_type":"none","protected":false,"verified":false,"followers_

{"created_at":"Sat Jun 13 09:07:13 +0000 2020","id":1271730834404302848,"id_str":"1271730834404302848","text":"RT @omarqe: Kerjaya dan teknologi\/programming language berkaitan:\n\n1. Software Engineer (Backend)\n- Python\n- PHP\n- Golang\n- Java\n- Ruby\n- S\u2026","source":"\u003ca href=\"http:\/\/twitter.com\/download\/android\" rel=\"nofollow\"\u003eTwitter for Android\u003c\/a\u003e","truncated":false,"in_reply_to_status_id":null,"in_reply_to_status_id_str":null,"in_reply_to_user_id":null,"in_reply_to_user_id_str":null,"in_reply_to_screen_name":null,"user":{"id":1134029534,"id_str":"1134029534","name":"yashi","screen_name":"QoLire","location":null,"url":null,"description":"Look into my eyes. They seem like haters, but yet full with love","translator_type":"none","protected":false,"verified":false,"followers_count":34,"friends_count":640,"listed_count":0,"favourites_count":3912,"statuses_count":2488,"created_at":"Wed Jan 30 13:04:47 +0000 2013","utc_offset":null,"time_zone":null,"g

{"created_at":"Sat Jun 13 09:07:15 +0000 2020","id":1271730844156203008,"id_str":"1271730844156203008","text":"RT @Inspire90434512: #javascript #Java #Jobs #Python #technology #CodeNewbie #code #100DaysOfCode #Eclipse #Developer #management #Software\u2026","source":"\u003ca href=\"https:\/\/google.com\" rel=\"nofollow\"\u003e@erRaghavKhanna\u003c\/a\u003e","truncated":false,"in_reply_to_status_id":null,"in_reply_to_status_id_str":null,"in_reply_to_user_id":null,"in_reply_to_user_id_str":null,"in_reply_to_screen_name":null,"user":{"id":1182866592138350592,"id_str":"1182866592138350592","name":"Inferno","screen_name":"theInfernobot","location":null,"url":null,"description":"Hi! I am a bot made by @erRaghavkhanna I'm programmed to retweet, like & reply #30daysofkotlin #100daysofcode #JavaScript. Please support me by following me\ud83d\ude0d","translator_type":"none","protected":false,"verified":false,"followers_count":240,"friends_count":0,"listed_count":16,"favourites_count":5448,"statu

{"created_at":"Sat Jun 13 09:07:17 +0000 2020","id":1271730851039121411,"id_str":"1271730851039121411","text":"You are speaking my mind right now. The apprehension I suffer while waiting for feedback on anything academic is si\u2026 https:\/\/t.co\/Kd4meQ5luN","source":"\u003ca href=\"http:\/\/twitter.com\/download\/android\" rel=\"nofollow\"\u003eTwitter for Android\u003c\/a\u003e","truncated":true,"in_reply_to_status_id":null,"in_reply_to_status_id_str":null,"in_reply_to_user_id":null,"in_reply_to_user_id_str":null,"in_reply_to_screen_name":null,"user":{"id":2304685326,"id_str":"2304685326","name":"emmanuel essel\ud83c\uddec\ud83c\udded","screen_name":"EsselDelta","location":"Accra, Ghana","url":null,"description":"Development Communications Academic and practitioner\/\/ PhD Candidate University of Kwazu-Natal, Durban, SA \/\/ Research consultant. Sports Journalist.","translator_type":"none","protected":false,"verified":false,"followers_count":296,"friends_count":372,"listed_count":3

{"created_at":"Sat Jun 13 09:07:21 +0000 2020","id":1271730868093140992,"id_str":"1271730868093140992","text":"RT @Inspire90434512: #javascript #Java #Jobs #Python #technology #CodeNewbie #code #100DaysOfCode #Eclipse #Developer #management #Software\u2026","source":"\u003ca href=\"https:\/\/nlogn.in\" rel=\"nofollow\"\u003enlognrobot\u003c\/a\u003e","truncated":false,"in_reply_to_status_id":null,"in_reply_to_status_id_str":null,"in_reply_to_user_id":null,"in_reply_to_user_id_str":null,"in_reply_to_screen_name":null,"user":{"id":1260062769543434240,"id_str":"1260062769543434240","name":"nlognbot","screen_name":"nlognbot","location":null,"url":"http:\/\/nlogn.in","description":"Hi, I'm a bot that like and retweets #100daysofcode #javascript #nodejs and #nlognco.\nYou can follow me.\ud83d\ude09\n\nP.S. I was created by @nlognco and they are awesome.","translator_type":"none","protected":false,"verified":false,"followers_count":530,"friends_count":1,"listed_count":23,"favourites_count":2,

KeyboardInterrupt: 

In [None]:
# Search settings for the crawl.
hashtag = "#trump"
tweetsPerQry = 100   # requested page size
# crawl_tweet_dataframe reads maxTweets as a module-level name, so it has to be
# defined before the call below.
maxTweets = 100000

tweet_df = crawl_tweet_dataframe(hashtag=hashtag, count=tweetsPerQry)

Rate limit reached. Sleeping for: 698
Rate limit reached. Sleeping for: 707


In [None]:
# Report how many rows were collected, then preview the first few.
n_tweets = tweet_df.shape[0]
print("# of tweets: {}".format(n_tweets))
tweet_df.head()

In [None]:
# Visualise which cells of tweet_df are null (one heatmap row per tweet).
fig, ax = plt.subplots(figsize=(10, 4))
null_mask = tweet_df.isnull()
sns.heatmap(null_mask, cbar=True, cmap=sns.color_palette("GnBu_d"), ax=ax)
ax.set_title("Missing Values Heatmap")
plt.show()