In [10]:
import config
import datetime
# import itertools
import os
import pandas as pd
import requests
import tweepy as tw
import threading
import time
import queue
from pathlib import Path

In [2]:
def get_df():
    """Return a new, empty DataFrame whose columns are the per-tweet fields.

    Returns:
        pandas.DataFrame: zero rows, one column per field name listed below,
        in this exact order.
    """
    tweet_fields = (
        "tweet_id",
        "name",
        "screen_name",
        "retweet_count",
        "text",
        "mined_at",
        "created_at",
        "favourite_count",
        "hashtags",
        "status_count",
        "followers_count",
        "location",
        "source_device",
    )
    empty_frame = pd.DataFrame(columns=list(tweet_fields))
    return empty_frame

In [11]:
#### Twitter API ####
class TweetMiner(object):
    """Mine tweets for a hashtag via the Twitter search API and store them as CSV.

    Rows are collected in memory page by page and written to
    ``<current working directory>/tweets/<YYYY-MM-DD>-<query>.csv``.
    """

    result_minit = 20
    ret = []
    api = False
    # An intermediate save happens whenever the running page counter in
    # `mine_tweets` reaches a multiple of this value.
    checkpoint_pages = 180

    twitter_keys = {
        "consumer_key": config.consumer_key,
        "consumer_secret": config.consumer_secret,
        "access_token_key": config.access_token_key,
        "access_token_secret": config.access_token_secret,
    }

    def __init__(self, keys_dict=twitter_keys, api=api):
        """
        Initialize the miner.

        Args:
            keys_dict: mapping with the keys "consumer_key", "consumer_secret",
                "access_token_key" and "access_token_secret".
            api: optional ready-made API client. When falsy (the default) a new
                ``tweepy.API`` is built from ``keys_dict``.
        """
        if api:
            # Honour a client supplied by the caller instead of discarding it.
            self.api = api
        else:
            auth = tw.OAuthHandler(keys_dict["consumer_key"], keys_dict["consumer_secret"])
            auth.set_access_token(keys_dict["access_token_key"], keys_dict["access_token_secret"])
            # wait_on_rate_limit makes tweepy sleep instead of raising when the
            # rate limit is hit.
            self.api = tw.API(auth, wait_on_rate_limit=True)
        self.twitter_keys = keys_dict
        # Output directory: a "tweets" folder inside the current working directory.
        self.Path = Path(os.getcwd()) / "tweets"

    @staticmethod
    def _tweet_to_record(tweet):
        """Flatten one tweepy status object into a plain dict (one CSV row)."""
        record = {
            "tweet_id": tweet.id,
            "name": tweet.user.name,
            "screen_name": tweet.user.screen_name,
            "retweet_count": tweet.retweet_count,
            "text": tweet.full_text,
            "mined_at": datetime.datetime.now(),
            "created_at": tweet.created_at,
            "favourite_count": tweet.favorite_count,
            "hashtags": tweet.entities.get("hashtags"),
            "status_count": tweet.user.statuses_count,
            "followers_count": tweet.user.followers_count,
            "location": tweet.user.location,
            "source_device": tweet.source,
        }
        try:
            record["retweet_text"] = tweet.retweeted_status.full_text
        except AttributeError:
            # The status has no retweeted_status (or it carries no full_text).
            record["retweet_text"] = "None"
        return record

    def _save_records(self, records, query, date_label, written):
        """Write one batch of rows to ``<self.Path>/<date_label>-<query>.csv``.

        The first write to a given file within a mining run replaces the file
        and emits the header; later batches of the same run are appended, so an
        intermediate save is never overwritten by the next one.

        Args:
            records: list of row dicts produced by `_tweet_to_record`.
            query: the query string used in the file name.
            date_label: date string used in the file name.
            written: set of paths already written during this run (updated in place).

        Returns:
            pathlib.Path: the CSV file the batch belongs to.
        """
        out_dir = Path(self.Path)
        target = out_dir / f"{date_label}-{query}.csv"
        first_write = target not in written
        if not records and not first_write:
            # Nothing new to add to a file that already has its header.
            return target

        out_dir.mkdir(parents=True, exist_ok=True)
        # Build the frame once per batch; with no rows fall back to the empty
        # template so the file still receives a header line.
        frame = pd.DataFrame(records) if records else get_df()
        frame.to_csv(
            target,
            mode="w" if first_write else "a",
            header=first_write,
            index=False,
        )
        written.add(target)
        return target

    def mine_tweets(self, query="BTC"):
        """
        Mine tweets from the query.

        Searches for ``#<query>`` (English, extended tweet mode, 200 per
        request), walking every page the cursor yields, and saves the rows as
        CSV under ``self.Path``. Rows are flushed to disk whenever the page
        counter reaches a multiple of ``checkpoint_pages`` and once more when
        the cursor is exhausted.

        Args:
            query: hashtag text without the leading "#".
        """
        page_num = 1
        records = []
        written = set()  # CSV paths already created during this call

        crypto_query = f"#{query}"
        print("========", query, crypto_query)

        for page in tw.Cursor(
            self.api.search_tweets,
            q=crypto_query,
            lang="en",
            tweet_mode="extended",
            count=200
        ).pages():
            print("........... new page", page_num)
            page_num += 1

            # Accumulate plain dicts; the DataFrame is built once per save
            # instead of being re-allocated for every tweet.
            records.extend(self._tweet_to_record(tweet) for tweet in page)

            if page_num % self.checkpoint_pages == 0:
                date_label = datetime.datetime.now().strftime("%Y-%m-%d")
                print("Saving to file:", date_label)
                self._save_records(records, query, date_label, written)
                print("Resetting df")
                records = []

        date_label = datetime.datetime.now().strftime("%Y-%m-%d")
        self._save_records(records, query, date_label, written)

In [12]:
# Symbols to mine; `TweetMiner.mine_tweets` prepends the "#" itself.
# Commented-out entries are currently disabled.
handle_list = [
    # "BTC",
    # "ETH",
    # "USDT",
    "XRP",
    # "BCH",
    # "ADA",
    # "BSV",
    # "LTC",
    # "LINK",
    # "BNB",
    # "EOS",
    # "TRON",
]

# Miner instance built with the default keys taken from `config`.
miner = TweetMiner()

In [13]:
# Queue of (poll_iteration, name) jobs handed from the publisher to the listener.
update_queue = queue.Queue()
# Gate the publisher waits on after each sleep; starts out cleared.
should_publish = threading.Event()


def start_publisher(iterations=10, interval_seconds=900):
    """
    Run the publisher loop. (This is the thread that enqueues mining jobs.)

    For each round, a ``(poll_iteration, name)`` job is put on ``update_queue``
    for the first entry of ``handle_list``. The function then sleeps, waits
    until ``should_publish`` is set, and blocks until every queued job has been
    marked done before starting the next round.

    Args:
        iterations: number of publishing rounds (default 10).
        interval_seconds: pause after enqueueing a job, in seconds (default 900).
    """
    starttime = time.time()
    print("Start polling", starttime)
    poll_iteration = 1

    for i in range(iterations):
        # Only the first handle is published per round.
        for name in handle_list[:1]:
            print(i, poll_iteration, "\rpublishing update ", end="")
            update_queue.put((poll_iteration, name))
            poll_iteration += 1
            time.sleep(interval_seconds)
            print("\rawaiting for publishing update", end="")
            # Pause here while the event is cleared.
            should_publish.wait()
            # Do not enqueue the next job until the previous one is finished.
            update_queue.join()

def start_update_listener():
    """
    Run the update listener loop. (This is the thread that will be listening for updates)

    Blocks on ``update_queue`` forever; for every ``(poll_iteration, name)``
    job it calls ``miner.mine_tweets(query=name)``. A failing job is reported
    and the loop continues with the next one.
    """
    while True:
        poll_iteration, name = update_queue.get()

        print(" --- ", name)
        try:
            miner.mine_tweets(query=name)
        except Exception as e:
            # Best effort: report and keep the listener alive for later jobs.
            print(f"Failed to mine tweets for {name}: {e}")
        finally:
            # Always acknowledge the job; otherwise a failed job leaves the
            # publisher blocked forever in update_queue.join().
            update_queue.task_done()

# Create (but do not start) the two worker threads. Both are daemon threads,
# so they do not keep the interpreter alive on their own.
publisher_thread = threading.Thread(daemon=True, target=start_publisher)
listener_thread = threading.Thread(daemon=True, target=start_update_listener)

In [14]:
# Launch the publisher first, then the listener.
for worker in (publisher_thread, listener_thread):
    worker.start()
# Set the event so the publisher's should_publish.wait() returns immediately.
should_publish.set()

Start polling 1640074843.7755203
publishing update  ---  XRP
........... new page 1
........... new page 2
........... new page 3
........... new page 4
........... new page 5
........... new page 6
........... new page 7
........... new page 8
........... new page 9
........... new page 10
........... new page 11
........... new page 12
........... new page 13
........... new page 14
........... new page 15
........... new page 16
........... new page 17
........... new page 18
........... new page 19
........... new page 20
........... new page 21
........... new page 22
........... new page 23
........... new page 24
........... new page 25
........... new page 26
........... new page 27
........... new page 28
........... new page 29
........... new page 30
........... new page 31
........... new page 32
........... new page 33


In [15]:
# Clear the event: the publisher will block at its next should_publish.wait()
# and enqueue no further jobs until the event is set again. The listener never
# checks this event, so a mining job that is already running keeps going.
should_publish.clear()

........... new page 34
........... new page 35
........... new page 36
........... new page 37
........... new page 38
........... new page 39
........... new page 40
........... new page 41
........... new page 42
........... new page 43
........... new page 44
........... new page 45
........... new page 46
........... new page 47
........... new page 48
........... new page 49
........... new page 50
........... new page 51
........... new page 52
........... new page 53
........... new page 54
........... new page 55
........... new page 56
........... new page 57
........... new page 58
........... new page 59
........... new page 60
........... new page 61
........... new page 62
........... new page 63
........... new page 64
........... new page 65
........... new page 66
........... new page 67
........... new page 68
........... new page 69
........... new page 70
........... new page 71
........... new page 72
........... new page 73
........... new page 74
........... new 

Rate limit reached. Sleeping for: 581
