# Complex Sentiment Index :: download crypto tweets

### Max tweets per hour

In [1]:
# Hourly tweet budget: 2,000,000 tweets spread evenly over 31 days of 24 hours.
# NOTE(review): 2,000,000 is presumably the API's monthly tweet cap -- confirm.
2_000_000/31/24

2688.172043010753

### Max coins to track (tweets per sample = 200)

In [2]:
# Whole number of coins that fit into the hourly budget (2688, the result above
# rounded down) when each coin consumes 200 tweets per sample.
int(2688/200)

13

In [3]:
# Ticker symbols whose hashtags are tracked: 13 entries, matching the
# "max coins to track" figure computed above.
coins = [
    'BTC', 'ETH', 'BNB', 'DOT', 'SOL',      # 1-5
    'LUNA', 'ADA', 'CRO', 'AXS', 'SAND',    # 6-10
    'DOGE', 'SHIB', 'MATIC',                # 11-13
]

## Tweet Count

In [13]:
from datetime import datetime, timedelta

def get_one_week_ago(hour_shift=0, origin=None):
    """Return the hour boundary just under one week before ``origin``.

    The result is ``origin - 7 days + (hour_shift + 1) hours``, truncated to
    the start of that hour and formatted as ``YYYY-MM-DDTHH:MM:SSZ``.

    Args:
        hour_shift: Extra hours added on top of the built-in one-hour offset.
        origin: Reference ``datetime``. A naive value is taken to already be
            in UTC; an aware value is converted to UTC. Defaults to the
            current UTC time.

    Returns:
        The timestamp as a string with a literal ``Z`` (UTC) suffix.
    """
    # Local import: the surrounding cell only imports datetime and timedelta.
    from datetime import timezone

    if origin is None:
        # The output is labelled "Z", so the default reference has to be UTC;
        # the local wall clock would be off by the machine's UTC offset.
        origin = datetime.now(timezone.utc)
    elif origin.tzinfo is not None:
        origin = origin.astimezone(timezone.utc)

    dt = origin - timedelta(days=7) + timedelta(hours=hour_shift + 1)
    dt = dt.replace(minute=0, second=0, microsecond=0)
    return dt.strftime("%Y-%m-%dT%H:%M:%SZ")

def get_last_hour(hour_shift=1, origin=None):
    """Return an hour boundary relative to ``origin``.

    The result is ``origin + (hour_shift - 1) hours`` truncated to the start
    of that hour, so ``hour_shift=1`` gives the start of the current hour and
    ``hour_shift=0`` the start of the previous one.

    Args:
        hour_shift: Hour offset; see above.
        origin: Reference ``datetime``. A naive value is taken to already be
            in UTC; an aware value is converted to UTC. Defaults to the
            current UTC time.

    Returns:
        The timestamp formatted as ``YYYY-MM-DDTHH:MM:SSZ``.
    """
    # Local import: the surrounding cell only imports datetime and timedelta.
    from datetime import timezone

    if origin is None:
        # The output is labelled "Z", so the default reference has to be UTC;
        # the local wall clock would be off by the machine's UTC offset.
        origin = datetime.now(timezone.utc)
    elif origin.tzinfo is not None:
        origin = origin.astimezone(timezone.utc)

    dt = origin + timedelta(hours=hour_shift - 1)
    dt = dt.replace(minute=0, second=0, microsecond=0)
    return dt.strftime("%Y-%m-%dT%H:%M:%SZ")

# Quick check: with hour_shift=0 the result is the start of the previous hour.
get_last_hour(hour_shift=0)

'2022-02-05T18:00:00Z'

In [5]:
import requests
import json

def fetch_tweet_count(coin, start_time=None):
    """Request hourly counts of tweets carrying the hashtag ``#<coin>``.

    Calls the ``/2/tweets/counts/recent`` endpoint with ``granularity=hour``.

    Args:
        coin: Ticker symbol; the query sent is ``#<coin>``.
        start_time: Timestamp string sent as ``start_time``. Defaults to
            ``get_one_week_ago()``.

    Returns:
        The decoded JSON body, returned as-is. No status check is done here,
        so error payloads are handed back to the caller for inspection.

    Raises:
        RuntimeError: If the ``TWITTER_BEARER_TOKEN`` environment variable is
            not set.
    """
    import os  # local import: this cell only imports requests and json

    if start_time is None:
        start_time = get_one_week_ago()

    # The credential is read from the environment so it never lives in the
    # notebook (and therefore never ends up in version control).
    bearer_token = os.environ.get("TWITTER_BEARER_TOKEN")
    if not bearer_token:
        raise RuntimeError(
            "Set the TWITTER_BEARER_TOKEN environment variable to a Twitter API v2 bearer token."
        )

    # Let requests build and percent-encode the query string.
    params = {
        "query": f"#{coin}",
        "start_time": start_time,
        "granularity": "hour",
    }
    headers = {"Authorization": f"Bearer {bearer_token}"}

    # A timeout keeps a stalled connection from blocking the hourly loop forever.
    response = requests.get(
        "https://api.twitter.com/2/tweets/counts/recent",
        params=params,
        headers=headers,
        timeout=30,
    )

    return json.loads(response.text)

In [6]:
import pandas as pd
import matplotlib.pyplot as plt

def tweet_count_collection(coin, show=False, start_time=None):
    """Fetch hourly tweet counts for ``coin``, save them as CSV and return them.

    Args:
        coin: Ticker symbol passed on to ``fetch_tweet_count``.
        show: When true, plot the rolling 24-hour sum of ``tweet_count``.
        start_time: Timestamp string for the start of the window. Defaults to
            ``get_one_week_ago()``.

    Returns:
        A DataFrame indexed by ``end`` with the last row removed, or ``None``
        when the response carries no usable ``data`` list (the payload is
        printed in that case).
    """
    import os  # local import: this cell only imports pandas and matplotlib

    if start_time is None:
        start_time = get_one_week_ago()
    data = fetch_tweet_count(coin, start_time=start_time)
    print(start_time)

    # Check for the payload explicitly rather than with a bare ``except``,
    # which would also swallow KeyboardInterrupt and unrelated bugs. An empty
    # list is treated like an error because it has no columns to work with.
    if not isinstance(data, dict) or not data.get('data'):
        print(f"Error in tweet_count_collection: {data}")
        return None

    df = pd.DataFrame(data['data'])
    df['start'] = pd.to_datetime(df['start'])
    df['end'] = pd.to_datetime(df['end'])
    df = df.set_index('end')
    # drop last row (presumably the still-incomplete current hour -- TODO confirm)
    df = df[:-1].copy()

    if show:
        df['tweet_count'].rolling(window=24).sum().dropna().plot(
            legend='best', title=f"#{coin} tweets in the previous 24h")
        plt.show()

    # Create the output directory on first use so a fresh checkout works.
    os.makedirs("tweet-counts", exist_ok=True)
    # ':' is replaced because it is not allowed in file names on some systems.
    df.to_csv(f"tweet-counts/{coin}-{start_time.replace(':', '-')}.csv")

    return df

In [7]:
def fetch_tweets(coin, start_time=None, end_time=None):
    """Request up to 100 recent English tweets carrying the hashtag ``#<coin>``.

    Calls the ``/2/tweets/search/recent`` endpoint and asks for creation time,
    public metrics and referenced tweets, with authors and referenced tweets
    expanded.

    Args:
        coin: Ticker symbol; the query sent is ``#<coin> lang:en``.
        start_time: Timestamp string for the start of the window. Defaults to
            ``get_last_hour(hour_shift=0)``.
        end_time: Timestamp string for the end of the window. Defaults to
            ``get_last_hour(hour_shift=1)``.

    Returns:
        The decoded JSON body, returned as-is. No status check is done here,
        so error payloads are handed back to the caller for inspection.

    Raises:
        RuntimeError: If the ``TWITTER_BEARER_TOKEN`` environment variable is
            not set.
    """
    import os  # local import: the notebook does not import os at this point

    if start_time is None:
        start_time = get_last_hour(hour_shift=0)
    if end_time is None:
        end_time = get_last_hour(hour_shift=1)

    # The credential is read from the environment so it never lives in the
    # notebook (and therefore never ends up in version control).
    bearer_token = os.environ.get("TWITTER_BEARER_TOKEN")
    if not bearer_token:
        raise RuntimeError(
            "Set the TWITTER_BEARER_TOKEN environment variable to a Twitter API v2 bearer token."
        )

    # Let requests percent-encode the values; the query contains '#', ':' and
    # a space, which a hand-built URL leaves partly unescaped.
    params = {
        "start_time": start_time,
        "end_time": end_time,
        "query": f"#{coin} lang:en",
        "tweet.fields": "created_at,public_metrics,referenced_tweets",
        "expansions": "author_id,referenced_tweets.id",
        "user.fields": "created_at",
        "max_results": 100,
    }
    headers = {"Authorization": f"Bearer {bearer_token}"}

    # A timeout keeps a stalled connection from blocking the hourly loop forever.
    response = requests.get(
        "https://api.twitter.com/2/tweets/search/recent",
        params=params,
        headers=headers,
        timeout=30,
    )

    return json.loads(response.text)

In [8]:
import numpy as np

def tweet_collection(coin, start_time=None, end_time=None):
    """Fetch one window of tweets for ``coin``, save them as CSV and return them.

    For retweets and quotes the stored ``text`` is that of the referenced
    tweet, and ``orig_id`` / ``orig_created_at`` identify it; otherwise both
    are NaN.

    Args:
        coin: Ticker symbol passed on to ``fetch_tweets``.
        start_time: Timestamp string for the start of the window. Defaults to
            ``get_last_hour(hour_shift=0)``.
        end_time: Timestamp string for the end of the window. Defaults to
            ``get_last_hour(hour_shift=1)``.

    Returns:
        A DataFrame indexed by tweet ``id``, or ``None`` when the response has
        no ``data`` entries (the payload is printed in that case).
    """
    import os  # local import: this cell only imports numpy

    if start_time is None:
        start_time = get_last_hour(hour_shift=0)
    if end_time is None:
        end_time = get_last_hour(hour_shift=1)
    print(f"{start_time} -> {end_time}")

    out = fetch_tweets(coin, start_time=start_time, end_time=end_time)

    # Only a missing/empty 'data' list makes the response unusable. This is
    # checked explicitly instead of with a bare ``except``, which would also
    # swallow KeyboardInterrupt and unrelated bugs.
    if not isinstance(out, dict) or not out.get('data'):
        print(f"Error in tweet_collection: {out}")
        return None

    # Expanded referenced tweets are optional: when they are absent the batch
    # is still valid, so fall back to an empty lookup table.
    index = {t['id']: t for t in out.get('includes', {}).get('tweets', [])}

    tdata = []
    for tweet in out['data']:
        tweet_text = tweet['text']
        orig_id = np.nan
        orig_created_at = np.nan
        for reft in tweet.get('referenced_tweets', []):
            if reft['type'] not in ('retweeted', 'quoted'):
                continue
            original = index.get(reft['id'])
            if original is None:
                # The referenced tweet was not expanded in this response;
                # keep the tweet's own text rather than failing the batch.
                continue
            tweet_text = original['text']
            orig_id = original['id']
            orig_created_at = original['created_at']

        metrics = tweet['public_metrics']
        tdata.append({'id': tweet['id'], 'text': tweet_text, 'author_id': tweet['author_id'],
                      'retweets': metrics['retweet_count'],
                      'replies': metrics['reply_count'],
                      'likes': metrics['like_count'],
                      'quotes': metrics['quote_count'],
                      'created_at': tweet['created_at'],
                      'orig_id': orig_id, 'orig_created_at': orig_created_at,
                     })

    df = pd.DataFrame(tdata)
    df = df.set_index('id')

    # Create the output directory on first use so a fresh checkout works.
    os.makedirs("tweets", exist_ok=True)
    # ':' is replaced because it is not allowed in file names on some systems.
    df.to_csv(f"tweets/{coin}-{start_time.replace(':', '-')}.csv")

    return df

In [9]:
def download():
    """Collect tweet counts and the latest tweet window for every tracked coin.

    Returns:
        A dict keyed by coin symbol; each value is a dict holding the results
        of ``tweet_count_collection`` (``'tweet_count'``) and
        ``tweet_collection`` (``'tweets'``).
    """
    # Compute the windows once so that every coin is queried over the same ranges.
    counts_from = get_one_week_ago()
    window_from = get_last_hour(hour_shift=0)
    window_to = get_last_hour(hour_shift=1)

    return {
        coin: {
            'tweet_count': tweet_count_collection(coin, start_time=counts_from),
            'tweets': tweet_collection(coin, start_time=window_from, end_time=window_to),
        }
        for coin in coins
    }

In [11]:
import pause
import pickle


def do_something(iterat):
    """Run one download pass and pickle its result.

    Args:
        iterat: Iteration number, used in the pickle file name under ``pickles/``.
    """
    snapshot = download()
    target = f'pickles/tweet-data-dfs-{iterat}.pkl'
    with open(target, 'wb') as sink:
        pickle.dump(snapshot, sink)

def start(last_iterat=10):
    """Run the hourly collection loop; never returns.

    Each pass increments the iteration counter, calls ``do_something`` with
    it, then sleeps until one minute past the next full hour.

    Args:
        last_iterat: Number of the last iteration already completed; the
            first pass of this run is numbered ``last_iterat + 1``. The
            default of 10 keeps the previous hard-coded behaviour.
    """
    iterat = last_iterat
    while True:
        iterat += 1
        do_something(iterat)

        # sleep until next iteration: one minute past the next full hour
        dt = datetime.now().replace(minute=0, second=0, microsecond=0) + timedelta(hours=1, minutes=1)
        print(f"Waiting until {dt.strftime('%Y-%m-%d %H:%M:%S')} local time...")
        pause.until(dt)

# start()

## Recover missing data from 4th Feb '22 blackout

In [17]:
# Replay the hourly collection for eight consecutive hours of 2022-02-04,
# passing a pretended "now" as ``origin`` to the time helpers. One dict of
# per-coin results is appended to ``data`` for each replayed hour.
# NOTE(review): in the recorded output below every tweet-count request is
# rejected ("'start_time' must be on or after ..."), because the window start
# derived from the pretended time is older than the API accepts at run time.
data = []

for h in range(1, 9):
    dt = datetime(2022, 2, 4, h, 1) # we pretend to be at this time
    print(dt.strftime('%Y-%m-%d %H:%M:%S'))

    # Same three windows that download() computes, but relative to ``dt``.
    start_time_count = get_one_week_ago(origin=dt)
    start_time = get_last_hour(hour_shift=0, origin=dt)
    end_time = get_last_hour(hour_shift=1, origin=dt)
    
    print(start_time_count)
    print(start_time)
    print(end_time)
    print('---')
    
    # Per-coin results for this replayed hour (same layout as download()).
    dfs = {}
    for coin in coins:
        dfs[coin] = {
            'tweet_count': tweet_count_collection(coin, start_time=start_time_count),
            'tweets': tweet_collection(coin, start_time=start_time, end_time=end_time),        
        }
    data.append(dfs)


2022-02-04 01:01:00
2022-01-28T02:00:00Z
2022-02-04T00:00:00Z
2022-02-04T01:00:00Z
---
2022-01-28T02:00:00Z
Error in tweet_count_collection: {'errors': [{'parameters': {'start_time': ['2022-01-28T02:00Z']}, 'message': "Invalid 'start_time':'2022-01-28T02:00Z'. 'start_time' must be on or after 2022-01-29T19:44Z"}], 'title': 'Invalid Request', 'detail': 'One or more parameters to your request was invalid.', 'type': 'https://api.twitter.com/2/problems/invalid-request'}
2022-02-04T00:00:00Z -> 2022-02-04T01:00:00Z
2022-01-28T02:00:00Z
Error in tweet_count_collection: {'errors': [{'parameters': {'start_time': ['2022-01-28T02:00Z']}, 'message': "Invalid 'start_time':'2022-01-28T02:00Z'. 'start_time' must be on or after 2022-01-29T19:44Z"}], 'title': 'Invalid Request', 'detail': 'One or more parameters to your request was invalid.', 'type': 'https://api.twitter.com/2/problems/invalid-request'}
2022-02-04T00:00:00Z -> 2022-02-04T01:00:00Z
2022-01-28T02:00:00Z
Error in tweet_count_collection: {

2022-01-28T03:00:00Z
Error in tweet_count_collection: {'errors': [{'parameters': {'start_time': ['2022-01-28T03:00Z']}, 'message': "Invalid 'start_time':'2022-01-28T03:00Z'. 'start_time' must be on or after 2022-01-29T19:44Z"}], 'title': 'Invalid Request', 'detail': 'One or more parameters to your request was invalid.', 'type': 'https://api.twitter.com/2/problems/invalid-request'}
2022-02-04T01:00:00Z -> 2022-02-04T02:00:00Z
2022-01-28T03:00:00Z
Error in tweet_count_collection: {'errors': [{'parameters': {'start_time': ['2022-01-28T03:00Z']}, 'message': "Invalid 'start_time':'2022-01-28T03:00Z'. 'start_time' must be on or after 2022-01-29T19:44Z"}], 'title': 'Invalid Request', 'detail': 'One or more parameters to your request was invalid.', 'type': 'https://api.twitter.com/2/problems/invalid-request'}
2022-02-04T01:00:00Z -> 2022-02-04T02:00:00Z
2022-01-28T03:00:00Z
Error in tweet_count_collection: {'errors': [{'parameters': {'start_time': ['2022-01-28T03:00Z']}, 'message': "Invalid 's

2022-01-28T04:00:00Z
Error in tweet_count_collection: {'errors': [{'parameters': {'start_time': ['2022-01-28T04:00Z']}, 'message': "Invalid 'start_time':'2022-01-28T04:00Z'. 'start_time' must be on or after 2022-01-29T19:44Z"}], 'title': 'Invalid Request', 'detail': 'One or more parameters to your request was invalid.', 'type': 'https://api.twitter.com/2/problems/invalid-request'}
2022-02-04T02:00:00Z -> 2022-02-04T03:00:00Z
2022-02-04 04:01:00
2022-01-28T05:00:00Z
2022-02-04T03:00:00Z
2022-02-04T04:00:00Z
---
2022-01-28T05:00:00Z
Error in tweet_count_collection: {'errors': [{'parameters': {'start_time': ['2022-01-28T05:00Z']}, 'message': "Invalid 'start_time':'2022-01-28T05:00Z'. 'start_time' must be on or after 2022-01-29T19:44Z"}], 'title': 'Invalid Request', 'detail': 'One or more parameters to your request was invalid.', 'type': 'https://api.twitter.com/2/problems/invalid-request'}
2022-02-04T03:00:00Z -> 2022-02-04T04:00:00Z
2022-01-28T05:00:00Z
Error in tweet_count_collection: {

2022-01-28T06:00:00Z
Error in tweet_count_collection: {'errors': [{'parameters': {'start_time': ['2022-01-28T06:00Z']}, 'message': "Invalid 'start_time':'2022-01-28T06:00Z'. 'start_time' must be on or after 2022-01-29T19:45Z"}], 'title': 'Invalid Request', 'detail': 'One or more parameters to your request was invalid.', 'type': 'https://api.twitter.com/2/problems/invalid-request'}
2022-02-04T04:00:00Z -> 2022-02-04T05:00:00Z
2022-01-28T06:00:00Z
Error in tweet_count_collection: {'errors': [{'parameters': {'start_time': ['2022-01-28T06:00Z']}, 'message': "Invalid 'start_time':'2022-01-28T06:00Z'. 'start_time' must be on or after 2022-01-29T19:45Z"}], 'title': 'Invalid Request', 'detail': 'One or more parameters to your request was invalid.', 'type': 'https://api.twitter.com/2/problems/invalid-request'}
2022-02-04T04:00:00Z -> 2022-02-04T05:00:00Z
2022-01-28T06:00:00Z
Error in tweet_count_collection: {'errors': [{'parameters': {'start_time': ['2022-01-28T06:00Z']}, 'message': "Invalid 's

2022-01-28T07:00:00Z
Error in tweet_count_collection: {'errors': [{'parameters': {'start_time': ['2022-01-28T07:00Z']}, 'message': "Invalid 'start_time':'2022-01-28T07:00Z'. 'start_time' must be on or after 2022-01-29T19:45Z"}], 'title': 'Invalid Request', 'detail': 'One or more parameters to your request was invalid.', 'type': 'https://api.twitter.com/2/problems/invalid-request'}
2022-02-04T05:00:00Z -> 2022-02-04T06:00:00Z
2022-01-28T07:00:00Z
Error in tweet_count_collection: {'errors': [{'parameters': {'start_time': ['2022-01-28T07:00Z']}, 'message': "Invalid 'start_time':'2022-01-28T07:00Z'. 'start_time' must be on or after 2022-01-29T19:45Z"}], 'title': 'Invalid Request', 'detail': 'One or more parameters to your request was invalid.', 'type': 'https://api.twitter.com/2/problems/invalid-request'}
2022-02-04T05:00:00Z -> 2022-02-04T06:00:00Z
2022-02-04 07:01:00
2022-01-28T08:00:00Z
2022-02-04T06:00:00Z
2022-02-04T07:00:00Z
---
2022-01-28T08:00:00Z
Error in tweet_count_collection: {

2022-01-28T09:00:00Z
Error in tweet_count_collection: {'errors': [{'parameters': {'start_time': ['2022-01-28T09:00Z']}, 'message': "Invalid 'start_time':'2022-01-28T09:00Z'. 'start_time' must be on or after 2022-01-29T19:45Z"}], 'title': 'Invalid Request', 'detail': 'One or more parameters to your request was invalid.', 'type': 'https://api.twitter.com/2/problems/invalid-request'}
2022-02-04T07:00:00Z -> 2022-02-04T08:00:00Z
2022-01-28T09:00:00Z
Error in tweet_count_collection: {'errors': [{'parameters': {'start_time': ['2022-01-28T09:00Z']}, 'message': "Invalid 'start_time':'2022-01-28T09:00Z'. 'start_time' must be on or after 2022-01-29T19:45Z"}], 'title': 'Invalid Request', 'detail': 'One or more parameters to your request was invalid.', 'type': 'https://api.twitter.com/2/problems/invalid-request'}
2022-02-04T07:00:00Z -> 2022-02-04T08:00:00Z
2022-01-28T09:00:00Z
Error in tweet_count_collection: {'errors': [{'parameters': {'start_time': ['2022-01-28T09:00Z']}, 'message': "Invalid 's