In [None]:
import tweepy
from tweepy import Stream
from tweepy import StreamListener
from tweepy import OAuthHandler
import json
import numpy as np
import jsonpickle
import time 

This script will query the Twitter API with given parameters and write the results to a file. We're using the `tweepy` Python library for the API access.

The script expects a file named `.secrets` in the same directory, in which the Twitter API keys are stored. It is a plain text file with each key on its own line and nothing else, in this order: consumer key, consumer secret, access token, access secret.

In [4]:
# Load the four Twitter API credentials from `.secrets` (one per line, in the
# order: consumer key, consumer secret, access token, access secret) and build
# an authenticated tweepy API client.
try:
    with open(".secrets", "r") as f:
        # Strip every line: the raw lines keep their trailing newline, which
        # would otherwise become part of the credential strings. Blank lines
        # (e.g. a trailing empty line) are ignored.
        api_keys = [line.strip() for line in f if line.strip()]
except OSError:
    # Missing or unreadable secrets file: nothing useful can be done without keys.
    print("no secrets file or error")
    raise SystemExit(1)

if len(api_keys) < 4:
    # Fail with a clear message instead of an IndexError further down.
    print("secrets file must contain 4 keys, found {}".format(len(api_keys)))
    raise SystemExit(1)

consumer_key, consumer_secret, access_token, access_secret = api_keys[:4]

auth = OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_secret)

# Let tweepy sleep through rate-limit windows itself and report when it does so.
api = tweepy.API(auth, wait_on_rate_limit=True, wait_on_rate_limit_notify=True)



Here we will create the query from the words and tags we want to look for. Parameter `FILENAME` is expected to contain the name of the `.json` file we want to read and write to.

Tweets have a rolling id which increases over time. In case the script has to be restarted, the existing file is read and the smallest existing id is given as the maximum id to the query, so as not to get duplicates.

In [None]:
# Search terms; they are OR-ed together into a single Twitter search query.
query = [
    "fortnite", "#fortnite",
    "overwatch", "#overwatch",
    "blackops4", "#blackops4",
    "#blackout",
    "#bo4",
    "black ops 4"
]

query_term = " OR ".join(query)

# Line-delimited JSON file (one tweet per line) that is read here and appended to later.
FILENAME = "<TWEET_FILENAME>"


def oldest_tweet_id(path):
    """Return the smallest tweet id stored in the line-delimited JSON file `path`.

    Returns None when the file does not exist or holds no usable tweet.
    Blank lines are ignored. Lines that are not valid JSON objects with an
    "id" key (for example a last line truncated by an interrupted run) are
    reported and skipped, so one bad line does not discard the resume point.
    Other I/O errors propagate, because silently starting over would
    re-download tweets that are already stored.
    """
    ids = []
    try:
        with open(path, "r") as tweet_file:
            for line_number, line in enumerate(tweet_file, start=1):
                if not line.strip():
                    continue
                try:
                    ids.append(json.loads(line)["id"])
                except (ValueError, KeyError, TypeError) as err:
                    print("skipping malformed line {} in {}: {!r}".format(line_number, path, err))
    except FileNotFoundError:
        # First run: there is nothing to resume from.
        return None
    return min(ids) if ids else None


# The search API treats max_id as inclusive, so resume one below the oldest
# stored tweet to avoid fetching it again. None means "no upper bound".
oldest_id = oldest_tweet_id(FILENAME)
max_id = oldest_id - 1 if oldest_id is not None else None

print("max {}".format(max_id))

The tweet query includes the query term, language wanted - Twitter tries to guess the language itself - and the maximum id of tweets. The tweets are queried in an endless loop and appended to file in `JSON` format. In case of error - which, in this case, is mostly the tweet limit reached - we will wait for 15 minutes before trying again. 

In [None]:
# Stream search results into FILENAME, one JSON document per line.
#
# max_id may be a non-integer placeholder (such as float('inf') or None) when
# there are no previously stored tweets; only a real integer id is forwarded
# to the API, otherwise no upper bound is sent.
search_max_id = max_id if isinstance(max_id, int) else None

with open(FILENAME, "a") as f:
    cursor = tweepy.Cursor(api.search, q=query_term, lang="en", max_id=search_max_id, tweet_mode="extended").items(1000000)

    try:
        while True:
            try:
                tweet = cursor.next()
            except StopIteration:
                # The cursor is exhausted (item limit reached or no older
                # results). Stop here instead of spinning forever.
                print("no more tweets")
                break
            except tweepy.TweepError as e:
                # Mostly rate limiting: wait out the 15-minute window, then retry.
                print("err: " + str(e))
                time.sleep(60 * 15)
                continue

            try:
                f.write(jsonpickle.encode(tweet._json, unpicklable=False) + "\n")
            except Exception as e:
                # Best effort: one tweet that cannot be encoded or written
                # must not end the whole collection run, but it is reported.
                print("skipping tweet: " + repr(e))
    except KeyboardInterrupt:
        # Manual stop, including during the rate-limit sleep. Leaving the
        # `with` block closes the file so everything written so far is kept.
        print("interrupted")