# Experiments with the Twitter API

With help from the following tutorial:
https://towardsdatascience.com/an-extensive-guide-to-collecting-tweets-from-twitter-api-v2-for-academic-research-using-python-3-518fcb71df2a

Along with the official Twitter API documentation.

## Imports

In [11]:
import requests
import os
import datetime
import time
import dateutil.parser
import unicodedata
import json

## Preparation of functions to use
The bearer token is read from the `BEARER` environment variable. I set that variable in a separate cell and delete the cell before committing, so the token is never pushed to git.

In [3]:
def authorize():
    """Return the API bearer token stored in the ``BEARER`` environment variable.

    Raises:
        KeyError: if ``BEARER`` is not set in the environment.
    """
    token = os.environ['BEARER']
    return token

In [4]:
def create_headers(bearer_token):
    """Build the HTTP headers that authenticate a request with a bearer token.

    Args:
        bearer_token: value placed after the ``Bearer `` prefix.

    Returns:
        A new dict holding a single ``Authorization`` entry.
    """
    return {"Authorization": "Bearer {}".format(bearer_token)}


In [None]:
def create_request(keyword, start_date, end_date, max_results = 10):
    """Placeholder for a search-request builder; not implemented yet.

    All arguments are accepted but unused, and the function always
    returns ``None``.
    """
    return None

In [9]:
def connect_to_endpoint(url, headers, params=None, timeout=30):
    """Send a GET request and return the decoded JSON body.

    Args:
        url: full endpoint URL; it may already contain a query string.
        headers: HTTP headers to send, e.g. the dict from ``create_headers``.
        params: optional dict of query parameters, handed to requests as-is.
        timeout: seconds to wait for the server. requests applies no timeout
            unless one is given, so without this an unresponsive server would
            block the call indefinitely.

    Returns:
        The response body parsed by ``response.json()``.

    Raises:
        Exception: with ``(status_code, response_text)`` as its args when the
            response status is anything other than 200.
    """
    response = requests.request(
        "GET", url, headers=headers, params=params, timeout=timeout
    )
    # Printed for every call so each notebook cell shows the outcome at a glance.
    print("Endpoint Response Code: " + str(response.status_code))
    if response.status_code != 200:
        raise Exception(response.status_code, response.text)
    return response.json()

## Initialize authorization token and header

In [7]:
# Read the token from the environment once and build the authorization
# headers that every request below reuses.
bearer_token = authorize()
headers = create_headers(bearer_token)

## Retrieve user data experiment

Using the "get user by username" endpoint, I looked up the id of my account and then tried querying its tweets.

In [33]:
# Look up the account by username; the user's numeric id is read from
# response['data'] in a later cell.
url_name = "https://api.twitter.com/2/users/by/username/nannahann"
response = connect_to_endpoint(url_name, headers)

Endpoint Response Code: 200


In [36]:
# Show only the user object of the lookup response.
print(response['data'])

{'id': '3126267033', 'name': 'Nanna', 'username': 'nannahann'}


In [37]:
# Pretty-print the whole lookup response, then keep the user id (a string)
# for building the timeline URLs in the cells below.
print(json.dumps(response, indent=4, sort_keys=True))
nanna_id = response['data']['id']
print(nanna_id)

{
    "data": {
        "id": "3126267033",
        "name": "Nanna",
        "username": "nannahann"
    }
}
3126267033


In [13]:
# Resolve the numeric id behind the username, then request that user's
# timeline with extra tweet fields.
url_name = "https://api.twitter.com/2/users/by/username/hsmarason"
response = connect_to_endpoint(url_name, headers)
hs_id = response['data']['id']
url_id = 'https://api.twitter.com/2/users/{}/tweets'.format(hs_id)
# 'next_token' is only an empty {} placeholder here; there is no token yet.
parameters = {
    'tweet.fields': 'geo,created_at,public_metrics',
    'next_token': {},
}
response = connect_to_endpoint(url_id, headers, params=parameters)
print(json.dumps(response, indent=4, sort_keys=True))

Endpoint Response Code: 200
Endpoint Response Code: 200
{
    "data": [
        {
            "created_at": "2021-09-11T13:50:47.000Z",
            "id": "1436688675501481987",
            "public_metrics": {
                "like_count": 0,
                "quote_count": 0,
                "reply_count": 0,
                "retweet_count": 11547
            },
            "text": "RT @JoeBiden: Twenty years ago, nearly 3,000 lives were cut short by an unspeakable act of cowardice and hatred on 9/11. As a nation, we mu\u2026"
        },
        {
            "created_at": "2021-09-11T13:49:16.000Z",
            "id": "1436688292842463237",
            "public_metrics": {
                "like_count": 0,
                "quote_count": 0,
                "reply_count": 0,
                "retweet_count": 315
            },
            "text": "RT @EricTopol: 3 new @CDCMMWR reports today provide very strong reassurance of vaccination benefits vs Delta and point toward a potential e\u2026"

In [26]:
# Plain timeline request for the first account: no query parameters at all,
# so only the default tweet fields come back.
url_id = 'https://api.twitter.com/2/users/{}/tweets'.format(nanna_id)
response = connect_to_endpoint(url_id, headers)
print(json.dumps(response, indent=4, sort_keys=True))

Endpoint Response Code: 200
{
    "data": [
        {
            "id": "956915469402951680",
            "text": "RT @AldisMjoll: Settist \u00ed s\u00e6ti \u00ed @straetobs sem einhver annar hefur auglj\u00f3slega seti\u00f0 \u00ed lengi \u00e1 undan. Hl\u00fdtt og notalegt. \ud83d\ude0c\u2728 Hver \u00fearf rass\u2026"
        },
        {
            "id": "944138060618399744",
            "text": "@AldisMjoll H\u00e6ttu bara a\u00f0 kaupa \u00feessar n\u00fdjustu ger\u00f0ir alltaf hreint- gamla st\u00f6ffi\u00f0 klikkar ekki (kv \u00far fimmunni)"
        },
        {
            "id": "943067713907380224",
            "text": "P\u00e6ling; miki\u00f0 af sj\u00e1lfshj\u00e1lparb\u00f3kum til sem vilja kenna f\u00f3lki a\u00f0 fyrirgefa en hvar eru allar b\u00e6kurnar fyrir \u00fe\u00e1 sem \u00feurfa a\u00f0 l\u00e6ra a\u00f0 bi\u00f0jast fyrirgefningar? \ud83e\udd14"
        },
        {
            "id": "942066590064967681",
            "text": "@AldisMjoll sjitt tengi"
       

## Getting more fields and excluding retweets and replies

In [41]:
# Same timeline request, now asking for extra tweet fields.
# NOTE: retweets and replies are NOT filtered out here; the filter idea
# ('query': '-is%3Aretweet%20-is%3Areply') was left disabled.
parameters = {
    'tweet.fields': 'geo,created_at,public_metrics',
    'next_token': {},  # empty placeholder, no token yet
}
url_id = 'https://api.twitter.com/2/users/{}/tweets'.format(nanna_id)
response = connect_to_endpoint(url_id, headers, params=parameters)

Endpoint Response Code: 200


In [44]:
# Pretty-print the response fetched in the previous cell.
print(json.dumps(response, indent=4, sort_keys=True))

{
    "data": [
        {
            "created_at": "2018-01-26T15:43:21.000Z",
            "id": "956915469402951680",
            "public_metrics": {
                "like_count": 0,
                "quote_count": 0,
                "reply_count": 0,
                "retweet_count": 2
            },
            "text": "RT @AldisMjoll: Settist \u00ed s\u00e6ti \u00ed @straetobs sem einhver annar hefur auglj\u00f3slega seti\u00f0 \u00ed lengi \u00e1 undan. Hl\u00fdtt og notalegt. \ud83d\ude0c\u2728 Hver \u00fearf rass\u2026"
        },
        {
            "created_at": "2017-12-22T09:30:29.000Z",
            "id": "944138060618399744",
            "public_metrics": {
                "like_count": 1,
                "quote_count": 0,
                "reply_count": 0,
                "retweet_count": 1
            },
            "text": "@AldisMjoll H\u00e6ttu bara a\u00f0 kaupa \u00feessar n\u00fdjustu ger\u00f0ir alltaf hreint- gamla st\u00f6ffi\u00f0 klikkar ekki (kv \u00far fimmun

In [47]:
# Show the last tweet of the page, then the paging metadata
# (oldest/newest id, result count and the token for the next page).
print(response['data'][-1])
print()
print(response['meta'])

{'public_metrics': {'retweet_count': 0, 'reply_count': 0, 'like_count': 3, 'quote_count': 0}, 'created_at': '2017-03-02T09:48:13.000Z', 'text': 'Veðrið þessa dagana 😍😍', 'id': '837238102536163328'}

{'oldest_id': '837238102536163328', 'newest_id': '956915469402951680', 'result_count': 9, 'next_token': 'zldjwdz3w6sba13nnr4j73j5hxbcvz4mvx5s4pejubc'}


The oldest tweet on this page is from 2017. Try getting more with next_token:

In [49]:
# Send the token from the previous page back under the name 'next_token'.
# NOTE: this request is rejected with HTTP 400. The error message lists the
# parameter names this endpoint accepts; 'pagination_token' is among them,
# 'next_token' is not.
parameters['next_token'] = response['meta']['next_token']
response = connect_to_endpoint(url_id, headers, params = parameters)
print(json.dumps(response, indent=4, sort_keys=True))

Endpoint Response Code: 400


Exception: (400, '{"errors":[{"parameters":{"next_token":["z"]},"message":"The query parameter [next_token] is not one of [id,since_id,until_id,max_results,pagination_token,exclude,start_time,end_time,expansions,tweet.fields,media.fields,poll.fields,place.fields,user.fields]"}],"title":"Invalid Request","detail":"One or more parameters to your request was invalid.","type":"https://api.twitter.com/2/problems/invalid-request"}')

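That does not work: according to the error message, this endpoint does not accept `next_token` (its list of valid parameters contains `pagination_token` instead). Try changing the parameters and using start_time and end_time: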

In [50]:
# Restrict the timeline to an explicit time window instead of paging.
# url_id still points at the timeline URL built in an earlier cell.
start_date = '2015-09-15T02:53:59.00Z'
end_date = '2016-12-31T02:53:59.00Z'
parameters = {
    'start_time': start_date,
    'end_time': end_date,
    'tweet.fields': 'created_at,public_metrics',
    'next_token': {}
}
response = connect_to_endpoint(url_id, headers, params = parameters)
print(json.dumps(response, indent=4, sort_keys=True))

Endpoint Response Code: 200
{
    "data": [
        {
            "created_at": "2016-12-29T18:03:54.000Z",
            "id": "814532408011800576",
            "public_metrics": {
                "like_count": 2,
                "quote_count": 0,
                "reply_count": 0,
                "retweet_count": 0
            },
            "text": "Loksins farin a\u00f0 s\u00fdna mitt r\u00e9tta andlit https://t.co/Ojn5Autkdl"
        },
        {
            "created_at": "2016-12-19T16:24:31.000Z",
            "id": "810883518939230208",
            "public_metrics": {
                "like_count": 1,
                "quote_count": 0,
                "reply_count": 0,
                "retweet_count": 0
            },
            "text": "\u00dej\u00e1ist af ofn\u00e6mi (\u00fe.e. er of-n\u00e6m)"
        },
        {
            "created_at": "2016-12-12T16:32:36.000Z",
            "id": "808348839937798144",
            "public_metrics": {
                "like_count": 0,
         

Or change the number of results per request! (The default is only 10; the maximum is 100.) For fun, also add an expansion for media keys, in case a photo or video was attached.

In [51]:
# Ask for a larger page and expand attachments so that tweets with media
# carry their media keys. The time window and paging token are left as
# empty {} placeholders.
max_results = 50
parameters = {
    'start_time': {},
    'end_time': {},
    'max_results' : max_results,
    'expansions' : 'attachments.media_keys',
    'tweet.fields': 'created_at,public_metrics',
    'next_token': {}
}
response = connect_to_endpoint(url_id, headers, params = parameters)
print(json.dumps(response, indent=4, sort_keys=True))

Endpoint Response Code: 200
{
    "data": [
        {
            "created_at": "2018-01-26T15:43:21.000Z",
            "id": "956915469402951680",
            "public_metrics": {
                "like_count": 0,
                "quote_count": 0,
                "reply_count": 0,
                "retweet_count": 2
            },
            "text": "RT @AldisMjoll: Settist \u00ed s\u00e6ti \u00ed @straetobs sem einhver annar hefur auglj\u00f3slega seti\u00f0 \u00ed lengi \u00e1 undan. Hl\u00fdtt og notalegt. \ud83d\ude0c\u2728 Hver \u00fearf rass\u2026"
        },
        {
            "created_at": "2017-12-22T09:30:29.000Z",
            "id": "944138060618399744",
            "public_metrics": {
                "like_count": 1,
                "quote_count": 0,
                "reply_count": 0,
                "retweet_count": 1
            },
            "text": "@AldisMjoll H\u00e6ttu bara a\u00f0 kaupa \u00feessar n\u00fdjustu ger\u00f0ir alltaf hreint- gamla st\u00f6ffi\u00f0 kli

In [55]:

# Print the third-to-last and second-to-last tweets of the page
# (the slice [-3:-1] stops before the final element).
print(json.dumps(response['data'][-3:-1], indent=4, sort_keys=True))

[
    {
        "attachments": {
            "media_keys": [
                "3_769841793437429760"
            ]
        },
        "created_at": "2016-08-28T10:19:24.000Z",
        "id": "769841805412143104",
        "public_metrics": {
            "like_count": 1,
            "quote_count": 0,
            "reply_count": 0,
            "retweet_count": 0
        },
        "text": "@tunglbjort munum nattla hvernig \u00feetta byrja\u00f0i... https://t.co/yynnNfpoef"
    },
    {
        "attachments": {
            "media_keys": [
                "3_769608004064935936"
            ]
        },
        "created_at": "2016-08-27T18:50:25.000Z",
        "id": "769608022129774592",
        "public_metrics": {
            "like_count": 0,
            "quote_count": 0,
            "reply_count": 0,
            "retweet_count": 0
        },
        "text": "Hin heilaga tvenna \ud83d\ude4f\ud83c\udffb https://t.co/fu9P2ralHI"
    }
]


Now the media keys are included, and we are getting older tweets.

## Retrieving tweets based on query

There are elections in Iceland tonight. Let's see if I can query for some tweets on it. The Icelandic word for elections is 'kosningar'.
Start by defining some variables, modifying parameters and adding a query:

In [59]:
# Recent-search request. The query is already URL-encoded inside the URL
# (%20 is a space, %3A a colon): kosning -is:retweet -is:reply
url_search = 'https://api.twitter.com/2/tweets/search/recent?query=kosning%20-is%3Aretweet%20-is%3Areply'
max_results = 10
# Only max_results and tweet.fields carry values; the other entries are
# empty {} placeholders.
parameters = {
    'start_time': {},
    'end_time': {},
    'max_results' : max_results,
    'expansions' : {},
    'tweet.fields': 'created_at,public_metrics',
    'next_token': {}
}

response = connect_to_endpoint(url_search, headers, params = parameters)
print(json.dumps(response, indent=4, sort_keys=True))

Endpoint Response Code: 200
{
    "data": [
        {
            "created_at": "2021-09-26T03:09:34.000Z",
            "id": "1441963125746634756",
            "public_metrics": {
                "like_count": 0,
                "quote_count": 0,
                "reply_count": 0,
                "retweet_count": 0
            },
            "text": "Gr\u00e9tar \u00de\u00f3r Ey\u00fe\u00f3rsson pr\u00f3fessor \u00ed stj\u00f3rnm\u00e1lafr\u00e6\u00f0i segir lj\u00f3st a\u00f0 r\u00edkisstj\u00f3rnarflokkarnir hafi n\u00e1\u00f0 vopnum s\u00ednum aftur \u00e1 lokasprettinum en \u00fea\u00f0 s\u00e9 \u00fe\u00f3 ekki \u00e1fellisd\u00f3mur yfir k\u00f6nnunum sem sl\u00edkum. Marka\u00f0sherfer\u00f0ir flokkanna hafi greinilega falli\u00f0 misvel \u00ed ... https://t.co/ZuuqOPUHIz https://t.co/tTcKW03vxA"
        },
        {
            "created_at": "2021-09-26T01:46:39.000Z",
            "id": "1441942262041169921",
            "public_metrics": {
                "like_count": 0,
    

Looking through the results, they are indeed about the elections, once you look past the `\uXXXX` escape sequences that stand in for every Icelandic letter in the printed JSON (`json.dumps` escapes non-ASCII characters by default).

## Another query with more conditions

Let's try filtering tweets on pets (cats&dogs) from the last hour that include images.

In [61]:
# URL-encoded query: (cat OR dog) has:images -is:retweet
url_search = 'https://api.twitter.com/2/tweets/search/recent?query=(cat%20OR%20dog)%20has%3Aimages%20-is%3Aretweet'
max_results = 10
# start_time/end_time bound the search to a fixed one-hour window.
parameters = {
    'start_time': '2021-09-25T22:30:59.00Z',
    'end_time': '2021-09-25T23:30:59.00Z',
    'max_results' : max_results,
    'expansions' : {},
    'tweet.fields': 'created_at,public_metrics',
    'next_token': {}
}

response = connect_to_endpoint(url_search, headers, params = parameters)
print(json.dumps(response, indent=4, sort_keys=True))

Endpoint Response Code: 200
{
    "data": [
        {
            "created_at": "2021-09-25T23:30:57.000Z",
            "id": "1441908109593567232",
            "public_metrics": {
                "like_count": 2,
                "quote_count": 0,
                "reply_count": 1,
                "retweet_count": 0
            },
            "text": "@weareronin47 @_dog_soldier_ https://t.co/GajpgZ1mry"
        },
        {
            "created_at": "2021-09-25T23:30:56.000Z",
            "id": "1441908105906900996",
            "public_metrics": {
                "like_count": 0,
                "quote_count": 0,
                "reply_count": 0,
                "retweet_count": 0
            },
            "text": "@el_yuuri Minha madrugada vai ser triste,mais eu te entendo bom descanso dog\u270c\u2764 https://t.co/f9c3CPD9Zw"
        },
        {
            "created_at": "2021-09-25T23:30:56.000Z",
            "id": "1441908105261027329",
            "public_metrics": {
           

Restrict the results to English only:

In [65]:
# Same query with lang:en added. `parameters` is reused unchanged from the
# previous cell, so the time window and page size stay the same.
url_search = 'https://api.twitter.com/2/tweets/search/recent?query=(cat%20OR%20dog)%20lang%3Aen%20has%3Aimages%20-is%3Aretweet'
response = connect_to_endpoint(url_search, headers, params = parameters)
print(json.dumps(response, indent=4, sort_keys=True))

Endpoint Response Code: 200
{
    "data": [
        {
            "created_at": "2021-09-25T23:30:56.000Z",
            "id": "1441908105261027329",
            "public_metrics": {
                "like_count": 74,
                "quote_count": 1,
                "reply_count": 75,
                "retweet_count": 74
            },
            "text": "\ud83c\udf81Cool Cat giveaway\ud83c\udf81\n\nRules to get rich:\n1\ufe0f\u20e3 Follow Me / \ud83d\udd14\n2\ufe0f\u20e3 Like &amp; RT \u2764\n3\ufe0f\u20e3 Tag more Friends more chances to win \ud83e\udd11\ud83e\udd47\n\n#NFTdrop #NFTGiveaways #NFTGiveaway #NFTs https://t.co/LndvytV7Jf"
        },
        {
            "created_at": "2021-09-25T23:30:55.000Z",
            "id": "1441908101074980865",
            "public_metrics": {
                "like_count": 1,
                "quote_count": 0,
                "reply_count": 3,
                "retweet_count": 0
            },
            "text": "I told people never count my New York

## Try the next_token:

In [67]:
# Last tweet of the first page, kept for comparison with the next page.
print(response['data'][-1])

{'text': '@Author_Gretchen Smart! We only ever had two at a time…except for about 9 months when it was 1 v elderly cat and 2 rescue dogs…. Now, those two dogs. https://t.co/quxTxX81XD', 'public_metrics': {'retweet_count': 0, 'reply_count': 1, 'like_count': 1, 'quote_count': 0}, 'id': '1441908019512545283', 'created_at': '2021-09-25T23:30:35.000Z'}


In [66]:
# The paging token for the following page is found under response['meta'].
next_token = response['meta']['next_token']
print(next_token)

b26v89c19zqg8o3fpds6g2nn58qil0vx0h4qbu0c9r6nx


In [68]:
# Request the next page by sending the token as a query parameter on a copy
# of `parameters`. Appending it to url_search in place would stack another
# '&next_token=...' onto the URL every time this cell is re-run.
page_parameters = dict(parameters, next_token=next_token)
response = connect_to_endpoint(url_search, headers, params=page_parameters)
print(json.dumps(response, indent=4, sort_keys=True))

Endpoint Response Code: 200
{
    "data": [
        {
            "created_at": "2021-09-25T23:30:33.000Z",
            "id": "1441908010251587585",
            "public_metrics": {
                "like_count": 1,
                "quote_count": 0,
                "reply_count": 0,
                "retweet_count": 0
            },
            "text": "Top music. Now playing Catch The Cat - Cherry Laine on https://t.co/9RDEDKWLIS https://t.co/DngQBmVszc"
        },
        {
            "created_at": "2021-09-25T23:30:30.000Z",
            "id": "1441907998863945735",
            "public_metrics": {
                "like_count": 1,
                "quote_count": 0,
                "reply_count": 1,
                "retweet_count": 0
            },
            "text": "@umbiedecoder We don't talk at all so 40% also have dog https://t.co/WUVbNDdbk1"
        },
        {
            "created_at": "2021-09-25T23:30:29.000Z",
            "id": "1441907993809915907",
            "public_metric

Works! The first tweet returned with the next token (23:30:33) is two seconds older than the last tweet of the first batch (23:30:35), so the second page continues where the first one ended.