In [88]:
import requests
from pprint import pp as print

## Pull data from a URL

In [89]:
# base URL of the API (note the trailing slash) and the entity (resource) to pull;
# later cells build request URLs as f'{url}{entity}'
url = 'https://jsonplaceholder.typicode.com/'
entity = 'posts'

In [90]:
# send a GET request to base URL + entity, i.e. https://jsonplaceholder.typicode.com/posts
response = requests.get(url=f'{url}{entity}')

In [91]:
# HTTP status code returned by the server; 200 falls in the "successful" range listed below
response.status_code

200


    Informational responses (100 – 199)
    Successful responses (200 – 299)
    Redirection messages (300 – 399)
    Client error responses (400 – 499)
    Server error responses (500 – 599)


In [92]:
# Inspect the request that produced this response.
# NOTE: `print` in this notebook is pprint.pp (see the import cell), so each
# f-string below is displayed as a quoted, possibly line-wrapped repr.
req = response.request
print(f'request headers: {req.headers}')
print(f'request method type: {req.method}')
print(f'request path: {req.path_url}')

("request headers: {'User-Agent': 'python-requests/2.32.3', 'Accept-Encoding': "
 "'gzip, deflate', 'Accept': '*/*', 'Connection': 'keep-alive'}")
'request method type: GET'
'request path: /posts'


In [93]:
# inspect the raw response body as bytes (not decoded, not parsed)
response.content

b'[\n  {\n    "userId": 1,\n    "id": 1,\n    "title": "sunt aut facere repellat provident occaecati excepturi optio reprehenderit",\n    "body": "quia et suscipit\\nsuscipit recusandae consequuntur expedita et cum\\nreprehenderit molestiae ut ut quas totam\\nnostrum rerum est autem sunt rem eveniet architecto"\n  },\n  {\n    "userId": 1,\n    "id": 2,\n    "title": "qui est esse",\n    "body": "est rerum tempore vitae\\nsequi sint nihil reprehenderit dolor beatae ea dolores neque\\nfugiat blanditiis voluptate porro vel nihil molestiae ut reiciendis\\nqui aperiam non debitis possimus qui neque nisi nulla"\n  },\n  {\n    "userId": 1,\n    "id": 3,\n    "title": "ea molestias quasi exercitationem repellat qui ipsa sit aut",\n    "body": "et iusto sed quo iure\\nvoluptatem occaecati omnis eligendi aut ad\\nvoluptatem doloribus vel accusantium quis pariatur\\nmolestiae porro eius odio et labore et velit aut"\n  },\n  {\n    "userId": 1,\n    "id": 4,\n    "title": "eum et est occaecati",

In [94]:
# the underlying raw response object (a urllib3 HTTPResponse, as the output shows), not the parsed data
response.raw

<urllib3.response.HTTPResponse at 0x72fd24ba3f10>

In [95]:
# inspect the response body as a str (still unparsed JSON text)
response.text

'[\n  {\n    "userId": 1,\n    "id": 1,\n    "title": "sunt aut facere repellat provident occaecati excepturi optio reprehenderit",\n    "body": "quia et suscipit\\nsuscipit recusandae consequuntur expedita et cum\\nreprehenderit molestiae ut ut quas totam\\nnostrum rerum est autem sunt rem eveniet architecto"\n  },\n  {\n    "userId": 1,\n    "id": 2,\n    "title": "qui est esse",\n    "body": "est rerum tempore vitae\\nsequi sint nihil reprehenderit dolor beatae ea dolores neque\\nfugiat blanditiis voluptate porro vel nihil molestiae ut reiciendis\\nqui aperiam non debitis possimus qui neque nisi nulla"\n  },\n  {\n    "userId": 1,\n    "id": 3,\n    "title": "ea molestias quasi exercitationem repellat qui ipsa sit aut",\n    "body": "et iusto sed quo iure\\nvoluptatem occaecati omnis eligendi aut ad\\nvoluptatem doloribus vel accusantium quis pariatur\\nmolestiae porro eius odio et labore et velit aut"\n  },\n  {\n    "userId": 1,\n    "id": 4,\n    "title": "eum et est occaecati",\

In [96]:
# character encoding associated with this response
response.encoding

'utf-8'

In [97]:
# parse the JSON body into Python objects (here: a list of dicts)
response.json()

[{'userId': 1,
  'id': 1,
  'title': 'sunt aut facere repellat provident occaecati excepturi optio reprehenderit',
  'body': 'quia et suscipit\nsuscipit recusandae consequuntur expedita et cum\nreprehenderit molestiae ut ut quas totam\nnostrum rerum est autem sunt rem eveniet architecto'},
 {'userId': 1,
  'id': 2,
  'title': 'qui est esse',
  'body': 'est rerum tempore vitae\nsequi sint nihil reprehenderit dolor beatae ea dolores neque\nfugiat blanditiis voluptate porro vel nihil molestiae ut reiciendis\nqui aperiam non debitis possimus qui neque nisi nulla'},
 {'userId': 1,
  'id': 3,
  'title': 'ea molestias quasi exercitationem repellat qui ipsa sit aut',
  'body': 'et iusto sed quo iure\nvoluptatem occaecati omnis eligendi aut ad\nvoluptatem doloribus vel accusantium quis pariatur\nmolestiae porro eius odio et labore et velit aut'},
 {'userId': 1,
  'id': 4,
  'title': 'eum et est occaecati',
  'body': 'ullam et saepe reiciendis voluptatem adipisci\nsit amet autem assumenda provid

## Pull data from URL with query parameters

Query parameters allow you to filter the data for a given entity, for example to pull only the entities that satisfy a condition. A single entity can also be pulled by putting its id directly in the URL path.

In [98]:
# get a single entity by id; the id is appended to the URL path (.../posts/1), not sent as a query string
entity_id = 1
response = requests.get(url=f'{url}{entity}/{entity_id}')
print(response.json())

{'userId': 1,
 'id': 1,
 'title': 'sunt aut facere repellat provident occaecati excepturi optio '
          'reprehenderit',
 'body': 'quia et suscipit\n'
         'suscipit recusandae consequuntur expedita et cum\n'
         'reprehenderit molestiae ut ut quas totam\n'
         'nostrum rerum est autem sunt rem eveniet architecto'}


In [99]:
# get all the comment entities that belong to a specific post;
# `params` is turned into the query string (?postId=1) by requests
entity = 'comments'
query_params = {'postId': 1}
response = requests.get(url=f'{url}{entity}', params=query_params)
response.json() # all the comments on post 1

[{'postId': 1,
  'id': 1,
  'name': 'id labore ex et quam laborum',
  'email': 'Eliseo@gardner.biz',
  'body': 'laudantium enim quasi est quidem magnam voluptate ipsam eos\ntempora quo necessitatibus\ndolor quam autem quasi\nreiciendis et nam sapiente accusantium'},
 {'postId': 1,
  'id': 2,
  'name': 'quo vero reiciendis velit similique earum',
  'email': 'Jayne_Kuhic@sydney.com',
  'body': 'est natus enim nihil est dolore omnis voluptatem numquam\net omnis occaecati quod ullam at\nvoluptatem error expedita pariatur\nnihil sint nostrum voluptatem reiciendis et'},
 {'postId': 1,
  'id': 3,
  'name': 'odio adipisci rerum aut animi',
  'email': 'Nikita@garfield.biz',
  'body': 'quia molestiae reprehenderit quasi aspernatur\naut expedita occaecati aliquam eveniet laudantium\nomnis quibusdam delectus saepe quia accusamus maiores nam est\ncum et ducimus et vero voluptates excepturi deleniti ratione'},
 {'postId': 1,
  'id': 4,
  'name': 'alias odio sit',
  'email': 'Lew@alysha.tv',
  'body'

In [100]:
# see how the query params are appended to the URL after the ?
print(response.request.url)

'https://jsonplaceholder.typicode.com/comments?postId=1'


In [101]:
entity = 'comments'
# Pull a single comment by its id. Comment ids are unique across all posts
# (based on checking https://jsonplaceholder.typicode.com/), so the id goes
# straight into the URL path and no postId filter is needed.
comment_id = 100
response = requests.get(url=f'{url}{entity}/{comment_id}')
response.json()  # the single comment with id 100

{'postId': 20,
 'id': 100,
 'name': 'et sint quia dolor et est ea nulla cum',
 'email': 'Leone_Fay@orrin.com',
 'body': 'architecto dolorem ab explicabo et provident et\net eos illo omnis mollitia ex aliquam\natque ut ipsum nulla nihil\nquis voluptas aut debitis facilis'}

## Pagination

### Pagination: Based on range

In [102]:
# Switch to the PokeAPI and fetch the first page of the `ability` listing.
url, entity = 'https://pokeapi.co/api/v2/', 'ability'
response = requests.get(f'{url}{entity}')
# keep the parsed page around: the next cell reads its 'next' link
json_response = response.json()
print(json_response)

{'count': 367,
 'next': 'https://pokeapi.co/api/v2/ability?offset=20&limit=20',
 'previous': None,
 'results': [{'name': 'stench', 'url': 'https://pokeapi.co/api/v2/ability/1/'},
             {'name': 'drizzle', 'url': 'https://pokeapi.co/api/v2/ability/2/'},
             {'name': 'speed-boost',
              'url': 'https://pokeapi.co/api/v2/ability/3/'},
             {'name': 'battle-armor',
              'url': 'https://pokeapi.co/api/v2/ability/4/'},
             {'name': 'sturdy', 'url': 'https://pokeapi.co/api/v2/ability/5/'},
             {'name': 'damp', 'url': 'https://pokeapi.co/api/v2/ability/6/'},
             {'name': 'limber', 'url': 'https://pokeapi.co/api/v2/ability/7/'},
             {'name': 'sand-veil',
              'url': 'https://pokeapi.co/api/v2/ability/8/'},
             {'name': 'static', 'url': 'https://pokeapi.co/api/v2/ability/9/'},
             {'name': 'volt-absorb',
              'url': 'https://pokeapi.co/api/v2/ability/10/'},
             {'name': 'wat

In [103]:
# count indicates the total number of data points
# the API is well designed in that it provides us with the URL of the next page of data to pull

# we could use the next url, with the params it already carries, as is
response = requests.get(json_response.get('next'))
response.json()
# we can see that the API now provides both a next and a previous link; not all APIs are so well designed

{'count': 367,
 'next': 'https://pokeapi.co/api/v2/ability?offset=40&limit=20',
 'previous': 'https://pokeapi.co/api/v2/ability?offset=0&limit=20',
 'results': [{'name': 'suction-cups',
   'url': 'https://pokeapi.co/api/v2/ability/21/'},
  {'name': 'intimidate', 'url': 'https://pokeapi.co/api/v2/ability/22/'},
  {'name': 'shadow-tag', 'url': 'https://pokeapi.co/api/v2/ability/23/'},
  {'name': 'rough-skin', 'url': 'https://pokeapi.co/api/v2/ability/24/'},
  {'name': 'wonder-guard', 'url': 'https://pokeapi.co/api/v2/ability/25/'},
  {'name': 'levitate', 'url': 'https://pokeapi.co/api/v2/ability/26/'},
  {'name': 'effect-spore', 'url': 'https://pokeapi.co/api/v2/ability/27/'},
  {'name': 'synchronize', 'url': 'https://pokeapi.co/api/v2/ability/28/'},
  {'name': 'clear-body', 'url': 'https://pokeapi.co/api/v2/ability/29/'},
  {'name': 'natural-cure', 'url': 'https://pokeapi.co/api/v2/ability/30/'},
  {'name': 'lightning-rod', 'url': 'https://pokeapi.co/api/v2/ability/31/'},
  {'name': 'se

In [104]:
# We can also choose the page ourselves instead of following the `next` link:
# skip the first 5 abilities (offset) and return the following 3 (limit),
# so we will get ability 6, 7, & 8.
url, entity = 'https://pokeapi.co/api/v2/', 'ability'
params = dict(offset=5, limit=3)
response = requests.get(f'{url}{entity}', params=params)
response.json()

{'count': 367,
 'next': 'https://pokeapi.co/api/v2/ability?offset=8&limit=3',
 'previous': 'https://pokeapi.co/api/v2/ability?offset=2&limit=3',
 'results': [{'name': 'damp', 'url': 'https://pokeapi.co/api/v2/ability/6/'},
  {'name': 'limber', 'url': 'https://pokeapi.co/api/v2/ability/7/'},
  {'name': 'sand-veil', 'url': 'https://pokeapi.co/api/v2/ability/8/'}]}

### Pagination: Based on next id
In the Poke example the API provides us with a next link while also enabling parameter-based data pulls (with limit and offset).
However, not every API provides this level of detail; you may have to make calls to get the number of items and write the pagination logic based on how that API is designed (usually specified in its docs).

Some APIs also provide a next link containing an opaque id (e.g. a uuid) that we cannot manipulate; this limits the ability of the calling code to freely pull any subset of the data it would like.

## Retry 

Some API servers only allow us to call them n times per time unit. In such cases you will have to wait and retry.


In [124]:
import subprocess

# Launch the dummy server as a background child process.
# stdout/stderr are discarded instead of piped: nothing in this notebook reads
# them, and an unread PIPE can fill up and stall the child once its buffer is full.
process = subprocess.Popen(
    ['uv', 'run', 'dummy_server.py'],
    stdout=subprocess.DEVNULL,
    stderr=subprocess.DEVNULL,
)

The above command will start a simple server that runs locally at http://localhost:8000/api and only accepts one API call every 10 seconds.

In [125]:
# Call the rate-limited endpoint twice back to back; the second call lands
# inside the 10 second window and comes back as a rate-limit error
url = 'http://localhost:8000/api'
response_1 = requests.get(url)
print(response_1.json())
response_2 = requests.get(url)
print(response_2.json())

{'status': 'success',
 'message': 'API request processed successfully',
 'timestamp': 1744715055.1799412}
{'status': 'error',
 'message': 'Rate limit exceeded. Try again in 9.99 seconds.',
 'retry_after': 9}


In [126]:
import time

# the server accepts one call every 10 seconds, so sleeping for 10 seconds
# between the two calls lets the second one succeed as well
url = 'http://localhost:8000/api'
response_1 = requests.get(url)
print(response_1.json())
time.sleep(10)
response_2 = requests.get(url)
print(response_2.json())

{'status': 'success',
 'message': 'API request processed successfully',
 'timestamp': 1744715065.3647523}
{'status': 'success',
 'message': 'API request processed successfully',
 'timestamp': 1744715075.3728964}


In [127]:
# Stop the dummy server and wait until it has actually exited, so that port
# 8000 is free again before another server is started on it.
# terminate() only sends the signal; wait() blocks until the process is gone.
process.terminate()
process.wait(timeout=10)

### Retry with backoff

If you are not aware of the rate limits of the API server, you can use an exponential backoff technique, where you wait for increasingly longer times between API calls.
You can do this with the retry functionality as shown below.

In [142]:
import subprocess
# Make sure the previous dummy server is gone before reusing port 8000:
# terminate() only sends the signal, wait() blocks until the process has exited.
process.terminate()
process.wait(timeout=10)

# Start the second dummy server in the background.
# Output is discarded rather than piped because nothing here reads the pipes,
# and an unread PIPE can stall the child once its buffer is full.
process = subprocess.Popen(
    ['uv', 'run', 'random_dummy_server.py'],
    stdout=subprocess.DEVNULL,
    stderr=subprocess.DEVNULL,
)

In [143]:
import time

# try the same fixed 10 second wait against this server: as the output shows,
# the second call can still be rejected, so a fixed wait is not reliable here
url = 'http://localhost:8000/api'
response_1 = requests.get(url)
print(response_1.json())
time.sleep(10)
response_2 = requests.get(url)
print(response_2.json())

{'status': 'success',
 'message': 'API request processed successfully Rate limited at 18 calls per '
            'second ',
 'timestamp': 1744715530.654787,
 'rate_limit': 'Rate limited at 18 calls per second'}
{'status': 'error',
 'message': 'Rate limit exceeded. Try again in 7.99 seconds.Rate limited at 18 '
            'calls per second ',
 'retry_after': 7,
 'rate_limit': 'Rate limited at 18 calls per second'}


In [144]:
import requests
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry
import time

# Configure the retry strategy:
# up to 3 retries with exponential backoff between attempts
retries = Retry(
    total=3,
    backoff_factor=2,
    # Retry on rate limiting (429 Too Many Requests) and on transient server errors
    status_forcelist=[429, 500, 502, 503, 504],
    allowed_methods={"GET"},  # Explicitly allow only GET requests
    backoff_jitter=2,  # Additional random sleep seconds (0-2s)
)

# Create a session and mount the retry adapter for both schemes, so the same
# session also retries when it is pointed at an https:// API
session = requests.Session()
adapter = HTTPAdapter(max_retries=retries)
session.mount('http://', adapter)
session.mount('https://', adapter)

# URL for your local API
api_url = "http://localhost:8000/api"

# Function to make API calls with retry handling
def make_api_call(timeout=10):
    """Call the local API through the retrying session.

    Args:
        timeout: Seconds to wait for the server on a single attempt before
            giving up. Without a timeout a stalled server would block the
            call indefinitely and the Timeout handler below could never run.

    Returns:
        The decoded JSON body on success, or None when the request failed
        (the error is printed instead of raised).
    """
    try:
        # Make the GET request; retries/backoff are handled by the session's adapter
        response = session.get(api_url, timeout=timeout)
        response.raise_for_status()  # Raise exception for 4XX/5XX responses
        return response.json()
    except requests.exceptions.HTTPError as e:
        print(f"HTTP Error: {e}")
    except requests.exceptions.ConnectionError as e:
        print(f"Connection Error: {e}")
    except requests.exceptions.Timeout as e:
        print(f"Timeout Error: {e}")
    except requests.exceptions.RequestException as e:
        # catch-all for any other requests failure
        print(f"Request Exception: {e}")
    return None

# Example usage with your rate-limited server: make 3 calls in a row, each one
# going through make_api_call (which returns None if the call failed)
for i in range(3):
    print(f"Attempt {i+1}:")
    result = make_api_call()
    print(f"Result: {result}")
 

'Attempt 1:'
("Result: {'status': 'success', 'message': 'API request processed successfully "
 "Rate limited at 23 calls per second ', 'timestamp': 1744715564.0061274, "
 "'rate_limit': 'Rate limited at 23 calls per second'}")
'Attempt 2:'
("Result: {'status': 'success', 'message': 'API request processed successfully "
 "Rate limited at 19 calls per second ', 'timestamp': 1744715590.7461154, "
 "'rate_limit': 'Rate limited at 19 calls per second'}")
'Attempt 3:'
("Result: {'status': 'success', 'message': 'API request processed successfully "
 "Rate limited at 19 calls per second ', 'timestamp': 1744715614.1090477, "
 "'rate_limit': 'Rate limited at 19 calls per second'}")
'Attempt 4:'
("Result: {'status': 'success', 'message': 'API request processed successfully "
 "Rate limited at 19 calls per second ', 'timestamp': 1744715637.2118976, "
 "'rate_limit': 'Rate limited at 19 calls per second'}")
'Attempt 5:'
("Result: {'status': 'success', 'message': 'API request processed successfully 

In [146]:
# Stop the dummy server and wait for the process to exit so it does not
# linger in the background still holding its port.
process.terminate()
process.wait(timeout=10)

## APIs that require authentication

Some APIs require you to authenticate before connecting. There are multiple ways to authenticate; the most common ones for APIs are:

1. OAuth 2.0
An authorization framework that enables third-party applications to access resources on behalf of users without exposing credentials. OAuth 2.0 issues temporary access tokens with defined scopes, supporting multiple flows like Authorization Code, Implicit, and Client Credentials depending on the use case.
2. API Keys
Simple string tokens that act as unique identifiers to authenticate API requests. Typically included in request headers, query parameters, or the request body. They're easy to implement but should be protected and transmitted over HTTPS to prevent interception.
3. Basic Authentication
Authentication method where credentials (username and password) are encoded as Base64 and sent in the HTTP Authorization header. The format is Authorization: Basic {base64(username:password)}. Simple to implement but must be used with HTTPS to prevent credential exposure.
4. API Key Pairs
Authentication using two complementary keys: a public key that identifies the client and a private/secret key used to generate signatures. The private key never leaves the client, while signatures allow the server to verify the request's authenticity and integrity without transmitting sensitive information.