# Pulling data from public APIs (without registration) - GET request

In [3]:
# loading the packages
# requests provides us with the capabilities of sending an HTTP request to a server
import requests

## Extracting data on currency exchange rates

In [4]:
# We will use an API containing currency exchange rates as published by the European Central Bank
# Documentation at https://exchangeratesapi.io

### Sending a GET request

In [49]:
# Define the base URL
# Base URL: the part of the URL common to all requests, not containing the parameters
base_url = "https://api.exchangeratesapi.io/latest?access_key=a7933e2b3b05701829e98a2a6605eaa2"

In [50]:
# We can make a GET request to this API endpoint with requests.get
response = requests.get(base_url)

# This method returns the response from the server
# We store this response in a variable for future processing

### Investigating the response

In [51]:
# Checking if the request went through ok
response.ok

True

In [52]:
# Checking the status code of the response
response.status_code

200

In [53]:
# Inspecting the content body of the response (as a regular 'string')
response.text

'{"success":false,"error":{"code":105,"type":"https_access_restricted","info":"Access Restricted - Your current Subscription Plan does not support HTTPS Encryption."}}'

In [34]:
# Inspecting the content of the response (in 'bytes' format)
response.content

b'{"success":false,"error":{"code":105,"type":"https_access_restricted","info":"Access Restricted - Your current Subscription Plan does not support HTTPS Encryption."}}'

In [11]:
# The data is presented in JSON format

### Handling the JSON

In [12]:
# Requests has in-build method to directly convert the response to JSON format
response.json()

{'success': False,
 'error': {'code': 101,
  'type': 'missing_access_key',
  'info': 'You have not supplied an API Access Key. [Required format: access_key=YOUR_ACCESS_KEY]'}}

In [13]:
# In Python, this JSON is stored as a dictionary
type(response.json())

dict

In [14]:
# A useful library for JSON manipulation and pretty print
import json

# It has two main methods:
# .loads(), which creates a Python dictionary from a JSON format string (just as response.json() does)
# .dumps(), which creates a JSON format string out of a Python dictionary 

In [15]:
# .dumps() has options to make the string 'prettier', more readable
# We can choose the number of spaces to be used as indentation
json.dumps(response.json(), indent=4)
print(response.url)

https://api.exchangeratesapi.io/latest


In [16]:
# In order to visualize these changes, we need to print the string
#
print(json.dumps(response.json(), indent=4))

{
    "success": false,
    "error": {
        "code": 101,
        "type": "missing_access_key",
        "info": "You have not supplied an API Access Key. [Required format: access_key=YOUR_ACCESS_KEY]"
    }
}


In [15]:
# It contains 3 keys; the value for the 'rates' key is another dictionary
response.json().keys()

dict_keys(['success', 'error'])

### Incorporating parameters in the GET request

In [16]:
# Request parameters are added to the URL after a question mark '?'
# In this case, we request for the exchange rates of the US Dollar (USD) and Pound Sterling (GBP) only
param_url = base_url + "?symbols=USD,GBP"
param_url

'https://api.exchangeratesapi.io/latest?symbols=USD,GBP'

In [62]:
# Making a request to the server with the new URL, containing the parameters
# Need a access key
#https://api.exchangeratesapi.io/latest?access_key=ACCESS_KEY => symbols=GBP&base=USD

response = requests.get(param_url)
print("Status Code",response.status_code)
print(response.url)

Status Code 200
https://api.exchangeratesapi.io/latest?symbols=GBP&base=USD


In [63]:
# Saving the response data
data = response.json()
data

{'success': False,
 'error': {'code': 101,
  'type': 'missing_access_key',
  'info': 'You have not supplied an API Access Key. [Required format: access_key=YOUR_ACCESS_KEY]'}}

In [24]:
# 'data' is a dictionary
data['base']

KeyError: 'base'

In [21]:
data['date']

KeyError: 'date'

In [None]:
data['rates']

In [25]:
# As per the documentation of this API, we can change the base with the parameter 'base'
param_url = base_url + "?symbols=GBP&base=USD"

In [26]:
# Sending a request and saving the response JSON, all at once
data = requests.get(param_url).json()
data

{'success': False,
 'error': {'code': 101,
  'type': 'missing_access_key',
  'info': 'You have not supplied an API Access Key. [Required format: access_key=YOUR_ACCESS_KEY]'}}

In [None]:
usd_to_gbp = data['rates']['GBP']
usd_to_gbp

### Obtaining historical exchange rates

In [27]:
base_url = "https://api.exchangeratesapi.io"

In [28]:
# We can also ask for the exhange rates at a particular day in the past with '/DATE', where DATE is in the format YYYY-MM-DD
historical_url = base_url + "/2016-01-26"
historical_url

'https://api.exchangeratesapi.io/2016-01-26'

In [29]:
# Making the GET request
response = requests.get(historical_url)
response.status_code

200

In [30]:
# Pretty printing the data
data = response.json()
print(json.dumps(data, indent=4))

{
    "success": false,
    "error": {
        "code": 101,
        "type": "missing_access_key",
        "info": "You have not supplied an API Access Key. [Required format: access_key=YOUR_ACCESS_KEY]"
    }
}


### Extracting data for a time period

In [31]:
# The last feautre of this API is: giving the historical exchange rates for every day over some time period

In [32]:
# The URL for this request is formed with '/history' and the parameters 'start_at' and 'end_at'
time_period = base_url + "/history" + "?start_at=2017-04-26&end_at=2018-04-26" + "&symbols=GBP"
time_period

'https://api.exchangeratesapi.io/history?start_at=2017-04-26&end_at=2018-04-26&symbols=GBP'

In [33]:
# Extracting the response JSON object
data = requests.get(time_period).json()

In [34]:
# Pretty printing the JSON
# Notice that the dates are in random order
print(json.dumps(data, indent=4))

{
    "success": false,
    "error": {
        "code": 101,
        "type": "missing_access_key",
        "info": "You have not supplied an API Access Key. [Required format: access_key=YOUR_ACCESS_KEY]"
    }
}


In [35]:
# We can use the 'sort_keys' parameter of the json.dumps() method to order these dates chronologically
print(json.dumps(data, indent=4, sort_keys=True))

{
    "error": {
        "code": 101,
        "info": "You have not supplied an API Access Key. [Required format: access_key=YOUR_ACCESS_KEY]",
        "type": "missing_access_key"
    },
    "success": false
}


In [36]:
# This data can then be used to plot the change in the exchange rate through time or any other further analysis

### Testing the API response to incorrect input

In [37]:
# Testing how the API behaves if given incorrect input parameters

In [38]:
# Trying out an invalid DATE
invalid_url = base_url + "/2019-13-01"

In [39]:
# Making the request
response = requests.get(invalid_url)
response.status_code # The server responds with a 400 error code indicating a 'bad request'

200

In [40]:
# There is also an error message in the JSON
response.json()

{'success': False,
 'error': {'code': 101,
  'type': 'missing_access_key',
  'info': 'You have not supplied an API Access Key. [Required format: access_key=YOUR_ACCESS_KEY]'}}

In [41]:
# Testing an invalid BASE CURRENCY
invalid_url = base_url + "/2019-12-01?base=USB"

In [42]:
response = requests.get(invalid_url)
response.status_code

200

In [43]:
response.json()

{'success': False,
 'error': {'code': 101,
  'type': 'missing_access_key',
  'info': 'You have not supplied an API Access Key. [Required format: access_key=YOUR_ACCESS_KEY]'}}

In [44]:
# Testing an invalid EXCHANGE CURRENCY
invalid_url = base_url + "/2019-12-01?symbols=WBP"

In [45]:
response = requests.get(invalid_url)
response.status_code

200

In [46]:
response.json()

{'success': False,
 'error': {'code': 101,
  'type': 'missing_access_key',
  'info': 'You have not supplied an API Access Key. [Required format: access_key=YOUR_ACCESS_KEY]'}}

### Creating a simple currency convertor

In [47]:
# We can use the data provided from this API to create a simple currency convertor

In [48]:
# Gathering input parameters from the user
date = input("Please enter the date (in the format 'yyyy-mm-dd' or 'latest'): ")
base = input("Convert from (currency): ")
curr = input("Convert to (currency): ")
quan = float(input("How much {} do you want to convert: ".format(base)))

# Constructing the URL based on the user parameters and sending a request to the server
url = base_url + "/" + date + "?base=" + base + "&symbols=" + curr
response = requests.get(url)

# Displaying the error message, if something went wrong
if(response.ok is False):
    print("\nError {}:".format(response.status_code))
    print(response.json()['error'])

else:
    data = response.json()
    rate = data['rates'][curr]
    
    result = quan*rate
    
    print("\n{0} {1} is equal to {2} {3}, based upon exchange rates on {4}".format(quan,base,result,curr,data['date']))


Please enter the date (in the format 'yyyy-mm-dd' or 'latest'): 2022-02-20
Convert from (currency): KSH
Convert to (currency): USD
How much KSH do you want to convert: 70000


KeyError: 'rates'

## Another example: the iTunes search API

In [49]:
# The documentation for this particular API can be found here:
# https://affiliate.itunes.apple.com/resources/documentation/itunes-store-web-service-search-api/

### Passing parameters in the request

In [17]:
# define base URL
base_site = "https://itunes.apple.com/search"

In [18]:
# We can manually add parameters to the URL, as seen before
# E.G., searching for 'the beatles'
url = base_site + "?term=the+beatles&country=us"

# submit a GET request with parameters needed
requests.get(url)

<Response [200]>

In [19]:
# Note, that the space in 'the beatles' was replaced with a '+' in the URL
# Having to worry about special symbols in the URL can make the code harder to write and 'more buggy'

In [20]:
# Another way of expressing the parameters is to pass them to the get() method
# We pass the key/value parameter pairs as a dictionary to 'params'

r = requests.get(base_site, params = {"term": "the beatles", "country": "us"})
r.status_code

200

In [21]:
# The request package incorporates those paramaters into the URL automatically
# check the URL we submitted the request to
r.url

'https://itunes.apple.com/search?term=the+beatles&country=us'

In [22]:
# This way of stating parameters is the preffered one

### Investigating the output and parameters

In [23]:
# The request went through OK
r.status_code

200

In [25]:
# Inspecting the response's JSON
info = r.json()
print(json.dumps(info, indent=4))

{
    "resultCount": 50,
    "results": [
        {
            "wrapperType": "audiobook",
            "artistId": 160778930,
            "collectionId": 1439484338,
            "artistName": "Bob Spitz",
            "collectionName": "The Beatles (Abridged)",
            "collectionCensoredName": "The Beatles (Abridged)",
            "artistViewUrl": "https://books.apple.com/us/author/bob-spitz/id160778930?uo=4",
            "collectionViewUrl": "https://books.apple.com/us/audiobook/the-beatles-abridged/id1439484338?uo=4",
            "artworkUrl60": "https://is3-ssl.mzstatic.com/image/thumb/Music128/v4/69/b8/65/69b86591-ce4b-9bd0-ab71-3808d0ea06dc/9780743552455.jpg/60x60bb.jpg",
            "artworkUrl100": "https://is3-ssl.mzstatic.com/image/thumb/Music128/v4/69/b8/65/69b86591-ce4b-9bd0-ab71-3808d0ea06dc/9780743552455.jpg/100x100bb.jpg",
            "collectionPrice": 16.99,
            "collectionExplicitness": "notExplicit",
            "trackCount": 1,
            "country": "US

In [26]:
# This seems to contain a lot of data
# Let's check if there are some keys we don't see at first glance in the outermost dictionary
info.keys()

dict_keys(['resultCount', 'results'])

In [27]:
# There are, indeed, only these two keys

In [28]:
# The second one contains a list of all the results
# Let's look at one such result
print(json.dumps(info['results'][0], indent=4))

# It's a simple dictionary with a lot of data

{
    "wrapperType": "audiobook",
    "artistId": 160778930,
    "collectionId": 1439484338,
    "artistName": "Bob Spitz",
    "collectionName": "The Beatles (Abridged)",
    "collectionCensoredName": "The Beatles (Abridged)",
    "artistViewUrl": "https://books.apple.com/us/author/bob-spitz/id160778930?uo=4",
    "collectionViewUrl": "https://books.apple.com/us/audiobook/the-beatles-abridged/id1439484338?uo=4",
    "artworkUrl60": "https://is3-ssl.mzstatic.com/image/thumb/Music128/v4/69/b8/65/69b86591-ce4b-9bd0-ab71-3808d0ea06dc/9780743552455.jpg/60x60bb.jpg",
    "artworkUrl100": "https://is3-ssl.mzstatic.com/image/thumb/Music128/v4/69/b8/65/69b86591-ce4b-9bd0-ab71-3808d0ea06dc/9780743552455.jpg/100x100bb.jpg",
    "collectionPrice": 16.99,
    "collectionExplicitness": "notExplicit",
    "trackCount": 1,
    "country": "USA",
    "currency": "USD",
    "releaseDate": "2005-11-01T08:00:00Z",
    "primaryGenreName": "Biographies & Memoirs",
    "previewUrl": "https://audio-ssl.itunes

In [61]:
# The first one states how many results are shown (by default, 50)
info["resultCount"]

50

In [None]:
# The number of results can be set (to a maximum of 200) with the 'limit' parameter
r = requests.get(base_site, params = {"term": "the beatles", "country": "us", "limit": 200})
r.ok

In [None]:
info = r.json()
info

In [None]:
len(info['results'])

In [None]:
# Finally, let's check the response to an invalid input
check_resp = requests.get(base_site, params = {"term": "alternative", "country": "us", "media": "hahaha"})
check_resp.ok

In [None]:
# Status code is 400 - meaning 'Bad request'
check_resp.status_code

In [None]:
# Error message
check_resp.json()

### Structuring and exporting the data

In [None]:
# It may be useful to store the data in a structured form
# The pandas package is great for that, as we can use its dataframe (basically a table)
# Since the results is a list of 'shallow' dictionaries, it neatly fits into a table
# A more complicated, nested dictionary may not be easily transformable into a table

In [None]:
import pandas as pd

In [None]:
# Creating the dataframe and populating it with the results of our search
songs_df = pd.DataFrame(info["results"])
songs_df

In [None]:
# Exporting the data to a CSV (Comma Separated Values) file
songs_df.to_csv("songs_info.csv")

# Pagination

In [None]:
# Loading the packages
import requests
import json

In [None]:
# We will use API for job listings on Github
# Documentation can be found on: https://jobs.github.com/api

In [None]:
# define base URL
base_site = "https://jobs.github.com/positions.json"

In [None]:
# Submiting a GET request
r = requests.get(base_site, params = {"title": "data science", "location": "Fairfax, VA"})
r.status_code

In [None]:
# Inspect the response
r.json()

In [None]:
# How many jobs have been found?
len(r.json())

### The page parameter

In [None]:
# Let's search for all jobs (no filter parameters)
r =  requests.get(base_site)
r.ok

In [None]:
r.json()

In [None]:
len(r.json())

In [None]:
# According to the documentation, the results are split into pages
# These were the results from the first page only

In [None]:
# To get the next page, we need to make another GET request with parameter 'page'
r =  requests.get(base_site, params = {"page": 2})
r.status_code

In [None]:
r.json()

In [None]:
len(r.json())

In [None]:
# Making a request to a non-existing page
r = requests.get(base_site, params = {"page": 10})
r.status_code

In [None]:
# The response is an empty list
r.json()

### Extracting results from multiple pages

In [None]:
# Let's obtain the results of the first 5 pages
results = []

In [None]:
for i in range(5):
    r =  requests.get(base_site, params = {"page": i+1})
    
    if len(r.json()) == 0:   # We have reached the end of the results
        break
    else:
        # Add the response results to our list of results
        results.extend(r.json())


In [None]:
# number of found jobs
len(results)