In [4]:
%matplotlib inline
import json
import os  # interacting with operating system

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import requests  # make a request to a web page and print the response text
from bs4 import BeautifulSoup
from IPython.display import HTML, display  # IPython.core.display is deprecated for these names

  from IPython.core.display import HTML, display


#### Lecture 4a - JSONs and APIs
by Vítek Macháček
October 25, 2022

### Contents

* Standardized data representation
* JSON (+ XMLs)
* Introduction to Requests (GET vs. POST) and APIs


### Goals:
    
* work with online/real-time data
* acquisition, processing -> results
* Today introduction and next week a practical example

## Microservice architecture

* Foundation of modern software architecture
* Do one thing and do it well.

![Microservice architecture schema](./img/microservices.png "Microservice Architecture")


## Data exchange formats - JSON, XML

`Language of the internet`

* You can send/receive a message with (almost) any service
* we need system agnostic data format 
* is editable in basic editors
* More complex than simple tables
* Highly structured - if you don't follow the rules, you are out
* Both sides need to understand the structure
* only data. It does not do anything - no code to be run
* distributed as text/string (to be precise as `bytes` literals) 
* parsed to objects - easy to work with straight away
* Can be persisted as special files, or some data streams from APIs. 
* Human readable
* Hierarchical
* Can be fetched using standard web APIs

### Purpose

1. Communication 
    * All imaginable communication channels
    * Applications

2. Storing
    * self-descriptive
    * human readable
    * also in DBs - SQL, MongoDB etc.

3. Standardization
    * predictability
    * cooperation
    * spillovers from standardization

### Dimensionality problem

* rich information comes at costs of data complexity 
* to interrelate information, you need high dimensionality (or A LOT of columns)
* Strongly object-oriented


### 1D:
* logs

### 2D
* tabular data (like pandas DFs)
* SQL

### 3+D:
#### XML (and HTML)
* eXtensible Markup Language is a software- and hardware-independent tool for storing and transporting data.
* Officially defined in 1998, but its roots are even older.
* XML was designed to carry data - with focus on what data is
* HTML was designed to display data - with focus on what data should look like displayed
* XML tags are not predefined like HTML tags are
* more verbose than JSON
* can have comments - actually a really cool and useful feature!
* used historically as a transaction format in many areas: 
    * Scientific measurements
    * News information
    * Weather measurements
    * Financial transactions
* Necessary to use an XML parser - for example `BeautifulSoup` or `xml.etree.ElementTree`
* the `x` in doc`x`, xls`x`, etc. stands for XML


### JSON
* JavaScript Object Notation
* REST APIs return JSONs
* often *.json* files
* but also used in the web etc.
* supports standard datatypes - strings, integers, floats, lists
* No comments
* More compact, less verbose
* No closing tags
* Used EVERYWHERE, BUT [NOT LICENSED FOR EVIL](https://www.json.org/license.html). If you want to do evil stuff, use XML instead.
* Native in JavaScript and close to native in Python (dictionary)
* Jupyter Notebooks

# JSON

* JSON is similar to a combination of `dictionaries` (`object` in JSON terms) and `lists` (`arrays`) in Python

In [5]:
# Teachers form a JSON-style "array" (Python list) of "objects" (Python dicts).
teachers = [
    {
        "name": "Jozef Baruník",
        "titles": ["doc.", "PhDr.", "Ph.D.", "Bc.", "Mgr."],
        "ID": 1234,
        "courses": ["JEM005", "JEM116", "JEM059", "JEM061"],
    },
    {
        "name": "Martin Hronec",
        "titles": ["Bc.", "Mgr."],
        "ID": 3421,
        "courses": ["JEM005", "JEM207"],
    },
]

# Courses are keyed by course code; each value lists the IDs of its teachers.
courses = {
    "JEM005": {"name": "Advanced Econometrics", "ECTS": 6, "teachers": [3421, 1234]},
    "JEM207": {"name": "Data Processing in Python", "ECTS": 5, "teachers": [3421]},
    "JEM116": {"name": "Applied Econometrics", "ECTS": 6, "teachers": [1234]},
    "JEM059": {"name": "Quantitative Finance I.", "ECTS": 6, "teachers": [1234, 5678]},
    "JEM061": {"name": "Quantitative Finance II.", "ECTS": 6, "teachers": [1234, 5678]},
}

# Nest both structures under one top-level object and show it as the cell output.
jsondata = {"teachers": teachers, "courses": courses}
jsondata

{'teachers': [{'name': 'Jozef Baruník',
   'titles': ['doc.', 'PhDr.', 'Ph.D.', 'Bc.', 'Mgr.'],
   'ID': 1234,
   'courses': ['JEM005', 'JEM116', 'JEM059', 'JEM061']},
  {'name': 'Martin Hronec',
   'titles': ['Bc.', 'Mgr.'],
   'ID': 3421,
   'courses': ['JEM005', 'JEM207']}],
 'courses': {'JEM005': {'name': 'Advanced Econometrics',
   'ECTS': 6,
   'teachers': [3421, 1234]},
  'JEM207': {'name': 'Data Processing in Python',
   'ECTS': 5,
   'teachers': [3421]},
  'JEM116': {'name': 'Applied Econometrics', 'ECTS': 6, 'teachers': [1234]},
  'JEM059': {'name': 'Quantitative Finance I.',
   'ECTS': 6,
   'teachers': [1234, 5678]},
  'JEM061': {'name': 'Quantitative Finance II.',
   'ECTS': 6,
   'teachers': [1234, 5678]}}}

Is this valid JSON?

https://jsonformatter.curiousconcept.com/

In [6]:
# Serialise the nested Python structure into a JSON-formatted string.
# indent=4 pretty-prints it; ensure_ascii=False keeps non-ASCII characters
# (such as the "í" in the names) readable instead of \u-escaping them.
js = json.dumps(jsondata, ensure_ascii=False, indent=4)

print(js)

{
    "teachers": [
        {
            "name": "Jozef Baruník",
            "titles": [
                "doc.",
                "PhDr.",
                "Ph.D.",
                "Bc.",
                "Mgr."
            ],
            "ID": 1234,
            "courses": [
                "JEM005",
                "JEM116",
                "JEM059",
                "JEM061"
            ]
        },
        {
            "name": "Martin Hronec",
            "titles": [
                "Bc.",
                "Mgr."
            ],
            "ID": 3421,
            "courses": [
                "JEM005",
                "JEM207"
            ]
        }
    ],
    "courses": {
        "JEM005": {
            "name": "Advanced Econometrics",
            "ECTS": 6,
            "teachers": [
                3421,
                1234
            ]
        },
        "JEM207": {
            "name": "Data Processing in Python",
            "ECTS": 5,
            "teachers": [
                3

# Reading data using `requests` library

* API = Application Programming Interface
* more specifically: http based APIs

### When to use?
* whenever more applications need to communicate - 
    * DB speaks to app
    * accounting system communicates with inventory system
    * Google Maps need to get info about local public transport
    * ML-based BitCoin price prediction to be used to facilitate automatic trading
    *
* user-friendly interface for complicated tasks - DEEP AI, Google Maps
* Data - Golemio, OpenStreetMaps

## HTTP request

* The most standard webserver communication channel around
* `Client` asks/requests questions - **requests**
* `Server` replies/serves answers - **responses**

### HTTP request structure:
* URL
    * domain
    * route
    * parameters
* Request Type - GET, POST, PUT, DELETE
* Request Header
    * authentication
    * cookies
    * other metadata
* Outgoing data (will see below)
   
### HTTP response structure
* Header 
    * cookies
    * other metadata - responding server, dates, 
* Status Code:
    * 200 - success
    * 404 - resource does not exist
    * 500 - the server failed during processing your request
* Content
    * text - JSON, HTML etc.
    * file

### API types
1) REST API - uses HTTP requests and returns JSON
2) SOAP API - uses HTTP requests and returns XML
3) Website - uses HTTP requests and returns a set of HTML, JavaScript, CSS and other files


### GET request
* fast
* public
* data flow only one direction
* parameters via request address

### POST request
* slow
* private
* both sides can send data

### The simplest request

In [7]:
import requests

In [8]:
# Send a GET request and keep the resulting Response object
r = requests.get('https://www.google.com/')
# .text is the response body decoded to a string - here the page's HTML source
print(r.text)

<!doctype html><html itemscope="" itemtype="http://schema.org/WebPage" lang="cs"><head><meta content="text/html; charset=UTF-8" http-equiv="Content-Type"><meta content="/images/branding/googleg/1x/googleg_standard_color_128dp.png" itemprop="image"><title>Google</title><script nonce="6dYVbwG3ZfsIyc_-pZIOpA">(function(){window.google={kEI:'qZAhZK_6B_qmqtsPhe2peA',kEXPI:'0,1359409,6059,206,4804,2316,383,246,5,1129120,1197789,605,380097,16114,28684,22431,1361,12318,17581,4998,13228,3847,38444,887,1985,2891,11754,606,30668,30021,6398,118,9241,3,346,230,6459,14124,4,1528,2304,11926,30201,13658,13795,7428,5818,2539,4094,7596,1,14262,27892,2,14022,25739,5679,1020,25049,6074,4568,6258,23417,1253,5835,14967,4333,7484,27082,5895,2260,7381,15970,873,19633,7,1922,9779,5864,6551,8975,14764,6305,2007,14736,3456,20232,19404,802,1622,1778,12,657,4308,8377,6514,4097,252,4132,991,3030,427,5202,481,1411,890,925,6480,1212,92,500,5221,2326,682,1152,1091,1649,108,1128,1003,1452,4702,498,695,1130,9506,754,536

In [9]:
r = requests.get('https://www.google.com/')
# Render the returned markup inside the notebook instead of printing it as plain text
display(HTML(r.text))

0,1,2
,(function(){var id='tsuid_1';document.getElementById(id).onclick = function(){var s = document.createElement('script');s.src = this.getAttribute('data-script-url');(document.getElementById('xjsc')||document.body).appendChild(s);};})(); (function(){var id='tsuid_2';document.getElementById(id).onclick = function(){if (this.form.q.value){this.checked = 1;if (this.form.iflsig)this.form.iflsig.disabled = false;} else top.location='/doodles/';};})();,Rozšířené vyhledávání


## "Real-world" APIs

### Sreality

* surprisingly no need for authentication
https://www.sreality.cz/hledani/prodej/byty/praha

In [10]:
# Call the JSON endpoint directly; the filters and paging options
# (e.g. per_page=20) are passed as URL query parameters
r = requests.get('https://www.sreality.cz/api/cs/v2/estates?category_main_cb=1&category_type_cb=1&locality_region_id=10&per_page=20&tms=1678732084920')
# Raw response body as a string (JSON text, not yet parsed)
r.text

'{"meta_description": "5745 realit v nab\\u00eddce prodej byt\\u016f Praha. Vyberte si novou nemovitost na sreality.cz s hled\\u00e1n\\u00edm na map\\u011b a velk\\u00fdmi n\\u00e1hledy fotografi\\u00ed nab\\u00edzen\\u00fdch byt\\u016f.", "result_size": 5745, "_embedded": {"estates": [{"labelsReleased": [[], []], "has_panorama": 0, "labels": [], "is_auction": false, "labelsAll": [["new_building", "personal", "balcony", "cellar", "elevator", "parking_lots", "garage"], ["playground", "vet", "small_shop", "candy_shop", "tavern", "theater", "movies", "sightseeing", "train", "sports", "atm", "post_office", "kindergarten", "restaurant", "metro", "medic", "shop", "school", "tram", "bus_public_transport", "drugstore"]], "seo": {"category_main_cb": 1, "category_sub_cb": 8, "category_type_cb": 1, "locality": "praha-zizkov-krasova"}, "exclusively_at_rk": 0, "category": 1, "has_floor_plan": 1, "_embedded": {"favourite": {"is_favourite": false, "_links": {"self": {"profile": "/favourite/doc", "hre

In [16]:
# .json() parses the response body; the top-level JSON object becomes a Python dict
type(r.json())

dict

In [17]:
# Same endpoint as before, this time without the per_page and tms parameters
r = requests.get('https://www.sreality.cz/api/cs/v2/estates?category_main_cb=1&category_type_cb=1&locality_region_id=10')
r.text

'{"meta_description": "5744 realit v nab\\u00eddce prodej byt\\u016f Praha. Vyberte si novou nemovitost na sreality.cz s hled\\u00e1n\\u00edm na map\\u011b a velk\\u00fdmi n\\u00e1hledy fotografi\\u00ed nab\\u00edzen\\u00fdch byt\\u016f.", "result_size": 5744, "_embedded": {"estates": [{"labelsReleased": [["new_building", "terrace", "garage"], []], "has_panorama": 0, "labels": ["Novostavba", "Terasa", "Gar\\u00e1\\u017e"], "is_auction": false, "labelsAll": [["new_building", "personal", "terrace", "cellar", "elevator", "parking_lots", "garage"], ["playground", "small_shop", "vet", "candy_shop", "tavern", "theater", "movies", "sightseeing", "drugstore", "shop", "kindergarten", "post_office", "school", "sports", "restaurant", "metro", "tram", "medic", "atm", "bus_public_transport", "train"]], "seo": {"category_main_cb": 1, "category_sub_cb": 8, "category_type_cb": 1, "locality": "praha-zizkov-krasova"}, "exclusively_at_rk": 1, "category": 1, "has_floor_plan": 1, "_embedded": {"favourite":

In [18]:
# Parse the JSON body once and keep the resulting dict for the following cells
d = r.json()

In [19]:
# Top-level keys of the parsed response
d.keys()

dict_keys(['meta_description', 'result_size', '_embedded', 'filterLabels', 'title', 'filter', '_links', 'locality', 'locality_dativ', 'logged_in', 'per_page', 'category_instrumental', 'page', 'filterLabels2'])

In [20]:
# Flatten the list of estate records into a DataFrame; nested dicts become
# dotted column names (e.g. gps.lat, price_czk.unit)
pd.json_normalize(d['_embedded']['estates'])

Unnamed: 0,labelsReleased,has_panorama,labels,is_auction,labelsAll,exclusively_at_rk,category,has_floor_plan,paid_logo,locality,...,price_czk.unit,price_czk.name,_links.dynamicDown,_links.dynamicUp,_links.iterator.href,_links.self.href,_links.images,_links.image_middle2,gps.lat,gps.lon
0,"[[new_building, terrace, garage], []]",0,"[Novostavba, Terasa, Garáž]",False,"[[new_building, personal, terrace, cellar, ele...",1,1,1,1,Praha 3 - Žižkov,...,,Celková cena,[{'href': 'https://d18-a.sdn.cz/d_18/c_img_QO_...,[{'href': 'https://d18-a.sdn.cz/d_18/c_img_QO_...,/cs/v2/estate-iterator/0?category_main_cb=1&su...,/cs/v2/estates/981869644?region_tip=2462033,[{'href': 'https://d18-a.sdn.cz/d_18/c_img_QO_...,[{'href': 'https://d18-a.sdn.cz/d_18/c_img_QO_...,50.078609,14.45225
1,"[[collective, loggia, panel], []]",0,"[Družstevní, Lodžie, Panelová]",False,"[[collective, loggia, panel, cellar, elevator,...",1,1,0,0,Praha 4 - Chodov,...,,Celková cena,[{'href': 'https://d18-a.sdn.cz/d_18/c_img_gV_...,[{'href': 'https://d18-a.sdn.cz/d_18/c_img_gV_...,/cs/v2/estate-iterator/1?category_main_cb=1&su...,/cs/v2/estates/3015382860,[{'href': 'https://d18-a.sdn.cz/d_18/c_img_gV_...,[{'href': 'https://d18-a.sdn.cz/d_18/c_img_gV_...,50.025432,14.490284
2,"[[in_construction], []]",0,[Ve výstavbě],False,"[[in_construction, personal, brick, cellar, el...",0,1,0,0,Praha 2 - Nusle,...,,Celková cena,[{'href': 'https://d18-a.sdn.cz/d_18/c_img_gR_...,[{'href': 'https://d18-a.sdn.cz/d_18/c_img_gR_...,/cs/v2/estate-iterator/2?category_main_cb=1&su...,/cs/v2/estates/3937688412,[{'href': 'https://d18-a.sdn.cz/d_18/c_img_gR_...,[{'href': 'https://d18-a.sdn.cz/d_18/c_img_gR_...,50.060039,14.437084
3,"[[loggia], []]",0,[Lodžie],False,"[[personal, loggia, cellar, elevator], [small_...",1,1,0,0,Praha 10 - Vršovice,...,,Celková cena,[{'href': 'https://d18-a.sdn.cz/d_18/c_img_gR_...,[{'href': 'https://d18-a.sdn.cz/d_18/c_img_gR_...,/cs/v2/estate-iterator/3?category_main_cb=1&su...,/cs/v2/estates/371754572,[{'href': 'https://d18-a.sdn.cz/d_18/c_img_gR_...,[{'href': 'https://d18-a.sdn.cz/d_18/c_img_gR_...,50.064327,14.457003
4,"[[], []]",0,[],False,"[[personal, brick, elevator], [playground, sma...",0,1,0,0,Praha 2 - Vinohrady,...,,Celková cena,[{'href': 'https://d18-a.sdn.cz/d_18/c_img_gQ_...,[{'href': 'https://d18-a.sdn.cz/d_18/c_img_gQ_...,/cs/v2/estate-iterator/4?category_main_cb=1&su...,/cs/v2/estates/2700240460,[{'href': 'https://d18-a.sdn.cz/d_18/c_img_gQ_...,[{'href': 'https://d18-a.sdn.cz/d_18/c_img_gQ_...,50.072055,14.447245
5,"[[], []]",0,[],False,"[[personal, brick, elevator], [small_shop, can...",0,1,0,0,Praha 2 - Vinohrady,...,,Celková cena,[{'href': 'https://d18-a.sdn.cz/d_18/c_img_gV_...,[{'href': 'https://d18-a.sdn.cz/d_18/c_img_gV_...,/cs/v2/estate-iterator/5?category_main_cb=1&su...,/cs/v2/estates/2532468300,[{'href': 'https://d18-a.sdn.cz/d_18/c_img_gV_...,[{'href': 'https://d18-a.sdn.cz/d_18/c_img_gV_...,50.072055,14.447245
6,"[[], []]",0,[],False,"[[personal, balcony, brick, elevator], [playgr...",0,1,0,0,Praha 2 - Vinohrady,...,,Celková cena,[{'href': 'https://d18-a.sdn.cz/d_18/c_img_QQ_...,[{'href': 'https://d18-a.sdn.cz/d_18/c_img_QQ_...,/cs/v2/estate-iterator/6?category_main_cb=1&su...,/cs/v2/estates/1441949260,[{'href': 'https://d18-a.sdn.cz/d_18/c_img_QQ_...,[{'href': 'https://d18-a.sdn.cz/d_18/c_img_QQ_...,50.072055,14.447245
7,"[[], []]",0,[],False,"[[personal, balcony, brick, elevator], [playgr...",0,1,0,0,Praha 2 - Vinohrady,...,,Celková cena,[{'href': 'https://d18-a.sdn.cz/d_18/c_img_QN_...,[{'href': 'https://d18-a.sdn.cz/d_18/c_img_QN_...,/cs/v2/estate-iterator/7?category_main_cb=1&su...,/cs/v2/estates/3739379276,[{'href': 'https://d18-a.sdn.cz/d_18/c_img_QN_...,[{'href': 'https://d18-a.sdn.cz/d_18/c_img_QN_...,50.072055,14.447245
8,"[[], []]",0,[],False,"[[personal, balcony, brick, elevator], [playgr...",0,1,0,0,Praha 2 - Vinohrady,...,,Celková cena,[{'href': 'https://d18-a.sdn.cz/d_18/c_img_gZ_...,[{'href': 'https://d18-a.sdn.cz/d_18/c_img_gZ_...,/cs/v2/estate-iterator/8?category_main_cb=1&su...,/cs/v2/estates/1357014604,[{'href': 'https://d18-a.sdn.cz/d_18/c_img_gZ_...,[{'href': 'https://d18-a.sdn.cz/d_18/c_img_gZ_...,50.072055,14.447245
9,"[[after_reconstruction, furnished], []]",0,"[Po rekonstrukci, Vybavený]",False,"[[personal, after_reconstruction, brick, eleva...",0,1,0,0,Praha 5 - Smíchov,...,,Celková cena,[{'href': 'https://d18-a.sdn.cz/d_18/c_img_QI_...,[{'href': 'https://d18-a.sdn.cz/d_18/c_img_QI_...,/cs/v2/estate-iterator/9?category_main_cb=1&su...,/cs/v2/estates/3546866764,[{'href': 'https://d18-a.sdn.cz/d_18/c_img_QI_...,[{'href': 'https://d18-a.sdn.cz/d_18/c_img_QI_...,50.06953,14.411436


In [21]:
# NOTE(review): same call as the previous cell - looks like a leftover duplicate
pd.json_normalize(d['_embedded']['estates'])

Unnamed: 0,labelsReleased,has_panorama,labels,is_auction,labelsAll,exclusively_at_rk,category,has_floor_plan,paid_logo,locality,...,price_czk.unit,price_czk.name,_links.dynamicDown,_links.dynamicUp,_links.iterator.href,_links.self.href,_links.images,_links.image_middle2,gps.lat,gps.lon
0,"[[new_building, terrace, garage], []]",0,"[Novostavba, Terasa, Garáž]",False,"[[new_building, personal, terrace, cellar, ele...",1,1,1,1,Praha 3 - Žižkov,...,,Celková cena,[{'href': 'https://d18-a.sdn.cz/d_18/c_img_QO_...,[{'href': 'https://d18-a.sdn.cz/d_18/c_img_QO_...,/cs/v2/estate-iterator/0?category_main_cb=1&su...,/cs/v2/estates/981869644?region_tip=2462033,[{'href': 'https://d18-a.sdn.cz/d_18/c_img_QO_...,[{'href': 'https://d18-a.sdn.cz/d_18/c_img_QO_...,50.078609,14.45225
1,"[[collective, loggia, panel], []]",0,"[Družstevní, Lodžie, Panelová]",False,"[[collective, loggia, panel, cellar, elevator,...",1,1,0,0,Praha 4 - Chodov,...,,Celková cena,[{'href': 'https://d18-a.sdn.cz/d_18/c_img_gV_...,[{'href': 'https://d18-a.sdn.cz/d_18/c_img_gV_...,/cs/v2/estate-iterator/1?category_main_cb=1&su...,/cs/v2/estates/3015382860,[{'href': 'https://d18-a.sdn.cz/d_18/c_img_gV_...,[{'href': 'https://d18-a.sdn.cz/d_18/c_img_gV_...,50.025432,14.490284
2,"[[in_construction], []]",0,[Ve výstavbě],False,"[[in_construction, personal, brick, cellar, el...",0,1,0,0,Praha 2 - Nusle,...,,Celková cena,[{'href': 'https://d18-a.sdn.cz/d_18/c_img_gR_...,[{'href': 'https://d18-a.sdn.cz/d_18/c_img_gR_...,/cs/v2/estate-iterator/2?category_main_cb=1&su...,/cs/v2/estates/3937688412,[{'href': 'https://d18-a.sdn.cz/d_18/c_img_gR_...,[{'href': 'https://d18-a.sdn.cz/d_18/c_img_gR_...,50.060039,14.437084
3,"[[loggia], []]",0,[Lodžie],False,"[[personal, loggia, cellar, elevator], [small_...",1,1,0,0,Praha 10 - Vršovice,...,,Celková cena,[{'href': 'https://d18-a.sdn.cz/d_18/c_img_gR_...,[{'href': 'https://d18-a.sdn.cz/d_18/c_img_gR_...,/cs/v2/estate-iterator/3?category_main_cb=1&su...,/cs/v2/estates/371754572,[{'href': 'https://d18-a.sdn.cz/d_18/c_img_gR_...,[{'href': 'https://d18-a.sdn.cz/d_18/c_img_gR_...,50.064327,14.457003
4,"[[], []]",0,[],False,"[[personal, brick, elevator], [playground, sma...",0,1,0,0,Praha 2 - Vinohrady,...,,Celková cena,[{'href': 'https://d18-a.sdn.cz/d_18/c_img_gQ_...,[{'href': 'https://d18-a.sdn.cz/d_18/c_img_gQ_...,/cs/v2/estate-iterator/4?category_main_cb=1&su...,/cs/v2/estates/2700240460,[{'href': 'https://d18-a.sdn.cz/d_18/c_img_gQ_...,[{'href': 'https://d18-a.sdn.cz/d_18/c_img_gQ_...,50.072055,14.447245
5,"[[], []]",0,[],False,"[[personal, brick, elevator], [small_shop, can...",0,1,0,0,Praha 2 - Vinohrady,...,,Celková cena,[{'href': 'https://d18-a.sdn.cz/d_18/c_img_gV_...,[{'href': 'https://d18-a.sdn.cz/d_18/c_img_gV_...,/cs/v2/estate-iterator/5?category_main_cb=1&su...,/cs/v2/estates/2532468300,[{'href': 'https://d18-a.sdn.cz/d_18/c_img_gV_...,[{'href': 'https://d18-a.sdn.cz/d_18/c_img_gV_...,50.072055,14.447245
6,"[[], []]",0,[],False,"[[personal, balcony, brick, elevator], [playgr...",0,1,0,0,Praha 2 - Vinohrady,...,,Celková cena,[{'href': 'https://d18-a.sdn.cz/d_18/c_img_QQ_...,[{'href': 'https://d18-a.sdn.cz/d_18/c_img_QQ_...,/cs/v2/estate-iterator/6?category_main_cb=1&su...,/cs/v2/estates/1441949260,[{'href': 'https://d18-a.sdn.cz/d_18/c_img_QQ_...,[{'href': 'https://d18-a.sdn.cz/d_18/c_img_QQ_...,50.072055,14.447245
7,"[[], []]",0,[],False,"[[personal, balcony, brick, elevator], [playgr...",0,1,0,0,Praha 2 - Vinohrady,...,,Celková cena,[{'href': 'https://d18-a.sdn.cz/d_18/c_img_QN_...,[{'href': 'https://d18-a.sdn.cz/d_18/c_img_QN_...,/cs/v2/estate-iterator/7?category_main_cb=1&su...,/cs/v2/estates/3739379276,[{'href': 'https://d18-a.sdn.cz/d_18/c_img_QN_...,[{'href': 'https://d18-a.sdn.cz/d_18/c_img_QN_...,50.072055,14.447245
8,"[[], []]",0,[],False,"[[personal, balcony, brick, elevator], [playgr...",0,1,0,0,Praha 2 - Vinohrady,...,,Celková cena,[{'href': 'https://d18-a.sdn.cz/d_18/c_img_gZ_...,[{'href': 'https://d18-a.sdn.cz/d_18/c_img_gZ_...,/cs/v2/estate-iterator/8?category_main_cb=1&su...,/cs/v2/estates/1357014604,[{'href': 'https://d18-a.sdn.cz/d_18/c_img_gZ_...,[{'href': 'https://d18-a.sdn.cz/d_18/c_img_gZ_...,50.072055,14.447245
9,"[[after_reconstruction, furnished], []]",0,"[Po rekonstrukci, Vybavený]",False,"[[personal, after_reconstruction, brick, eleva...",0,1,0,0,Praha 5 - Smíchov,...,,Celková cena,[{'href': 'https://d18-a.sdn.cz/d_18/c_img_QI_...,[{'href': 'https://d18-a.sdn.cz/d_18/c_img_QI_...,/cs/v2/estate-iterator/9?category_main_cb=1&su...,/cs/v2/estates/3546866764,[{'href': 'https://d18-a.sdn.cz/d_18/c_img_QI_...,[{'href': 'https://d18-a.sdn.cz/d_18/c_img_QI_...,50.06953,14.411436


### World Bank

#### Exploratory request

In [22]:
# Request the SP.POP.TOTL indicator for all countries (per_page=100) and decode
# the JSON body in one step. Note that `d` is reused: it is now the World Bank response.
d = requests.get('http://api.worldbank.org/v2/country/all/indicator/SP.POP.TOTL?format=json&per_page=100').json()

In [23]:
# First element of the response: paging metadata (page, pages, per_page, total, ...)
d[0]

{'page': 1,
 'pages': 165,
 'per_page': 100,
 'total': 16492,
 'sourceid': '2',
 'sourcename': 'World Development Indicators',
 'lastupdated': '2023-03-01'}

#### Look at the data in the dataframe

In [24]:
# Second element of the response: the data records, flattened into a DataFrame
pd.json_normalize(d[1])

Unnamed: 0,countryiso3code,date,value,unit,obs_status,decimal,indicator.id,indicator.value,country.id,country.value
0,AFE,2021,702976832,,,0,SP.POP.TOTL,"Population, total",ZH,Africa Eastern and Southern
1,AFE,2020,685112705,,,0,SP.POP.TOTL,"Population, total",ZH,Africa Eastern and Southern
2,AFE,2019,667242712,,,0,SP.POP.TOTL,"Population, total",ZH,Africa Eastern and Southern
3,AFE,2018,649756874,,,0,SP.POP.TOTL,"Population, total",ZH,Africa Eastern and Southern
4,AFE,2017,632746296,,,0,SP.POP.TOTL,"Population, total",ZH,Africa Eastern and Southern
...,...,...,...,...,...,...,...,...,...,...
95,AFW,1988,195969722,,,0,SP.POP.TOTL,"Population, total",ZI,Africa Western and Central
96,AFW,1987,190759952,,,0,SP.POP.TOTL,"Population, total",ZI,Africa Western and Central
97,AFW,1986,185720244,,,0,SP.POP.TOTL,"Population, total",ZI,Africa Western and Central
98,AFW,1985,180817312,,,0,SP.POP.TOTL,"Population, total",ZI,Africa Western and Central


### More advanced example: Paging

#### Return to Python basics 1: Errors and exceptions in Python

In [25]:
# Deliberately unguarded: the loop stops with ZeroDivisionError once i reaches 0
for i in range(-5,5):
    print(5/i)

-1.0
-1.25
-1.6666666666666667
-2.5
-5.0


ZeroDivisionError: division by zero

In [26]:
for i in range(-5, 5):
    try:
        print(1/i)
    # Catch only the error we expect. A bare `except:` would also swallow
    # KeyboardInterrupt and any unrelated bug inside the try block.
    except ZeroDivisionError:
        print(f'dividing with {i} raised an error. Are you sure your input was correct?')

-0.2
-0.25
-0.3333333333333333
-0.5
-1.0
dividing with 0 raised an error. Are you sure your input was correct?
1.0
0.5
0.3333333333333333
0.25


#### Return to Python basics 2: Formatting strings

In [27]:
my_name = 'Vítek'

# f-string: the expression inside {} is evaluated and inserted into the string
f'Hello {my_name}!'

'Hello Vítek!'

In [28]:
# A plain string with named placeholders; nothing is substituted until .format() is called.
string_template = 'Today {teachers_name} is teaching and he is in the {teachers_mood} mood'

# Placeholders are filled by keyword, so the order of the arguments does not matter.
string_template.format(teachers_mood='good', teachers_name='Vítek')

'Today Vítek is teaching and he is in the good mood'

Sending API requests is always risky - you do not control the other side of the transaction

Try fetching the first few pages of results (pages 1–9 in the loop below)

Always check if everything goes fine by checking the request status code

In [29]:
l = []  # collects the decoded JSON of every page that answered with a non-error status
url_template = 'http://api.worldbank.org/v2/country/all/indicator/SP.POP.TOTL?format=json&page={page_num}'

# range(1,10) yields page numbers 1 through 9. There is no exception handling
# here, so a network failure aborts the whole loop.
for i in range(1,10):
    r = requests.get(url_template.format(page_num=i))    
    if r.ok: #r.status_code == 200 would also work!
        l.append(r.json())

ConnectionError: HTTPConnectionPool(host='api.worldbank.org', port=80): Max retries exceeded with url: /v2/country/all/indicator/SP.POP.TOTL?format=json&page=1 (Caused by NewConnectionError('<urllib3.connection.HTTPConnection object at 0x000001EBB85C75D0>: Failed to establish a new connection: [Errno 11001] getaddrinfo failed'))

OK, but you still place a lot of trust in the other side; `try` and `except` is more robust

In [None]:
l = []  # decoded JSON of every page that was fetched successfully
url_template = 'http://api.worldbank.org/v2/country/all/indicator/SP.POP.TOTL?format=json&page={page_num}'

for i in range(1,10):
    url = url_template.format(page_num=i)
    try:
        r = requests.get(url)
        if r.ok: #r.status_code == 200 would also work!
            l.append(r.json())
    # Catch only request failures and undecodable bodies. A bare `except:` would
    # also swallow KeyboardInterrupt, making the loop impossible to stop.
    except (requests.RequestException, ValueError) as error:
        print(f'At least I want to know that something went wrong and when. Url: {url}. Error: {error}')

OMG, this looks a bit messy. I would consider writing a function to increase clarity

In [None]:
def request_worldbank(url):
    """Fetch one URL and return its decoded JSON payload.

    Parameters
    ----------
    url : str
        Fully formatted request URL.

    Returns
    -------
    The object produced by ``Response.json()`` when the server answers with a
    non-error status. ``None`` when the status signals an error, the request
    itself fails, or the body cannot be decoded; a message describing the
    problem is printed in each of those cases.
    """
    try:
        r = requests.get(url)
        if r.ok: #r.status_code == 200 would also work!
            return r.json()
        # A Response has no `message` attribute; `reason` carries the status text.
        print(f'The url request on {url} not succesful. Status code: {r.status_code}. Message: {r.reason}')
    # Only request failures and undecodable bodies are expected here; anything
    # else (including KeyboardInterrupt) should propagate.
    except (requests.RequestException, ValueError) as error:
        print(f'At least I want to know that something went wrong and when. Url: {url}. Error: {error}')
    return None

l = []  # one entry per page: the decoded JSON, or None when request_worldbank reported a failure
url_template = 'http://api.worldbank.org/v2/country/all/indicator/SP.POP.TOTL?format=json&page={page_num}'
# Pages 1 through 9
for i in range(1,10):
    l.append(request_worldbank(url_template.format(page_num=i)))

In [None]:
# Same download as the loop above, written as a list comprehension
l = [request_worldbank(url_template.format(page_num=i)) for i in range(1,10)]

# Each successful response is [metadata, records]; request_worldbank returns None
# for a failed page, so skip those instead of failing on None[1].
pd.concat([pd.json_normalize(output_json[1]) for output_json in l if output_json is not None])

In [None]:
def download_worldbank(indicator):
    """Download all pages of one World Bank indicator for all countries.

    The first request supplies both the first page of data and the total
    number of pages; the remaining pages are then fetched one by one.

    Parameters
    ----------
    indicator : str
        Indicator code inserted into the request URL, e.g. ``'SP.POP.TOTL'``.

    Returns
    -------
    pandas.Series
        The ``value`` column indexed by ``(countryiso3code, date)``. Pages that
        could not be downloaded are reported on stdout and left out.

    Raises
    ------
    requests.HTTPError
        If the very first request answers with an error status.
    """
    url_template = 'http://api.worldbank.org/v2/country/all/indicator/{indicator}?format=json&page={page}&per_page=500'

    first_response = requests.get(url_template.format(indicator=indicator, page=1))
    # Without page 1 the number of pages is unknown, so fail early and clearly.
    first_response.raise_for_status()
    first_request = first_response.json()

    pages = first_request[0]['pages']

    def single_worldbank_request(url):
        """Return one page as a DataFrame, or None (with a printed message) on failure."""
        try:
            r = requests.get(url)
            if r.ok:
                return pd.json_normalize(r.json()[1])
            print(f'Request on {url} was not successful. Status code: {r.status_code}')
        except Exception as e:
            # Ordinary exceptions have no `msg` attribute; format the exception itself
            # so that the handler cannot raise a second error.
            print(f'Could not parse an URL {url}. Read the message: {e}')
        return None

    first_data = pd.json_normalize(first_request[1])

    l = [single_worldbank_request(url_template.format(indicator=indicator, page=page)) for page in range(2, pages + 1)]

    # Keep only the pages that were actually downloaded
    frames = [first_data] + [frame for frame in l if frame is not None]
    return pd.concat(frames).set_index(['countryiso3code', 'date']).value
        
# Download every page of the SP.POP.TOTL indicator and display the resulting Series
population = download_worldbank('SP.POP.TOTL')
population

### Eurostat

In [30]:
from io import StringIO
# Request the NAMA_10_GDP dataset in SDMX-CSV format
r_gdp = requests.get('https://ec.europa.eu/eurostat/api/dissemination/sdmx/2.1/data/NAMA_10_GDP?format=SDMX-CSV')
# Wrap the response text in StringIO so read_csv can treat it like a file
gdp = pd.read_csv(StringIO(r_gdp.text))
gdp

Unnamed: 0,DATAFLOW,LAST UPDATE,freq,unit,na_item,geo,TIME_PERIOD,OBS_VALUE,OBS_FLAG
0,ESTAT:NAMA_10_GDP(1.0),24/03/23 23:00:00,A,CLV05_MEUR,B1G,AT,1995,177617.0,
1,ESTAT:NAMA_10_GDP(1.0),24/03/23 23:00:00,A,CLV05_MEUR,B1G,AT,1996,180999.4,
2,ESTAT:NAMA_10_GDP(1.0),24/03/23 23:00:00,A,CLV05_MEUR,B1G,AT,1997,184799.1,
3,ESTAT:NAMA_10_GDP(1.0),24/03/23 23:00:00,A,CLV05_MEUR,B1G,AT,1998,192025.4,
4,ESTAT:NAMA_10_GDP(1.0),24/03/23 23:00:00,A,CLV05_MEUR,B1G,AT,1999,198247.2,
...,...,...,...,...,...,...,...,...,...
844534,ESTAT:NAMA_10_GDP(1.0),24/03/23 23:00:00,A,PYP_MNAC,YA1,XK,2017,0.0,
844535,ESTAT:NAMA_10_GDP(1.0),24/03/23 23:00:00,A,PYP_MNAC,YA1,XK,2018,0.0,
844536,ESTAT:NAMA_10_GDP(1.0),24/03/23 23:00:00,A,PYP_MNAC,YA1,XK,2019,0.0,
844537,ESTAT:NAMA_10_GDP(1.0),24/03/23 23:00:00,A,PYP_MNAC,YA1,XK,2020,0.0,


In [31]:
# Distinct values of the `unit` column
gdp.unit.unique()

array(['CLV05_MEUR', 'CLV05_MNAC', 'CLV10_MEUR', 'CLV10_MNAC',
       'CLV15_MEUR', 'CLV15_MNAC', 'CLV_I05', 'CLV_I10', 'CLV_I15',
       'CLV_PCH_PRE', 'CON_PPCH_PRE', 'CP_MEUR', 'CP_MNAC',
       'CP_MPPS_EU27_2020', 'PC_EU27_2020_MEUR_CP',
       'PC_EU27_2020_MPPS_CP', 'PC_GDP', 'PD05_EUR', 'PD05_NAC',
       'PD10_EUR', 'PD10_NAC', 'PD15_EUR', 'PD15_NAC', 'PD_PCH_PRE_EUR',
       'PD_PCH_PRE_NAC', 'PYP_MEUR', 'PYP_MNAC'], dtype=object)

In [32]:
# Distinct values of the `na_item` column
gdp.na_item.unique()

array(['B1G', 'B1GQ', 'D21', 'D21X31', 'D31', 'P3', 'P31_S13', 'P31_S14',
       'P31_S14_S15', 'P31_S15', 'P32_S13', 'P3_P5', 'P3_P6', 'P3_S13',
       'P41', 'P51G', 'P5G', 'P6', 'P61', 'P62', 'P7', 'P71', 'P72',
       'P52_P53', 'B11', 'B111', 'B112', 'B2A3G', 'D1', 'D11', 'D12',
       'D2', 'D2X3', 'D3', 'P52', 'P53', 'YA0', 'YA1', 'YA2'],
      dtype=object)

In [None]:
# Distinct values of the `geo` column (a bare `geo` is an undefined name)
gdp.geo.unique()

In [None]:
# Distinct values of the `freq` column
gdp['freq'].unique()

### Twitter

### Scopus

In [None]:
# The API key is kept out of the notebook and imported from a local secret.py
from secret import SCOPUS_API_KEY
# The key is sent in the X-ELS-APIKey request header; the Accept header asks for a JSON response
r = requests.get('https://api.elsevier.com/content/search/scopus?query=AUTH(baruník, j.)  ',headers={'Accept':'application/json','X-ELS-APIKey': SCOPUS_API_KEY})

# Flatten the list of result entries into a DataFrame
pd.json_normalize(r.json()['search-results']['entry'])

### XML or even HTML data

In [None]:
response = requests.get('https://en.wikipedia.org/wiki/Charles_University')
# Name the parser explicitly: without it BeautifulSoup picks whichever parser is
# installed, which emits a warning and can give different trees on different machines.
soup = BeautifulSoup(response.text, 'html.parser')
# The element with id "mw-content-text" (CSS selector: #mw-content-text)
div = soup.find('div',{'id':'mw-content-text'})
# Join the text of every paragraph inside that element into one string
article = ' '.join([p.text for p in div.find_all('p')])
print(article)

# Bonus example:

<img src="http://ies.fsv.cuni.cz/default/file/get/id/31996" height="500" width="300">

Will not work without authentication.

* You will need an IAM account for Amazon Web Services
* For that you can create `AWS_ACCESS_KEY` and `AWS_SECRET_KEY`. See here: https://aws.amazon.com/premiumsupport/knowledge-center/create-access-key/
* create `secret.py` file and put `AWS_ACCESS_KEY` and `AWS_SECRET_KEY`. Follow the template of `secret-example.py`

In [None]:
!pip install boto3

In [None]:
import boto3

In [None]:
# Credentials are kept out of the notebook and imported from a local secret.py
from secret import AWS_ACCESS_KEY, AWS_SECRET_KEY

# Client for the AWS Rekognition service in the us-west-2 region
client=boto3.client('rekognition', 
                    region_name='us-west-2',
                    aws_access_key_id=AWS_ACCESS_KEY,
                    aws_secret_access_key=AWS_SECRET_KEY
)

# Send the raw bytes of the local image to the service
with open('./img/iespic.jpeg','rb') as f:
    response = client.recognize_celebrities(Image={'Bytes': f.read()})
# Bar chart of the Confidence per emotion Type for the first entry in 'UnrecognizedFaces'
pd.DataFrame(response['UnrecognizedFaces'][0]['Emotions']).set_index('Type').Confidence.plot.bar()

In [None]:
# Inspect the full response returned by recognize_celebrities
response