### 13.0 Exchange data across the web

- XML (eXtensible Markup Language)
- JSON (JavaScript Object Notation)

### 13.1 XML
XML looks very similar to HTML, but XML is more structured than HTML.
### 13.2 Parsing XML
### 13.3 Looping through nodes

In [1]:
import xml.etree.ElementTree as ET

data = '''
<person>
  <name>Chuck</name>
  <phone type="intl">
    +1 734 303 4456
  </phone>
  <email hide="yes" />
</person>'''

tree = ET.fromstring(data)
print('Name:', tree.find('name').text)
print('Attr:', tree.find('email').get('hide'))

Name: Chuck
Attr: yes


In [2]:
import xml.etree.ElementTree as ET

input = '''
<stuff>
  <users>
    <user x="2">
      <id>001</id>
      <name>Chuck</name>
    </user>
    <user x="7">
      <id>009</id>
      <name>Brent</name>
    </user>
  </users>
</stuff>'''

stuff = ET.fromstring(input)
lst = stuff.findall('users/user')
print('User count:', len(lst))

for item in lst:
    print('Name', item.find('name').text)
    print('Id', item.find('id').text)
    print('Attribute', item.get('x'))

User count: 2
Name Chuck
Id 001
Attribute 2
Name Brent
Id 009
Attribute 7


### 13.4 JSON
### 13.5 Parsing JSON

In [5]:
import json

data = '''
{
  "name" : "Chuck",
  "phone" : {
    "type" : "intl",
    "number" : "+1 734 303 4456"
   },
   "email" : {
     "hide" : "yes"
   }
}'''

info = json.loads(data)
print('Name:', info["name"])
print('Hide:', info["email"]["hide"])

Name: Chuck
Hide: yes


In [6]:
type(info)

dict

In [7]:
import json

data = '''
[
  { "id" : "001",
    "x" : "2",
    "name" : "Chuck"
  } ,
  { "id" : "009",
    "x" : "7",
    "name" : "Brent"
  }
]'''

info = json.loads(data)
print('User count:', len(info))

for item in info:
    print('Name', item['name'])
    print('Id', item['id'])
    print('Attribute', item['x'])

User count: 2
Name Chuck
Id 001
Attribute 2
Name Brent
Id 009
Attribute 7


In [8]:
type(info)

list

### 13.6 Application Programming Interfaces (API)

- API is the general name for the application-to-application contracts.
- Service-Oriented Architecture (SOA) approach is one where our overall application makes use of the services of other applications.
- A non-SOA approach is where the application is a single standalone application which contains all of the code necessary to implement the application.

### 13.7 Google geocoding web services

In [1]:
import urllib.request, urllib.parse, urllib.error
import json
import ssl

api_key = False
# If you have a Google Places API key, enter it here
# api_key = 'AIzaSy___IDByT70'
# https://developers.google.com/maps/documentation/geocoding/intro

if api_key is False:
    api_key = 42
    serviceurl = 'http://py4e-data.dr-chuck.net/json?'
else :
    serviceurl = 'https://maps.googleapis.com/maps/api/geocode/json?'

# Ignore SSL certificate errors
ctx = ssl.create_default_context()
ctx.check_hostname = False
ctx.verify_mode = ssl.CERT_NONE

while True:
    address = input('Enter location: ')
    if len(address) < 1: break

    parms = dict()
    parms['address'] = address
    if api_key is not False: parms['key'] = api_key
    url = serviceurl + urllib.parse.urlencode(parms)

    print('Retrieving', url)
    uh = urllib.request.urlopen(url, context=ctx)
    data = uh.read().decode()
    print('Retrieved', len(data), 'characters')

    try:
        js = json.loads(data)
    except:
        js = None

    if not js or 'status' not in js or js['status'] != 'OK':
        print('==== Failure To Retrieve ====')
        print(data)
        continue

    print(json.dumps(js, indent=4))

    lat = js['results'][0]['geometry']['location']['lat']
    lng = js['results'][0]['geometry']['location']['lng']
    print('lat', lat, 'lng', lng)
    location = js['results'][0]['formatted_address']
    print(location)

Enter location: Hefei
Retrieving http://py4e-data.dr-chuck.net/json?address=Hefei&key=42
Retrieved 1521 characters
{
    "results": [
        {
            "address_components": [
                {
                    "long_name": "Hefei",
                    "short_name": "Hefei",
                    "types": [
                        "locality",
                        "political"
                    ]
                },
                {
                    "long_name": "Anhui",
                    "short_name": "Anhui",
                    "types": [
                        "administrative_area_level_1",
                        "political"
                    ]
                },
                {
                    "long_name": "China",
                    "short_name": "CN",
                    "types": [
                        "country",
                        "political"
                    ]
                }
            ],
            "formatted_address": "Hefei, Anhui, Ch

In [2]:
import urllib.request, urllib.parse, urllib.error
import xml.etree.ElementTree as ET
import ssl

api_key = False
# If you have a Google Places API key, enter it here
# api_key = 'AIzaSy___IDByT70'
# https://developers.google.com/maps/documentation/geocoding/intro

if api_key is False:
    api_key = 42
    serviceurl = 'http://py4e-data.dr-chuck.net/xml?'
else :
    serviceurl = 'https://maps.googleapis.com/maps/api/geocode/xml?'

# Ignore SSL certificate errors
ctx = ssl.create_default_context()
ctx.check_hostname = False
ctx.verify_mode = ssl.CERT_NONE

while True:
    address = input('Enter location: ')
    if len(address) < 1: break

    parms = dict()
    parms['address'] = address
    if api_key is not False: parms['key'] = api_key
    url = serviceurl + urllib.parse.urlencode(parms)
    print('Retrieving', url)
    uh = urllib.request.urlopen(url, context=ctx)

    data = uh.read()
    print('Retrieved', len(data), 'characters')
    print(data.decode())
    tree = ET.fromstring(data)

    results = tree.findall('result')
    lat = results[0].find('geometry').find('location').find('lat').text
    lng = results[0].find('geometry').find('location').find('lng').text
    location = results[0].find('formatted_address').text

    print('lat', lat, 'lng', lng)
    print(location)

Enter location: Dayton
Retrieving http://py4e-data.dr-chuck.net/xml?address=Dayton&key=42
Retrieved 1755 characters
<?xml version="1.0" encoding="UTF-8"?>
<GeocodeResponse>
 <status>OK</status>
 <result>
  <type>locality</type>
  <type>political</type>
  <formatted_address>Dayton, OH, USA</formatted_address>
  <address_component>
   <long_name>Dayton</long_name>
   <short_name>Dayton</short_name>
   <type>locality</type>
   <type>political</type>
  </address_component>
  <address_component>
   <long_name>Jefferson Township</long_name>
   <short_name>Jefferson Township</short_name>
   <type>administrative_area_level_3</type>
   <type>political</type>
  </address_component>
  <address_component>
   <long_name>Montgomery County</long_name>
   <short_name>Montgomery County</short_name>
   <type>administrative_area_level_2</type>
   <type>political</type>
  </address_component>
  <address_component>
   <long_name>Ohio</long_name>
   <short_name>OH</short_name>
   <type>administrative_area_l

### 13.8 OAuth and Twitter API

See the codes part.

### 13.9 Glossary
### 13.10 Exercises

In [4]:
# web1
import urllib.request, urllib.parse, urllib.error
import xml.etree.ElementTree as ET
import ssl

url = input("Enter location:")

# Ignore SSL certificate errors
ctx = ssl.create_default_context()
ctx.check_hostname = False
ctx.verify_mode = ssl.CERT_NONE

uh = urllib.request.urlopen(url, context=ctx)
data = uh.read()
tree = ET.fromstring(data)
results = tree.findall('.//count')
acc = 0
count =0
for result in results:
    #print(result.text)
    acc = int(result.text)+acc
    count = count + 1

print(count)
print(acc)


Enter location: http://py4e-data.dr-chuck.net/comments_216872.xml
50
2539


In [19]:
# web2
import json
import urllib.request, urllib.parse, urllib.error
import ssl

url = input("Enter location:")

# Ignore SSL certificate errors
ctx = ssl.create_default_context()
ctx.check_hostname = False
ctx.verify_mode = ssl.CERT_NONE

uh = urllib.request.urlopen(url, context=ctx)
data = uh.read()

info = json.loads(data)
print('User count:', len(info))

acc=0
count=0
for item in info["comments"]:
    acc = item["count"] + acc
    count = count + 1
print(count)
print(acc)

Enter location: http://py4e-data.dr-chuck.net/comments_216873.json
User count: 2
50
2507


In [2]:
# web3
import urllib.request, urllib.parse, urllib.error
import json
import ssl

api_key = False
# If you have a Google Places API key, enter it here
# api_key = 'AIzaSy___IDByT70'
# https://developers.google.com/maps/documentation/geocoding/intro

if api_key is False:
    api_key = 42
    serviceurl = 'http://py4e-data.dr-chuck.net/json?'
else :
    serviceurl = 'https://maps.googleapis.com/maps/api/geocode/json?'

# Ignore SSL certificate errors
ctx = ssl.create_default_context()
ctx.check_hostname = False
ctx.verify_mode = ssl.CERT_NONE

while True:
    address = input('Enter location: ')
    if len(address) < 1: break

    parms = dict()
    parms['address'] = address
    if api_key is not False: parms['key'] = api_key
    url = serviceurl + urllib.parse.urlencode(parms)

    print('Retrieving', url)
    uh = urllib.request.urlopen(url, context=ctx)
    data = uh.read().decode()
    print('Retrieved', len(data), 'characters')

    try:
        js = json.loads(data)
    except:
        js = None

    if not js or 'status' not in js or js['status'] != 'OK':
        print('==== Failure To Retrieve ====')
        print(data)
        continue

    # print(json.dumps(js, indent=4))

    placeID = js['results'][0]['place_id']
    print(placeID)

Enter location: University of Wisconsin
Retrieving http://py4e-data.dr-chuck.net/json?address=University+of+Wisconsin&key=42
Retrieved 1804 characters
ChIJHUh_CpWsB4gRZDpzkeBl4Mk
Enter location: 
