## Some XML Things

In [10]:
import xml.etree.ElementTree as ET
import pandas as pd

In [5]:
tree = ET.parse('nchs.xml')
root = tree.getroot()
print(root)

<Element 'response' at 0x1072dfb38>


In [6]:
for child in root:
    print(child.tag, child.attrib)

row {}


In [7]:
count = 0
for child in root:
    print('Child:\n')
    print(child.tag, child.attrib)
    print('Grandchildren:\n')
    for grandchild in child:
        count += 1
        if count < 10:
            print(grandchild.tag, grandchild.attrib)

Child:

row {}
Grandchildren:

row {'_id': 'row-8v4v.sspw~9gpw', '_uuid': '00000000-0000-0000-DF16-ABED6684B7B3', '_position': '0', '_address': 'https://data.cdc.gov/resource/bi63-dtpu/row-8v4v.sspw~9gpw'}
row {'_id': 'row-62a7~k9va.wwyw', '_uuid': '00000000-0000-0000-F264-BCEB7735BBB3', '_position': '0', '_address': 'https://data.cdc.gov/resource/bi63-dtpu/row-62a7~k9va.wwyw'}
row {'_id': 'row-8u2q-8j6a-9g3w', '_uuid': '00000000-0000-0000-69D6-672A77317666', '_position': '0', '_address': 'https://data.cdc.gov/resource/bi63-dtpu/row-8u2q-8j6a-9g3w'}
row {'_id': 'row-2nd8~f8zx_jpq2', '_uuid': '00000000-0000-0000-3C5C-6524299785AF', '_position': '0', '_address': 'https://data.cdc.gov/resource/bi63-dtpu/row-2nd8~f8zx_jpq2'}
row {'_id': 'row-yv27~hz42-w56a', '_uuid': '00000000-0000-0000-5B3E-F9DDF029D3C3', '_position': '0', '_address': 'https://data.cdc.gov/resource/bi63-dtpu/row-yv27~hz42-w56a'}
row {'_id': 'row-5bkx_f29z_2wrg', '_uuid': '00000000-0000-0000-3875-EC5688E58C7F', '_position'

In [8]:
count = 0
for element in root.iter():
    count += 1
    if count < 10:
        print(element.tag, element.text)

response None
row None
row None
year 2012
_113_cause_name Nephritis, nephrotic syndrome and nephrosis (N00-N07,N17-N19,N25-N27)
cause_name Kidney disease
state Vermont
deaths 21
aadr 2.6


In [11]:
dfs = []
for n, element in enumerate(root.iter('row')):
    if n > 0:
        dfs.append(pd.DataFrame.from_dict(element.attrib, orient='index').transpose())
df = pd.concat(dfs)
print(len(df))
df.head()

10296


Unnamed: 0,_id,_uuid,_position,_address
0,row-8v4v.sspw~9gpw,00000000-0000-0000-DF16-ABED6684B7B3,0,https://data.cdc.gov/resource/bi63-dtpu/row-8v...
0,row-62a7~k9va.wwyw,00000000-0000-0000-F264-BCEB7735BBB3,0,https://data.cdc.gov/resource/bi63-dtpu/row-62...
0,row-8u2q-8j6a-9g3w,00000000-0000-0000-69D6-672A77317666,0,https://data.cdc.gov/resource/bi63-dtpu/row-8u...
0,row-2nd8~f8zx_jpq2,00000000-0000-0000-3C5C-6524299785AF,0,https://data.cdc.gov/resource/bi63-dtpu/row-2n...
0,row-yv27~hz42-w56a,00000000-0000-0000-5B3E-F9DDF029D3C3,0,https://data.cdc.gov/resource/bi63-dtpu/row-yv...


## Time for JSON

In [4]:
# import the json library
import json

In [5]:
# load the json file and assign it to data
f = open('google-maps-geocoding-results.json')
data = json.load(f)

### Let's look at the data

In [None]:
print(data)

In [None]:
data

### Hm, that second bit looks familiar....

In [None]:
type(data)

In [17]:
data.keys()

dict_keys(['results', 'status'])

In [19]:
data['results']

[{'address_components': [{'long_name': '708',
    'short_name': '708',
    'types': ['street_number']},
   {'long_name': 'Main street', 'short_name': 'Main St', 'types': ['route']},
   {'long_name': 'Central Business District',
    'short_name': 'Central Business District',
    'types': ['neighborhood', 'political']},
   {'long_name': 'Houston',
    'short_name': 'Houston',
    'types': ['locality', 'political']},
   {'long_name': 'Harris County',
    'short_name': 'Harris County',
    'types': ['administrative_area_level_2', 'political']},
   {'long_name': 'Texas',
    'short_name': 'TX',
    'types': ['administrative_area_level_1', 'political']},
   {'long_name': 'United States',
    'short_name': 'US',
    'types': ['country', 'political']},
   {'long_name': '77002', 'short_name': '77002', 'types': ['postal_code']}],
  'formatted_address': '708 Main St, Houston, TX 77002, USA',
  'geometry': {'location': {'lat': 29.7590859, 'lng': -95.3636325},
   'location_type': 'ROOFTOP',
   'vie

In [20]:
# the 'results' key looks like a list of 1 element for now
data['results'][0].keys()

dict_keys(['address_components', 'formatted_address', 'geometry', 'place_id', 'plus_code', 'types'])

In [21]:
# Let's target a specific value like 'Main street'
data['results'][0]['address_components'][1]['long_name']

'Main street'

In [28]:
for address_component in data['results']:
    for component in address_component['address_components']:
        print(component['long_name'])

708
Main street
Central Business District
Houston
Harris County
Texas
United States
77002


In [1]:
def id_generator(dict_var):
    for k, v in dict_var.items():
        if k == "id":
            yield v
        elif isinstance(v, dict):
            for id_val in id_generator(v):
                yield id_val

In [7]:
for tag in id_generator(data):
    print(tag)

## Recursion

https://www.programmableweb.com/apis/directory
https://github.com/toddmotto/public-apis
https://blog.rapidapi.com/most-popular-apis/

In [59]:
# Iterative Factorial Solution:
def iterative_factorial(n):
    result = 1
    for i in range(2,n+1):
        result *= i
    return result

iterative_factorial(6)

720

In [9]:
# Recursive Factorial Solution:
def factorial(n):
    if n == 1:
        return 1
    else:
        return n * factorial(n-1)
    
factorial(8)

40320

In [10]:
def factorial(n):
    if n == 1:
        return 1
    else:
        print("factorial has been called with n = " + str(n))
        result = n * factorial(n-1)
        print("intermediate result for", n, "* factorial(", n-1, "):", result)
        return result

print(factorial(8))

factorial has been called with n = 8
factorial has been called with n = 7
factorial has been called with n = 6
factorial has been called with n = 5
factorial has been called with n = 4
factorial has been called with n = 3
factorial has been called with n = 2
intermediate result for 2 * factorial( 1 ): 2
intermediate result for 3 * factorial( 2 ): 6
intermediate result for 4 * factorial( 3 ): 24
intermediate result for 5 * factorial( 4 ): 120
intermediate result for 6 * factorial( 5 ): 720
intermediate result for 7 * factorial( 6 ): 5040
intermediate result for 8 * factorial( 7 ): 40320
40320


https://www.programmableweb.com/apis/directory
https://github.com/toddmotto/public-apis
https://blog.rapidapi.com/most-popular-apis/