### Importing from JSON

In [2]:
import json  # part of standard library: https://docs.python.org/3/library/json.html

In [3]:
# json.load reads a JSON document from an open file-like object (anything with .read())
help(json.load)

Help on function load in module json:

load(fp, *, cls=None, object_hook=None, parse_float=None, parse_int=None, parse_constant=None, object_pairs_hook=None, **kw)
    Deserialize ``fp`` (a ``.read()``-supporting file-like object containing
    a JSON document) to a Python object.
    
    ``object_hook`` is an optional function that will be called with the
    result of any object literal decode (a ``dict``). The return value of
    ``object_hook`` will be used instead of the ``dict``. This feature
    can be used to implement custom decoders (e.g. JSON-RPC class hinting).
    
    ``object_pairs_hook`` is an optional function that will be called with the
    result of any object literal decoded with an ordered list of pairs.  The
    return value of ``object_pairs_hook`` will be used instead of the ``dict``.
    This feature can be used to implement custom decoders.  If ``object_hook``
    is also defined, the ``object_pairs_hook`` takes priority.
    
    To use a custom ``JSONDecod

In [4]:
# json.loads parses a JSON document that is already in memory as str/bytes/bytearray
help(json.loads)

Help on function loads in module json:

loads(s, *, encoding=None, cls=None, object_hook=None, parse_float=None, parse_int=None, parse_constant=None, object_pairs_hook=None, **kw)
    Deserialize ``s`` (a ``str``, ``bytes`` or ``bytearray`` instance
    containing a JSON document) to a Python object.
    
    ``object_hook`` is an optional function that will be called with the
    result of any object literal decode (a ``dict``). The return value of
    ``object_hook`` will be used instead of the ``dict``. This feature
    can be used to implement custom decoders (e.g. JSON-RPC class hinting).
    
    ``object_pairs_hook`` is an optional function that will be called with the
    result of any object literal decoded with an ordered list of pairs.  The
    return value of ``object_pairs_hook`` will be used instead of the ``dict``.
    This feature can be used to implement custom decoders.  If ``object_hook``
    is also defined, the ``object_pairs_hook`` takes priority.
    
    ``parse

In [10]:
# basic json loading
# Pass the encoding explicitly: without it open() falls back to the platform's
# locale encoding (e.g. cp1252 on Windows), so the same file could decode
# differently -- or fail -- depending on the machine running the notebook.
with open('./files/posts-100.json', 'r', encoding='utf-8') as f:
    posts = json.load(f)

# quick sanity checks: container type, record count, and one sample record
print(type(posts))
print(len(posts))
print(type(posts[0]))
print(posts[0])

<class 'list'>
100
<class 'dict'>
{'Id': 5, 'PostTypeId': '1', 'CreationDate': '2014-05-13T23:58:30.457', 'Score': 9, 'ViewCount': 448, 'LastActivityDate': '2014-05-14T00:36:31.077', 'Title': 'How can I do simple machine learning without hard-coding behavior?', 'Tags': '<machine-learning>', 'AnswerCount': 1, 'CommentCount': 1, 'FavoriteCount': 1, 'ClosedDate': '2014-05-14T14:40:25.950'}


In [15]:
# Caution: a JSON round trip does not preserve every Python data type.
tuple1 = ('one', 'two')
list1 = ['one', 'two']
print(type(tuple1), type(list1), sep='\n')

# Encode both objects as JSON text.
j_tuple1, j_list1 = json.dumps(tuple1), json.dumps(list1)
print(j_tuple1, j_list1, sep='\n')

# Decode the JSON text again -- the tuple AND the list both come back as lists.
print(type(json.loads(j_tuple1)), type(json.loads(j_list1)), sep='\n')

<class 'tuple'>
<class 'list'>
["one", "two"]
["one", "two"]
<class 'list'>
<class 'list'>


### Importing data from a JSON API

In [25]:
import requests  # simple library for http requests
from pprint import pprint as pp

test_api = 'https://jsonplaceholder.typicode.com/todos'

# requests applies no timeout by default, so a stalled server would hang the
# kernel indefinitely; 10 seconds is generous for this small test endpoint.
test_call = requests.get(test_api, timeout=10)
# fail loudly on a 4xx/5xx reply instead of trying to parse an error page as JSON
test_call.raise_for_status()
# Response.json() decodes the JSON body directly
# (equivalent to json.loads(test_call.text))
todos = test_call.json()

pp(len(todos))
print(todos[3])
print(todos[3]['title'])

200
{'userId': 1, 'id': 4, 'title': 'et porro tempora', 'completed': True}
et porro tempora


### Importing from XML

In [31]:
import xml.etree.ElementTree as ET

# Parse the XML file from disk into an ElementTree object.
tree = ET.parse(source='./files/users-100.xml')
# A bare name on the last line makes the notebook display the object's repr.
tree

<xml.etree.ElementTree.ElementTree at 0x1b560d453c8>

In [32]:
# list the attributes and methods available on the parsed tree object
dir(tree)

['__class__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__le__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 '__weakref__',
 '_root',
 '_setroot',
 'find',
 'findall',
 'findtext',
 'getiterator',
 'getroot',
 'iter',
 'iterfind',
 'parse',
 'write',
 'write_c14n']

In [34]:
# grab the document's root element and display its tag name
users_root = tree.getroot()
users_root.tag

'Users'

In [40]:
# users_root.getchildren()  # deprecated
# converting the element to a list collects its direct child elements instead
children = list(users_root)
children

[<Element 'row' at 0x000001B5604C8E58>,
 <Element 'row' at 0x000001B560616E58>,
 <Element 'row' at 0x000001B560616EF8>,
 <Element 'row' at 0x000001B560626098>,
 <Element 'row' at 0x000001B560626138>,
 <Element 'row' at 0x000001B560626278>,
 <Element 'row' at 0x000001B560626368>,
 <Element 'row' at 0x000001B5606264A8>,
 <Element 'row' at 0x000001B560626598>,
 <Element 'row' at 0x000001B5606266D8>,
 <Element 'row' at 0x000001B5606267C8>,
 <Element 'row' at 0x000001B560626958>,
 <Element 'row' at 0x000001B560626A48>,
 <Element 'row' at 0x000001B560626AE8>,
 <Element 'row' at 0x000001B560626BD8>,
 <Element 'row' at 0x000001B560626D18>,
 <Element 'row' at 0x000001B560626E08>,
 <Element 'row' at 0x000001B560626F48>,
 <Element 'row' at 0x000001B560442098>,
 <Element 'row' at 0x000001B560442138>,
 <Element 'row' at 0x000001B560442278>,
 <Element 'row' at 0x000001B560442318>,
 <Element 'row' at 0x000001B560442408>,
 <Element 'row' at 0x000001B5604424A8>,
 <Element 'row' at 0x000001B5604425E8>,


In [41]:
# how many child elements were collected from the root
len(children)

100

In [42]:
# tag name of the first child element
children[0].tag

'row'

In [44]:
# XML attributes of the first child as a dict; note the values shown are all
# strings (e.g. 'Id': '-1'), so numeric fields need explicit conversion
children[0].attrib

{'AccountId': '-1',
 'CreationDate': '2014-05-13T21:29:22.820',
 'DownVotes': '679',
 'Id': '-1',
 'LastAccessDate': '2014-05-13T21:29:22.820',
 'Location': 'on the server farm',
 'Reputation': '1',
 'UpVotes': '780',
 'Views': '0'}