# Serialization

### CSV

In [None]:
# Naive approach: pull the file in as raw text lines, with no CSV parsing at all.
with open('CSV-JSON-XML-CocaCola.csv') as fp:
    data = list(fp)

data  # looks pretty organized. could potentially write our own parser.

In [None]:
import csv

# Parse the file with the csv module instead of splitting lines by hand.
# newline='' is what the csv docs ask for, so that quoted fields containing
# line breaks are handed to the reader intact.
with open('CSV-JSON-XML-CocaCola.csv', newline='') as fp:
    reader = csv.reader(fp)
    data = list(reader)

data  # much better! each column is broken into its own list element

In [None]:
# DictReader uses the header row as keys, so each row can be indexed by
# column name. newline='' lets the csv module handle line endings itself.
with open('CSV-JSON-XML-CocaCola.csv', newline='') as fp:
    reader = csv.DictReader(fp)
    data = list(reader)

data  # The best for data with headers.

In [None]:
import collections

In [None]:
# Fill an OrderedDict with three pairs; they are inserted in the order 1, 2, 3.
d = collections.OrderedDict()
d.update([(1, 'a'), (2, 'b'), (3, 'c')])
d

In [None]:
# Peek at two columns of the first row, looked up by header name.
first_row = data[0]
print(first_row['Year'], first_row['Assets'], sep='\n')

In [None]:
import collections

# Data is still in strings. Let's convert to correct types.

def _parse_number(text, cast):
    """Convert a formatted numeric string with ``cast``; return None if blank.

    Thousands separators (",") are removed before conversion. ``None``, the
    empty string and whitespace-only strings all count as a missing value.
    """
    if not text or not text.strip():
        return None
    return cast(text.replace(',', ''))


def parse_row(row):
    """Convert one CSV row of strings into an OrderedDict of typed values.

    Args:
        row: Mapping with the string-valued keys 'Year', 'NOI', 'Assets',
            'Cash' and 'Share Price'.

    Returns:
        collections.OrderedDict with the same keys in that order. 'Year',
        'NOI', 'Assets' and 'Cash' become ``int``; 'Share Price' becomes
        ``float``. Every column except 'Year' is ``None`` when its cell is
        blank.

    Raises:
        ValueError: If 'Year' or any non-blank cell is not a valid number.
    """
    new_row = collections.OrderedDict()
    new_row['Year'] = int(row['Year'])  # 'Year' is required, so no blank check
    optional_columns = (
        ('NOI', int),
        ('Assets', int),
        ('Cash', int),
        ('Share Price', float),
    )
    for column, cast in optional_columns:
        new_row[column] = _parse_number(row[column], cast)
    return new_row

# Print each row after type conversion to eyeball the parsed values.
for row in data:
    print(parse_row(row))

In [None]:
# Export the type-converted rows to a new CSV file.
# newline='' lets the csv module write its own line endings; without it,
# Windows output gets an extra blank line after every row.
with open('CSV-JSON-XML-CocaCola-out.csv', 'w', newline='') as fp:
    writer = csv.writer(fp)
    parsed_rows = [parse_row(row) for row in data]  # parse every row exactly once
    if parsed_rows:
        # Take the header from the parsed data itself rather than from a
        # variable left over from another cell.
        writer.writerow(parsed_rows[0].keys())  # write header row
    writer.writerows(parsed.values() for parsed in parsed_rows)  # write rows of data

### JSON

In [None]:
import json

# Read the file's text and parse the whole JSON document into Python objects.
with open('CSV-JSON-XML-superheros.json') as fp:
    data = json.loads(fp.read())

data

In [None]:
type(data)  # JSON objects are loaded as `dict`s

In [None]:
# Remove the top-level 'secretBase' entry; `del` changes the dict in place.
del data['secretBase']

In [None]:
data

In [None]:
# Add one more member record to the end of the 'members' list.
data['members'].append(dict(
    age=64,
    name='Oprah Winfrey',
    powers=['Media Proprietor', 'Producer', 'Philanthropist'],
    secretIdentity='Oprah Winfrey',
))

In [None]:
data

In [None]:
# Write the modified data back out as human-readable JSON.
with open('CSV-JSON-XML-superheros-out.json', 'w') as fp:
    json.dump(
        obj=data,
        fp=fp,
        indent=4,  # pretty printing; indent is a width in spaces (a bool would be read as 1)
        sort_keys=True,  # sorting for easier lookup by a human
    )

In [None]:
# JSON is much easier when type parsing is important
# CSV is better for relational data

In [None]:
# LEFT HERE FOR SEC1

### XML

In [None]:
from lxml import etree

In [None]:
# Read the document as bytes rather than text: lxml refuses str input that
# carries an XML encoding declaration, and given bytes it works out the
# document's encoding on its own.
with open('CSV-JSON-XML-note.xml', 'rb') as fp:
    data = fp.read()

root = etree.fromstring(data)

In [None]:
# tostring() returns bytes, so decode to str before printing.
print(etree.tostring(root).decode())

In [None]:
# Loop over the root element and print each node it yields.
for node in root:
    print(node)

# Notice that the "note" and "p" tags are not present:
# "note" is the root itself and "p" is nested deeper in the tree.

In [None]:
# Collect the "p" elements nested below the root.
tags = root.findall('.//p')  # must use XPath syntax to look up nested tags in tree, e.g. "p"
tags

In [None]:
tags[0].text

In [None]:
# We could iterate over the whole tree, but that is not
# very efficient if we just want the "p" tags.
for element in root.iter():
    print(element)

In [None]:
# Where did the "note" tag go? It became the root object.
root

In [None]:
root.attrib  # get attributes on root

In [None]:
etree.tostring(root)  # will return bytes!

In [None]:
# etree.tostring() returns bytes, so the output file is opened in binary mode.
with open('CSV-JSON-XML-note-out.xml', 'wb') as fp:  # open the file for writing bytes
    fp.write(etree.tostring(root))

In [None]:
# When to use XML? Never. Try not to use XML unless the project specifically requires it.