# CSV

= Comma-Separated Values

In [None]:
import csv
from pprint import pprint

In [None]:
# Read the data row by row; each row is a list of strings.
# newline="" hands line-ending handling to the csv module, so newlines
# embedded in quoted fields are parsed correctly.
with open("data/menu.csv", mode="r", encoding="utf-8", newline="") as f:
    for radka in csv.reader(f):  # optional parameter: delimiter
        print(radka)

In [None]:
# Read each row into a dictionary keyed by the header row.
# newline="" hands line-ending handling to the csv module, so newlines
# embedded in quoted fields are parsed correctly.
with open("data/menu.csv", mode="r", encoding="utf-8", newline="") as f:
    for radka in csv.DictReader(f):  # optional parameter: fieldnames
        pprint(radka)

In [None]:
# A list of rows; the last row contains a value with a comma in it, which
# the writer has to quote.
data = [
    [234, 12, 425],
    [212, 553, 34],
    ["zrada, ", "text", "s carkou"],
]

# A generator of generators: for each y in 0..4, one row holding the squares
# of y, y + 1 and y + 2. writerows() accepts any iterable of rows.
dalsi_data = (
    (x ** 2 for x in range(y, y + 3))
    for y in range(5)
)

# Write both sources to one file.
# newline="" is required for csv output: without it the writer's own "\r\n"
# terminator is translated again on Windows, producing blank lines between rows.
with open("data/write.csv", mode="w", encoding="utf-8", newline="") as f:
    writer = csv.writer(f)
    writer.writerows(data)
    writer.writerows(dalsi_data)

# JSON

= JavaScript Object Notation

In [None]:
import json

In [None]:
# Read: parse the JSON document into plain Python objects.
with open("data/widget.json", encoding="utf-8") as widget_file:
    data = json.load(widget_file)

# Show what type the top-level value was decoded to, then its content.
print(type(data))
pprint(data)

In [None]:
# Update: set "debug" and replace "window" inside the nested "widget" object.
# None is serialized as JSON null when the data is written out.
data["widget"].update(
    debug="off",
    window={
        "height": 640,
        "width": 480,
        "name": None,
        "title": "Ahoj!",
    },
)

In [None]:
# Write: serialize the data back to disk, pretty-printed with a 2-space indent.
with open("data/widget_write.json", "w", encoding="utf-8") as out_file:
    json.dump(data, fp=out_file, indent=2)

In [None]:
# Working with strings: loads() parses a JSON string, dumps() produces one.
# Both results are bound and shown together, because a notebook cell only
# displays its last expression and the first result would otherwise be lost.
json_parsed = json.loads("{}")
json_text = json.dumps(dict())
json_parsed, json_text

In [None]:
# Errors: malformed input makes json.loads raise json.JSONDecodeError.
broken_json = "{"
try:
    json.loads(broken_json)
except json.JSONDecodeError:
    print("Nevalidní JSON.")

Trik - formátování JSON souboru v konzoli:

```bash
cat data/widget.json | python -m json.tool
```

# XML

= Extensible Markup Language

~~Tři~~ Dva základní přístupy:

- ElementTree - [`xml.etree.ElementTree`](https://docs.python.org/3/library/xml.etree.elementtree.html).
- ~~DOM - [`xml.dom`](https://docs.python.org/3/library/xml.dom.html) a [`xml.dom.minidom`](https://docs.python.org/3/library/xml.dom.minidom.html).~~
- SAX - [`xml.sax`](https://docs.python.org/3/library/xml.sax.html) a [`xml.sax.handler`](https://docs.python.org/3/library/xml.sax.handler.html)

## ElementTree

In [None]:
# Reading
# The module is imported under an alias rather than as a bare `parse`, so the
# name cannot be confused with `xml.sax.parse`, which is imported further down.
import xml.etree.ElementTree as ET

# Parse straight from the path: the parser reads the raw bytes and honours the
# encoding declared in the XML prolog instead of a hard-coded one.
tree = ET.parse("data/books.xml")

# Walk the direct children of the root element.
for book in tree.getroot():
    # `book_id`, not `id`: in a notebook a global named `id` would shadow the
    # builtin for every later cell.
    book_id = book.get("id")
    # findtext() returns None for a missing <title> instead of raising.
    title = book.findtext("title")
    print(book_id, title)

In [None]:
# XPath support (a limited subset): select <book> children of the root that
# have a <genre> child with the text "Fantasy".
fantasy_books = tree.findall("book[genre='Fantasy']")
fantasy_books

In [None]:
# Generator support: iterfind() yields the matching elements one at a time.
[title_el.text for title_el in tree.iterfind("book/title")]

In [None]:
# Modify the data: flag every <book> element with processed="yes".
for book_el in tree.iter("book"):
    book_el.set("processed", "yes")

# Write the modified tree to a new file.
tree.write("data/books_processed.xml", encoding="utf-8")

## SAX

In [None]:
from xml.sax import parse
from xml.sax.handler import ContentHandler

In [None]:
# Define the SAX content handler
class GenreCollector(ContentHandler):
    """Collect the distinct text values of all ``<genre>`` elements.

    After parsing, ``genres`` holds one string per distinct genre text.

    SAX parsers may deliver the text of a single element through several
    ``characters()`` calls, so the pieces are buffered and joined when the
    closing tag arrives; adding each piece separately would put fragments
    into the result.
    """

    def __init__(self):
        # Let the base class set up its own state.
        super().__init__()
        self.genres = set()
        self._in_genre_tag = False
        # Text pieces received for the <genre> element currently open.
        self._chunks = []

    def startElement(self, name, attrs):
        """Start buffering text when a ``<genre>`` element opens."""
        if name == "genre":
            self._in_genre_tag = True
            self._chunks = []

    def endElement(self, name):
        """Store the buffered text when a ``<genre>`` element closes."""
        if name == "genre":
            self._in_genre_tag = False
            # An element without any text contributes nothing.
            if self._chunks:
                self.genres.add("".join(self._chunks))
                self._chunks = []

    def characters(self, data):
        """Buffer character data, but only inside a ``<genre>`` element."""
        if self._in_genre_tag:
            self._chunks.append(data)

In [None]:
# Run the handler over the data and show the collected genres.
collector = GenreCollector()
with open("data/books.xml", encoding="utf-8") as books_file:
    parse(books_file, collector)
print(collector.genres)

Všechny zmiňované parsery obsahují:

- pro práci s namespacovaným XML – suffix metody nebo parametr `NS`
- metody pro načtení dokumentu z řetězce, ne ze souboru

## Zápis XML

Nebuďte masochisti, použijte [Jinju](https://jinja.palletsprojects.com/).

In [None]:
# jinja2 není ve standardní knihovně
from jinja2 import Template
import datetime

# Sample records that are passed to the template as `books`.
books = [
    dict(
        id=1,
        author="Me",
        title="About me",
        genre="Science",
        price=123.2456,
        date=datetime.date.today(),
    ),
]

In [None]:
# Load the template.
# autoescape=True makes Jinja escape characters such as & and < in rendered
# values; without it, such characters in the data would produce malformed XML.
with open("data/books.j2", mode="r", encoding="utf-8") as f:
    template = Template(f.read(), autoescape=True)

# Render the template with the book records and show the result.
print(template.render(books=books))

# YAML

= YAML Ain't Markup Language

In [None]:
# modul PyYaml není ve standardní knihovně
import yaml

In [None]:
# Read
# safe_load() builds only plain Python objects. The bare yaml.load(f) form can
# construct arbitrary objects from tagged input and is rejected outright by
# PyYAML 6+, where the Loader argument is mandatory.
with open("data/employees.yaml", mode="r", encoding="utf-8") as f:
    data = yaml.safe_load(f)  # real code should handle yaml.YAMLError

print(type(data))
pprint(data)

In [None]:
# Write
# allow_unicode=True writes non-ASCII characters as themselves; by default
# PyYAML emits them as escape sequences even though the file is UTF-8.
with open("data/employees_write.yaml", mode="w", encoding="utf-8") as f:
    yaml.dump(data, f, allow_unicode=True)

In [None]:
# Working with strings: the same functions accept a string instead of a file,
# and dump() without a stream returns the YAML text.
# safe_load() replaces the bare yaml.load(), which is unsafe and requires an
# explicit Loader in PyYAML 6+.
# Both results are bound and shown together, because a notebook cell only
# displays its last expression.
yaml_parsed = yaml.safe_load("""
- jedna
- dva
- tři
""")
yaml_text = yaml.dump(["jedna", "dva", "tři"])
yaml_parsed, yaml_text

# INI

> The name "INI file" comes from the commonly used filename extension `.INI`, which stands for "initialization". Other common initialization file extensions are `.CFG` and `.conf`.

In [None]:
import configparser

In [None]:
# Read the configuration, then print one option from every named section.
config = configparser.ConfigParser()
config.read("data/config.ini", encoding="utf-8")

for server in config.sections():
    section = config[server]
    print(server, " -- ", section["ForwardX11"])

In [None]:
# Default values of the configuration
default_values = config.defaults()
default_values

In [None]:
# Update the configuration: change one option in a single section.
bitbucket = config["bitbucket.org"]
bitbucket["ForwardX11"] = "no"

In [None]:
# Write the updated configuration to a new INI file.
with open("data/config_updated.ini", "w", encoding="utf-8") as ini_file:
    config.write(ini_file)

# Příklad

- Převeďte soubor `data/books.xml` na JSON.

In [None]:
import json

# TODO