In [1]:
# Housekeeping - Jupyter Fehlermeldungen kürzen
# Hinweis: Sorry, bei mybinder.org klappt der Ansatz nicht
import json 
from pprint import pprint
import sys
ipython = get_ipython()

# Remember the ORIGINAL traceback handler exactly once. If this cell is run a
# second time, ipython.showtraceback is already hide_traceback; saving that
# would make it impossible to restore the full tracebacks afterwards.
unhide_traceback = globals().get('unhide_traceback') or ipython.showtraceback


def hide_traceback(exc_tuple=None, filename=None, tb_offset=None,
                   exception_only=False, running_compiled_code=False):
    """Display only the "ExceptionType: message" line of the active exception.

    The parameters are accepted so that this function can be installed in
    place of ``ipython.showtraceback``; none of them is used.
    """
    etype, value, _tb = sys.exc_info()
    short_message = ipython.InteractiveTB.get_exception_only(etype, value)
    return ipython._showtraceback(etype, value, short_message)


ipython.showtraceback = hide_traceback
# To get the full tracebacks back:
# ipython.showtraceback = unhide_traceback



Pythoncamp 2020 Session Martin Borus, Twitter: @mborus

# GLOM

von Mahmoud Hashemi

## "Wenn du verschachtelte Daten hast, brauchst du Glom!"

https://github.com/mahmoud/glom 





# Hauptfunktionen

- Pfadbasierter Zugriff
- deklarative Datenumwandlung
- lesbare, aussagekräftige Fehlermeldungen
- mit Debugger!


## Vorbereitung

    pip install glom
    pip install --upgrade -e git+https://github.com/mahmoud/glom#egg=glom

Dokumentation und Tutorial auf https://glom.readthedocs.io,
von hier stammen auch die Code-Beispiele der Einführung

# 1. Normales Python

In [2]:
data = {'a': {'b': {'c': 'd'}}}

In [3]:
data['a']['b']['c']

'd'

In [4]:
data2 = {'a': {'b': None}}

In [5]:
data2['a']['b']['c']

TypeError: 'NoneType' object is not subscriptable

# 2. Glom 

In [6]:
from glom import glom

In [7]:
glom(data, 'a.b.c')

'd'

In [8]:
glom(data2, 'a.b.c')


PathAccessError: could not access 'c', part 2 of Path('a', 'b', 'c'), got error: AttributeError("'NoneType' object has no attribute 'c'")

In [9]:
# Mit glom lassen sich Fehler gut abfangen

from glom import GlomError, PathAccessError

try:
    glom(data2, 'a.b.c')
except PathAccessError as e:
    print(e)

could not access 'c', part 2 of Path('a', 'b', 'c'), got error: AttributeError("'NoneType' object has no attribute 'c'")


In [10]:
try:
    glom(data2, 'a.b.c')
except AttributeError as e:
    print(e)

could not access 'c', part 2 of Path('a', 'b', 'c'), got error: AttributeError("'NoneType' object has no attribute 'c'")


In [11]:
try:
    glom(data2, 'a.b.c')
except GlomError as e:
    print(e)

could not access 'c', part 2 of Path('a', 'b', 'c'), got error: AttributeError("'NoneType' object has no attribute 'c'")


## Glom geht auch mit Lists

In [12]:
data = [1, [2, 3, 4] , 3, 4 ,5]

In [13]:
glom(data, '1.0')

2

## Glom geht auch mit Objekten !1!!

In [14]:
class MyClass:
    """Minimal object with two private-looking string attributes."""

    def __init__(self):
        # Both greeting attributes are set in a single tuple assignment.
        self._hallo, self._welt = "hallo!", "welt!"


# Instance used as the glom target in the next cell.
myvar = MyClass()

In [15]:
glom(myvar, '_hallo')

'hallo!'

# 3. Target & Spec
- "Target" sind die Daten (list, dict, object)
- "Spec" ist das gewünschte Ergebnis

In [16]:
# Target: the nested data to read from.
target = {'galaxy': {'system': {'planet': 'jupiter'}}}

# Spec: the dotted path leading to the wanted value.
spec = 'galaxy.system.planet'

glom(target, spec)

'jupiter'

In [17]:
# Target: one system holding a list of planet records.
target = {
    'system': {
        'planets': [
            {'name': 'earth', 'moons': 1},
            {'name': 'jupiter', 'moons': 69}
        ]
    }
}

# A dict spec: every key becomes a key of the result. Each value is a tuple of
# steps: first follow 'system.planets', then the one-element list collects the
# named field from every planet.
spec = {
     'names': ('system.planets', ['name']),
     'moons': ('system.planets', ['moons'])
}

glom(target, spec)

{'names': ['earth', 'jupiter'], 'moons': [1, 69]}

In [18]:
# Planets whose 'moons' entry is itself a list of records (one per moon).
target = {'system': {'planets': [
    {'name': 'earth', 'moons': [{'name': 'luna'}]},
    {'name': 'jupiter', 'moons': [{'name': 'io'}, {'name': 'europa'}]},
]}}

In [19]:

# 'moon_names' goes one level deeper than 'planet_names': for every planet,
# follow 'moons' and collect the 'name' of each moon, which yields one list of
# moon names per planet.
spec = {
    'planet_names': ('system.planets', ['name']),
    'moon_names': ('system.planets', [('moons', ['name'],  )])
}
pprint(glom(target, spec))

{'moon_names': [['luna'], ['io', 'europa']],
 'planet_names': ['earth', 'jupiter']}


In [20]:
from glom import Coalesce

target = {
     'system': {
         'planets': [
             {'name': 'earth', 'moons': 1},
             {'name': 'jupiter', 'moons': 69}
         ]
     }
}

# Coalesce is given two alternative paths; in this target only
# 'system.planets' exists, and the result is built from it.
spec = {
     'planets': (Coalesce('system.planets', 'system.dwarf_planets'), ['name']),
     'moons': (Coalesce('system.planets', 'system.dwarf_planets'), ['moons'])
}

glom(target, spec)

{'planets': ['earth', 'jupiter'], 'moons': [1, 69]}

In [21]:
# A target that only provides 'system.dwarf_planets'; `spec` is not redefined
# here, so the Coalesce spec from the previous cell is reused unchanged.
target = {
     'system': {
         'dwarf_planets': [
             {'name': 'pluto', 'moons': 5},
             {'name': 'ceres', 'moons': 0},
         ]
     }
 }
glom(target, spec)

{'planets': ['pluto', 'ceres'], 'moons': [5, 0]}

In [22]:
target = {
     'system': {
         'planets': [
             {'name': 'earth', 'moons': 1},
             {'name': 'jupiter', 'moons': 69}
         ]
     }
}

# The last step of the tuple is a plain callable: `sum` receives the list of
# moon counts produced by the step before it.
glom(target, {'moon_count': ('system.planets', ['moons'], sum)})
# The same spec written with an explicit lambda:
# glom(target, {'moon_count': ('system.planets', ['moons'], lambda x: sum(x))})

{'moon_count': 70}

In [23]:
class MySubClass:
    """Leaf object carrying a single greeting attribute."""

    def __init__(self):
        self._hey = "Hey!"


class MyClass:
    """Object with two string attributes and a list of nested objects."""

    def __init__(self):
        self._hallo = "hallo!"
        self._welt = "welt!"
        # Create six distinct instances. `[MySubClass()] * 6` would store the
        # same object six times, so changing one entry would change them all.
        self._heylist = [MySubClass() for _ in range(6)]


myvar = MyClass()

In [24]:
from glom import Iter
# The target is an object here: plain strings read attributes of `myvar`, and
# ('_heylist', ['_hey']) reads `_hey` from every object in the list.
spec = {'hallo': '_hallo', 'welt': '_welt', 'heylist': ('_heylist', ['_hey'])}

glom(myvar, spec)

{'hallo': 'hallo!',
 'welt': 'welt!',
 'heylist': ['Hey!', 'Hey!', 'Hey!', 'Hey!', 'Hey!', 'Hey!']}

# 4. Daten eindampfen - Flatten und Merge

In [25]:
from glom import Flatten, Merge

In [26]:
data = [[1,2], [3], [4], [], [5]]

In [27]:
glom(data, Flatten())

[1, 2, 3, 4, 5]

In [28]:
data = [{'hallo': 'welt'}, {'hello': 'world'}]

In [29]:
glom(data, Merge())

{'hallo': 'welt', 'hello': 'world'}

# 5. Tutorial: Objekte -> Ausgaben

In [30]:
from glom.tutorial import * 

In [31]:
# Create a contact that has a single e-mail address.
contact = Contact(
    'Julian',
    emails=[Email(email='jlahey@svtp.info')],
    location='Canada',
)

In [32]:
contact.save()

In [33]:
contact.primary_email

Email(id=5, email='jlahey@svtp.info', email_type='personal')

In [34]:
contact.add_date

datetime.datetime(2020, 4, 25, 10, 55, 25, 57554)

In [35]:
contact.id

5

In [36]:
len(Contact.objects.all())

5

In [37]:
json.dumps(Contact.objects.all())

TypeError: Object of type Contact is not JSON serializable

In [38]:
target = Contact.objects.all()

In [39]:
target[0].add_date

datetime.datetime(2020, 4, 25, 10, 55, 24, 929546)

In [40]:
# Spec for the 'results' list:
# - the outer one-element list applies the inner dict spec to every contact,
# - ('add_date', str) reads the attribute and then converts it with str,
# - each Coalesce lists alternatives together with a `default` value.
spec = {'results': [{'id': 'id',
                      'name': 'name',
                      'add_date': ('add_date', str),
                      'emails': ('emails', [{'id': 'id',
                                            'email': 'email',
                                            'type': 'email_type'}]),
                      'primary_email': Coalesce('primary_email.email', default=None),
                      'pref_name': Coalesce('pref_name', 'name', skip='', default=''),
                      'detail': Coalesce('company',
                                         'location',
                                         ('add_date.year', str),
                                         skip='', default='')}]}

In [41]:
resp = glom(target, spec)

In [42]:
print(json.dumps(resp, indent=2, sort_keys=True))

{
  "results": [
    {
      "add_date": "2020-04-25 10:55:24.929546",
      "detail": "Mountain View",
      "emails": [
        {
          "email": "kurt@example.com",
          "id": 1,
          "type": "personal"
        }
      ],
      "id": 1,
      "name": "Kurt",
      "pref_name": "Kurt",
      "primary_email": "kurt@example.com"
    },
    {
      "add_date": "2020-04-25 10:55:24.929546",
      "detail": "D & D Mastering",
      "emails": [
        {
          "email": "seanboy@example.com",
          "id": 2,
          "type": "personal"
        }
      ],
      "id": 2,
      "name": "Sean",
      "pref_name": "Sean",
      "primary_email": "seanboy@example.com"
    },
    {
      "add_date": "2020-04-25 10:55:24.929546",
      "detail": "HomeMake Labs",
      "emails": [
        {
          "email": "mixtape@homemakelabs.com",
          "id": 3,
          "type": "work"
        },
        {
          "email": "matt@example.com",
          "id": 4,
          "type": "per

# 6. "T" - das Stunt-Double

In [43]:
from glom import T, Flatten
# First entry of the response: T[0] indexes into the 'results' list.
glom(resp, ('results', T[0]))

{'id': 1,
 'name': 'Kurt',
 'add_date': '2020-04-25 10:55:24.929546',
 'emails': [{'id': 1, 'email': 'kurt@example.com', 'type': 'personal'}],
 'primary_email': 'kurt@example.com',
 'pref_name': 'Kurt',
 'detail': 'Mountain View'}

In [44]:
# Antwort: Alle Emails
from glom import Flatten

# Collect the 'emails' list of every result, flatten the resulting list of
# lists, then pick the 'email' field of every entry.
glom(resp, ('results', ['emails'],
            Flatten(),
            ['email'],
           )
    )


['kurt@example.com',
 'seanboy@example.com',
 'mixtape@homemakelabs.com',
 'matt@example.com',
 'jlahey@svtp.info']

# 7. Loop mit Iter

In [45]:
from glom import glom, Iter

# The seven dwarfs (German names), in the order the following cells rely on.
target = [
    'Brummbär', 'Pimpel', 'Happy', 'Chef',
    'Hatschi', 'Schlafmütz', 'Seppel',
]

In [46]:
# On its own, Iter() produces a generator rather than a list.
spec = Iter()
glom(target, spec)

<generator object Iter._iterate at 0x00000000066AAF48>

In [47]:
# .all() turns the generator into a list.
spec = Iter().all()
glom(target, spec)

['Brummbär', 'Pimpel', 'Happy', 'Chef', 'Hatschi', 'Schlafmütz', 'Seppel']

In [48]:
# Group the items into chunks of 2; `fill` pads the last, incomplete chunk.
spec = Iter().chunked(2, fill='Schneewittchen').all()
glom(target, spec)

[['Brummbär', 'Pimpel'],
 ['Happy', 'Chef'],
 ['Hatschi', 'Schlafmütz'],
 ['Seppel', 'Schneewittchen']]

In [49]:
# Keep only the items for which the predicate is true (everyone but 'Chef').
spec = Iter().filter(lambda x: x != 'Chef').all()
glom(target, spec)

['Brummbär', 'Pimpel', 'Happy', 'Hatschi', 'Schlafmütz', 'Seppel']

In [50]:
# Limit the result to at most 3 items.
spec = Iter().limit(3).all()
glom(target, spec)

['Brummbär', 'Pimpel', 'Happy']

In [51]:
# Dwarfs 2 to 4 (counting from 1), i.e. indices 1, 2 and 3 of the 0-based list.
spec = Iter().slice(1, 4).all()
glom(target, spec)

['Pimpel', 'Happy', 'Chef']

In [52]:
# Keep only the first dwarf for each initial letter (T[0] is the first character).
spec = Iter().unique(T[0]).all()
glom(target, spec)

['Brummbär', 'Pimpel', 'Happy', 'Chef', 'Schlafmütz']

In [53]:
# Take items only until 'Chef' shows up.
spec = Iter().takewhile(lambda x: x != 'Chef').all()
glom(target, spec)

['Brummbär', 'Pimpel', 'Happy']

In [54]:
# The first dwarf after 'Chef': drop everything before 'Chef', then
# slice(1, 2) skips 'Chef' itself and first() returns the remaining item.
spec = Iter().dropwhile(lambda x: x != 'Chef').slice(1, 2).first()
glom(target, spec)

'Hatschi'

In [55]:
# Split the list into groups at 'Chef'; the separator itself is not part of
# either group.
spec = Iter().split('Chef').all() 
glom(target, spec)

[['Brummbär', 'Pimpel', 'Happy'], ['Hatschi', 'Schlafmütz', 'Seppel']]

In [56]:
# Apply a function to every item.
spec = Iter().map(lambda x:x.lower()).all()
glom(target, spec)


['brummbär', 'pimpel', 'happy', 'chef', 'hatschi', 'schlafmütz', 'seppel']

In [57]:
# ... or use T, the stand-in for the current item, to call the method.
spec = Iter().map(T.upper()).all()
glom(target, spec)

['BRUMMBÄR', 'PIMPEL', 'HAPPY', 'CHEF', 'HATSCHI', 'SCHLAFMÜTZ', 'SEPPEL']

# 8. Literal - Feste Werte vergeben

In [58]:
from glom import Literal
# One dict per item: 'Name' is the item itself (T), 'Grösse' is always the
# fixed value 'Zwerg'.
spec = Iter({'Name': T, 'Grösse': Literal('Zwerg')}).all()

In [59]:
glom(target, spec)

[{'Name': 'Brummbär', 'Grösse': 'Zwerg'},
 {'Name': 'Pimpel', 'Grösse': 'Zwerg'},
 {'Name': 'Happy', 'Grösse': 'Zwerg'},
 {'Name': 'Chef', 'Grösse': 'Zwerg'},
 {'Name': 'Hatschi', 'Grösse': 'Zwerg'},
 {'Name': 'Schlafmütz', 'Grösse': 'Zwerg'},
 {'Name': 'Seppel', 'Grösse': 'Zwerg'}]

# 9. Data Driven

Wenn der Schlüssel aus dem Dictionary Daten enthält

In [60]:
from glom import glom, T, Merge, Iter, Coalesce

# The planet names are the dictionary KEYS of this target.
target = {
    "pluto": {"moons": 6, "population": None},
    "venus": {"population": {"aliens": 5}},
    "earth": {"moons": 1, "population": {"humans": 7_700_000_000, "aliens": 1}},
}

# Work on the (key, value) pairs: T.items() produces them, Iter builds one
# small {planet: moons} dict per pair (Coalesce supplies 0 where 'moons' is
# missing), and Merge() combines those dicts into a single one.
spec = {
    "moons": (
           T.items(),
           Iter({T[0]: (T[1], Coalesce("moons", default=0))}),
           Merge()
    )
}
        
glom(target, spec)        

{'moons': {'pluto': 6, 'venus': 0, 'earth': 1}}

# 10. Werte hinzufügen & Löschen

In [61]:
data = {'moons': {'pluto': 6, 'venus': 0, 'earth': 1}}

In [62]:
from glom import Assign, Delete
spec = Assign('moons.saturn', 7)

In [63]:
glom(data, spec)

{'moons': {'pluto': 6, 'venus': 0, 'earth': 1, 'saturn': 7}}

In [64]:
spec = Delete('moons.earth')

In [65]:
glom(data, spec)

{'moons': {'pluto': 6, 'venus': 0, 'saturn': 7}}

In [66]:
spec = Delete('moons.mars', ignore_missing=False)
glom(data, spec)

PathDeleteError: could not delete 'mars' on object at Path('moons'), got error: KeyError('mars')

# 11. Scope

Mit Scope weitere Daten übergeben, die sonst nicht sichtbar wären

In [67]:
from glom import S, glom, Assign, Spec

target = {'date': '2020-04-01',
 'location': 'A',
 'items': [
     {'name': 'A', 'id': 'A1'},
     {'name': 'B', 'id': 'B1'},
     {'name': 'C', 'id': 'C1'}
]}

# Every item receives the outer 'date' and 'location'. The spec steps into
# 'items' first, so the whole target is also passed as `scope` and the two
# outer values are read back through S.
spec = ('items', 
        [Assign('date', Spec(S['date']))], 
        [Assign('location', Spec(S['location']))]
       )

glom(target, spec, scope=target)

[{'name': 'A', 'id': 'A1', 'date': '2020-04-01', 'location': 'A'},
 {'name': 'B', 'id': 'B1', 'date': '2020-04-01', 'location': 'A'},
 {'name': 'C', 'id': 'C1', 'date': '2020-04-01', 'location': 'A'}]

# 12. XML

In [68]:
from glom import Ref

In [69]:
# Recursive spec: the Ref is named 'ElementTree' and refers to itself by that
# name for the children, so every child element is converted with the same
# spec. `iter` is applied to the element before the list step.
etree2dicts = Ref('ElementTree',
    {"tag": "tag", 
     "text": "text", 
     "attrib": "attrib", 
     "children": (iter, [Ref('ElementTree')])})

In [70]:
html_text = """<html>
  <head>
    <title>the title</title>
  </head>
  <body id="the-body">
    <p>A paragraph</p>
  </body>
</html>"""

In [71]:
from xml.etree import ElementTree
etree = ElementTree.fromstring(html_text)

In [72]:
glom(etree, etree2dicts)

{'tag': 'html',
 'text': '\n  ',
 'attrib': {},
 'children': [{'tag': 'head',
   'text': '\n    ',
   'attrib': {},
   'children': [{'tag': 'title',
     'text': 'the title',
     'attrib': {},
     'children': []}]},
  {'tag': 'body',
   'text': '\n    ',
   'attrib': {'id': 'the-body'},
   'children': [{'tag': 'p',
     'text': 'A paragraph',
     'attrib': {},
     'children': []}]}]}

# 13. Weiteres...

aus Zeitgründen noch nicht erwähnt

- Path (eine andere Pfadnotation, wenn Strings nicht gehen)
- Invoke / Call (Funktionen anwenden)
- Check -> CheckError (Daten auf Korrektheit prüfen!)
- Inspect (Debugger)  

# 14. Übungsaufgaben

### Aufgabe 1: 

Aus der Raumliste http://borus.de/pythoncamp/event.json ein Dictionary erstellen, das den Raumnamen in GROSSBUCHSTABEN als Schlüssel hat und die URL als Wert.

Gewünschtes Ergebnis:

    {
     'BERLIN': 'https://bbb01.pythoncamp.online/b/rei-hyz-cgm',
     'FLIEGENDER ZIRKUS': 'https://bbb01.pythoncamp.online/b/rei-hyz-cgm',
     'TOKIO': 'https://bbb01.pythoncamp.online/b/rei-hyz-cgm'
     ...
     }

In [97]:
import requests

In [98]:
# Download the room list. The timeout keeps the cell from hanging on a dead
# connection, and raise_for_status() reports an HTTP error right here instead
# of letting r.json() fail on an error page later.
r = requests.get('http://borus.de/pythoncamp/event.json', timeout=10)
r.raise_for_status()

In [99]:
r.json()

{'event': 'pythoncamp 2020',
 'update': '2020-04-17 23:23:00',
 'rooms': [{'name': 'Berlin',
   'url': 'https://bbb01.pythoncamp.online/b/rei-hyz-cgm',
   'access_code': '12345'},
  {'name': 'Fliegender Zirkus',
   'url': 'https://bbb01.pythoncamp.online/b/rei-hyz-cgm',
   'access_code': '12345'},
  {'name': 'London',
   'url': 'https://bbb01.pythoncamp.online/b/rei-hyz-cgm',
   'access_code': '12345'},
  {'name': 'New York',
   'url': 'https://bbb01.pythoncamp.online/b/rei-hyz-cgm',
   'access_code': '12345'},
  {'name': 'Paris',
   'url': 'https://bbb01.pythoncamp.online/b/rei-hyz-cgm',
   'access_code': '12345'},
  {'name': 'Plenum',
   'url': 'https://bbb01.pythoncamp.online/b/rei-hyz-cgm',
   'access_code': '12345'},
  {'name': 'Ritter der Kokosnuss',
   'url': 'https://bbb01.pythoncamp.online/b/rei-hyz-cgm',
   'access_code': '12345'},
  {'name': 'Rom',
   'url': 'https://bbb01.pythoncamp.online/b/rei-hyz-cgm',
   'access_code': '12345'},
  {'name': 'Spamalot',
   'url': 'https:/

In [None]:
# hier die Spec bitte selbst schreiben
spec = "... ?"

In [None]:
glom(r.json(), spec)

### Aufgabe 2:

Daten extrahieren aus einer Online-Buchungsanfrage
http://borus.de/pythoncamp/booking_example.json
        
Gewünschtes Ergebnis:

    {'abfahrtshafen_hin': 'DKHNB',
     'abfahrtsdatum_hin': '2019-09-01',
     'abfahrtszeit_hin': '14:30',
     'abfahrtshafen_rueck': 'DELIS',
     'abfahrtsdatum_rueck': '2019-10-02',
     'abfahrtszeit_rueck': '19:25',
     'fahrkarten': ['CAR', 'AD', 'CH']
    }


In [104]:
# Download the booking example. The timeout keeps the cell from hanging on a
# dead connection, and raise_for_status() reports an HTTP error right here
# instead of letting r.json() fail on an error page later.
r = requests.get('http://borus.de/pythoncamp/booking_example.json', timeout=10)
r.raise_for_status()

In [105]:
r.json()

{'request_ip': '127.0.0.1',
 'request_system': 'homepage',
 'request_session': '<SESSION UUID4>',
 'request_language': 'en',
 'company': 'REDACTED',
 'agency': None,
 'client_no': '123456789',
 'trip_special': None,
 'booking_type': 'VEH',
 'booking_currency': 'EUR',
 'booking_no': None,
 'trips': [{'trip_part': 'OUT',
   'departure_harbor': 'DKHNB',
   'destination_harbor': 'DELIS',
   'date': '2019-09-01',
   'time': '14:30',
   'date_open': False,
   'selected_voyage': 'H-L070145',
   'selected_reservation_area': None,
   'trip_discount percent': None},
  {'trip_part': 'RET',
   'departure_harbor': 'DELIS',
   'destination_harbor': 'DKHNB',
   'date': '2019-10-02',
   'time': '19:25',
   'date_open': False,
   'selected_voyage': 'H-L070170',
   'selected_reservation_area': None,
   'trip_discount percent': None}],
 'tickets': [{'type': 'CAR',
   'token': None,
   'is_vehicle': True,
   'license_plate': 'NF-TEST-01',
   'count': 1,
   'length': 5.0,
   'reservation_area': None,
   'd

In [107]:
# Bitte hier die Spec schreiben
spec = "... ?"


In [108]:
glom(r.json(), spec)

{'abfahrtshafen_hin': 'DKHNB',
 'abfahrtsdatum_hin': '2019-09-01',
 'abfahrtszeit_hin': '14:30',
 'abfahrtshafen_rueck': 'DELIS',
 'abfahrtsdatum_rueck': '2019-10-02',
 'abfahrtszeit_rueck': '19:25',
 'fahrkarten': ['CAR', 'AD', 'CH']}