# Schema: Document Type Definition (DTD)

Die Document Type Definition (DTD) ermöglicht die Spezifikation von XML-Dokumenten und somit das Validieren solcher Dokumente. Man kann mit der DTD Schemas für XML-Dokumente entwickeln. So kann man sich unter mehreren Parteien auf ein gemeinsames Vokabular einigen und die Interoperabilität zwischen den entwickelten Systemen ermöglichen oder erhöhen. In dieser Übung schauen wir uns die DTD in der Praxis etwas genauer an. Führen Sie zuerst den folgenden Codeblock aus und machen Sie dann der Reihe nach weiter. Beantworten Sie die Fragen (falls zutreffend). Zum Schluss schreiben Sie eine eigene DTD und ein exemplarisches XML-Dokument dafür. Stellen Sie sicher, dass das XML-Dokument wohlgeformt und gültig ist.

In [None]:
import io
from lxml import etree as et

def isvalid(dtd, doc):
    """Print the result of validating the XML text ``doc`` against ``dtd``.

    ``dtd`` is the DTD source as a string and ``doc`` is the XML document
    as a string. The DTD is compiled first, then the document is parsed,
    and the outcome of ``DTD.validate`` is printed; nothing is returned.
    """
    # Compile the DTD from an in-memory text stream.
    schema = et.DTD(io.StringIO(dtd))
    # Parse the document text into its root element.
    root = et.fromstring(doc)
    print(schema.validate(root))
    
def exp(doc, path):
    """Print the result of evaluating the XPath expression ``path`` on ``doc``.

    ``doc`` is the XML document as a string; it is parsed and the
    expression is evaluated on the resulting root element. Nothing is
    returned.
    """
    root = et.fromstring(doc)
    matches = root.xpath(path)
    print(matches)

## Elemente

In [None]:
# Smallest case: the DTD declares a single EMPTY element and the document
# consists of exactly that element.
isvalid('<!ELEMENT discography EMPTY>', '<discography/>')

In [None]:
# Content model under test: discography contains `albums` without any
# occurrence indicator. The same DTD is checked against documents with
# zero, one and two albums children.
dtd = """
<!ELEMENT discography (albums)>
<!ELEMENT albums EMPTY>
"""

# No albums child.
isvalid(dtd, """
<discography/>
""")

# One albums child.
isvalid(dtd, """
<discography>
  <albums/>
</discography>
""")

# Why is this XML document not valid? Answer:
isvalid(dtd, """
<discography>
  <albums/>
  <albums/>
</discography>
""")

In [None]:
# Content model under test: `albums` followed by the `*` occurrence
# indicator. Checked against documents with zero, one and two albums children.
dtd = """
<!ELEMENT discography (albums*)>
<!ELEMENT albums EMPTY>
"""

# No albums child.
isvalid(dtd, """
<discography/>
""")

# One albums child.
isvalid(dtd, """
<discography>
  <albums/>
</discography>
""")

# Why is this XML document valid? Answer:
isvalid(dtd, """
<discography>
  <albums/>
  <albums/>
</discography>
""")

In [None]:
# Content model under test: `albums` followed by the `?` occurrence
# indicator. Checked against documents with zero, one and two albums children.
dtd = """
<!ELEMENT discography (albums?)>
<!ELEMENT albums EMPTY>
"""

# No albums child.
isvalid(dtd, """
<discography/>
""")

# One albums child.
isvalid(dtd, """
<discography>
  <albums/>
</discography>
""")

# Why is this XML document not valid? Answer:
isvalid(dtd, """
<discography>
  <albums/>
  <albums/>
</discography>
""")

In [None]:
# Content model under test: `albums` followed by the `+` occurrence
# indicator. Checked against documents with zero, one and two albums children.
dtd = """
<!ELEMENT discography (albums+)>
<!ELEMENT albums EMPTY>
"""

# Why is this XML document not valid? Answer:
isvalid(dtd, """
<discography/>
""")

# One albums child.
isvalid(dtd, """
<discography>
  <albums/>
</discography>
""")

# Two albums children.
isvalid(dtd, """
<discography>
  <albums/>
  <albums/>
</discography>
""")

In [None]:
# Nested element declarations: discography > albums > album > title, where
# title is declared with #PCDATA content.
dtd = """
<!ELEMENT discography (albums)>
<!ELEMENT albums (album+)>
<!ELEMENT album (title)>
<!ELEMENT title (#PCDATA)>
"""

# One album whose title carries text.
isvalid(dtd, """
<discography>
  <albums>
    <album>
      <title>The Dark Side of the Moon</title>
    </album>
  </albums>
</discography>
""")

In [None]:
# Content model under test: the comma-separated sequence `(title, label)`
# for album. The two documents differ only in the order of the children.
dtd = """
<!ELEMENT discography (albums)>
<!ELEMENT albums (album+)>
<!ELEMENT album (title, label)>
<!ELEMENT title (#PCDATA)>
<!ELEMENT label (#PCDATA)>
"""

# Children in the order title, label.
isvalid(dtd, """
<discography>
  <albums>
    <album>
      <title>The Dark Side of the Moon</title>
      <label>Harvest, EMI</label>
    </album>
  </albums>
</discography>
""")

# Why is this XML document not valid? Answer:
isvalid(dtd, """
<discography>
  <albums>
    <album>
      <label>Harvest, EMI</label>
      <title>The Dark Side of the Moon</title>
    </album>
  </albums>
</discography>
""")

In [None]:
# The album content model is `(title, label)` again; compare the list of
# <!ELEMENT ...> declarations in this DTD with the elements it refers to.
dtd = """
<!ELEMENT discography (albums)>
<!ELEMENT albums (album+)>
<!ELEMENT album (title, label)>
<!ELEMENT title (#PCDATA)>
"""

# Why is this XML document not valid? Answer:
isvalid(dtd, """
<discography>
  <albums>
    <album>
      <title>The Dark Side of the Moon</title>
      <label>Harvest, EMI</label>
    </album>
  </albums>
</discography>
""")

In [None]:
# Content model under test: the mixed-content form `(#PCDATA | title)*`
# for album.
dtd = """
<!ELEMENT discography (albums)>
<!ELEMENT albums (album+)>
<!ELEMENT album (#PCDATA | title)*>
<!ELEMENT title (#PCDATA)>
"""

# album contains text directly.
isvalid(dtd, """
<discography>
  <albums>
    <album>The Dark Side of the Moon</album>
  </albums>
</discography>
""")

# album contains a title child element instead.
isvalid(dtd, """
<discography>
  <albums>
    <album>
      <title>The Dark Side of the Moon</title>
    </album>
  </albums>
</discography>
""")

In [None]:
# Content model under test: the choice `(title | label)` for album.
# Checked against an album with only title, only label, and both.
dtd = """
<!ELEMENT discography (albums)>
<!ELEMENT albums (album+)>
<!ELEMENT album (title | label)>
<!ELEMENT title (#PCDATA)>
<!ELEMENT label (#PCDATA)>
"""

# album with a title child only.
isvalid(dtd, """
<discography>
  <albums>
    <album>
      <title>The Dark Side of the Moon</title>
    </album>
  </albums>
</discography>
""")

# album with a label child only.
isvalid(dtd, """
<discography>
  <albums>
    <album>
      <label>Harvest, EMI</label>
    </album>
  </albums>
</discography>
""")

# Why is this XML document not valid? Answer:
isvalid(dtd, """
<discography>
  <albums>
    <album>
      <title>The Dark Side of the Moon</title>
      <label>Harvest, EMI</label>
    </album>
  </albums>
</discography>
""")

In [None]:
# DTD and document are passed inline here. album is the sequence
# (title, label, released), and released is itself the sequence
# (day, month, year) of #PCDATA elements.
isvalid("""
<!ELEMENT discography (albums)>
<!ELEMENT albums (album*)>
<!ELEMENT album (title, label, released)>
<!ELEMENT title (#PCDATA)>
<!ELEMENT label (#PCDATA)>
<!ELEMENT released (day, month, year)>
<!ELEMENT day (#PCDATA)>
<!ELEMENT month (#PCDATA)>
<!ELEMENT year (#PCDATA)>
""", """
<discography>
  <albums>
    <album>
      <title>The Dark Side of the Moon</title>
      <label>Harvest, EMI</label>
      <released>
        <day>16</day>
        <month>03</month>
        <year>1973</year>
      </released>
    </album>
  </albums>
</discography>
""")

In [None]:
# Content models under test: `released?` inside album, and the grouped
# sub-sequence `(day, month)?` followed by `year` inside released.
dtd = """
<!ELEMENT discography (albums)>
<!ELEMENT albums (album*)>
<!ELEMENT album (title, label, released?)>
<!ELEMENT title (#PCDATA)>
<!ELEMENT label (#PCDATA)>
<!ELEMENT released ((day, month)?, year)>
<!ELEMENT day (#PCDATA)>
<!ELEMENT month (#PCDATA)>
<!ELEMENT year (#PCDATA)>
"""

# released with day, month and year.
isvalid(dtd, """
<discography>
  <albums>
    <album>
      <title>The Dark Side of the Moon</title>
      <label>Harvest, EMI</label>
      <released>
        <day>16</day>
        <month>03</month>
        <year>1973</year>
      </released>
    </album>
  </albums>
</discography>
""")

# album without a released child.
isvalid(dtd, """
<discography>
  <albums>
    <album>
      <title>The Dark Side of the Moon</title>
      <label>Harvest, EMI</label>
    </album>
  </albums>
</discography>
""")


# released with year only.
isvalid(dtd, """
<discography>
  <albums>
    <album>
      <title>The Dark Side of the Moon</title>
      <label>Harvest, EMI</label>
      <released>
        <year>1973</year>
      </released>
    </album>
  </albums>
</discography>
""")

# Two albums: the first without released, the second with a year-only released.
isvalid(dtd, """
<discography>
  <albums>
    <album>
      <title>The Dark Side of the Moon</title>
      <label>Harvest, EMI</label>
    </album>
    <album>
      <title>The Wall</title>
      <label>Harvest, EMI</label>
      <released>
        <year>1979</year> 
      </released>
    </album>
  </albums>
</discography>
""")

## Attribute

In [None]:
# Attribute declaration under test: title has a CDATA attribute `released`
# declared with the default value "1973".
dtd = """
<!ELEMENT discography (albums)>
<!ELEMENT albums (album*)>
<!ELEMENT album (title)>
<!ELEMENT title (#PCDATA)>
<!ATTLIST title released CDATA "1973">
"""

# released given explicitly with the same value as the declared default.
isvalid(dtd, """
<discography>
  <albums>
    <album>
      <title released="1973">The Dark Side of the Moon</title>
    </album>
  </albums>
</discography>
""")

# released given explicitly with a different value.
isvalid(dtd, """
<discography>
  <albums>
    <album>
      <title released="1979">The Wall</title>
    </album>
  </albums>
</discography>
""")

In [None]:
# Attribute declaration under test: `released` on title is declared #REQUIRED.
dtd = """
<!ELEMENT discography (albums)>
<!ELEMENT albums (album*)>
<!ELEMENT album (title)>
<!ELEMENT title (#PCDATA)>
<!ATTLIST title released CDATA #REQUIRED>
"""

# title with the released attribute.
isvalid(dtd, """
<discography>
  <albums>
    <album>
      <title released="1973">The Dark Side of the Moon</title>
    </album>
  </albums>
</discography>
""")

# Why is this XML document not valid? Answer:
isvalid(dtd, """
<discography>
  <albums>
    <album>
      <title>The Dark Side of the Moon</title>
    </album>
  </albums>
</discography>
""")

In [None]:
# Attribute declaration under test: `released` on title is declared #IMPLIED.
dtd = """
<!ELEMENT discography (albums)>
<!ELEMENT albums (album*)>
<!ELEMENT album (title)>
<!ELEMENT title (#PCDATA)>
<!ATTLIST title released CDATA #IMPLIED>
"""

# title with the released attribute.
isvalid(dtd, """
<discography>
  <albums>
    <album>
      <title released="1973">The Dark Side of the Moon</title>
    </album>
  </albums>
</discography>
""")

# Why is this XML document valid? Answer:
isvalid(dtd, """
<discography>
  <albums>
    <album>
      <title>The Dark Side of the Moon</title>
    </album>
  </albums>
</discography>
""")

In [None]:
# Attribute declaration under test: `released` on title is declared
# #FIXED with the value "1973".
dtd = """
<!ELEMENT discography (albums)>
<!ELEMENT albums (album*)>
<!ELEMENT album (title)>
<!ELEMENT title (#PCDATA)>
<!ATTLIST title released CDATA #FIXED "1973">
"""

# released given with the fixed value.
isvalid(dtd, """
<discography>
  <albums>
    <album>
      <title released="1973">The Dark Side of the Moon</title>
    </album>
  </albums>
</discography>
""")

# Why is this XML document not valid? Answer:
isvalid(dtd, """
<discography>
  <albums>
    <album>
      <title released="1979">The Wall</title>
    </album>
  </albums>
</discography>
""")

# Document that omits the released attribute altogether.
isvalid(dtd, """
<discography>
  <albums>
    <album>
      <title>The Wall</title>
    </album>
  </albums>
</discography>
""")

In [None]:
# Attribute declaration under test: `released` on title is an enumerated
# type listing 1973 and 1979, and is #REQUIRED.
dtd = """
<!ELEMENT discography (albums)>
<!ELEMENT albums (album*)>
<!ELEMENT album (title)>
<!ELEMENT title (#PCDATA)>
<!ATTLIST title released (1973 | 1979) #REQUIRED>
"""

# One album using the value 1973.
isvalid(dtd, """
<discography>
  <albums>
    <album>
      <title released="1973">The Dark Side of the Moon</title>
    </album>
  </albums>
</discography>
""")

# Why is this XML document valid? Answer:
isvalid(dtd, """
<discography>
  <albums>
    <album>
      <title released="1973">The Dark Side of the Moon</title>
    </album>
    <album>
      <title released="1979">The Wall</title>
    </album>
  </albums>
</discography>
""")

# Document using a value (1982) that is not listed in the enumeration.
isvalid(dtd, """
<discography>
  <albums>
    <album>
      <title released="1982">The Wall</title>
    </album>
  </albums>
</discography>
""")

In [None]:
# Attribute declaration under test: `identifier` on title has the type ID
# and is #REQUIRED.
dtd = """
<!ELEMENT discography (albums)>
<!ELEMENT albums (album*)>
<!ELEMENT album (title)>
<!ELEMENT title (#PCDATA)>
<!ATTLIST title identifier ID #REQUIRED>
"""

# Two titles with different identifier values (p1, p2).
isvalid(dtd, """
<discography>
  <albums>
    <album>
      <title identifier="p1">The Dark Side of the Moon</title>
    </album>
    <album>
      <title identifier="p2">The Wall</title>
    </album>
  </albums>
</discography>
""")

# Why is this XML document not valid? Answer:
isvalid(dtd, """
<discography>
  <albums>
    <album>
      <title identifier="p1">The Dark Side of the Moon</title>
    </album>
    <album>
      <title identifier="p1">The Wall</title>
    </album>
  </albums>
</discography>
""")

In [None]:
# album is an EMPTY element described only by attributes, declared in two
# separate <!ATTLIST ...> declarations: title (#REQUIRED) and released (#IMPLIED).
dtd = """
<!ELEMENT discography (albums)>
<!ELEMENT albums (album*)>
<!ELEMENT album EMPTY>
<!ATTLIST album title CDATA #REQUIRED>
<!ATTLIST album released CDATA #IMPLIED>
"""

# Both attributes present.
isvalid(dtd, """
<discography>
  <albums>
    <album title="The Dark Side of the Moon" released="1973"/>
  </albums>
</discography>
""")

# Only title present.
isvalid(dtd, """
<discography>
  <albums>
    <album title="The Dark Side of the Moon"/>
  </albums>
</discography>
""")

# Why is this XML document not valid? Answer:
isvalid(dtd, """
<discography>
  <albums>
    <album released="1973"/>
  </albums>
</discography>
""")

In [None]:
# Same two album attributes as in the previous cell, here declared together
# in a single <!ATTLIST ...> declaration.
dtd = """
<!ELEMENT discography (albums)>
<!ELEMENT albums (album*)>
<!ELEMENT album EMPTY>
<!ATTLIST album title CDATA #REQUIRED
                released CDATA #IMPLIED>
"""

# Both attributes present.
isvalid(dtd, """
<discography>
  <albums>
    <album title="The Dark Side of the Moon" released="1973"/>
  </albums>
</discography>
""")

## Entitäten

In [None]:
# Element declarations used for validation; album holds #PCDATA.
dtd = """
<!ELEMENT discography (albums)>
<!ELEMENT albums (album*)>
<!ELEMENT album (#PCDATA)>
"""

# The document carries its own internal DTD subset (the DOCTYPE block), which
# declares the general entity `waters`; album references it as `&waters;`.
doc = """
<!DOCTYPE discography [
<!ENTITY waters "Roger Waters">
]>
<discography>
  <albums>
    <album>&waters;</album>
  </albums>
</discography>
"""

# Validate the document against the separate `dtd` string above.
isvalid(dtd, doc)

# Why does this yield 'Roger Waters'? Answer:
exp(doc, '/discography/albums/album/text()')

## Namensräume

In [None]:
# Element names are declared including their prefixes (disc:, albs:), and the
# xmlns:disc / xmlns:albs namespace declarations are declared as #FIXED CDATA
# attributes of disc:discography.
dtd = """
<!ELEMENT disc:discography (albs:albums)>
<!ELEMENT albs:albums (albs:album*)>
<!ELEMENT albs:album EMPTY>
<!ATTLIST disc:discography xmlns:disc CDATA #FIXED "http://discography.org">
<!ATTLIST disc:discography xmlns:albs CDATA #FIXED "http://albums.org">
<!ATTLIST albs:album title CDATA #REQUIRED>
<!ATTLIST albs:album released CDATA #REQUIRED>
"""

# The document uses the same prefixes and the same namespace URIs as the DTD.
doc = """
<disc:discography xmlns:disc="http://discography.org" xmlns:albs="http://albums.org">
<albs:albums>
<albs:album title="The Dark Side of the Moon" released="1973"/>
</albs:albums>
</disc:discography>
"""

isvalid(dtd, doc)

Denken Sie sich nun ein eigenes XML-Dokument aus und erstellen Sie dafür eine DTD.