# Kalliope-SRU-Abfrage und Parsen von MODS mit Python (lxml etree)
Quelle: https://github.com/deutsche-nationalbibliothek/dnblab/blob/main/DNB_SRU_Tutorial.ipynb

In [118]:
import requests
from lxml import etree
import pandas as pd

In [119]:
# SRU query
def kalliope_sru(query):
    base_url = "https://kalliope-verbund.info/sru"
    params = {
        'version': '1.2',
        'operation': 'searchRetrieve',
        'recordSchema': 'mods37',
        'maximumRecords': '100',
        'query': query
    }
    
    r = requests.get(base_url, params=params)
    mods_content = r.content
    records_mods = etree.fromstring(mods_content)
    
    # Check if more than 100 records
    if len(records_mods.xpath("//srw:record", namespaces={'srw': 'http://www.loc.gov/zing/srw/'})) < 100:
        return records_xml
    else:
        num_results = 100
        i = 101
        while num_results == 100:
            params.update({'startRecord': i})
            r = requests.get(base_url, params=params)
            new_mods_content = r.content
            new_records_mods = etree.fromstring(new_mods_content)
            records_mods.extend(new_records_mods.xpath("//srw:record", namespaces={'srw': 'http://www.loc.gov/zing/srw/'}))
            i += 100
            num_results = len(new_records_mods.xpath("//srw:record", namespaces={'srw': 'http://www.loc.gov/zing/srw/'}))
        
        return records_mods


In [165]:
# function parse mods
def parse_mods(record):
    ns = {
        'srw': 'http://www.loc.gov/zing/srw/',  # SRW namespace
        'mods': 'http://www.loc.gov/mods/v3'    # MODS namespace
    }
    
    # Extract the title (e.g., <titleInfo><title>)
    title = record.xpath(".//mods:titleInfo/mods:title", namespaces=ns)
    try:
        title = title[0].text
    except IndexError:
        title = "unknown"
        
    # Extract holder //mods/location/physicalLocation/@authority Physischer Standort (bestandshaltende Einrichtung)
    holder = record.xpath(".//mods:location/mods:physicalLocation", namespaces=ns)    
    try:
        holder = holder[0].text
    except IndexError:
        holder = "unknown"
        
    # Extract ISIL //mods/location/physicalLocation/@authority Physischer Standort (bestandshaltende Einrichtung)
    ISILholder = record.xpath(".//mods:location/mods:physicalLocation/@authorityURI", namespaces=ns)
    
    lang_attributes = record.xpath("//@lang")
    
    name = record.xpath(".//mods:namePart/text()", namespaces=ns)
    nameID = record.xpath(".//mods:name/@authority", namespaces=ns)
    abstract_content = record.xpath(".//mods:abstract[@type='content']/text()", namespaces=ns)
    abstract_content = abstract_content[0] if abstract_content else 'NaN'

   

    # Return a dictionary to build the DataFrame
    return {
        "Title": title,
        "holder": holder,
        "ISILholder": ISILholder,
        "langattributes": lang_attributes,
        "name": name,
        "nameID": nameID,
        "abstract_content": abstract_content,
        
        # Add more fields to extract as needed
        
        
    }

In [166]:
# Example query
query = 'ead.id="DE-611-BF-110454"'
records_xml = kalliope_sru(query)

print(f'{len(records_xml.xpath("//srw:record", namespaces={"srw": "http://www.loc.gov/zing/srw/"}))} Ergebnisse gefunden')


126 Ergebnisse gefunden


In [167]:
# Parse data and convert to DataFrame
records = records_xml.xpath("//srw:record", namespaces={"srw": "http://www.loc.gov/zing/srw/"})
output = [parse_mods(record) for record in records]
df = pd.DataFrame(output)
df


Unnamed: 0,Title,holder,ISILholder,langattributes,name,nameID,abstract_content
0,K. I,Staatsbibliothek zu Berlin. Handschriftenabtei...,[http://ld.zdb-services.de/resource/organisati...,"[eng, ger, ger, ger, ger, eng, ger, ger, ger, ...","[Mann, Oskar]",[DE-588],Entwurf eines Beitrages über die deutsche Bank...
1,K. II,Staatsbibliothek zu Berlin. Handschriftenabtei...,[http://ld.zdb-services.de/resource/organisati...,"[eng, ger, ger, ger, ger, eng, ger, ger, ger, ...","[Mann, Oskar]",[DE-588],Etliche Notizen (auf unterschiedlichem Papierf...
2,K. III,Staatsbibliothek zu Berlin. Handschriftenabtei...,[http://ld.zdb-services.de/resource/organisati...,"[eng, ger, ger, ger, ger, eng, ger, ger, ger, ...","[Mann, Oskar]",[DE-588],Aufzeichnungen von Oskar Mann bzw. eine Bestan...
3,K. IV,Staatsbibliothek zu Berlin. Handschriftenabtei...,[http://ld.zdb-services.de/resource/organisati...,"[eng, ger, ger, ger, ger, eng, ger, ger, ger, ...","[Mann, Oskar]",[DE-588],"Abschrift und Übersetzungen aus dem Werk ""Die ..."
4,K. IV,Staatsbibliothek zu Berlin. Handschriftenabtei...,[http://ld.zdb-services.de/resource/organisati...,"[eng, ger, ger, ger, ger, eng, ger, ger, ger, ...","[Mann, Oskar]",[DE-588],"Abschrift und Übersetzungen aus dem Werk ""Die ..."
...,...,...,...,...,...,...,...
121,Kurt Wolff Archiv,"Yale University, Beinecke Rare Book and Manusc...",[http://ld.zdb-services.de/resource/organisati...,"[eng, ger, ger, ger, ger, eng, ger, ger, ger, ...","[Benn, Gottfried (1886-1956), Heym, Georg (188...","[DE-588, DE-588, DE-588, DE-588, DE-588, DE-58...",
122,Chronologische Konvolute,Schweizerisches Literaturarchiv (SLA),[http://ld.zdb-services.de/resource/organisati...,"[eng, ger, ger, ger, ger, eng, ger, ger, ger, ...","[Schweizerisches Literaturarchiv (SLA), Mann, ...","[DE-588, DE-588, DE-588, DE-588, DE-588, DE-58...",
123,Chronologische Konvolute,Schweizerisches Literaturarchiv (SLA),[http://ld.zdb-services.de/resource/organisati...,"[eng, ger, ger, ger, ger, eng, ger, ger, ger, ...","[Schweizerisches Literaturarchiv (SLA), Speer,...","[DE-588, DE-588, DE-588, DE-588, DE-588, DE-58...",
124,Chronologische Konvolute,Schweizerisches Literaturarchiv (SLA),[http://ld.zdb-services.de/resource/organisati...,"[eng, ger, ger, ger, ger, eng, ger, ger, ger, ...","[Schweizerisches Literaturarchiv (SLA), May, F...","[DE-588, DE-588, DE-588, DE-588, DE-588, DE-58...",


In [168]:
#print(etree.tostring(records_xml, pretty_print=True).decode())