# Kalliope-SRU-Abfrage und Parsen von MODS mit Python (lxml.etree)
Quelle: https://github.com/deutsche-nationalbibliothek/dnblab/blob/main/DNB_SRU_Tutorial.ipynb

In [7]:
import requests
from lxml import etree
import pandas as pd

In [22]:
# SRU query
def kalliope_sru(query, timeout=60):
    """Run a CQL query against the Kalliope SRU endpoint and collect every hit.

    The endpoint is asked for MODS records in pages of 100. Further pages are
    requested as long as the previous page came back full, and their
    ``srw:record`` elements are merged into the first response.

    Args:
        query: CQL query string, e.g. ``'ead.title="Kurdisch"'``.
        timeout: Seconds to wait for each HTTP request before giving up.

    Returns:
        The root element of the first SRU response. Records from follow-up
        pages are appended directly to this root element (not to its
        ``srw:records`` child), so select them with ``//srw:record``.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If a request exceeds ``timeout``.
        lxml.etree.XMLSyntaxError: If a response body is not well-formed XML.
    """
    base_url = "https://kalliope-verbund.info/sru"
    srw_ns = {'srw': 'http://www.loc.gov/zing/srw/'}
    page_size = 100
    params = {
        'version': '1.2',
        'operation': 'searchRetrieve',
        'recordSchema': 'mods37',
        'maximumRecords': str(page_size),
        'query': query
    }

    def fetch_page(start_record=None):
        """Fetch one result page and return its parsed XML root."""
        page_params = dict(params)
        if start_record is not None:
            page_params['startRecord'] = start_record
        r = requests.get(base_url, params=page_params, timeout=timeout)
        r.raise_for_status()
        return etree.fromstring(r.content)

    records_mods = fetch_page()
    page_len = len(records_mods.xpath("//srw:record", namespaces=srw_ns))
    next_start = page_size + 1  # the first page covered records 1..page_size

    # A full page means there may be more results behind it.
    while page_len == page_size:
        new_records = fetch_page(next_start).xpath("//srw:record", namespaces=srw_ns)
        # Count BEFORE merging: lxml moves (does not copy) elements into their
        # new parent, so the fetched page's tree is empty once extend() has run.
        page_len = len(new_records)
        records_mods.extend(new_records)
        next_start += page_size

    return records_mods

In [18]:
# function parse mods
def parse_mods(record):
    """Extract selected MODS fields from a single SRU record.

    All XPath expressions are evaluated relative to ``record`` so that values
    from other records in the same response do not leak into the result.

    Args:
        record: An lxml element for one ``srw:record`` (any object offering
            ``xpath(expr, namespaces=...)`` works).

    Returns:
        dict with the keys
        ``Title`` (str, "unknown" if missing),
        ``holder`` (str, "unknown" if missing),
        ``ISILholder`` (list of ``authorityURI`` attribute values),
        ``langattributes`` (list of ``lang`` attribute values in the record),
        ``name`` (list of ``namePart`` texts),
        ``nameID`` (list of ``authority`` attribute values on ``name``),
        ``abstract_content`` (str, the string 'NaN' if missing).
    """
    ns = {
        'srw': 'http://www.loc.gov/zing/srw/',  # SRW namespace
        'mods': 'http://www.loc.gov/mods/v3'    # MODS namespace
    }

    def first_text(expr):
        """Text of the first element matching ``expr``, or "unknown"."""
        nodes = record.xpath(expr, namespaces=ns)
        # An element that exists but has no text yields None; treat it as missing.
        return (nodes[0].text or "unknown") if nodes else "unknown"

    # Title: text of <titleInfo><title>
    title = first_text(".//mods:titleInfo/mods:title")

    # Holder: text of <location><physicalLocation>, i.e. the physical location
    # (holding institution)
    holder = first_text(".//mods:location/mods:physicalLocation")

    # ISIL of the holding institution: the authorityURI attribute of
    # <location><physicalLocation>
    ISILholder = record.xpath(".//mods:location/mods:physicalLocation/@authorityURI", namespaces=ns)

    # Relative path (".//"): a leading "//" would be resolved against the whole
    # document and return the lang attributes of every record in the response.
    lang_attributes = record.xpath(".//@lang")

    name = record.xpath(".//mods:namePart/text()", namespaces=ns)
    nameID = record.xpath(".//mods:name/@authority", namespaces=ns)
    abstract_content = record.xpath(".//mods:abstract[@type='content']/text()", namespaces=ns)
    abstract_content = abstract_content[0] if abstract_content else 'NaN'

    # Return a dictionary to build the DataFrame
    return {
        "Title": title,
        "holder": holder,
        "ISILholder": ISILholder,
        "langattributes": lang_attributes,
        "name": name,
        "nameID": nameID,
        "abstract_content": abstract_content,
        # Add more fields to extract as needed
    }

In [24]:
# Example: search Kalliope for records whose title contains "Kurdisch"
query = 'ead.title="Kurdisch"'
records_mods = kalliope_sru(query)

# Count the records that were collected and report the number of hits
hit_count = len(
    records_mods.xpath(
        "//srw:record",
        namespaces={"srw": "http://www.loc.gov/zing/srw/"},
    )
)
print(f'{hit_count} Ergebnisse gefunden')

79 Ergebnisse gefunden


In [26]:
# Turn every SRU record into a row dict and assemble the rows into a DataFrame
records = records_mods.xpath(
    "//srw:record",
    namespaces={"srw": "http://www.loc.gov/zing/srw/"},
)
output = list(map(parse_mods, records))
df = pd.DataFrame(output)
df


Unnamed: 0,Title,holder,ISILholder,langattributes,name,nameID,abstract_content
0,Armenisch und Türkisch,Niedersächsische Staats- und Universitätsbibli...,[http://ld.zdb-services.de/resource/organisati...,"[eng, ger, ger, ger, eng, ger, ger, ger, ger, ...","[Andreas, Friedrich Carl (1846-1930)]",[DE-588],
1,Armenisch und Türkisch,Niedersächsische Staats- und Universitätsbibli...,[http://ld.zdb-services.de/resource/organisati...,"[eng, ger, ger, ger, eng, ger, ger, ger, ger, ...","[Andreas, Friedrich Carl (1846-1930)]",[DE-588],
2,K. VIII,Staatsbibliothek zu Berlin. Handschriftenabtei...,[http://ld.zdb-services.de/resource/organisati...,"[eng, ger, ger, ger, eng, ger, ger, ger, ger, ...","[Mann, Oskar]",[DE-588],Erklärung des Namens Gelbagi und die Beschreib...
3,"Kurdisch, Gorani",Niedersächsische Staats- und Universitätsbibli...,[http://ld.zdb-services.de/resource/organisati...,"[eng, ger, ger, ger, eng, ger, ger, ger, ger, ...","[MacKenzie, David N. (1926-2001)]",[DE-588],
4,Teilnachlass Gerhard Doerfer,Niedersächsische Staats- und Universitätsbibli...,[http://ld.zdb-services.de/resource/organisati...,"[eng, ger, ger, ger, eng, ger, ger, ger, ger, ...","[Doerfer, Gerhard (1920-)]",[DE-588],
...,...,...,...,...,...,...,...
74,K. XIII,Staatsbibliothek zu Berlin. Handschriftenabtei...,[http://ld.zdb-services.de/resource/organisati...,"[eng, ger, ger, ger, eng, ger, ger, ger, ger, ...","[Mann, Oskar]",[DE-588],
75,lyrische Entwürfe und Vorstufen,Staatsbibliothek zu Berlin. Handschriftenabtei...,[http://ld.zdb-services.de/resource/organisati...,"[eng, ger, ger, ger, eng, ger, ger, ger, ger, ...","[Chamisso, Adelbert von (1781-1838)]",[DE-588],
76,Manuskripte fremder Autoren,Archiv der Berlin-Brandenburgischen Akademie d...,[http://ld.zdb-services.de/resource/organisati...,"[eng, ger, ger, ger, eng, ger, ger, ger, ger, ...","[Mann, Oskar, Mullah Mohammad Reza Kandulai]","[DE-588, DE-611]","Der Schreiber dieser Sammlung heißt ""Mullah Mu..."
77,Proteste und Stellungnahmen,Schweizerisches Literaturarchiv (SLA),[http://ld.zdb-services.de/resource/organisati...,"[eng, ger, ger, ger, eng, ger, ger, ger, ger, ...",[Schweizerisches Literaturarchiv (SLA)],[],


In [29]:
#print(etree.tostring(records_mods, pretty_print=True).decode())