In [5]:
import os
import csv
import pandas as pd
import numpy as np
from lxml import etree

# Directory containing the XML files
folder_path = "../data/bremen/lido"
namespace = {"lido": "http://www.lido-schema.org"}

# Output CSV file path
output_csv = "../output_csv/bremen_output.csv"

# Initialize a list to store all record data
data = []

# Loop through each subfolder in the lido directory
for subfolder in os.listdir(folder_path):
    subfolder_path = os.path.join(folder_path, subfolder)
    if os.path.isdir(subfolder_path):
        # Loop through each XML file in the subfolder
        for filename in os.listdir(subfolder_path):
            if filename.endswith(".xml"):
                filepath = os.path.join(subfolder_path, filename)
                tree = etree.parse(filepath)

                # Fields
                lido_record_id = tree.xpath("string(//lido:lidoRecID)", namespaces=namespace)
                image_urls = tree.xpath("//lido:linkResource/text()", namespaces=namespace)
                types = tree.xpath("//lido:objectWorkType/lido:term/text()", namespaces=namespace)
                materials = tree.xpath("//lido:termMaterialsTech/lido:term/text()", namespaces=namespace)
                display_date = tree.xpath("string(//lido:eventDate/lido:displayDate)", namespaces=namespace)
                earliest_date = tree.xpath("string(//lido:eventDate/lido:date/lido:earliestDate)", namespaces=namespace)
                latest_date = tree.xpath("string(//lido:eventDate/lido:date/lido:latestDate)", namespaces=namespace)
                subjects = tree.xpath("//lido:subjectConcept/lido:term/text()", namespaces=namespace)
                artist_name = tree.xpath("string(//lido:nameActorSet/lido:appellationValue[@lido:pref='preferred'])", namespaces=namespace)
                nationality = tree.xpath("string(//lido:culture/lido:term)", namespaces=namespace)
                role = tree.xpath("string(//lido:roleActor/lido:term)", namespaces=namespace)
                birth = tree.xpath("string(//lido:vitalDatesActor/lido:earliestDate)", namespaces=namespace)
                death = tree.xpath("string(//lido:vitalDatesActor/lido:latestDate)", namespaces=namespace)
                location = tree.xpath("string(//lido:repositoryLocation/lido:namePlaceSet/lido:appellationValue)", namespaces=namespace)
                classifications = tree.xpath("//lido:classification/lido:term/text()", namespaces=namespace)
                titles = tree.xpath("//lido:titleSet/lido:appellationValue/text()", namespaces=namespace)
                rights_statements = tree.xpath("//lido:rightsResource/lido:rightsType/lido:conceptID[@lido:type='http://terminology.lido-schema.org/lido00099']/text()", namespaces=namespace)
                work_id = tree.xpath("string(//lido:workID)", namespaces=namespace)
                repository_name = tree.xpath("//lido:legalBodyName/lido:appellationValue/text()", namespaces=namespace)
                record_id = tree.xpath("string(//lido:recordID)", namespaces=namespace)
                record_links = tree.xpath("//lido:recordInfoLink/text()", namespaces=namespace)
                event_type = tree.xpath("string(//lido:eventType/lido:term)", namespaces=namespace)
                display_materials = tree.xpath("//lido:displayMaterialsTech/text()", namespaces=namespace)
                inscriptions = tree.xpath("//lido:inscriptionDescription/lido:descriptiveNoteValue/text()", namespaces=namespace)
                related_work_notes = tree.xpath("//lido:objectNote/text()", namespaces=namespace)

                data.append({
                    "lidoRecordId": lido_record_id,
                    "imageUrl": "; ".join(image_urls),
                    "type": "; ".join(types),
                    "material": "; ".join(materials),
                    "displayDate": display_date,
                    "earliestDate": earliest_date,
                    "latestDate": latest_date,
                    "subject": "; ".join(subjects),
                    "artistName": artist_name,
                    "nationality": nationality,
                    "role": role,
                    "birth": birth,
                    "death": death,
                    "location": location,
                    "title": "; ".join(titles),
                    "classification": "; ".join(classifications),
                    "rightsStatement": "; ".join(rights_statements),
                    "workID": work_id,
                    "repositoryName": repository_name,
                    "recordID": record_id,
                    "recordLinks": record_links,
                    "eventType": event_type,
                    "displayMaterialsTech": display_materials,
                    "inscriptions": inscriptions,
                    "relatedWorkNotes": related_work_notes
                })

# Convert to DataFrame and save to CSV
df = pd.DataFrame(data)
df.replace("", np.nan, inplace=True)
df.to_csv(output_csv, index=False, encoding="utf-8")

  df.replace("", np.nan, inplace=True)


In [6]:
import pandas as pd

# Output CSV file path
output_csv = "../output_csv/bremen_output.csv"

# Load and display the CSV as a table
df_bremen = pd.read_csv(output_csv)
df_bremen.head(20)

Unnamed: 0,lidoRecordId,imageUrl,type,material,displayDate,earliestDate,latestDate,subject,artistName,nationality,...,classification,rightsStatement,workID,repositoryName,recordID,recordLinks,eventType,displayMaterialsTech,inscriptions,relatedWorkNotes
0,DE-MUS-027614/lido/94265,https://onlinekatalog.kunsthalle-bremen.de/DE-...,Druckgraphik; Druckgraphik,Holzschnitt; Holzschnitt (Druckverfahren),um 1550-1555,1550.0,1555.0,Noahs Trunkenheit; Trunkenheit,"Salomon, Bernard",,...,Druckgraphik; Druckgraphik,http://creativecommons.org/publicdomain/mark/1.0/,25567,"['Kunsthalle Bremen', 'Bremen, Kunsthalle Brem...",94265,['https://onlinekatalog.kunsthalle-bremen.de/D...,Herstellung,['Holzschnitt'],"['oben außerhalb der Darstellung bezeichnet', ...",[]
1,DE-MUS-027614/lido/84536,https://onlinekatalog.kunsthalle-bremen.de/DE-...,Druckgraphik; Druckgraphik,,1807-1845,1807.0,1845.0,"Jugendlicher, Heranwachsender; Jugendlicher; V...","Charlet, Nicolas Toussaint",,...,Druckgraphik; Druckgraphik,http://creativecommons.org/publicdomain/mark/1.0/,1911/1179,"['Kunsthalle Bremen', 'Bremen, Kunsthalle Brem...",84536,['https://onlinekatalog.kunsthalle-bremen.de/D...,Herstellung,"['Lithographie, Chine collé']","['rechts oberhalb der Darstellung nummeriert',...",[]
2,DE-MUS-027614/lido/84537,https://onlinekatalog.kunsthalle-bremen.de/DE-...,Druckgraphik; Druckgraphik,Lithographie; Lithographie (Druckverfahren),um 1910,1900.0,1920.0,Kunsthändler; Kopf (als Teil des menschlichen ...,"Forain, Jean Louis",,...,Druckgraphik; Druckgraphik,http://creativecommons.org/publicdomain/mark/1.0/,1962/189,"['Kunsthalle Bremen', 'Bremen, Kunsthalle Brem...",84537,['https://onlinekatalog.kunsthalle-bremen.de/D...,Herstellung,['Lithographie'],['unten links außerhalb der Darstellung: Präge...,[]
3,DE-MUS-027614/lido/13286,https://onlinekatalog.kunsthalle-bremen.de/DE-...,Druckgraphik; Druckgraphik,Radierung; Radierung (Druckverfahren),1878-1880,1878.0,1880.0,"den Boden fruchtbar machen, düngen; Fruchtbark...","Pissarro, Camille",,...,Druckgraphik; Druckgraphik,http://creativecommons.org/publicdomain/mark/1.0/,1909/908,"['Kunsthalle Bremen', 'Bremen, Kunsthalle Brem...",13286,['https://onlinekatalog.kunsthalle-bremen.de/D...,Herstellung,['Radierung und wenig Kaltnadel'],"['verso: Kunsthalle Bremen; Lugt 292', 'unten ...",[]
4,DE-MUS-027614/lido/54019,https://onlinekatalog.kunsthalle-bremen.de/DE-...,Druckgraphik; Druckgraphik,Holzschnitt; Holzschnitt (Druckverfahren),um 1496/7,1496.0,1498.0,,"Dürer, Albrecht",,...,Druckgraphik; Druckgraphik,http://creativecommons.org/publicdomain/mark/1.0/,1906/318,"['Kunsthalle Bremen', 'Bremen, Kunsthalle Brem...",54019,['https://onlinekatalog.kunsthalle-bremen.de/D...,Herstellung,['Holzschnitt'],"['verso: Kunsthalle Bremen; Lugt 292', 'unten ...",[]
5,DE-MUS-027614/lido/94199,https://onlinekatalog.kunsthalle-bremen.de/DE-...,Druckgraphik; Druckgraphik,Lithographie; Lithographie (Druckverfahren),Juni 1820,1820.0,1820.0,in eine Stadt oder Festung eindringen (bei ein...,"Aubry, Charles",,...,Druckgraphik; Druckgraphik,http://creativecommons.org/publicdomain/mark/1.0/,2022/3009,"['Kunsthalle Bremen', 'Bremen, Kunsthalle Brem...",94199,['https://onlinekatalog.kunsthalle-bremen.de/D...,Herstellung,['Lithographie'],"['links unterhalb der Darstellung mit ""Copie"" ...",[]
6,DE-MUS-027614/lido/54018,https://onlinekatalog.kunsthalle-bremen.de/DE-...,Druckgraphik; Druckgraphik,Holzschnitt; Holzschnitt (Druckverfahren),um 1498,1497.0,1498.0,,"Dürer, Albrecht",,...,Druckgraphik; Druckgraphik,http://creativecommons.org/publicdomain/mark/1.0/,1906/317,"['Kunsthalle Bremen', 'Bremen, Kunsthalle Brem...",54018,['https://onlinekatalog.kunsthalle-bremen.de/D...,Herstellung,"['Holzschnitt', 'Meder 259']",['unten Mitte monogrammiert'],[]
7,DE-MUS-027614/lido/94198,https://onlinekatalog.kunsthalle-bremen.de/DE-...,Druckgraphik; Druckgraphik,Lithographie; Lithographie (Druckverfahren),1804-1844,1804.0,1844.0,Porträt einer anonymen historischen Person (al...,"Mauzaisse, Jean Baptiste",,...,Druckgraphik; Druckgraphik,http://creativecommons.org/publicdomain/mark/1.0/,2022/3008,"['Kunsthalle Bremen', 'Bremen, Kunsthalle Brem...",94198,['https://onlinekatalog.kunsthalle-bremen.de/D...,Herstellung,['Lithographie'],['Mitte unterhalb der Darstellung von fremder ...,[]
8,DE-MUS-027614/lido/58959,https://onlinekatalog.kunsthalle-bremen.de/DE-...,Druckgraphik; Druckgraphik,Kupferstich; Kupferstich (Druckverfahren); Kup...,nach 1512,1512.0,1750.0,,Anonym,,...,Druckgraphik; Druckgraphik,http://creativecommons.org/publicdomain/mark/1.0/,1851/485,"['Kunsthalle Bremen', 'Bremen, Kunsthalle Brem...",58959,['https://onlinekatalog.kunsthalle-bremen.de/D...,Herstellung,['Kupferstich'],['oben Mitte innerhalb der Bogenöffnung bezeic...,[]
9,DE-MUS-027614/lido/58958,https://onlinekatalog.kunsthalle-bremen.de/DE-...,Druckgraphik; Druckgraphik,Kupferstich; Kupferstich (Druckverfahren); Kup...,nach 1512,1512.0,1750.0,,Anonym,,...,Druckgraphik; Druckgraphik,http://creativecommons.org/publicdomain/mark/1.0/,1851/486,"['Kunsthalle Bremen', 'Bremen, Kunsthalle Brem...",58958,['https://onlinekatalog.kunsthalle-bremen.de/D...,Herstellung,['Kupferstich'],['unten rechts auf dem Sarkophag bezeichnet'],[]
