In [1]:
import requests
import xml.etree.ElementTree as ET
import pandas as pd
import time

In [7]:
# Read the hand-maintained Church Fathers table. The file is
# semicolon-delimited and UTF-8 encoded; the lookup loop below relies on
# its 'ID', 'name' and 'gnd_id' columns.
church_fathers_csv = "../data/church-fathers/manual-church-fathers-gnd.csv"
df = pd.read_csv(church_fathers_csv, encoding='utf-8', sep=';')

# Quick sanity check: row count plus a preview of the first records.
print(f"Loaded {len(df)} Church Fathers")
print(df.head())

Loaded 44 Church Fathers
       ID         Surname     Forename                     name     gnd_id  \
0  P18881  von Antiochien     Ignatius  Ignatius von Antiochien  118555340   
1  P19930        Smyrnäus   Polycarpus      Polycarpus Smyrnäus  11859558X   
2  P19894     Atheniensis  Athenagoras  Athenagoras Atheniensis  118646141   
3  P21365     Antiochenus   Theophilus   Theophilus Antiochenus  118756923   
4  P18056        von Lyon      Irenäus         Irenäus von Lyon  118555766   

                           gnd_url  \
0  https://d-nb.info/gnd/118555340   
1  https://d-nb.info/gnd/11859558X   
2  https://d-nb.info/gnd/118646141   
3  https://d-nb.info/gnd/118756923   
4  https://d-nb.info/gnd/118555766   

                                       wikipedia_url  
0  https://de.wikipedia.org/wiki/Ignatius_von_Ant...  
1  https://de.wikipedia.org/wiki/Polykarp_von_Smyrna  
2  https://de.wikipedia.org/wiki/Athenagoras_von_...  
3  https://de.wikipedia.org/wiki/Theophilus_(Anti...  
4 

In [10]:
# Look up a "CC ID" for every Church Father by sending the GND number to the
# mlat.uzh.ch search endpoint and matching the DNB identifier in the XML reply.
#
# Result: `cc_mapping`, a list with exactly one dict per row of `df`
# (keys 'ID', 'Name', 'gnd_id', 'cc_id'); 'cc_id' is None whenever the lookup
# found nothing or the request/parse failed.

REQUEST_TIMEOUT_S = 10    # seconds to wait for a single HTTP response
RATE_LIMIT_DELAY_S = 1    # pause between requests to be polite to the server

# Set up the search URL
base_url = "https://mlat.uzh.ch"
search_url = f"{base_url}/php_modules/db_search.php"

# NOTE(review): `ns` (the prefix -> namespace-URI mapping that resolves the
# 'cc:' prefix in the XPath expressions below) is not defined in any visible
# cell, so this cell depends on hidden kernel state. Fail fast instead of
# spending one second per row only to record None for every entry.
# TODO: define `ns` in the configuration cell at the top of the notebook.
try:
    ns
except NameError:
    raise NameError(
        "`ns` is not defined: create the XML namespace mapping "
        "(a dict with a 'cc' entry) before running this cell"
    ) from None


def _find_cc_id(xml_content, gnd, namespaces):
    """Return the CC ID of the author entry whose DNB identifier equals `gnd`.

    Parameters:
        xml_content: raw XML body of the search response (bytes or str).
        gnd: GND identifier to match; compared as `str(gnd)`.
        namespaces: prefix -> URI mapping providing the 'cc' prefix.

    Returns:
        The text of the first matching result's `cc:cc_idno` element, or
        None when no author result carries a matching DNB identifier.

    Raises:
        xml.etree.ElementTree.ParseError: if `xml_content` is not well-formed.
    """
    root = ET.fromstring(xml_content)
    for result in root.findall('.//cc:xq_result[@type="author"]', namespaces):
        cc_idno_elem = result.find('cc:cc_idno', namespaces)
        external_id_elem = result.find('.//cc:external[@source="DNB"]', namespaces)
        # Skip results that lack either the CC number or a DNB cross-reference.
        if cc_idno_elem is None or external_id_elem is None:
            continue
        if external_id_elem.get('value') == str(gnd):
            return cc_idno_elem.text
    return None


cc_mapping = []
total = len(df)

# enumerate() drives the progress counter so it stays correct even when the
# DataFrame index is not the default 0..n-1 range.
for position, (idx, row) in enumerate(df.iterrows(), start=1):
    gnd = row['gnd_id']
    name = row['name']

    print(f"[{position}/{total}] {name} (GND: {gnd})", end=" → ")

    # Make request
    params = {
        'mode': '0',
        'query_text': gnd,
        'subject': 'all'
    }

    cc_id = None  # stays None on "not found", HTTP errors and exceptions
    try:
        response = requests.get(search_url, params=params, timeout=REQUEST_TIMEOUT_S)

        if response.status_code == 200:
            # An empty <cc_idno> is treated the same as "no match".
            cc_id = _find_cc_id(response.content, gnd, ns) or None
            if cc_id:
                print(f"CC ID: {cc_id}")
            else:
                print("Not found")
        else:
            print(f"Error: HTTP {response.status_code}")

    # Only network and XML-parsing failures are treated as "lookup failed";
    # anything else is a programming error and should surface.
    except (requests.RequestException, ET.ParseError) as e:
        print(f"Error: {e}")

    # Exactly one record per input row, whatever the outcome.
    cc_mapping.append({
        'ID': row['ID'],
        'Name': name,
        'gnd_id': gnd,
        'cc_id': cc_id
    })

    # Rate limiting
    time.sleep(RATE_LIMIT_DELAY_S)

[1/44] Ignatius von Antiochien (GND: 118555340) → Not found
[2/44] Polycarpus Smyrnäus (GND: 11859558X) → Not found
[3/44] Athenagoras Atheniensis (GND: 118646141) → Not found
[4/44] Theophilus Antiochenus (GND: 118756923) → Not found
[5/44] Irenäus von Lyon (GND: 118555766) → Not found
[6/44] Quintus Septimius Florens Tertullian (GND: 118621386) → CC ID: 867
[7/44] Hippolyt von Rom (GND: 118551418) → Not found
[8/44] Hippolytos von Rom (GND: 118551418) → Not found
[9/44] Origenes (GND: 118590235) → CC ID: 1049
[10/44] Cyprian (GND: 1026798183) → Not found
[11/44] Eusebius von Caesarea (GND: 118531425) → CC ID: 919
[12/44] Hilarius von Poitiers (GND: 118550896) → CC ID: 927
[13/44] Athanasius von Alexandrien (GND: 118504843) → CC ID: 938
[14/44] Basilius der Grosse (Caesariensis) (GND: 118637797) → CC ID: 1472
[15/44] Gregor von Nazianz (GND: 118541900) → Not found
[16/44] Ambrosius von Mailand (GND: 11850245X) → CC ID: 958
[17/44] Didymus der Blinde (GND: 118678213) → Not found
[18/44

In [13]:
# Turn the collected lookup records into a table and report the matches.
cc_df = pd.DataFrame(cc_mapping)

# True for rows whose lookup produced a CC ID; rows without a match or with
# a failed request carry None in 'cc_id'.
has_cc_id = cc_df['cc_id'].notna()

print(f"Found CC IDs: {has_cc_id.sum()}")
print("\nChurch Fathers WITH CC IDs:")
print(cc_df[has_cc_id].to_string(index=False))

Found CC IDs: 28

Church Fathers WITH CC IDs:
    ID                                            Name    gnd_id cc_id
P17746            Quintus Septimius Florens Tertullian 118621386   867
P17753                                        Origenes 118590235  1049
P17899                           Eusebius von Caesarea 118531425   919
P18048                           Hilarius von Poitiers 118550896   927
P18895                      Athanasius von Alexandrien 118504843   938
P18887              Basilius der Grosse (Caesariensis) 118637797  1472
P18993                           Ambrosius von Mailand 11850245X   958
 P4541                           Johannes Chrysostomus 118557831   999
P18986                  Sophronius Eusebius Hieronymus 118550853   879
P18700                            Augustinus von Hippo 118505114   900
P18346                           Theodoret von Kyrrhos 118756796  1266
P18989              Anicius Manlius Severinus Boethius 11851282X  1139
P18489                     Paps

In [15]:
# Persist the full mapping (rows with and without a CC ID) as CSV.
output_path = "../data/church-fathers/church-fathers-gnd-cc.csv"

# Semicolon-separated like the input file, without the DataFrame index.
# Note the encoding differs from the input: 'utf-8-sig' writes a leading BOM.
cc_df.to_csv(
    path_or_buf=output_path,
    index=False,
    sep=';',
    encoding='utf-8-sig',
)