In [None]:
####
##Step 1: Run the script
##Step 2: Type in the name you are searching for
##Step 3: Press enter
##Step 4: Enjoy!
##If used frequently, please request your own Europeana API key via https://apis.europeana.eu/en
####

import requests
import pandas as pd
import xml.etree.ElementTree as ET
import time
from concurrent.futures import ThreadPoolExecutor

# Helper function to merge results and ensure consistent columns
def format_df(df, source_name):
    """Normalize one source's results to the shared five-column layout.

    Args:
        df: DataFrame holding the raw rows for a single source, or None.
        source_name: Label written into the 'source' column of every row.

    Returns:
        A new DataFrame with exactly the columns name, source, dates, id,
        link (in that order). Columns absent from ``df`` are created and
        filled with "N/A"; columns not in the layout are dropped. When
        ``df`` is None or empty, an empty frame with those columns is
        returned. The frame passed in is never modified.
    """
    columns = ['name', 'source', 'dates', 'id', 'link']
    if df is None or df.empty:
        return pd.DataFrame(columns=columns)

    # assign() works on a copy, so the caller's frame does not silently
    # gain a 'source' column as a side effect of formatting.
    tagged = df.assign(source=source_name)
    # reindex selects/orders the standard columns and creates any missing
    # ones with the fill value; existing cell values are left untouched.
    return tagged.reindex(columns=columns, fill_value="N/A")

# Request headers passed as `headers=` to the requests.get call of each
# search function below, identifying this script via its User-Agent.
# NOTE(review): the contact value looks like a placeholder - confirm it is
# a real contact before using the script heavily.
HEADERS = {'User-Agent': 'ResearchScript/1.0 (Educational Use; contact: third-floor)'}

# ========== 1. Library of Congress (LOC) ============
def loc_search(search_term):
    """Search the Library of Congress (id.loc.gov) for authority records.

    Requests the search results as an Atom feed and turns every feed entry
    into one result row.

    Args:
        search_term: Free-text name to look up.

    Returns:
        DataFrame in the shared layout produced by ``format_df``. It is
        empty when there are no hits or when the request or parsing fails;
        failures are printed rather than raised so that one broken source
        does not abort the overall search.
    """
    source = "Library of Congress"
    ns = {'atom': 'http://www.w3.org/2005/Atom'}
    try:
        response = requests.get(
            "https://id.loc.gov/search/",
            params={"q": f"rdftype:Authority {search_term}", "format": "atom"},
            headers=HEADERS,
            timeout=10,
        )
        # Turn HTTP error statuses into an exception so they are reported
        # by the handler below instead of yielding a silent empty result.
        response.raise_for_status()

        root = ET.fromstring(response.content)
        results = []
        for entry in root.findall('atom:entry', ns):
            # findtext returns the default when the child is missing, so a
            # single incomplete entry no longer discards every result.
            title = entry.findtext('atom:title', '', ns)
            entry_id = entry.findtext('atom:id', '', ns)
            link_elem = entry.find('atom:link', ns)
            results.append({
                'name': title or "N/A",
                # The identifier is the last path segment of the entry id.
                'id': entry_id.split('/')[-1] or "N/A",
                'link': link_elem.get('href', "N/A") if link_elem is not None else "N/A",
                'dates': "N/A",
            })
        print(f"[+] {source}: Found {len(results)} results.")
        return format_df(pd.DataFrame(results), source)
    except Exception as e:
        # Deliberately broad: this is a best-effort boundary per source.
        print(f"[!] {source} Error: {e}")
    return format_df(pd.DataFrame(), source)

# ========== 2. Europeana ============
def europeana_search(search_term, api_key=None):
    """Search the Europeana entity API for agents matching a name.

    Args:
        search_term: Free-text name to look up.
        api_key: Optional Europeana API key. When omitted, the
            ``EUROPEANA_API_KEY`` environment variable is used if set,
            otherwise the key that ships with this script. The file header
            asks frequent users to request their own key.

    Returns:
        DataFrame in the shared layout produced by ``format_df``. It is
        empty when there are no hits or when the request or parsing fails;
        failures are printed rather than raised.
    """
    import os  # local import: only this function needs the environment lookup

    source = "Europeana"
    key = api_key or os.environ.get('EUROPEANA_API_KEY') or 'treherpis'
    url = 'https://api.europeana.eu/entity/search'
    params = {'wskey': key, 'query': search_term, 'type': 'agent', 'language': 'en'}
    try:
        response = requests.get(url, params=params, headers=HEADERS, timeout=10)
        # Report HTTP error statuses instead of silently returning nothing.
        response.raise_for_status()

        results = []
        for item in response.json().get('items') or []:
            # `or` guards cover keys that are present but null.
            labels = item.get('prefLabel') or {}
            entity_id = item.get('id')
            begin = item.get('begin') or ''
            end = item.get('end') or ''
            dates = f"{begin}-{end}".strip('-')
            results.append({
                'name': labels.get('en', 'N/A'),
                'id': str(entity_id).split('/')[-1] if entity_id else 'N/A',
                # Fall back to the entity id when 'guid' is absent, so the
                # link column is not left empty.
                # NOTE(review): assumes the id is a resolvable URI (it is
                # split on '/' above) - confirm against the API response.
                'link': item.get('guid') or entity_id or 'N/A',
                # Keep the file-wide "N/A" placeholder when no date exists.
                'dates': dates or "N/A",
            })
        print(f"[+] {source}: Found {len(results)} results.")
        return format_df(pd.DataFrame(results), source)
    except Exception as e:
        # Deliberately broad: this is a best-effort boundary per source.
        print(f"[!] {source} Error: {e}")
    return format_df(pd.DataFrame(), source)

# ========== 3. Wellcome Collection ============
def wellcome_search(search_term):
    """Search the Wellcome Collection catalogue for matching concepts.

    Args:
        search_term: Free-text name to look up.

    Returns:
        DataFrame in the shared layout produced by ``format_df`` holding up
        to 20 rows (the requested page size). It is empty when there are no
        hits or when the request or parsing fails; failures are printed
        rather than raised.
    """
    source = "Wellcome"
    url = "https://api.wellcomecollection.org/catalogue/v2/concepts"
    params = {"query": search_term, "pageSize": 20}
    try:
        response = requests.get(url, params=params, headers=HEADERS, timeout=10)
        # Report HTTP error statuses instead of silently returning nothing.
        response.raise_for_status()

        results = []
        for concept in response.json().get("results") or []:
            concept_id = concept.get("id")
            results.append({
                'name': concept.get("label") or "N/A",
                'id': concept_id or "N/A",
                # Only build a URL when there is an id to put in it;
                # otherwise the link would end in the text "None".
                'link': (f"https://wellcomecollection.org/concepts/{concept_id}"
                         if concept_id else "N/A"),
                'dates': "N/A",
            })
        print(f"[+] {source}: Found {len(results)} results.")
        return format_df(pd.DataFrame(results), source)
    except Exception as e:
        # Deliberately broad: this is a best-effort boundary per source.
        print(f"[!] {source} Error: {e}")
    return format_df(pd.DataFrame(), source)

# ========== 4. Wikidata ============
def wikidata_search(search_term):
    """Search Wikidata entities by label using ``wbsearchentities``.

    Args:
        search_term: Free-text name to look up.

    Returns:
        DataFrame in the shared layout produced by ``format_df``. The
        'dates' column carries the entity's description text, since this
        search response is not read for separate date fields. The frame is
        empty when there are no hits or when the request or parsing fails;
        failures are printed rather than raised.
    """
    source = "Wikidata"
    url = "https://www.wikidata.org/w/api.php"
    params = {"action": "wbsearchentities", "search": search_term, "language": "en", "format": "json"}
    try:
        response = requests.get(url, params=params, headers=HEADERS, timeout=10)
        # Report HTTP error statuses instead of silently returning nothing.
        response.raise_for_status()

        payload = response.json()
        # An 'error' object in an otherwise successful response would be
        # reported as "Found 0 results"; surface it as a failure instead.
        # NOTE(review): assumes the API signals errors this way - confirm.
        if "error" in payload:
            raise RuntimeError(payload["error"].get("info", payload["error"]))

        results = []
        for hit in payload.get("search") or []:
            entity_id = hit.get("id")
            results.append({
                'name': hit.get("label") or "N/A",
                'id': entity_id or "N/A",
                # Only build a URL when there is an id to put in it.
                'link': (f"https://www.wikidata.org/wiki/{entity_id}"
                         if entity_id else "N/A"),
                'dates': hit.get("description", "N/A"),
            })
        print(f"[+] {source}: Found {len(results)} results.")
        return format_df(pd.DataFrame(results), source)
    except Exception as e:
        # Deliberately broad: this is a best-effort boundary per source.
        print(f"[!] {source} Error: {e}")
    return format_df(pd.DataFrame(), source)

# ========== 5. The National Archives (TNA) ============
def tna_search(search_term):
    """Search The National Archives Discovery API for file authorities.

    Only assets whose ``subjectType`` equals "P" are kept.
    NOTE(review): "P" presumably marks person records - confirm against the
    Discovery API documentation.

    Args:
        search_term: Free-text name to look up.

    Returns:
        DataFrame in the shared layout produced by ``format_df``, built
        from at most 50 assets (the requested page size). It is empty when
        nothing matches or when the request or parsing fails; failures are
        printed rather than raised.
    """
    source = "TNA"
    base_url = "https://discovery.nationalarchives.gov.uk/API/search/v1/fileauthorities"
    params = {"sps.searchQuery": search_term, "sps.resultsPageSize": 50, "sps.batchStartMark": "*"}
    try:
        response = requests.get(base_url, params=params, headers=HEADERS, timeout=15)
        # Report HTTP error statuses instead of silently returning nothing.
        response.raise_for_status()

        filtered_results = []
        for asset in response.json().get("fileAuthorityAssets") or []:
            if asset.get("subjectType") != "P":
                continue
            asset_id = asset.get("id")
            # `or ''` also covers keys that are present but null, which
            # would otherwise be rendered as the text "None".
            start = asset.get('startDate') or ''
            end = asset.get('endDate') or ''
            dates = f"{start}-{end}".strip('-')
            filtered_results.append({
                "name": asset.get("title"),
                "id": asset_id,
                "link": f"https://discovery.nationalarchives.gov.uk/details/f/{asset_id}",
                # Keep the file-wide "N/A" placeholder when no date exists.
                "dates": dates or "N/A",
            })
        print(f"[+] {source}: Found {len(filtered_results)} results.")
        return format_df(pd.DataFrame(filtered_results), source)
    except Exception as e:
        # Deliberately broad: this is a best-effort boundary per source.
        print(f"[!] {source} Error: {e}")
    return format_df(pd.DataFrame(), source)

# ========== MAIN EXECUTION ============
def search_all_collections(search_term):
    """Query every collection concurrently and merge the hits.

    Each source-specific search function is run in its own worker thread
    with the same search term; the per-source frames are then stacked in
    the order the functions are listed.

    Args:
        search_term: Name to look up in all collections.

    Returns:
        One DataFrame containing the rows of every source that returned
        results, with a fresh 0..n-1 index, or a bare empty DataFrame when
        no source returned anything.
    """
    searchers = (loc_search, europeana_search, wellcome_search, wikidata_search, tna_search)
    print(f"\n--- Searching for: '{search_term}' ---")

    with ThreadPoolExecutor(max_workers=len(searchers)) as pool:
        # Submit everything first so the requests overlap, then collect the
        # results in submission order.
        pending = [pool.submit(search, search_term) for search in searchers]
        frames = [job.result() for job in pending]

    non_empty = [frame for frame in frames if not frame.empty]
    if not non_empty:
        return pd.DataFrame()
    return pd.concat(non_empty, ignore_index=True)

# User Input: prompt for a name, run all searches, and show the merged table.
user_input = input("Enter name to search: ")
if user_input.strip():
    final_df = search_all_collections(user_input.strip())
    if not final_df.empty:
        # `display` only exists when running under IPython/Jupyter; fall
        # back to print so the file also works as a plain script.
        try:
            show = display
        except NameError:
            show = print
        # Sort by source for better readability. A stable sort keeps each
        # source's rows in the order the source returned them; the default
        # sort algorithm does not guarantee that.
        show(final_df.sort_values('source', kind='mergesort'))
    else:
        print("No matches found.")
else:
    # Tell the user why nothing happened instead of exiting silently.
    print("No search term entered.")

Enter name to search: george airy

--- Searching for: 'george airy' ---
[+] Wikidata: Found 1 results.
[+] Europeana: Found 1 results.
[+] Library of Congress: Found 2 results.
[+] TNA: Found 1 results.
[+] Wellcome: Found 20 results.


Unnamed: 0,name,source,dates,id,link
2,George Biddell Airy,Europeana,,166721,
0,"Airy, George Biddell, 1801-1892",Library of Congress,,n80138214,http://id.loc.gov/authorities/names/n80138214
1,"Campbell, George, of Airies",Library of Congress,,nr93051117,http://id.loc.gov/authorities/names/nr93051117
24,"Airy, Sir George Biddell, (1801-1892), Knight,...",TNA,01/01/1801-31/12/1892,F58766,https://discovery.nationalarchives.gov.uk/deta...
4,"Airy, Wilfred.",Wellcome,,d7xwxgqj,https://wellcomecollection.org/concepts/d7xwxgqj
22,"Larue, George R. (George Roger), 1882-1967",Wellcome,,qb25buy2,https://wellcomecollection.org/concepts/qb25buy2
21,"Bridges, George Wilson",Wellcome,,sz5tff2w,https://wellcomecollection.org/concepts/sz5tff2w
20,"Cooke, G. (George), 1793-1849",Wellcome,,w4e2tezm,https://wellcomecollection.org/concepts/w4e2tezm
19,"Cooke, George, 1781-1834",Wellcome,,xq6atc8p,https://wellcomecollection.org/concepts/xq6atc8p
18,"Sanchez, George J.",Wellcome,,hjkzrhws,https://wellcomecollection.org/concepts/hjkzrhws
