# Querying eLuxembourgensia
This notebook will illustrate how to use the eLuxembourgensia digital collection. 
First, you will be asked to enter the name of a person. We will then query Wikidata for that person. 
If there are multiple matching entries, you will be asked to select the desired one. 
We will then use the birth and death dates of the selected person to query the eLuxembourgensia collection and display
the newspapers that were published during that person's lifetime.



## Requirements
* Python 3.12
* [requests](https://pypi.org/project/requests/): HTTP library to run HTTP requests
* [pandas](https://pandas.pydata.org/): format the output into tabular layout
* [yarl](https://pypi.org/project/yarl/): format the output URL into a clickable URL link

In [None]:
%pip install pandas
%pip install yarl

In [None]:
# A function to fetch data with the Wikidata API
import requests

def fetch_wikidata(params):
    """Send a GET request to the Wikidata API and return the response.

    Args:
        params: Query-string parameters for the API call; handed to
            ``requests.get`` unchanged.

    Returns:
        The ``requests.Response`` object when the request completes, or the
        string ``'ERR'`` when it cannot be completed (connection failure,
        timeout, ...).
    """
    url = 'https://www.wikidata.org/w/api.php'
    try:
        # A timeout keeps the notebook from hanging on a stalled connection.
        return requests.get(url, params=params, timeout=30)
    except requests.RequestException:
        # Only network-level failures are turned into the 'ERR' sentinel that
        # callers compare against; interrupts and programming errors propagate.
        return 'ERR'

In [None]:
# Request a name from the user and keep asking until something other than
# whitespace has been entered.
name = ''
while not name:
    # Strip surrounding whitespace so that a blank-looking entry such as ' '
    # is rejected instead of being used as the search term.
    name = input("Enter a name to search for in Wikidata:").strip()
    if not name:
        print('Please enter a name.')

In [None]:
# Build the wikidata parameters
# Call wbsearchentities function with our name to search
# Output the results in English and using the json format
params = {
    'action': 'wbsearchentities',
    'format': 'json',
    'search': name,
    'language': 'en',
}

# Fetch the data from Wikidata
wikidata_results = fetch_wikidata(params)

if wikidata_results == 'ERR':
    # fetch_wikidata returned its failure sentinel (a plain string), so there
    # is no response body to show.
    print('The request to Wikidata failed.')
else:
    wikidata_payload = wikidata_results.json()
    # Look for a top-level "error" key in the decoded payload. Searching the
    # raw response text for the substring "error" would also match ordinary
    # results, e.g. whenever the searched name itself contains "error".
    if 'error' in wikidata_payload:
        print(wikidata_results.text)
    else:
        wikidata_results = wikidata_payload

In [None]:
def selectFromDict(options):
    """Show a numbered menu and return the value of the option the user picks.

    The keys of ``options`` are printed as a 1-based numbered list. The user
    is prompted until a valid number is entered; anything that is not a whole
    number, or is outside the list, prints a hint and prompts again.

    Args:
        options: Mapping of display text to the value returned for it.

    Returns:
        The value from ``options`` that belongs to the chosen entry.
    """
    values = list(options.values())
    print('Select an option:')
    for number, option_name in enumerate(options, start=1):
        print(f'{number}) {option_name}')
    while True:
        raw_choice = input('Option: ')
        try:
            choice = int(raw_choice) - 1
        except ValueError:
            # Not a number at all: treat it like any other invalid choice
            # instead of aborting with a traceback.
            choice = -1
        if 0 <= choice < len(values):
            return values[choice]
        print('Please select a valid option number.')

# Build the selection menu from the search hits.
# options:      menu text -> Wikidata id (one menu line per hit)
# labels_by_id: Wikidata id -> plain label, used later as the person's name
options = {}
labels_by_id = {}
for entry in wikidata_results['search']:
    entity_id = entry['id']
    # Fall back to the id if a hit carries no label.
    label = entry.get('label', entity_id)
    labels_by_id[entity_id] = label
    # Several hits can share the same label (people with identical names).
    # Keying the menu by label alone would silently drop all but the last of
    # them, so the id is part of the menu text and the description (when the
    # hit has one) is shown to help tell the entries apart.
    description = entry.get('description')
    option_text = f'{label} - {description}' if description else label
    options[f'{option_text} [{entity_id}]'] = entity_id

if not options:
    # Without any hit there is nothing to select and nothing to look up.
    raise ValueError(f'No Wikidata entry found for "{name}".')

if len(options) > 1:
    selected_id = selectFromDict(options)
else:
    # Exactly one hit: take it without asking.
    selected_id = next(iter(options.values()))

selected_entry = labels_by_id[selected_id]

In [None]:
# Retrieve the birth and death date from wikidata for the given id

# Create parameters: request the selected entity as JSON, in English
params = {
    'action': 'wbgetentities',
    'ids': selected_id,
    'format': 'json',
    'languages': 'en',
}

# fetch the API
wikidata_selected_entry = fetch_wikidata(params)

# Convert the response to JSON, or report the failure instead of silently
# leaving the 'ERR' sentinel string behind for the following cells.
if wikidata_selected_entry == 'ERR':
    print('The request to Wikidata failed; please run this cell again.')
else:
    wikidata_selected_entry = wikidata_selected_entry.json()


In [None]:
# P569 = birth date
claims = wikidata_selected_entry["entities"][selected_id]["claims"]
try:
    birth_time = claims["P569"][0]["mainsnak"]["datavalue"]["value"]["time"]
except (KeyError, IndexError):
    # The selected entry has no birth date value. Stop with an explanation
    # instead of a bare KeyError, since the lifetime cannot be determined.
    raise ValueError(
        f"Wikidata entry {selected_id} has no birth date (P569); "
        "please select a different entry."
    ) from None
# parse out the date as yyyy-mm-dd (the first character is skipped;
# presumably the sign prefix of Wikidata's time format)
birthdate = birth_time[1:11]

In [None]:
# P570 = death date
person_claims = wikidata_selected_entry["entities"][selected_id]["claims"]
if 'P570' not in person_claims:
    # No death date claim: use a far-future date for comparisons and show
    # nothing when the lifetime is printed.
    deathdate = "9999-12-31"
    print_deathdate = ""
else:
    # NOTE(review): a P570 claim without a "datavalue" would raise KeyError
    # here - confirm whether that case needs handling.
    death_time = person_claims["P570"][0]["mainsnak"]["datavalue"]["value"]["time"]
    # keep the yyyy-mm-dd part, for comparing and for printing
    deathdate = death_time[1:11]
    print_deathdate = deathdate

In [None]:
# get the BnL eLuxembourgensia collection
elux_response = requests.get(
    "https://viewer.eluxemburgensia.lu/api/viewer2/cms/digitalcollections",
    timeout=30,  # do not let the notebook hang on a stalled connection
)
# Stop here with a clear HTTP error rather than failing later while decoding
# or indexing an error page.
elux_response.raise_for_status()
elux_collection = elux_response.json()

In [None]:
import pandas as pd
from yarl import URL

# select only those newspapers published between the birth date and death date of the person
print(f"Newspapers published during {selected_entry}'s lifetime ({birthdate} - {print_deathdate}):")

# to display all the rows in the table - otherwise, some rows are hidden
pd.set_option('display.max_rows', None)

filtered_newspapers = []
for newspaper in elux_collection["data"]:
    startdate = newspaper["startdate"]
    # A newspaper with a missing or empty end date is compared against a
    # far-future date but displayed with a blank end date.
    print_enddate = newspaper.get("enddate") or ""
    enddate = print_enddate or "9999-12-31"
    # The publication period and the lifetime overlap only if the newspaper
    # started on or before the death date AND ended on or after the birth
    # date. With "or", nearly every newspaper passed the filter.
    # Dates are compared as strings - assumes yyyy-mm-dd values; TODO confirm.
    if startdate <= deathdate and enddate >= birthdate:
        filtered_newspapers.append({
            'Title': newspaper["title"],
            'Start Date': startdate,
            'End Date': print_enddate,
            'Link': "https://persist.lu/" + newspaper["ark"],
        })

df = pd.DataFrame(filtered_newspapers, columns=["Title", "Start Date", "End Date", "Link"])

def make_clickable(val):
    """Return an HTML anchor for ``val`` that opens in a new tab.

    Args:
        val: The URL to link to; it is also used as the visible link text.

    Returns:
        A string holding an ``<a>`` element. ``val`` is HTML-escaped first, so
        characters such as quotes or angle brackets cannot break out of the
        attribute or inject markup.
    """
    import html  # local import keeps this cell self-contained

    safe_val = html.escape(str(val), quote=True)
    return f'<a target="_blank" href="{safe_val}">{safe_val}</a>'
    
# Left-align the cell contents and the header cells, then render the Link
# column through make_clickable. The last expression is the cell's output.
cell_alignment = {'text-align': 'left'}
header_alignment = {'selector': 'th', 'props': [('text-align', 'left')]}
dfStyler = df.style.set_properties(**cell_alignment)
dfStyler.set_table_styles([header_alignment])
dfStyler.format({'Link': make_clickable})
