# A Correspondence Contextualised
This is a script for enriching existing data about keywords or person information with information from [Wikidata](https://www.wikidata.org/wiki/Wikidata:Main_Page).

In [2]:
import pandas as pd
from json import JSONDecodeError
from qwikidata.sparql  import return_sparql_query_results
from SPARQLWrapper import SPARQLWrapper, JSON
import geocoder
import re

In [60]:
df = pd.read_excel('../data/cds_sheets/Kopie_von_20220816_sachindex_cds.xlsx')

# Cells that include entries with more than one value and that are seperated by a backslash are split into separate cells.
df_merged = (df.set_index(df.columns.drop('Deutsch',1).tolist()).Deutsch.str.split('/', expand=True).stack().reset_index().rename(columns={0:'Deutsch'}).loc[:, df.columns])
df_merged.to_csv('../data/retrieved/sachindex_singular_values.csv')



In [75]:
def enrich_data():
    index = 0
    for string in df_merged['Deutsch']:
        try:
            query = f'' \
                    f'SELECT ?item ?label_fr ' \
                    f'  WHERE {{' \
                    f'      ?item rdfs:label "{string}"@de. ' \
                    f'      ?item rdfs:label ?label_fr filter (lang(?label_fr) = "fr").' \
                    f'  }}'
            res = return_sparql_query_results(query)
            df_merged['Wikidata'][index] = [item['item']['value'] for item in res['results']['bindings']]
            df_merged['Französisch'][index] = [item['label_fr']['value'] for item in res['results']['bindings']]

            index += 1
        except JSONDecodeError:
            index += 1
            continue

    df_merged.to_csv('../data/retrieved/sachindex_additional_data.csv')

In [None]:
enrich_data()

In [3]:
df = pd.read_csv('../data/retrieved/filtered_cds_data.csv')
df = df.fillna(0)
df

Unnamed: 0.1,Unnamed: 0,FuD-Key,letzte Änderung,Dokumenttyp,URL,Nr.,Verfasser,GND (Verfasser),VIAF (Verfasser),Empfänger,...,Geonames (Orte),Schlagwörter,Editionen,Vermerk,Bestand,Verweise,Digitalisate: Dateiname (Signatur),Zitierempfehlung,year,decade
0,100,CdS-b1-000s,2022-08-19 10:18:12,Brief,https://constance-de-salm.de/archiv/#/document/28,C11/S84/020-022,"Salm, Constance de",http://d-nb.info/gnd/116765968,https://viaf.org/viaf/121051,"Barbier, Antoine-Alexandre",...,https://www.geonames.org/3247449; https://www....,Wohnen/Haushalt; Frankreich - Politik; CdS: Ép...,0,0,Toulon,0,CdS-b1-000s-0.jpg (C11/S84/020 ); CdS-b1-000s-...,C11/S84/020-022. In: Die Korrespondenz der Con...,1822.0,1820.0
1,108,CdS-b1-0021,2022-08-19 10:18:12,Brief,https://constance-de-salm.de/archiv/#/document/73,C11/S84/117-119,"Salm, Constance de",http://d-nb.info/gnd/116765968,https://viaf.org/viaf/121051,"Barbier, Antoine-Alexandre",...,https://www.geonames.org/2934246; https://www....,Briefpartner/Werk; Literarisches Geschehen/Neu...,0,0,Toulon,0,CdS-b1-0021-0.jpg (C11/S84/117 ); CdS-b1-0021-...,C11/S84/117-119. In: Die Korrespondenz der Con...,1823.0,1820.0
2,110,CdS-b1-000z,2022-08-19 10:18:12,Brief,https://constance-de-salm.de/archiv/#/document/35,C11/S84/037-038,"Salm, Constance de",http://d-nb.info/gnd/116765968,https://viaf.org/viaf/121051,"Barbier, Antoine-Alexandre",...,0,Karriere/-förderung,0,0,Toulon,0,CdS-b1-000z-0.jpg (C11/S84/037 ); CdS-b1-000z-...,C11/S84/037-038. In: Die Korrespondenz der Con...,1822.0,1820.0
3,111,CdS-b1-0026,2022-08-19 10:18:12,Brief,https://constance-de-salm.de/archiv/#/document/78,C11/S84/131,"Salm, Constance de",http://d-nb.info/gnd/116765968,https://viaf.org/viaf/121051,"Fallenstein, Georg Friedrich (?)",...,0,CdS: Épître à un honnête homme (1820); CdS: Su...,0,0,Toulon,0,CdS-b1-0026-0.jpg (C11/S84/131),C11/S84/131. In: Die Korrespondenz der Constan...,1823.0,1820.0
4,112,CdS-b1-0016,2022-08-19 10:18:12,Brief,https://constance-de-salm.de/archiv/#/document/42,C11/S84/052,"Salm, Constance de",http://d-nb.info/gnd/116765968,https://viaf.org/viaf/121051,"Preußen, Prinz Friedrich von (Neffe Friedrich ...",...,https://www.geonames.org/2934246,"CdS: Poésies (1811, 1817², 1835³)",0,0,Toulon,0,CdS-b1-0016-0.jpg (C11/S84/052),C11/S84/052. In: Die Korrespondenz der Constan...,1822.0,1820.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2026,10716,CdS-b1-08iu,2022-08-19 10:18:12,Brief,https://constance-de-salm.de/archiv/#/document...,CdS/31/125-126,"Salm, Constance de",http://d-nb.info/gnd/116765968,https://viaf.org/viaf/121051,"Pailliet, Jean Baptiste Joseph (Anwalt der CdS...",...,https://www.geonames.org/2989317,Wirtschaft/Finanzen; Prozess/Gericht,0,0,Schloss Dyck,0,CdS-b1-08iu-0.jpg (CdS/31/125); CdS-b1-08iu-1....,CdS/31/125-126. In: Die Korrespondenz der Cons...,1829.0,1820.0
2027,10717,CdS-b1-08jb,2022-08-19 10:18:12,Brief,https://constance-de-salm.de/archiv/#/document...,CdS/31/098-099,"Salm, Constance de",http://d-nb.info/gnd/116765968,https://viaf.org/viaf/121051,"Pailliet, Jean Baptiste Joseph (Anwalt der CdS...",...,https://www.geonames.org/2972191,Prozess/Gericht,0,0,Schloss Dyck,0,CdS-b1-08jb-0.jpg (CdS/31/098); CdS-b1-08jb-1....,CdS/31/098-099. In: Die Korrespondenz der Cons...,1827.0,1820.0
2028,10720,CdS-b1-08in,2022-08-19 10:18:12,Brief,https://constance-de-salm.de/archiv/#/document...,CdS/31/039-040,"Salm, Constance de",http://d-nb.info/gnd/116765968,https://viaf.org/viaf/121051,"Pailliet, Jean Baptiste Joseph (Anwalt der CdS...",...,https://www.geonames.org/2972191; https://www....,Prozess/Gericht,0,0,Schloss Dyck,0,CdS-b1-08in-0.jpg (CdS/31/039); CdS-b1-08in-1....,CdS/31/039-040. In: Die Korrespondenz der Cons...,1825.0,1820.0
2029,10721,CdS-b1-08iz,2022-08-19 10:18:12,Brief,https://constance-de-salm.de/archiv/#/document...,CdS/31/062-063,"Salm, Constance de",http://d-nb.info/gnd/116765968,https://viaf.org/viaf/121051,"Pailliet, Jean Baptiste Joseph (Anwalt der CdS...",...,0,Wirtschaft/Finanzen; Prozess/Gericht,0,0,Schloss Dyck,0,CdS-b1-08iz-0.jpg (CdS/31/062); CdS-b1-08iz-1....,CdS/31/062-063. In: Die Korrespondenz der Cons...,1826.0,1820.0


In [4]:
def get_geodata(dataframe: pd.DataFrame, feature_column: str):
    for feature in dataframe[feature_column].unique():
        if feature != '0':
            g = geocoder.geonames(
                re.sub(
                    'https://www.geonames.org/',
                    '',
                    str(feature)
                ),
                method='details',
                geoNamesUsername="sarahondraszek",
                key="sarahondraszek"
            )
            print(feature, g.lat, g.lng)

In [5]:
get_geodata(df, 'Geonames (Ausstellungsort)')

https://www.geonames.org/3247449 50.77664 6.08342
https://www.geonames.org/2894637 51.1 6.5


Status code 404 from http://api.geonames.org/getJSON: ERROR - 404 Client Error: Not Found for url: http://api.geonames.org/getJSON?geonameId=0&username=sarahondraszek&style=full


0 None None
https://www.geonames.org/2988507 48.85341 2.3488
https://www.geonames.org/2800866 50.85045 4.34878
https://www.geonames.org/2971041 50.35909 3.52506
https://www.geonames.org/2934246 51.22172 6.77616
https://www.geonames.org/2958516 50.73333 7.01667
