In [2]:
import sddk
import pandas as pd
pd.options.display.max_columns = 1000 # to see all columns
import geopandas as gpd
import warnings
warnings.filterwarnings('ignore')
import nltk

In [3]:
# Locate a local copy of the input dataset (LIREg.parquet) somewhere under ~/Projects.
# The IPython `!` shell escape captures the lines printed by `find` in a list-like
# object, so `local_paths` holds one entry per line of output.
# NOTE(review): depends on a Unix-style `find` and on the data living under
# ~/Projects - confirm before running on another machine.
local_paths = !find ~/Projects -name "LIREg.parquet"
print(local_paths)

['/Users/kasev/Projects/LIRE_ETL/data/large_data/LIREg.parquet']


In [4]:
# Read the LIRE dataset from the local parquet copy when one is available;
# otherwise offer to download the GeoJSON from Zenodo and cache it as parquet.
try:
    LIRE = gpd.read_parquet(local_paths[0])
    print("local copy found and loaded")
except Exception as load_error:
    # `except Exception` instead of a bare `except:` so that a kernel interrupt
    # (KeyboardInterrupt) or SystemExit is not swallowed by the fallback.
    # Reached e.g. when `local_paths` is empty (IndexError) or the path is unreadable.
    print(f"local copy could not be loaded ({type(load_error).__name__}: {load_error})")
    # !!! use carefully - takes time
    q = input("local file not found, try another put, or input 'y' for download")
    if q == "y":
        LIRE = gpd.read_file("https://zenodo.org/record/5074774/files/LIREg.geojson?download=1", driver="geoJSON")
        print("file downloaded from zenodo")
        LIRE.to_parquet("../data/large_data/LIREg.parquet")
    else:
        # Stop here with a clear message rather than leaving `LIRE` undefined
        # (or stale from an earlier run) for the cells below.
        raise FileNotFoundError(
            "LIRE dataset not loaded: no readable local copy and download was declined"
        ) from load_error

local copy found and loaded


In [8]:
# EDH-IDs of every record that has one (missing values dropped, repeats kept)
edh_ids_list = LIRE["EDH-ID"].dropna().tolist()
len(edh_ids_list)

53823

In [9]:
# distinct EDH-IDs among the records that have one
edh_ids_unique = LIRE["EDH-ID"].dropna().unique().tolist()
len(edh_ids_unique)

52708

In [10]:
# EDH-IDs that occur more than once, in the order given by FreqDist.most_common()
id_counts = nltk.FreqDist(edh_ids_list)
dup_ids = [edh_id for edh_id, n_records in id_counts.most_common() if n_records > 1]
len(dup_ids)

946

In [11]:
# rows that repeat an EDH-ID already seen earlier in the frame
# (i.e. every occurrence after the first one; records without an EDH-ID are excluded)
has_edh_id = LIRE["EDH-ID"].notnull()
is_repeat = LIRE.duplicated(subset="EDH-ID")
LIRE.loc[has_edh_id & is_repeat, :]

Unnamed: 0,EDCS-ID,publication,province,province_list,place,place_list,end_yr_list,notes_dating,status_list,inscr_type,status_notation,inscr_process,notes_references,notes_comments,inscription,inscription_stripped_final,Links,dating from,dating to,status,Latitude,Longitude,photo,Material,Comment,EDH-ID,language_EDCS,clean_text_interpretive_word_EDCS,responsible_individual,type_of_inscription,letter_size,literature,work_status,height,diplomatic_text,people,depth,material,type_of_monument,province_label,width,transcription,country,uri,findspot_ancient,last_update,modern_region,findspot_modern,edh_geography_uri,commentary,trismegistos_uri,external_image_uris,fotos,idno_tm,placenames_refs,text_edition,origdate_text,layout_execution,layout_execution_text,support_objecttype,support_objecttype_text,support_material,support_material_text,support_decoration,keywords_term,keywords_term_text,type_of_inscription_clean,type_of_inscription_certainty,height_cm,width_cm,depth_cm,material_clean,type_of_monument_clean,type_of_monument_certainty,province_label_clean,province_label_certainty,country_clean,country_certainty,findspot_ancient_clean,findspot_ancient_certainty,modern_region_clean,modern_region_certainty,findspot_modern_clean,findspot_modern_certainty,findspot_clean,findspot_certainty,origdate_text_clean,clean_text_conservative,clean_text_interpretive_sentence,findspot,year_of_find,present_location,religion,geography,social_economic_legal_history,military,not_after,language,not_before,coordinates,clean_text_interpretive_word,urban_context,within_rome,nearest_city,city_id_hanson,city_pop_est,city_geometry,nearest_city_type,nearest_city_dist,type_of_inscription_auto,type_of_inscription_auto_prob,geometry
164,EDCS-12700138,"ILGR 00158 = AE 1928, 00015 = AE 1937, 00114 =...",Achaia,Achaia,Nikolara / Nikopoli / Nicopolis,"['Nikolara', 'Nikopoli', 'Nicopolis']",-24,,"['Augusti/Augustae', 'tituli operum', 'tituli ...","['tituli operum', 'tituli sacri']","['Augusti/Augustae', 'viri']",{ },,,[Nep]tuno [et Ma]rt[i Imp(erator) Caesa]r div[...,[Nep]tuno [et Ma]rt[i Imp(erator) Caesa]r div[...,http://db.edcs.eu/epigr/partner.php?s_language...,-24.0,-24,Augusti/Augustae; tituli operum; tituli sacri;...,39.009084,20.734332,http://db.edcs.eu/epigr/bilder.php?bilder.php?...,,,HD023503,,Neptuno et Marti Imperator Caesar divi Iuli fi...,Cowey,votive inscription,,"K.A. Romaios, AD 9, 1924/25, 1-4; eik. 1. - AE...",checked with photo,,[ ]TVNO [ ]RT[ ]R DIV[ ] F VICT[ ]RIT[ ]O QVOD...,"[{'nomen': 'Caesar+', 'praenomen': 'Imp.+', 'p...",,,block,Epirus,,[Nep]tuno [et Ma]rt[i Imp(erator) Caesa]r Div[...,Greece,https://edh-www.adw.uni-heidelberg.de/edh/insc...,"Nicopolis, bei",2013-07-23,Ípeiros,Nikópolis,https://edh-www.adw.uni-heidelberg.de/edh/geog...,Mehrere Blöcke des Siegesdenkmals von Actium....,https://www.trismegistos.org/text/120977,http://www.flickr.com/photos/dandiffendale/835...,['https://edh-www.adw.uni-heidelberg.de/fotos/...,120977,[],Neptuno et Marti Imperator Caesar Divi Iuli fi...,29 BC,21,unbestimmt,189.0,Block,138,unbestimmt,1000.0,80,Weihinschrift,votive inscription,Certain,,,,,block,Certain,Epirus,Certain,Greece,Certain,Nicopolis,Estimated,Ípeiros,Certain,Nikópolis,Certain,"Nikópolis, Augustusmonument",Certain,29 BC,tuno rtr Div f victrito quod pro blic gest in ...,Neptuno et Marti Imperator Caesar Divi Iuli fi...,"Nikópolis, Augustusmonument",,,names of pagan deities,,data available,,,Latin,-29.0,"[20.7355, 39.0276]",Neptuno et Marti Imperator Caesar Divi Iuli fi...,big,False,Nicopolis (Achaea),65,41221.0,"[20.733395, 39.026505]",big,0.002373,votive inscription,1.0,POINT (20.73550 39.02760)
204,EDCS-50300300,JARCE-1994-161,Aegyptus,Aegyptus,Abu Sha|ar,Abu Sha|ar,{ },,{ },{ },{ },{ },,,]VM[1]LI[,]VM[1]LI[,,,,,27.368929,33.682834,,,,HD052304,,VM LI,Platz,,,"AE 2004, 1641b.",provisional,,]ANIS LIMITIBVS APTA IN LITO[ ] / CVRANTE AVRE...,{ },,,,Aegyptus,,------]anis limitibus apta in lito[re? ---] / ...,Egypt,https://edh-www.adw.uni-heidelberg.de/edh/insc...,,2008-11-03,,Abu Sha'ar,https://edh-www.adw.uni-heidelberg.de/edh/geog...,,https://www.trismegistos.org/text/79054,{ },{ },79054,"['http://www.trismegistos.org/place/008881', '...",anis limitibus apta in litore curante Aureli...,309 AD – 310 AD,21,unbestimmt,2.0,unbestimmt,138,unbestimmt,1000.0,143,unbestimmt,,,,,,,,,Aegyptus,Certain,Egypt,Certain,,,,,Abu Sha'ar,Certain,,,309 AD – 310 AD,anis limitibus apta in lito curante Aurel Max,anis limitibus apta in litore curante Aurelio ...,,,,,data available,,,310.0,Latin,309.0,"[33.6828, 27.3689]",anis limitibus apta in litore curante Aurelio ...,rest,False,Tentyris,153,1000.0,"[32.65, 26.166667]",minor,1.584942,,0.0,POINT (33.68280 27.36890)
205,EDCS-50300301,JARCE-1994-161,Aegyptus,Aegyptus,Abu Sha|ar,Abu Sha|ar,{ },,viri,{ },viri,{ },,,]MIANV[,]MIANV[,,,,viri,27.368929,33.682834,,,,HD052304,,MIANV,Platz,,,"AE 2004, 1641b.",provisional,,]ANIS LIMITIBVS APTA IN LITO[ ] / CVRANTE AVRE...,{ },,,,Aegyptus,,------]anis limitibus apta in lito[re? ---] / ...,Egypt,https://edh-www.adw.uni-heidelberg.de/edh/insc...,,2008-11-03,,Abu Sha'ar,https://edh-www.adw.uni-heidelberg.de/edh/geog...,,https://www.trismegistos.org/text/79054,{ },{ },79054,"['http://www.trismegistos.org/place/008881', '...",anis limitibus apta in litore curante Aureli...,309 AD – 310 AD,21,unbestimmt,2.0,unbestimmt,138,unbestimmt,1000.0,143,unbestimmt,,,,,,,,,Aegyptus,Certain,Egypt,Certain,,,,,Abu Sha'ar,Certain,,,309 AD – 310 AD,anis limitibus apta in lito curante Aurel Max,anis limitibus apta in litore curante Aurelio ...,,,,,data available,,,310.0,Latin,309.0,"[33.6828, 27.3689]",anis limitibus apta in litore curante Aurelio ...,rest,False,Tentyris,153,1000.0,"[32.65, 26.166667]",minor,1.584942,,0.0,POINT (33.68280 27.36890)
206,EDCS-50300302,JARCE-1994-162,Aegyptus,Aegyptus,Abu Sha|ar,Abu Sha|ar,{ },,{ },{ },{ },{ },,,]VM[,]VM[,,,,,27.368929,33.682834,,,,HD052304,,VM,Platz,,,"AE 2004, 1641b.",provisional,,]ANIS LIMITIBVS APTA IN LITO[ ] / CVRANTE AVRE...,{ },,,,Aegyptus,,------]anis limitibus apta in lito[re? ---] / ...,Egypt,https://edh-www.adw.uni-heidelberg.de/edh/insc...,,2008-11-03,,Abu Sha'ar,https://edh-www.adw.uni-heidelberg.de/edh/geog...,,https://www.trismegistos.org/text/79054,{ },{ },79054,"['http://www.trismegistos.org/place/008881', '...",anis limitibus apta in litore curante Aureli...,309 AD – 310 AD,21,unbestimmt,2.0,unbestimmt,138,unbestimmt,1000.0,143,unbestimmt,,,,,,,,,Aegyptus,Certain,Egypt,Certain,,,,,Abu Sha'ar,Certain,,,309 AD – 310 AD,anis limitibus apta in lito curante Aurel Max,anis limitibus apta in litore curante Aurelio ...,,,,,data available,,,310.0,Latin,309.0,"[33.6828, 27.3689]",anis limitibus apta in litore curante Aurelio ...,rest,False,Tentyris,153,1000.0,"[32.65, 26.166667]",minor,1.584942,,0.0,POINT (33.68280 27.36890)
207,EDCS-50300307,JARCE-1994-162,Aegyptus,Aegyptus,Abu Sha|ar,Abu Sha|ar,{ },,{ },{ },{ },{ },,,]MO[,]MO[,,,,,27.368929,33.682834,,,,HD052304,,MO,Platz,,,"AE 2004, 1641b.",provisional,,]ANIS LIMITIBVS APTA IN LITO[ ] / CVRANTE AVRE...,{ },,,,Aegyptus,,------]anis limitibus apta in lito[re? ---] / ...,Egypt,https://edh-www.adw.uni-heidelberg.de/edh/insc...,,2008-11-03,,Abu Sha'ar,https://edh-www.adw.uni-heidelberg.de/edh/geog...,,https://www.trismegistos.org/text/79054,{ },{ },79054,"['http://www.trismegistos.org/place/008881', '...",anis limitibus apta in litore curante Aureli...,309 AD – 310 AD,21,unbestimmt,2.0,unbestimmt,138,unbestimmt,1000.0,143,unbestimmt,,,,,,,,,Aegyptus,Certain,Egypt,Certain,,,,,Abu Sha'ar,Certain,,,309 AD – 310 AD,anis limitibus apta in lito curante Aurel Max,anis limitibus apta in litore curante Aurelio ...,,,,,data available,,,310.0,Latin,309.0,"[33.6828, 27.3689]",anis limitibus apta in litore curante Aurelio ...,rest,False,Tentyris,153,1000.0,"[32.65, 26.166667]",minor,1.584942,,0.0,POINT (33.68280 27.36890)
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
49017,EDCS-07000521,"ZPE-111-271 = AE 1988, 00625 = AE 1991, 00895 ...",Sicilia,Sicilia,Taormina / Tauromenium,"['Taormina', 'Tauromenium']",-19,,"['leges', 'ordo senatorius', 'tituli sacri', '...","['leges', 'tituli sacri']","['ordo senatorius', 'tria nomina', 'viri']",{ },,,] / [B VII] c(omitialis) l[udi] / [C VI] c(omi...,] / [B VII] c(omitialis) l[udi] / [C VI] c(omi...,http://db.edcs.eu/epigr/partner.php?s_language...,-36.0,-19,leges; ordo senatorius; tituli sacri; tria nom...,37.851792,15.285652,,lapis,,HD003680,,B VII comitialis ludi C VI comitialis ludi D V...,Gräf,calendar,,"AE 1988, 0625.; AE 1988, 0626.; AE 1991, 0894....",provisional,,[ ] MVNERE D S P F / [ ]RIM EX PR NON [ ]R [ ]...,"[{'praenomen': 'P.+', 'gender': 'male', 'name'...",,marble: rocks - metamorphic rocks,tabula,"Sicilia, Melita",,[---] munere d(e) s(ua) p(ecunia) f(ec---) / [...,Italy,https://edh-www.adw.uni-heidelberg.de/edh/insc...,Tauromenium,2015-08-27,,Taormina,https://edh-www.adw.uni-heidelberg.de/edh/geog...,,https://www.trismegistos.org/text/175689,{ },{ },175689,"['http://www.trismegistos.org/place/002210', '...",munere de sua pecunia fec--- rim ex pridie No...,8 BC – 14 AD,21,unbestimmt,257.0,Tafel,48,Marmor,1000.0,3,Kalender,calendar,Certain,,,,Marble,tabula,Certain,"Sicilia, Melita",Certain,Italy,Certain,Tauromenium,Certain,,,Taormina,Certain,Thermengebäude,Certain,8 BC – 14 AD,munere d s p f rim ex pr Non r d pr K Iun Vest...,munere de sua pecunia fec rim ex pridie Nonas ...,Thermengebäude,,,names of pagan deities,,data available,,14.0,Latin,-8.0,"[15.2879, 37.8531]",munere de sua pecunia fec rim ex pridie Nonas ...,middle,False,Tauromenium,1321,11060.0,"[15.287916, 37.853066]",middle,0.000038,calendar,1.0,POINT (15.28790 37.85310)
49056,EDCS-38301821,"SEG-55, 01646b = AE 2005, 01562b",Syria,Syria,Tadmur / Tadmer / Tadmor / Palmyra,"['Tadmur', 'Tadmer', 'Tadmor', 'Palmyra']",100,,ordo decurionum,{ },ordo decurionum,{ },,,Bule et [civitas(?),Bule et [civitas(?),http://db.edcs.eu/epigr/partner.php?s_language...,51.0,100,ordo decurionum,34.556016,38.280975,,,,HD055971,,Bule et civitas,Cowey,honorific inscription,3-3.2 cm,"AE 2005, 1562.; J. Seigne - J.-B. Yon, in: P. ...",provisional,(117) cm,[ ]T C[ ] IA[ ]AA[ ] / [ ]SI[ ]IB[ ]VA[ ] / / ...,{ },,,honorific/grave/votive column,Syria,120 cm,[Bule e]t c[ivitas] Ia[r---]AA[---] / [---]SI[...,Syrian Arab Republic,https://edh-www.adw.uni-heidelberg.de/edh/insc...,Palmyra,2014-07-01,,Tadmur,https://edh-www.adw.uni-heidelberg.de/edh/geog...,,https://www.trismegistos.org/text/198740,{ },{ },198740,"['http://www.trismegistos.org/place/000016', '...",Bule et civitas IarAA SIIBVA Bule et [civitas...,51 AD – 100 AD,21,unbestimmt,102.0,Ehren-/Grab-/Votivsäule,138,unbestimmt,1000.0,69,Ehreninschrift,honorific inscription,Certain,117.0,120.0,,,honorific/grave/votive column,Certain,Syria,Certain,Syrian Arab Republic,Certain,Palmyra,Certain,,,Tadmur,Certain,"GroÃe Kolonnaden, Sektion C, Exedra C, sekund...",Certain,51 AD – 100 AD,t c IaAA SIIBVA Bule et civitas υλῆβΑ ΛΟΝΙΔΟΥΑ...,Bule et civitas Iar AA SI IB VA Bule et civita...,"GroÃe Kolonnaden, Sektion C, Exedra C, sekund...",1963,,,,,,100.0,Greek-Latin,51.0,"[38.2787, 34.548]",Bule et civitas Iar AA SI IB VA Bule et civita...,middle,False,Palmyra,1340,25091.0,"[38.267749, 34.551677]",middle,0.011552,honorific inscription,1.0,POINT (38.27870 34.54800)
49162,EDCS-13303439,"SEG-47, 01098",Thracia,Thracia,Plowdiw / Plovdiv / Filibe / Philippopolis,"['Plowdiw', 'Plovdiv', 'Filibe', 'Philippopolis']",{ },,tituli sepulcrales,tituli sepulcrales,{ },{ },,,D(is) M(anibus),D(is) M(anibus),,,,tituli sepulcrales,42.141854,24.749930,,,,HD044459,,Dis Manibus,Cowey,epitaph,2.8-6 cm,"IGBulg 5462; tab. 69, 5462.; SEG 47, 1098.; AE...",checked with photo,46 cm,ΑΙΛ ΠΟΛΕΜΩΝ ΒΦ ΒΕΙΘΥΤΡΑΛΕΙ ΤΑΣΗΟΥΣ ΚΑΙ / ΚΟΥΙΤ...,"[{'cognomen': 'Î\xa0Î¿Î»á½³Î¼Ï\x89Î½', 'name':...",35 cm,,architectural member,Thracia,115 cm,Αἴλ(ιος) Πολέμων β(ενε)φ(ικιάριος) Βειθυτραλει...,Bulgaria,https://edh-www.adw.uni-heidelberg.de/edh/insc...,Philippopolis,2009-06-08,Plovdiv,Plovdiv,https://edh-www.adw.uni-heidelberg.de/edh/geog...,,https://www.trismegistos.org/text/197764,{ },{ },197764,"['http://www.trismegistos.org/place/000014', '...",Αἴλ(ιος) Πολέμων β(ενε)φ(ικιάρ...,101 AD – 200 AD,21,unbestimmt,35.0,Architekturteil,138,unbestimmt,1000.0,92,Grabinschrift,epitaph,Certain,46.0,115.0,35.0,,architectural member,Certain,Thracia,Certain,Bulgaria,Certain,Philippopolis,Certain,Plovdiv,Certain,Plovdiv,Certain,,,101 AD – 200 AD,Αἴλ Πολέμων βφ Βειθυτραλει Τασηους καὶ Κουτῃ Δ...,Αἴλιος Πολέμων βενεφικιάριος Βειθυτραλει Τασηο...,,1975,"Plovdiv, Arh. Muz.",,,,,200.0,Greek-Latin,101.0,"[24.7453, 42.1354]",Αἴλιος Πολέμων βενεφικιάριος Βειθυτραλει Τασηο...,middle,False,Philippopolis (Thracia),1385,14111.0,"[24.750913, 42.146652]",middle,0.012574,epitaph,1.0,POINT (24.74530 42.13540)
49239,EDCS-05100816,"CIL 05, 05661b = Epigraphica-1961-98 = AE 1964...",Transpadana / Regio XI,Transpadana / Regio XI,Barzano,Barzano,300,,tituli sacri,tituli sacri,{ },{ },,,Novelia / Pandarus / I(ovi) O(ptimo) M(aximo) ...,Novelia / Pandarus / I(ovi) O(ptimo) M(aximo) ...,http://db.edcs.eu/epigr/partner.php?s_language...,201.0,300,tituli sacri,45.733474,9.312813,,,,HD013243,,Novelia Pandarus Iovi Optimo Maximo votum solv...,Scheithauer,votive inscription,5.5 cm,"AE 1969/70, 0203. (B); AE 1964, 0105.; CIL 05,...",no image,100 cm,I [ ] M / NOVELLIA[ ] / PANDARVS / PRO SE ET S...,"[{'name': 'Novellia[nus] Pandarus', 'person_id...",50 cm,,altar,Transpadana (Regio XI),55 cm,I(ovi) [O(ptimo)] M(aximo) / Novellia[nus] / P...,Italy,https://edh-www.adw.uni-heidelberg.de/edh/insc...,,1998-01-15,,Barzano,https://edh-www.adw.uni-heidelberg.de/edh/geog...,Auf Vorder- und Rückseite beschriftet. (B): (...,https://www.trismegistos.org/text/285387,{ },{ },285387,[],Iovi Optimo Maximo Novellianus Pandarus pro se...,301 AD – 400 AD,21,unbestimmt,29.0,Altar,138,unbestimmt,1000.0,80,Weihinschrift,votive inscription,Certain,100.0,55.0,50.0,,altar,Certain,Transpadana (Regio XI),Certain,Italy,Certain,,,,,Barzano,Certain,"S. Salvatore, Kirche",Certain,301 AD – 400 AD,I M Novellia Pandarus pro se et suis omnibus a...,Iovi Optimo Maximo Novellianus Pandarus pro se...,"S. Salvatore, Kirche",,,names of pagan deities,,,,400.0,Latin,301.0,"[9.3167, 45.7333]",Iovi Optimo Maximo Novellianus Pandarus pro se...,rest,False,Comum,1068,3086.0,"[9.082065, 45.81011]",middle,0.246887,votive inscription,1.0,POINT (9.31670 45.73330)


In [23]:
# drop the repeated EDH-ID rows, keeping the first occurrence of each ID
# (flagged with the pandas duplicated() method); rows without an EDH-ID are all kept
is_repeat = LIRE.duplicated(subset="EDH-ID")
lacks_edh_id = LIRE["EDH-ID"].isnull()
LIRE = LIRE.loc[~is_repeat | lacks_edh_id]
len(LIRE)

136190

In [29]:
# open a session on sciencedata.dk, pointed at the shared LIRE data folder
# NOTE(review): the third argument looks like the account / folder-owner id - confirm against the sddk docs
sddk_provider = "sciencedata.dk"
sddk_folder = "SDAM_root/SDAM_data/LIRE"
sddk_account = "648597@au.dk"
s = sddk.cloudSession(sddk_provider, sddk_folder, sddk_account)

connection with shared folder established with you as its owner
endpoint variable has been configured to: https://sciencedata.dk/files/SDAM_root/SDAM_data/LIRE/


In [33]:
# push the deduplicated dataset to the sciencedata.dk folder opened above
remote_name = "LIRE_v1-1.geojson"
s.write_file(remote_name, LIRE)

Your <class 'geopandas.geodataframe.GeoDataFrame'> object has been succefully written as "https://sciencedata.dk/files/SDAM_root/SDAM_data/LIRE/LIRE_v1-1.geojson"


In [32]:
# keep a local copy of the deduplicated dataset
# a) GeoJSON version:
geojson_path = "../data/large_data/LIRE_v1-1.geojson"
LIRE.to_file(geojson_path, driver="GeoJSON")

In [34]:
# b) parquet version of the same local copy:
parquet_path = "../data/large_data/LIRE_v1-1.parquet"
LIRE.to_parquet(parquet_path)