In [0]:
### REQUIREMENTS
import numpy as np
import math
import pandas as pd

import sys
### we make a lot of requests during the scraping. Some of them use the requests package, some of them urllib
import requests
from urllib.request import urlopen 
from urllib.parse import quote  
from bs4 import BeautifulSoup
import xml.etree.cElementTree as ET

# to avoid errors, we sometimes use time.sleep(N) before retrying a request
import time
# the input data have typically a json structure
import json
import getpass

import datetime as dt
# for simple parallel computing:
from concurrent.futures import ThreadPoolExecutor
### google drive
from google.colab import drive
#import gspread
#from gspread_dataframe import get_as_dataframe, set_with_dataframe

In [4]:
### extract names of provinces
# Ask the EDH API for its list of provinces and preview the returned JSON.
# A timeout keeps the cell from hanging forever if the server does not answer.
response = requests.get("https://edh-www.adw.uni-heidelberg.de/data/api/terms/province", timeout=60)
# fail loudly on an HTTP error instead of trying to parse an error page as JSON
response.raise_for_status()
json_data = response.json()
# show only the first 200 characters of the payload
print(str(json_data)[:200])

{'provinces': {'Ach': 'Achaia', 'Aeg': 'Aegyptus', 'Aem': 'Aemilia (Regio VIII)', 'Afr': 'Africa Proconsularis', 'AlC': 'Alpes Cottiae', 'AlG': 'Alpes Graiae', 'AlM': 'Alpes Maritimae', 'AlP': 'Alpes 


In [6]:
###  generate a list of provinces, resp. their abbreviations
%%time

# Fetch the province list; the keys of "provinces" are the abbreviations
# that are passed as the `province` query value below.
response = requests.get("https://edh-www.adw.uni-heidelberg.de/data/api/terms/province", timeout=120)
response.raise_for_status()
json_data = response.json()
provinces = json_data["provinces"].keys()

### make the basic URL form
URL_form_geo = "https://edh-www.adw.uni-heidelberg.de/data/api/geography/search?"

### parse all find spots for each province
### and add them to the list
geo_data = []
# two requests are made per province, so reuse one session (and its connection) for all of them
with requests.Session() as edh_session:
  for province in provinces:
    province_url = URL_form_geo + "province=" + province
    # first request: only used to read how many find spots the province has
    response = edh_session.get(province_url, timeout=120)
    response.raise_for_status()
    total = response.json()["total"]
    # nothing to download for an empty province, so skip the `limit=0` request
    if int(total) > 0:
      # second request: ask for all find spots at once by setting `limit` to the total
      response = edh_session.get(province_url + "&limit=" + str(total), timeout=120)
      response.raise_for_status()
      geo_data.extend(response.json()["items"])
    print(province, total)

Ach 183
Aeg 65
Aem 162
Afr 1400
AlC 59
AlG 31
AlM 173
AlP 79
ApC 204
Aqu 212
Ara 103
Arm 2
Asi 182
Ass 0
Bae 1305
Bar 89
Bel 429
BiP 105
BrL 79
Bri 1916
Cap 31
Cil 48
Cor 15
Cre 29
Cyp 21
Cyr 24
Dac 995
Dal 2186
Epi 77
Etr 356
Gal 89
GeI 1051
GeS 2616
HiC 2553
Inc 7
Iud 83
LaC 654
Lig 131
Lug 251
Lus 746
LyP 37
MaC 298
MaT 43
Mak 434
Mes 9
MoI 888
MoS 790
Nar 635
Nor 1504
Num 333
PaI 1316
PaS 1623
Pic 85
Rae 729
ReB 22
Rom 734
Sam 313
Sar 81
Sic 47
Syr 154
Thr 257
Tra 111
Tri 0
Umb 212
Val 0
VeH 479
CPU times: user 1.95 s, sys: 156 ms, total: 2.1 s
Wall time: 2min 20s


In [7]:
### total number of find spot records collected in geo_data across all provinces
len(geo_data)

29875

In [8]:
# Turn the collected find spot records into a table indexed by each record's "uri",
# then preview the first five rows.
geo_data_df = pd.DataFrame(geo_data).set_index("uri")
geo_data_df.head(5)

Unnamed: 0_level_0,country,last_update,coordinates,province,id,find_spot_ancient,pleiades_uri,find_spot,region,find_spot_modern,geonames_uri
uri,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
https://edh-www.adw.uni-heidelberg.de/edh/geographie/10,Greece,2011-07-14,"37.05,25.19",Achaia,10,Ptoion,,,,,
https://edh-www.adw.uni-heidelberg.de/edh/geographie/100,Greece,2012-03-20,"37.983175,23.716647",Achaia,100,Athenae,https://pleiades.stoa.org/places/579885,Kerameikos,Attikí,Athínai,
https://edh-www.adw.uni-heidelberg.de/edh/geographie/101,Greece,2012-03-20,"37.983175,23.716647",Achaia,101,Athenae,https://pleiades.stoa.org/places/579885,Kerameikos bei Kirche Haghia Triada,Attikí,Athínai,
https://edh-www.adw.uni-heidelberg.de/edh/geographie/102,Greece,2012-03-20,"38.251123,21.741943",Achaia,102,Colonia Augusta Aroe Patrae,,"Kirche, sekundär verwendet",Dytikí Elláda,Pátrai,
https://edh-www.adw.uni-heidelberg.de/edh/geographie/103,Greece,2012-03-20,"37.939865,22.928467",Achaia,103,Colonia Laus Iulia Corinthus,,Kranion,Pelopónissos,Kórinthos,


## Upload the data to sciencedata.dk

In [9]:
### interactively set up your sciencedata.dk home URL, username and password
# base URL that the upload/download paths below are appended to
sciencedata_homeurl = "https://sciencedata.dk/files/"
# the username is typed in at run time, so it is not stored in the notebook code
username = input("sciencedata.dk username (format '123456@au.dk'):")
# getpass prompts for the password without echoing it
password = getpass.getpass("sciencedata.dk password:")

### establish a request session
# `s` is reused by the later cells for every sciencedata.dk request
s = requests.Session()
# attach the credentials to the session so they are sent with each request made through it
s.auth = (username, password)

sciencedata.dk username (format '123456@au.dk'):648597@au.dk
sciencedata.dk password:··········


In [10]:
### upload the find spot table, serialized as JSON, to your sciencedata.dk folder
geographies_target = sciencedata_homeurl + "personal_folder/EDH_data/EDH_geographies_raw.json"
geographies_payload = geo_data_df.to_json()
# the response is the cell's last expression, so its status is displayed
s.put(geographies_target, data=geographies_payload)

<Response [201]>

# Merge the inscriptions with geographies

In [15]:
### load the raw inscriptions (extracted in another notebook) from sciencedata.dk
inscriptions_response = s.get(sciencedata_homeurl + "personal_folder/EDH_data/EDH_inscriptions_raw.json")
# stop here on e.g. wrong credentials or a missing file, rather than failing inside .json()
inscriptions_response.raise_for_status()
inscriptions_data_df = pd.DataFrame(inscriptions_response.json())
inscriptions_data_df.head(5)

Unnamed: 0,people,work_status,findspot_modern,last_update,responsible_individual,width,language,literature,height,diplomatic_text,not_before,depth,material,trismegistos_uri,transcription,commentary,edh_geography_uri,country,uri,province_label,modern_region,type_of_monument,present_location,findspot_ancient,not_after,type_of_inscription,id,letter_size,social_economic_legal_history,findspot,year_of_find,geography,religion,fotos,military,external_image_uris
0,"[{'name': 'L. Ponponius(!) Rufus', 'age: years...",checked with photo,Roma,2014-10-10,Cowey,19 cm,Greek-Latin,"CIG 6916.; AE 1984, 0109. (B); P. Lombardi, Ti...",45 cm,L PONPONIVS RVFVS / VIXIT ANOS XXVII / EIA PON...,101,5.4 cm,marble: rocks - metamorphic rocks,https://www.trismegistos.org/text/177036,L(ucius) Ponponius(!) Rufus / vixit an(n)os XX...,Wiederverwendung der Tafel als TÃ¼rpfosten. D...,https://edh-www.adw.uni-heidelberg.de/edh/geog...,Italy,https://edh-www.adw.uni-heidelberg.de/edh/insc...,Achaia,Lazio,tabula,"Roma, Mus. Naz. Rom.","Kephallenia, aus",200.0,epitaph,HD001917,1-2 cm,,,,,,,,
1,"[{'gender': 'male', 'cognomen': 'ÎÎ±Î»Î»ÎµÎ½Ï...",checked with photo,"Patrasso - AthÃ­nai, zwischen",2012-03-15,GrÃ¤f,30 cm,Greek-Latin,"CIL 03, 00572.; CIL 03, 07306.; IG 02 (2. Aufl...",146 cm,[ ]ΥΤΟΚΡΑΤΟΡΙ / [ ]ΑΙΣΑΡΙ / [[[ ]]] / [ ]ΥΣΕΒΕ...,395,,"Marmor, geÃ¤dert / farbig",https://www.trismegistos.org/text/177037,[Α]ὐτοκράτορι / [Κ]αίσαρι / [[[---]]] / [Ε]ὐσε...,Meilenstein mit zwei griechischen Inschriften...,https://edh-www.adw.uni-heidelberg.de/edh/geog...,Greece,https://edh-www.adw.uni-heidelberg.de/edh/insc...,Achaia,AttikÃ­,mile-/leaguestone,"AthÃ­nai, Epigr. Mus.","Athenae, bei",397.0,mile-/leaguestone,HD002097,2.7 cm,data available,"Dafni, byzantinisches Kloster, bei, sekundÃ¤r ...",,,,,,
2,,no image,AthÃ­nai,2011-04-04,Cowey,(17) cm,Latin,"CIL 03, 06101.; M. Å aÅ¡el Kos, Inscriptiones ...",(15) cm,]S HOSTIVM DEPRESSE[ ] / [ ] CXIIX BELLO MARIT...,-38,12.5 cm,,https://www.trismegistos.org/text/177038,------ nave]s hostium depresse[rit ---] / [---...,Es handelt sich um ein Elogium fÃ¼r Agrippa. ...,https://edh-www.adw.uni-heidelberg.de/edh/geog...,Greece,https://edh-www.adw.uni-heidelberg.de/edh/insc...,Achaia,AttikÃ­,base,"AthÃ­nai, Epigr. Mus.",Athenae,-12.0,elogium,HD002919,6.5 cm,,"Roma-Augustus Tempel, Akropolis",1866.0,,,,,
3,"[{'cognomen': 'Traianus Hadrianus', 'gender': ...",checked with photo,AthÃ­nai,2009-11-17,Cowey,76 cm,Greek-Latin,"CIL 03, 00548. (B); CIL 03, 07281.; PIR (2. Au...",112 cm,[ ]MP CAES DIVI TRAIANI PAR / THICI FIL DIVI N...,132,48 cm,,https://www.trismegistos.org/text/177039,[I]mp(eratori) Caes(ari) divi Traiani Par/thic...,(B): Am Anfang von Z. 2 fehlt das TI von nepoti.,https://edh-www.adw.uni-heidelberg.de/edh/geog...,Greece,https://edh-www.adw.uni-heidelberg.de/edh/insc...,Achaia,AttikÃ­,statue base,"AthÃ­nai, Epigr. Mus.",Athenae,,honorific inscription,HD002922,,,,,data available,,,,
4,"[{'gender': 'male', 'cognomen': 'Traianus+ Had...",no image,AthÃ­nai,2011-04-04,Cowey,(41) cm,Latin,"CIL 03, 06102.; CIL 03, 07283.; AE 1984, 0822....",(20) cm,[ ] / [ ] / [ ]D[ ] / [ ]R P XVI COS III P P [...,132,(15) cm,marble: rocks - metamorphic rocks,https://www.trismegistos.org/text/177040,[Imp(eratori) Caesari divi Traiani] / [Parthic...,Rekonstruktion des Inschriftentextes nach CIL...,https://edh-www.adw.uni-heidelberg.de/edh/geog...,Greece,https://edh-www.adw.uni-heidelberg.de/edh/insc...,Achaia,AttikÃ­,statue base,"AthÃ­nai, Epigr. Mus.",Athenae,,honorific inscription,HD002925,3.8 cm,,"\""Stoa Hadriani\"" (CIL)",,data available,,,,


In [16]:
### for each inscription, use its "edh_geography_uri" value
### to look up the matching row's "coordinates" in geo_data_df
### and store the result in a new "coordinates" column of inscriptions_data_df

%%time
def get_coordinates(column, geo_df=None):
  """Look up the "coordinates" value stored for one geography URI.

  Args:
    column: index label to look up, i.e. one "edh_geography_uri" value.
    geo_df: DataFrame indexed by URI with a "coordinates" column.
      Defaults to the notebook-level `geo_data_df`.

  Returns:
    The value stored under "coordinates" for that label, or None when the
    label (or the "coordinates" column) cannot be found.
  """
  if geo_df is None:
    geo_df = geo_data_df
  try:
    return geo_df.loc[column, "coordinates"]
  except (KeyError, TypeError):
    # KeyError: label not in the index; TypeError: label cannot be used as a key.
    # Anything else (e.g. the lookup table not being defined) is a real error
    # and must not be silently turned into a column full of None.
    return None
# Resolve every inscription's geography URI to coordinates, row by row,
# and attach the result as a new "coordinates" column.
coordinates_by_row = inscriptions_data_df.apply(
  lambda inscription: get_coordinates(inscription["edh_geography_uri"]),
  axis=1,
)
inscriptions_data_df["coordinates"] = coordinates_by_row

CPU times: user 10 s, sys: 0 ns, total: 10 s
Wall time: 10 s


In [17]:
# preview the first five rows of the inscriptions table
inscriptions_data_df.head(5)

Unnamed: 0,people,work_status,findspot_modern,last_update,responsible_individual,width,language,literature,height,diplomatic_text,not_before,depth,material,trismegistos_uri,transcription,commentary,edh_geography_uri,country,uri,province_label,modern_region,type_of_monument,present_location,findspot_ancient,not_after,type_of_inscription,id,letter_size,social_economic_legal_history,findspot,year_of_find,geography,religion,fotos,military,external_image_uris,coordinates
0,"[{'name': 'L. Ponponius(!) Rufus', 'age: years...",checked with photo,Roma,2014-10-10,Cowey,19 cm,Greek-Latin,"CIG 6916.; AE 1984, 0109. (B); P. Lombardi, Ti...",45 cm,L PONPONIVS RVFVS / VIXIT ANOS XXVII / EIA PON...,101,5.4 cm,marble: rocks - metamorphic rocks,https://www.trismegistos.org/text/177036,L(ucius) Ponponius(!) Rufus / vixit an(n)os XX...,Wiederverwendung der Tafel als TÃ¼rpfosten. D...,https://edh-www.adw.uni-heidelberg.de/edh/geog...,Italy,https://edh-www.adw.uni-heidelberg.de/edh/insc...,Achaia,Lazio,tabula,"Roma, Mus. Naz. Rom.","Kephallenia, aus",200.0,epitaph,HD001917,1-2 cm,,,,,,,,,"41.8917375,12.4861685"
1,"[{'gender': 'male', 'cognomen': 'ÎÎ±Î»Î»ÎµÎ½Ï...",checked with photo,"Patrasso - AthÃ­nai, zwischen",2012-03-15,GrÃ¤f,30 cm,Greek-Latin,"CIL 03, 00572.; CIL 03, 07306.; IG 02 (2. Aufl...",146 cm,[ ]ΥΤΟΚΡΑΤΟΡΙ / [ ]ΑΙΣΑΡΙ / [[[ ]]] / [ ]ΥΣΕΒΕ...,395,,"Marmor, geÃ¤dert / farbig",https://www.trismegistos.org/text/177037,[Α]ὐτοκράτορι / [Κ]αίσαρι / [[[---]]] / [Ε]ὐσε...,Meilenstein mit zwei griechischen Inschriften...,https://edh-www.adw.uni-heidelberg.de/edh/geog...,Greece,https://edh-www.adw.uni-heidelberg.de/edh/insc...,Achaia,AttikÃ­,mile-/leaguestone,"AthÃ­nai, Epigr. Mus.","Athenae, bei",397.0,mile-/leaguestone,HD002097,2.7 cm,data available,"Dafni, byzantinisches Kloster, bei, sekundÃ¤r ...",,,,,,,"38.012978,23.635883"
2,,no image,AthÃ­nai,2011-04-04,Cowey,(17) cm,Latin,"CIL 03, 06101.; M. Å aÅ¡el Kos, Inscriptiones ...",(15) cm,]S HOSTIVM DEPRESSE[ ] / [ ] CXIIX BELLO MARIT...,-38,12.5 cm,,https://www.trismegistos.org/text/177038,------ nave]s hostium depresse[rit ---] / [---...,Es handelt sich um ein Elogium fÃ¼r Agrippa. ...,https://edh-www.adw.uni-heidelberg.de/edh/geog...,Greece,https://edh-www.adw.uni-heidelberg.de/edh/insc...,Achaia,AttikÃ­,base,"AthÃ­nai, Epigr. Mus.",Athenae,-12.0,elogium,HD002919,6.5 cm,,"Roma-Augustus Tempel, Akropolis",1866.0,,,,,,"37.983175,23.716647"
3,"[{'cognomen': 'Traianus Hadrianus', 'gender': ...",checked with photo,AthÃ­nai,2009-11-17,Cowey,76 cm,Greek-Latin,"CIL 03, 00548. (B); CIL 03, 07281.; PIR (2. Au...",112 cm,[ ]MP CAES DIVI TRAIANI PAR / THICI FIL DIVI N...,132,48 cm,,https://www.trismegistos.org/text/177039,[I]mp(eratori) Caes(ari) divi Traiani Par/thic...,(B): Am Anfang von Z. 2 fehlt das TI von nepoti.,https://edh-www.adw.uni-heidelberg.de/edh/geog...,Greece,https://edh-www.adw.uni-heidelberg.de/edh/insc...,Achaia,AttikÃ­,statue base,"AthÃ­nai, Epigr. Mus.",Athenae,,honorific inscription,HD002922,,,,,data available,,,,,"37.983175,23.716647"
4,"[{'gender': 'male', 'cognomen': 'Traianus+ Had...",no image,AthÃ­nai,2011-04-04,Cowey,(41) cm,Latin,"CIL 03, 06102.; CIL 03, 07283.; AE 1984, 0822....",(20) cm,[ ] / [ ] / [ ]D[ ] / [ ]R P XVI COS III P P [...,132,(15) cm,marble: rocks - metamorphic rocks,https://www.trismegistos.org/text/177040,[Imp(eratori) Caesari divi Traiani] / [Parthic...,Rekonstruktion des Inschriftentextes nach CIL...,https://edh-www.adw.uni-heidelberg.de/edh/geog...,Greece,https://edh-www.adw.uni-heidelberg.de/edh/insc...,Achaia,AttikÃ­,statue base,"AthÃ­nai, Epigr. Mus.",Athenae,,honorific inscription,HD002925,3.8 cm,,"\""Stoa Hadriani\"" (CIL)",,data available,,,,,"37.983175,23.716647"


# Upload the merged data to sciencedata.dk

In [18]:
### put your dataframe data into this folder
%%time
# upload the inscriptions table (which now carries the "coordinates" column);
# the previous version sent geo_data_df here, so the merged data was never saved
s.put(sciencedata_homeurl + "personal_folder/EDH_data/EDH_inscriptions_with_geo.json", data=inscriptions_data_df.to_json())

CPU times: user 121 ms, sys: 13.3 ms, total: 134 ms
Wall time: 4.86 s


<Response [201]>