In [356]:
import sddk
import pandas as pd
import tabulate
pd.options.display.max_columns = 1000 # to see all columns
import json
import re
import geopandas as gpd
from shapely.geometry import Point
from functools import partial
from shapely.ops import transform
import pyproj
import warnings
# NOTE(review): this silences *every* warning for the whole notebook. Several
# later cells add columns to frames obtained by boolean-filtering `EDHCSg`,
# which is likely to trigger pandas' SettingWithCopyWarning - that signal is
# hidden by this blanket filter.
warnings.filterwarnings('ignore')
import numpy as np
from scipy.spatial import cKDTree
import matplotlib.pyplot as plt

from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfVectorizer


from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import confusion_matrix

import pickle

import nltk
from nltk.util import bigrams

In [2]:
# Load the merged EDH + EDCS inscription dataset through sddk.
# The second argument ("gdf") presumably requests a GeoDataFrame and the third
# looks like the identifier of a shared/public folder - TODO confirm against
# the sddk documentation.
EDHCSg = sddk.read_file("EDHCSg.geojson", "gdf", "a9237c5ea642d4714bcdefb03f70a1f4")

reading file located in a public folder


In [249]:
# Keep only overlap rows that carry a usable EDH inscription type.
# NOTE: `EDH_overlap_all` is assigned in a cell that appears further down in
# this notebook, so that cell has to be executed before this one.
# `regex=False` makes "NULL" a literal match; `na=True` marks rows with a
# missing label as "NULL" as well, so they are dropped instead of breaking `~`.
EDH_overlap = EDH_overlap_all[
    ~EDH_overlap_all["type_of_inscription_clean"].str.contains("NULL", regex=False, na=True)
]

Now we can focus on some EDCS attributes (i.e. the first 28 columns) which might be good predictors of `type_of_inscription_clean` in EDH. First, look at `status_list`:

# Applying the model

In [310]:
# load the model and vectorizer back
# NOTE: unpickling can execute arbitrary code - only load files you produced
# yourself or otherwise trust.
# `with` guarantees the file handles are closed once the objects are loaded.
with open('../data/svm_model_v1.0.sav', 'rb') as model_file:
    svm_v1 = pickle.load(model_file)
with open('../data/svm_vectorizer_v1.0.sav', 'rb') as vectorizer_file:
    vectorizer_v1 = pickle.load(vectorizer_file)

In [311]:
# we will apply the model to inscriptions which are only in EDCS
# (no EDH-ID, but an EDCS-ID).
# `.copy()` makes this an independent frame: later cells add columns to it,
# and doing that on a filtered slice of `EDHCSg` is a chained assignment.
EDCS_unique = EDHCSg[EDHCSg["EDH-ID"].isnull() & EDHCSg["EDCS-ID"].notnull()].copy()
EDCS_unique.head(5)

Unnamed: 0,EDCS-ID,publication,province,province_list,place,place_list,end_yr_list,notes_dating,status_list,inscr_type,status_notation,inscr_process,notes_references,notes_comments,inscription,inscription_stripped_final,Links,dating from,dating to,status,Latitude,Longitude,photo,Material,Comment,EDH-ID,language_EDCS,clean_text_interpretive_word_EDCS,responsible_individual,type_of_inscription,letter_size,literature,work_status,height,diplomatic_text,people,depth,material,type_of_monument,province_label,width,transcription,country,uri,findspot_ancient,last_update,modern_region,findspot_modern,edh_geography_uri,commentary,trismegistos_uri,external_image_uris,fotos,idno_tm,placenames_refs,text_edition,origdate_text,layout_execution,layout_execution_text,support_objecttype,support_objecttype_text,support_material,support_material_text,support_decoration,keywords_term,keywords_term_text,type_of_inscription_clean,type_of_inscription_certainty,height_cm,width_cm,depth_cm,material_clean,type_of_monument_clean,type_of_monument_certainty,province_label_clean,province_label_certainty,country_clean,country_certainty,findspot_ancient_clean,findspot_ancient_certainty,modern_region_clean,modern_region_certainty,findspot_modern_clean,findspot_modern_certainty,findspot_clean,findspot_certainty,origdate_text_clean,clean_text_conservative,clean_text_interpretive_sentence,findspot,year_of_find,present_location,religion,geography,social_economic_legal_history,military,not_after,language,not_before,coordinates,clean_text_interpretive_word,urban_context,within_rome,nearest_city,city_id_hanson,city_pop_est,city_geometry,nearest_city_type,nearest_city_dist,geometry,bigrams
0,EDCS-71300269,"AE 2014, 01203",Achaia,Achaia,Mavrommati / Mavromati / Messini / Messene,"['Mavrommati', 'Mavromati', 'Messini', 'Messene']",96,,"['sigilla impressa', 'tituli fabricationis', '...",tituli fabricationis,viri,sigilla impressa,,,De Rasticanis,De Rasticanis,,69.0,96,sigilla impressa; tituli fabricationis; viri,37.179412,21.924179,,opus figlinae,,,,De Rasticanis,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,96.0,Latin,69.0,"['21.9241791', '37.1794122']",De Rasticanis,big,False,Messene,59,22337.0,"[22.008029, 37.051363]",middle,0.15306,POINT (21.92418 37.17941),[De_Rasticanis]
1,EDCS-21200140,"CIL 03, 00079 = CIL 03, 14147,6 = IDakke 00028",Aegyptus,Aegyptus,Ad-Dakka / El-Dakka / Dakke / Pselqet / Pselki...,"['Ad-Dakka', 'El-Dakka', 'Dakke', 'Pselqet', '...",109,,"['milites', 'tituli fabricationis', 'tituli sa...","['tituli fabricationis', 'tituli sacri']","['milites', 'tria nomina', 'viri']",{},,,Deo Magno Mercurio / adoravit vexillu / leg(io...,Deo Magno Mercurio / adoravit vexillu / leg(io...,http://db.edcs.eu/epigr/partner.php?s_language...,109.0,109,milites; tituli fabricationis; tituli sacri; t...,23.2,32.75,,,,,,Deo Magno Mercurio adoravit vexillu legionis I...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,109.0,Latin,109.0,"['32.750000', '23.200000']",Deo Magno Mercurio adoravit vexillu legionis I...,rest,False,Elephantine,126,33806.0,"[32.933333, 24.083333]",big,0.902158,POINT (32.75000 23.20000),"[Deo_Magno, Magno_Mercurio, Mercurio_adoravit,..."
2,EDCS-21200142,"CIL 03, 00081 = CIL 03, 13584 = IDakke 00064a",Aegyptus,Aegyptus,Ad-Dakka / El-Dakka / Dakke / Pselqet / Pselki...,"['Ad-Dakka', 'El-Dakka', 'Dakke', 'Pselqet', '...",109,,{},{},{},{},,,Anno / XII [Tr]ai/a[ni],Anno / XII [Tr]ai/a[ni],,109.0,109,,23.2,32.75,,,,,,Anno XII Traiani,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,109.0,Latin,109.0,"['32.750000', '23.200000']",Anno XII Traiani,rest,False,Elephantine,126,33806.0,"[32.933333, 24.083333]",big,0.902158,POINT (32.75000 23.20000),"[Anno_XII, XII_Traiani]"
3,EDCS-52100002,IDakke 00064,Aegyptus,Aegyptus,Ad-Dakka / El-Dakka / Dakke / Pselqet / Pselki...,"['Ad-Dakka', 'El-Dakka', 'Dakke', 'Pselqet', '...",109,,tituli possessionis,tituli possessionis,{},{},,,Anno XX [Tra]ani,Anno XX [Tra]ani,,109.0,109,tituli possessionis,23.2,32.75,,,,,,Anno XX Traani,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,109.0,Latin,109.0,"['32.750000', '23.200000']",Anno XX Traani,rest,False,Elephantine,126,33806.0,"[32.933333, 24.083333]",big,0.902158,POINT (32.75000 23.20000),"[Anno_XX, XX_Traani]"
4,EDCS-44500113,IDakke 00100,Aegyptus,Aegyptus,Ad-Dakka / El-Dakka / Dakke / Pselqet / Pselki...,"['Ad-Dakka', 'El-Dakka', 'Dakke', 'Pselqet', '...",117,,"['Augusti/Augustae', 'milites', 'ordo equester...",tituli sacri,"['Augusti/Augustae', 'milites', 'ordo equester...",{},,,Pro salute Imp(eratoris) Nervae Traiani Caesar...,Pro salute Imp(eratoris) Nervae Traiani Caesar...,,103.0,117,Augusti/Augustae; milites; ordo equester; titu...,23.2,32.75,,,,,,Pro salute Imperatoris Nervae Traiani Caesaris...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,117.0,Latin,103.0,"['32.750000', '23.200000']",Pro salute Imperatoris Nervae Traiani Caesaris...,rest,False,Elephantine,126,33806.0,"[32.933333, 24.083333]",big,0.902158,POINT (32.75000 23.20000),"[Pro_salute, salute_Imperatoris, Imperatoris_N..."


In [312]:
# First create the bigram features: one value per inscription, derived from
# the word-level interpretive text. `get_bigrams_underscore` is defined
# outside this section; judging by the displayed column it yields lists of
# underscore-joined word pairs (e.g. "Deo_Magno") - verify against its definition.
EDCS_unique["bigrams"] = EDCS_unique["clean_text_interpretive_word"].apply(get_bigrams_underscore)

In [313]:
# Collect the predictors for every EDCS-only inscription.
# `status_bigrams_list_apply` (list of lists) is the input for the vectorizer;
# `status_list` and `material_list` gather the status and material values that
# the helper returns alongside each combined entry.
status_bigrams_list_apply = []
status_list = []
material_list = []
predictor_columns = (EDCS_unique[col].tolist() for col in ("status_list", "bigrams", "Material"))
for el_status, el_bigrams, el_material in zip(*predictor_columns):
    combined, status_part, material_part = combine_status_list_and_bigrams(el_status, el_bigrams, el_material)
    status_bigrams_list_apply.append(combined)
    status_list.append(status_part)
    material_list.extend(material_part)

In [315]:
# Turn the combined predictors into a feature matrix with the loaded
# vectorizer. Only `transform` is called (no re-fitting), so the feature space
# stays the one stored in the pickled vectorizer.
X_applied = vectorizer_v1.transform(status_bigrams_list_apply)

In [316]:
# Predict one inscription type per EDCS-only inscription and preview the
# first ten labels.
prediction = svm_v1.predict(X_applied)
prediction[:10]

array(['owner/artist inscription', 'owner/artist inscription', 'epitaph',
       'votive inscription', 'votive inscription', 'epitaph', 'epitaph',
       'honorific inscription', 'honorific inscription',
       'honorific inscription'], dtype='<U31')

In [377]:
# Class labels known to the model; the columns of `predict_proba` follow this order.
svm_v1.classes_

array(['acclamation', 'adnuntiatio', 'assignation inscription',
       'boundary inscription', 'building/dedicatory inscription',
       'calendar', 'defixio', 'elogium', 'epitaph',
       'honorific inscription', 'identification inscription', 'label',
       'letter', 'list', 'mile-/leaguestone', 'military diploma',
       'owner/artist inscription', 'prayer', 'private legal inscription',
       'public legal inscription', 'seat inscription',
       'votive inscription'], dtype='<U31')

In [317]:
# Highest class probability per inscription (row-wise maximum of the
# probability matrix), kept as a plain list; preview the first ten values.
probs = list(svm_v1.predict_proba(X_applied).max(axis=1))
probs[:10]

[0.7857018706636444,
 0.850080323385903,
 0.696566811086934,
 0.7255255264157866,
 0.91963276157691,
 0.9973812084603909,
 0.722058041187063,
 0.5854847852357029,
 0.8361305582932681,
 0.9281131587280677]

In [318]:
# Pair every inscription's most probable class label with that probability.
# The label is looked up in `svm_v1.classes_` via the row-wise argmax of the
# probability matrix, so label and probability always belong together.
class_probabilities = svm_v1.predict_proba(X_applied)
top_class_index = class_probabilities.argmax(axis=1)
prediction_probs = list(zip(svm_v1.classes_[top_class_index], class_probabilities.max(axis=1)))
prediction_probs[:10]

[('owner/artist inscription', 0.7857018706636444),
 ('owner/artist inscription', 0.850080323385903),
 ('epitaph', 0.696566811086934),
 ('votive inscription', 0.7255255264157866),
 ('votive inscription', 0.91963276157691),
 ('epitaph', 0.9973812084603909),
 ('epitaph', 0.722058041187063),
 ('honorific inscription', 0.5854847852357029),
 ('honorific inscription', 0.8361305582932681),
 ('honorific inscription', 0.9281131587280677)]

In [320]:
# Number of classified inscriptions (one (label, probability) pair each).
len(prediction_probs)

83482

In [321]:
# For each probability cut-off from 0.30 to 0.95 (step 0.05), report the share
# of inscriptions whose top-class probability reaches the cut-off.
for percent in range(30, 100, 5):
    threshold = percent / 100
    n_confident = sum(1 for _, prob in prediction_probs if prob >= threshold)
    proportion = n_confident / len(prediction_probs)
    print("prob. threshold: {0}, proportion of inscr.: {1}".format(threshold, proportion))

prob. threshold: 0.3, proportion of inscr.: 0.9917227665844134
prob. threshold: 0.35, proportion of inscr.: 0.9776718334491268
prob. threshold: 0.4, proportion of inscr.: 0.9708440142785271
prob. threshold: 0.45, proportion of inscr.: 0.9533192784073213
prob. threshold: 0.5, proportion of inscr.: 0.9454612970460698
prob. threshold: 0.55, proportion of inscr.: 0.9377949737668
prob. threshold: 0.6, proportion of inscr.: 0.9269543135047076
prob. threshold: 0.65, proportion of inscr.: 0.920713447210177
prob. threshold: 0.7, proportion of inscr.: 0.732073980019645
prob. threshold: 0.75, proportion of inscr.: 0.7075776814163532
prob. threshold: 0.8, proportion of inscr.: 0.605962962075657
prob. threshold: 0.85, proportion of inscr.: 0.5564073692532522
prob. threshold: 0.9, proportion of inscr.: 0.4863443616588007
prob. threshold: 0.95, proportion of inscr.: 0.4326441628135406


In [328]:
# Same cut-off sweep as the printed report, collected into a table with one
# row per threshold: the threshold, the rounded proportion of inscriptions at
# or above it, and their absolute count N.
total_predictions = len(prediction_probs)
threshold_results = []
for percent in range(30, 100, 5):
    threshold = percent / 100
    n_above = sum(1 for _, prob in prediction_probs if prob >= threshold)
    threshold_results.append({
        "threshold (=/>)": threshold,
        "proportion": np.round(n_above / total_predictions, 2),
        "N": n_above,
    })
model_results_df = pd.DataFrame(threshold_results)
model_results_df

Unnamed: 0,threshold (=/>),proportion,N
0,0.3,0.99,82791
1,0.35,0.98,81618
2,0.4,0.97,81048
3,0.45,0.95,79585
4,0.5,0.95,78929
5,0.55,0.94,78289
6,0.6,0.93,77384
7,0.65,0.92,76863
8,0.7,0.73,61115
9,0.75,0.71,59070


In [332]:
# Persist the threshold table next to the model files. The DataFrame index is
# written as well (pandas default), so the CSV gets an unnamed first column.
model_results_df.to_csv("../data/svm_v1_EDCS_results.csv")

In [337]:
# Show the corresponding threshold table for the held-out test set.
# `test_results_df` is not created in this section of the notebook; it has to
# exist in the kernel already - TODO confirm which cell builds it.
test_results_df

Unnamed: 0,threshold (=/>),proportion,N,correct
0,0.3,1.0,4601,0.862421
1,0.35,0.94,4337,0.890247
2,0.4,0.93,4310,0.8942
3,0.45,0.91,4213,0.898647
4,0.5,0.9,4162,0.90197
5,0.55,0.89,4092,0.90567
6,0.6,0.86,3955,0.908976
7,0.65,0.84,3898,0.910723
8,0.7,0.71,3264,0.957414
9,0.75,0.67,3078,0.968811


# Combining the data back

In [339]:
# Attach the predicted label and its top-class probability to the EDCS-only rows.
# NOTE(review): the label comes from `svm_v1.predict` while the probability is
# the maximum of `svm_v1.predict_proba`. For scikit-learn's SVC these two can
# disagree, so for some rows the stored probability may belong to a different
# class than the stored label - consider deriving both from `prediction_probs`.
EDCS_unique["type_of_inscription_auto"] = prediction
EDCS_unique["type_of_inscription_auto_prob"] = probs

In [340]:
# Preview: the two new columns appear at the far right of the table.
EDCS_unique.head(5)

Unnamed: 0,EDCS-ID,publication,province,province_list,place,place_list,end_yr_list,notes_dating,status_list,inscr_type,status_notation,inscr_process,notes_references,notes_comments,inscription,inscription_stripped_final,Links,dating from,dating to,status,Latitude,Longitude,photo,Material,Comment,EDH-ID,language_EDCS,clean_text_interpretive_word_EDCS,responsible_individual,type_of_inscription,letter_size,literature,work_status,height,diplomatic_text,people,depth,material,type_of_monument,province_label,width,transcription,country,uri,findspot_ancient,last_update,modern_region,findspot_modern,edh_geography_uri,commentary,trismegistos_uri,external_image_uris,fotos,idno_tm,placenames_refs,text_edition,origdate_text,layout_execution,layout_execution_text,support_objecttype,support_objecttype_text,support_material,support_material_text,support_decoration,keywords_term,keywords_term_text,type_of_inscription_clean,type_of_inscription_certainty,height_cm,width_cm,depth_cm,material_clean,type_of_monument_clean,type_of_monument_certainty,province_label_clean,province_label_certainty,country_clean,country_certainty,findspot_ancient_clean,findspot_ancient_certainty,modern_region_clean,modern_region_certainty,findspot_modern_clean,findspot_modern_certainty,findspot_clean,findspot_certainty,origdate_text_clean,clean_text_conservative,clean_text_interpretive_sentence,findspot,year_of_find,present_location,religion,geography,social_economic_legal_history,military,not_after,language,not_before,coordinates,clean_text_interpretive_word,urban_context,within_rome,nearest_city,city_id_hanson,city_pop_est,city_geometry,nearest_city_type,nearest_city_dist,geometry,bigrams,type_of_inscription_auto,type_of_inscription_auto_prob
0,EDCS-71300269,"AE 2014, 01203",Achaia,Achaia,Mavrommati / Mavromati / Messini / Messene,"['Mavrommati', 'Mavromati', 'Messini', 'Messene']",96,,"['sigilla impressa', 'tituli fabricationis', '...",tituli fabricationis,viri,sigilla impressa,,,De Rasticanis,De Rasticanis,,69.0,96,sigilla impressa; tituli fabricationis; viri,37.179412,21.924179,,opus figlinae,,,,De Rasticanis,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,96.0,Latin,69.0,"['21.9241791', '37.1794122']",De Rasticanis,big,False,Messene,59,22337.0,"[22.008029, 37.051363]",middle,0.15306,POINT (21.92418 37.17941),[De_Rasticanis],owner/artist inscription,0.785702
1,EDCS-21200140,"CIL 03, 00079 = CIL 03, 14147,6 = IDakke 00028",Aegyptus,Aegyptus,Ad-Dakka / El-Dakka / Dakke / Pselqet / Pselki...,"['Ad-Dakka', 'El-Dakka', 'Dakke', 'Pselqet', '...",109,,"['milites', 'tituli fabricationis', 'tituli sa...","['tituli fabricationis', 'tituli sacri']","['milites', 'tria nomina', 'viri']",{},,,Deo Magno Mercurio / adoravit vexillu / leg(io...,Deo Magno Mercurio / adoravit vexillu / leg(io...,http://db.edcs.eu/epigr/partner.php?s_language...,109.0,109,milites; tituli fabricationis; tituli sacri; t...,23.2,32.75,,,,,,Deo Magno Mercurio adoravit vexillu legionis I...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,109.0,Latin,109.0,"['32.750000', '23.200000']",Deo Magno Mercurio adoravit vexillu legionis I...,rest,False,Elephantine,126,33806.0,"[32.933333, 24.083333]",big,0.902158,POINT (32.75000 23.20000),"[Deo_Magno, Magno_Mercurio, Mercurio_adoravit,...",owner/artist inscription,0.85008
2,EDCS-21200142,"CIL 03, 00081 = CIL 03, 13584 = IDakke 00064a",Aegyptus,Aegyptus,Ad-Dakka / El-Dakka / Dakke / Pselqet / Pselki...,"['Ad-Dakka', 'El-Dakka', 'Dakke', 'Pselqet', '...",109,,{},{},{},{},,,Anno / XII [Tr]ai/a[ni],Anno / XII [Tr]ai/a[ni],,109.0,109,,23.2,32.75,,,,,,Anno XII Traiani,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,109.0,Latin,109.0,"['32.750000', '23.200000']",Anno XII Traiani,rest,False,Elephantine,126,33806.0,"[32.933333, 24.083333]",big,0.902158,POINT (32.75000 23.20000),"[Anno_XII, XII_Traiani]",epitaph,0.696567
3,EDCS-52100002,IDakke 00064,Aegyptus,Aegyptus,Ad-Dakka / El-Dakka / Dakke / Pselqet / Pselki...,"['Ad-Dakka', 'El-Dakka', 'Dakke', 'Pselqet', '...",109,,tituli possessionis,tituli possessionis,{},{},,,Anno XX [Tra]ani,Anno XX [Tra]ani,,109.0,109,tituli possessionis,23.2,32.75,,,,,,Anno XX Traani,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,109.0,Latin,109.0,"['32.750000', '23.200000']",Anno XX Traani,rest,False,Elephantine,126,33806.0,"[32.933333, 24.083333]",big,0.902158,POINT (32.75000 23.20000),"[Anno_XX, XX_Traani]",votive inscription,0.725526
4,EDCS-44500113,IDakke 00100,Aegyptus,Aegyptus,Ad-Dakka / El-Dakka / Dakke / Pselqet / Pselki...,"['Ad-Dakka', 'El-Dakka', 'Dakke', 'Pselqet', '...",117,,"['Augusti/Augustae', 'milites', 'ordo equester...",tituli sacri,"['Augusti/Augustae', 'milites', 'ordo equester...",{},,,Pro salute Imp(eratoris) Nervae Traiani Caesar...,Pro salute Imp(eratoris) Nervae Traiani Caesar...,,103.0,117,Augusti/Augustae; milites; ordo equester; titu...,23.2,32.75,,,,,,Pro salute Imperatoris Nervae Traiani Caesaris...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,117.0,Latin,103.0,"['32.750000', '23.200000']",Pro salute Imperatoris Nervae Traiani Caesaris...,rest,False,Elephantine,126,33806.0,"[32.933333, 24.083333]",big,0.902158,POINT (32.75000 23.20000),"[Pro_salute, salute_Imperatoris, Imperatoris_N...",votive inscription,0.919633


In [341]:
# prepare last part of the data - inscriptions we have only in EDH
# in this case, we can also use the information we already have:
# the curated EDH type is copied over and given probability 1.
# `.copy()` detaches the subset from `EDHCSg` so that adding columns below is
# not a chained assignment on a filtered slice.
EDH_unique = EDHCSg[EDHCSg["EDH-ID"].notnull() & EDHCSg["EDCS-ID"].isnull()].copy()
EDH_unique["type_of_inscription_auto"] = EDH_unique["type_of_inscription_clean"]
# a scalar is broadcast to every row; no need to build a list of ones
EDH_unique["type_of_inscription_auto_prob"] = 1

In [248]:
# Inscriptions present in both EDH and EDCS (the pool used for training and
# testing). Their curated EDH type is reused as the "auto" type with probability 1.
# `.copy()` detaches the subset from `EDHCSg` so that adding columns below is
# not a chained assignment on a filtered slice.
EDH_overlap_all = EDHCSg[EDHCSg["EDH-ID"].notnull() & EDHCSg["EDCS-ID"].notnull()].copy()
# NOTE(review): rows whose `type_of_inscription_clean` is the string "NULL"
# are copied as-is, i.e. they end up labelled "NULL" with probability 1 -
# confirm this is intended for the combined dataset.
EDH_overlap_all["type_of_inscription_auto"] = EDH_overlap_all["type_of_inscription_clean"]
# a scalar is broadcast to every row; no need to build a list of ones
EDH_overlap_all["type_of_inscription_auto_prob"] = 1

type_of_inscription_clean
epitaph                            21520
votive inscription                 11728
NULL                                3745
owner/artist inscription            3340
honorific inscription               3003
building/dedicatory inscription     2561
mile-/leaguestone                   1307
identification inscription           850
acclamation                          287
defixio                              269
list                                 246
military diploma                     209
label                                194
boundary inscription                 175
elogium                              132
letter                               119
public legal inscription             109
seat inscription                      42
private legal inscription             36
prayer                                18
assignation inscription               15
calendar                              10
adnuntiatio                            1
dtype: int64

In [343]:
# Report (rows, columns) for the three subsets that are combined next:
# EDH+EDCS overlap, EDH-only and EDCS-only inscriptions.
subset_shapes = [
    ("the dataset we used for training and testing (+NULLs): ", EDH_overlap_all.shape),
    ("inscriptions in EDH only: ", EDH_unique.shape),
    ("inscriptions in EDCS only, we wanted to classify: ", EDCS_unique.shape),
]
for description, shape in subset_shapes:
    print(description, shape)

the dataset we used for training and testing (+NULLs):  (49916, 113)
inscriptions in EDH only:  (3907, 113)
inscriptions in EDCS only, we wanted to classify:  (83482, 113)


In [344]:
# Combine the three subsets (overlap, EDH-only, EDCS-only) back into a single
# frame. The index is not reset, so every row keeps the index label it had in
# its source frame (the preview below starts at 83482 rather than 0).
EDHCS_autotypes = pd.concat([EDH_overlap_all, EDH_unique, EDCS_unique])
EDHCS_autotypes.head()

Unnamed: 0,EDCS-ID,publication,province,province_list,place,place_list,end_yr_list,notes_dating,status_list,inscr_type,status_notation,inscr_process,notes_references,notes_comments,inscription,inscription_stripped_final,Links,dating from,dating to,status,Latitude,Longitude,photo,Material,Comment,EDH-ID,language_EDCS,clean_text_interpretive_word_EDCS,responsible_individual,type_of_inscription,letter_size,literature,work_status,height,diplomatic_text,people,depth,material,type_of_monument,province_label,width,transcription,country,uri,findspot_ancient,last_update,modern_region,findspot_modern,edh_geography_uri,commentary,trismegistos_uri,external_image_uris,fotos,idno_tm,placenames_refs,text_edition,origdate_text,layout_execution,layout_execution_text,support_objecttype,support_objecttype_text,support_material,support_material_text,support_decoration,keywords_term,keywords_term_text,type_of_inscription_clean,type_of_inscription_certainty,height_cm,width_cm,depth_cm,material_clean,type_of_monument_clean,type_of_monument_certainty,province_label_clean,province_label_certainty,country_clean,country_certainty,findspot_ancient_clean,findspot_ancient_certainty,modern_region_clean,modern_region_certainty,findspot_modern_clean,findspot_modern_certainty,findspot_clean,findspot_certainty,origdate_text_clean,clean_text_conservative,clean_text_interpretive_sentence,findspot,year_of_find,present_location,religion,geography,social_economic_legal_history,military,not_after,language,not_before,coordinates,clean_text_interpretive_word,urban_context,within_rome,nearest_city,city_id_hanson,city_pop_est,city_geometry,nearest_city_type,nearest_city_dist,geometry,bigrams,type_of_inscription_auto,type_of_inscription_auto_prob
83482,EDCS-03700724,"ZPE-108-159 = Thesprotia 00001 = AE 1993, 0140...",Achaia,Achaia,Agios Athanasios / Photike,"['Agios Athanasios', 'Photike']",313,,"['Augusti/Augustae', 'litterae erasae', 'ordo ...",tituli honorarii,"['Augusti/Augustae', 'ordo equester', 'tria no...",litterae erasae,,,Fortissimo et Piis/simo Caesari d(omino) n(ost...,Fortissimo et Piis/simo Caesari d(omino) n(ost...,http://db.edcs.eu/epigr/partner.php?s_language...,309.0,313,Augusti/Augustae; litterae erasae; ordo equest...,39.451218,20.766767,http://db.edcs.eu/epigr/bilder.php?bilder.php?...,,,HD052964,,Fortissimo et Piissimo Caesari domino nostro G...,Cowey,honorific inscription,3-5.3 cm,"AE 1993, 1406.; V. Papadopoulou, AD 43 B, 1988...",checked with photo,99 cm,FORTISSIMO ET PIIS / SIMO CAESARI D N / GAL VA...,"[{'name': 'Gal. Val. [[Maximiano]]', 'nomen': ...",67 cm,,statue base,Epirus,67 cm,Fortissimo et piis/simo Caesari d(omino) n(ost...,Greece,https://edh-www.adw.uni-heidelberg.de/edh/insc...,Photike,2014-09-16,Ípeiros,Paramythía,https://edh-www.adw.uni-heidelberg.de/edh/geog...,,https://www.trismegistos.org/text/121715,{},{},121715,"['http://www.trismegistos.org/place/000655', '...",Fortissimo et piissimo Caesari...,309 AD – 313 AD,21,unbestimmt,57.0,Statuenbasis,138,unbestimmt,1000.0,69.0,Ehreninschrift,honorific inscription,Certain,99.0,67.0,67.0,,statue base,Certain,Epirus,Certain,Greece,Certain,Photike,Certain,Ípeiros,Certain,Paramythía,Certain,Agios Athanasios,Certain,309 AD – 313 AD,Fortissimo et piissimo Caesari d n Gal Val P F...,Fortissimo et piissimo Caesari domino nostro G...,Agios Athanasios,,Fragma Kalama,,data available,,,313.0,Latin,309.0,"[20.7668, 39.4512]",Fortissimo et piissimo Caesari domino nostro G...,rest,False,Dodona,31,1000.0,"[20.787767, 39.546432]",minor,0.097513,POINT (20.76680 39.45120),"[Fortissimo_et, et_piissimo, piissimo_Caesari,...",honorific inscription,1.0
83483,EDCS-03300852,"AE 1995, 01409",Achaia,Achaia,Alea / Tegea,"['Alea', 'Tegea']",276,to 276; b: 276 to 282 \n\n,"['Augusti/Augustae', 'miliaria', 'viri']",miliaria,"['Augusti/Augustae', 'viri']",{},,,to 276; b: 276 to 282 \n\n \n \nImp...,Imp(eratori) / Floriano / P(io) F(elici) Aug(u...,http://db.edcs.eu/epigr/partner.php?s_language...,,276,Augusti/Augustae; miliaria; viri,37.454501,22.420877,,lapis,,HD051000,,Imperatori Floriano Pio Felici Augusto patri p...,Cowey,mile-/leaguestone,3.3-6 cm,"AE 1995, 1409.; M. Iozzer - M. Pangano, ASAA 6...",checked with photo,(44) cm,[ ] / AN[ ] FLORIANO / P F AVG / P P / M P III...,"[{'person_id': '1', 'nomen': 'Annius+', 'name'...",,,mile-/leaguestone,Achaia,24 cm,[Imp(eratori) Caes(ari) M(arco)] / An[nio] Flo...,Greece,https://edh-www.adw.uni-heidelberg.de/edh/insc...,Tegea,2011-05-24,Pelopónissos,Alea,https://edh-www.adw.uni-heidelberg.de/edh/geog...,"Die erste Inschrift ist auf 276, die zweite a...",https://www.trismegistos.org/text/177273,{},{},177273,"['http://www.trismegistos.org/place/000078', '...",Imperatori Caesari Marco Annio Floriano Pio Fe...,276 AD,21,unbestimmt,89.0,Meilen-/Leugenstein,138,unbestimmt,1000.0,102.0,Meilen-/Leugenstein,mile-/leaguestone,Certain,44.0,24.0,,,mile-/leaguestone,Certain,Achaia,Certain,Greece,Certain,Tegea,Certain,Pelopónissos,Certain,Alea,Certain,"Stringu, bei",Certain,276 AD,An Floriano P F Aug p p m p III Imp Caes M Aur...,Imperatori Caesari Marco Annio Floriano Pio Fe...,"Stringu, bei",,"Tegea, Mus.",,,data available,,,Latin,276.0,"[22.4171, 37.4319]",Imperatori Caesari Marco Annio Floriano Pio Fe...,big,False,Tegea,97,46362.0,"[22.417226, 37.427653]",big,0.004249,POINT (22.41710 37.43190),"[Imperatori_Caesari, Caesari_Marco, Marco_Anni...",mile-/leaguestone,1.0
83484,EDCS-28500283,"CIL 03, 07251 = D 00214 = NDIEC-07, p 81 = AE ...",Achaia,Achaia,Alea / Tegea,"['Alea', 'Tegea']",50,,"['Augusti/Augustae', 'leges', 'viri']",leges,"['Augusti/Augustae', 'viri']",{},,,T[(iberius)] Claudius Caesar Aug(ustus) / G[er...,T[(iberius)] Claudius Caesar Aug(ustus) / G[er...,http://db.edcs.eu/epigr/partner.php?s_language...,49.0,50,Augusti/Augustae; leges; viri,37.454501,22.420877,,,,HD021396,,Tiberius Claudius Caesar Augustus Germanicus p...,Cowey,public legal inscription,,"CIL 03, 07251.; ILS 0214.; AE 1941, 0119.; MAI...",checked with photo,160 cm,T[ ] CLAVDIVS CAESAR AVG / G[ ]ANICVS PONTIF M...,"[{'gender': 'male', 'cognomen': 'Caesar August...",17 cm,,tabula,Achaia,58 cm,T[i(berius)] Claudius Caesar Aug(ustus) / G[er...,Greece,https://edh-www.adw.uni-heidelberg.de/edh/insc...,Tegea,2011-05-10,Pelopónissos,Alea,https://edh-www.adw.uni-heidelberg.de/edh/geog...,,https://www.trismegistos.org/text/177131,{},{},177131,"['http://www.trismegistos.org/place/000078', '...",Tiberius Claudius Caesar Augus...,49 AD – 50 AD,21,unbestimmt,257.0,Tafel,138,unbestimmt,1000.0,,"Rechtliche Verfügung, öffentlich",public legal inscription,Certain,160.0,58.0,17.0,,tabula,Certain,Achaia,Certain,Greece,Certain,Tegea,Certain,Pelopónissos,Certain,Alea,Certain,,,49 AD – 50 AD,T Claudius Caesar Aug Ganicus pontif max trib ...,Tiberius Claudius Caesar Augustus Germanicus p...,,,,,,,,50.0,Latin,49.0,"[22.4171, 37.4319]",Tiberius Claudius Caesar Augustus Germanicus p...,big,False,Tegea,97,46362.0,"[22.417226, 37.427653]",big,0.004249,POINT (22.41710 37.43190),"[Tiberius_Claudius, Claudius_Caesar, Caesar_Au...",public legal inscription,1.0
83485,EDCS-09400671,"CIMRM-02, 02350 = IG-12, 00274 = Andros 00124 ...",Achaia,Achaia,Andros,Andros,209,,"['litterae erasae', 'tituli sacri']",tituli sacri,{},litterae erasae,,,Pro salute Imp(eratoris) Caesari(s) / L(uci) S...,Pro salute Imp(eratoris) Caesari(s) / L(uci) S...,http://db.edcs.eu/epigr/partner.php?s_language...,198.0,209,litterae erasae; tituli sacri,37.837612,24.937637,http://db.edcs.eu/epigr/bilder.php?bilder.php?...,,,HD011892,,Pro salute Imperatoris Caesaris Luci Septimi S...,Cowey,votive inscription,5.5-5.7 cm,"AE 1911, 0056. (B); T. Sauciuc, MDAI(R) 25, 19...",checked with photo,48 cm,PRO SALVTE IMP CAESARI / L SEPTIMI SEVERI ET M...,"[{'gender': 'male', 'praenomen': 'L.', 'name':...",,,,Achaia,126 cm,Pro salute Imp(eratorum) Caesari(s) / L(uci) S...,Greece,https://edh-www.adw.uni-heidelberg.de/edh/insc...,Palaeopolis,2017-05-30,Nótio Aigaío,Palaiópolis,https://edh-www.adw.uni-heidelberg.de/edh/geog...,Auf dem Foto sind die Buchstaben oft kaum erk...,https://www.trismegistos.org/text/177087,{},{},177087,[],Pro salute Imperatorum Caesari...,198 AD – 209 AD,21,unbestimmt,2.0,unbestimmt,138,unbestimmt,1000.0,80.0,Weihinschrift,votive inscription,Certain,48.0,126.0,,,,,Achaia,Certain,Greece,Certain,Palaeopolis,Certain,Nótio Aigaío,Certain,Palaiópolis,Certain,"to Elleniko Mauer, sekundär verwendet",Certain,198 AD – 209 AD,Pro salute Imp Caesari L Septimi Severi et M A...,Pro salute Imperatorum Caesaris Luci Septimi S...,"to Elleniko Mauer, sekundär verwendet",,"to Elleniko Mauer, vermauert",names of pagan deities,,,data available,209.0,Latin,198.0,"[24.8323, 37.8188]",Pro salute Imperatorum Caesaris Luci Septimi S...,rest,False,Ioulis,47,1000.0,"[24.34625, 37.633122]",minor,0.520308,POINT (24.83230 37.81880),"[Pro_salute, salute_Imperatorum, Imperatorum_C...",votive inscription,1.0
83486,EDCS-24600769,"AE 1995, 01407 = AE 2001, 01812",Achaia,Achaia,Archea Olimpia / Archaia Olympia / Olympia,"['Archea Olimpia', 'Archaia Olympia', 'Olympia']",96,,{},{},{},{},,,Octa(vius) Sa(lutaris),Octa(vius) Sa(lutaris),http://db.edcs.eu/epigr/partner.php?s_language...,81.0,96,,37.64387,21.625513,,,,HD050999,,Octavius Salutaris,Cowey,owner/artist inscription,,"AE 1995, 1407.; U. Sinn u. a., Nikephoros 8, 1...",checked with photo,,OCTA SAL,"[{'person_id': '1', 'nomen': 'Octavius*', 'nam...",,,instrumentum domesticum,Achaia,,Octa(vius) Sal(utaris),Greece,https://edh-www.adw.uni-heidelberg.de/edh/insc...,Olympia,2012-04-16,Stereá Elláda,Olympia,https://edh-www.adw.uni-heidelberg.de/edh/geog...,(B): Sa(lutaris) fehlerhaft für Sal(utaris).,https://www.trismegistos.org/text/177272,{},{},177272,"['http://www.trismegistos.org/place/000078', '...",Octavius Salutaris,81 AD – 96 AD,21,unbestimmt,140.0,Instrumentum domesticum,138,unbestimmt,1000.0,311.0,Besitzer-/Herstellerinschrift,owner/artist inscription,Certain,,,,,instrumentum domesticum,Certain,Achaia,Certain,Greece,Certain,Olympia,Certain,Stereá Elláda,Certain,Olympia,Certain,Haus der Athleten,Certain,81 AD – 96 AD,Octa Sal,Octavius Salutaris,Haus der Athleten,,,,,,,96.0,Latin,81.0,"[21.6271, 37.6479]",Octavius Salutaris,big,False,Elis,35,1000.0,"[21.435443, 37.827452]",minor,0.262624,POINT (21.62710 37.64790),[Octavius_Salutaris],owner/artist inscription,1.0


In [345]:
# The bigram lists were only needed as model input; remove them from the
# combined dataset. Rebinding (instead of `inplace=True`) together with
# `errors="ignore"` keeps this cell safe to re-run once the column is gone.
EDHCS_autotypes = EDHCS_autotypes.drop(columns=["bigrams"], errors="ignore")

In [346]:
# Sanity check: the row count should equal the sum of the three subsets
# (49916 + 3907 + 83482 = 137305), with one column fewer after dropping "bigrams".
EDHCS_autotypes.shape

(137305, 112)

In [347]:
# Preview the combined dataset; "bigrams" is gone and the two auto-type
# columns are the last ones.
EDHCS_autotypes.head(5)

Unnamed: 0,EDCS-ID,publication,province,province_list,place,place_list,end_yr_list,notes_dating,status_list,inscr_type,status_notation,inscr_process,notes_references,notes_comments,inscription,inscription_stripped_final,Links,dating from,dating to,status,Latitude,Longitude,photo,Material,Comment,EDH-ID,language_EDCS,clean_text_interpretive_word_EDCS,responsible_individual,type_of_inscription,letter_size,literature,work_status,height,diplomatic_text,people,depth,material,type_of_monument,province_label,width,transcription,country,uri,findspot_ancient,last_update,modern_region,findspot_modern,edh_geography_uri,commentary,trismegistos_uri,external_image_uris,fotos,idno_tm,placenames_refs,text_edition,origdate_text,layout_execution,layout_execution_text,support_objecttype,support_objecttype_text,support_material,support_material_text,support_decoration,keywords_term,keywords_term_text,type_of_inscription_clean,type_of_inscription_certainty,height_cm,width_cm,depth_cm,material_clean,type_of_monument_clean,type_of_monument_certainty,province_label_clean,province_label_certainty,country_clean,country_certainty,findspot_ancient_clean,findspot_ancient_certainty,modern_region_clean,modern_region_certainty,findspot_modern_clean,findspot_modern_certainty,findspot_clean,findspot_certainty,origdate_text_clean,clean_text_conservative,clean_text_interpretive_sentence,findspot,year_of_find,present_location,religion,geography,social_economic_legal_history,military,not_after,language,not_before,coordinates,clean_text_interpretive_word,urban_context,within_rome,nearest_city,city_id_hanson,city_pop_est,city_geometry,nearest_city_type,nearest_city_dist,geometry,type_of_inscription_auto,type_of_inscription_auto_prob
83482,EDCS-03700724,"ZPE-108-159 = Thesprotia 00001 = AE 1993, 0140...",Achaia,Achaia,Agios Athanasios / Photike,"['Agios Athanasios', 'Photike']",313,,"['Augusti/Augustae', 'litterae erasae', 'ordo ...",tituli honorarii,"['Augusti/Augustae', 'ordo equester', 'tria no...",litterae erasae,,,Fortissimo et Piis/simo Caesari d(omino) n(ost...,Fortissimo et Piis/simo Caesari d(omino) n(ost...,http://db.edcs.eu/epigr/partner.php?s_language...,309.0,313,Augusti/Augustae; litterae erasae; ordo equest...,39.451218,20.766767,http://db.edcs.eu/epigr/bilder.php?bilder.php?...,,,HD052964,,Fortissimo et Piissimo Caesari domino nostro G...,Cowey,honorific inscription,3-5.3 cm,"AE 1993, 1406.; V. Papadopoulou, AD 43 B, 1988...",checked with photo,99 cm,FORTISSIMO ET PIIS / SIMO CAESARI D N / GAL VA...,"[{'name': 'Gal. Val. [[Maximiano]]', 'nomen': ...",67 cm,,statue base,Epirus,67 cm,Fortissimo et piis/simo Caesari d(omino) n(ost...,Greece,https://edh-www.adw.uni-heidelberg.de/edh/insc...,Photike,2014-09-16,Ípeiros,Paramythía,https://edh-www.adw.uni-heidelberg.de/edh/geog...,,https://www.trismegistos.org/text/121715,{},{},121715,"['http://www.trismegistos.org/place/000655', '...",Fortissimo et piissimo Caesari...,309 AD – 313 AD,21,unbestimmt,57.0,Statuenbasis,138,unbestimmt,1000.0,69.0,Ehreninschrift,honorific inscription,Certain,99.0,67.0,67.0,,statue base,Certain,Epirus,Certain,Greece,Certain,Photike,Certain,Ípeiros,Certain,Paramythía,Certain,Agios Athanasios,Certain,309 AD – 313 AD,Fortissimo et piissimo Caesari d n Gal Val P F...,Fortissimo et piissimo Caesari domino nostro G...,Agios Athanasios,,Fragma Kalama,,data available,,,313.0,Latin,309.0,"[20.7668, 39.4512]",Fortissimo et piissimo Caesari domino nostro G...,rest,False,Dodona,31,1000.0,"[20.787767, 39.546432]",minor,0.097513,POINT (20.76680 39.45120),honorific inscription,1.0
83483,EDCS-03300852,"AE 1995, 01409",Achaia,Achaia,Alea / Tegea,"['Alea', 'Tegea']",276,to 276; b: 276 to 282 \n\n,"['Augusti/Augustae', 'miliaria', 'viri']",miliaria,"['Augusti/Augustae', 'viri']",{},,,to 276; b: 276 to 282 \n\n \n \nImp...,Imp(eratori) / Floriano / P(io) F(elici) Aug(u...,http://db.edcs.eu/epigr/partner.php?s_language...,,276,Augusti/Augustae; miliaria; viri,37.454501,22.420877,,lapis,,HD051000,,Imperatori Floriano Pio Felici Augusto patri p...,Cowey,mile-/leaguestone,3.3-6 cm,"AE 1995, 1409.; M. Iozzer - M. Pangano, ASAA 6...",checked with photo,(44) cm,[ ] / AN[ ] FLORIANO / P F AVG / P P / M P III...,"[{'person_id': '1', 'nomen': 'Annius+', 'name'...",,,mile-/leaguestone,Achaia,24 cm,[Imp(eratori) Caes(ari) M(arco)] / An[nio] Flo...,Greece,https://edh-www.adw.uni-heidelberg.de/edh/insc...,Tegea,2011-05-24,Pelopónissos,Alea,https://edh-www.adw.uni-heidelberg.de/edh/geog...,"Die erste Inschrift ist auf 276, die zweite a...",https://www.trismegistos.org/text/177273,{},{},177273,"['http://www.trismegistos.org/place/000078', '...",Imperatori Caesari Marco Annio Floriano Pio Fe...,276 AD,21,unbestimmt,89.0,Meilen-/Leugenstein,138,unbestimmt,1000.0,102.0,Meilen-/Leugenstein,mile-/leaguestone,Certain,44.0,24.0,,,mile-/leaguestone,Certain,Achaia,Certain,Greece,Certain,Tegea,Certain,Pelopónissos,Certain,Alea,Certain,"Stringu, bei",Certain,276 AD,An Floriano P F Aug p p m p III Imp Caes M Aur...,Imperatori Caesari Marco Annio Floriano Pio Fe...,"Stringu, bei",,"Tegea, Mus.",,,data available,,,Latin,276.0,"[22.4171, 37.4319]",Imperatori Caesari Marco Annio Floriano Pio Fe...,big,False,Tegea,97,46362.0,"[22.417226, 37.427653]",big,0.004249,POINT (22.41710 37.43190),mile-/leaguestone,1.0
83484,EDCS-28500283,"CIL 03, 07251 = D 00214 = NDIEC-07, p 81 = AE ...",Achaia,Achaia,Alea / Tegea,"['Alea', 'Tegea']",50,,"['Augusti/Augustae', 'leges', 'viri']",leges,"['Augusti/Augustae', 'viri']",{},,,T[(iberius)] Claudius Caesar Aug(ustus) / G[er...,T[(iberius)] Claudius Caesar Aug(ustus) / G[er...,http://db.edcs.eu/epigr/partner.php?s_language...,49.0,50,Augusti/Augustae; leges; viri,37.454501,22.420877,,,,HD021396,,Tiberius Claudius Caesar Augustus Germanicus p...,Cowey,public legal inscription,,"CIL 03, 07251.; ILS 0214.; AE 1941, 0119.; MAI...",checked with photo,160 cm,T[ ] CLAVDIVS CAESAR AVG / G[ ]ANICVS PONTIF M...,"[{'gender': 'male', 'cognomen': 'Caesar August...",17 cm,,tabula,Achaia,58 cm,T[i(berius)] Claudius Caesar Aug(ustus) / G[er...,Greece,https://edh-www.adw.uni-heidelberg.de/edh/insc...,Tegea,2011-05-10,Pelopónissos,Alea,https://edh-www.adw.uni-heidelberg.de/edh/geog...,,https://www.trismegistos.org/text/177131,{},{},177131,"['http://www.trismegistos.org/place/000078', '...",Tiberius Claudius Caesar Augus...,49 AD – 50 AD,21,unbestimmt,257.0,Tafel,138,unbestimmt,1000.0,,"Rechtliche Verfügung, öffentlich",public legal inscription,Certain,160.0,58.0,17.0,,tabula,Certain,Achaia,Certain,Greece,Certain,Tegea,Certain,Pelopónissos,Certain,Alea,Certain,,,49 AD – 50 AD,T Claudius Caesar Aug Ganicus pontif max trib ...,Tiberius Claudius Caesar Augustus Germanicus p...,,,,,,,,50.0,Latin,49.0,"[22.4171, 37.4319]",Tiberius Claudius Caesar Augustus Germanicus p...,big,False,Tegea,97,46362.0,"[22.417226, 37.427653]",big,0.004249,POINT (22.41710 37.43190),public legal inscription,1.0
83485,EDCS-09400671,"CIMRM-02, 02350 = IG-12, 00274 = Andros 00124 ...",Achaia,Achaia,Andros,Andros,209,,"['litterae erasae', 'tituli sacri']",tituli sacri,{},litterae erasae,,,Pro salute Imp(eratoris) Caesari(s) / L(uci) S...,Pro salute Imp(eratoris) Caesari(s) / L(uci) S...,http://db.edcs.eu/epigr/partner.php?s_language...,198.0,209,litterae erasae; tituli sacri,37.837612,24.937637,http://db.edcs.eu/epigr/bilder.php?bilder.php?...,,,HD011892,,Pro salute Imperatoris Caesaris Luci Septimi S...,Cowey,votive inscription,5.5-5.7 cm,"AE 1911, 0056. (B); T. Sauciuc, MDAI(R) 25, 19...",checked with photo,48 cm,PRO SALVTE IMP CAESARI / L SEPTIMI SEVERI ET M...,"[{'gender': 'male', 'praenomen': 'L.', 'name':...",,,,Achaia,126 cm,Pro salute Imp(eratorum) Caesari(s) / L(uci) S...,Greece,https://edh-www.adw.uni-heidelberg.de/edh/insc...,Palaeopolis,2017-05-30,Nótio Aigaío,Palaiópolis,https://edh-www.adw.uni-heidelberg.de/edh/geog...,Auf dem Foto sind die Buchstaben oft kaum erk...,https://www.trismegistos.org/text/177087,{},{},177087,[],Pro salute Imperatorum Caesari...,198 AD – 209 AD,21,unbestimmt,2.0,unbestimmt,138,unbestimmt,1000.0,80.0,Weihinschrift,votive inscription,Certain,48.0,126.0,,,,,Achaia,Certain,Greece,Certain,Palaeopolis,Certain,Nótio Aigaío,Certain,Palaiópolis,Certain,"to Elleniko Mauer, sekundär verwendet",Certain,198 AD – 209 AD,Pro salute Imp Caesari L Septimi Severi et M A...,Pro salute Imperatorum Caesaris Luci Septimi S...,"to Elleniko Mauer, sekundär verwendet",,"to Elleniko Mauer, vermauert",names of pagan deities,,,data available,209.0,Latin,198.0,"[24.8323, 37.8188]",Pro salute Imperatorum Caesaris Luci Septimi S...,rest,False,Ioulis,47,1000.0,"[24.34625, 37.633122]",minor,0.520308,POINT (24.83230 37.81880),votive inscription,1.0
83486,EDCS-24600769,"AE 1995, 01407 = AE 2001, 01812",Achaia,Achaia,Archea Olimpia / Archaia Olympia / Olympia,"['Archea Olimpia', 'Archaia Olympia', 'Olympia']",96,,{},{},{},{},,,Octa(vius) Sa(lutaris),Octa(vius) Sa(lutaris),http://db.edcs.eu/epigr/partner.php?s_language...,81.0,96,,37.64387,21.625513,,,,HD050999,,Octavius Salutaris,Cowey,owner/artist inscription,,"AE 1995, 1407.; U. Sinn u. a., Nikephoros 8, 1...",checked with photo,,OCTA SAL,"[{'person_id': '1', 'nomen': 'Octavius*', 'nam...",,,instrumentum domesticum,Achaia,,Octa(vius) Sal(utaris),Greece,https://edh-www.adw.uni-heidelberg.de/edh/insc...,Olympia,2012-04-16,Stereá Elláda,Olympia,https://edh-www.adw.uni-heidelberg.de/edh/geog...,(B): Sa(lutaris) fehlerhaft für Sal(utaris).,https://www.trismegistos.org/text/177272,{},{},177272,"['http://www.trismegistos.org/place/000078', '...",Octavius Salutaris,81 AD – 96 AD,21,unbestimmt,140.0,Instrumentum domesticum,138,unbestimmt,1000.0,311.0,Besitzer-/Herstellerinschrift,owner/artist inscription,Certain,,,,,instrumentum domesticum,Certain,Achaia,Certain,Greece,Certain,Olympia,Certain,Stereá Elláda,Certain,Olympia,Certain,Haus der Athleten,Certain,81 AD – 96 AD,Octa Sal,Octavius Salutaris,Haus der Athleten,,,,,,,96.0,Latin,81.0,"[21.6271, 37.6479]",Octavius Salutaris,big,False,Elis,35,1000.0,"[21.435443, 37.827452]",minor,0.262624,POINT (21.62710 37.64790),owner/artist inscription,1.0


In [348]:
# Number of rows whose predicted-type probability is strictly greater than 0.55
# (note: apply_threshold in this notebook rejects only prob < 0.55).
EDHCS_autotypes.loc[EDHCS_autotypes["type_of_inscription_auto_prob"] > 0.55].shape[0]

132112

In [349]:
# Number of rows whose predicted type is the literal string "NULL".
EDHCS_autotypes.loc[EDHCS_autotypes["type_of_inscription_auto"] == "NULL"].shape[0]

4339

In [350]:
def apply_threshold(inscr_type, prob, threshold=0.55):
    """Return the predicted inscription type only when it is usable.

    Parameters
    ----------
    inscr_type : str or None
        Predicted label. The literal string "NULL" is treated as "no label".
    prob : float or None
        Probability attached to the prediction.
    threshold : float, default 0.55
        Inclusive lower bound on ``prob``; predictions with a probability
        below it are discarded. The default keeps the cut-off previously
        hard-coded in this function.

    Returns
    -------
    str or None
        ``inscr_type`` unchanged when it is not "NULL" and ``prob`` is at
        least ``threshold``; otherwise ``None``.
    """
    if inscr_type == "NULL":
        return None
    # A missing probability (None/NaN) cannot pass the threshold: NaN fails
    # every comparison, so `prob < threshold` alone would let it through.
    if pd.isna(prob) or prob < threshold:
        return None
    return inscr_type
# Overwrite the predicted-type column with the result of apply_threshold,
# evaluated row by row on (predicted type, predicted probability).
EDHCS_autotypes["type_of_inscription_auto"] = EDHCS_autotypes.apply(
    lambda record: apply_threshold(
        record["type_of_inscription_auto"],
        record["type_of_inscription_auto_prob"],
    ),
    axis=1,
)

In [365]:
def prob_to_zero(inscr_type, prob):
    """Return 0 as the probability when no inscription type is assigned.

    Parameters
    ----------
    inscr_type : str or None
        Predicted label; ``None`` (or NaN) means the row has no label.
    prob : float
        Probability currently stored for the prediction.

    Returns
    -------
    float or int
        ``0`` when ``inscr_type`` is missing, otherwise ``prob`` unchanged.
    """
    # pd.isna covers None as well as NaN, so a label stored as a float NaN
    # is treated as missing too (the former `== None` test only matched None).
    if pd.isna(inscr_type):
        return 0
    return prob
# Overwrite the probability column with the result of prob_to_zero,
# evaluated row by row on (predicted type, predicted probability).
EDHCS_autotypes["type_of_inscription_auto_prob"] = EDHCS_autotypes.apply(
    lambda record: prob_to_zero(
        record["type_of_inscription_auto"],
        record["type_of_inscription_auto_prob"],
    ),
    axis=1,
)

In [366]:
# Number of rows that still carry a (non-null) predicted type.
EDHCS_autotypes.loc[EDHCS_autotypes["type_of_inscription_auto"].notnull()].shape[0]

127773

In [367]:
# Number of rows that carry a predicted type AND have a probability
# strictly greater than 0.55.
EDHCS_autotypes.loc[
    EDHCS_autotypes["type_of_inscription_auto"].notnull()
    & (EDHCS_autotypes["type_of_inscription_auto_prob"] > 0.55)
].shape[0]

127773

In [368]:
# Open an sddk cloud session on sciencedata.dk for the folder
# "SDAM_root/SDAM_data/EDHCS" and keep it in `s` for the upload further down.
# Per the recorded output, the call prompts interactively for the ScienceData
# username and password.
# NOTE(review): the third argument looks like the account that owns the shared
# folder — confirm against the sddk documentation. The account id is hard-coded
# here; consider reading it from configuration instead.
s = sddk.cloudSession("sciencedata.dk", "SDAM_root/SDAM_data/EDHCS", "648597@au.dk")

Your ScienceData username (e.g. '123456@au.dk'): 648597@au.dk
Your ScienceData password: ········
connection with shared folder established with you as its owner
endpoint variable has been configured to: https://sciencedata.dk/files/SDAM_root/SDAM_data/EDHCS/


In [371]:
# Preview the first five rows, including the two new auto-classification columns.
EDHCS_autotypes.head()

Unnamed: 0,EDCS-ID,publication,province,province_list,place,place_list,end_yr_list,notes_dating,status_list,inscr_type,status_notation,inscr_process,notes_references,notes_comments,inscription,inscription_stripped_final,Links,dating from,dating to,status,Latitude,Longitude,photo,Material,Comment,EDH-ID,language_EDCS,clean_text_interpretive_word_EDCS,responsible_individual,type_of_inscription,letter_size,literature,work_status,height,diplomatic_text,people,depth,material,type_of_monument,province_label,width,transcription,country,uri,findspot_ancient,last_update,modern_region,findspot_modern,edh_geography_uri,commentary,trismegistos_uri,external_image_uris,fotos,idno_tm,placenames_refs,text_edition,origdate_text,layout_execution,layout_execution_text,support_objecttype,support_objecttype_text,support_material,support_material_text,support_decoration,keywords_term,keywords_term_text,type_of_inscription_clean,type_of_inscription_certainty,height_cm,width_cm,depth_cm,material_clean,type_of_monument_clean,type_of_monument_certainty,province_label_clean,province_label_certainty,country_clean,country_certainty,findspot_ancient_clean,findspot_ancient_certainty,modern_region_clean,modern_region_certainty,findspot_modern_clean,findspot_modern_certainty,findspot_clean,findspot_certainty,origdate_text_clean,clean_text_conservative,clean_text_interpretive_sentence,findspot,year_of_find,present_location,religion,geography,social_economic_legal_history,military,not_after,language,not_before,coordinates,clean_text_interpretive_word,urban_context,within_rome,nearest_city,city_id_hanson,city_pop_est,city_geometry,nearest_city_type,nearest_city_dist,geometry,type_of_inscription_auto,type_of_inscription_auto_prob
83482,EDCS-03700724,"ZPE-108-159 = Thesprotia 00001 = AE 1993, 0140...",Achaia,Achaia,Agios Athanasios / Photike,"['Agios Athanasios', 'Photike']",313,,"['Augusti/Augustae', 'litterae erasae', 'ordo ...",tituli honorarii,"['Augusti/Augustae', 'ordo equester', 'tria no...",litterae erasae,,,Fortissimo et Piis/simo Caesari d(omino) n(ost...,Fortissimo et Piis/simo Caesari d(omino) n(ost...,http://db.edcs.eu/epigr/partner.php?s_language...,309.0,313,Augusti/Augustae; litterae erasae; ordo equest...,39.451218,20.766767,http://db.edcs.eu/epigr/bilder.php?bilder.php?...,,,HD052964,,Fortissimo et Piissimo Caesari domino nostro G...,Cowey,honorific inscription,3-5.3 cm,"AE 1993, 1406.; V. Papadopoulou, AD 43 B, 1988...",checked with photo,99 cm,FORTISSIMO ET PIIS / SIMO CAESARI D N / GAL VA...,"[{'name': 'Gal. Val. [[Maximiano]]', 'nomen': ...",67 cm,,statue base,Epirus,67 cm,Fortissimo et piis/simo Caesari d(omino) n(ost...,Greece,https://edh-www.adw.uni-heidelberg.de/edh/insc...,Photike,2014-09-16,Ípeiros,Paramythía,https://edh-www.adw.uni-heidelberg.de/edh/geog...,,https://www.trismegistos.org/text/121715,{},{},121715,"['http://www.trismegistos.org/place/000655', '...",Fortissimo et piissimo Caesari...,309 AD – 313 AD,21,unbestimmt,57.0,Statuenbasis,138,unbestimmt,1000.0,69.0,Ehreninschrift,honorific inscription,Certain,99.0,67.0,67.0,,statue base,Certain,Epirus,Certain,Greece,Certain,Photike,Certain,Ípeiros,Certain,Paramythía,Certain,Agios Athanasios,Certain,309 AD – 313 AD,Fortissimo et piissimo Caesari d n Gal Val P F...,Fortissimo et piissimo Caesari domino nostro G...,Agios Athanasios,,Fragma Kalama,,data available,,,313.0,Latin,309.0,"[20.7668, 39.4512]",Fortissimo et piissimo Caesari domino nostro G...,rest,False,Dodona,31,1000.0,"[20.787767, 39.546432]",minor,0.097513,POINT (20.76680 39.45120),honorific inscription,1.0
83483,EDCS-03300852,"AE 1995, 01409",Achaia,Achaia,Alea / Tegea,"['Alea', 'Tegea']",276,to 276; b: 276 to 282 \n\n,"['Augusti/Augustae', 'miliaria', 'viri']",miliaria,"['Augusti/Augustae', 'viri']",{},,,to 276; b: 276 to 282 \n\n \n \nImp...,Imp(eratori) / Floriano / P(io) F(elici) Aug(u...,http://db.edcs.eu/epigr/partner.php?s_language...,,276,Augusti/Augustae; miliaria; viri,37.454501,22.420877,,lapis,,HD051000,,Imperatori Floriano Pio Felici Augusto patri p...,Cowey,mile-/leaguestone,3.3-6 cm,"AE 1995, 1409.; M. Iozzer - M. Pangano, ASAA 6...",checked with photo,(44) cm,[ ] / AN[ ] FLORIANO / P F AVG / P P / M P III...,"[{'person_id': '1', 'nomen': 'Annius+', 'name'...",,,mile-/leaguestone,Achaia,24 cm,[Imp(eratori) Caes(ari) M(arco)] / An[nio] Flo...,Greece,https://edh-www.adw.uni-heidelberg.de/edh/insc...,Tegea,2011-05-24,Pelopónissos,Alea,https://edh-www.adw.uni-heidelberg.de/edh/geog...,"Die erste Inschrift ist auf 276, die zweite a...",https://www.trismegistos.org/text/177273,{},{},177273,"['http://www.trismegistos.org/place/000078', '...",Imperatori Caesari Marco Annio Floriano Pio Fe...,276 AD,21,unbestimmt,89.0,Meilen-/Leugenstein,138,unbestimmt,1000.0,102.0,Meilen-/Leugenstein,mile-/leaguestone,Certain,44.0,24.0,,,mile-/leaguestone,Certain,Achaia,Certain,Greece,Certain,Tegea,Certain,Pelopónissos,Certain,Alea,Certain,"Stringu, bei",Certain,276 AD,An Floriano P F Aug p p m p III Imp Caes M Aur...,Imperatori Caesari Marco Annio Floriano Pio Fe...,"Stringu, bei",,"Tegea, Mus.",,,data available,,,Latin,276.0,"[22.4171, 37.4319]",Imperatori Caesari Marco Annio Floriano Pio Fe...,big,False,Tegea,97,46362.0,"[22.417226, 37.427653]",big,0.004249,POINT (22.41710 37.43190),mile-/leaguestone,1.0
83484,EDCS-28500283,"CIL 03, 07251 = D 00214 = NDIEC-07, p 81 = AE ...",Achaia,Achaia,Alea / Tegea,"['Alea', 'Tegea']",50,,"['Augusti/Augustae', 'leges', 'viri']",leges,"['Augusti/Augustae', 'viri']",{},,,T[(iberius)] Claudius Caesar Aug(ustus) / G[er...,T[(iberius)] Claudius Caesar Aug(ustus) / G[er...,http://db.edcs.eu/epigr/partner.php?s_language...,49.0,50,Augusti/Augustae; leges; viri,37.454501,22.420877,,,,HD021396,,Tiberius Claudius Caesar Augustus Germanicus p...,Cowey,public legal inscription,,"CIL 03, 07251.; ILS 0214.; AE 1941, 0119.; MAI...",checked with photo,160 cm,T[ ] CLAVDIVS CAESAR AVG / G[ ]ANICVS PONTIF M...,"[{'gender': 'male', 'cognomen': 'Caesar August...",17 cm,,tabula,Achaia,58 cm,T[i(berius)] Claudius Caesar Aug(ustus) / G[er...,Greece,https://edh-www.adw.uni-heidelberg.de/edh/insc...,Tegea,2011-05-10,Pelopónissos,Alea,https://edh-www.adw.uni-heidelberg.de/edh/geog...,,https://www.trismegistos.org/text/177131,{},{},177131,"['http://www.trismegistos.org/place/000078', '...",Tiberius Claudius Caesar Augus...,49 AD – 50 AD,21,unbestimmt,257.0,Tafel,138,unbestimmt,1000.0,,"Rechtliche Verfügung, öffentlich",public legal inscription,Certain,160.0,58.0,17.0,,tabula,Certain,Achaia,Certain,Greece,Certain,Tegea,Certain,Pelopónissos,Certain,Alea,Certain,,,49 AD – 50 AD,T Claudius Caesar Aug Ganicus pontif max trib ...,Tiberius Claudius Caesar Augustus Germanicus p...,,,,,,,,50.0,Latin,49.0,"[22.4171, 37.4319]",Tiberius Claudius Caesar Augustus Germanicus p...,big,False,Tegea,97,46362.0,"[22.417226, 37.427653]",big,0.004249,POINT (22.41710 37.43190),public legal inscription,1.0
83485,EDCS-09400671,"CIMRM-02, 02350 = IG-12, 00274 = Andros 00124 ...",Achaia,Achaia,Andros,Andros,209,,"['litterae erasae', 'tituli sacri']",tituli sacri,{},litterae erasae,,,Pro salute Imp(eratoris) Caesari(s) / L(uci) S...,Pro salute Imp(eratoris) Caesari(s) / L(uci) S...,http://db.edcs.eu/epigr/partner.php?s_language...,198.0,209,litterae erasae; tituli sacri,37.837612,24.937637,http://db.edcs.eu/epigr/bilder.php?bilder.php?...,,,HD011892,,Pro salute Imperatoris Caesaris Luci Septimi S...,Cowey,votive inscription,5.5-5.7 cm,"AE 1911, 0056. (B); T. Sauciuc, MDAI(R) 25, 19...",checked with photo,48 cm,PRO SALVTE IMP CAESARI / L SEPTIMI SEVERI ET M...,"[{'gender': 'male', 'praenomen': 'L.', 'name':...",,,,Achaia,126 cm,Pro salute Imp(eratorum) Caesari(s) / L(uci) S...,Greece,https://edh-www.adw.uni-heidelberg.de/edh/insc...,Palaeopolis,2017-05-30,Nótio Aigaío,Palaiópolis,https://edh-www.adw.uni-heidelberg.de/edh/geog...,Auf dem Foto sind die Buchstaben oft kaum erk...,https://www.trismegistos.org/text/177087,{},{},177087,[],Pro salute Imperatorum Caesari...,198 AD – 209 AD,21,unbestimmt,2.0,unbestimmt,138,unbestimmt,1000.0,80.0,Weihinschrift,votive inscription,Certain,48.0,126.0,,,,,Achaia,Certain,Greece,Certain,Palaeopolis,Certain,Nótio Aigaío,Certain,Palaiópolis,Certain,"to Elleniko Mauer, sekundär verwendet",Certain,198 AD – 209 AD,Pro salute Imp Caesari L Septimi Severi et M A...,Pro salute Imperatorum Caesaris Luci Septimi S...,"to Elleniko Mauer, sekundär verwendet",,"to Elleniko Mauer, vermauert",names of pagan deities,,,data available,209.0,Latin,198.0,"[24.8323, 37.8188]",Pro salute Imperatorum Caesaris Luci Septimi S...,rest,False,Ioulis,47,1000.0,"[24.34625, 37.633122]",minor,0.520308,POINT (24.83230 37.81880),votive inscription,1.0
83486,EDCS-24600769,"AE 1995, 01407 = AE 2001, 01812",Achaia,Achaia,Archea Olimpia / Archaia Olympia / Olympia,"['Archea Olimpia', 'Archaia Olympia', 'Olympia']",96,,{},{},{},{},,,Octa(vius) Sa(lutaris),Octa(vius) Sa(lutaris),http://db.edcs.eu/epigr/partner.php?s_language...,81.0,96,,37.64387,21.625513,,,,HD050999,,Octavius Salutaris,Cowey,owner/artist inscription,,"AE 1995, 1407.; U. Sinn u. a., Nikephoros 8, 1...",checked with photo,,OCTA SAL,"[{'person_id': '1', 'nomen': 'Octavius*', 'nam...",,,instrumentum domesticum,Achaia,,Octa(vius) Sal(utaris),Greece,https://edh-www.adw.uni-heidelberg.de/edh/insc...,Olympia,2012-04-16,Stereá Elláda,Olympia,https://edh-www.adw.uni-heidelberg.de/edh/geog...,(B): Sa(lutaris) fehlerhaft für Sal(utaris).,https://www.trismegistos.org/text/177272,{},{},177272,"['http://www.trismegistos.org/place/000078', '...",Octavius Salutaris,81 AD – 96 AD,21,unbestimmt,140.0,Instrumentum domesticum,138,unbestimmt,1000.0,311.0,Besitzer-/Herstellerinschrift,owner/artist inscription,Certain,,,,,instrumentum domesticum,Certain,Achaia,Certain,Greece,Certain,Olympia,Certain,Stereá Elláda,Certain,Olympia,Certain,Haus der Athleten,Certain,81 AD – 96 AD,Octa Sal,Octavius Salutaris,Haus der Athleten,,,,,,,96.0,Latin,81.0,"[21.6271, 37.6479]",Octavius Salutaris,big,False,Elis,35,1000.0,"[21.435443, 37.827452]",minor,0.262624,POINT (21.62710 37.64790),owner/artist inscription,1.0


In [372]:
# Upload EDHCS_autotypes as "EDHCSg.geojson" through the cloud session `s`.
# The recorded output shows that a file with this name already existed at the
# destination and was overwritten after an interactive confirmation.
s.write_file("EDHCSg.geojson", EDHCS_autotypes)

A file with the same name ("EDHCSg.geojson") already exists in this location.
Press Enter to overwrite it or choose different path and filename: 
Your <class 'geopandas.geodataframe.GeoDataFrame'> object has been succefully written as "https://sciencedata.dk/files/SDAM_root/SDAM_data/EDHCS/EDHCSg.geojson"


In [359]:
# Print the test-set threshold table as GitHub-flavoured markdown text.
print(
    test_results_df.to_markdown(tablefmt="github")
)

|    |   threshold (=/>) |   proportion |    N |   correct |
|----|-------------------|--------------|------|-----------|
|  0 |              0.3  |         1    | 4601 |  0.862421 |
|  1 |              0.35 |         0.94 | 4337 |  0.890247 |
|  2 |              0.4  |         0.93 | 4310 |  0.8942   |
|  3 |              0.45 |         0.91 | 4213 |  0.898647 |
|  4 |              0.5  |         0.9  | 4162 |  0.90197  |
|  5 |              0.55 |         0.89 | 4092 |  0.90567  |
|  6 |              0.6  |         0.86 | 3955 |  0.908976 |
|  7 |              0.65 |         0.84 | 3898 |  0.910723 |
|  8 |              0.7  |         0.71 | 3264 |  0.957414 |
|  9 |              0.75 |         0.67 | 3078 |  0.968811 |
| 10 |              0.8  |         0.58 | 2694 |  0.971047 |
| 11 |              0.85 |         0.51 | 2336 |  0.977312 |
| 12 |              0.9  |         0.42 | 1957 |  0.984159 |
| 13 |              0.95 |         0.34 | 1563 |  0.989763 |


In [384]:
# Print the test-set threshold table as LaTeX source, without the index column.
print(
    test_results_df.to_latex(index=False)
)

\begin{tabular}{rrrr}
\toprule
 threshold (=/>) &  proportion &     N &   correct \\
\midrule
            0.30 &        1.00 &  4601 &  0.862421 \\
            0.35 &        0.94 &  4337 &  0.890247 \\
            0.40 &        0.93 &  4310 &  0.894200 \\
            0.45 &        0.91 &  4213 &  0.898647 \\
            0.50 &        0.90 &  4162 &  0.901970 \\
            0.55 &        0.89 &  4092 &  0.905670 \\
            0.60 &        0.86 &  3955 &  0.908976 \\
            0.65 &        0.84 &  3898 &  0.910723 \\
            0.70 &        0.71 &  3264 &  0.957414 \\
            0.75 &        0.67 &  3078 &  0.968811 \\
            0.80 &        0.58 &  2694 &  0.971047 \\
            0.85 &        0.51 &  2336 &  0.977312 \\
            0.90 &        0.42 &  1957 &  0.984159 \\
            0.95 &        0.34 &  1563 &  0.989763 \\
\bottomrule
\end{tabular}



In [361]:
# Print the model-application threshold table as GitHub-flavoured markdown text.
print(
    model_results_df.to_markdown(tablefmt="github")
)

|    |   threshold (=/>) |   proportion |     N |
|----|-------------------|--------------|-------|
|  0 |              0.3  |         0.99 | 82791 |
|  1 |              0.35 |         0.98 | 81618 |
|  2 |              0.4  |         0.97 | 81048 |
|  3 |              0.45 |         0.95 | 79585 |
|  4 |              0.5  |         0.95 | 78929 |
|  5 |              0.55 |         0.94 | 78289 |
|  6 |              0.6  |         0.93 | 77384 |
|  7 |              0.65 |         0.92 | 76863 |
|  8 |              0.7  |         0.73 | 61115 |
|  9 |              0.75 |         0.71 | 59070 |
| 10 |              0.8  |         0.61 | 50587 |
| 11 |              0.85 |         0.56 | 46450 |
| 12 |              0.9  |         0.49 | 40601 |
| 13 |              0.95 |         0.43 | 36118 |


In [383]:
# Print the model-application threshold table as LaTeX source, without the index column.
print(
    model_results_df.to_latex(index=False)
)

\begin{tabular}{rrr}
\toprule
 threshold (=/>) &  proportion &      N \\
\midrule
            0.30 &        0.99 &  82791 \\
            0.35 &        0.98 &  81618 \\
            0.40 &        0.97 &  81048 \\
            0.45 &        0.95 &  79585 \\
            0.50 &        0.95 &  78929 \\
            0.55 &        0.94 &  78289 \\
            0.60 &        0.93 &  77384 \\
            0.65 &        0.92 &  76863 \\
            0.70 &        0.73 &  61115 \\
            0.75 &        0.71 &  59070 \\
            0.80 &        0.61 &  50587 \\
            0.85 &        0.56 &  46450 \\
            0.90 &        0.49 &  40601 \\
            0.95 &        0.43 &  36118 \\
\bottomrule
\end{tabular}



In [381]:
# Rows of EDHCSg that carry both an EDH identifier and an EDCS identifier.
EDH_overlap_all = EDHCSg.loc[
    EDHCSg["EDH-ID"].notnull() & EDHCSg["EDCS-ID"].notnull()
]
# Number of rows per cleaned EDH inscription type, most frequent first.
dict(
    EDH_overlap_all
    .groupby("type_of_inscription_clean")
    .size()
    .sort_values(ascending=False)
)

{'epitaph': 21520,
 'votive inscription': 11728,
 'NULL': 3745,
 'owner/artist inscription': 3340,
 'honorific inscription': 3003,
 'building/dedicatory inscription': 2561,
 'mile-/leaguestone': 1307,
 'identification inscription': 850,
 'acclamation': 287,
 'defixio': 269,
 'list': 246,
 'military diploma': 209,
 'label': 194,
 'boundary inscription': 175,
 'elogium': 132,
 'letter': 119,
 'public legal inscription': 109,
 'seat inscription': 42,
 'private legal inscription': 36,
 'prayer': 18,
 'assignation inscription': 15,
 'calendar': 10,
 'adnuntiatio': 1}

In [382]:
# Number of rows per automatically assigned inscription type, most frequent first.
dict(
    EDHCS_autotypes
    .groupby("type_of_inscription_auto")
    .size()
    .sort_values(ascending=False)
)

{'epitaph': 85259,
 'votive inscription': 16338,
 'owner/artist inscription': 10677,
 'honorific inscription': 5344,
 'building/dedicatory inscription': 3634,
 'mile-/leaguestone': 2798,
 'identification inscription': 1205,
 'boundary inscription': 399,
 'acclamation': 368,
 'military diploma': 348,
 'list': 338,
 'defixio': 298,
 'label': 220,
 'elogium': 141,
 'public legal inscription': 137,
 'letter': 130,
 'seat inscription': 46,
 'private legal inscription': 39,
 'assignation inscription': 21,
 'prayer': 20,
 'calendar': 11,
 'adnuntiatio': 2}