# Requirements

In [50]:
import sddk
import json
import re
import pandas as pd
import geopandas as gpd
import nltk
pd.options.display.max_columns = 1000 # to see all columns
import warnings
warnings.filterwarnings('ignore')

# Loading datasets

In [52]:
LIRE = gpd.read_file("https://zenodo.org/record/5074774/files/LIREg.geojson?download=1", driver="GeoJSON")

In [53]:
LIRE.shape

(137305, 112)

# Function to extract occupations

In [54]:
deities_csv = pd.read_csv("../data/deities_decline.csv").reset_index()
deities_csv

Unnamed: 0,index,nom_sg,gen_sg,dat_sg,acc_sg,abl_sg,voc_sg,special_suffix1,special_suffix2,special_suffix3
0,Asclepius,Asclepii,Asclepio,Asclepium,Asclepio,Asclepie,,,,
1,Aesculapius,Aesculapii,Aesculapio,Aesculapium,Aesculapio,Aesculapie,Aesculapi,Aesculapem,Aesculapeo,
2,Esculapius,Esculapii,Esculapio,Esculapium,Esculapio,Esculapie,,,,
3,Asclepios,Asclepii,Asclepio,Asclepium,Asclepio,Asclepie,,,,
4,Apollo,Apollonis,Apolloni,Apollonem,Apollone,Apollo,,,,
5,Apollon,Apollonis,Apolloni,Apollonem,Apollone,Apollon,,,,
6,Apolon,Apolonis,Apoloni,Apolonem,Apolone,Apolon,,,,
7,Apolin,Apolinis,Apolini,Apolinem,Apoline,Apolin,,,,
8,Apollin,Apollinis,Apollini,Apollinem,Apolline,Apollin,,,,
9,Iupiter Optimus Maximus,Iovis Optimi Maximi,Iovi Optimo Maximo,Iovem Optimum Maximum,Iove Optimo Maximo,Iupiter Optimus Maximus,,,,


In [55]:
deities_dict = {}

asclep = []
for n in range(4):
    asclep.extend([el for el in deities_csv.loc[n].tolist() if isinstance(el, str)])
deities_dict["asclep"] = list(set(asclep))

apollo = []
for n in range(4,9):
    apollo.extend([el for el in deities_csv.loc[n].tolist() if isinstance(el, str)])
deities_dict["apollo"] = list(set(apollo))

jupiter = []
for n in range(9,13):
    jupiter.extend([el for el in deities_csv.loc[n].tolist() if isinstance(el, str)])
deities_dict["jupiter"] = list(set(jupiter))

In [56]:
deities_dict["apollo"]

['Apolinem',
 'Apolin',
 'Apollini',
 'Apolini',
 'Apollinis',
 'Apolline',
 'Apolone',
 'Apollo',
 'Apolloni',
 'Apollonis',
 'Apollone',
 'Apolon',
 'Apollonem',
 'Apolonem',
 'Apollin',
 'Apollinem',
 'Apoloni',
 'Apolonis',
 'Apoline',
 'Apollon',
 'Apolinis']

In [57]:
def eval_list(people_list):
    try: return eval(people_list)
    except: return []
LIRE["people"] = LIRE["people"].apply(eval_list)

In [58]:
def test_extract(text):
    try:
        if re.search("(\W|^)Apollini(\W|$)", text):
            return True
        else:
            return False
    except:
        return False
len(LIRE[LIRE["clean_text_interpretive_word"].apply(test_extract)])

361

In [59]:
def extract_deity(inscription_text, deity):
    if not isinstance(inscription_text, str): # if not valid string
        inscription_text = ""
    to_return = False
    for deity_morph in deities_dict[deity]:
        if deity_morph in inscription_text: # first check it this way, otherwise skip
            if re.search("(\W|^)" + deity_morph + "(\W|$)", inscription_text):
                to_return = True
                break
    return to_return

In [129]:
LIRE["asclepius"] = LIRE["clean_text_interpretive_word"].apply(extract_deity, deity="asclep")
LIRE["apollo"] = LIRE["clean_text_interpretive_word"].apply(extract_deity, deity="apollo")
LIRE["jupiter"] = LIRE["clean_text_interpretive_word"].apply(extract_deity, deity="jupiter")

In [130]:
print(len(LIRE[LIRE["asclepius"]]))
print(len(LIRE[LIRE["apollo"]]))
print(len(LIRE[LIRE["jupiter"]]))

238
495
2868


ok, it is not bad, previously we had 156, 306, and 2598 in EDH and 90, 205, and 391 in EDCS.

In [143]:
# however, we have to check the people attribute...

def check_name_in_people(people_list, name):
    try: return bool(re.search("(\W|^){0}(\W|$)".format(name), str(people_list)))
    #try: return any([el for el in people_list if re.search("(\W|^){0}(\W|$)".format(name), el["nomen"]) or re.search("(\W|^){0}(\W|$)".format(name), el["cognomen"])])
    except: return False

name = "Asclepius"
LIRE["asclepius_people"] = LIRE["people"].apply(lambda x: check_name_in_people(x, name))
name = "Apollo"
LIRE["apollo_people"] = LIRE["people"].apply(lambda x: check_name_in_people(x, name))

In [146]:
def check_people(deity, people_deity):
    if deity: 
        if people_deity:
            return False
        else:
            return True
    else:
        return False

LIRE["asclepius"] = LIRE.apply(lambda row: check_people(row["asclepius"], row["asclepius_people"]), axis=1)
LIRE["apollo"] = LIRE.apply(lambda row: check_people(row["apollo"], row["apollo_people"]), axis=1)

In [147]:
len(LIRE[LIRE["asclepius"]])

224

In [148]:
len(LIRE[LIRE["apollo"]])

490

In [149]:
print(len(LIRE[LIRE["asclepius"]]))
print(len(LIRE[LIRE["apollo"]]))
print(len(LIRE[LIRE["jupiter"]]))

224
490
2868


In [150]:
LIRE.drop(["asclepius_people", "apollo_people"], axis=1, inplace=True)

In [151]:
def list_to_str(people_list):
    try: return str(people_list)
    except: return ""
LIRE["people"] = LIRE["people"].apply(list_to_str)

In [152]:
# for manual check: 
LIRE_asclep = LIRE[LIRE["asclepius"]]
len(LIRE_asclep)

224

In [153]:
LIRE_asclep.to_csv("../data/asclep.csv")

In [154]:
# exclude manually identified false positive in EDCS
asclep_to_exclude = ["EDCS-08201037",
"EDCS-05800274",
"EDCS-70100030",
"EDCS-05801317",
"EDCS-11501437",
"EDCS-13000097",
"EDCS-17200150",
"EDCS-18700374",
"EDCS-14801088",
"EDCS-14801924",
"EDCS-14802923",
"EDCS-11501343",
"EDCS-40300683",
"EDCS-14805762"]

for edcs_id in asclep_to_exclude:
    LIRE.at[LIRE.loc[LIRE["EDCS-ID"]==edcs_id].index[0], "asclepius"] = False

In [155]:
# for manual check: 
LIRE_asclep = LIRE[LIRE["asclepius"]]
len(LIRE_asclep)

210

In [161]:
LIRE.shape

(137305, 115)

In [206]:
LIRE["people"].tolist()

["[{'name': 'Gal. Val. [[Maximiano]]', 'nomen': 'Galerius* Valerius*', 'gender': 'male', 'person_id': '1', 'cognomen': 'Maximianus++'}, {'name': 'Coranius Titianus', 'nomen': 'Coranius', 'gender': 'male', 'person_id': '2', 'cognomen': 'Titianus', 'status': 'senatorial order'}]",
 "[{'person_id': '1', 'nomen': 'Annius+', 'name': '[M.] An[nio] Floriano', 'gender': 'male', 'praenomen': 'M.+', 'cognomen': 'Florianus'}, {'person_id': '2', 'nomen': 'Aurelius*', 'name': 'M. Aur. Probo', 'gender': 'male', 'praenomen': 'M.', 'cognomen': 'Probus'}]",
 "[{'gender': 'male', 'cognomen': 'Caesar Augustus* Germanicus+', 'nomen': 'Claudius', 'praenomen': 'Ti.+', 'name': 'T[i.] Claudius Caesar Aug. G[erm]anicus', 'person_id': '1'}]",
 "[{'gender': 'male', 'praenomen': 'L.', 'name': 'L. Septimi Severi', 'person_id': '1', 'nomen': 'Septimius', 'cognomen': 'Severus'}, {'gender': 'male', 'praenomen': 'M.', 'name': 'M. Aur. Antonini', 'person_id': '2', 'nomen': 'Aurelius*', 'cognomen': 'Antoninus'}, {'gende

In [207]:
def clean_entry(entry):
    try: entry = eval(entry)
    except: pass
    if isinstance(entry, list):
        new_entry = entry
    elif isinstance(entry, str):
        new_entry = [entry]
    else: 
        new_entry = []
    return new_entry

LIRE["status_list"] = LIRE["status_list"].apply(clean_entry)
LIRE["inscr_type"] = LIRE["inscr_type"].apply(clean_entry)
LIRE["status_notation"] = LIRE["status_notation"].apply(clean_entry)
LIRE["inscr_process"] = LIRE["inscr_process"].apply(clean_entry)
LIRE["external_image_uris"] = LIRE["external_image_uris"].apply(clean_entry)
LIRE["fotos"] = LIRE["fotos"].apply(clean_entry)
LIRE["placenames_refs"] = LIRE["placenames_refs"].apply(clean_entry)
LIRE["people"] = LIRE["people"].apply(clean_entry)

In [208]:
LIRE.to_parquet("../data/large_files/LIRE_deities.parquet")

In [157]:
# LIRE.to_parquet("../data/large_files/LIRE_deities.parquet")
LIRE.to_file("../data/large_files/LIRE_deities.geojson", driver="GeoJSON")

In [37]:
#LIRE = gpd.read_file("../data/large_files/LIRE_deities.parquet")
LIRE = gpd.read_file("../data/large_files/LIRE_deities.geojson", driver="GeoJSON")