In [None]:
# TODO: add https://en.wikipedia.org/wiki/List_of_Greek_place_names

In [1]:
import json

In [2]:
with open("pleiades-places-latest.json", "rb") as fo:
    pleiades = json.load(fo)

In [3]:
len(pleiades["@graph"])

37500

In [4]:
def is_greek_char(char: str):
    if 880 <= ord(char) <= 1023 or 7936 <= ord(char) <= 8191:
        return True
    return False

is_greek_char("ά")

True

In [30]:
pleiades_names = list()
for item in pleiades["@graph"]:
    for name in item["names"]:
        # "ancient greek" only in name["description"] 55 times, so not reliable; "greek" 129
        # https://www.ssec.wisc.edu/~tomw/java/unicode.html
        # Ancient Greek hex range: 0370—03FF == 880-1023
        # Greek Extended: 1F00—1FFF == 7936-8191
        # if all(), then 1329 Greek names are found
        # if any(), then 1585; not a big diff, use the ones we know are right
        if name["attested"] and all([is_greek_char(x) for x in name["attested"]]):
                pleiades_names.append(name["attested"])
print("Total:", len(pleiades_names))

Total: 1329


In [35]:
# also "place names" from wiki: https://en.wikipedia.org/wiki/List_of_Greek_place_names
with open("f.txt") as fo:
    wiki_names = fo.readlines()
wiki_names = [s.strip() for s in place_names]

In [36]:
len(wiki_names)

832

In [37]:
print(wiki_names[:3])

['Ἄβδηρα', 'Ἄβυδος', 'Ἀγαθὴ Τύχη']


In [40]:
names = pleiades_names + wiki_names
print(len(names))

2161


In [42]:
# remove multiword place names
# this can be reenabled later
names = [n for n in names if len(n.split()) == 1]
print(len(names))

2135


In [60]:
# trim suffix of each word unless short
# doing this as a naive match to declined forms in real texts
names_new = list()
for name in names:
    femenine_endings = ["ά", "α", "η", "ή"]
    masculine_neuter_endings = ["ος", "ός", "ον", "όν"]
    consonants = list("βγδεζθκλμνξπρσςτυφχψω")
    if not len(name) > 3:
        names_new.append(name)
    elif any([end for end in femenine_endings if name.endswith(end)]):
        names_new.append(name[:-1])
    elif any([end for end in masculine_neuter_endings if name.endswith(end)]):
        names_new.append(name[:-2])
    elif any([end for end in consonants if name.endswith(end)]):
        names_new.append(name)
    else:
        names_new.append(name)
print(names_new)

['Μαινάκῃ', 'Ὠρητανοί', 'Πιτυοῦσσαι', 'Σκομβραρί', 'Ἀβίλυχ', 'Ἥρας', 'Λίξ', 'Καρμανί', 'Ζουχάββαρι', 'Κεδρωσί', 'Γαδρωσί', 'Γεδρωσί', 'Ὀάρακτ', 'Προφθασί', 'Σάρνι', 'Κώφας', 'Ὠγυρις', 'Ὀργάν', 'Νόαρ', 'Βήθηλά', 'Βήθηγ', 'Κουκονήσι', 'Δισπηλιό', 'Ἰάνουκλ', 'Στροφάδες', 'Πλωταί', 'Στροφαδεύς', 'Μασσαλί', 'Νεμαύσ', 'Βιντιμιλίω', 'Ἀλβίγγαυν', 'Ὀξύβιοι', 'Βαβυλωνί', 'Ἐλυμαίς', 'Εὐφράτης', 'Χοάσπης', 'Σοῦσ', 'Φουρνί', 'Σώζουσ', 'Βάρκ', 'Βερενίκ', 'Κυρηναί', 'Εὐεσπερίδας', 'Παλίουρ', 'Φιδεντί', 'Ἴνσουβροι', 'Λιγυστικὴ', 'Πάρμ', 'Ῥήγι', 'Τικῖν', 'Ἰκτούμουλαι', 'Ἀλαλαίου', 'Αὐαλίτης', 'Κασσανῖται', '᾿οπιτέργι', 'Σάπις', 'Ὁμηρίται', 'Μαλίχου', 'Μιναῖοι', 'Ὀφρύνει', 'Ἀλλιανοί', 'Πακτωλὸς', 'Πιτάν', 'Πολυμήδει', 'Ψύρ', 'Ῥοίτει', 'Σάρδεις', 'Σατνιόεις', 'Σκάμανδρ', 'Σίγει', 'Σιμόεις', 'Σίπυλ', 'Σμύρν', 'Τευθρανί', 'Θήβ', 'Θύμβρι', 'Τμῶλ', 'Τρῳάς', 'Ἀρδύνι', 'Ἰτών', 'Κάναι', 'Κλεανδρί', 'Πύλαι', 'Αἴγιλ', 'Ὤγυλ', 'Καμηλονήσι', 'Αἰγιαλὸς', 'Ἀχαί', 'Αἰγιάλει', 'Ἀχαιοί', 'Ἀχέρων', 'Αἰγαί', 'Αἰγειρ', 'Αἴ