In [1]:
import requests
from bs4 import BeautifulSoup

In [2]:
soup = BeautifulSoup(requests.get('https://topostext.org/work/126').text) ## APOLLONIUS RHODIUS, ARGONAUTICA

In [None]:
import collections 
places_dict = {}
freq_dict = collections.defaultdict(int)
for i, a in enumerate(soup.find_all('a')):
  if ('about' in a.attrs.keys()):
    places_dict[a.text] = (a['about'], a['class'], a['lat'], a['long'])
    freq_dict[a.text] += 1

In [None]:
import pandas as pd
place_df = pd.DataFrame.from_dict(places_dict, orient='index').reset_index().rename(columns={'index':'place_name', 0:'topos_url', 1: 'type', 2:'lat', 3:'long'})
freq_df  = pd.DataFrame.from_dict(freq_dict, orient='index').reset_index().rename(columns={'index':'place_name', 0:'frequency'})

In [None]:
place_freq = pd.merge(place_df, freq_df, on='place_name')
place_freq['type'] = place_freq['type'].apply(lambda x: x[0])
place_freq = place_freq.drop(place_freq.loc[place_freq['type'] == 'person'].index)

In [None]:
place_freq

Unnamed: 0,place_name,topos_url,type,lat,long,frequency
0,Pontus,https://topostext.org/place/435335WPon,place,43.5,33.5,8
1,Cyanean rocks,https://topostext.org/place/412291IKya,place,41.234,29.11517,5
2,Anaurus,https://topostext.org/place/393228WAna,place,39.365,22.899,2
3,Pelasgian,https://topostext.org/place/398223UPel,demonym,39.5,22.5,7
4,Pimpleian height,https://topostext.org/place/401225UPim,place,40.1434,22.4931,1
...,...,...,...,...,...,...
278,Tyrrhenians,https://topostext.org/place/430105RTyr,ethnic,43,10.5,1
279,Thera,https://topostext.org/place/364255PThe,place,36.364,25.477,1
280,Aulis,https://topostext.org/place/384236UAul,place,38.4335,23.5925,1
281,Opuntian,https://topostext.org/place/386231ROpu,demonym,38.65,23.04,1


In [None]:
na = pd.read_csv('/content/names.csv') ## place name look up
pl = pd.read_csv('/content/places.csv') ## coord look up
pt = pd.read_csv('/content/places_place_types.csv') ## place type look up

In [None]:
def get_place(pl_id):
    pl_url = f'https://pleiades.stoa.org/places/{pl_id}/json'
    r = requests.get(pl_url)
    try:
      return r.json()
    except:
      return None

def get_pleiades(topos_url):
  links = BeautifulSoup(requests.get(topos_url).text).find_all('a', {'target':'_blank'})
  pl_id = ''
  for a in links:
    if 'Pleiades' in a.text:
      pl_id += a['href'].split('/')[-1]
  try: 
    return get_place(pl_id)
  except ChunkedEncodingError:
    return None

In [None]:
from tqdm import tqdm
tqdm.pandas()

place_freq['pl_json'] = place_freq['topos_url'].progress_apply(get_pleiades)

100%|██████████| 271/271 [04:28<00:00,  1.01it/s]


In [None]:
place_freq['pl_romanized'] = place_freq['pl_json'].apply(lambda x: x['names'][-1]['romanized'] if (len(x) == 26) and (len(x['names']) > 0) else None)

In [None]:
# number of entiries with wiht no romanized name
len(place_freq.loc[place_freq['pl_romanized'].isnull()])

15

In [None]:
place_freq['pl_attested'] = place_freq['pl_json'].apply(lambda x: x['names'][-1]['attested'] if (len(x) == 26) and (len(x['names']) > 0) else None)

In [None]:
place_freq['pl_type'] = place_freq['pl_json'].apply(lambda x: x['placeTypes'][0] if 'placeTypes' in x.keys() else None)

In [None]:
place_freq = place_freq.drop(['pl_json'], axis=1)

In [None]:
place_freq.to_csv('argo_place_freq.csv')

In [183]:
## Final functions
import requests
from bs4 import BeautifulSoup
import collections 
import pandas as pd
import re
from tqdm import tqdm
tqdm.pandas()

def get_place(pl_id):
    pl_url = f'https://pleiades.stoa.org/places/{pl_id}/json'
    r = requests.get(pl_url)
    try:
      return r.json()
    except:
      return None

def get_pleiades(topos_url):
  links = BeautifulSoup(requests.get(topos_url).text).find_all('a', {'target':'_blank'})
  pl_id = ''
  for a in links:
    if 'Pleiades' in a.text:
      pl_id += a['href'].split('/')[-1]
  try: 
    return get_place(pl_id)
  except:
    return None

def resolve_points(df, pl_id):
  pl_id = df['pl_id'].loc[df['pl_id'] == pl_id].iloc[0]
  sim = df.loc[df['pl_id'] == pl_id]
  if len(sim) > 1:
    spn = sim.place_name.max() ## picking shortest name...
    gold = df.loc[df['place_name'] == spn]
    dross = sim.drop(gold.index)
    s = sum(sim.frequency.to_list())
    df['frequency'][df['place_name'] == spn] = s
    df = df.drop(dross.index)
  df = df.reset_index(drop=True)
  return df

def resolve_types(pl_type):
  if (pl_type == 'unknown') or (pl_type == 'unlocated'):
    return 'Mythic site'
  elif (pl_type == 'label') or (pl_type == None):
    return 'Tribe'
  elif (pl_type == 'cape') or (pl_type == 'island') or (pl_type == 'archipelago') or (pl_type == 'peninsula'):
    return 'Coastal feature'
  elif (pl_type == 'water-open') or (pl_type == 'river'):
    return 'Waterway'
  elif (pl_type == 'sanctuary'):
    return 'Temple'
  elif (pl_type == 'urban') or (pl_type == 'settlement'):
    return 'Political entity' 
  else:
    return pl_type.title()

def fill_in_attested(place_name, pl_attested):
  attested = []
  if (pl_attested != '') and (not isinstance(pl_attested, type(None))):
    attested.append(pl_attested)
  if not place_name in attested:
    attested.append(place_name)
  return attested

def fill_in_attested_from_pl_title(title, pl_attested):
  attested = pl_attested
  if '/' in title:
    for name in title.split('/'):
      pl_attested.append(name)
  return list(set(attested))

def getDataForArcGis(topos_url):
  soup = BeautifulSoup(requests.get(topos_url).text)

  text_name = soup.find('h2').text.split(',')
  print(f'Reading{text_name[1]} by {text_name[0]}...')

  places_dict = {}
  freq_dict = collections.defaultdict(int)
  for i, a in enumerate(soup.find_all('a')):
    if ('about' in a.attrs.keys()):
      places_dict[a.text] = (a['about'], a['class'], a['lat'], a['long'])
      freq_dict[a.text] += 1

  place_df = pd.DataFrame.from_dict(places_dict, orient='index').reset_index().rename(columns={'index':'place_name', 0:'topos_url', 1: 'type', 2:'lat', 3:'long'})
  freq_df  = pd.DataFrame.from_dict(freq_dict, orient='index').reset_index().rename(columns={'index':'place_name', 0:'frequency'})

  place_freq = pd.merge(place_df, freq_df, on='place_name')
  place_freq['type'] = place_freq['type'].apply(lambda x: x[0])
  place_freq = place_freq.drop(place_freq.loc[place_freq['type'] == 'person'].index)

  print("Querying Pleiades.")
  place_freq['pl_json'] = place_freq['topos_url'].progress_apply(get_pleiades)
  place_freq = place_freq.drop(place_freq.loc[place_freq['pl_json'].isnull()].index)
  place_freq['pl_id'] = place_freq['pl_json'].apply(lambda x: x['id'] if bool(re.search(r'\d',x['id'])) else None)
  place_freq['title'] = place_freq['pl_json'].apply(lambda x: x['title'].split('/')[0] if '/' in x['title'] else x['title'])
  place_freq['title'] = place_freq.apply(lambda x: x['place_name'] if x['pl_id'] == None else x['title'], axis=1)
  place_freq['pl_romanized'] = place_freq['pl_json'].apply(lambda x: x['names'][-1]['romanized'] if (len(x) == 26) and (len(x['names']) > 0) else None)
  place_freq['pl_attested'] = place_freq['pl_json'].apply(lambda x: x['names'][-1]['attested'] if (len(x) == 26) and (len(x['names']) > 0) else None)
  place_freq['pl_attested'] = place_freq.apply(lambda x: fill_in_attested(x['place_name'], x['pl_attested']), axis=1)
  place_freq['pl_attested'] = place_freq.apply(lambda x: fill_in_attested_from_pl_title(x['title'], x['pl_attested']), axis=1)
  place_freq['pl_type'] = place_freq['pl_json'].apply(lambda x: x['placeTypes'][0] if 'placeTypes' in x.keys() else None)
  place_freq['pl_type'] = place_freq['pl_type'].apply(resolve_types)
  place_freq = place_freq.drop(['pl_json'], axis=1)

  return place_freq, text_name

def outputData(topos_url):
  df, text_name = getDataForArcGis(topos_url)
  for pl_id in df.pl_id.to_list():
    try:
      df = resolve_points(df, pl_id) ## single positional index out of bounds error, not sure why...
    except:
      pass
  author = text_name[0].replace(' ','')
  text_title = text_name[1].replace(' ','').strip()
  df.to_csv(f'{author}_{text_title}_gisdata.csv')
  return df

In [184]:
mi = outputData('https://topostext.org/work/791')

Reading Misopogon by Julian the Emperor...
Querying Pleiades.


100%|██████████| 44/44 [00:23<00:00,  1.84it/s]
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In [185]:
mi

Unnamed: 0,place_name,topos_url,type,lat,long,frequency,pl_id,title,pl_romanized,pl_attested,pl_type
0,Paros,https://topostext.org/place/371252PPar,place,37.0844,25.1483,1,599867.0,Paros,Paros,[Paros],Political entity
1,Rhine,https://topostext.org/place/507071WRhi,place,50.736,7.1111,1,109277.0,Rhenus (river),Rhenus,[Rhine],Waterway
2,Celts,https://topostext.org/place/490070RKel,ethnic,49.0,7.0,11,,Celts,,[Celts],Tribe
3,Parisii,https://topostext.org/place/489023ULut,ethnic,48.8529,2.3499,2,109126.0,Lutetia,Lutezia,"[Lutezia, Parisii]",Political entity
4,Phrygia,https://topostext.org/place/400310RPhr,place,40.0,31.0,1,609502.0,Phrygia,Berekyntes,[Phrygia],Region
5,Lydians,https://topostext.org/place/385280RLyd,ethnic,38.5,28.0,1,550701.0,Lydia,Maionia,[Lydians],Region
6,Persians,https://topostext.org/place/295540RPer,ethnic,29.5,54.0,1,922698.0,Persis,Pars,[Persians],Tribe
7,Pythian,https://topostext.org/place/385225SDel,demonym,38.48264,22.50108,1,540726.0,Delphi,Delfi,"[Pythian, Delfi]",Temple
8,Troy,https://topostext.org/place/400262UIli,place,39.9575,26.2389,2,550595.0,Ilium,"Troía, Troia","[Troy, Τροία]",Political entity
9,Daphne,https://topostext.org/place/361361UDap,place,36.13,36.1442,6,658450.0,Daphne,Daphne,"[Δάφνη, Daphne]",Political entity


In [186]:
ar = outputData('https://topostext.org/work/126')

Reading Argonautica by Apollonius Rhodius...
Querying Pleiades.


100%|██████████| 271/271 [02:25<00:00,  1.87it/s]
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In [187]:
ar

Unnamed: 0,place_name,topos_url,type,lat,long,frequency,pl_id,title,pl_romanized,pl_attested,pl_type
0,Pontus,https://topostext.org/place/435335WPon,place,43.5,33.5,8,1224,Pontus Euxinus,Pontike Thalatta,"[Pontus, Ποντική Θάλαττα]",Waterway
1,Cyanean rocks,https://topostext.org/place/412291IKya,place,41.234,29.11517,5,521064,Kyaneai Inss.,Kyaneai,[Cyanean rocks],Coastal feature
2,Anaurus,https://topostext.org/place/393228WAna,place,39.365,22.899,2,540634,Anauros (river),Anauros,[Anaurus],Waterway
3,Pimpleian height,https://topostext.org/place/401225UPim,place,40.1434,22.4931,1,491697,Pimpleia,Pimpleia,[Pimpleian height],Political entity
4,Zone,https://topostext.org/place/409256PZon,place,40.8635,25.6382,1,501667,Zone,Zone,[Zone],Political entity
...,...,...,...,...,...,...,...,...,...,...,...
228,Anaphe,https://topostext.org/place/364258PAna,place,36.359,25.799,3,599491,Anaphe,Anaphe,[Anaphe],Political entity
229,Tyrrhenians,https://topostext.org/place/430105RTyr,ethnic,43,10.5,5,413122,Etruria,Rasna,[Tyrrhenians],Region
230,Thera,https://topostext.org/place/364255PThe,place,36.364,25.477,3,599971,Thera,Thera,[Thera],Political entity
231,Aulis,https://topostext.org/place/384236UAul,place,38.4335,23.5925,1,579889,Aulis,Aulis,[Aulis],Political entity


In [188]:
il = outputData('https://topostext.org/work/2')

Reading Iliad by Homer...
Querying Pleiades.


100%|██████████| 317/317 [02:48<00:00,  1.88it/s]
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In [189]:
il.sort_values('frequency', ascending=False)

Unnamed: 0,place_name,topos_url,type,lat,long,frequency,pl_id,title,pl_romanized,pl_attested,pl_type
6,Troy,https://topostext.org/place/400262UIli,place,39.9575,26.2389,800,550595,Ilium,"Troía, Troia","[Troy, Τροία]",Political entity
1,Argos,https://topostext.org/place/376227PArg,place,37.63091,22.72079,215,570106,Argos,"Árgos, Argos","[Ἄργος, Argos]",Political entity
0,Olympus,https://topostext.org/place/401224LOly,place,40.0856,22.3586,112,491677,Olympos (mountain in Greece),Olympus,[Olympus],Mountain
221,Lycians,https://topostext.org/place/365295RLyc,ethnic,36.5,29.5,70,638965,Lycia,Lycia,"[Lycians, Lycia]",Region
193,Ida,https://topostext.org/place/397268LIda,place,39.6922,26.8423,47,550592,Ida (mountain),Ida,[Ida],Mountain
...,...,...,...,...,...,...,...,...,...,...,...
158,Trachis,https://topostext.org/place/388224RTra,place,38.75,22.4,1,,Trachis,,[Trachis],Tribe
68,Histiaea,https://topostext.org/place/389231PHis,place,38.9466,23.0905,1,540817,Histiaia,Hestiaia,[Histiaea],Political entity
67,Eretria,https://topostext.org/place/384238PEre,place,38.3982,23.7905,1,579925,Eretria,Eretria,"[Ἐρέτρια, Eretria]",Political entity
161,Pyrasus,https://topostext.org/place/393228PPyr,place,39.2788,22.8212,1,541081,Pyrasos,Pyrasos,"[Πύρασος, Pyrasus]",Political entity
