In [25]:
import altair as alt
import pandas as pd
from vega_datasets import data
from geopy.geocoders import Nominatim
import time

In [26]:
# Load the places table from the local CSV (presumably an export of the
# Pleiades gazetteer -- confirm provenance). Later cells read its
# featureTypes, reprLat and reprLong columns.
df = pd.read_csv("pleiades.csv")

In [13]:
# Inspect the distinct featureTypes values; each one is a comma-separated
# list of tags such as "tomb, cemetery".
df["featureTypes"].unique()

array(['cemetery', 'settlement, cemetery', 'tomb', 'tomb, cemetery',
       'cemetery, monument', 'tomb, monument',
       'station, cemetery, church', 'cemetery, settlement-modern',
       'cemetery, tumulus', 'tomb, pyramid', 'tomb, villa, basilica',
       'tomb, settlement', 'tomb, villa, cistern',
       'sanctuary, cemetery, pyramid, tomb, settlement, architecturalcomplex',
       'tomb, tumulus', 'production, tomb, settlement',
       'wall-2, cemetery, monument', 'tomb, settlement, fort',
       'settlement, cemetery, villa, settlement-modern',
       'cemetery, settlement, mosque, shrine',
       'tomb, architecturalcomplex', 'sanctuary, settlement, cemetery',
       'settlement, cemetery, temple-2, fort', 'cemetery, villa',
       'unlocated, tomb', 'settlement, cemetery, mine-2',
       'cemetery, church', 'tomb, villa', 'cemetery, abbey, church',
       'sanctuary, cemetery', 'tomb, theatre', 'cemetery, unlabeled',
       'tomb, sanctuary, cistern', 'tomb, temple-2',
      

In [27]:
# Keep only places whose featureTypes mention "cemetery".
# na=False treats rows with a missing featureTypes as non-matches instead of
# putting NaN into the mask (which boolean indexing rejects); regex=False makes
# this a plain substring test. .copy() yields an independent frame, so adding
# the `country` column later does not write into a slice of the loaded table.
df = df[df['featureTypes'].str.contains("cemetery", na=False, regex=False)].copy()

In [28]:
# Report how many rows the table currently holds.
print(len(df))

637


In [29]:
def get_country(lat, long, geolocator, delay=2):
    """Reverse-geocode a coordinate pair and return the country part of its address.

    Parameters
    ----------
    lat, long : float
        Latitude and longitude; they are formatted as ``"<lat>, <long>"`` and
        passed to the geocoder as a single query string.
    geolocator : object
        Geocoder exposing ``reverse(query)`` (for example a geopy ``Nominatim``
        instance). It must return ``None`` or an object with an ``address``
        string.
    delay : float, optional
        Seconds to sleep before the lookup, to throttle requests to the
        geocoding service. Defaults to 2; pass 0 to disable.

    Returns
    -------
    str
        The last non-empty comma-separated component of the returned address
        (the country, in whatever language the geocoder answers in), or
        ``'Unknown'`` when the coordinates are missing, the geocoder finds
        nothing, or the address is empty.
    """
    # Missing coordinates cannot be geocoded; skip the sleep and the request.
    if pd.isna(lat) or pd.isna(long):
        return 'Unknown'

    if delay:
        time.sleep(delay)

    location = geolocator.reverse(f'{lat}, {long}')
    if location is None:
        return 'Unknown'

    # str.split never returns an empty list, so filter out blank components
    # explicitly; otherwise an empty address would yield '' instead of 'Unknown'.
    parts = [part.strip() for part in (location.address or '').split(',')]
    parts = [part for part in parts if part]
    return parts[-1] if parts else 'Unknown'

In [30]:
geolocator = Nominatim(user_agent="vis_cw")

# Each lookup is a throttled network call (get_country sleeps before every
# request), so query each distinct coordinate pair once and reuse the answer
# for any other row that shares those coordinates.
country_by_coord = {}
countries = []
for lat, lon in zip(df['reprLat'], df['reprLong']):
    coord = (lat, lon)
    if coord not in country_by_coord:
        country_by_coord[coord] = get_country(lat, lon, geolocator)
    countries.append(country_by_coord[coord])

# Attach the result to an explicit copy so the assignment never writes into a
# filtered slice of the loaded table.
df = df.copy()
df['country'] = countries

In [108]:
# Count rows per country and keep the ten countries with the most rows.
gb = df.groupby(['country'])
sorted_gb = (
    gb.size()
    .reset_index(name='counts')
    .sort_values('counts', ascending=False)
    .head(10)
)

In [109]:
# Display the top-10 countries with their row counts.
sorted_gb

Unnamed: 0,country,counts
8,Italia,121
45,مصر,54
26,Ελλάς,49
44,ليبيا,49
4,Deutschland,47
15,Polska,46
40,تونس,37
25,Česko,33
3,Danmark,18
39,ایران,18


In [110]:
# Country names from the top-10 table, in the table's row order, as a NumPy array.
unique = sorted_gb['country'].to_numpy()
unique

array(['Italia', 'مصر', 'Ελλάς', 'ليبيا', 'Deutschland', 'Polska', 'تونس',
       'Česko', 'Danmark', 'ایران'], dtype=object)

In [111]:
# Map each top-10 country name (as returned by the geocoder) to the numeric id
# that is later matched against the `id` of the world-map features.
# The mapping is keyed by name instead of being zipped against the ranking
# order: several countries tie on their counts, so a different tie order (or
# changed data) would otherwise silently attach an id to the wrong country.
# A country missing from this dict maps to NaN downstream rather than to a
# wrong id.
# NOTE(review): the ids look like ISO 3166-1 numeric codes -- confirm against
# the map data.
dct = {
    'Italia': 380,
    'مصر': 818,
    'Ελλάς': 300,
    'ليبيا': 434,
    'Deutschland': 276,
    'Polska': 616,
    'تونس': 788,
    'Česko': 203,
    'Danmark': 208,
    'ایران': 364,
}
# Kept for any later cell that still refers to the plain list of ids.
ids = list(dct.values())

In [112]:
# Restrict the places table to rows whose country is one of the top-10 names.
in_top_countries = df['country'].isin(unique)
kf = df[in_top_countries]

In [113]:
# Add the numeric map id for each row's country. `assign` returns a new frame,
# so nothing is written into a filtered slice and pandas' chained-assignment
# warning does not have to be switched off for the whole session.
kf = kf.assign(country_id=kf['country'].map(dct))

In [114]:
# Attach the numeric map id to the top-10 table; the world map uses it as the
# lookup key.
sorted_gb = sorted_gb.assign(country_id=sorted_gb['country'].map(dct))

In [115]:
# Display the top-10 table again, now including the country_id column.
sorted_gb

Unnamed: 0,country,counts,country_id
8,Italia,121,380
45,مصر,54,818
26,Ελλάς,49,300
44,ليبيا,49,434
4,Deutschland,47,276
15,Polska,46,616
40,تونس,37,788
25,Česko,33,203
3,Danmark,18,208
39,ایران,18,364


In [149]:
# Linked view: a world map coloured by per-country counts, stacked above a bar
# chart of the same countries. Hovering a country highlights it in yellow.

# Single-value selection driven by mouseover and keyed on the `country` field;
# empty='none' means nothing is treated as selected until something is hovered.
selection = alt.selection(type='single', on='mouseover', fields=['country'], empty='none')

# Bar chart over the per-place rows in `kf`: one bar per country, bar length is
# the number of rows, countries ordered by that count (largest first).
bars = alt.Chart(kf).mark_bar().encode(
    x='count():Q',
    y=alt.Y('country:O', sort=alt.EncodingSortField(field="country", op="count", order='descending')),
    # Selected country is drawn in yellow; otherwise bars are coloured by featureTypes.
    color=alt.condition(
        selection,
        alt.value('yellow'),
        'featureTypes:N'
    )
).add_selection(selection)

# Country shapes from the world_110m TopoJSON shipped with vega_datasets.
mp = data.world_110m.url
source = alt.topo_feature(mp, 'countries')

# Backdrop layers: the sphere outline filled light blue, plus graticule lines.
globe = alt.Chart(alt.sphere()).mark_geoshape(fill='lightblue', opacity=0.7)
meridian = alt.Chart(alt.graticule()).mark_geoshape(stroke='white', strokeWidth=0.3, opacity=0.8)

# All countries in light gray, drawn underneath the data-driven layer.
background = alt.Chart(source).mark_geoshape(
    stroke="white",
    fill="lightgray"
)

# Data-driven layer: each map feature's `id` is looked up against
# sorted_gb.country_id to pull in `country` and `counts`; the fill is yellow
# for the selected country and otherwise follows `counts` on the 'orangered'
# colour scheme.
world_map = alt.Chart(source).mark_geoshape(
    stroke="white"
).encode(
     color=alt.condition(selection, alt.value('yellow'), alt.Color('counts:Q', legend=alt.Legend(title='Number of pleiades'), scale=alt.Scale(scheme='orangered')))
).transform_lookup(
    lookup='id',
    from_=alt.LookupData(sorted_gb, 'country_id', ['country', 'counts'])
).add_selection(
    selection
)

# Stack the map layers (later layers draw on top) and apply one shared
# Mercator projection with a fixed scale and center.
ct = alt.layer(
    globe,
    meridian,
    background,
    world_map,
).properties(
    width=750,
    height=500
).project(
    'mercator',
    scale=300,
    center=[-42, 65],
)

# Map on top, bars below; stroke=None removes the border around each view.
(ct & bars).configure_view(stroke=None)