In [72]:
import altair as alt
import pandas as pd
import time
import requests
from vega_datasets import data
from geopy.geocoders import Nominatim

In [73]:
# Load the places table from the local CSV; later cells read its
# featureTypes, reprLat and reprLong columns.
df = pd.read_csv('pleiades.csv')

In [74]:
# Keep only the rows whose featureTypes text contains "aqueduct".
# na=False counts rows with a missing featureTypes value as non-matches; without
# it the mask would contain NaN and boolean indexing would raise a ValueError.
df = df[df['featureTypes'].str.contains("aqueduct", na=False)]

In [75]:
# Reverse-geocoding every row is slow (one throttled request per row). To skip it,
# load the cached CSV a few cells further down instead of running the geocoding cells.
def get_country(lat, long, geolocator, delay=2):
    """Look up the country for a coordinate pair by reverse geocoding.

    Parameters
    ----------
    lat, long
        Latitude and longitude; they are formatted into a "lat, long" query string.
    geolocator
        Object exposing ``reverse(query, language=...)`` whose result has an
        ``address`` string (a geopy ``Nominatim`` instance in this notebook).
    delay
        Seconds to sleep before each request, to throttle calls to the service.
        Defaults to 2, the value this notebook has always used.

    Returns
    -------
    str
        The last comma-separated component of the returned address (assumed to be
        the country name -- the geocoder is asked for English names), or
        'Unknown' when nothing is found or the address is empty.
    """
    time.sleep(delay)
    location = geolocator.reverse(f'{lat}, {long}', language='en')
    if location is None:
        return 'Unknown'
    # str.split never returns an empty list, so check the last component itself
    # rather than comparing the split result to [].
    country = (location.address or '').split(', ')[-1].strip()
    return country or 'Unknown'

In [None]:
# Reverse-geocode every row and store the result in a new 'country' column.
geolocator = Nominatim(user_agent="vis_cw")


def _country_for_row(row):
    """Return the country for one row, using its representative coordinates."""
    return get_country(row.reprLat, row.reprLong, geolocator)


df['country'] = df.apply(_country_for_row, axis=1)

In [44]:
# The geocoding above takes quite a while, so cache the result for later runs.
# index=False keeps the row index out of the file, so reloading it with
# pd.read_csv does not add a stray "Unnamed: 0" column.
df.to_csv('pleiades_vis3_withCountry.csv', index=False)

In [79]:
# To skip the slow geocoding, uncomment the line below to load the cached result (instead of running the 3 cells above)
# df = pd.read_csv('pleiades_vis3_withCountry.csv')

In [80]:
# Count rows per country, then keep the ten countries with the highest counts.
country_gr = df.groupby(['country'])
country_counts = country_gr.size().reset_index(name='counts')
sorted_gr = country_counts.sort_values('counts', ascending=False).head(10)

In [81]:
# Display the ten countries with the most aqueduct rows.
sorted_gr

Unnamed: 0,country,counts
13,Italy,74
23,Turkey,50
22,Tunisia,37
6,France,36
20,Spain,29
12,Israel,26
1,Algeria,19
8,Greece,19
4,Croatia,14
7,Germany,8


In [82]:
# Names of the top-10 countries as a NumPy array, in descending-count order.
unique = sorted_gr['country'].to_numpy()

In [83]:
# Display the array of top-10 country names.
unique

array(['Italy', 'Turkey', 'Tunisia', 'France', 'Spain', 'Israel',
       'Algeria', 'Greece', 'Croatia', 'Germany'], dtype=object)

In [84]:
# Build a name -> id mapping for the top-10 countries from a public code list.
# These ids are later joined against the map's `id` field.
r = requests.get(
    'https://raw.githubusercontent.com/alisle/world-110m-country-codes/master/world-110m-country-codes.json',
    timeout=30,  # do not hang the notebook forever if the host is unreachable
)
r.raise_for_status()  # surface HTTP errors here instead of as a confusing JSON error
d = r.json()

# Every country in the code list: name -> id.
k = {x['name']: x['id'] for x in d}

# Fail with a readable message when a geocoded name has no entry in the code
# list (spelling differences between the two sources are the likely cause).
missing = [name for name in unique if name not in k]
if missing:
    raise KeyError(f'No country id found for: {missing}')

# Only the top-10 countries: name -> id.
dct = {name: k[name] for name in unique}

In [85]:
# Rows that belong to one of the top-10 countries. The explicit copy makes kf an
# independent frame, so adding columns to it cannot be mistaken for a write into
# a slice of df and does not trigger pandas' chained-assignment warning.
kf = df[df['country'].isin(unique)].copy()

In [86]:
# Turn off pandas' chained-assignment warning, then tag every row with the id of
# its country.
pd.set_option('mode.chained_assignment', None)
kf['country_id'] = kf.country.map(dct)

In [87]:
# Attach the matching country id to each row of the top-10 table.
sorted_gr['country_id'] = sorted_gr.country.map(dct)

In [88]:
# Display the top-10 table again to check the country ids.
sorted_gr

Unnamed: 0,country,counts,country_id
13,Italy,74,380
23,Turkey,50,792
22,Tunisia,37,788
6,France,36,250
20,Spain,29,724
12,Israel,26,376
1,Algeria,19,12
8,Greece,19,300
4,Croatia,14,191
7,Germany,8,276


In [89]:
# Hover selection keyed on the country name and shared by the map and the bar
# chart. With empty='none', nothing is highlighted until a mark is hovered.
selection = alt.selection(type='single', on='mouseover', fields=['country'], empty='none')

# --- Bar chart: number of rows per country, coloured by feature type ---------
country_axis = alt.Y(
    'country:O',
    sort=alt.EncodingSortField(field="country", op="count", order='descending'),
)
# The hovered country turns yellow; every other bar keeps its feature-type colour.
bar_colour = alt.condition(selection, alt.value('yellow'), 'featureTypes:N')

bars = alt.Chart(kf).mark_bar().encode(
    x='count():Q',
    y=country_axis,
    color=bar_colour,
).add_selection(selection)

# --- Map layers ---------------------------------------------------------------
mp = data.world_110m.url
source = alt.topo_feature(mp, 'countries')

globe = alt.Chart(alt.sphere()).mark_geoshape(fill='lightblue', opacity=0.7)
meridian = alt.Chart(alt.graticule()).mark_geoshape(stroke='white', strokeWidth=0.3, opacity=0.8)

# Plain grey countries drawn underneath the coloured layer.
background = alt.Chart(source).mark_geoshape(stroke="white", fill="lightgray")

# Colour scale for the per-country counts.
counts_colour = alt.Color(
    'counts:Q',
    legend=alt.Legend(title='Number of aqueducts'),
    scale=alt.Scale(scheme='orangered'),
)
# Left join on the map feature id to pull in each country's name and count.
counts_lookup = alt.LookupData(sorted_gr, 'country_id', ['country', 'counts'])

world_map = (
    alt.Chart(source)
    .mark_geoshape(stroke="white")
    .encode(color=alt.condition(selection, alt.value('yellow'), counts_colour))
    .transform_lookup(lookup='id', from_=counts_lookup)
    .add_selection(selection)
)

layered_map = (
    alt.layer(globe, meridian, background, world_map)
    .properties(width=750, height=500, title='Countries with most aqueducts (top 10)')
    .project('mercator', scale=330, center=[-50, 66])
)

# Map on top, bar chart below, with the view border removed.
chart = (layered_map & bars).configure_view(stroke=None)

In [90]:
# Render the combined map and bar chart.
chart