In [237]:
import altair as alt
import pandas as pd
from vega_datasets import data
from geopy.geocoders import Nominatim

In [238]:
# Nominatim geocoder client, identified to the service by the "vis_cw" user agent.
# It is the `geolocator` argument expected by is_country(); the only call site
# visible in this notebook (the isItaly column) is currently commented out.
geolocator = Nominatim(user_agent="vis_cw")

In [239]:
def is_country(lat, long, geolocator, country):
    """Check whether a coordinate reverse-geocodes into one of the given countries.

    The point is sent to ``geolocator.reverse`` as a ``"lat, long"`` string and
    the last comma-separated component of the returned address is taken as the
    country name.

    Parameters
    ----------
    lat, long : number or str
        Latitude and longitude of the point to look up.
    geolocator : object
        Any object exposing ``reverse(query)`` that returns an object with an
        ``address`` string (e.g. a geopy geocoder), or ``None`` when nothing
        is found.
    country : str or list/tuple/set of str
        Accepted country name(s), spelled as the geocoder reports them
        (e.g. ``['Italia', 'Italy']``).

    Returns
    -------
    bool
        True when the reported country equals ``country`` (or is one of its
        members); False otherwise, including when the lookup yields no result
        or ``country`` has an unsupported type.
    """
    # Query the coordinates that were actually passed in. The previous version
    # built this string but then sent a hard-coded test coordinate instead.
    location = geolocator.reverse(f'{lat}, {long}')

    # A reverse lookup may return nothing (e.g. a point in open sea).
    if location is None or not location.address:
        return False

    # The country is the final component of the formatted address.
    responseCountry = location.address.split(', ')[-1]

    if isinstance(country, str):
        return responseCountry == country
    if isinstance(country, (list, tuple, set, frozenset)):
        return responseCountry in country
    return False

In [240]:
# URL of the world_110m dataset shipped with vega_datasets; the chart cell
# below assigns the same value again before building its base map from it.
mp = data.world_110m.url

In [241]:
# Load the place table and keep two filtered views of it:
#   cf - rows with a non-null `hasConnectionsWith` value
#   lf - rows with a non-null `connectsWith` value
df = pd.read_csv('pleiades.csv')
cf = df.loc[df['hasConnectionsWith'].notna()]
lf = df.loc[df['connectsWith'].notna()]
# Disabled experiments, kept for reference (bounding-box filter and per-row country lookup):
# kf = df[(df.reprLat > 6.7499552751) & (df.reprLong > 36.619987291) & (df.reprLat < 18.4802470232) & (df.reprLong < 47.1153931748)]
# df['isItaly'] = df.apply(lambda row: is_country(row.reprLat, row.reprLong, geolocator, ['Italia', 'Italy']), axis=1)

In [242]:
# Flatten the comma-separated link columns into one [origin, connection] pair
# per linked id. All pairs from `hasConnectionsWith` (cf) come first, followed
# by all pairs from `connectsWith` (lf), each in row order.
connections = [
    [place_id, linked_id]
    for frame, link_column in ((cf, 'hasConnectionsWith'), (lf, 'connectsWith'))
    for place_id, linked in zip(frame['id'], frame[link_column])
    for linked_id in linked.split(',')
]

connectionsDf = pd.DataFrame(connections, columns=["origin", "connection"])
print(connectionsDf.head())

   origin connection
0  265876     266518
1  265877     266018
2  265884  482947334
3  265884  482947335
4  265884     265883


In [243]:
# Number of (origin, connection) pairs collected above.
print(len(connectionsDf))

17452


In [244]:
# Join every (origin, connection) pair to the place table twice: once for the
# origin's details (df3), then for the connected place's title and coordinates
# (df4, columns suffixed _1 = origin, _2 = connection).
#
# The ids are matched as strings. Casting df['id'] up front, and joining the
# origin through a temporary string key, keeps this cell re-runnable: the old
# order merged the numeric `origin` against `id` *before* the cast, so a second
# run compared numbers with strings and pandas refused the merge.
df['id'] = df['id'].astype(str)  # idempotent; same end state as before

df3 = (
    connectionsDf
    .assign(_origin_key=connectionsDf['origin'].astype(str))
    .merge(
        df[['id', 'title', 'reprLat', 'reprLong', 'description', 'timePeriods']],
        left_on='_origin_key',
        right_on='id',
        how='inner',
    )
    # `origin` itself is left untouched, so it keeps its original dtype.
    .drop(columns=['_origin_key', 'id'])
)

# strip() guards against stray whitespace left around ids by the comma split.
df3['connection'] = df3['connection'].astype(str).str.strip()

df4 = df3.merge(
    df[['id', 'title', 'reprLat', 'reprLong']],
    left_on='connection',
    right_on='id',
    how='inner',
    suffixes=('_1', '_2'),
).drop(columns=['id'])

In [245]:
# Lift Altair's row limit so that all of df4 can be embedded in the chart.
alt.data_transformers.disable_max_rows()

# Hover selection keyed on `origin`: the nearest point under the mouse is
# selected, and an empty selection matches nothing (so no rules are drawn).
# NOTE(review): selection_single / add_selection are deprecated in Altair 5 in
# favour of selection_point / add_params - confirm the installed version
# before migrating.
select_point = alt.selection_single(
    on="mouseover",
    nearest=True,
    fields=["origin"],
    empty="none",
)

# lookup_data = alt.LookupData(
#     df, key="id", fields=["title", "reprLat", "reprLong"]
# )

# Base map layers: sphere fill, graticule lines and country shapes.
mp = data.world_110m.url
source = alt.topo_feature(mp, 'countries')

globe = alt.Chart(alt.sphere()).mark_geoshape(fill='lightblue', opacity=0.7)
meridian = alt.Chart(alt.graticule()).mark_geoshape(
    stroke='white',
    strokeWidth=0.3,
    opacity=0.8,
)
background = alt.Chart(source).mark_geoshape(
    fill="lightgray",
    stroke="white",
    # color=alt.condition(alt.datum.id == 826, alt.value('orange'), alt.value('steelblue')),
)

# Both data layers anchor on the origin place's coordinates.
origin_position = {"latitude": "reprLat_1:Q", "longitude": "reprLong_1:Q"}

# Rules from the origin to each connected place, limited to the hovered origin.
connections = (
    alt.Chart(df4)
    .mark_rule(opacity=0.35)
    .encode(**origin_position, latitude2="reprLat_2:Q", longitude2="reprLong_2:Q")
    .transform_filter(select_point)
)

# One circle per df4 row at the origin's position; this layer owns the selection.
points = (
    alt.Chart(df4)
    .mark_circle(color='#B266FF', opacity=0.5)
    .encode(
        **origin_position,
        tooltip=["title_1:N", "description:N", "timePeriods:N", "reprLat_1:Q", "reprLong_1:Q"],
    )
    .add_selection(select_point)
)

# Stack the layers (connections drawn on top) under a shared Mercator
# projection centred on [-4, 55]; the final expression renders the chart.
(
    alt.layer(globe, meridian, background, points, connections)
    .properties(
        width=750,
        height=500,
        title='Pleiades and their connections across the UK (Western Europe)',
    )
    .project("mercator", scale=1300, center=[-4, 55])
    .configure_view(stroke=None)
)