In [358]:
import altair as alt
import pandas as pd
from vega_datasets import data
import numpy as np
import math

In [359]:
# Load the source table; the path is relative to the notebook's working directory.
# Later cells read the 'bbox', 'featureTypes', 'title' and 'description' columns.
df = pd.read_csv(filepath_or_buffer='pleiades.csv')

In [360]:
def get_square(loc_str):
    """Return the area of a bounding box given as a comma-separated string.

    Parameters
    ----------
    loc_str : str
        Four comma-separated numbers. The area is computed as
        ``(field[2] - field[0]) * (field[3] - field[1])``, so the layout is
        presumably ``min_x,min_y,max_x,max_y`` -- TODO confirm against the data.

    Returns
    -------
    float or int
        The box area (in squared input units) when both the width and the
        height are strictly positive; otherwise 0. 0 is also returned when
        the input is not a string, does not contain exactly four fields, or
        contains a field that cannot be parsed as a number.
    """
    if not isinstance(loc_str, str):
        return 0

    fields = loc_str.split(',')
    if len(fields) != 4:
        return 0

    try:
        x_min, y_min, x_max, y_max = (float(field) for field in fields)
    except ValueError:
        # Malformed coordinates are treated like any other unusable box.
        return 0

    width = x_max - x_min
    height = y_max - y_min
    # Check each side on its own: two negative sides would otherwise multiply
    # to a positive "area" and an inverted box would be reported as valid.
    # NaN coordinates fail both comparisons and therefore also yield 0.
    if width > 0 and height > 0:
        return width * height
    return 0

In [361]:
# Cast bbox to text so get_square always receives a string; with pandas'
# astype(str) a missing value becomes the literal 'nan', which does not have
# four comma-separated fields and is therefore scored as 0.
df['bbox'] = df['bbox'].astype(str)
# Map over the single column instead of a row-wise DataFrame.apply: same
# values, no per-row Series construction, and it also yields a proper
# (empty) Series when the frame has no rows.
df['square'] = df['bbox'].map(get_square)

In [362]:
# Drop degenerate boxes (area of 0) and outliers; the upper bound of 101
# was chosen empirically.
df = df.loc[df['square'].gt(0) & df['square'].le(101)]

In [363]:
gb = df.groupby(['featureTypes'])
# Rank the feature types by the sample standard deviation (ddof=1) of their
# areas and keep the 15 most variable ones.
# Only the 'square' column is aggregated: the frame also holds text columns
# (bbox, title, description) for which a standard deviation is undefined, and
# aggregating every column fails on pandas versions that no longer drop such
# columns silently.
sorted_gb = (
    gb['square']
    .std(ddof=1)
    .reset_index()
    .sort_values('square', ascending=False)
    .head(15)
)
most_std = sorted_gb['featureTypes'].to_numpy()

In [364]:
# Number of rows per feature type, largest groups first.
k = (
    gb.size()
    .reset_index(name='counts')
    .sort_values(by='counts', ascending=False)
)

In [365]:
# Minimum group size: keep only the feature types with more than 10 locations.
# The selection relies on the standard deviation, so raising this threshold
# gives more confidence in the estimate.
k = k.query('counts > 10')
count_pass = k['featureTypes'].to_numpy()
# Feature types that are both among the most variable ones and large enough.
intersect = np.intersect1d(most_std, count_pass)
df = df.loc[df['featureTypes'].isin(intersect)]

In [372]:
# Jittered strip plot: one narrow panel per featureTypes value, with every row
# of df drawn as a small circle whose vertical position is its 'square' value.
chart = alt.Chart(df).mark_circle(size=9).encode(
    # The horizontal position is only a random offset (the 'jitter' field
    # computed in transform_calculate below), so the axis is reduced to a
    # single unlabeled tick at 0 with no grid and no title.
    x=alt.X(
        'jitter:Q',
        title=None,
        axis=alt.Axis(values=[0], ticks=True, grid=False, labels=False),
        scale=alt.Scale(),
    ),
    y='square:Q',
    color='featureTypes:N',
    # Fields shown on hover; df must contain 'title' and 'description'.
    tooltip=['title:N', 'description:N'],
    # Facet into side-by-side columns, one per featureTypes value; the panel
    # labels are rotated by -45 degrees and placed below the panels.
    column=alt.Column(
        'featureTypes:N',
        header=alt.Header(
            labelAngle=-45,
            titleOrient='top',
            labelOrient='bottom',
            labelAlign='right',
            labelPadding=5,
        ),
    ),
# The expression has the Box-Muller form sqrt(-2*log(u1))*cos(2*PI*u2) with
# two independent random() draws, i.e. a normally distributed offset per point.
# It is re-drawn on every render, so the exact point positions are not reproducible.
).transform_calculate(
    jitter='sqrt(-2*log(random()))*cos(2*PI*random())'
# width/height apply to each individual panel of the facet.
).properties(
    width=80,
    height=350,
    title="Area occupied by pleiades"
).configure_facet(
    spacing=10
# stroke=None removes the border drawn around each panel.
).configure_view(
    stroke=None
# interactive() enables pan and zoom on the chart.
).interactive()

In [373]:
# Bare expression as the last line of the cell: renders the chart as the cell output.
chart