In [None]:
# Render matplotlib figures inline, directly in the output of the cell that draws them.
%matplotlib inline

In [None]:
%load_ext autoreload
%autoreload 2
import warnings; warnings.simplefilter('ignore')

# Gaia data visualization using voila and vaex
This small voila dashboard shows the basics of building a dashboard using:
 * <a href="https://gea.esac.esa.int/archive/">Gaia</a> dataset, crossmatched to the <a href="https://panstarrs.stsci.edu/">Pan-STARRS</a> dataset.
 * <a href="https://github.com/QuantStack/voila">voila</a>
 * [ipywidgets](https://ipywidgets.readthedocs.io/)

In [None]:
import ipywidgets as widgets
import matplotlib.pylab as plt
import vaex
import vaex.jupyter
import numpy as np


In [None]:
# Open the Gaia x Pan-STARRS crossmatch directly from S3.
import getpass

# The profile name is forwarded to the S3 backend through the URL query string:
# 'stsci' on the original author's account, 'default' for everybody else.
if getpass.getuser() == 'maartenbreddels':
    profile_name = 'stsci'
else:
    profile_name = 'default'

df = vaex.open(f's3://astrosurveydata/gaia_ps1_nochunk.hdf5?profile_name={profile_name}')
# For quick experiments, uncomment the next line to work on a smaller subset:
# df = df[:20_000_000]

In [None]:
# Report the row count of the dataset as loaded (no filter applied yet),
# formatted with thousands separators.
print(f'The full dataset contains {len(df):,} stars')

In [None]:
# All column names in the dataset, and the subset whose name contains "mean_mag";
# the latter feed the magnitude dropdowns of the interactive plot.
columns_all = df.get_column_names()
columns_colors = [name for name in columns_all if "mean_mag" in name]
print("We found the following photometry columns in the Gaia dataset:\n\t" + "\n\t".join(columns_colors))

In [None]:
# Add two derived columns to the dataframe.
# NOTE(review): assumes parallax is in milliarcseconds (the Gaia convention), which
# makes distance come out in kpc — confirm against the dataset's column units.
df['distance'] = 1 / df['parallax']
# Absolute G magnitude from the distance modulus; with the distance in kpc,
# M = m - 5*log10(d_kpc) - 10 (equivalent to m - 5*log10(d_pc) + 5).
df['M_g'] = df['phot_g_mean_mag'] - 5 * np.log10(df['distance']) - 10

# Filtering the data
We keep only the subset of the data where $\frac{\varpi}{\sigma_\varpi} > 5$, i.e. the parallax is at least five times larger than its uncertainty. This removes negative distances as well as very uncertain ones.

In [None]:
# Keep only stars whose parallax exceeds five times its uncertainty; this drops
# negative parallaxes (hence negative distances) and very uncertain distances.
df = df[df['parallax_over_error'] > 5]
print(f'The filtered dataset contains {len(df):,} stars')

# Interactive Hertzsprung Russell
Below, we show a [Hertzsprung–Russell diagram](https://en.wikipedia.org/wiki/Hertzsprung%E2%80%93Russell_diagram) in which you can choose the two magnitudes that define the color, enter an extra expression to filter by, and decide whether to plot the log density or just the density.

In [None]:
# Initial value of the `extra_filter` argument of the interactive plot below; any
# expression accepted by df.validate_expression can be entered instead.
# NOTE(review): distance is 1/parallax, so this presumably means "closer than 10 kpc"
# if parallax is in milliarcseconds — confirm the units.
default_filter = "distance < 10"

In [None]:
@widgets.interact_manual(magnitude1=columns_colors, magnitude2=columns_colors)
def plot(magnitude1=columns_colors[1], magnitude2=columns_colors[0], log=True, extra_filter=default_filter):
    """Draw a color-magnitude (Hertzsprung-Russell) density map of the selected stars.

    Parameters
    ----------
    magnitude1, magnitude2 : str
        Names of two photometry columns; the x-axis shows ``magnitude2 - magnitude1``.
    log : bool
        If True the density is shown on a log scale (``f="log"``), otherwise the
        raw density is shown (``f="identity"``).
    extra_filter : str
        Boolean expression, in terms of the dataframe's columns, applied on top of
        the filters already present on ``df``. An empty or whitespace-only string
        means "no extra filter".

    Returns
    -------
    None
        The figure is shown as a side effect. If the expression is invalid or no
        star matches it, a message is printed and nothing is plotted.
    """
    if extra_filter and extra_filter.strip():
        # validate the filter first, so a typo gives a readable message instead of a traceback
        try:
            df.validate_expression(extra_filter)
        except Exception as e:
            print("oops, invalid expression: " +str(e))
            return
        # df[<str>] turns the string into a vaex (boolean) expression; indexing with
        # that expression filters the dataset even more
        dff = df[df[extra_filter]]
    else:
        dff = df

    # Without any rows there are no percentile limits to compute and nothing to draw.
    n_stars = len(dff)
    if n_stars == 0:
        print("no stars match the filter: " + str(extra_filter))
        return

    color = dff[magnitude2] - dff[magnitude1]
    limits_color = dff.limits_percentage(str(color), percentage=99)
    # Take both axes from the same (filtered) dataframe; the y-limits run from 20 down
    # to -7 so that brighter stars (smaller magnitude) end up at the top.
    dff.plot(color, dff.M_g, limits=[limits_color, [20, -7]], shape=128, f="log" if log else "identity")
    plt.title(f'This plot contains {n_stars:,} stars')
    plt.show()

In [None]:
# Manually trigger the first time, so the dashboard opens with a plot instead of
# waiting for the user to press the button.
# NOTE(review): children[-2] is presumably the "Run Interact" button that
# interact_manual places just before the output widget — confirm against ipywidgets.
plot.widget.children[-2].click()


_(Note that there currently is a bug in voila that causes the old plot to stay on the page)_