# Generating random charts with `Altair` and `Scipy`

Based on _The Weighted Average Illusion:  Biases in Perceived Mean Position in Scatterplots_, the authors used the following: 

> To generate the x- and y-data, we used Poisson disk sampling [50] to produce 30 uniquely distributed point grids, with minimum distance between the boundaries of any two points set at 8 pixels. This methodology is similar to Gleicher et al. [34]. Each dataset always contained 30 marks, with the number of points selected in piloting.

For the sake of integrating with the `Revisit` platform, we will use `Altair` and `Scipy`. Luckily, `scipy` provides a Poisson-disk sampler. 

- Note that the QMC engines only provide an $n \times d$ array of numbers in $[0, 1]$, so the samples have to be rescaled to pixel coordinates afterwards. ([source](https://docs.scipy.org/doc/scipy/reference/stats.qmc.html))

In [None]:
import polars as pl
from scipy.stats import qmc # quasi monte carlo submodule 
import numpy as np
import altair as alt

## Playing with the basic syntax 

In [2]:
# Poisson disk sampling 
# https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.qmc.PoissonDisk.html

# Draw 30 two-dimensional points in the unit square; `radius` is the spacing
# handed to the sampler. The generator created here is actually passed to the
# engine (swap in `np.random.default_rng(<seed>)` for reproducible draws).
rng = np.random.default_rng()
engine = qmc.PoissonDisk(d=2, radius=0.08, rng=rng)
sample = engine.random(30).round(2)  # round coordinates to 2 decimals

# turn sample into df for plotting (columns are auto-named column_0 / column_1)
df = pl.DataFrame(sample)

# plot using altair; both axes are pinned to the sampler's [0, 1] range
(
    alt.Chart(df).mark_point(filled=True).encode(
        alt.X('column_0').scale(domain=(0, 1)),
        alt.Y('column_1').scale(domain=(0, 1))
    )
)

The default width and height of an Altair chart are **300 pixels** each. The default `mark_point` size (**pixel area**, NOT radius) is 30 square pixels. 

## Define plotting function 

In [11]:
def create_plot(n=30, point_radius=12.5, point_gap=8, canvas_size=500, optimization_on=True):
    """Build a random scatterplot of equally sized dots via Poisson disk sampling.

    Args:
        n: number of points requested from the sampler.
        point_radius: radius of every dot, in pixels.
        point_gap: minimum gap between the boundaries of two dots, in pixels.
        canvas_size: width and height of the (square) chart, in pixels.
        optimization_on: if True, the sampler post-processes the points with
            its "lloyd" optimization; if False, no optimization is applied.

    Returns:
        A tuple `(chart, df, mean)`: the Altair chart, a polars DataFrame with
        the point centers (`column_0`, `column_1`), and `df.mean()`.

    Raises:
        ValueError: if the canvas is too small to hold a single dot.
    """
    # Dot centers are confined to [point_radius, canvas_size - point_radius]
    # so that no dot is clipped by the canvas edge.
    drawable_extent = canvas_size - 2 * point_radius
    if drawable_extent <= 0:
        raise ValueError(
            f"canvas_size ({canvas_size}) must be larger than the dot diameter "
            f"({2 * point_radius})"
        )

    # initialize poisson engine
    # The sampler works in the unit square, and the unit square is mapped onto
    # `drawable_extent` pixels below. Normalizing the wanted center-to-center
    # distance by that same extent makes the sampled spacing equal
    # 2 * point_radius + point_gap pixels on screen. (Dividing by canvas_size
    # instead shrinks the spacing by drawable_extent / canvas_size.)
    # NOTE(review): the "lloyd" optimization moves points after sampling, so
    # the spacing may not hold exactly when optimization_on is True.
    engine = qmc.PoissonDisk(d=2,
                             radius=(point_radius * 2 + point_gap) / drawable_extent,
                             hypersphere="volume",
                             ncandidates=30,
                             optimization="lloyd" if optimization_on else None)
    # sample dots and scale them from the unit square to pixel coordinates
    sample = qmc.scale(engine.random(n),
                       [point_radius, point_radius],
                       [canvas_size - point_radius, canvas_size - point_radius])
    # turn it to dataframe
    df = pl.DataFrame(sample)
    # turn it to chart; mark `size` is an area, hence pi * r^2
    chart = alt.Chart(df).mark_point(filled=True, size=point_radius * point_radius * np.pi, color="gray").encode(
        alt.X('column_0').scale(domain=(0, canvas_size)).axis(labels=False, grid=False, title=None),
        alt.Y('column_1').scale(domain=(0, canvas_size)).axis(labels=False, grid=False, title=None)
    ).properties(
        width=canvas_size,
        height=canvas_size
    )
    return chart, df, df.mean()

In [12]:
# With the default settings (optimization on) the points spread more evenly
# across the canvas; keep only the chart from the returned tuple.
default_plot_parts = create_plot()
default_plot_parts[0]

In [13]:
# Unpack ONE call: every create_plot() call draws a fresh random sample, so
# indexing two separate calls would pair points and a mean that belong to
# different charts.
sample_chart, sample_points, sample_mean = create_plot()
sample_mean

column_0,column_1
f64,f64
196.123503,357.47793


In [38]:
# without the optimization the dots look more condensed
create_plot(optimization_on=False)[0]

## Outputting things into a Vega spec

- Using `vl-convert`: https://github.com/vega/vl-convert to convert things from `vega-lite` to `vega`.
- We need to add [signal](https://vega.github.io/vega/docs/signals/) ...
- ... and [trigger](https://vega.github.io/vega/docs/triggers/) to the thing to make it work with clicks:

In [41]:
import vl_convert as vlc

In [84]:
# Build an un-optimized chart, render it, then compile its Vega-Lite JSON
# into a Vega spec with vl-convert.
unoptimized_parts = create_plot(optimization_on=False)
chart = unoptimized_parts[0]
chart.show()
vega_lite_json = chart.to_json()
vega_spec = vlc.vegalite_to_vega(vega_lite_json, vl_version="5.20")

In [85]:
# The converted spec is a plain dictionary, so it can be extended in place.
# Register a `clicked_points` data set driven by the `clickX` signal: one
# trigger removes the stored records, the other inserts the signal's value.
# NOTE(review): presumably this keeps only the most recent click — confirm
# against Vega's trigger semantics.
remove_on_click = {
    "trigger": "clickX",
    "remove": True
}
insert_on_click = {
    "trigger": "clickX",
    "insert": "clickX"
}
clicked_points_data = {
    'name': 'clicked_points',
    'values': [],
    'on': [remove_on_click, insert_on_click],
}
vega_spec['data'].append(clicked_points_data)

In [86]:
# Draw every record of `clicked_points` as a red symbol, positioned through
# the chart's own x / y scales.
click_marker_encoding = {
    "x": {"scale": "x", "field": "x"},
    "y": {"scale": "y", "field": "y"},
    "fill": {"value": "red"},
    "size": {"value": 144},
}
click_marker = {
    'type': 'symbol',
    "from": {"data": "clicked_points"},
    "encode": {"enter": click_marker_encoding},
}
vega_spec['marks'].append(click_marker)

In [87]:
# Install the `clickX` signal: each click stores a record holding the click
# position (y flipped against the 500 px canvas) and a timestamp.
click_record_expression = "{x: x(), y: 500 - y(), timestamp: now()}"
click_signal = {
    "name": "clickX",
    "value": None,
    "on": [{"events": "click", "update": click_record_expression}],
}
vega_spec['signals'] = [click_signal]

In [88]:
# Display the spec after the data, marks and signals additions above.
vega_spec

{'$schema': 'https://vega.github.io/schema/vega/v5.json',
 'background': 'white',
 'padding': 5,
 'width': 500,
 'height': 500,
 'style': 'cell',
 'data': [{'name': 'data-e0818a145a9412a08b66976e43c90e9c',
   'values': [{'column_0': 196.9407971791853, 'column_1': 457.4290913707163},
    {'column_0': 160.5413867935771, 'column_1': 404.1606580533335},
    {'column_0': 200.83194363638893, 'column_1': 413.5958385941563},
    {'column_0': 147.84984311331445, 'column_1': 457.85922525591224},
    {'column_0': 248.37185198671497, 'column_1': 429.45733372717785},
    {'column_0': 116.8527941987445, 'column_1': 469.3660865711446},
    {'column_0': 116.20249953730657, 'column_1': 417.1507542841777},
    {'column_0': 295.23635466543385, 'column_1': 435.44612270445737},
    {'column_0': 280.5822258044646, 'column_1': 374.5770308008113},
    {'column_0': 225.24876367852815, 'column_1': 484.9726967118699},
    {'column_0': 249.29991466528273, 'column_1': 386.2391553701547},
    {'column_0': 192.59986

In [89]:
import json

# Export: serialize the spec with a 4-space indent, then write the text to disk.
vega_json_string = json.dumps(vega_spec, indent=4)

with open('spec_2.json', 'w') as f:
    f.write(vega_json_string)

In [90]:
# wrap things up into a function 

def vegalite_to_vega(c):
    """Compile an Altair chart to a Vega spec that records and shows clicks.

    Args:
        c: an Altair chart; its `to_json()` output is compiled with
            `vl_convert` (Vega-Lite version "5.20").

    Returns:
        The Vega spec as a dict, extended with
        - a `clicked_points` data set updated by the `clickX` signal,
        - a red symbol mark drawn from `clicked_points`,
        - the `clickX` signal itself, set on every click event.
    """
    # generate vega spec
    vega_spec = vlc.vegalite_to_vega(c.to_json(), vl_version="5.20")

    # The click's y pixel is subtracted from the canvas height before it is
    # stored. Read that height from the compiled spec instead of assuming
    # 500; fall back to 500 when the spec carries no top-level height.
    # NOTE(review): this flip assumes the y scale domain is (0, height), as
    # in the charts produced by create_plot.
    canvas_height = vega_spec.get('height', 500)

    # add data to keep track of the clicked points
    vega_spec['data'].append({
        'name': 'clicked_points',
        'values': [],
        'on': [
            {
                "trigger": "clickX",
                "remove": True
            },
            {
                "trigger": "clickX",
                "insert": "clickX"
            }
        ]
    })

    # add marks to show the clicked mark
    vega_spec['marks'].append({
        'type': 'symbol',
        "from": {"data": "clicked_points"},
        "encode": {
            "enter": {
                "x": {"scale": "x", "field": "x"},
                "y": {"scale": "y", "field": "y"},
                "fill": {"value": "red"},
                "size": {"value": 144}
            }
        }
    })

    # add the click signal; append rather than assign so that signals already
    # emitted by the compiler are not discarded
    vega_spec.setdefault('signals', []).append({
        "name": "clickX",
        "value": None,
        "on": [{
            "events": "click",
            "update": f"{{x: x(), y: {canvas_height} - y(), timestamp: now()}}"
        }]
    })
    return vega_spec

In [91]:
# Smoke test: draw a fresh un-optimized chart, render it, and show the
# click-enabled Vega spec produced for it.

chart, _test_points, _test_mean = create_plot(optimization_on=False)
chart.show()
vegalite_to_vega(chart)

{'$schema': 'https://vega.github.io/schema/vega/v5.json',
 'background': 'white',
 'padding': 5,
 'width': 500,
 'height': 500,
 'style': 'cell',
 'data': [{'name': 'data-ac0a867f4454a9643918cc206e2ea69b',
   'values': [{'column_0': 200.85862517911423, 'column_1': 183.34208179169215},
    {'column_0': 234.0977290185621, 'column_1': 141.00214474282495},
    {'column_0': 252.33970413447713, 'column_1': 171.89103527193922},
    {'column_0': 185.02851825682365, 'column_1': 146.79648492980482},
    {'column_0': 157.18189967307288, 'column_1': 201.21153805957243},
    {'column_0': 229.81694940814947, 'column_1': 218.53211608556535},
    {'column_0': 191.6448070461004, 'column_1': 215.39467274450732},
    {'column_0': 183.70596756488985, 'column_1': 98.83279983890198},
    {'column_0': 239.3175473888542, 'column_1': 81.4703503218374},
    {'column_0': 281.52158917000787, 'column_1': 105.32709902633462},
    {'column_0': 287.10324004620753, 'column_1': 166.32363123515412},
    {'column_0': 232

## Calculate mean 

In [209]:
# a: the Altair chart, b: DataFrame of point centers, c: their column means
a, b, c = create_plot(optimization_on=False)

In [210]:
# Display the scatterplot on its own.
a

In [212]:
# Overlay the mean position (`c`) on the scatterplot (`a`) as a large red
# circle with a black outline, using the same 0-500 domain on both axes.
canvas_scale = alt.Scale(domain=[0, 500])
centroid_style = {
    'size': 15**2 * np.pi,
    'color': 'red',
    'stroke': 'black',
    'strokeWidth': 2,
}
centroid_base = alt.Chart(c).mark_circle(**centroid_style)
centroid_mark = centroid_base.encode(
    x=alt.X('column_0', scale=canvas_scale, axis=None),
    y=alt.Y('column_1', scale=canvas_scale, axis=None),
)

# Stack the centroid on top of the original chart
chart_with_centroid = a + centroid_mark

In [213]:
# Display the scatterplot with the centroid layered on top.
chart_with_centroid

---

# Recreating the "Perceptual Pull" paper stimuli. 

- Size: 538 px (W) * 140 px (H) 
- Three levels of mean: low, medium, high? 
- 12 dots / bars
- for line (dot): 15 pixels apart
- for bar: 5 pixels apart 

In [2]:
# Use the same padding value (10) on all four sides of every embedded chart.
embed_padding = {side: 10 for side in ("left", "right", "bottom", "top")}
alt.renderers.set_embed_options(padding=embed_padding)

RendererRegistry.enable('default')

In [13]:
from typing import Literal

Data_level = Literal['H', 'M', 'L']
Chart_type = Literal['line', 'bar']

def generate_perceptual_pull_plot(level:Data_level = 'L', chart_type:Chart_type = 'line', 
    height:int = 140, width: int = 518, n_points: int = 12, noise_std: float = 5):
    """
    Generate a perceptual pull plot with random data points 

    The y values are normal noise added to a level-dependent mean
    ('H': 120, 'M': 105, 'L': 90) and rounded to 2 decimals; the x values are
    the indices 0 .. n_points - 1.

    Args: 
        level: which mean to use, one of 'H', 'M', 'L' 
        chart_type: 'line' (line with point overlay) or 'bar' 
        height: chart height in pixels; also the upper bound of the y domain 
        width: chart width in pixels 
            (NOTE(review): 518 plus 10 px embed padding on each side would
            give a 538 px stimulus — presumably intentional, confirm) 
        n_points: number of data points to generate 
        noise_std: standard deviation of the normal noise 
    
    Returns: 
        An Altair chart object 

    Raises: 
        ValueError: if chart_type is neither 'line' nor 'bar' 
        KeyError: if level is not one of 'H', 'M', 'L' 
    """
    # Fail early with a clear message instead of reaching the end of the
    # function with no chart built.
    if chart_type not in ('line', 'bar'):
        raise ValueError(f"chart_type must be 'line' or 'bar', got {chart_type!r}")

    lvl_dict = {'H': 120, 'M': 105, 'L': 90}
    level_mean = lvl_dict[level]

    normal_noise = np.random.normal(0, noise_std, size=n_points)
    data = np.round(normal_noise + level_mean, 2)
    df = pl.DataFrame({'x': list(range(n_points)), 'y': data})

    if chart_type == 'line':
        return (
            alt.Chart(df).mark_line(point=alt.OverlayMarkDef(fill="black"), color="black").encode(
                alt.X('x').axis(labels = False, grid = False, title = None, tickSize=0),
                alt.Y('y').scale(domain=(0, height)).axis(labels = False, grid = False, title = None, tickSize=0)
            ).properties(
                width = width,
                height = height
            ).configure_view(strokeWidth=0).configure_axis(grid=False, domain=False)
        )

    # chart_type == 'bar': ordinal x with no band padding, reversed y scale.
    # NOTE(review): `point=True` is kept from the original; Vega-Lite
    # documents `point` for line/area marks — confirm it is needed here.
    return ( 
        alt.Chart(df).mark_bar(point=True, binSpacing=0, color="black").encode(
            alt.X('x:O').axis(labels = False, grid = False, title = None, tickSize=0),
            alt.Y('y').scale(domain=(0, height), reverse=True).axis(labels = False, grid = False, title = None, tickSize=0)
        ).properties(
            width = width,
            height = height
        ).configure_scale(
            bandPaddingInner=0
        ).configure_view(strokeWidth=0).configure_axis(grid=False, domain=False)
    )

In [15]:
# Render a line chart (the default chart_type) at the low ('L') level.
generate_perceptual_pull_plot(level='L')