# Generate configuration using the `revisitpy` package

Source: https://revisit.dev/docs/revisitpy/

Also: 
- https://github.com/revisit-studies/revisitpy-examples
- https://github.com/revisit-studies/revisitpy

In [22]:
import revisitpy as rvt
import pandas as pd 
import polars as pl
from datetime import date 
import json
from scipy.stats import qmc # quasi monte carlo submodule 
import numpy as np
import altair as alt
import vl_convert as vlc
# import revisitpy_server as rvt_server

In [7]:
# confirm working directory 

import os 
os.getcwd()

'/Users/shenglong/Downloads/study'

# A revisit study has the following components in its `config.json`: [original link](https://revisit.dev/docs/typedoc/interfaces/StudyConfig/#importedlibraries)  

- `$schema`: ...
- `studyMetadata`: ...
- `uiConfig`: ...
- `importedLibraries`: ...
- `components`: ...
- `sequence`: ...

---

Some other things: 

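- [`dataclasses`](https://docs.python.org/3/library/dataclasses.html): a Python standard-library module that auto-generates boilerplate methods (such as `__init__` and `__repr__`) for classes that mainly hold data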
- [`dataclasses.asdict`](https://docs.python.org/3/library/dataclasses.html): converts a dataclass instance to a `dict` (recursing into nested dataclasses, lists, and dicts)

`revisitpy` creates a dataclass named `DataRow` on the fly, with one attribute per header of the data file: [link](https://github.com/revisit-studies/revisitpy/blob/51414e51d4c1f9c1f66b3f9c642c3c40a60138fc/src/revisitpy/revisitpy.py#L559)

```python
# Create a data class with attributes based on the headers
        DataRow = make_dataclass("DataRow", [(header, Any) for header in headers])
```

# Metadata 

- Example here: https://revisit.dev/docs/revisitpy/examples/example_jnd_study/ 

In [9]:
# Study metadata: who runs the study, what it is called, and a date stamp
# taken from the day this cell is executed.
study_metadata = rvt.studyMetadata(
    authors=["Sheng Long"],
    organizations=["Northwestern University"],
    title="Retrieve Value Judgment Study",
    description="",
    date=date.today().isoformat(),  # ISO calendar date, i.e. YYYY-MM-DD
    version="1.0",
)

# UI configuration passed to the study config further down.
ui_config = rvt.uiConfig(
    contactEmail="shenglong@u.northwestern.edu",
    logoPath="assets/revisitLogoSquare.svg",
    withSidebar=True,
    withProgressBar=False,
    nextOnEnter=True,
    minWidthSize=800,
    minHeightSize=800,
)

# print(study_metadata)
# print(ui_config)

# Generate each section of the study

... starting from the introduction 

In [13]:
# Introduction: a single markdown component, wrapped in its own fixed-order
# sequence.
introduction = rvt.component(
    type="markdown",
    path="vis-decode-retrieve-value/assets/introduction.md",
    component_name__="introduction",
)
intro_seq = rvt.sequence(order="fixed", components=[introduction])

# print(introduction)
# print(intro_seq) 

Now let's define a typical component, starting with what we expect its **response** to look like: 

In [18]:
# Required numerical response, placed below the stimulus; reused by every
# trial component that lists it in its `response`.
response = rvt.response(
    id="retrieve_value",
    prompt="Your selected answer",
    location="belowStimulus",
    type="numerical",
    required=True,
)
print(response)

{
    "id": "retrieve_value",
    "location": "belowStimulus",
    "prompt": "Your selected answer",
    "required": true,
    "type": "numerical"
}


Now let's define the **component function**, which starts with the chart function (using Altair and scipy) and the vega-lite to vega function: 

In [19]:
def create_plot(n=30, point_radius=12.5, point_gap=8, canvas_size=500, optimization_on=True):
    """Build a square scatter chart of Poisson-disk-sampled gray dots.

    Args:
        n: number of points requested from the sampler.
        point_radius: dot radius in pixels. Dot centers are kept at least this
            far from every canvas edge.
        point_gap: minimum edge-to-edge gap, in pixels, requested between dots.
        canvas_size: width and height of the chart in pixels; also the upper
            end of both axis domains.
        optimization_on: if True, the sampler is created with
            ``optimization="lloyd"``; otherwise with ``optimization=None``.

    Returns:
        A tuple ``(chart, df, means)``: the Altair chart, the polars DataFrame
        of point coordinates (columns ``column_0`` and ``column_1``), and
        ``df.mean()``.

    Raises:
        ValueError: if ``canvas_size`` leaves no room for a dot, i.e.
            ``canvas_size <= 2 * point_radius``.
    """
    # Dot centers live in [point_radius, canvas_size - point_radius] on each axis.
    lower = point_radius
    upper = canvas_size - point_radius
    span = upper - lower
    if span <= 0:
        raise ValueError("canvas_size must be larger than 2 * point_radius")

    # The sampler works in the unit square and `qmc.scale` stretches distances
    # by `span`, so the pixel spacing has to be divided by `span` (not by
    # `canvas_size`) for the requested spacing to hold after scaling.
    min_center_distance = 2 * point_radius + point_gap
    # NOTE(review): with optimization="lloyd" the points are perturbed after
    # sampling, so the spacing is presumably only approximate in that mode.
    engine = qmc.PoissonDisk(
        d=2,
        radius=min_center_distance / span,
        hypersphere="volume",
        ncandidates=30,
        optimization="lloyd" if optimization_on else None,
    )

    # sample dots and scale them from the unit square to pixel coordinates
    sample = qmc.scale(engine.random(n), [lower, lower], [upper, upper])
    df = pl.DataFrame(sample)

    # Altair's mark `size` is an area, hence pi * r^2.
    chart = alt.Chart(df).mark_point(filled=True, size=point_radius * point_radius * np.pi, color="gray").encode(
        alt.X('column_0').scale(domain=(0, canvas_size)).axis(labels=False, grid=False, title=None),
        alt.Y('column_1').scale(domain=(0, canvas_size)).axis(labels=False, grid=False, title=None)
    ).properties(
        width=canvas_size,
        height=canvas_size
    )
    return chart, df, df.mean()

In [20]:
# wrap things up into a function

def vegalite_to_vega(c):
    """Compile a chart to a Vega spec and add click tracking to it.

    The compiled spec is extended with three pieces:

    * a ``clicked_points`` dataset that is cleared and then refilled from the
      ``clickX`` signal, so it holds only the most recent click;
    * a red symbol mark drawn from ``clicked_points``;
    * the ``clickX`` signal itself, updated on every ``click`` event.

    Args:
        c: chart object exposing ``to_json()`` (e.g. the chart returned by
            ``create_plot``).

    Returns:
        The Vega spec with the additions above.
    """
    # generate vega spec
    vega_spec = vlc.vegalite_to_vega(c.to_json(), vl_version="5.20")

    # add data to keep track of the clicked points:
    # drop the previous click first, then insert the new one
    vega_spec.setdefault('data', []).append({
        'name': 'clicked_points',
        'values': [],
        'on': [
            {
                "trigger": "clickX",
                "remove": True
            },
            {
                "trigger": "clickX",
                "insert": "clickX"
            }
        ]
    })

    # add marks to show the clicked mark
    vega_spec.setdefault('marks', []).append({
        'type': 'symbol',
        "from": {"data": "clicked_points"},
        "encode": {
            "enter": {
                "x": {"scale": "x", "field": "x"},
                "y": {"scale": "y", "field": "y"},
                "fill": {"value": "red"},
                "size": {"value": 144}
            }
        }
    })

    # Append the click signal instead of replacing the list, so signals that
    # the compiler may already have emitted are preserved.
    # NOTE(review): `500 - y()` hard-codes a 500 px tall canvas, and feeding the
    # stored values through the x/y scales assumes the axis domains equal the
    # pixel range (0..500) -- confirm before using other canvas sizes.
    vega_spec.setdefault('signals', []).append({
        "name": "clickX",
        "value": None,
        "on": [{"events": "click", "update": "{x: x(), y: 500 - y(), timestamp: now()}"}]
    })
    return vega_spec

In [25]:
# # test 
# c = create_plot()[0]
# c.show()
# vegalite_to_vega(c)

In [10]:
# Let us not worry about the enc-qa related stuff for now ... 

# # read in external data 

# ret_df = pl.read_parquet('public/vis-decode-retrieve-value/encqa_v1_ret.parquet')
# # add a column for the id 
# ret_df = ret_df.with_row_index("id", offset=1)
# ret_df.head()

# print(ret_df.select(pl.col('chart_spec')).head(1).item())

In [None]:
# # ret_df.filter(pl.col("task") == "compute_derived_value_exact").select(["chart_spec"]).head(1).item()
# ret_df.filter(pl.col("task") == "compute_derived_value_exact").head(1)

# Generate component related 

In [None]:
# # rvt.sequence(order = 'random').from_data([(1, 2), (2, 3)])
# # asdict([1, 23])
# new_df = pd.DataFrame({"id": [i + 1 for i in range(10)]}, {"val": [i + 1 for i in range(10)]})
# print(new_df)
# new_df.to_csv('data.csv', index=False)
# print(rvt.data("data.csv"))

In [None]:
# Number of data rows; also passed as `numSamples` to the sequence below.
# It must be defined here: the statements below read it.
n_rows = 25

# One row per id (1-based), written to disk because `rvt.data` is given a
# CSV path.
new_df = pd.DataFrame({"id": list(range(1, n_rows + 1))})
new_df.to_csv('data.csv', index=False)

# Random-order sequence built from the CSV rows.
data_sequence = rvt.sequence(order='random', numSamples=n_rows).from_data(rvt.data("data.csv"))
print(data_sequence)

In [None]:
def retrieve_value_component_function(id):
    """Build the Vega component for the retrieve-value trial with this ``id``.

    Looks up the row of the module-level ``ret_df`` whose ``id`` column equals
    ``id``, parses its ``chart_spec`` string as JSON for the component config,
    and uses its ``question`` as the instruction shown below the stimulus.

    NOTE(review): ``ret_df`` must exist at module level before this is called;
    in this notebook the cell that loads it is currently commented out.

    Args:
        id: value matched against the ``id`` column of ``ret_df``.

    Returns:
        The component created by ``rvt.component``.
    """
    matching = ret_df.filter(pl.col('id') == id)
    spec_json = matching.select(pl.col('chart_spec')).item()
    question_text = matching.select(pl.col('question')).item()

    return rvt.component(
        component_name__=f'retrieve_value_{id}',
        type='vega',
        response=[response],
        config=json.loads(spec_json),
        instruction=f'{question_text}',
        instructionLocation='belowStimulus',
        withSidebar=False,
    )

In [None]:
# data_sequence.component(retrieve_value_component_function)

In [None]:
# print(data_sequence)
# print(data_sequence.get_components()[0])

In [None]:
# Overall study flow: the fixed introduction followed by the data-driven
# sequence. `studyConfig` below reads `main_sequence`, so it must be defined.
main_sequence = rvt.sequence(order='fixed', components=[introduction]) + data_sequence

# Assemble the complete study configuration from the pieces defined above.
study = rvt.studyConfig(
    schema="https://raw.githubusercontent.com/revisit-studies/study/v2.3.1/src/parser/StudyConfigSchema.json",
    uiConfig=ui_config,
    studyMetadata=study_metadata,
    sequence=main_sequence,
    importedLibraries=['virtual-chinrest']
)
print(study)

In [None]:
# code if we are to use the rvt_server 

# process = rvt_server.serve()
# process.terminate()
# w = rvt.widget(study, server = True)

In [None]:
# str(study)
# print(study)

In [None]:
# Save study 

In [None]:
# Write the study configuration (its string form) straight to config.json.
config_path = 'public/vis-decode-retrieve-value/config.json'
with open(config_path, mode='w', encoding='utf-8') as config_file:
    config_file.write(str(study))

In [187]:
# Exploratory cell: sample with SciPy's PoissonDisk (no bounds passed), then
# scale the coordinates by hand and plot them.

engine = qmc.PoissonDisk(d=2, radius = 0.08, rng = None)
sample = engine.random(30).round(2) # round coordinates to 2 decimal places
df = pl.DataFrame(sample * 300)  # multiply the sampled coordinates by 300
# Plot using Altair; the parenthesized chart is the cell's last expression.
(
    
    alt.Chart(df).mark_point(filled = True, size = 14 * 14 * np.pi).encode(
        alt.X('column_0').scale(domain=(0, 300)),
        alt.Y('column_1').scale(domain=(0, 300))
    ).properties(
        # width = 500,
        # height = 500
    )
)

In [427]:
# Exploratory cell: pass the bounds to the sampler directly instead of scaling
# afterwards. The radius is 2 * 12.5 + 8 = 33.
# NOTE(review): this presumably means `radius` is in the same units as the
# bounds -- confirm against the SciPy documentation.
engine = qmc.PoissonDisk(d=2, radius = (12.5 * 2 + 8), # optimization = "lloyd",
                         l_bounds = [12.5, 12.5], u_bounds= [500 - 12.5, 500 - 12.5]
                        )
sample = engine.random(30)
df = pl.DataFrame(sample)
# Plot using Altair; mark size is an area (pi * 12.5^2).
(
    
    alt.Chart(df).mark_point(filled = True, color = "gray", size = 12.5 * 12.5 * np.pi).encode(
        x=alt.X('column_0').scale(domain=(0, 500)),
        y=alt.Y('column_1').scale(domain=(0, 500))
    ).properties(
        width = 500,
        height = 500
    )
)

# Let us scale things from $[0, 1]$ to $[0, 500]$ by directly manipulating the `sample`: 

In [195]:
# NOTE(review): the rescaling line below is commented out, so this cell plots
# whatever `df` an earlier cell left behind.
# df = pl.DataFrame(sample * 500)
# Plot using Altair with axis labels hidden and the grid turned off.
(
    
    alt.Chart(df).mark_circle(filled = True, size = np.pi * 12 * 12).encode(
        alt.X('column_0').axis(domainColor='black', labels = False).scale(domain=(0, 500)),
        alt.Y('column_1').scale(domain=(0, 500)).axis(labels = False)
    ).properties(
        # width = 500,
        # height = 500
    ).configure_axis(grid = False)
)

# A note on units

Vega-Altair measures sizes in **pixels**. According to this [website](https://altair-viz.github.io/altair-viz-v4/user_guide/marks.html#:~:text=%2D%20For%20point%20/%20circle%20/%20square,to%20null%20to%20remove%20stroke.), the `size` of `mark_point` and `mark_circle` is the mark's pixel **area**, and its default value is 30. So the default radius is about $\sqrt{30/\pi} \approx 3$ pixels.

In [196]:
# (30 / np.pi) ** (1/2)

## Implementing Bridson's Poisson Disk Sampler

In [330]:
import math
import random
import numpy as np

def bridson_poisson_disk(width,
                         height,
                         r,
                         k=30,
                         random_state=None,
                         k_points=None):
    """
    Bridson Poisson-disk sampling (2D).

    Points are grown outwards from one random seed point: a random "active"
    point is picked, up to ``k`` candidates are proposed around it at a
    distance in [r, 2r), and the first candidate that lies inside the domain
    and is at least ``r`` away from every existing point is accepted. An
    active point that yields no accepted candidate is retired.

    Args:
        width (float): domain width (x in [0, width)); must be > 0
        height (float): domain height (y in [0, height)); must be > 0
        r (float): minimum distance between points; must be > 0
        k (int): max attempts per active point (default 30)
        random_state (int | Random | numpy.Generator | None): seed or RNG.
            Python and NumPy integers seed a fresh ``random.Random``; a
            ``random.Random`` is used as is; a NumPy ``Generator`` is used
            only to draw a seed; ``None`` gives an unseeded RNG.
        k_points (int | None): if set, stop as soon as this many points have
            been generated. Fewer points are returned when the domain fills
            up first.

    Returns:
        np.ndarray shape (n,2): sampled points

    Raises:
        ValueError: if ``width``, ``height`` or ``r`` is not positive, if
            ``k_points`` is negative, or if ``random_state`` is of an
            unsupported type.
    """
    if not (width > 0 and height > 0):
        raise ValueError("width and height must be positive")
    if not r > 0:
        raise ValueError("r must be positive")
    if k_points is not None:
        if k_points < 0:
            raise ValueError("k_points must be non-negative or None")
        if k_points == 0:
            # Keep the documented (n, 2) shape even when nothing is requested.
            return np.empty((0, 2), dtype=float)

    # RNG handling
    if random_state is None:
        rng = random.Random()
    elif isinstance(random_state, random.Random):
        rng = random_state
    elif isinstance(random_state, (int, np.integer)):
        rng = random.Random(int(random_state))
    else:
        # allow numpy Generator
        try:
            rng = random.Random(int(random_state.integers(0, 2**31 - 1)))
        except Exception as err:
            raise ValueError("random_state must be int, random.Random, numpy.Generator, or None") from err

    # Cell size and grid: with cells of side r / sqrt(2), each cell can hold
    # at most one sample, so the grid stores a single point (or None) per cell.
    cell_size = r / math.sqrt(2)
    cols = int(math.ceil(width / cell_size))
    rows = int(math.ceil(height / cell_size))
    grid = [[None] * rows for _ in range(cols)]

    def grid_coords(pt):
        # The clamp guards against float rounding pushing a point that is just
        # inside the domain one cell past the end of the grid.
        gx = min(int(pt[0] // cell_size), cols - 1)
        gy = min(int(pt[1] // cell_size), rows - 1)
        return gx, gy

    def in_domain(pt):
        return (0 <= pt[0] < width) and (0 <= pt[1] < height)

    def neighbors_ok(pt, gx, gy):
        # Any point closer than r must lie within two cells in each direction.
        x, y = pt
        xmin = max(gx - 2, 0)
        xmax = min(gx + 2, cols - 1)
        ymin = max(gy - 2, 0)
        ymax = min(gy + 2, rows - 1)
        r2 = r * r
        for i in range(xmin, xmax + 1):
            for j in range(ymin, ymax + 1):
                gpt = grid[i][j]
                if gpt is not None:
                    dx = gpt[0] - x
                    dy = gpt[1] - y
                    if dx*dx + dy*dy < r2:
                        return False
        return True

    samples = []
    active = []

    # initial point: uniform in domain
    p0 = (rng.random() * width, rng.random() * height)
    samples.append(p0)
    active.append(p0)
    gx0, gy0 = grid_coords(p0)
    grid[gx0][gy0] = p0

    # main loop: stop when no active points remain or enough points exist.
    # Each pass adds at most one point, so the result never exceeds k_points.
    while active and (k_points is None or len(samples) < k_points):
        # pick a random index from active list (Bridson)
        idx = rng.randrange(len(active))
        base = active[idx]
        for _ in range(k):
            # Candidate in the annulus around `base`: the distance is drawn
            # uniformly from [r, 2r) (uniform in radius, not in area).
            radius = r * (1 + rng.random())
            theta = rng.random() * 2 * math.pi
            qx = base[0] + radius * math.cos(theta)
            qy = base[1] + radius * math.sin(theta)
            q = (qx, qy)
            if not in_domain(q):
                continue
            gx, gy = grid_coords(q)
            if neighbors_ok(q, gx, gy):
                samples.append(q)
                active.append(q)
                grid[gx][gy] = q
                break
        else:
            # all k attempts failed: remove from active list
            active.pop(idx)

    return np.array(samples, dtype=float)


In [332]:
# Ask for 30 points on a 500 x 500 domain with a fixed seed (42).
pts_30 = bridson_poisson_disk(width=500, height=500, r=32, k=30, random_state=42, k_points=30)
print(pts_30.shape)   # (30, 2)
pts_30  # last expression of the cell, so the array itself is displayed

(30, 2)


array([[319.71339923,  12.50537761],
       [345.1951729 ,  43.12610712],
       [289.03355417,  28.89249443],
       [311.78194403,  57.33277666],
       [312.77499712,  96.71296749],
       [376.2071572 ,   7.24446856],
       [393.73736901,  66.49300517],
       [414.7394492 ,  15.12324105],
       [472.29710016,   7.11026132],
       [253.35961736,  34.47782742],
       [216.63776094,  28.02751022],
       [227.46411777,  72.26233037],
       [195.3742457 ,  52.72159766],
       [262.24790996,  65.92300967],
       [277.0556294 ,  99.26888635],
       [483.03259481,  53.42278465],
       [445.84835392,  67.45284869],
       [344.85579463,  91.29800151],
       [250.4029421 , 144.21469815],
       [350.78551592, 153.47989881],
       [375.04259698, 125.78324492],
       [396.02464273, 100.05173527],
       [326.0195227 , 126.95330475],
       [349.94887049, 212.09550534],
       [425.15345584, 156.12983803],
       [383.9590493 , 162.53598927],
       [283.26484544, 133.69105419],
 

In [425]:
# Same sampler call without `random_state`, so the RNG is unseeded and the
# points differ from run to run.
pts_30 = bridson_poisson_disk(width=500, height=500, r=32, k=30, k_points=30)
df = pl.DataFrame(pts_30)
# Plot using Altair; mark size is an area (pi * 12.5^2).
(
    
    alt.Chart(df).mark_point(filled = True, color = "gray", size = 12.5 * 12.5 * np.pi).encode(
        x=alt.X('column_0').scale(domain=(0, 500)),
        y=alt.Y('column_1').scale(domain=(0, 500))
    ).properties(
        width = 500,
        height = 500
    )
)