<a href="https://colab.research.google.com/github/pinballsurgeon/NLP_Engine_Study/blob/main/perception_baseline.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [75]:
!pip install squarify pypdf



In [76]:
# imports
import numpy as np, pandas as pd, matplotlib.pyplot as plt
from matplotlib.patches import Rectangle
from matplotlib.backends.backend_pdf import PdfPages
import math, os
import squarify

In [77]:
# reference registry
refs = pd.DataFrame([

###### physical_body
    dict(class_id="physical_body", aspect_id="vision_wavelength", aspect_label="Vision (wavelength)", unit="meters", scale="log",
         Umin=1e-12, Umax=1e4, Hmin=380e-9, Hmax=740e-9,
         source_universal="NASA EMS", source_human="NASA visible"),

    dict(class_id="physical_body", aspect_id="vision_luminance", aspect_label="Vision luminance", unit="cd/m^2", scale="log",
         Umin=1e-9, Umax=1e9, Hmin=1e-6, Hmax=1e6,
         source_universal="OOM luminance", source_human="Light adaptation"),

    dict(class_id="physical_body", aspect_id="hearing_frequency", aspect_label="Hearing (frequency)", unit="Hz", scale="log",
         Umin=1e-1, Umax=1e7, Hmin=20.0, Hmax=20000.0,
         source_universal="Hearing ranges", source_human="NCBI human range"),

    dict(class_id="physical_body", aspect_id="hearing_pressure", aspect_label="Hearing level (pressure)", unit="Pa", scale="log",
         Umin=1e-6, Umax=1e7, Hmin=2e-5, Hmax=2e1,
         source_universal="Sound pressure", source_human="SPL reference"),

    dict(class_id="physical_body", aspect_id="touch_vibration", aspect_label="Touch vibration", unit="Hz", scale="log",
         Umin=1e-1, Umax=1e6, Hmin=2.0, Hmax=1000.0,
         source_universal="Mechanical vibration", source_human="Bolanowski 1988"),

    dict(class_id="physical_body", aspect_id="touch_spatial", aspect_label="Touch spatial resolution", unit="meters", scale="log",
         Umin=1e-9, Umax=1.0, Hmin=2e-3, Hmax=4e-2,
         source_universal="Contact scale", source_human="Weinstein 1968"),

    dict(class_id="physical_body", aspect_id="temporal_gap", aspect_label="Temporal gap detection", unit="seconds", scale="log",
         Umin=1e-15, Umax=3.1536e9, Hmin=2e-3, Hmax=1e-1,
         source_universal="Time scales", source_human="Gap detection review"),

##### multisensory


    dict(class_id="multisensory", aspect_id="av_tbw_simple", aspect_label="Audio–visual TBW (flash/beep)", unit="seconds", scale="log",
        Umin=1e-3, Umax=10.0, Hmin=0.08, Hmax=0.20,
        source_universal="TBW construct review (Wallace & Stevenson 2014): https://pmc.ncbi.nlm.nih.gov/articles/PMC4326640/",
        source_human="Typical adult TBW ~160 ms for simple AV (Zerr et al. 2019): https://www.frontiersin.org/articles/10.3389/fpsyg.2019.02489/full"),

    dict(class_id="multisensory", aspect_id="av_tbw_speech", aspect_label="Audio–visual TBW (speech)", unit="seconds", scale="log",
        Umin=1e-3, Umax=10.0, Hmin=0.12, Hmax=0.30,
        source_universal="Intersensory synchrony tutorial (Vroomen & Keetels 2010): https://pubmed.ncbi.nlm.nih.gov/20436185/",
        source_human="Speech TBW often wider ~200–250 ms (Hillock-Dunn et al. 2016): https://www.sciencedirect.com/science/article/abs/pii/S0028393216300525"),

    dict(class_id="multisensory", aspect_id="vh_mle_consistency", aspect_label="Visual–haptic MLE consistency", unit="fraction", scale="linear",
        Umin=0.0, Umax=1.0, Hmin=0.70, Hmax=1.00,
        source_universal="MLE optimality 0–1 consistency range",
        source_human="Near-optimal integration (Ernst & Banks 2002 Nature): https://pubmed.ncbi.nlm.nih.gov/11807554/; Tool-mediated MLE (Takahashi et al. 2017): https://pmc.ncbi.nlm.nih.gov/articles/PMC5380699/"),

    dict(class_id="multisensory", aspect_id="ventriloquism_bias", aspect_label="Ventriloquism spatial bias", unit="degrees", scale="linear",
        Umin=0.0, Umax=45.0, Hmin=1.0, Hmax=10.0,
        source_universal="Ventriloquism paradigms/review (Bruns 2019): https://www.frontiersin.org/articles/10.3389/fnint.2019.00051/full",
        source_human="Near-optimal AV spatial integration; bias scales with disparity (Alais & Burr 2004): https://pubmed.ncbi.nlm.nih.gov/14761661/"),

    dict(class_id="multisensory", aspect_id="sifi_tbw", aspect_label="Sound-induced flash TBW", unit="seconds", scale="log",
        Umin=1e-3, Umax=1.0, Hmin=0.05, Hmax=0.25,
        source_universal="Task-dependent AV TBW framework (Stevenson & Wallace 2013): https://pmc.ncbi.nlm.nih.gov/articles/PMC3711231/",
        source_human="SIFI temporal window tens–hundreds of ms (Hirst et al. 2020 review): https://www.sciencedirect.com/science/article/pii/S0149763420305637"),

    dict(class_id="multisensory", aspect_id="mcgurk_susceptibility", aspect_label="McGurk susceptibility", unit="fraction", scale="linear",
        Umin=0.0, Umax=1.0, Hmin=0.10, Hmax=0.80,
        source_universal="Susceptibility expressed as 0–1 proportion",
        source_human="Large variability across adults/stimuli (Mallick et al. 2015): https://pmc.ncbi.nlm.nih.gov/articles/PMC4580505/; ranges 0–100% observed (Magnotti et al. 2024): https://www.sciencedirect.com/science/article/pii/S0010945220303749"),

######## temporal_causality

    dict(class_id="temporal_causality", aspect_id="toj_threshold", aspect_label="Temporal order JND", unit="seconds", scale="log",
        Umin=1e-3, Umax=1.0, Hmin=0.02, Hmax=0.08,
        source_universal="TOJ paradigms overview: https://pmc.ncbi.nlm.nih.gov/articles/PMC3427541/",
        source_human="Typical TOJ ~20–80 ms: https://journals.plos.org/plosone/article?id=10.1371/journal.pone.0264831 ; recent ranges: https://www.nature.com/articles/s41598-024-84082-z"),

    dict(class_id="temporal_causality", aspect_id="sj_threshold_visual", aspect_label="Simultaneity JND (visual)", unit="seconds", scale="log",
        Umin=1e-3, Umax=1.0, Hmin=0.02, Hmax=0.05,
        source_universal="SJs vs TOJs review: https://www.sciencedirect.com/science/article/abs/pii/S0306452215004431",
        source_human="Visual SJ thresholds tens of ms: https://pmc.ncbi.nlm.nih.gov/articles/PMC3427541/"),

    dict(class_id="temporal_causality", aspect_id="michotte_launching_gap", aspect_label="Causal launching gap", unit="seconds", scale="log",
        Umin=1e-3, Umax=1.0, Hmin=0.01, Hmax=0.15,
        source_universal="Perceptual causality (space–time): https://pmc.ncbi.nlm.nih.gov/articles/PMC2868299/",
        source_human="Michotte-style temporal contiguity window: https://biomotionlab.ca/Text/PP_Guski.pdf ; overview: https://pmc.ncbi.nlm.nih.gov/articles/PMC9617506/"),

    dict(class_id="temporal_causality", aspect_id="intentional_binding_mag", aspect_label="Intentional binding (magnitude)", unit="seconds", scale="linear",
        Umin=0.0, Umax=0.40, Hmin=0.03, Hmax=0.15,
        source_universal="IB as 0–1 s-scale bias proxy: https://link.springer.com/article/10.3758/s13414-017-1292-y",
        source_human="IB ~50–150 ms typical; stability across modalities: https://www.sciencedirect.com/science/article/abs/pii/S1053810024000941 ; see debate: https://www.biorxiv.org/content/10.1101/2023.02.06.526214v3.full-text"),

    dict(class_id="temporal_causality", aspect_id="event_segmentation_timescale", aspect_label="Event segmentation timescale", unit="seconds", scale="log",
        Umin=0.1, Umax=600.0, Hmin=1.0, Hmax=30.0,
        source_universal="Event Segmentation Theory: https://www.sciencedirect.com/science/article/abs/pii/S1364661307003312",
        source_human="Multiple timescales; boundaries seconds–tens of seconds: https://pmc.ncbi.nlm.nih.gov/articles/PMC2263140/ ; overview: https://www.frontiersin.org/articles/10.3389/fnhum.2010.00168/full"),

    dict(class_id="temporal_causality", aspect_id="postdictive_causality_window", aspect_label="Postdictive causality window", unit="seconds", scale="log",
        Umin=1e-3, Umax=1.0, Hmin=0.03, Hmax=0.20,
        source_universal="Spatiotemporal constraints on causal perception: https://www.sciencedirect.com/science/article/abs/pii/S0010028508000108",
        source_human="Delays reducing perceived causality tens–hundreds ms: https://pmc.ncbi.nlm.nih.gov/articles/PMC2868299/"),

###### agency_control

    dict(class_id="agency_control", aspect_id="action_outcome_delay_window", aspect_label="Action→outcome agency window", unit="seconds", scale="log",
        Umin=1e-3, Umax=2.0, Hmin=0.005, Hmax=0.30,
        source_universal="Temporal credit assignment window for agency judgments (review)",
        source_human="Agency declines with increasing delay; typical tolerance <~300 ms (Haggard 2017 review): https://wexler.free.fr/library/files/haggard%20(2017)%20sense%20of%20agency%20in%20the%20human%20brain.pdf ; Delay modulates agency (Erdoğan et al. 2024): https://pmc.ncbi.nlm.nih.gov/articles/PMC12181639/"),

    dict(class_id="agency_control", aspect_id="visuomotor_delay_tolerance", aspect_label="Visuomotor adaptation delay tolerance", unit="seconds", scale="log",
        Umin=1e-3, Umax=1.5, Hmin=0.005, Hmax=0.30,
        source_universal="Feedback delay parameterization in human-in-the-loop control",
        source_human="Adaptation persists but reduced with ~200 ms visual delay (Honda et al. 2012 PLoS ONE): https://pmc.ncbi.nlm.nih.gov/articles/PMC3364281/ ; Delayed feedback disrupts error-based learning (Kitazawa 2016 review): https://pmc.ncbi.nlm.nih.gov/articles/PMC4808111/"),

    dict(class_id="agency_control", aspect_id="speech_auditory_delay_tolerance", aspect_label="Speech auditory-motor delay tolerance", unit="seconds", scale="log",
        Umin=1e-3, Umax=1.0, Hmin=0.005, Hmax=0.10,
        source_universal="Auditory feedback timing in vocal control",
        source_human="Auditory-motor adaptation eliminated at ≥100 ms delay (Max & Maffett 2015): https://pmc.ncbi.nlm.nih.gov/articles/PMC4363140/"),

    dict(class_id="agency_control", aspect_id="somatosensory_attenuation_timing", aspect_label="Somatosensory attenuation timing window", unit="seconds", scale="log",
        Umin=1e-3, Umax=0.5, Hmin=0.005, Hmax=0.15,
        source_universal="Predictive timing for self-touch in internal models",
        source_human="Baseline attenuation strongest near 0 ms; shifts with delay training (~100–150 ms) (Kilteni et al. eLife 2019): https://pmc.ncbi.nlm.nih.gov/articles/PMC6860990/ ; Efference copy necessary (iScience 2020): https://pubmed.ncbi.nlm.nih.gov/32058957/ ; Neural recalibration with 100 ms delays (Nat Commun 2024): https://www.nature.com/articles/s42003-024-06188-4"),

    dict(class_id="agency_control", aspect_id="assisted_control_agency", aspect_label="Agency under assistance (relative)", unit="fraction", scale="linear",
        Umin=0.0, Umax=1.0, Hmin=0.50, Hmax=1.00,
        source_universal="Assistance–agency scale 0–1",
        source_human="Agency during continuous action increases with performance under assistance (Wen & Haggard 2015): https://pmc.ncbi.nlm.nih.gov/articles/PMC4404253/"),

####### symbolic_abstract

    dict(class_id="symbolic_abstract", aspect_id="ans_weber_fraction", aspect_label="Numerosity Weber fraction (ANS)", unit="fraction", scale="linear",
        Umin=0.0, Umax=1.0, Hmin=0.08, Hmax=0.25,
        source_universal="Weber fraction 0–1 range; ANS follows Weber-like scaling (Testolin & Boninsegna 2021): https://link.springer.com/article/10.3758/s13423-020-01801-z",
        source_human="Adults typically ~0.1–0.2; developmental trajectory (Halberda & Feigenson 2008): https://panamath.org/papers/HalberdaFeigenson2008DevPsych.pdf ; overview (Halberda & Odic 2014): https://www.halberdalab.net/files/HalberdaOdic2014WeberChapter.pdf"),

    dict(class_id="symbolic_abstract", aspect_id="pitch_jnd_fraction", aspect_label="Pitch JND (Δf/f)", unit="fraction", scale="linear",
        Umin=0.0, Umax=0.10, Hmin=0.0015, Hmax=0.005,
        source_universal="Δf/f expressed 0–1; psychoacoustic overview (Oxenham 2012): https://pubmed.ncbi.nlm.nih.gov/23015422/",
        source_human="~0.15% musicians, ~0.5% non-musicians near mid-frequencies (Micheyl et al. 2006): https://audition.ens.fr/P2web/eval2006/DP_Micheyl-2006.pdf ; review (McDermott & Oxenham 2008): https://mcdermottlab.mit.edu/papers/McDermott_Oxenham_2008_pitch_music_CONB_review.pdf"),

    dict(class_id="symbolic_abstract", aspect_id="interval_discrimination_cents", aspect_label="Musical interval JND", unit="cents", scale="linear",
        Umin=0.0, Umax=200.0, Hmin=10.0, Hmax=40.0,
        source_universal="Interval JND cast on 0–200 cents window for comparability (minor third ≈ 300 cents upper bound truncated for page layout)",
        source_human="JNDs ~20–40 cents in non-musicians; better in musicians (Zarate et al. 2012): https://pmc.ncbi.nlm.nih.gov/articles/PMC3427364/ ; 20-cent detection near threshold (Schellenberg 2001): https://www.jstor.org/stable/10.1525/mp.2001.19.2.223 ; corroborating review/data: https://pmc.ncbi.nlm.nih.gov/articles/PMC3455123/"),

    dict(class_id="symbolic_abstract", aspect_id="phoneme_vot_boundary_bapa", aspect_label="Phoneme VOT boundary (/b/–/p/)", unit="seconds", scale="log",
        Umin=1e-3, Umax=1e-1, Hmin=0.020, Hmax=0.040,
        source_universal="VOT continuum cast on 1–100 ms for boundary localization comparisons",
        source_human="English bilabial /b/–/p/ boundary ~+20–30 ms; cross-linguistic reviews (Abramson 2017): https://pmc.ncbi.nlm.nih.gov/articles/PMC5665574/ ; boundary examples ~25 ms (Hay 2005): https://repositories.lib.utexas.edu/bitstreams/9e8677cb-ed77-4341-b6b0-ec7e21ccfe11/download ; summary chapter: https://www.degruyterbrill.com/document/doi/10.21832/9781847693761-006/html"),

####### mathematical_cognition

    dict(class_id="mathematical_cognition", aspect_id="subitizing_capacity", aspect_label="Subitizing capacity", unit="items", scale="linear",
        Umin=1, Umax=100, Hmin=1, Hmax=4,
        source_universal="Enumeration scale reference 1–100 items",
        source_human="Adults subitize ~1–4 items (Kaufman et al., 1949; Trick & Pylyshyn, 1994)"),

    dict(class_id="mathematical_cognition", aspect_id="enumeration_counting_slope", aspect_label="Enumeration counting slope", unit="ms/item", scale="linear",
        Umin=0, Umax=1000, Hmin=250, Hmax=350,
        source_universal="Counting rate expressed as ms per item (0–1000 ms limits)",
        source_human="Outside subitizing, slopes ~250–350 ms/item (Trick & Pylyshyn, 1994; Mandler & Shebo, 1982)"),

    dict(class_id="mathematical_cognition", aspect_id="addition_problem_size_slope", aspect_label="Addition problem-size slope", unit="ms per sum increment", scale="linear",
        Umin=0, Umax=200, Hmin=20, Hmax=70,
        source_universal="Problem-size effect cast as ms per unit increase in sum",
        source_human="Single-digit addition slopes ~20–70 ms/increment (Ashcraft, 1992; Uittenhove et al., 2016)"),

    dict(class_id="mathematical_cognition", aspect_id="operation_span_capacity", aspect_label="Operation span capacity (OSPAN)", unit="items", scale="linear",
        Umin=0, Umax=10, Hmin=3, Hmax=7,
        source_universal="Complex span scale 0–10 items",
        source_human="Typical adult OSPAN ~3–7 items (Unsworth et al., 2005)"),

    dict(class_id="mathematical_cognition", aspect_id="number_line_error_0_100", aspect_label="Number line error (0–100)", unit="fraction", scale="linear",
        Umin=0.0, Umax=1.0, Hmin=0.02, Hmax=0.06,
        source_universal="Percent absolute error expressed as fraction 0–1",
        source_human="Adult PAE low (~2–6%) on 0–100 tasks (Siegler & Opfer, 2003; Booth & Siegler, 2006)")

])


refs_df = pd.DataFrame([
    # physical_body
    dict(class_id="physical_body", aspect_label="Vision (wavelength)", Hmin=380e-9, Hmax=740e-9, Umin=1e-12, Umax=1e4, scale="log", source_human="NASA visible. NASA EMS"),
    dict(class_id="physical_body", aspect_label="Vision luminance", Hmin=1e-6, Hmax=1e6, Umin=1e-9, Umax=1e9, scale="log", source_human="Light adaptation. OOM luminance"),
    dict(classid="physical_body", aspect_label="Hearing (frequency)", Hmin=20.0, Hmax=20000.0, Umin=1e-1, Umax=1e7, scale="log", source_human="NCBI human range. Hearing ranges"),
    dict(class_id="physical_body", aspect_label="Hearing level (pressure)", Hmin=2e-5, Hmax=2e1, Umin=1e-6, Umax=1e7, scale="log", source_human="SPL reference. Sound pressure"),
    dict(class_id="physical_body", aspect_label="Touch vibration", Hmin=2.0, Hmax=1000.0, Umin=1e-1, Umax=1e6, scale="log", source_human="Bolanowski 1988. Mechanical vibration"),
    dict(class_id="physical_body", aspect_label="Touch spatial resolution", Hmin=2e-3, Hmax=4e-2, Umin=1e-9, Umax=1.0, scale="log", source_human="Weinstein 1968. Contact scale"),
    dict(class_id="physical_body", aspect_label="Temporal gap detection", Hmin=2e-3, Hmax=1e-1, Umin=1e-15, Umax=3.1536e9, scale="log", source_human="Gap detection review. Time scales"),

    # multisensory
    dict(class_id="multisensory", aspect_label="Audio–visual TBW (flash/beep)", Hmin=0.08, Hmax=0.20, Umin=1e-3, Umax=10.0, scale="log", source_human="Typical adult TBW ~160 ms for simple AV (Zerr et al. 2019): https://www.frontiersin.org/articles/10.3389/fpsyg.2019.02489/full. TBW construct review (Wallace & Stevenson 2014): https://pmc.ncbi.nlm.nih.gov/articles/PMC4326640/"),
    dict(class_id="multisensory", aspect_label="Audio–visual TBW (speech)", Hmin=0.12, Hmax=0.30, Umin=1e-3, Umax=10.0, scale="log", source_human="Speech TBW often wider ~200–250 ms (Hillock-Dunn et al. 2016): https://www.sciencedirect.com/science/article/abs/pii/S0028393216300525. Intersensory synchrony tutorial (Vroomen & Keetels 2010): https://pubmed.ncbi.nlm.nih.gov/20436185/"),
    dict(class_id="multisensory", aspect_label="Visual–haptic MLE consistency", Hmin=0.70, Hmax=1.00, Umin=0.0, Umax=1.0, scale="linear", source_human="Near-optimal integration (Ernst & Banks 2002 Nature): https://pubmed.ncbi.nlm.nih.gov/11807554/; Tool-mediated MLE (Takahashi et al. 2017): https://pmc.ncbi.nlm.nih.gov/articles/PMC5380699/"),
    dict(class_id="multisensory", aspect_label="Ventriloquism spatial bias", Hmin=1.0, Hmax=10.0, Umin=0.0, Umax=45.0, scale="linear", source_human="Near-optimal AV spatial integration; bias scales with disparity (Alais & Burr 2004): https://pubmed.ncbi.nlm.nih.gov/14761661/. Ventriloquism paradigms/review (Bruns 2019): https://www.frontiersin.org/articles/10.3389/fnint.2019.00051/full"),
    dict(class_id="multisensory", aspect_label="Sound-induced flash TBW", Hmin=0.05, Hmax=0.25, Umin=1e-3, Umax=1.0, scale="log", source_human="SIFI temporal window tens–hundreds of ms (Hirst et al. 2020 review): https://www.sciencedirect.com/science/article/pii/S0149763420305637. Task-dependent AV TBW framework (Stevenson & Wallace 2013): https://pmc.ncbi.nlm.nih.gov/articles/PMC3711231/"),
    dict(class_id="multisensory", aspect_label="McGurk susceptibility", Hmin=0.10, Hmax=0.80, Umin=0.0, Umax=1.0, scale="linear", source_human="Large variability across adults/stimuli (Mallick et al. 2015): https://pmc.ncbi.nlm.nih.gov/articles/PMC4580505/; ranges 0–100% observed (Magnotti et al. 2024): https://www.sciencedirect.com/science/article/pii/S0010945220303749"),

    # temporal_causality
    dict(class_id="temporal_causality", aspect_label="Temporal order JND", Hmin=0.02, Hmax=0.08, Umin=1e-3, Umax=1.0, scale="log", source_human="Typical TOJ ~20–80 ms: https://journals.plos.org/plosone/article?id=10.1371/journal.pone.0264831 ; recent ranges: https://www.nature.com/articles/s41598-024-84082-z. TOJ paradigms overview: https://pmc.ncbi.nlm.nih.gov/articles/PMC3427541/"),
    dict(class_id="temporal_causality", aspect_label="Simultaneity JND (visual)", Hmin=0.02, Hmax=0.05, Umin=1e-3, Umax=1.0, scale="log", source_human="Visual SJ thresholds tens of ms: https://pmc.ncbi.nlm.nih.gov/articles/PMC3427541/. SJs vs TOJs review: https://www.sciencedirect.com/science/article/abs/pii/S0306452215004431"),
    dict(class_id="temporal_causality", aspect_label="Causal launching gap", Hmin=0.01, Hmax=0.15, Umin=1e-3, Umax=1.0, scale="log", source_human="Michotte-style temporal contiguity window: https://biomotionlab.ca/Text/PP_Guski.pdf ; overview: https://pmc.ncbi.nlm.nih.gov/articles/PMC9617506/. Perceptual causality (space–time): https://pmc.ncbi.nlm.nih.gov/articles/PMC2868299/"),
    dict(class_id="temporal_causality", aspect_label="Intentional binding (magnitude)", Hmin=0.03, Hmax=0.15, Umin=0.0, Umax=0.40, scale="linear", source_human="IB ~50–150 ms typical; stability across modalities: https://www.sciencedirect.com/science/article/abs/pii/S1053810024000941 ; see debate: https://www.biorxiv.org/content/10.1101/2023.02.06.526214v3.full-text"),
    dict(class_id="temporal_causality", aspect_label="Event segmentation timescale", Hmin=1.0, Hmax=30.0, Umin=0.1, Umax=600.0, scale="log", source_human="Multiple timescales; boundaries seconds–tens of seconds: https://pmc.ncbi.nlm.nih.gov/articles/PMC2263140/ ; overview: https://www.frontiersin.org/articles/10.3389/fnhum.2010.00168/full. Event Segmentation Theory: https://www.sciencedirect.com/science/article/abs/pii/S1364661307003312"),
    dict(class_id="temporal_causality", aspect_label="Postdictive causality window", Hmin=0.03, Hmax=0.20, Umin=1e-3, Umax=1.0, scale="log", source_human="Delays reducing perceived causality tens–hundreds ms: https://pmc.ncbi.nlm.nih.gov/articles/PMC2868299/. Spatiotemporal constraints on causal perception: https://www.sciencedirect.com/science/article/abs/pii/S0010028508000108"),

    # agency_control
    dict(class_id="agency_control", aspect_label="Action→outcome agency window", Hmin=0.005, Hmax=0.30, Umin=1e-3, Umax=2.0, scale="log", source_human="Agency declines with increasing delay; typical tolerance <~300 ms (Haggard 2017 review): https://wexler.free.fr/library/files/haggard%20(2017)%20sense%20of%20agency%20in%20the%20human%20brain.pdf ; Delay modulates agency (Erdoğan et al. 2024): https://pmc.ncbi.nlm.nih.gov/articles/PMC12181639/"),
    dict(class_id="agency_control", aspect_label="Visuomotor adaptation delay tolerance", Hmin=0.005, Hmax=0.30, Umin=1e-3, Umax=1.5, scale="log", source_human="Adaptation persists but reduced with ~200 ms visual delay (Honda et al. 2012 PLoS ONE): https://pmc.ncbi.nlm.nih.gov/articles/PMC3364281/ ; Delayed feedback disrupts error-based learning (Kitazawa 2016 review): https://pmc.ncbi.nlm.nih.gov/articles/PMC4808111/"),
    dict(class_id="agency_control", aspect_label="Speech auditory-motor delay tolerance", Hmin=0.005, Hmax=0.10, Umin=1e-3, Umax=1.0, scale="log", source_human="Auditory-motor adaptation eliminated at ≥100 ms delay (Max & Maffett 2015): https://pmc.ncbi.nlm.nih.gov/articles/PMC4363140/"),
    dict(class_id="agency_control", aspect_label="Somatosensory attenuation timing window", Hmin=0.005, Hmax=0.15, Umin=1e-3, Umax=0.5, scale="log", source_human="Baseline attenuation strongest near 0 ms; shifts with delay training (~100–150 ms) (Kilteni et al. eLife 2019): https://pmc.ncbi.nlm.nih.gov/articles/PMC6860990/ ; Efference copy necessary (iScience 2020): https://pubmed.ncbi.nlm.nih.gov/32058957/ ; Neural recalibration with 100 ms delays (Nat Commun 2024): https://www.nature.com/articles/s42003-024-06188-4"),
    dict(class_id="agency_control", aspect_label="Agency under assistance (relative)", Hmin=0.50, Hmax=1.00, Umin=0.0, Umax=1.0, scale="linear", source_human="Agency during continuous action increases with performance under assistance (Wen & Haggard 2015): https://pmc.ncbi.nlm.nih.gov/articles/PMC4404253/"),

    # symbolic_abstract
    dict(class_id="symbolic_abstract", aspect_label="Numerosity Weber fraction (ANS)", Hmin=0.08, Hmax=0.25, Umin=0.0, Umax=1.0, scale="linear", source_human="Adults typically ~0.1–0.2; developmental trajectory (Halberda & Feigenson 2008): https://panamath.org/papers/HalberdaFeigenson2008DevPsych.pdf ; overview (Halberda & Odic 2014): https://www.halberdalab.net/files/HalberdaOdic2014WeberChapter.pdf"),
    dict(class_id="symbolic_abstract", aspect_label="Pitch JND (Δf/f)", Hmin=0.0015, Hmax=0.005, Umin=0.0, Umax=0.10, scale="linear", source_human="~0.15% musicians, ~0.5% non-musicians near mid-frequencies (Micheyl et al. 2006): https://audition.ens.fr/P2web/eval2006/DP_Micheyl-2006.pdf ; review (McDermott & Oxenham 2008): https://mcdermottlab.mit.edu/papers/McDermott_Oxenham_2008_pitch_music_CONB_review.pdf"),
    dict(class_id="symbolic_abstract", aspect_label="Musical interval JND", Hmin=10.0, Hmax=40.0, Umin=0.0, Umax=200.0, scale="linear", source_human="JNDs ~20–40 cents in non-musicians; better in musicians (Zarate et al. 2012): https://pmc.ncbi.nlm.nih.gov/articles/PMC3427364/ ; 20-cent detection near threshold (Schellenberg 2001): https://www.jstor.org/stable/10.1525/mp.2001.19.2.223 ; corroborating review/data: https://pmc.ncbi.nlm.nih.gov/articles/PMC3455123/"),
    dict(class_id="symbolic_abstract", aspect_label="Phoneme VOT boundary (/b/–/p/)", Hmin=0.020, Hmax=0.040, Umin=1e-3, Umax=1e-1, scale="log", source_human="English bilabial /b/–/p/ boundary ~+20–30 ms; cross-linguistic reviews (Abramson 2017): https://pmc.ncbi.nlm.nih.gov/articles/PMC5665574/ ; boundary examples ~25 ms (Hay 2005): https://repositories.lib.utexas.edu/bitstreams/9e8677cb-ed77-4341-b6b0-ec7e21ccfe11/download"),

    # mathematical_cognition
    dict(class_id="mathematical_cognition", aspect_label="Subitizing capacity", Hmin=1, Hmax=4, Umin=1, Umax=100, scale="linear", source_human="Adults subitize ~1–4 items (Kaufman et al., 1949; Trick & Pylyshyn, 1994)"),
    dict(class_id="mathematical_cognition", aspect_label="Enumeration counting slope", Hmin=250, Hmax=350, Umin=0, Umax=1000, scale="linear", source_human="Outside subitizing, slopes ~250–350 ms/item (Trick & Pylyshyn, 1994; Mandler & Shebo, 1982)"),
    dict(class_id="mathematical_cognition", aspect_label="Addition problem-size slope", Hmin=20, Hmax=70, Umin=0, Umax=200, scale="linear", source_human="Single-digit addition slopes ~20–70 ms/increment (Ashcraft, 1992; Uittenhove et al., 2016)"),
    dict(class_id="mathematical_cognition", aspect_label="Operation span capacity (OSPAN)", Hmin=3, Hmax=7, Umin=0, Umax=10, scale="linear", source_human="Typical adult OSPAN ~3–7 items (Unsworth et al., 2005)"),
    dict(class_id="mathematical_cognition", aspect_label="Number line error (0–100)", Hmin=0.02, Hmax=0.06, Umin=0.0, Umax=1.0, scale="linear", source_human="Adult PAE low (~2–6%) on 0–100 tasks (Siegler & Opfer, 2003; Booth & Siegler, 2006)"),
])



In [78]:
import matplotlib.font_manager as fm
import textwrap

# --- Robust Font Selection ---
# Find the best available sans-serif font on the system for a professional look.
# This avoids fragile web downloads and works reliably in Colab.
font_family = 'Roboto'
try:
    # Check if Roboto is available
    fm.findfont(font_family, fallback_to_default=False)
except ValueError:
    # If not, fall back to a common, high-quality sans-serif font
    print("Roboto not found, falling back to DejaVu Sans.")
    font_family = 'DejaVu Sans'


# --- Design System (Colors & Fonts) ---
# A consistent color palette and typography for a cohesive design
BG_COLOR = "#f5f5f5"
TEXT_COLOR = "#222222"
SUBTLE_TEXT_COLOR = "#666666"
HIGHLIGHT_COLOR = "#007acc"

# Assign a unique, modern color to each perception class
CLASS_COLORS = {
    'physical_body': '#1f77b4',
    'multisensory': '#ff7f0e',
    'temporal_causality': '#2ca02c',
    'agency_control': '#d62728',
    'symbolic_abstract': '#9467bd',
    'mathematical_cognition': '#8c564b'
}

# --- Global Matplotlib Styling ---
# Apply our design system to all future plots
plt.rcParams.update({
    'figure.facecolor': BG_COLOR,
    'axes.facecolor': BG_COLOR,
    'axes.edgecolor': TEXT_COLOR,
    'axes.labelcolor': TEXT_COLOR,
    'axes.titlecolor': TEXT_COLOR,
    'xtick.color': TEXT_COLOR,
    'ytick.color': TEXT_COLOR,
    'text.color': TEXT_COLOR,
    'font.family': font_family,
    'font.weight': 'normal',
    'axes.titleweight': 'bold',
    'axes.labelweight': 'normal',
    'hatch.linewidth': 0.5
})

print(f"Using font: '{font_family}' for the report.")

Roboto not found, falling back to DejaVu Sans.
Using font: 'DejaVu Sans' for the report.


In [79]:
def _valid_row(r):
    if r["Umin"]>=r["Umax"] or r["Hmin"]>=r["Hmax"]: return False
    if not (r["Umin"]<=r["Hmin"]<=r["Hmax"]<=r["Umax"]): return False
    if r["scale"]=="log" and (r["Umin"]<=0 or r["Umax"]<=0 or r["Hmin"]<=0 or r["Hmax"]<=0): return False
    return True

def _frac(x, umin, umax, scale):
    if scale=="log": return (math.log10(x)-math.log10(umin))/(math.log10(umax)-math.log10(umin))
    return (x-umin)/(umax-umin)

def _span_fraction(hmin, hmax, umin, umax, scale):
    if scale=="log": return (math.log10(hmax)-math.log10(hmin))/(math.log10(umax)-math.log10(umin))
    return (hmax-hmin)/(umax-umin)

def _u_weight(umin, umax, scale):
    if scale=="log": return math.log10(umax)-math.log10(umin)
    return umax-umin

def compute_metrics(refs):
    df = refs.copy()
    df = df[df.apply(_valid_row, axis=1)].reset_index(drop=True)
    df["fmin"] = [_frac(hmin, umin, umax, s) for hmin,umin,umax,s in zip(df.Hmin,df.Umin,df.Umax,df.scale)]
    df["fmax"] = [_frac(hmax, umin, umax, s) for hmax,umin,umax,s in zip(df.Hmax,df.Umin,df.Umax,df.scale)]
    df["coverage_fraction"] = [_span_fraction(hmin,hmax,umin,umax,s) for hmin,hmax,umin,umax,s in zip(df.Hmin,df.Hmax,df.Umin,df.Umax,df.scale)]
    df["U_weight"] = [_u_weight(umin,umax,s) for umin,umax,s in zip(df.Umin,df.Umax,df.scale)]
    return df

def class_aggregates(df):
    g = df.groupby("class_id")
    agg = g.apply(lambda x: pd.Series(dict(
        n_aspects=len(x),
        coverage_mean_unweighted=float(x["coverage_fraction"].mean()),
        coverage_mean_weighted=float((x["coverage_fraction"]*x["U_weight"]).sum()/x["U_weight"].sum()),
        coverage_median=float(x["coverage_fraction"].median()),
        coverage_min=float(x["coverage_fraction"].min()),
        coverage_max=float(x["coverage_fraction"].max())
    ))).reset_index()
    return agg

In [80]:
import os, re, math, numpy as np, pandas as pd, matplotlib.pyplot as plt, seaborn as sns
from matplotlib.backends.backend_pdf import PdfPages
import matplotlib.colors as mcolors

# ==============================================================================
# SECTION 1: STYLE DEFAULTS & CORE DATA FUNCTIONS
# ==============================================================================

# --- Style Defaults ---
BG_COLOR          = globals().get("BG_COLOR", "white")
TEXT_COLOR        = globals().get("TEXT_COLOR", "#111")
SUBTLE_TEXT_COLOR = globals().get("SUBTLE_TEXT_COLOR", "#666")
HIGHLIGHT_COLOR   = globals().get("HIGHLIGHT_COLOR", "#2f6ebb")
CLASS_COLORS      = globals().get("CLASS_COLORS", {})
THEME_BLUE        = "#2f6ebb"

# --- Core Data Processing (Used by all report generation) ---
def compute_metrics(refs):
    df = refs.copy()
    df = df[df.apply(lambda r: "Hmin" in r and r["Hmin"] < r["Hmax"] and "Umin" in r and r["Umin"] < r["Umax"], axis=1)].copy()
    def span_fraction(r):
        if r["scale"] == "log":
            if any(x <= 0 for x in [r["Umin"], r["Umax"], r["Hmin"], r["Hmax"]]): return 0.0
            return (math.log10(r["Hmax"]) - math.log10(r["Hmin"])) / (math.log10(r["Umax"]) - math.log10(r["Umin"]))
        return (r["Hmax"] - r["Hmin"]) / (r["Umax"] - r["Umin"])
    def u_span(r):
        if r["scale"] == "log":
            if r["Umin"] <= 0 or r["Umax"] <= 0: return 0.0
            return math.log10(r["Umax"]) - math.log10(r["Umin"])
        return r["Umax"] - r["Umin"]
    df["coverage_fraction"] = df.apply(span_fraction, axis=1)
    df["U_weight"] = df.apply(u_span, axis=1)
    tot = df["U_weight"].sum()
    df["U_share"] = df["U_weight"] / tot if tot > 0 else 0.0
    return df

def class_aggregates(df):
    base = df.groupby("class_id")["coverage_fraction"].agg(
        coverage_mean="mean", median="median", min="min", max="max", n_aspects="count"
    ).reset_index()
    base["range"] = base["max"] - base["min"]
    wmean = (df.groupby("class_id")
             .apply(lambda x: np.average(x["coverage_fraction"], weights=x["U_weight"]))
             .reset_index(name="coverage_mean_weighted"))
    return base.merge(wmean, on="class_id", how="left")

# ==============================================================================
# SECTION 2: NEW - DEDICATED HELPERS FOR THE DETAILED SUMMARY PAGE
# These are renamed to prevent conflicts with the other report script.
# ==============================================================================

def _detail_wrap_label(s, width=14, max_lines=2):
    s = re.sub(r"[_\-]+", " ", str(s)).strip()
    words, lines, cur = s.split(), [], ""
    for w in words:
        add = (w if not cur else " " + w)
        if len(cur + add) <= width: cur += add
        else:
            lines.append(cur.strip()); cur = w
            if len(lines) >= max_lines - 1:
                cur = (cur + " " + " ".join(words[words.index(w)+1:])).strip()
                break
    if cur: lines.append(cur.strip())
    return "\n".join(lines[:max_lines])

def _detail_class_order_by_pc1(agg):
    X = agg[["coverage_mean","median","max","range","n_aspects"]].astype(float).to_numpy()
    X = (X - X.mean(0)) / np.where(X.std(0)==0, 1, X.std(0))
    u, s, vt = np.linalg.svd(np.nan_to_num(X), full_matrices=False)
    pc1 = (X @ vt.T)[:,0]
    return agg["class_id"].to_numpy()[np.argsort(pc1)]

def _detail_base_positions(agg):
    classes = _detail_class_order_by_pc1(agg)
    cx = np.linspace(0.06, 0.94, len(classes))
    return classes, dict(zip(classes, cx)), (cx[1]-cx[0] if len(cx)>1 else 0.2)

def _detail_shrink_center_h(ax, frac=0.10):
    p = ax.get_position(); new_w = p.width * (1 - frac); new_x = p.x0 + (p.width - new_w) / 2
    ax.set_position([new_x, p.y0, new_w, p.height])

def _detail_continuous_intensity(df, agg, width=4200, sigma_mul=0.22, jitter_mul=0.18, gamma=0.8, eq_by_class=True, seed=42):
    classes, pos, spacing = _detail_base_positions(agg)
    x = np.linspace(0, 1, width); I = np.zeros_like(x)
    rng = np.random.default_rng(seed); sigma = max(1e-3, spacing * sigma_mul)
    if eq_by_class:
        cls_w = df.groupby("class_id")["U_weight"].sum().replace(0, 1.0)
        eq = (cls_w.mean() / cls_w).reindex(df["class_id"]).to_numpy()
    else:
        eq = np.ones(len(df))
    for i, r in df.reset_index(drop=True).iterrows():
        cov = float(np.clip(r["coverage_fraction"], 0, 1))
        cx  = pos.get(r["class_id"], 0.5)
        xi  = np.clip(cx + rng.normal(0, spacing*jitter_mul), 0, 1)
        w   = float(r.get("U_weight", 1.0)) * eq[i]
        I  += w * cov * np.exp(-0.5*((x - xi)/sigma)**2)
    if I.max() > 0: I /= I.max()
    I = I**gamma
    return I, classes, pos

# ==============================================================================
# SECTION 3: PLOTTING FUNCTIONS FOR THE DETAILED REPORT
# ==============================================================================

def plot_summary_page_v2(df, agg, *,
                         height_scales=(0.30, 0.34, 0.36),
                         top_shrink=0.10, mid_shrink=0.10,
                         field_width=4200, field_sigma=0.22, field_jitter=0.18, field_gamma=0.8,
                         labels_target=10, eq_by_class=True):
    fig = plt.figure(figsize=(8.5, 11), facecolor=BG_COLOR)
    fig.text(0.095, 0.965, "Human perception — coverage overview", fontsize=18, weight="bold", color=TEXT_COLOR)
    fig.text(0.095, 0.94,  "", fontsize=10, color=SUBTLE_TEXT_COLOR)
    gs = fig.add_gridspec(3, 1, height_ratios=list(height_scales),
                          hspace=0.44, top=0.92, bottom=0.09, left=0.12, right=0.88)
    ax_top = fig.add_subplot(gs[0, 0]); ax_mid = fig.add_subplot(gs[1, 0]); ax_bot = fig.add_subplot(gs[2, 0])

    # --- Top Plot: Heatmap ---
    heat = agg.set_index("class_id")[["coverage_mean","median","max","range","n_aspects"]].astype(float)
    normed = heat.copy()
    for col in normed.columns:
        vmin, vmax = float(normed[col].min()), float(normed[col].max())
        normed[col] = 0.5 if abs(vmax - vmin) < 1e-12 else (normed[col] - vmin) / (vmax - vmin)
    annot_df = heat.round(2).astype(str)
    sns.heatmap(normed, ax=ax_top, annot=annot_df, fmt="", cmap="Blues",
                linewidths=.5, linecolor=(0,0,0,0.15), cbar=False, annot_kws={"size": 7})
    ax_top.set_title("Metrics (per-column scaled)", fontsize=11, pad=8, color=THEME_BLUE)
    ax_top.set_ylabel("")
    ax_top.tick_params(axis="y", rotation=0, labelsize=8); ax_top.tick_params(axis="x", labelsize=8)
    ax_top.set_yticklabels([_detail_wrap_label(t.get_text(), 14, 2) for t in ax_top.get_yticklabels()]) # <-- UPDATED
    ax_top.set_xticklabels([_detail_wrap_label(t.get_text(), 10, 2) for t in ax_top.get_xticklabels()]) # <-- UPDATED
    _detail_shrink_center_h(ax_top, top_shrink) # <-- UPDATED

    # --- Middle Plot: Violin Plots ---
    order = agg.sort_values("coverage_mean", ascending=False)["class_id"]
    sns.violinplot(ax=ax_mid, y="class_id", x="coverage_fraction", data=df, order=order,
                   hue="class_id", palette=CLASS_COLORS if CLASS_COLORS else None,
                   inner="quartile", cut=0, legend=False, orient="h", linewidth=0.8)
    ax_mid.set_title("Distributions (per aspect)", fontsize=11, pad=8, color=THEME_BLUE)
    ax_mid.set_xlabel("coverage fraction", fontsize=9, color=SUBTLE_TEXT_COLOR); ax_mid.set_ylabel("")
    ax_mid.tick_params(axis="y", labelsize=8)
    ax_mid.spines[["right","top"]].set_visible(False); ax_mid.xaxis.grid(True, linestyle="--", alpha=0.5)
    _detail_shrink_center_h(ax_mid, mid_shrink) # <-- UPDATED

    # --- Bottom Plot: Continuous Field ---
    # I, classes, pos = _detail_continuous_intensity(df, agg, width=field_width, sigma_mul=field_sigma, # <-- UPDATED
    #                                              jitter_mul=field_jitter, gamma=field_gamma, eq_by_class=eq_by_class)
    # y = np.linspace(0, 1, 220)
    # vtex = 1 - 0.10 * np.cos(2 * np.pi * y) - 0.05 * np.cos(6 * np.pi * y)
    # img = np.clip(np.outer(vtex, I), 0, 1)
    # ax_bot.imshow(img, cmap="Blues", origin="lower", aspect="auto", extent=(0,1,0,1))
    # ax_bot.set_title("Continuous coverage field (equalized by class)" if eq_by_class else "Continuous coverage field",
    #                  fontsize=11, pad=8, color=THEME_BLUE)
    # ax_bot.axis("off")
    # for c in classes:
    #     ax_bot.plot([pos[c], pos[c]], [0.14, 0.88], color=(0,0,0,0.22), lw=0.55)
    # step = max(1, len(classes)//max(1, labels_target)); show = classes[::step][:labels_target]
    # for c in show:
    #     ax_bot.text(pos[c], 0.08, _detail_wrap_label(c, 14, 2), ha="center", va="top", fontsize=8, color=TEXT_COLOR) # <-- UPDATED

    return fig

def plot_class_page_letter_v2(df_class, agg_row, page_num):
    def _fraction_bounds(r):
        try:
            if r["scale"] == "log":
                if r["Umin"]<=0 or r["Umax"]<=0 or r["Hmin"]<=0 or r["Hmax"]<=0: return np.nan, np.nan
                du = np.log10(r["Umax"]) - np.log10(r["Umin"]);
                if du<=0: return np.nan, np.nan
                fmin = (np.log10(r["Hmin"]) - np.log10(r["Umin"])) / du
                fmax = (np.log10(r["Hmax"]) - np.log10(r["Umin"])) / du
            else:
                du = (r["Umax"] - r["Umin"]);
                if du<=0: return np.nan, np.nan
                fmin = (r["Hmin"] - r["Umin"]) / du
                fmax = (r["Hmax"] - r["Umin"]) / du
            return np.clip(fmin,0,1), np.clip(fmax,0,1)
        except Exception: return np.nan, np.nan

    bars = df_class.copy(); bars[["fmin","fmax"]] = bars.apply(_fraction_bounds, axis=1, result_type="expand")
    bars = bars.dropna(subset=["fmin","fmax"]); bars["span"] = (bars["fmax"] - bars["fmin"]).clip(lower=0)
    order = bars.sort_values("coverage_fraction", ascending=True).reset_index(drop=True); n = len(order)
    fig = plt.figure(figsize=(8.5, 11), facecolor=BG_COLOR)
    fig.text(0.1, 0.95, "Human Perceptual Coverage Report", fontsize=14, weight='bold', color=TEXT_COLOR)
    fig.text(0.9, 0.95, f"Domain: {agg_row['class_id']}", ha='right', fontsize=12, color=SUBTLE_TEXT_COLOR)
    fig.patches.extend([plt.Rectangle((0.1, 0.93), 0.8, 0.002, fc=SUBTLE_TEXT_COLOR, transform=fig.transFigure, alpha=0.5)])
    ax_top = fig.add_axes([0.3, 0.55, 0.55, 0.35])
    class_color = CLASS_COLORS.get(agg_row['class_id'], '#6ea8dc')
    ax_top.barh(range(n), order["span"], left=order["fmin"], height=0.6, color=class_color, alpha=0.75)
    ax_top.set_title(f"Aspect windows within universal span — {agg_row['class_id']}", loc='left', fontsize=12, pad=12)
    ax_top.set_xlabel("fraction of universal span (declared scale)", fontsize=9); ax_top.set_xlim(0,1)
    ax_top.set_ylim(-0.5, max(-0.5, n-0.5)); ax_top.set_yticks(range(n)); ax_top.set_yticklabels([])
    ax_top.xaxis.grid(True, linestyle='--', color=SUBTLE_TEXT_COLOR, alpha=0.5)
    ax_top.spines[['right','top','left']].set_visible(False); ax_top.tick_params(axis='y', length=0)
    axpos = ax_top.get_position()
    for i, r in order.iterrows():
        fig.text(axpos.x0 - 0.01, axpos.y0 + (i + 0.5) / n * axpos.height, str(r["aspect_label"]), ha="right", va="center", fontsize=8, color=TEXT_COLOR)
        ax_top.text(1.01, i, f"{r['coverage_fraction']*100:.1f}%", ha="left", va="center", fontsize=8, color=TEXT_COLOR)
        src = r.get("source_human", "");
        if isinstance(src, str) and src.startswith(("http://","https://")):
            ax_top.text(1.10, i, "source", ha="left", va="center", fontsize=8, color=HIGHLIGHT_COLOR, url=src)
    gs_bottom = fig.add_gridspec(1, 2, top=0.45, bottom=0.1, left=0.1, right=0.9, wspace=0.4)
    ax_left = fig.add_subplot(gs_bottom[0,0]); ax_right = fig.add_subplot(gs_bottom[0,1], projection="polar")
    ax_left.axis("off"); ax_left.set_title("Domain summary", loc='left', fontsize=12, pad=8, color=THEME_BLUE)
    w = bars["U_weight"].to_numpy()
    wmean = (np.average(bars["coverage_fraction"], weights=w) if np.isfinite(w).all() and w.sum()>0 else float(bars["coverage_fraction"].mean()))
    stats = [("Aspects", f"{int(len(bars))}"),("Weighted mean", f"{wmean*100:.1f}%"),("Median", f"{bars['coverage_fraction'].median()*100:.1f}%"),("Range", f"{bars['coverage_fraction'].min()*100:.1f}% – {bars['coverage_fraction'].max()*100:.1f}%")]
    for i, (k, v) in enumerate(stats):
        y = 0.82 - i*0.2; ax_left.text(0, y, k, ha='left', va='top', fontsize=10, weight='bold', color=TEXT_COLOR)
        ax_left.text(0, y-0.08, v, ha='left', va='top', fontsize=14, color=TEXT_COLOR)
    ax_left.text(0, 0.02, "Coverage = span(H)/span(U). Bars normalized 0–1.", fontsize=8, color=SUBTLE_TEXT_COLOR)
    def _radar_axes(n): ang = np.linspace(0, 2*np.pi, n, endpoint=False); return np.r_[ang, ang[:1]]
    def plot_radar(ax, labels, values, color):
        n=len(labels); ang=_radar_axes(n); vals=np.r_[values,values[:1]]; ax.set_theta_offset(np.pi/2)
        ax.set_theta_direction(-1); ax.set_ylim(0,1); ax.plot(ang,vals,lw=2,color=color); ax.fill(ang,vals,alpha=0.25,color=color)
        ax.set_xticks(_radar_axes(n)[:-1]); ax.set_xticklabels(labels,fontsize=7); ax.set_yticks([0.25,0.5,0.75])
        ax.set_yticklabels(["0.25","0.5","0.75"],fontsize=7)
    plot_radar(ax_right, order["aspect_label"].tolist(), order["coverage_fraction"].clip(0,1).tolist(), class_color)
    fig.text(0.5, 0.05, f"Detail Page | {page_num}", ha='center', fontsize=9, color=SUBTLE_TEXT_COLOR)
    return fig

# ==============================================================================
# SECTION 4: MAIN GENERATOR FUNCTION
# ==============================================================================

def generate_report(refs, output_dir="world_class_report_v2", file_prefix="perception", **summary_kwargs):
    os.makedirs(output_dir, exist_ok=True)
    df  = compute_metrics(refs)
    agg = class_aggregates(df)
    pdf_path = os.path.join(output_dir, f"{file_prefix}_report.pdf")
    with PdfPages(pdf_path) as pdf:
        fig = plot_summary_page_v2(df, agg, **summary_kwargs)
        pdf.savefig(fig, facecolor=fig.get_facecolor()); plt.close(fig)
        for i, cid in enumerate(agg["class_id"]):
            d = df[df["class_id"]==cid].copy()
            a = agg[agg["class_id"]==cid].iloc[0]
            fig = plot_class_page_letter_v2(d, a, page_num=i+2)
            pdf.savefig(fig, facecolor=fig.get_facecolor()); plt.close(fig)
    print("✨ Report generated")
    print("📄", pdf_path)
    return pdf_path

In [81]:
import matplotlib.pyplot as plt
from matplotlib.backends.backend_pdf import PdfPages
import pandas as pd
import textwrap
import re
import numpy as np
import math


# --- Report Statistics and Core Assumptions (for Page 2) ---
report_data = {
    'domain': [
        'Agency & Control', 'Physical Body', 'Temporal Causality',
        'Multisensory', 'Mathematical Cognition', 'Symbolic & Abstract'
    ],
    'mean_coverage': [0.52, 0.30, 0.28, 0.27, 0.16, 0.13],
    'assumption': [
        "Human agency relies on immediate, low-latency feedback. Augmentation must preserve this temporal link to feel natural and intuitive.",
        "Our native senses perceive only narrow bands of a much wider physical reality. Technology must act as a translator for this unseen information.",
        "We perceive cause-and-effect relationships primarily within short time windows. AI is needed to reveal complex, long-range causal patterns.",
        "The brain's method of combining sensory inputs is variable and imperfect. Augmentation can stabilize and enhance sensory integration, especially in XR.",
        "Core numerical and logical abilities are highly constrained. The primary demand is for cognitive offloading tools that augment working memory.",
        "The human ability to distinguish between abstract symbols like sounds or concepts is coarse. Specialized tools are needed for finer-grained analysis."
    ]
}
df_report = pd.DataFrame(report_data).sort_values(by='mean_coverage', ascending=False).reset_index(drop=True)


def compute_metrics(refs):
    df = refs.copy()
    df = df[df.apply(lambda r: "Hmin" in r and "Hmax" in r and r["Hmin"] < r["Hmax"] and r["Umin"] < r["Umax"], axis=1)].copy()
    def span_fraction(r):
        if r["scale"] == "log":
            if any(x <= 0 for x in [r["Umin"], r["Umax"], r["Hmin"], r["Hmax"]]): return 0.0
            return (math.log10(r["Hmax"]) - math.log10(r["Hmin"])) / (math.log10(r["Umax"]) - math.log10(r["Umin"]))
        return (r["Hmax"] - r["Hmin"]) / (r["Umax"] - r["Umin"])
    def u_span(r):
        if r["scale"] == "log":
            if r["Umin"] <= 0 or r["Umax"] <= 0: return 0.0
            return math.log10(r["Umax"]) - math.log10(r["Umin"])
        return r["Umax"] - r["Umin"]
    df["coverage_fraction"] = df.apply(span_fraction, axis=1)
    df["U_weight"] = df.apply(u_span, axis=1)
    tot = df["U_weight"].sum()
    df["U_share"] = df["U_weight"] / tot if tot > 0 else 0.0
    return df

def class_aggregates(df):
    base = df.groupby("class_id")["coverage_fraction"].agg(
        coverage_mean="mean", median="median", min="min", max="max", n_aspects="count"
    ).reset_index()
    base["range"] = base["max"] - base["min"]
    # The 'wmean' part isn't strictly necessary for the fix, but this is the full original function
    wmean = (df.groupby("class_id")
             .apply(lambda x: np.average(x["coverage_fraction"], weights=x.get("U_weight", np.ones(len(x)))))
             .reset_index(name="coverage_mean_weighted"))
    return base.merge(wmean, on="class_id", how="left")

def _class_order_by_pc1(agg):
    X = agg[["coverage_mean", "n_aspects"]].astype(float).to_numpy()
    X = (X - X.mean(0)) / np.where(X.std(0) == 0, 1, X.std(0))
    if X.shape[1] > 0:
        _, _, vt = np.linalg.svd(np.nan_to_num(X), full_matrices=False)
        pc1 = (X @ vt.T)[:, 0]
        return agg["class_id"].to_numpy()[np.argsort(pc1)]
    return agg["class_id"].to_numpy()

def _base_positions(agg):
    classes = _class_order_by_pc1(agg)
    cx = np.linspace(0.06, 0.94, len(classes))
    return classes, dict(zip(classes, cx)), (cx[1]-cx[0] if len(cx)>1 else 0.2)

def _continuous_intensity_for_cover(df, agg, width=4200, sigma_mul=0.22, jitter_mul=0.18, gamma=0.8, seed=42):
    classes, pos, spacing = _base_positions(agg)
    x = np.linspace(0, 1, width); I = np.zeros_like(x)
    rng = np.random.default_rng(seed); sigma = max(1e-3, spacing * sigma_mul)
    for _, r in df.iterrows():
        cov = float(np.clip(r["coverage_fraction"], 0, 1))
        cx  = pos.get(r["class_id"], 0.5)
        xi  = np.clip(cx + rng.normal(0, spacing * jitter_mul), 0, 1)
        I  += cov * np.exp(-0.5 * ((x - xi) / sigma)**2)
    if I.max() > 0: I /= I.max()
    return I**gamma, classes, pos

def plot_coverage_field(ax, df, agg):
    """NEW: A self-contained function to draw the visualization on a given Axes."""
    I, classes, pos = _continuous_intensity_for_cover(df, agg)
    y = np.linspace(0, 1, 100)
    vtex = 1 - 0.10 * np.cos(2 * np.pi * y) - 0.05 * np.cos(6 * np.pi * y)
    img = np.clip(np.outer(vtex, I), 0, 1)
    ax.imshow(img, cmap="Blues", origin="lower", aspect="auto", extent=(0, 1, 0, 1))
    ax.axis("off")
    for c in classes:
        ax.plot([pos[c], pos[c]], [0.14, 0.88], color=(0,0,0,0.22), lw=0.55)
        label = c.replace('_', ' ').replace(' ', '\n') # Simple wrap
        ax.text(pos[c], 0.08, label, ha="center", va="top", fontsize=7, color="#333333")


def create_letter_page():
    fig = plt.figure(figsize=(8.5, 11), facecolor='white')
    plt.rcParams['font.family'] = 'sans-serif'; plt.rcParams['font.sans-serif'] = ['DejaVu Sans']
    plt.rcParams['text.color'] = '#111111'
    return fig

def draw_footer(fig, page_num, total_pages):
    fig.text(0.5, 0.03, f"A Quantitative Survey of Human Perceptual Limits | Page {page_num} of {total_pages}",
             ha='center', va='center', fontsize=8, color='#888888')

def parse_citation(text, aspect_label):
    url_match = re.search(r'(https?://[^\s]+)', text)
    url = url_match.group(1) if url_match else None
    clean_text = re.sub(r'\s*https?://[^\s]+', '', text).strip()
    if not clean_text.endswith('.'): clean_text += '.'
    return f"[{aspect_label}] {clean_text}", url


def create_page_one(page_num, total_pages, df_full, agg_full):
    """Creates the title page, abstract, intro, AND the new visualization."""
    fig = create_letter_page()

    # --- Headers  ---
    fig.text(0.5, 0.88, "A Quantitative Survey of Human Perceptual Limits:", ha='center', fontsize=20, weight='bold')
    fig.text(0.5, 0.84, "The Strategic Imperative for Augmentation", ha='center', fontsize=18)
    fig.text(0.5, 0.79, "Dan Ehlers (pinballsurgeon@gmail.com) | August 31, 2025", ha='center', fontsize=10, color='#666666')

    # --- Abstract  ---
    fig.text(0.12, 0.72, "Abstract", fontsize=12, weight='bold')
    abstract_text = "This report provides a quantitative survey of human perceptual and cognitive capabilities, framed as the 'coverage' of universally available spectra. By synthesizing data across six core domains—from basic sensory input to abstract mathematical thought—we map the narrow windows through which humans experience reality. Our analysis reveals profound limitations, particularly in symbolic and mathematical reasoning. These findings establish a data-driven framework for identifying and prioritizing the critical human augmentation technologies required to meet the demands of 2026 and beyond."
    fig.text(0.12, 0.68, textwrap.fill(abstract_text, 90), ha='left', va='top', fontsize=10.5, linespacing=1.4)

    # --- Introduction ---
    fig.text(0.12, 0.53, "1. Introduction", fontsize=12, weight='bold')
    intro_text = "The modern world generates information at a scale and complexity that far exceeds the natural processing limits of the human mind. The field of human augmentation aims to bridge this gap, using technology to enhance our innate abilities. However, to be effective, these efforts must be directed at our most significant limitations. This paper provides a clear, quantitative map of those limitations.\n\nWe introduce a standardized metric, 'perceptual coverage,' to measure the fraction of a universal physical or conceptual range that is accessible to human perception. By applying this metric across diverse functions, we create a comparative overview of our capabilities. This analysis serves a strategic purpose: to highlight the domains with the lowest coverage, thereby revealing where technological augmentation can provide the greatest impact."
    fig.text(0.12, 0.49, textwrap.fill(intro_text, 90), ha='left', va='top', fontsize=10.5, linespacing=1.4)


    fig.text(0.13, 0.28, "Figure 1: Continuous Coverage Field (Equalized by Class)", fontsize=11)
    ax_field = fig.add_axes([0.12, 0.13, 0.76, 0.13])
    plot_coverage_field(ax_field, df_full, agg_full)

    draw_footer(fig, page_num, total_pages)
    return fig

def create_page_two(page_num, total_pages):
    """Creates the methodology, findings chart, and summary table."""
    fig = create_letter_page()
    fig.text(0.12, 0.92, "2. Method and Key Findings", fontsize=14, weight='bold')
    fig.text(0.12, 0.87, "Methodology", fontsize=11, weight='bold')
    method_text = "We evaluated over a dozen distinct aspects of perception and cognition, grouped into six domains. For each aspect, a 'human range' (H) was compared against a 'universal spectrum' (U) derived from scientific literature. Coverage was calculated as the ratio of the human span to the universal span. Spans for logarithmic scales (e.g., sound frequency) were calculated in log space to properly reflect perceptual sensitivity."
    fig.text(0.12, 0.83, textwrap.fill(method_text, 90), ha='left', va='top', fontsize=10.5, linespacing=1.4)
    fig.text(0.5, 0.72, "Mean Perceptual Coverage by Domain", ha='right', fontsize=10, weight='bold')
    ax = fig.add_axes([0.3, 0.6, 0.6, 0.12])
    bars = ax.barh(df_report['domain'], df_report['mean_coverage'], color='#3B5B8F', height=0.7)
    ax.invert_yaxis(); ax.set_xlim(0, 0.6); ax.tick_params(axis='y', length=0); ax.tick_params(axis='x', labelsize=8, colors='#555555')
    ax.spines[['top', 'right', 'left']].set_visible(False); ax.spines['bottom'].set_color('#AAAAAA')
    ax.get_yaxis().set_ticks([]); ax.set_xticks([0, 0.1, 0.2, 0.3, 0.4, 0.5])
    for i, (domain, value) in enumerate(zip(df_report['domain'], df_report['mean_coverage'])):
        ax.text(-0.01, i, domain, va='center', ha='right', fontsize=9, weight='bold')
        ax.text(value + 0.01, i, f"{value*100:.0f}%", va='center', ha='left', fontsize=9, weight='bold', color='#3B5B8F')
    fig.text(0.5, 0.52, "Table 1: Domain Coverage and Core Augmentation Assumptions", ha='center', fontsize=10, weight='bold')
    y_pos, row_h = 0.47, 0.08
    fig.text(0.12, y_pos, "Domain", weight='bold', fontsize=10); fig.text(0.35, y_pos, "Coverage", weight='bold', fontsize=10)
    fig.text(0.48, y_pos, "Core Assumption for Augmentation", weight='bold', fontsize=10)
    fig.patches.extend([plt.Rectangle((0.12, y_pos - 0.02), 0.76, 0.002, fc='#AAAAAA', transform=fig.transFigure)])
    for _, row in df_report.iterrows():
        y_pos -= row_h
        fig.text(0.12, y_pos, row['domain'], fontsize=9, va='top')
        fig.text(0.35, y_pos, f"{row['mean_coverage']:.0%}", fontsize=9, va='top')
        fig.text(0.48, y_pos, textwrap.fill(row['assumption'], 55), fontsize=9, va='top', linespacing=1.3)
    draw_footer(fig, page_num, total_pages)
    return fig

# Page 3 and 4 functions remain unchanged...
def create_page_three(page_num, total_pages):
    fig = create_letter_page()
    fig.text(0.12, 0.92, "3. Strategic Augmentation Demands for 2026", fontsize=14, weight='bold')
    fig.text(0.12, 0.86, "1. Sensory Translation and Transduction", fontsize=11, weight='bold')
    text1 = "The profound limits in basic sensory perception (Physical Body domain) highlight an urgent need for technologies that make the invisible visible. The goal is to translate out-of-band energy—such as infrared, ultraviolet, or ultrasonic frequencies—into formats humans can intuitively understand. This moves beyond niche instruments to everyday tools for navigating the world."
    fig.text(0.12, 0.82, textwrap.fill(text1, 90), ha='left', va='top', fontsize=10.5, linespacing=1.4)
    fig.text(0.12, 0.72, "2. Seamless Cognitive Offloading", fontsize=11, weight='bold')
    text2 = "The critical bottlenecks in mathematical and symbolic thought demand AI-powered cognitive partners. These systems must augment working memory and automate complex reasoning, integrating smoothly with human thought processes rather than simply acting as calculators. The aim is to reduce cognitive load and enable focus on higher-level problem-solving."
    fig.text(0.12, 0.68, textwrap.fill(text2, 90), ha='left', va='top', fontsize=10.5, linespacing=1.4)
    fig.text(0.12, 0.58, "3. Latency-Aware Interface Design", fontsize=11, weight='bold')
    text3 = "The high coverage in Agency & Control is a warning: our sense of control is fragile and depends on immediate feedback. As we interact through more complex technology (VR/AR, remote robotics), designers must aggressively minimize lag. Preserving this temporal link is a fundamental constraint for any augmentation that feels direct and intuitive."
    fig.text(0.12, 0.54, textwrap.fill(text3, 90), ha='left', va='top', fontsize=10.5, linespacing=1.4)
    fig.text(0.12, 0.44, "4. Conclusion", fontsize=12, weight='bold')
    conclusion_text = "By quantifying the limits of human perception, this work provides a data-driven map for a strategic approach to human augmentation. The clearest takeaway is that our greatest needs lie not in amplifying existing strengths, but in systematically bridging the vast gaps in our sensory and cognitive capabilities. The technologies designed to address these specific, measured shortfalls will define the next generation of human-computer partnership and unlock new potential for discovery and understanding."
    fig.text(0.12, 0.40, textwrap.fill(conclusion_text, 90), ha='left', va='top', fontsize=10.5, linespacing=1.4)
    draw_footer(fig, page_num, total_pages)
    return fig

def create_page_four_references(page_num, total_pages):
    fig = create_letter_page()

    fig.text(0.12, 0.92, "5. References", fontsize=14, weight='bold')

    y_pos = 0.88
    for index, row in refs_df.iterrows():
        if y_pos < 0.1: break

        citation_text, url = parse_citation(row['source_human'], row['aspect_label'])

        wrapped_text = textwrap.fill(citation_text, 95)

        text_obj = fig.text(0.12, y_pos, wrapped_text, ha='left', va='top', fontsize=8.5, linespacing=1.3)

        if url:
            text_obj.set_url(url)
        num_lines = len(wrapped_text.split('\n'))
        y_pos -= (num_lines * 0.015 + 0.012)

    draw_footer(fig, page_num, total_pages)
    return fig


def generate_final_report(filename="Human_Augmentation_Report_2026.pdf"):
    """Orchestrates the creation of all pages and saves them to a single PDF."""

    # --- MODIFIED: Prepare the full dataset needed for the visualization ---
    print("🔬 Computing metrics from full dataset for visualization...")
    df_full = compute_metrics(refs_df)
    agg_full = class_aggregates(df_full)

    TOTAL_PAGES = 4
    with PdfPages(filename) as pdf:
        print("📄 Generating Page 1: Title, Intro, and Coverage Field...")
        # Pass the computed data to the page one function
        fig1 = create_page_one(1, TOTAL_PAGES, df_full, agg_full)
        pdf.savefig(fig1)
        plt.close(fig1)

        print("📄 Generating Page 2: Findings...")
        fig2 = create_page_two(2, TOTAL_PAGES)
        pdf.savefig(fig2)
        plt.close(fig2)

        print("📄 Generating Page 3: Demands...")
        fig3 = create_page_three(3, TOTAL_PAGES)
        pdf.savefig(fig3)
        plt.close(fig3)

        print("📄 Generating Page 4: References...")
        fig4 = create_page_four_references(4, TOTAL_PAGES)
        pdf.savefig(fig4)
        plt.close(fig4)

    print(f"\n✅ Final report successfully generated. Saved as '{filename}'")

# --- Execute the report generation ---
generate_final_report()

🔬 Computing metrics from full dataset for visualization...
📄 Generating Page 1: Title, Intro, and Coverage Field...


  .apply(lambda x: np.average(x["coverage_fraction"], weights=x.get("U_weight", np.ones(len(x)))))


📄 Generating Page 2: Findings...
📄 Generating Page 3: Demands...
📄 Generating Page 4: References...

✅ Final report successfully generated. Saved as 'Human_Augmentation_Report_2026.pdf'


In [82]:
import os
from pypdf import PdfWriter

# --- Configuration ---

arxiv_style_report_path = "Human_Augmentation_Report_2026.pdf"

details_report_dir = "world_class_report_v2"
details_report_filename = "perception_report.pdf"
details_report_path = os.path.join(details_report_dir, details_report_filename)

final_combined_path = "Perception_Report_Complete_Final.pdf"

# --- Execution ---

print("🚀 Starting the final report generation pipeline...")

try:

    print(f"📄 Generating the 4-page 'arXiv-style' report (front matter)...")
    generate_final_report(filename=arxiv_style_report_path)

    print(f"📄 Generating the detailed summary & breakdown report...")
    _ = generate_report(refs_df, output_dir=details_report_dir, file_prefix="perception")
    if not os.path.exists(arxiv_style_report_path) or not os.path.exists(details_report_path):
        raise FileNotFoundError("One or both of the required PDF files were not generated successfully.")

    print(f"\n📎 Assembling the final document...")
    pdf_merger = PdfWriter()

    print(f"   + Adding Front Matter: '{arxiv_style_report_path}'")
    pdf_merger.append(arxiv_style_report_path)

    print(f"   + Adding Detailed Report: '{details_report_path}'")
    pdf_merger.append(details_report_path)

    with open(final_combined_path, "wb") as output_pdf:
        pdf_merger.write(output_pdf)

    pdf_merger.close()
    print(f"\n🎉 Success! The final combined report is complete.")
    print(f"   Saved as: '{final_combined_path}'")

except NameError as e:
    print(f"❌ ERROR: A required variable or function is not defined. Please ensure all previous cells have been run.")
    print(f"   Missing component: {e}")
except Exception as e:
    print(f"❌ ERROR: An unexpected error occurred during the finalization process.")
    print(f"   Details: {e}")

🚀 Starting the final report generation pipeline...
📄 Generating the 4-page 'arXiv-style' report (front matter)...
🔬 Computing metrics from full dataset for visualization...
📄 Generating Page 1: Title, Intro, and Coverage Field...


  .apply(lambda x: np.average(x["coverage_fraction"], weights=x.get("U_weight", np.ones(len(x)))))


📄 Generating Page 2: Findings...
📄 Generating Page 3: Demands...
📄 Generating Page 4: References...

✅ Final report successfully generated. Saved as 'Human_Augmentation_Report_2026.pdf'
📄 Generating the detailed summary & breakdown report...


  .apply(lambda x: np.average(x["coverage_fraction"], weights=x.get("U_weight", np.ones(len(x)))))


✨ Report generated
📄 world_class_report_v2/perception_report.pdf

📎 Assembling the final document...
   + Adding Front Matter: 'Human_Augmentation_Report_2026.pdf'
   + Adding Detailed Report: 'world_class_report_v2/perception_report.pdf'

🎉 Success! The final combined report is complete.
   Saved as: 'Perception_Report_Complete_Final.pdf'
