# Testing Visuals

## Initial Setup

In [3]:
import polars as pl
from pathlib import Path

import plotly.graph_objects as go

# Parquet dataset location, built from the resolved current working directory.
DATA_PATH = Path.cwd().resolve().joinpath(
    "data", "daioe_scb_years_all_levels.parquet"
)

# Lazy scan of the parquet file; every later query in the notebook starts here.
lf = pl.scan_parquet(DATA_PATH)

In [4]:
# Resolve the lazy frame's schema and display its column names and dtypes.
lf.collect_schema()

Schema([('level', String),
        ('ssyk_code', String),
        ('age', String),
        ('sex', String),
        ('year', Int64),
        ('count', Int64),
        ('occupation', String),
        ('age_group', String),
        ('weight_sum', Int64),
        ('daioe_allapps_avg', Float64),
        ('daioe_stratgames_avg', Float64),
        ('daioe_videogames_avg', Float64),
        ('daioe_imgrec_avg', Float64),
        ('daioe_imgcompr_avg', Float64),
        ('daioe_imggen_avg', Float64),
        ('daioe_readcompr_avg', Float64),
        ('daioe_lngmod_avg', Float64),
        ('daioe_translat_avg', Float64),
        ('daioe_speechrec_avg', Float64),
        ('daioe_genai_avg', Float64),
        ('daioe_allapps_wavg', Float64),
        ('daioe_stratgames_wavg', Float64),
        ('daioe_videogames_wavg', Float64),
        ('daioe_imgrec_wavg', Float64),
        ('daioe_imgcompr_wavg', Float64),
        ('daioe_imggen_wavg', Float64),
        ('daioe_readcompr_wavg', Float64),
      

In [5]:
# Small cache of unique values for UI choices
def _unique_sorted(column: str) -> list:
    """Return the distinct values of ``column`` in ``lf`` as a sorted list.

    Args:
        column: Name of a column in the lazy frame ``lf``.

    Returns:
        A plain Python list holding the column's unique values, in the
        order produced by Polars' default (ascending) ``sort``.
    """
    return (
        lf.select(pl.col(column).unique().sort())
        .collect()
        .to_series()
        .to_list()
    )


LEVELS = _unique_sorted("level")
SEXES = _unique_sorted("sex")
AGES = _unique_sorted("age")
YEARS = _unique_sorted("year")

# Smallest and largest year present in the data.
YEAR_MIN, YEAR_MAX = min(YEARS), max(YEARS)

# Display label for each metric. Keys match the `daioe_*` column prefixes
# (the part before the `_avg` / `_wavg` suffix in the schema).
# The emoji prefixes were previously stored as mis-decoded (mojibake) text;
# they are restored to the intended Unicode characters here.
METRICS: dict[str, str] = {
    "daioe_allapps": "📚 All Applications",
    "daioe_stratgames": "♟️ Strategy Games",
    "daioe_videogames": "🎮 Video Games (Real-Time)",
    "daioe_imgrec": "🖼️ Image Recognition",
    "daioe_imgcompr": "🧩 Image Comprehension",
    "daioe_imggen": "🎨 Image Generation",
    "daioe_readcompr": "📖 Reading Comprehension",
    "daioe_lngmod": "✍️ Language Modeling",
    "daioe_translat": "🌐 Translation",
    "daioe_speechrec": "🎙️ Speech Recognition",
    "daioe_genai": "🧠 Generative AI",
}

In [6]:
# Sum of `count` per year, materialized as a pandas DataFrame.
# NOTE: the next cell rebinds `p_df` to a Polars DataFrame with a
# `total_count` column, so this pandas version is only visible in this cell.
p_df = (
    lf.select(pl.col(["year", "count"]))
    .group_by(pl.col("year"))
    .agg(pl.col("count").sum())
    # group_by gives no guaranteed group order; sort so rows are chronological
    # and the result is the same on every run.
    .sort("year")
    .collect()
    .to_pandas()
)



In [7]:

# Sum `count` within each year and order the rows chronologically.
# NOTE(review): rows from every `level` are summed together; if the levels are
# nested aggregations of the same population this double-counts — confirm.
p_df = (
    lf.select(["year", "count"])
      .group_by("year")
      .agg(pl.col("count").sum().alias("total_count"))
      .sort("year")
      .collect()
)

fig = go.Figure()

# One line-plus-markers trace: years on x, summed counts on y.
fig.add_trace(
    go.Scatter(
        x=p_df["year"].to_list(),
        y=p_df["total_count"].to_list(),
        mode="lines+markers"
    )
)

fig.update_layout(
    # Title and axis labels so the figure can be read on its own.
    title="Total count per year",
    xaxis_title="Year",
    yaxis_title="Total count",
    paper_bgcolor="rgba(0,0,0,0)",  # outside plot area
    plot_bgcolor="rgba(0,0,0,0)"    # inside plot area
)

fig.show()
