## 1. Setup
Import libraries and create the local output directory.


In [53]:
from pyscbwrapper import SCB
import polars as pl
from pathlib import Path
import pandas as pd

# Anchor every output path to the absolute form of the current working directory.
root = Path.cwd().resolve()

# Local output folder; created if missing, left alone if it already exists.
data_dir = root.joinpath("data")
data_dir.mkdir(parents=True, exist_ok=True)

## 2. SCB Table Configuration
Define the table mapping and choose the target table ID.


In [54]:
# Each entry is the tuple of positional arguments that gets unpacked into SCB(...).
# NOTE(review): looks like (language, then the path segments down to the table) —
# confirm against the pyscbwrapper documentation.
TABLES = {
    "month_tab": (
        "en",
        "AM",
        "AM0401",
        "AM0401I",
        "NAKUSysselYrke2012M",
    ),
}

# Key into TABLES selecting the table this notebook works with.
tab_id = "month_tab"

## 3. Initialize SCB Client
Create the API client for the selected SCB table.


In [55]:
# Unpack the configured tuple for the chosen table as SCB's positional arguments.
scb = SCB(*TABLES[tab_id])

In [56]:
#scb.info()

## 4. Load Variable Metadata
Read available dimensions and extract keys/values used to build the query.


In [57]:
# Fetch the table's variables. The cells below use the result as a mapping
# from variable name to a list of that variable's selectable values.
var_ = scb.get_variables()

In [58]:
# First variable name containing "occupation" (case-insensitive);
# next() raises StopIteration when no name matches.
occupations_key = next(filter(lambda name: "occupation" in name.lower(), var_))
# Keep every value of that variable.
occupations = var_[occupations_key]

In [59]:
# First variable name containing "observations" (case-insensitive);
# next() raises StopIteration when no name matches.
observations_key = next(filter(lambda name: "observations" in name.lower(), var_))
# Only the first listed value of that variable is queried.
observations = var_[observations_key][0]

In [60]:
# First variable name containing "month" (case-insensitive);
# next() raises StopIteration when no name matches.
months_key = next(filter(lambda name: "month" in name.lower(), var_))
# Keep every available month.
months = var_[months_key]

In [61]:
# First variable name containing "sex" (case-insensitive);
# next() raises StopIteration when no name matches.
sex_key = next(filter(lambda name: "sex" in name.lower(), var_))
# Keep only the first two listed values.
# NOTE(review): presumably the two individual sexes, leaving out a total — verify.
sex = var_[sex_key][:2]

## 5. Build and Run Query
Set dimension selections and fetch data from SCB.


In [62]:

# Build the query from the table's own variable names: all occupations, all
# months, the single chosen observation value, and the two chosen sex values.
# NOTE(review): later cells read the built query by position (index 0 for
# occupation, index 3 for sex), which matches the entry order used here —
# confirm that pyscbwrapper preserves this order before reordering entries.
scb.set_query(
    **{
        occupations_key: occupations,
        months_key: months,
        observations_key: observations,
        sex_key: sex,
    }
)

In [63]:
# Run the query; the full response is kept in scb_data.
scb_data = scb.get_data()
# The records live under the "data" key; each one is later read through its
# "key" and "values" fields.
scb_fetch = scb_data["data"]

## 6. Create Code Mappings
Map SCB codes to readable occupation and sex labels.


In [64]:
# Selected codes of the first query entry, paired positionally with the
# occupation labels to form a code -> label lookup.
# NOTE(review): index 0 assumes occupation is the first entry of the built query
# and that codes and labels come back in the same order — confirm.
codes = scb.get_query()["query"][0]["selection"]["values"]
occ_dict = dict(zip(codes, occupations))

In [65]:
# Selected codes of the fourth query entry, paired positionally with the
# chosen sex labels to form a code -> label lookup.
# NOTE(review): index 3 assumes sex is the fourth entry of the built query
# and that codes and labels come back in the same order — confirm.
sex_codes = scb.get_query()["query"][3]["selection"]["values"]
sex_dict = dict(zip(sex_codes, sex))

## 7. Transform Response to DataFrame
Normalize API payload, clean records, and cast data types.


In [66]:
# Flatten each record into columns: the three entries of "key" become the
# occupation code, sex and month, and the first entry of "values" becomes the
# measurement. Codes are then mapped to readable labels, two occupation codes
# are excluded, and the final dtypes are fixed.
df = (
    pl.DataFrame(scb_fetch)
    .with_columns(
        code_1=pl.col("key").list.get(0),
        sex=pl.col("key").list.get(1),
        month=pl.col("key").list.get(2),
        value=pl.col("values").list.get(0),
    )
    .drop("key", "values")
    .with_columns(
        # code_1 keeps the raw occupation code; "occupation" carries its label.
        occupation=pl.col("code_1").replace(occ_dict),
        sex=pl.col("sex").replace(sex_dict),
    )
    # NOTE(review): "0002" and "0000" are presumably aggregate or unspecified
    # occupation categories — confirm against the table's value list.
    .filter(~pl.col("code_1").is_in(["0002", "0000"]))
    .with_columns(
        pl.col("code_1", "occupation", "sex", "month").cast(pl.Utf8),
        # Non-strict cast: entries that cannot be parsed as numbers become null.
        pl.col("value").cast(pl.Float64, strict=False),
    )
)


## 8. Format Month Values
Convert month values to a readable year-month label.


In [67]:
# Swap the first "M" in each month string for "-", parse the result as a
# year-month date, then render it back to text as "<year>-<abbreviated month
# name>" (e.g. "2015-Jan"). The column stays a string column.
df = df.with_columns(
    month=pl.col("month")
    .str.replace("M", "-")
    .str.strptime(pl.Date, "%Y-%m")
    .dt.strftime("%Y-%b")
)


## 9. Preview Output
Check a sample of the transformed data.


In [68]:
# Display the first 10 rows as the cell output.
df.head(10)

code_1,sex,month,value,occupation
str,str,str,f64,str
"""1""","""men""","""2015-Jan""",169.8,"""Managers"""
"""1""","""men""","""2015-Feb""",164.8,"""Managers"""
"""1""","""men""","""2015-Mar""",156.2,"""Managers"""
"""1""","""men""","""2015-Apr""",171.5,"""Managers"""
"""1""","""men""","""2015-May""",177.8,"""Managers"""
"""1""","""men""","""2015-Jun""",151.0,"""Managers"""
"""1""","""men""","""2015-Jul""",174.6,"""Managers"""
"""1""","""men""","""2015-Aug""",174.4,"""Managers"""
"""1""","""men""","""2015-Sep""",157.0,"""Managers"""
"""1""","""men""","""2015-Oct""",189.5,"""Managers"""


## 10. Save Dataset
Write the final dataset to Parquet in the `data` folder.


In [69]:
# Persist the transformed frame as Parquet inside the local data folder.
df.write_parquet(data_dir / "scb_months.parquet")