In [1]:
# Load IPython's autoreload extension and set it to mode 2, so edits to imported
# modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2   

In [2]:
from typing import Any
import re
from enum import Enum

In [3]:
from tools.writer import EnumEntry, enum_file, write_enum_class, AUTO_ENUMS_PATH

In [4]:
from statscan.util.log import configure_logging
from statscan.wds.client import WDS

In [5]:
# Enable DEBUG-level logging; this is what produces the httpcore / tools.writer
# log lines that appear in the cell outputs below.
configure_logging(level='DEBUG')

In [8]:
# WDS client used for the code-set request below; timeout=30 is the value that
# shows up on the connect/TLS steps in the logged HTTP trace.
client = WDS(timeout=30)

In [9]:
# Fetch the code sets from the service (top-level await, as supported by the
# notebook kernel). The result is a dict keyed by code-set name (e.g. 'scalar'),
# each value being a list of entry dicts -- see the displayed output further down.
codesets: dict = await client.get_code_sets()

2025-08-26 12:41:39,252 :: DEBUG :: httpcore.connection :: _trace.atrace:87 - connect_tcp.started host='www150.statcan.gc.ca' port=443 local_address=None timeout=30 socket_options=None


2025-08-26 12:41:39,292 :: DEBUG :: httpcore.connection :: _trace.atrace:87 - connect_tcp.complete return_value=<httpcore._backends.anyio.AnyIOStream object at 0x3da81660290>
2025-08-26 12:41:39,292 :: DEBUG :: httpcore.connection :: _trace.atrace:87 - start_tls.started ssl_context=<ssl.SSLContext object at 0x3da81177d10> server_hostname='www150.statcan.gc.ca' timeout=30
2025-08-26 12:41:39,352 :: DEBUG :: httpcore.connection :: _trace.atrace:87 - start_tls.complete return_value=<httpcore._backends.anyio.AnyIOStream object at 0x3da81660790>
2025-08-26 12:41:39,353 :: DEBUG :: httpcore.http11 :: _trace.atrace:87 - send_request_headers.started request=<Request [b'GET']>
2025-08-26 12:41:39,353 :: DEBUG :: httpcore.http11 :: _trace.atrace:87 - send_request_headers.complete
2025-08-26 12:41:39,354 :: DEBUG :: httpcore.http11 :: _trace.atrace:87 - send_request_body.started request=<Request [b'GET']>
2025-08-26 12:41:39,354 :: DEBUG :: httpcore.http11 :: _trace.atrace:87 - send_request_body.

In [11]:
# Display the raw response so the per-entry key naming (…Code / …DescEn / …DescFr) can be inspected.
codesets

{'scalar': [{'scalarFactorCode': 0,
   'scalarFactorDescEn': 'units',
   'scalarFactorDescFr': 'unités'},
  {'scalarFactorCode': 1,
   'scalarFactorDescEn': 'tens',
   'scalarFactorDescFr': 'dizaines'},
  {'scalarFactorCode': 2,
   'scalarFactorDescEn': 'hundreds',
   'scalarFactorDescFr': 'centaines'},
  {'scalarFactorCode': 3,
   'scalarFactorDescEn': 'thousands',
   'scalarFactorDescFr': 'milliers'},
  {'scalarFactorCode': 4,
   'scalarFactorDescEn': 'tens of thousands',
   'scalarFactorDescFr': 'dizaines de milliers'},
  {'scalarFactorCode': 5,
   'scalarFactorDescEn': 'hundreds of thousands',
   'scalarFactorDescFr': 'centaines de milliers'},
  {'scalarFactorCode': 6,
   'scalarFactorDescEn': 'millions',
   'scalarFactorDescFr': 'millions'},
  {'scalarFactorCode': 7,
   'scalarFactorDescEn': 'tens of millions',
   'scalarFactorDescFr': 'dizaines de millions'},
  {'scalarFactorCode': 8,
   'scalarFactorDescEn': 'hundreds of millions',
   'scalarFactorDescFr': 'centaines de millions

In [15]:
def get_keys(entry_dict: dict[str, Any]) -> tuple[str, str, set[str]]:
    '''
    Work out which fields of a code-set entry supply the enum member name and value.

    Selection rules, applied to the keys of ``entry_dict`` in iteration order:

    * key-key (member name): the last key ending in ``"DescEn"``; if there is none,
      the first key ending in ``"En"``.
    * value-key (member value): the last key ending in ``"Code"``; if there is none,
      the first key ending in ``"Id"``.

    Args:
        entry_dict: one entry of a code set (e.g. the first element of its list).

    Returns:
        ``(key_key, value_key, remaining_keys)`` where ``remaining_keys`` is every
        key of ``entry_dict`` other than the two selected ones.

    Raises:
        ValueError: if no key-key or no value-key can be found.
    '''
    key_key = None
    value_key = None

    # Primary suffixes: a later match overrides an earlier one.
    for k in entry_dict.keys():
        if k.endswith("DescEn"):
            key_key = k
        elif k.endswith("Code"):
            value_key = k

    # Fallback suffixes: first match wins.
    if key_key is None:
        key_key = next((k for k in entry_dict.keys() if k.endswith("En")), None)
        if key_key is None:
            raise ValueError(f"Could not find key key for enum. Possible keys are {entry_dict.keys()}")
    if value_key is None:
        value_key = next((k for k in entry_dict.keys() if k.endswith("Id")), None)
        if value_key is None:
            raise ValueError(f"Could not find value key for enum. Possible keys are {entry_dict.keys()}")

    # Derive the remainder from the final selection so that a key which matched a
    # primary suffix but was superseded by a later match is still reported instead
    # of silently disappearing.
    remaining_keys = {k for k in entry_dict.keys() if k != key_key and k != value_key}

    return key_key, value_key, remaining_keys


In [16]:
def camel_to_snake(camel: str) -> str:
    """
    Turn a camelCase identifier into its snake_case form.

    An underscore is inserted in front of every ASCII capital letter that is not
    the first character of the string, and the result is lower-cased.
    """
    # Zero-width match: "not at the start" and "next character is A-Z".
    word_boundary = re.compile(r'(?<!^)(?=[A-Z])')
    with_underscores = word_boundary.sub('_', camel)
    return with_underscores.lower()

In [18]:
from pathlib import Path

# Create scratch output directory.
# The original checkout location is used when it exists, so output lands where it
# always has; on any other machine fall back to the current working directory
# rather than trying to create directories under a non-existent home.
project_root = Path("/home/pbouill/VS Code Projects/statistics-canada")
if not project_root.is_dir():
    project_root = Path.cwd()
scratch_dir = project_root / "scratch" / "generated_enums_notebook"
scratch_dir.mkdir(parents=True, exist_ok=True)

print(f"Generating enums to: {scratch_dir}")

# Iterate `codesets` directly (the dict fetched from the WDS client above) so this
# cell does not depend on an alias that is only created by a different cell.
for codeset_name, codeset_entries in codesets.items():
    # Class name is the code-set name with its first letter upper-cased.
    cls_name = codeset_name[0].upper() + codeset_name[1:]
    print(f'Processing Enum: {cls_name}')

    # The first entry is used to infer the schema; nothing to infer from an empty set.
    if not codeset_entries:
        print(f"  Skipping {cls_name}: code set has no entries")
        continue

    key_key, value_key, remaining_keys = get_keys(codeset_entries[0])
    print(f"  Key: {key_key}, Value: {value_key}, Other keys: {remaining_keys}")

    entries: list[EnumEntry] = []
    for entry_dict in codeset_entries:
        try:
            entry_key = entry_dict[key_key]
            entry_value = entry_dict[value_key]
        except KeyError as ke:
            # Entries that lack the keys inferred from the first entry are reported and skipped.
            print(f"Error processing entry in {cls_name}: {ke}. Available keys are: {entry_dict.keys()}")
            continue

        # Fields that are neither the name nor the value are kept as a comment on the member.
        if remaining_keys:
            extras = {name: val for name, val in entry_dict.items() if name in remaining_keys}
            entry_comment = str(extras)
        else:
            entry_comment = None

        # A missing (null) description still needs a string to serve as the member key.
        if entry_key is None:
            entry_key = 'None'

        entries.append(EnumEntry(key=entry_key, value=int(entry_value), comment=entry_comment))

    # Output to scratch directory
    output_file = scratch_dir / f'{camel_to_snake(codeset_name)}.py'
    with enum_file(fp=output_file, imports={'enum': 'Enum'}, overwrite=True) as f:
        write_enum_class(f, entries=entries, cls_name=cls_name, skip_methods=True)
    print(f"  -> Generated: {output_file}")

print(f"\n✅ Generated enums from notebook approach in: {scratch_dir}")

2025-08-26 12:52:20,417 :: DEBUG :: tools.writer :: writer.write_enum_class:159 - Writing class Scalar to file from template None...
2025-08-26 12:52:20,418 :: DEBUG :: tools.writer :: writer.write_enum_class:181 - key: units is unique
2025-08-26 12:52:20,418 :: DEBUG :: tools.writer :: writer.write_enum_class:181 - key: tens is unique
2025-08-26 12:52:20,419 :: DEBUG :: tools.writer :: writer.write_enum_class:181 - key: hundreds is unique
2025-08-26 12:52:20,419 :: DEBUG :: tools.writer :: writer.write_enum_class:181 - key: thousands is unique
2025-08-26 12:52:20,420 :: DEBUG :: tools.writer :: writer.write_enum_class:181 - key: tens of thousands is unique
2025-08-26 12:52:20,420 :: DEBUG :: tools.writer :: writer.write_enum_class:181 - key: hundreds of thousands is unique
2025-08-26 12:52:20,421 :: DEBUG :: tools.writer :: writer.write_enum_class:181 - key: millions is unique
2025-08-26 12:52:20,422 :: DEBUG :: tools.writer :: writer.write_enum_class:181 - key: tens of millions is un

Generating enums to: /home/pbouill/VS Code Projects/statistics-canada/scratch/generated_enums_notebook
Processing Enum: Scalar
  Key: scalarFactorDescEn, Value: scalarFactorCode, Other keys: {'scalarFactorDescFr'}
  -> Generated: /home/pbouill/VS Code Projects/statistics-canada/scratch/generated_enums_notebook/scalar.py
Processing Enum: Frequency
  Key: frequencyDescEn, Value: frequencyCode, Other keys: {'frequencyDescFr'}
  -> Generated: /home/pbouill/VS Code Projects/statistics-canada/scratch/generated_enums_notebook/frequency.py
Processing Enum: Symbol
  Key: symbolDescEn, Value: symbolCode, Other keys: {'symbolDescFr', 'symbolRepresentationFr', 'symbolRepresentationEn'}
  -> Generated: /home/pbouill/VS Code Projects/statistics-canada/scratch/generated_enums_notebook/symbol.py
Processing Enum: Status
  Key: statusDescEn, Value: statusCode, Other keys: {'statusRepresentationFr', 'statusRepresentationEn', 'statusDescFr'}
  -> Generated: /home/pbouill/VS Code Projects/statistics-canada

2025-08-26 12:52:20,616 :: DEBUG :: tools.writer :: writer.write_enum_class:181 - key: 2019 constant dollars is unique
2025-08-26 12:52:20,617 :: DEBUG :: tools.writer :: writer.write_enum_class:181 - key: Incidents is unique
2025-08-26 12:52:20,618 :: DEBUG :: tools.writer :: writer.write_enum_class:181 - key: Tonnes per thousand dollars is unique
2025-08-26 12:52:20,618 :: DEBUG :: tools.writer :: writer.write_enum_class:181 - key: Index value (2019=0) is unique
2025-08-26 12:52:20,619 :: DEBUG :: tools.writer :: writer.write_enum_class:181 - key: Megajoules is unique
2025-08-26 12:52:20,619 :: DEBUG :: tools.writer :: writer.write_enum_class:181 - key: Full cords is unique
2025-08-26 12:52:20,620 :: DEBUG :: tools.writer :: writer.write_enum_class:181 - key: Bricks is unique
2025-08-26 12:52:20,620 :: DEBUG :: tools.writer :: writer.write_enum_class:181 - key: Metres is unique
2025-08-26 12:52:20,621 :: DEBUG :: tools.writer :: writer.write_enum_class:181 - key: Grams per one millio

  -> Generated: /home/pbouill/VS Code Projects/statistics-canada/scratch/generated_enums_notebook/uom.py
Processing Enum: Survey
  Key: surveyEn, Value: surveyCode, Other keys: {'surveyFr'}


2025-08-26 12:52:20,885 :: DEBUG :: tools.writer :: writer.write_enum_class:181 - key: Employment Dynamics is unique
2025-08-26 12:52:20,889 :: DEBUG :: tools.writer :: writer.write_enum_class:181 - key: Survey of the Medical Devices Industry is unique
2025-08-26 12:52:20,890 :: DEBUG :: tools.writer :: writer.write_enum_class:181 - key: University and College Academic Staff System - Full-time Staff is unique
2025-08-26 12:52:20,892 :: DEBUG :: tools.writer :: writer.write_enum_class:181 - key: Survey of Service Industries: Book Publishers is unique
2025-08-26 12:52:20,893 :: DEBUG :: tools.writer :: writer.write_enum_class:181 - key: Annual Survey of Service Industries: Heritage Institutions is unique
2025-08-26 12:52:20,894 :: DEBUG :: tools.writer :: writer.write_enum_class:181 - key: Survey of Service Industries: Performing Arts is unique
2025-08-26 12:52:20,895 :: DEBUG :: tools.writer :: writer.write_enum_class:181 - key: Television Viewing Databank is unique
2025-08-26 12:52:20,

  -> Generated: /home/pbouill/VS Code Projects/statistics-canada/scratch/generated_enums_notebook/survey.py
Processing Enum: Subject
  Key: subjectEn, Value: subjectCode, Other keys: {'subjectFr'}


2025-08-26 12:52:21,566 :: DEBUG :: tools.writer :: writer.write_enum_class:181 - key: Languages/Use of languages/Other content related to Use of languages is unique
2025-08-26 12:52:21,566 :: DEBUG :: tools.writer :: writer.write_enum_class:181 - key: Population and demography/Migration/Internal migration is unique
2025-08-26 12:52:21,567 :: DEBUG :: tools.writer :: writer.write_enum_class:181 - key: Population and demography/Migration/International migration is unique
2025-08-26 12:52:21,566 :: DEBUG :: tools.writer :: writer.write_enum_class:181 - key: Population and demography/Migration/Internal migration is unique
2025-08-26 12:52:21,567 :: DEBUG :: tools.writer :: writer.write_enum_class:181 - key: Population and demography/Migration/International migration is unique
2025-08-26 12:52:21,567 :: DEBUG :: tools.writer :: writer.write_enum_class:181 - key: Population and demography/Migration/Other content related to Migration is unique
2025-08-26 12:52:21,567 :: DEBUG :: tools.writer

  -> Generated: /home/pbouill/VS Code Projects/statistics-canada/scratch/generated_enums_notebook/subject.py
Processing Enum: ClassificationType
  Key: classificationTypeEn, Value: classificationTypeCode, Other keys: {'classificationTypeFr'}
  -> Generated: /home/pbouill/VS Code Projects/statistics-canada/scratch/generated_enums_notebook/classification_type.py
Processing Enum: SecurityLevel
  Key: securityLevelDescEn, Value: securityLevelCode, Other keys: {'securityLevelDescFr', 'securityLevelRepresentationEn', 'securityLevelRepresentationFr'}
  -> Generated: /home/pbouill/VS Code Projects/statistics-canada/scratch/generated_enums_notebook/security_level.py
Processing Enum: Terminated
  Key: codeTextEn, Value: codeId, Other keys: {'displayCodeEn', 'displayCodeFr', 'codeTextFr'}
  -> Generated: /home/pbouill/VS Code Projects/statistics-canada/scratch/generated_enums_notebook/terminated.py
Processing Enum: WdsResponseStatus
  Key: codeTextEn, Value: codeId, Other keys: {'codeTextFr'}
  -

In [12]:
# Check what variables we have: list every public (non-underscore) global with its type.
print("Available variables:")
for var_name in sorted(globals()):
    if not var_name.startswith('_'):
        print(f"  {var_name}: {type(globals()[var_name])}")

# Check if 'o' exists, and if not, assign it to codesets.
# `o` is the name the enum-generation loop iterates over.
if 'o' not in globals():
    print("\nSetting o = codesets")
    o = codesets
else:
    print(f"\n'o' already exists: {type(o)}")

# Show structure of first few items in codesets/o
print(f"\nCodesets has {len(codesets)} items:")
for k, v in list(codesets.items())[:3]:
    print(f"  {k}: {type(v)} with {len(v) if isinstance(v, list) else 'N/A'} items")
    # Only non-empty lists have a sample to show, so v[0] is always safe here.
    if isinstance(v, list) and v:
        print(f"    Sample item: {v[0]}")

Available variables:
  AUTO_ENUMS_PATH: <class 'pathlib._local.PosixPath'>
  Any: <class 'typing._AnyMeta'>
  Enum: <class 'enum.EnumType'>
  EnumEntry: <class 'type'>
  In: <class 'list'>
  Out: <class 'dict'>
  WDS: <class 'type'>
  client: <class 'statscan.wds.client.WDS'>
  codesets: <class 'dict'>
  configure_logging: <class 'function'>
  enum_file: <class 'function'>
  exit: <class 'IPython.core.autocall.ZMQExitAutocall'>
  get_ipython: <class 'method'>
  open: <class 'function'>
  quit: <class 'IPython.core.autocall.ZMQExitAutocall'>
  re: <class 'module'>
  write_enum_class: <class 'function'>

Setting o = codesets

Codesets has 11 items:
  scalar: <class 'list'> with 10 items
    Sample item: {'scalarFactorCode': 0, 'scalarFactorDescEn': 'units', 'scalarFactorDescFr': 'unités'}
  frequency: <class 'list'> with 17 items
    Sample item: {'frequencyCode': 1, 'frequencyDescEn': 'Daily', 'frequencyDescFr': 'Quotidienne'}
  symbol: <class 'list'> with 3 items
    Sample item: {'sym