# Environment Setup / 環境設定
Data Constraining / 資料約束

In [1]:
import os
import sys
from pathlib import Path

# Auto-load the demo helper module (utils.py), which must provide quick_setup.
if "COLAB_GPU" in os.environ:
    # COLAB_GPU is set -> presumably running on Google Colab without a local
    # copy of utils.py, so fetch its source over HTTP instead.
    # The built-in open() only accepts filesystem paths and raises on a URL,
    # so the download has to go through urllib.
    import urllib.request

    url = "https://raw.githubusercontent.com/nics-tw/petsard/main/demo/utils.py"
    with urllib.request.urlopen(url) as response:
        utils_source = response.read().decode("utf-8")

    # NOTE(review): this executes code downloaded at run time from the `main`
    # branch; consider pinning the URL to a tag or commit for reproducibility.
    # Executed at cell top level so the names it defines (quick_setup is used
    # below) land in the notebook's global namespace.
    exec(utils_source)
else:
    # Silently search for utils.py in the working directory and up to four
    # ancestor directories; the first directory containing it is put at the
    # front of sys.path so the import below resolves to that file.
    current = Path.cwd()
    for _ in range(5):
        if (current / "utils.py").exists():
            sys.path.insert(0, str(current))
            break
        current = current.parent

    # Import the utils module. If the search above found nothing, this falls
    # back to whatever `utils` is importable from the existing sys.path.
    from utils import quick_setup

# Quick setup: returns the Colab flag, the branch, and the path of the YAML
# config that the later cells pass to the Executor.
is_colab, branch, yaml_path = quick_setup(
    yaml_file="data-constraining.yaml",
    benchmark_data=[
        "adult-income",
    ],
    branch="main",  # optional; defaults to "main"
)

from petsard import Executor

🚀 PETsARD v1.5.1
📅 2025-07-31 13:32:19 UTC+8
📁 Subfolder: tutorial/use-cases
📄 YAML path: petsard/demo/tutorial/use-cases/data-constraining.yaml
⚙️ Configuration content:
---
Loader:
  data:
    filepath: 'benchmark/adult-income.csv'
Preprocessor:
  demo:
    method: 'default'
Synthesizer:
  demo:
    method: 'default'
Postprocessor:
  demo:
    method: 'default'
Constrainer:
  demo:
    nan_groups:
      # Delete entire row when workclass is NA
      workclass: 'delete'
      # Set income to NA if occupation is NA
      occupation:
        'erase':
          - 'income'
      # Copy educational-num value to age when educational-num exists but age is NA
      age:
        'copy':
          'educational-num'
    field_constraints:
      - "age >= 18 & age <= 65"
      - "hours-per-week >= 20 & hours-per-week <= 60"
    field_combinations:
      -
        - education: income
        - Doctorate: ['>50K']
          Masters: ['>50K', '<=50K']
    field_proportions:
      # Maintain educatio

# Execution and Results / 執行與結果

In [2]:
# Build an Executor from the YAML config path produced by quick_setup in the
# setup cell, then run it. The captured output of this cell shows the
# Loader -> Preprocessor -> Synthesizer -> Postprocessor -> Constrainer
# experiment being saved to CSV.
exec_case = Executor(config=yaml_path)
exec_case.run()

Now is petsard_Loader[data]_Preprocessor[demo]_Synthesizer[demo]_Postprocessor[demo]_Constrainer[demo] save to csv...


In [3]:
# Fetch the results and drill into them with two keys: the outer key is the
# full experiment name ending in Reporter[output]; the inner key is the
# Loader -> ... -> Constrainer pipeline name without the Reporter suffix.
# As the last expression of the cell, the selected table is displayed as the
# cell output.
exec_case.get_result()[
    "Loader[data]_Preprocessor[demo]_Synthesizer[demo]_Postprocessor[demo]_Constrainer[demo]_Reporter[output]"
][
    "Loader[data]_Preprocessor[demo]_Synthesizer[demo]_Postprocessor[demo]_Constrainer[demo]"
]

Unnamed: 0,age,workclass,fnlwgt,education,educational-num,marital-status,occupation,relationship,race,gender,capital-gain,capital-loss,hours-per-week,native-country,income
0,54.972619,Private,248086.234650,Bachelors,14.997408,Divorced,Prof-specialty,Not-in-family,White,Male,0.0,0.0,44.961441,United-States,<=50K
1,35.424846,Private,162100.877764,HS-grad,8.591939,Separated,Sales,Not-in-family,White,Male,0.0,0.0,38.729875,United-States,>50K
2,31.608780,Private,283775.414886,Masters,8.118525,Married-civ-spouse,Transport-moving,Husband,White,Male,0.0,0.0,36.079162,United-States,<=50K
3,51.955552,Private,93366.695644,Some-college,10.087445,Married-civ-spouse,Exec-managerial,Husband,White,Male,0.0,0.0,49.001269,United-States,>50K
4,29.799907,?,115255.095382,HS-grad,12.829138,Divorced,Other-service,Husband,White,Male,0.0,0.0,49.400222,United-States,<=50K
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
28553,45.398884,Local-gov,235923.934440,Some-college,10.465101,Married-spouse-absent,Prof-specialty,Not-in-family,White,Female,0.0,0.0,40.964473,United-States,<=50K
28554,36.105483,Private,229382.788725,Assoc-voc,7.341230,Married-civ-spouse,Prof-specialty,Not-in-family,White,Male,0.0,0.0,35.087358,India,<=50K
28555,21.887462,Private,247886.254377,Prof-school,8.631462,Divorced,Protective-serv,Unmarried,White,Male,0.0,0.0,36.886687,United-States,>50K
28556,39.294116,Private,45339.753052,Assoc-acdm,12.180024,Married-civ-spouse,Adm-clerical,Not-in-family,Black,Male,0.0,0.0,36.596125,United-States,<=50K
