# Environment Setup / 環境設定
Specify Data Schema / 指定資料表架構

In [1]:
import os
import sys
from pathlib import Path

# Auto-load the demo helper module `utils`, which supplies `quick_setup`.
if "COLAB_GPU" in os.environ:
    # Remote branch (COLAB_GPU is presumably set only on Google Colab):
    # utils.py is not available locally, so download it over HTTPS.
    # The built-in open() only reads local filesystem paths and fails when
    # handed a URL, so the file is fetched with urlopen() instead.
    from urllib.request import urlopen

    url = "https://raw.githubusercontent.com/nics-tw/petsard/main/demo/utils.py"
    with urlopen(url) as response:
        utils_source = response.read().decode("utf-8")

    # NOTE(review): this executes code fetched from the network; it trusts the
    # `main` branch of the repository above. The executed source is expected
    # to define `quick_setup` in this namespace.
    exec(utils_source)
else:
    # Local branch: silently look for utils.py in the working directory and
    # up to four of its ancestors, and make the first match importable.
    current = Path.cwd()
    for _ in range(5):
        if (current / "utils.py").exists():
            sys.path.insert(0, str(current))
            break
        current = current.parent

    # Import the helper from the directory located above.
    from utils import quick_setup

# Quick setup: pass the YAML file name and benchmark dataset names to the
# helper; it returns the three values unpacked here.
is_colab, branch, yaml_path = quick_setup(
    yaml_file="specify-schema.yaml",
    benchmark_data=[
        "adult-income",
    ],
    branch="main",  # optional; defaults to "main"
)

from petsard import Executor

🚀 PETsARD v1.5.1
📅 2025-08-03 02:24:31 UTC+8
📁 Subfolder: tutorial/use-cases
📄 YAML path: petsard/demo/tutorial/use-cases/specify-schema.yaml
⚙️ Configuration content:
---
Loader:
  data:
    filepath: 'benchmark/adult-income.csv'
  data-w-schema:
    filepath: 'benchmark/adult-income.csv'
    schema:
      # 全域參數
      compute_stats: true
      optimize_dtypes: true
      nullable_int: true

      # 欄位參數
      fields:
        # 數值型欄位
        age:
          type: 'int'
        fnlwgt:
          type: 'int'
        # 字串型欄位
        gender:
          type: 'str'
          category_method: 'force'
        # 含自訂缺失值的欄位
        native-country:
          type: 'str'
          na_values: '?'
        workclass:
          type: 'str'
          na_values: '?'
        occupation:
          type: 'str'
          na_values: '?'
Describer:
  summary:
    method: 'default'
Reporter:
  save_report_columnwise:
    method: 'save_report'
    granularity: 'columnwise'
...


# Execution and Results / 執行與結果

In [2]:
# Build an Executor from the YAML configuration path returned by quick_setup,
# then run it. Later cells read the outcome through exec_case.get_result().
exec_case = Executor(config=yaml_path)
exec_case.run()

Now is petsard[Report]_[columnwise] save to csv...
Now is petsard[Report]_[columnwise] save to csv...


In [3]:
# Column-wise report for the loader entry declared without a schema.
baseline_experiment = "Loader[data]_Describer[summary]_Reporter[save_report_columnwise]"
exec_case.get_result()[baseline_experiment]["[columnwise]"]

Unnamed: 0,full_expt_name,Loader,Describer,column,summary_mean,summary_median,summary_std,summary_min,summary_max,summary_kurtosis,summary_skew,summary_q1,summary_q3,summary_na_count,summary_nunique
0,Loader[data]_Describer[columnwise],data,[columnwise],age,38.64,37.0,13.71,17.0,90.0,-0.18,0.56,28.0,48.0,0.0,
1,Loader[data]_Describer[columnwise],data,[columnwise],fnlwgt,189664.13,178144.5,105604.03,12285.0,1490400.0,6.06,1.44,117550.5,237642.0,0.0,
2,Loader[data]_Describer[columnwise],data,[columnwise],educational-num,10.08,10.0,2.57,1.0,16.0,0.63,-0.32,9.0,12.0,0.0,
3,Loader[data]_Describer[columnwise],data,[columnwise],capital-gain,1079.07,0.0,7452.02,0.0,99999.0,152.69,11.89,0.0,0.0,0.0,
4,Loader[data]_Describer[columnwise],data,[columnwise],capital-loss,87.5,0.0,403.0,0.0,4356.0,20.01,4.57,0.0,0.0,0.0,
5,Loader[data]_Describer[columnwise],data,[columnwise],hours-per-week,40.42,40.0,12.39,1.0,99.0,2.95,0.24,40.0,45.0,0.0,
6,Loader[data]_Describer[columnwise],data,[columnwise],workclass,,,,,,,,,,0.0,9.0
7,Loader[data]_Describer[columnwise],data,[columnwise],education,,,,,,,,,,0.0,16.0
8,Loader[data]_Describer[columnwise],data,[columnwise],marital-status,,,,,,,,,,0.0,7.0
9,Loader[data]_Describer[columnwise],data,[columnwise],occupation,,,,,,,,,,0.0,15.0


In [4]:
# Column-wise report for the loader entry declared with an explicit schema.
schema_experiment = "Loader[data-w-schema]_Describer[summary]_Reporter[save_report_columnwise]"
exec_case.get_result()[schema_experiment]["[columnwise]"]

Unnamed: 0,full_expt_name,Loader,Describer,column,summary_mean,summary_median,summary_std,summary_min,summary_max,summary_kurtosis,summary_skew,summary_q1,summary_q3,summary_na_count,summary_nunique
0,Loader[data-w-schema]_Describer[columnwise],data-w-schema,[columnwise],age,38.64,37.0,13.71,17.0,90.0,-0.18,0.56,28.0,48.0,0.0,
1,Loader[data-w-schema]_Describer[columnwise],data-w-schema,[columnwise],capital-gain,1079.07,0.0,7452.02,0.0,99999.0,152.69,11.89,0.0,0.0,0.0,
2,Loader[data-w-schema]_Describer[columnwise],data-w-schema,[columnwise],capital-loss,87.5,0.0,403.0,0.0,4356.0,20.01,4.57,0.0,0.0,0.0,
3,Loader[data-w-schema]_Describer[columnwise],data-w-schema,[columnwise],education,,,,,,,,,,0.0,16.0
4,Loader[data-w-schema]_Describer[columnwise],data-w-schema,[columnwise],educational-num,10.08,10.0,2.57,1.0,16.0,0.63,-0.32,9.0,12.0,0.0,
5,Loader[data-w-schema]_Describer[columnwise],data-w-schema,[columnwise],fnlwgt,189664.13,178144.5,105604.03,12285.0,1490400.0,6.06,1.44,117550.5,237642.0,0.0,
6,Loader[data-w-schema]_Describer[columnwise],data-w-schema,[columnwise],gender,,,,,,,,,,0.0,2.0
7,Loader[data-w-schema]_Describer[columnwise],data-w-schema,[columnwise],hours-per-week,40.42,40.0,12.39,1.0,99.0,2.95,0.24,40.0,45.0,0.0,
8,Loader[data-w-schema]_Describer[columnwise],data-w-schema,[columnwise],income,,,,,,,,,,0.0,2.0
9,Loader[data-w-schema]_Describer[columnwise],data-w-schema,[columnwise],marital-status,,,,,,,,,,0.0,7.0
