# Environment Setup / 環境設定
External Synthesis with Default Evaluation / 外部合成與預設評測

In [1]:
import os
import sys
from pathlib import Path

# Auto-load utils: make `quick_setup` available whether or not the
# COLAB_GPU environment variable is present.
if "COLAB_GPU" in os.environ:
    # No local utils.py is searched for in this branch, so the helper module
    # is downloaded instead. The built-in open() only accepts filesystem
    # paths and cannot read an HTTP(S) URL, so the fetch goes through urllib.
    from urllib.request import urlopen

    url = "https://raw.githubusercontent.com/nics-tw/petsard/main/demo/utils.py"
    with urlopen(url) as response:
        utils_source = response.read().decode("utf-8")

    # NOTE(review): exec() runs remotely fetched code in this namespace; it is
    # kept because the rest of the notebook relies on the names it defines
    # (e.g. quick_setup). Only point `url` at a trusted source.
    exec(utils_source)
else:
    # Silent search for utils.py: check the current working directory and up
    # to four ancestors, and put the first directory that contains it on
    # sys.path so the import below can resolve.
    current = Path.cwd()
    for _ in range(5):
        if (current / "utils.py").exists():
            sys.path.insert(0, str(current))
            break
        current = current.parent

    # Import utils module
    from utils import quick_setup

# Quick setup: hand quick_setup the YAML config name, the benchmark dataset
# names and the branch, and keep the three values it returns.
is_colab, branch, yaml_path = quick_setup(
    branch="main",  # optional; defaults to "main"
    benchmark_data=["adult-income_ori", "adult-income_control", "adult-income_syn"],
    yaml_file="external-synthesis-default-evaluation.yaml",
)

from petsard import Executor

🚀 PETsARD v1.6.0rc1
📅 2025-08-05 15:56:36 UTC+8
📁 Subfolder: tutorial
📄 YAML path: petsard/demo/tutorial/external-synthesis-default-evaluation.yaml
⚙️ Configuration content:
---
Splitter:
  custom:
    method: 'custom_data'
    filepath:
      ori: 'benchmark/adult-income_ori.csv'
      control: 'benchmark/adult-income_control.csv'
Synthesizer:
  custom:
    method: 'custom_data'
    filepath: 'benchmark/adult-income_syn.csv'
Evaluator:
  demo-diagnostic:
    method: 'sdmetrics-diagnosticreport'
  demo-quality:
    method: 'sdmetrics-qualityreport'
  demo-singlingout:
    method: 'anonymeter-singlingout'
  demo-linkability:
    method: 'anonymeter-linkability'
    aux_cols:
      -
        - 'age'
        - 'marital-status'
        - 'relationship'
        - 'gender'
      -
        - 'workclass'
        - 'educational-num'
        - 'occupation'
        - 'income'
  demo-inference:
    method: 'anonymeter-inference'
    secret: 'income'
  demo-classification:
    method: 'mlutility-cl

# Execution and Result / 執行與結果

In [2]:
# Build an Executor from the YAML config path returned by quick_setup.
exec_case = Executor(config=yaml_path)
# Run the configured experiment; the results are read back in later cells
# through exec_case.get_result().
exec_case.run()

Generating report ...

(1/2) Evaluating Data Validity: |██████████| 15/15 [00:00<00:00, 206.99it/s]|
Data Validity Score: 100.0%

(2/2) Evaluating Data Structure: |██████████| 1/1 [00:00<00:00, 318.09it/s]|
Data Structure Score: 100.0%

Overall Score (Average): 100.0%

Generating report ...

(1/2) Evaluating Column Shapes: |██████████| 15/15 [00:00<00:00, 100.53it/s]|
Column Shapes Score: 95.26%

(2/2) Evaluating Column Pair Trends: |██████████| 105/105 [00:00<00:00, 147.95it/s]|
Column Pair Trends Score: 61.56%

Overall Score (Average): 78.41%



Found 1652 failed queries out of 2000. Check DEBUG messages for more details.
Reached maximum number of attempts 500000 when generating singling out queries. Returning 160 instead of the requested 2000.To avoid this, increase the number of attempts or set it to ``None`` to disable The limitation entirely.
Attack `multivariate` could generate only 160 singling out queries out of the requested 2000. This can probably lead to an underestimate of the singling out risk.
  self._sanity_check()


In [4]:
# Fetch the result stored under the full experiment name, then display its
# "[global]" entry as the cell output.
quality_report = exec_case.get_result()[
    "Splitter[custom_[1-1]]_Synthesizer[custom]_Evaluator[demo-quality]_Reporter[rpt]"
]
quality_report["[global]"]

Unnamed: 0,full_expt_name,Splitter,Synthesizer,Evaluator,demo-diagnostic_Score,demo-diagnostic_Data Validity,demo-diagnostic_Data Structure,demo-quality_Score,demo-quality_Column Shapes,demo-quality_Column Pair Trends
0,Splitter[custom_[1-1]]_Synthesizer[custom]_Eva...,custom_[1-1],custom,[global],1.0,1.0,1.0,0.78,0.95,0.62


In [7]:
# Fetch the result stored under the full experiment name, then display its
# "[columnwise]" entry as the cell output.
quality_report = exec_case.get_result()[
    "Splitter[custom_[1-1]]_Synthesizer[custom]_Evaluator[demo-quality]_Reporter[rpt]"
]
quality_report["[columnwise]"]

Unnamed: 0,full_expt_name,Splitter,Synthesizer,Evaluator,column,demo-diagnostic_Property,demo-diagnostic_Metric,demo-diagnostic_Score,demo-quality_Property,demo-quality_Metric,demo-quality_Score
0,Splitter[custom_[1-1]]_Synthesizer[custom]_Eva...,custom_[1-1],custom,[columnwise],age,Data Validity,BoundaryAdherence,1.0,Column Shapes,KSComplement,0.962276
1,Splitter[custom_[1-1]]_Synthesizer[custom]_Eva...,custom_[1-1],custom,[columnwise],capital-gain,Data Validity,BoundaryAdherence,1.0,Column Shapes,KSComplement,0.916669
2,Splitter[custom_[1-1]]_Synthesizer[custom]_Eva...,custom_[1-1],custom,[columnwise],capital-loss,Data Validity,BoundaryAdherence,1.0,Column Shapes,KSComplement,0.952934
3,Splitter[custom_[1-1]]_Synthesizer[custom]_Eva...,custom_[1-1],custom,[columnwise],education,Data Validity,CategoryAdherence,1.0,Column Shapes,TVComplement,0.956952
4,Splitter[custom_[1-1]]_Synthesizer[custom]_Eva...,custom_[1-1],custom,[columnwise],educational-num,Data Validity,BoundaryAdherence,1.0,Column Shapes,KSComplement,0.882758
5,Splitter[custom_[1-1]]_Synthesizer[custom]_Eva...,custom_[1-1],custom,[columnwise],fnlwgt,Data Validity,BoundaryAdherence,1.0,Column Shapes,KSComplement,0.957951
6,Splitter[custom_[1-1]]_Synthesizer[custom]_Eva...,custom_[1-1],custom,[columnwise],gender,Data Validity,CategoryAdherence,1.0,Column Shapes,TVComplement,0.997953
7,Splitter[custom_[1-1]]_Synthesizer[custom]_Eva...,custom_[1-1],custom,[columnwise],hours-per-week,Data Validity,BoundaryAdherence,1.0,Column Shapes,KSComplement,0.745886
8,Splitter[custom_[1-1]]_Synthesizer[custom]_Eva...,custom_[1-1],custom,[columnwise],income,Data Validity,CategoryAdherence,1.0,Column Shapes,TVComplement,0.980344
9,Splitter[custom_[1-1]]_Synthesizer[custom]_Eva...,custom_[1-1],custom,[columnwise],marital-status,Data Validity,CategoryAdherence,1.0,Column Shapes,TVComplement,0.989558


In [8]:
# Fetch the result stored under the full experiment name, then display its
# "[pairwise]" entry as the cell output.
quality_report = exec_case.get_result()[
    "Splitter[custom_[1-1]]_Synthesizer[custom]_Evaluator[demo-quality]_Reporter[rpt]"
]
quality_report["[pairwise]"]

Unnamed: 0,full_expt_name,Splitter,Synthesizer,Evaluator,column1,column2,demo-quality_Property,demo-quality_Metric,demo-quality_Score,demo-quality_Real Correlation,demo-quality_Synthetic Correlation,demo-quality_Error
0,Splitter[custom_[1-1]]_Synthesizer[custom]_Eva...,custom_[1-1],custom,[pairwise],age,workclass,Column Pair Trends,ContingencySimilarity,0.869526,,,
1,Splitter[custom_[1-1]]_Synthesizer[custom]_Eva...,custom_[1-1],custom,[pairwise],age,fnlwgt,Column Pair Trends,CorrelationSimilarity,0.991312,-0.074923,-0.057548,
2,Splitter[custom_[1-1]]_Synthesizer[custom]_Eva...,custom_[1-1],custom,[pairwise],age,education,Column Pair Trends,ContingencySimilarity,0.841399,,,
3,Splitter[custom_[1-1]]_Synthesizer[custom]_Eva...,custom_[1-1],custom,[pairwise],age,educational-num,Column Pair Trends,CorrelationSimilarity,0.982322,0.034849,0.070204,
4,Splitter[custom_[1-1]]_Synthesizer[custom]_Eva...,custom_[1-1],custom,[pairwise],age,marital-status,Column Pair Trends,ContingencySimilarity,0.739974,,,
...,...,...,...,...,...,...,...,...,...,...,...,...
100,Splitter[custom_[1-1]]_Synthesizer[custom]_Eva...,custom_[1-1],custom,[pairwise],capital-loss,native-country,Column Pair Trends,ContingencySimilarity,0.006296,,,
101,Splitter[custom_[1-1]]_Synthesizer[custom]_Eva...,custom_[1-1],custom,[pairwise],capital-loss,income,Column Pair Trends,ContingencySimilarity,0.006322,,,
102,Splitter[custom_[1-1]]_Synthesizer[custom]_Eva...,custom_[1-1],custom,[pairwise],hours-per-week,native-country,Column Pair Trends,ContingencySimilarity,0.649528,,,
103,Splitter[custom_[1-1]]_Synthesizer[custom]_Eva...,custom_[1-1],custom,[pairwise],hours-per-week,income,Column Pair Trends,ContingencySimilarity,0.630922,,,
