# Environment setup
環境設定

In [1]:
import os
import requests
from pathlib import Path


# Branch of the nics-tw/petsard repository to fetch demo assets from; default is main
branch = "main"

# Upper bound (in seconds) on the GitHub request. Without a timeout,
# requests waits forever on a stalled connection and the cell never finishes.
DOWNLOAD_TIMEOUT_SECONDS = 30

# Check if running in Google Colab (detected by the COLAB_GPU environment variable)
is_colab = "COLAB_GPU" in os.environ

if is_colab:
    # Download the utils.py file from GitHub
    utils_url = (
        f"https://raw.githubusercontent.com/nics-tw/petsard/{branch}/demo/utils.py"
    )
    response = requests.get(utils_url, timeout=DOWNLOAD_TIMEOUT_SECONDS)

    # Fail loudly before touching the filesystem if the download did not succeed
    if response.status_code != 200:
        raise RuntimeError(
            f"Failed to download utils.py. Status code: {response.status_code}"
        )

    # Save the utils.py file; response.text is already-decoded str, so pin the
    # on-disk encoding instead of relying on the platform default
    with open("utils.py", "w", encoding="utf-8") as f:
        f.write(response.text)

    # Create an empty __init__.py
    Path("__init__.py").touch()

In [None]:
# utils.py is importable at this point (on Colab it was downloaded by the
# previous cell), so pull in the demo helpers.
from utils import get_yaml_path, setup_environment

# Prepare the demo environment for the selected branch. The benchmark_data
# names match the CSV files referenced under benchmark/ in the YAML
# configuration used later in this notebook.
# NOTE(review): what setup_environment does with these names is defined in
# demo/utils.py and is not visible here.
setup_environment(
    is_colab,
    branch,
    benchmark_data=["adult-income_ori", "adult-income_control", "adult-income_syn"],
)

In [3]:
from petsard import Executor

# YAML Configuration for PETsARD
PETsARD 的 YAML 設定

## External Synthesis with Default Evaluation
外部合成與預設評測

In [4]:
# File name of the YAML configuration demonstrated in this section.
yaml_file_case: str = "external-synthesis-default-evaluation.yaml"

# Resolve the configuration path through the demo helper, which is given the
# runtime flag and branch chosen in the first cell.
yaml_path_case: str = get_yaml_path(
    yaml_file=yaml_file_case,
    is_colab=is_colab,
    branch=branch,
)

Configuration content:
---
Splitter:
  custom:
    method: 'custom_data'
    filepath:
      ori: 'benchmark/adult-income_ori.csv'
      control: 'benchmark/adult-income_control.csv'
Synthesizer:
  custom:
    method: 'custom_data'
    filepath: 'benchmark/adult-income_syn.csv'
Evaluator:
  demo-diagnostic:
    method: 'sdmetrics-diagnosticreport'
  demo-quality:
    method: 'sdmetrics-qualityreport'
  demo-singlingout:
    method: 'anonymeter-singlingout'
  demo-linkability:
    method: 'anonymeter-linkability'
    aux_cols:
      -
        - 'age'
        - 'marital-status'
        - 'relationship'
        - 'gender'
      -
        - 'workclass'
        - 'educational-num'
        - 'occupation'
        - 'income'
  demo-inference:
    method: 'anonymeter-inference'
    secret: 'income'
  demo-classification:
    method: 'mlutility-classification'
    target: 'income'
Reporter:
  save_report_global:
    method: 'save_report'
    granularity: 'global'
...


### Execution and Results
執行與結果

In [5]:
# Build a PETsARD Executor from the YAML configuration path resolved above.
exec_case = Executor(config=yaml_path_case)
# Run the configured experiment. The captured output shows the evaluation
# reports being generated and the reporter saving results to CSV.
# NOTE(review): presumably the modules run in the order listed in the YAML
# (Splitter, Synthesizer, Evaluator, Reporter); confirm against PETsARD docs.
exec_case.run()

Generating report ...

(1/2) Evaluating Data Validity: |██████████| 15/15 [00:00<00:00, 369.68it/s]|
Data Validity Score: 100.0%

(2/2) Evaluating Data Structure: |██████████| 1/1 [00:00<00:00, 497.54it/s]|
Data Structure Score: 100.0%

Overall Score (Average): 100.0%

Now is petsard[Report]_[global] save to csv...
Generating report ...

(1/2) Evaluating Column Shapes: |██████████| 15/15 [00:00<00:00, 111.31it/s]|
Column Shapes Score: 95.26%

(2/2) Evaluating Column Pair Trends: |██████████| 105/105 [00:00<00:00, 373.70it/s]|
Column Pair Trends Score: 61.56%

Overall Score (Average): 78.41%

Now is petsard[Report]_[global] save to csv...


Found 1604 failed queries out of 2000. Check DEBUG messages for more details.
Reached maximum number of attempts 500000 when generating singling out queries. Returning 130 instead of the requested 2000.To avoid this, increase the number of attempts or set it to ``None`` to disable The limitation entirely.
Attack `multivariate` could generate only 130 singling out queries out of the requested 2000. This can probably lead to an underestimate of the singling out risk.
  self._sanity_check()


Now is petsard[Report]_[global] save to csv...


  self._sanity_check()


Now is petsard[Report]_[global] save to csv...
Now is petsard[Report]_[global] save to csv...
Now is petsard[Report]_[global] save to csv...


In [6]:
# Key of the experiment whose report is displayed: the full
# Splitter/Synthesizer/Evaluator/Reporter name for the last evaluator
# (demo-classification) in the configuration.
report_key = (
    "Splitter[custom_[1-1]]_Synthesizer[custom]_Evaluator[demo-classification]_Reporter[save_report_global]"
)

# Last expression of the cell, so the notebook renders the "[global]" report.
exec_case.get_result()[report_key]["[global]"]

Unnamed: 0,full_expt_name,Splitter,Synthesizer,Evaluator,demo-diagnostic_Score,demo-diagnostic_Data Validity,demo-diagnostic_Data Structure,demo-quality_Score,demo-quality_Column Shapes,demo-quality_Column Pair Trends,...,demo-inference_attack_rate_err,demo-inference_baseline_rate,demo-inference_baseline_rate_err,demo-inference_control_rate,demo-inference_control_rate_err,demo-classification_ori_mean,demo-classification_ori_std,demo-classification_syn_mean,demo-classification_syn_std,demo-classification_diff
0,Splitter[custom_[1-1]]_Synthesizer[custom]_Eva...,custom_[1-1],custom,[global],1.0,,,0.78,,,...,0.02,0.64,0.02,0.71,0.02,0.86,0.01,0.78,0.01,-0.08
