# Environment Setup
環境設定

In [1]:
import os
import requests
from pathlib import Path


# Git branch of nics-tw/petsard that the demo assets are fetched from.
# NOTE(review): this is pinned to a feature branch while the stated default is
# 'main' -- confirm the branch still exists before publishing the notebook.
branch = '628-guide---tutorial'  # 'main'

# The presence of the COLAB_GPU environment variable is used as the signal
# that the notebook is running in Google Colab.
is_colab = 'COLAB_GPU' in os.environ

if is_colab:
    # Download the demo helper module into the working directory so that the
    # later `from utils import ...` cell can find it.
    utils_url = f"https://raw.githubusercontent.com/nics-tw/petsard/{branch}/demo/utils.py"

    # Bound the request: without a timeout a stalled connection would hang
    # this cell indefinitely instead of failing with a clear error.
    response = requests.get(utils_url, timeout=30)

    # Fail fast on anything other than a successful download.
    if response.status_code != 200:
        raise RuntimeError(f"Failed to download utils.py. Status code: {response.status_code}")

    # response.text is already-decoded text; write it with an explicit
    # encoding so non-ASCII characters do not depend on the platform default.
    with open('utils.py', 'w', encoding='utf-8') as f:
        f.write(response.text)

    # Create an empty __init__.py next to the downloaded module.
    Path('__init__.py').touch()

In [None]:
# Now import and run the setup
from utils import (
    get_yaml_path,
    setup_environment,
)


# Benchmark datasets requested for this notebook. The YAML configurations
# below read 'benchmark/adult-income.csv' (all cases) and
# 'benchmark/adult-income_syn.csv' (Case 3).
# NOTE(review): presumably setup_environment makes these files available
# locally -- confirm against demo/utils.py.
benchmark_datasets = ['adult-income', 'adult-income_syn']

setup_environment(is_colab, branch, benchmark_data=benchmark_datasets)

In [3]:
from petsard import Executor

# YAML Configuration for PETsARD
PETsARD 的 YAML 設定

## Case 1: Default Synthesis
情境一：預設合成

In [4]:
# File name of the Case 1 configuration (shown under "Configuration content").
yaml_file_case1: str = 'default-synthesis.yaml'

# Resolve the file name to the path handed to the Executor. The helper also
# receives the Colab flag and the branch name.
# NOTE(review): presumably these select where the YAML is read or downloaded
# from -- confirm against demo/utils.py.
yaml_path_case1: str = get_yaml_path(
    yaml_file=yaml_file_case1, branch=branch, is_colab=is_colab
)

Configuration content:
---
Loader:
  data:
    filepath: 'benchmark/adult-income.csv'
Preprocessor:
  demo:
    method: 'default'
Synthesizer:
  demo:
    method: 'default' # sdv-single_table-gaussiancopula
Postprocessor:
  demo:
    method: 'default'
Reporter:
  output:
    method: 'save_data'
    output: 'result'
    source: 'Synthesizer'
...


### Execution and Results
執行與結果

In [None]:
# Case 1: build an Executor from the resolved YAML path and run it.
# The results are read back with exec_case1.get_result() in the next cell.
exec_case1 = Executor(config=yaml_path_case1)
exec_case1.run()

In [6]:
# Outer key: the full experiment name, one "Module[experiment]" segment per
# module configured in the Case 1 YAML.
full_expt_name_case1 = (
    'Loader[data]_Preprocessor[demo]_Synthesizer[demo]_Postprocessor[demo]_Reporter[output]'
)

# Inner key: the experiment name up to the Synthesizer step; the Reporter in
# the Case 1 YAML is configured with source: 'Synthesizer'.
# NOTE(review): the displayed values look scaled/encoded rather than in the
# original units -- presumably because this table is taken before the
# Postprocessor step; confirm.
synthesizer_key_case1 = 'Loader[data]_Preprocessor[demo]_Synthesizer[demo]'

exec_case1.get_result()[full_expt_name_case1][synthesizer_key_case1]

Unnamed: 0,age,workclass,fnlwgt,education,educational-num,marital-status,occupation,relationship,race,gender,capital-gain,capital-loss,hours-per-week,native-country,income
0,-0.301077,0.777913,-1.127244,0.739250,-0.108733,0.109714,0.042016,0.523058,0.291500,0.494177,-0.144804,-0.217127,0.605229,0.511552,0.650282
1,0.451589,0.582764,-0.741387,0.638755,-1.112518,0.705370,0.318392,0.221028,0.182432,0.329242,-0.144804,-0.217127,0.069462,0.709300,0.015853
2,0.180620,0.344636,-1.323728,0.408862,0.465818,0.913399,0.946822,0.730795,0.817732,0.723027,-0.144804,-0.217127,0.259887,0.222110,0.162427
3,-0.695894,0.291718,-0.166330,0.281678,-0.687339,0.210152,0.556549,0.572196,0.415635,0.333028,-0.144804,-0.217127,0.000029,0.344679,0.816879
4,0.960534,0.956539,0.518242,0.491285,0.833963,0.975519,0.205082,0.659030,0.908437,0.723790,-0.144804,-0.217127,-0.285916,0.641320,0.813337
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
48837,-0.076744,0.522081,0.835773,0.468472,-0.067872,0.402400,0.925479,0.668797,0.286812,0.431196,-0.144804,-0.217127,-0.185438,0.066759,0.342094
48838,-1.115698,0.989985,0.506421,0.758318,-0.659889,0.532963,0.811258,0.702569,0.338117,0.944704,-0.144804,-0.217127,0.904795,0.546556,0.003239
48839,-1.389251,0.558905,0.758521,0.094355,0.382834,0.228334,0.156616,0.253474,0.888184,0.396770,-0.144804,-0.217127,-0.356322,0.423847,0.269145
48840,1.027045,0.910664,-0.361514,0.700601,-0.519895,0.451344,0.375054,0.346037,0.352586,0.318353,-0.144804,-0.217127,0.337804,0.053145,0.699214


## Case 2: Default Synthesis and Default Evaluation
情境二：預設合成與預設評測

In [7]:
# File name of the Case 2 configuration (shown under "Configuration content").
yaml_file_case2: str = 'default-synthesis-default-evaluation.yaml'

# Resolve the file name to the path handed to the Executor. The helper also
# receives the Colab flag and the branch name.
# NOTE(review): presumably these select where the YAML is read or downloaded
# from -- confirm against demo/utils.py.
yaml_path_case2: str = get_yaml_path(
    yaml_file=yaml_file_case2, branch=branch, is_colab=is_colab
)

Configuration content:
---
Loader:
  data:
    filepath: 'benchmark/adult-income.csv'
Preprocessor:
  demo:
    method: 'default'
Synthesizer:
  demo:
    method: 'default'
Postprocessor:
  demo:
    method: 'default'
Evaluator:
  demo:
    method: 'default' # 'sdmetrics-qualityreport'
Reporter:
  output:
    method: 'save_data'
    output: 'result'
    source: 'Synthesizer'
  save_report_global:
    method: 'save_report'
    output: 'evaluation'
    eval: 'demo'
    granularity: 'global'
...


### Execution and Results
執行與結果

In [None]:
# Case 2: build an Executor from the resolved YAML path and run it.
# The results are read back with exec_case2.get_result() in the next cell.
exec_case2 = Executor(config=yaml_path_case2)
exec_case2.run()

In [9]:
# Outer key: the full experiment name ending in the 'save_report_global'
# Reporter entry defined in the Case 2 YAML.
report_expt_name_case2 = (
    'Loader[data]_Preprocessor[demo]_Synthesizer[demo]_Postprocessor[demo]_Evaluator[demo]_Reporter[save_report_global]'
)

# Inner key: combines the Evaluator experiment name ('demo') with the
# reporter granularity ('global') from the YAML.
exec_case2.get_result()[report_expt_name_case2]['demo_[global]']

Unnamed: 0,full_expt_name,Loader,Preprocessor,Synthesizer,Postprocessor,Evaluator,demo_Score,demo_Column Shapes,demo_Column Pair Trends
result,Loader[data]_Preprocessor[demo]_Synthesizer[de...,data,demo,demo,demo,demo_[global],0.780121,0.95364,0.606602


## Case 3: External Synthesis with Default Evaluation
情境三：外部合成與預設評測

In [10]:
yaml_file_case3: str = 'external-synthesis-default-evaluation.yaml'

# Resolve the Case 3 file name to the path handed to the Executor. The helper
# also receives the Colab flag and the branch name.
# NOTE(review): presumably these select where the YAML is read or downloaded
# from -- confirm against demo/utils.py.
yaml_path_case3: str = get_yaml_path(
    yaml_file=yaml_file_case3, branch=branch, is_colab=is_colab
)

Configuration content:
---
Loader:
  data:
    filepath: 'benchmark/adult-income.csv'
Synthesizer:
  custom:
    method: 'custom_data'
    filepath: 'benchmark/adult-income_syn.csv'
Evaluator:
  demo:
    method: 'default'
Reporter:
  save_report_global:
    method: 'save_report'
    output: 'evaluation'
    eval: 'demo'
    granularity: 'global'
...


### Execution and Results
執行與結果

In [None]:
# Case 3: build an Executor from the resolved YAML path and run it.
# The results are read back with exec_case3.get_result() in the next cell.
exec_case3 = Executor(config=yaml_path_case3)
exec_case3.run()

In [12]:
# Outer key: the full experiment name. Case 3 configures no Preprocessor or
# Postprocessor, so only Loader, Synthesizer, Evaluator and Reporter appear.
report_expt_name_case3 = (
    'Loader[data]_Synthesizer[custom]_Evaluator[demo]_Reporter[save_report_global]'
)

# Inner key: combines the Evaluator experiment name ('demo') with the
# reporter granularity ('global') from the YAML.
exec_case3.get_result()[report_expt_name_case3]['demo_[global]']

Unnamed: 0,full_expt_name,Loader,Synthesizer,Evaluator,demo_Score,demo_Column Shapes,demo_Column Pair Trends
result,Loader[data]_Synthesizer[custom]_Evaluatordemo...,data,custom,demo_[global],0.784236,0.952582,0.61589
