# Environment Setup
環境設定

In [None]:
import os
import requests
from pathlib import Path


# Branch of the nics-tw/petsard repository to fetch demo resources from;
# default is main.
branch = "main"

# Treat the presence of the COLAB_GPU environment variable as the signal
# that this notebook is running in Google Colab.
is_colab = "COLAB_GPU" in os.environ

if is_colab:
    # Download the demo helper module (utils.py) from GitHub so that later
    # cells can import it.
    utils_url = (
        f"https://raw.githubusercontent.com/nics-tw/petsard/{branch}/demo/utils.py"
    )
    # A timeout keeps the cell from hanging forever when GitHub is
    # unreachable; requests applies no timeout by default.
    response = requests.get(utils_url, timeout=30)

    # Fail loudly on any non-200 answer instead of saving an error page
    # as utils.py.
    if response.status_code != 200:
        raise RuntimeError(
            f"Failed to download utils.py. Status code: {response.status_code}"
        )

    # Save the helper with an explicit encoding so the written file does not
    # depend on the platform's default locale encoding.
    with open("utils.py", "w", encoding="utf-8") as f:
        f.write(response.text)

    # Create an empty __init__.py next to the downloaded module.
    Path("__init__.py").touch()

In [None]:
# Now import and run the setup
from utils import (
    get_yaml_path,
    setup_environment,
)


# Benchmark datasets handed to the setup helper for this demo.
benchmark_datasets = ["adult-income"]

# Run the demo's setup helper, telling it whether we are on Colab, which
# branch to use, and which benchmark datasets to pass along.
setup_environment(is_colab, branch, benchmark_data=benchmark_datasets)

In [3]:
from petsard import Executor

# YAML Configuration for PETsARD
PETsARD 的 YAML 設定

## Comparing Synthesizers
比較合成演算法

In [None]:
# File name of the YAML configuration used in this section.
yaml_file_case: str = "comparing-synthesizers.yaml"

# Ask the demo helper for the location of that YAML file, given the runtime
# (Colab or not) and the selected branch.
# NOTE(review): annotated as str here; confirm get_yaml_path's actual return type.
yaml_path_case: str = get_yaml_path(
    yaml_file=yaml_file_case, is_colab=is_colab, branch=branch
)

Configuration content:
---
Loader:
  data:
    filepath: 'benchmark/adult-income.csv'
Preprocessor:
  demo:
    method: 'default'
Synthesizer:
  gaussian-copula:
    method: 'sdv-single_table-gaussiancopula'
  ctgan:
    method: 'sdv-single_table-ctgan'
  tvae:
    method: 'sdv-single_table-tvae'
Postprocessor:
  demo:
    method: 'default'
Evaluator:
  demo-quality:
    method: 'sdmetrics-qualityreport'
Reporter:
  output:
    method: 'save_data'
    source: 'Synthesizer'
  save_report_global:
    method: 'save_report'
    granularity: 'global'
...


### Execution and Results
執行與結果

In [5]:
# Build an Executor from the YAML configuration path resolved earlier, then
# run it. Per the recorded output below, the run produces a quality report
# and saves a CSV for each synthesizer listed in the configuration.
exec_case = Executor(config=yaml_path_case)
exec_case.run()

Generating report ...

(1/2) Evaluating Column Shapes: |██████████| 15/15 [00:00<00:00, 79.93it/s]|
Column Shapes Score: 95.37%

(2/2) Evaluating Column Pair Trends: |██████████| 105/105 [00:00<00:00, 225.87it/s]|
Column Pair Trends Score: 60.46%

Overall Score (Average): 77.92%

Now is petsard_Loader[data]_Preprocessor[demo]_Synthesizer[gaussian-copula] save to csv...
Now is petsard[Report]_[global] save to csv...
Generating report ...

(1/2) Evaluating Column Shapes: |██████████| 15/15 [00:00<00:00, 78.89it/s]|
Column Shapes Score: 90.35%

(2/2) Evaluating Column Pair Trends: |██████████| 105/105 [00:00<00:00, 253.98it/s]|
Column Pair Trends Score: 50.6%

Overall Score (Average): 70.48%

Now is petsard_Loader[data]_Preprocessor[demo]_Synthesizer[ctgan] save to csv...
Now is petsard[Report]_[global] save to csv...
Generating report ...

(1/2) Evaluating Column Shapes: |██████████| 15/15 [00:00<00:00, 114.11it/s]|
Column Shapes Score: 85.99%

(2/2) Evaluating Column Pair Trends: |█████

In [None]:
# Full experiment name of the tvae run through to the global save_report step.
# The table recorded below for this entry lists rows for all three
# synthesizers (ctgan, gaussian-copula, tvae).
report_key = "Loader[data]_Preprocessor[demo]_Synthesizer[tvae]_Postprocessor[demo]_Evaluator[demo-quality]_Reporter[save_report_global]"

# Display the global-granularity report stored under that experiment.
exec_case.get_result()[report_key]["[global]"]

Unnamed: 0,full_expt_name,Loader,Preprocessor,Synthesizer,Postprocessor,Evaluator,Score,Column Shapes,Column Pair Trends
0,Loader[data]_Preprocessor[demo]_Synthesizer[ct...,data,demo,ctgan,demo,[global],0.704766,0.903543,0.505989
1,Loader[data]_Preprocessor[demo]_Synthesizer[ga...,data,demo,gaussian-copula,demo,[global],0.779163,0.953704,0.604621
2,Loader[data]_Preprocessor[demo]_Synthesizer[tv...,data,demo,tvae,demo,[global],0.687809,0.859874,0.515744
