# Environment Setup
環境設定

In [1]:
import os
import requests
from pathlib import Path


# Branch of nics-tw/petsard used for the demo assets; the default is 'main'
branch = '628-guide---tutorial'  # 'main'

# Treat the session as Google Colab when the COLAB_GPU environment variable is set
is_colab = 'COLAB_GPU' in os.environ

if is_colab:
    # Download demo/utils.py for the selected branch from GitHub
    utils_url = f"https://raw.githubusercontent.com/nics-tw/petsard/{branch}/demo/utils.py"
    # Bound the request: without a timeout a stalled connection hangs the cell forever
    response = requests.get(utils_url, timeout=30)

    # Fail loudly on anything other than a successful download
    if response.status_code != 200:
        raise RuntimeError(f"Failed to download utils.py. Status code: {response.status_code}")

    # Save the utils.py file; pin the encoding so the written bytes do not
    # depend on the platform's default locale
    with open('utils.py', 'w', encoding='utf-8') as f:
        f.write(response.text)

    # Create an empty __init__.py next to the downloaded module
    Path('__init__.py').touch()

In [None]:
# Import the demo helpers from utils.py
from utils import get_yaml_path, setup_environment

# Run the environment setup, passing 'adult-income' as the benchmark data
setup_environment(
    is_colab,
    branch,
    benchmark_data=['adult-income'],
)

In [3]:
from petsard import Executor

# YAML Configuration for PETsARD
PETsARD 的 YAML 設定

## Case 1: Default Synthesis
情境一：預設合成

In [4]:
# YAML file used for Case 1
yaml_file: str = 'default-synthesis.yaml'

# Ask the demo helper for the config path that Executor consumes below
config_path: str = get_yaml_path(is_colab, yaml_file, branch)

Configuration content:
---
Loader:
  data:
    filepath: 'benchmark/adult-income.csv'
Preprocessor:
  demo:
    method: 'default'
Synthesizer:
  demo:
    method: 'default' # sdv-single_table-gaussiancopula
Postprocessor:
  demo:
    method: 'default'
Reporter:
  output:
    method: 'save_data'
    output: 'result'
    source: 'Synthesizer'
...


### Execution and Result
執行與結果

In [None]:
# Build an Executor from the Case 1 config path, then run it
exec_case1 = Executor(config=config_path)
exec_case1.run()

In [6]:
# Index the executor's results by the full experiment name first,
# then by the Synthesizer-stage key, and display that entry
case1_results = exec_case1.get_result()
case1_saved = case1_results[
    'Loader[data]_Preprocessor[demo]_Synthesizer[demo]_Postprocessor[demo]_Reporter[output]'
]
case1_saved['Loader[data]_Preprocessor[demo]_Synthesizer[demo]']

Unnamed: 0,age,workclass,fnlwgt,education,educational-num,marital-status,occupation,relationship,race,gender,capital-gain,capital-loss,hours-per-week,native-country,income
0,0.122281,0.525064,0.869376,0.334948,2.089931,0.353652,0.434406,0.132581,0.655078,0.610615,-0.144804,-0.217127,-0.030757,0.531594,0.790763
1,0.489589,0.284826,0.721203,0.126802,0.171552,0.396774,0.295231,0.377160,0.953971,0.255022,-0.144804,-0.217127,-0.359801,0.190913,0.057074
2,0.731759,0.679257,1.368452,0.608712,-1.420712,0.816812,0.514641,0.849581,0.757104,0.764633,-0.144804,-0.217127,0.003821,0.806643,0.452064
3,-0.711120,0.348826,-0.082903,0.385100,0.098133,0.448730,0.896633,0.287156,0.224610,0.305202,-0.144804,-0.217127,-0.034650,0.563699,0.624261
4,0.790442,0.889920,-1.474207,0.627964,0.787020,0.856093,0.219883,0.948675,0.776081,0.560408,-0.144804,-0.217127,0.251950,0.786329,0.565800
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
48837,-0.480261,0.053689,-0.624819,0.626247,-1.029726,0.575329,0.566661,0.298706,0.521831,0.531699,-0.144804,-0.217127,0.204698,0.840366,0.334402
48838,0.631255,0.010183,0.520843,0.014077,0.168265,0.503537,0.369892,0.660540,0.978711,0.997650,-0.144804,-0.217127,0.787121,0.421841,0.233477
48839,-0.917363,0.548432,-0.896794,0.023567,-0.021983,0.188322,0.199937,0.683974,0.040099,0.637901,-0.144804,-0.217127,-0.240906,0.657962,0.550157
48840,0.983459,0.243579,-0.258490,0.539480,0.840984,0.437465,0.551687,0.182913,0.823940,0.199089,-0.144804,-0.217127,0.329183,0.949179,0.484864


## Case 2: Default Synthesis and Default Evaluation
情境二：預設合成與預設評測

In [7]:
# YAML file used for Case 2
yaml_file: str = 'default-synthesis-default-evaluation.yaml'

# Ask the demo helper for the config path that Executor consumes below
config_path: str = get_yaml_path(is_colab, yaml_file, branch)

Configuration content:
---
Loader:
  data:
    filepath: 'benchmark/adult-income.csv'
Preprocessor:
  demo:
    method: 'default'
Synthesizer:
  demo:
    method: 'default'
Postprocessor:
  demo:
    method: 'default'
Evaluator:
  demo:
    method: 'default' # 'sdmetrics-qualityreport'
Reporter:
  output:
    method: 'save_data'
    output: 'result'
    source: 'Synthesizer'
  save_report_global:
    method: 'save_report'
    output: 'evaluation'
    eval: 'demo'
    granularity: 'global'
...


### Execution and Result
執行與結果

In [None]:
# Build an Executor from the Case 2 config path, then run it
exec_case2 = Executor(config=config_path)
exec_case2.run()

In [9]:
# Index the executor's results by the full experiment name first,
# then by the 'demo_[global]' key, and display that entry
case2_results = exec_case2.get_result()
case2_report = case2_results[
    'Loader[data]_Preprocessor[demo]_Synthesizer[demo]_Postprocessor[demo]_Evaluator[demo]_Reporter[save_report_global]'
]
case2_report['demo_[global]']

Unnamed: 0,full_expt_name,Loader,Preprocessor,Synthesizer,Postprocessor,Evaluator,demo_Score,demo_Column Shapes,demo_Column Pair Trends
result,Loader[data]_Preprocessor[demo]_Synthesizer[de...,data,demo,demo,demo,demo_[global],0.780709,0.954358,0.607061
