# Environment Setup
環境設定

In [1]:
import os
import requests
from pathlib import Path


# Branch of the nics-tw/petsard repository that demo assets are fetched from.
# Defaults to 'main'.
branch = 'main'

# The presence of the COLAB_GPU environment variable is treated as the signal
# that this notebook is running in Google Colab.
is_colab = 'COLAB_GPU' in os.environ

if is_colab:
    # Download the demo helper module (utils.py) from GitHub for the chosen branch.
    utils_url = f"https://raw.githubusercontent.com/nics-tw/petsard/{branch}/demo/utils.py"
    # An explicit timeout keeps the cell from hanging forever on a stalled connection.
    response = requests.get(utils_url, timeout=30)

    # Fail loudly (and before touching the filesystem) on any non-200 response.
    if response.status_code != 200:
        raise RuntimeError(f"Failed to download utils.py. Status code: {response.status_code}")

    # Save utils.py next to the notebook; the encoding is pinned to UTF-8 so the
    # written file does not depend on the platform's default locale.
    Path('utils.py').write_text(response.text, encoding='utf-8')

    # Create an empty __init__.py alongside it.
    Path('__init__.py').touch()

In [2]:
# Import the demo helpers from utils.py.
from utils import get_yaml_path, setup_environment

# Run the environment setup for this demo, requesting the 'adult-income'
# benchmark dataset.
# NOTE(review): presumably this installs PETsARD and fetches the benchmark
# data — confirm against demo/utils.py.
setup_environment(is_colab, branch, benchmark_data=['adult-income'])

Obtaining file:///Users/justyn.chen/Dropbox/310_Career_%E5%B7%A5%E4%BD%9C/20231016_NICS_%E8%B3%87%E5%AE%89%E9%99%A2/41_PETsARD/petsard
  Installing build dependencies: started
  Installing build dependencies: finished with status 'done'
  Checking if build backend supports build_editable: started
  Checking if build backend supports build_editable: finished with status 'done'
  Getting requirements to build editable: started
  Getting requirements to build editable: finished with status 'done'
  Installing backend dependencies: started
  Installing backend dependencies: finished with status 'done'
  Preparing editable metadata (pyproject.toml): started
  Preparing editable metadata (pyproject.toml): finished with status 'done'
Building wheels for collected packages: petsard
  Building editable for petsard (pyproject.toml): started
  Building editable for petsard (pyproject.toml): finished with status 'done'
  Created wheel for petsard: filename=petsard-1.0.0-py3-none-any.whl size=6548 

In [3]:
from petsard import Executor

# YAML Configuration for PETsARD
PETsARD 的 YAML 設定

## Default Synthesis
預設合成

In [4]:
# File name of the YAML configuration used for this case.
yaml_file_case: str = 'default-synthesis.yaml'

# Resolve the path of that YAML file for the current environment and branch.
yaml_path_case: str = get_yaml_path(is_colab=is_colab, branch=branch, yaml_file=yaml_file_case)

Configuration content:
---
Loader:
  data:
    filepath: 'benchmark/adult-income.csv'
Preprocessor:
  demo:
    method: 'default'
Synthesizer:
  demo:
    method: 'default' # sdv-single_table-gaussiancopula
Postprocessor:
  demo:
    method: 'default'
Reporter:
  output:
    method: 'save_data'
    source: 'Synthesizer'
...


### Execution and Results
執行與結果

In [5]:
# Build an Executor from the YAML config path held in yaml_path_case, then run it.
# The configuration printed above declares Loader, Preprocessor, Synthesizer,
# Postprocessor and Reporter sections.
exec_case = Executor(config=yaml_path_case)
exec_case.run()



Synthesizer (SDV): Fitting GaussianCopula.
Synthesizer (SDV): Fitting GaussianCopula spent 2.1472 sec.


INFO:root:age changes data dtype from float64 to int8 for metadata alignment.
INFO:root:workclass changes data dtype from category[object] to category[object] for metadata alignment.
INFO:root:fnlwgt changes data dtype from float64 to int32 for metadata alignment.
INFO:root:education changes data dtype from category[object] to category[object] for metadata alignment.
INFO:root:educational-num changes data dtype from float64 to int8 for metadata alignment.
INFO:root:marital-status changes data dtype from category[object] to category[object] for metadata alignment.
INFO:root:occupation changes data dtype from category[object] to category[object] for metadata alignment.
INFO:root:relationship changes data dtype from category[object] to category[object] for metadata alignment.
INFO:root:race changes data dtype from category[object] to category[object] for metadata alignment.
INFO:root:gender changes data dtype from category[object] to category[object] for metadata alignment.
INFO:root:capi

Synthesizer (SDV): Sampling GaussianCopula # 48842 rows (same as Loader data) in 0.5637 sec.
Now is petsard_Loader[data]_Preprocessor[demo]_Synthesizer[demo] save to csv...


INFO:PETsARD.Reporter:Completed Reporter execution (elapsed: 0:00:00)
INFO:PETsARD.Executor:Completed PETsARD execution workflow (elapsed: 0:00:04)


In [6]:
# Outer key: the full experiment name, ending in the Reporter step.
report_key = 'Loader[data]_Preprocessor[demo]_Synthesizer[demo]_Postprocessor[demo]_Reporter[output]'
# Inner key: the entry saved for the Synthesizer step (the Reporter's `source`).
synth_key = 'Loader[data]_Preprocessor[demo]_Synthesizer[demo]'

# Look up the Reporter's results, then display the Synthesizer entry as the
# cell's output.
report_results = exec_case.get_result()[report_key]
report_results[synth_key]

Unnamed: 0,age,workclass,fnlwgt,education,educational-num,marital-status,occupation,relationship,race,gender,capital-gain,capital-loss,hours-per-week,native-country,income
0,-0.338793,0.651233,-0.834556,0.802083,-0.142939,0.133326,0.023760,0.519774,0.767149,0.243726,-0.144804,-0.217127,0.495736,0.672347,0.669766
1,0.079925,0.142549,-0.161477,0.813573,-1.063722,0.657667,0.452331,0.256223,0.943350,0.303715,-0.144804,-0.217127,0.343426,0.160866,0.092158
2,-1.091084,0.047445,-0.637146,0.719032,0.716713,0.819231,0.904255,0.708433,0.456525,0.512394,-0.144804,-0.217127,-0.280469,0.872078,0.183472
3,-0.292034,0.643163,-0.275020,0.169449,-0.745080,0.159834,0.621887,0.512725,0.430089,0.506198,-0.144804,-0.217127,0.187590,0.706809,0.701878
4,-0.706602,0.436099,1.347557,0.713543,1.405098,0.775259,0.075585,0.510337,0.114685,0.964113,-0.144804,-0.217127,-0.122640,0.250858,0.391552
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
48837,-0.124527,0.716272,1.072999,0.493508,-0.131807,0.413934,0.960612,0.695537,0.560236,0.275418,-0.144804,-0.217127,-0.320496,0.598720,0.501546
48838,-1.275132,0.810842,-0.627980,0.934618,-0.824537,0.865725,0.243948,0.785933,0.868534,0.025985,-0.144804,-0.217127,-0.596839,0.116173,0.103323
48839,-0.076258,0.969914,-0.224480,0.042907,-0.084509,0.811384,0.286936,0.351211,0.183354,0.305026,-0.144804,-0.217127,0.414186,0.579807,0.088197
48840,-0.490073,0.513440,1.243949,0.927234,-0.240715,0.114386,0.362815,0.250212,0.487246,0.320093,-0.144804,-0.217127,0.082451,0.726091,0.622346
