# Environment setup
環境設定

In [1]:
import os
import requests
import sys
from pathlib import Path
from pprint import pprint
from typing import Optional

import pandas as pd


# Branch of the nics-tw/petsard repository used when fetching demo assets
# (default: main).
branch: str = "main"

# Demo subfolder this notebook lives in; None means the demo root (petsard/demo/).
subfolder: Optional[str] = "developer-guide"


# Treat the presence of the COLAB_GPU environment variable as "running on
# Google Colab". On Colab, utils.py is downloaded from GitHub into the
# working directory so that it can be imported by later cells.
is_colab: bool = "COLAB_GPU" in os.environ
if is_colab:
    utils_url = (
        f"https://raw.githubusercontent.com/nics-tw/petsard/{branch}/demo/utils.py"
    )
    # requests applies no timeout by default; bound the call so a stalled
    # connection fails instead of hanging the cell forever.
    response = requests.get(utils_url, timeout=30)

    if response.status_code == 200:
        # Write with an explicit encoding so the result does not depend on
        # the platform's locale default.
        with open("utils.py", "w", encoding="utf-8") as f:
            f.write(response.text)

        Path("__init__.py").touch()
    else:
        raise RuntimeError(
            f"Failed to download utils.py. Status code: {response.status_code}"
        )

# Not on Colab and the notebook sits in a subfolder: make the parent of the
# current working directory importable (presumably where utils.py lives).
elif subfolder:
    parent_dir = os.path.dirname(os.getcwd())
    # Guard against duplicates so re-running this cell keeps sys.path stable.
    if parent_dir not in sys.path:
        sys.path.append(parent_dir)

In [None]:
# Import the demo helpers, then run the environment setup for this notebook
from utils import get_yaml_path, setup_environment

# Benchmark datasets handed to setup_environment
benchmark_datasets = ["adult-income"]

setup_environment(
    is_colab,
    branch,
    benchmark_data=benchmark_datasets,
    subfolder=subfolder,
)

In [3]:
from petsard import Executor

# YAML Configuration for PETsARD
PETsARD 的 YAML 設定

## mpUCCs Singling-Out evaluation
最大部分唯一欄位組合 指認性評測

In [4]:
# File name of the YAML configuration used for this case
yaml_file_case: str = "mpuccs.yaml"

# Resolve the configuration path with the same Colab/branch/subfolder
# settings that were used during environment setup
yaml_path_case: str = get_yaml_path(
    yaml_file=yaml_file_case,
    is_colab=is_colab,
    branch=branch,
    subfolder=subfolder,
)

Configuration content:
---
Loader:
  data:
    filepath: 'benchmark/adult-income.csv'
Splitter:
  demo:
    num_samples: 1
    train_split_ratio: 0.8
Preprocessor:
  demo:
    method: 'default'
Synthesizer:
  demo:
    method: 'default'
Postprocessor:
  demo:
    method: 'default'
Evaluator:
  demo-mpuccs:
    method: 'mpuccs'
    n_cols:
      - 1
      - 2
      - 3
Reporter:
  output:
    method: 'save_data'
    source: 'Synthesizer'
  save_report_global:
    method: 'save_report'
    granularity: 'global'
  save_report_details:
    method: 'save_report'
    granularity: 'details'
  save_report_tree:
    method: 'save_report'
    granularity: 'tree'
...


### Execution and Results
執行與結果

In [None]:
# Build an Executor from the resolved YAML configuration path, then run it.
# exec_case is kept at notebook scope because the next cell reads its results.
exec_case = Executor(config=yaml_path_case)
exec_case.run()

In [None]:
# Full experiment name under which the global-granularity report is stored
global_report_key = "Loader[data]_Splitter[demo_[1-1]]_Preprocessor[demo]_Synthesizer[demo]_Postprocessor[demo]_Evaluator[demo-mpuccs]_Reporter[save_report_global]"

# Look up that experiment's "[global]" entry and display it as the cell output
case_results = exec_case.get_result()
case_results[global_report_key]["[global]"]

Unnamed: 0,full_expt_name,Loader,Splitter,Preprocessor,Synthesizer,Postprocessor,Evaluator,demo-mpuccs_total_syn_records,demo-mpuccs_total_ori_records,demo-mpuccs_total_identified,demo-mpuccs_identification_rate,demo-mpuccs_weighted_identification_rate,demo-mpuccs_total_combinations_checked,demo-mpuccs_total_combinations_pruned,demo-mpuccs_config_n_cols,demo-mpuccs_config_min_entropy_delta,demo-mpuccs_config_field_decay_factor,demo-mpuccs_config_renyi_alpha,demo-mpuccs_config_numeric_precision,demo-mpuccs_config_datetime_precision
0,Loader[data]_Splitter[demo_[1-1]]_Preprocessor...,data,demo_[1-1],demo,demo,demo,[global],39073,39041,24340,0.622937,0.133116,4943,2568,"[1, 2, 3, 4, 5]",0.0,0.5,2.0,,
