# Environment setting
環境設定

In [1]:
import os
import requests
from pathlib import Path


# Repository branch used to build the download URL below; default is main
branch = 'main'

# The presence of the COLAB_GPU environment variable is treated as the
# sign that this notebook is running in Google Colab
is_colab = 'COLAB_GPU' in os.environ

if is_colab:
    # Download the utils.py file from GitHub
    utils_url = f"https://raw.githubusercontent.com/nics-tw/petsard/{branch}/demo/utils.py"
    # Without a timeout, requests waits indefinitely when the host is
    # unreachable, which would leave this cell hanging
    response = requests.get(utils_url, timeout=30)

    if response.status_code == 200:
        # Save the utils.py file; the explicit encoding keeps any non-ASCII
        # characters intact regardless of the platform's default encoding
        with open('utils.py', 'w', encoding='utf-8') as f:
            f.write(response.text)

        # Create an empty __init__.py
        Path('__init__.py').touch()
    else:
        # Any non-200 status aborts the notebook here, before later cells
        # try to import from utils
        raise RuntimeError(f"Failed to download utils.py. Status code: {response.status_code}")

In [2]:
# Import the demo helpers from utils (the previous cell downloads utils.py
# when running on Colab), then call setup_environment with the Colab flag,
# the branch name, and 'adult-income' as the only benchmark_data entry
from utils import get_yaml_path, setup_environment

setup_environment(is_colab, branch, benchmark_data=['adult-income'])

Obtaining file:///Users/justyn.chen/Dropbox/310_Career_%E5%B7%A5%E4%BD%9C/20231016_NICS_%E8%B3%87%E5%AE%89%E9%99%A2/41_PETsARD/petsard
  Installing build dependencies: started
  Installing build dependencies: finished with status 'done'
  Checking if build backend supports build_editable: started
  Checking if build backend supports build_editable: finished with status 'done'
  Getting requirements to build editable: started
  Getting requirements to build editable: finished with status 'done'
  Installing backend dependencies: started
  Installing backend dependencies: finished with status 'done'
  Preparing editable metadata (pyproject.toml): started
  Preparing editable metadata (pyproject.toml): finished with status 'done'
Building wheels for collected packages: petsard
  Building editable for petsard (pyproject.toml): started
  Building editable for petsard (pyproject.toml): finished with status 'done'
  Created wheel for petsard: filename=petsard-1.0.0-py3-none-any.whl size=6548 

In [3]:
from petsard import Executor

# YAML Configuration for PETsARD
PETsARD 的 YAML 設定

## Data Description
資料描述

In [4]:
# YAML file for the data-description case; get_yaml_path receives the file
# name together with the Colab flag and branch chosen at the top of the notebook
yaml_file_case: str = 'data-description.yaml'
yaml_path_case: str = get_yaml_path(is_colab=is_colab, yaml_file=yaml_file_case, branch=branch)

Configuration content:
---
Loader:
  data:
    filepath: 'benchmark/adult-income.csv'
Describer:
  summary:
    method: 'default'
Reporter:
  save_report_global:
    method: 'save_report'
    granularity: 'global'
  save_report_columnwise:
    method: 'save_report'
    granularity: 'columnwise'
  save_report_pairwise:
    method: 'save_report'
    granularity: 'pairwise'
...


### Execution and Result
執行與結果

In [5]:
# Build an Executor from the YAML config path (yaml_path_case) and run it;
# exec_case is kept so the following cells can read results via get_result()
exec_case = Executor(config=yaml_path_case)
exec_case.run()

Now is petsard[Report]_[global] save to csv...
Now is petsard[Report]_[columnwise] save to csv...
Now is petsard[Report]_[pairwise] save to csv...


In [9]:
# Global report: index the executor results by the full experiment name,
# then by the '[global]' granularity key; the bare expression is displayed
exec_case.get_result()['Loader[data]_Describer[summary]_Reporter[save_report_global]']['[global]']

Unnamed: 0,full_expt_name,Loader,Describer,summary_row_count,summary_col_count,summary_na_count
0,Loader[data]_Describer[global],data,[global],48842,15,0


In [10]:
# Column-wise report: index the executor results by the full experiment name,
# then by the '[columnwise]' granularity key; the bare expression is displayed
exec_case.get_result()['Loader[data]_Describer[summary]_Reporter[save_report_columnwise]']['[columnwise]']

Unnamed: 0,full_expt_name,Loader,Describer,column,summary_mean,summary_median,summary_std,summary_min,summary_max,summary_kurtosis,summary_skew,summary_q1,summary_q3,summary_na_count,summary_nunique
0,Loader[data]_Describer[columnwise],data,[columnwise],age,38.643585,37.0,13.71051,17.0,90.0,-0.184269,0.55758,28.0,48.0,0,
1,Loader[data]_Describer[columnwise],data,[columnwise],fnlwgt,189664.134597,178144.5,105604.025423,12285.0,1490400.0,6.057848,1.438892,117550.5,237642.0,0,
2,Loader[data]_Describer[columnwise],data,[columnwise],educational-num,10.078089,10.0,2.570973,1.0,16.0,0.625745,-0.316525,9.0,12.0,0,
3,Loader[data]_Describer[columnwise],data,[columnwise],capital-gain,1079.067626,0.0,7452.019058,0.0,99999.0,152.693096,11.894659,0.0,0.0,0,
4,Loader[data]_Describer[columnwise],data,[columnwise],capital-loss,87.502314,0.0,403.004552,0.0,4356.0,20.014346,4.569809,0.0,0.0,0,
5,Loader[data]_Describer[columnwise],data,[columnwise],hours-per-week,40.422382,40.0,12.391444,1.0,99.0,2.951059,0.23875,40.0,45.0,0,
6,Loader[data]_Describer[columnwise],data,[columnwise],workclass,,,,,,,,,,0,9.0
7,Loader[data]_Describer[columnwise],data,[columnwise],education,,,,,,,,,,0,16.0
8,Loader[data]_Describer[columnwise],data,[columnwise],marital-status,,,,,,,,,,0,7.0
9,Loader[data]_Describer[columnwise],data,[columnwise],occupation,,,,,,,,,,0,15.0


In [11]:
# Pairwise report: index the executor results by the full experiment name,
# then by the '[pairwise]' granularity key; the bare expression is displayed
exec_case.get_result()['Loader[data]_Describer[summary]_Reporter[save_report_pairwise]']['[pairwise]']

Unnamed: 0,full_expt_name,Loader,Describer,column1,column2,summary_corr
0,Loader[data]_Describer[pairwise],data,[pairwise],age,age,1.0
1,Loader[data]_Describer[pairwise],data,[pairwise],fnlwgt,age,-0.076628
2,Loader[data]_Describer[pairwise],data,[pairwise],educational-num,age,0.03094
3,Loader[data]_Describer[pairwise],data,[pairwise],capital-gain,age,0.077229
4,Loader[data]_Describer[pairwise],data,[pairwise],capital-loss,age,0.056944
5,Loader[data]_Describer[pairwise],data,[pairwise],hours-per-week,age,0.071558
6,Loader[data]_Describer[pairwise],data,[pairwise],fnlwgt,fnlwgt,1.0
7,Loader[data]_Describer[pairwise],data,[pairwise],educational-num,fnlwgt,-0.038761
8,Loader[data]_Describer[pairwise],data,[pairwise],capital-gain,fnlwgt,-0.003706
9,Loader[data]_Describer[pairwise],data,[pairwise],capital-loss,fnlwgt,-0.004366
