# PETsARD

## import PETsARD

In [2]:
import os
from pprint import pprint
import sys

# Make the PETsARD checkout importable and run from its demo/ folder.
# NOTE(review): both paths are absolute locations on one Windows machine;
# they must be edited before the notebook can run anywhere else.
sys.path.append('D:\\Dropbox\\89_其他應用\\GitHub\\PETsARD')
os.chdir('D:\\Dropbox\\89_其他應用\\GitHub\\PETsARD\\demo')
# Alternative (disabled) locations, apparently for a SageMaker host:
# sys.path.append('/home/ec2-user/SageMaker/PETs-Experiment')
# os.chdir('/home/ec2-user/SageMaker/PETs-Experiment/demo')

import PETsARD


# Module-by-Module

## Loader

In [3]:
# Load the bundled "adult" benchmark; '?' is registered as the NA marker for
# the three listed columns.
loader = PETsARD.Loader(
    filepath='benchmark://adult',
    na_values=dict.fromkeys(
        ['workclass', 'occupation', 'native-country'],
        '?'
    )
)
# Show only the first row to keep the output short.
print(loader.data.head(1))


Loader - Benchmarker: file benchmark\adult.csv already exist and match SHA-256.
                      PETsARD will ignore download and use local data directly.
   age workclass  fnlwgt education  educational-num marital-status  \
0   25   Private  226802      11th                7  Never-married   

          occupation relationship   race gender  capital-gain  capital-loss  \
0  Machine-op-inspct    Own-child  Black   Male             0             0   

   hours-per-week native-country income  
0              40  United-States  <=50K  


## metadata

In [4]:
# Pretty-print the metadata dict held by the loader (per-column dtype,
# inferred type and NA percentage, as the output below shows).
pprint(loader.metadata.metadata)

{'col': {'age': {'dtype': dtype('int8'),
                 'infer_dtype': 'numerical',
                 'na_percentage': 0.0},
         'capital-gain': {'dtype': dtype('int32'),
                          'infer_dtype': 'numerical',
                          'na_percentage': 0.0},
         'capital-loss': {'dtype': dtype('int16'),
                          'infer_dtype': 'numerical',
                          'na_percentage': 0.0},
         'education': {'dtype': CategoricalDtype(categories=['10th', '11th', '12th', '1st-4th', '5th-6th', '7th-8th',
                  '9th', 'Assoc-acdm', 'Assoc-voc', 'Bachelors', 'Doctorate',
                  'HS-grad', 'Masters', 'Preschool', 'Prof-school',
                  'Some-college'],
, ordered=False),
                       'infer_dtype': 'categorical',
                       'na_percentage': 0.0},
         'educational-num': {'dtype': dtype('int8'),
                             'infer_dtype': 'numerical',
                             'na_percent

## Splitter

In [5]:
# One 80/20 split of the loaded data (the trailing "30" is the author's note
# of an alternative sample count).
splitter = PETsARD.Splitter(
    data=loader.data,
    num_samples=1,  # 30,
    train_split_ratio=0.8
)

# Row counts first, then the first row of each part, for sample number 1.
first_sample = splitter.data[1]
for part in ('train', 'validation'):
    print(first_sample[part].shape[0])
for part in ('train', 'validation'):
    print(first_sample[part].head(1))


39073
9769
   age workclass  fnlwgt education  educational-num marital-status  \
0   25   Private  226802      11th                7  Never-married   

          occupation relationship   race gender  capital-gain  capital-loss  \
0  Machine-op-inspct    Own-child  Black   Male             0             0   

   hours-per-week native-country income  
0              40  United-States  <=50K  
   age workclass  fnlwgt education  educational-num      marital-status  \
0   27   Private  205145   HS-grad                9  Married-civ-spouse   

          occupation relationship   race gender  capital-gain  capital-loss  \
0  Machine-op-inspct      Husband  White   Male             0             0   

   hours-per-week native-country income  
0              40  United-States  <=50K  


## Processor: transform()

In [6]:
# Build the processor configuration from a compact, per-stage description.
# `config_transform` (printed at the end) is the expanded per-column mapping.
processor_config = PETsARD.Config.ProcessorConfig(
    # Every column name known to the loader's metadata.
    colnames = list(loader.metadata.metadata['col'].keys()),
    config = {
        # Missing-value handling: the same method for all columns.
        'missingist': {
            'method': 'missingist_drop',
            'all': True
        },
        #'method': , # ValueError: y contains previously unseen labels:
        # Encoders: a list of rules, each naming the columns it applies to.
        'encoder': [
            {'method': 'encoder_label',
                'include': ['education','marital-status','relationship','gender']
            },
            {'method': 'encoder_uniform',
                'include': ['workclass', 'occupation', 'race', 'native-country', 'income']
            }
        ],
        # Outlier handling on a single column.
        'outlierist': {
            'method': 'outlierist_iqr',
            'include': 'hours-per-week'
        },
        # Scaling is switched off here; the disabled alternative is kept below.
        'scaler': None
        # 'scaler': {
        #     'method': 'scaler_standard',
        #     'exclude': ['hours-per-week',
        #         'workclass', 'education', 'marital-status',
        #         'occupation', 'relationship', 'race', 'gender',
        #         'native-country', 'income'
        #     ]
        # }
    }
)
pprint(processor_config.config_transform)

{'encoder': {'education': 'encoder_label',
             'gender': 'encoder_label',
             'income': 'encoder_uniform',
             'marital-status': 'encoder_label',
             'native-country': 'encoder_uniform',
             'occupation': 'encoder_uniform',
             'race': 'encoder_uniform',
             'relationship': 'encoder_label',
             'workclass': 'encoder_uniform'},
 'missingist': {'age': 'missingist_drop',
                'capital-gain': 'missingist_drop',
                'capital-loss': 'missingist_drop',
                'education': 'missingist_drop',
                'educational-num': 'missingist_drop',
                'fnlwgt': 'missingist_drop',
                'gender': 'missingist_drop',
                'hours-per-week': 'missingist_drop',
                'income': 'missingist_drop',
                'marital-status': 'missingist_drop',
                'native-country': 'missingist_drop',
                'occupation': 'missingist_drop',
          

In [8]:
# Create the processor from the loader's metadata, then apply the expanded
# configuration built in the previous cell.
processor = PETsARD.Processor.Processor(metadata=loader.metadata)
processor.update_config(processor_config.config_transform)

# Fit and transform on the training part of sample number 1.
train_data = splitter.data[1]['train']
processor.fit(data=train_data, sequence=None)
preproc_data = processor.transform(data=train_data)
print(preproc_data.head(1))

No self-defined config passed.  Generate a config automatically.
   age  workclass  fnlwgt  education  educational-num  marital-status  \
0   25   0.305754  226802          1                7               4   

   occupation  relationship     race  gender  capital-gain  capital-loss  \
0     0.77943             3  0.89837       1             0             0   

   hours-per-week  native-country    income  
0              40        0.240867  0.480535  


## Synthesizer

In [47]:
# Fit a synthesizer on the preprocessed training data and sample from it.
# The method string selects SDV's single-table GaussianCopula (see the log
# output below).
synthesizer = PETsARD.Synthesizer(
    data=preproc_data,
    synthesizing_method='sdv-singletable-gaussiancopula'
)
synthesizer.fit_sample()
# The synthetic rows are exposed as `data_syn`; show only the first one.
print(synthesizer.data_syn.head(1))


Synthesizer (SDV - SingleTable): Metafile loading time: 0.0415 sec.
Synthesizer (SDV - SingleTable): Fitting GaussianCopula.
Synthesizer (SDV - SingleTable): Fitting  GaussianCopula spent 9.4193 sec.
Synthesizer (SDV - SingleTable): Sampling GaussianCopula # 21486 rows (same as raw) in 1.434 sec.
        age  workclass    fnlwgt  education  educational-num  marital-status  \
0  1.274034   0.847556 -0.214793         15         -1.03573               0   

   occupation  relationship      race  gender  capital-gain  capital-loss  \
0    0.193904             0  0.323358       1     -0.145386     -0.216634   

   hours-per-week  native-country   income  
0        0.176305        0.662747  0.67922  


## Processor: inverse_transform()

In [48]:
# Map the synthetic data back through the fitted processor so that encoded
# columns show their original labels again (compare the output below).
postproc_data = processor.inverse_transform(
    data=synthesizer.data_syn
)
print(postproc_data.head(1))

         age  workclass         fnlwgt     education  educational-num  \
0  56.054993  Local-gov  167216.417519  Some-college         7.409966   

  marital-status    occupation relationship   race gender  capital-gain  \
0       Divorced  Craft-repair      Husband  White   Male           0.0   

   capital-loss  hours-per-week native-country income  
0           0.0       42.567547  United-States  <=50K  


## Evaluator

### Anonymeter

In [49]:
# Anonymeter singling-out (univariate) evaluation of the synthetic data.
# The evaluator gets its own name instead of `eval`, which would shadow the
# built-in eval() for the rest of the kernel session.
singlingout_evaluator = PETsARD.Evaluator(
    evaluating_method='anonymeter-singlingout-univariate',
    data={
        'ori': splitter.data[1]['train'],
        'syn': postproc_data,
        'control': splitter.data[1]['validation']
    },
    anonymeter_n_attacks=2  # tiny demo value; 2000 is the author's noted alternative
)
singlingout_evaluator.eval()
# Last expression of the cell: displays the evaluation dict.
singlingout_evaluator.Evaluator.evaluation

Evaluator (Anonymeter - SinglingOut - Univariate): Now is SinglingOut - Univariate Evaluator
Evaluator (Anonymeter - SinglingOut - Univariate): Evaluator time: 0.0496 sec.
Evaluator (Anonymeter): Evaluating  SinglingOut - Univariate.


{'Risk': 0.0,
 'Risk_CI_btm': 0.0,
 'Risk_CI_top': 0.6928102649963914,
 'Attack_Rate': 0.32880988624667346,
 'Attack_Rate_err': 0.32880988624667346,
 'Baseline_Rate': 0.32880988624667346,
 'Baseline_Rate_err': 0.32880988624667346,
 'Control_Rate': 0.32880988624667346,
 'Control_Rate_err': 0.32880988624667346}

In [39]:
# Anonymeter linkability evaluation of the synthetic data.
# The evaluator gets its own name instead of `eval`, which would shadow the
# built-in eval() for the rest of the kernel session.
linkability_evaluator = PETsARD.Evaluator(
    evaluating_method='anonymeter-linkability',
    data={
        'ori': splitter.data[1]['train'],
        'syn': postproc_data,
        'control': splitter.data[1]['validation']
    },
    anonymeter_n_attacks=2,  # tiny demo value; 2000 is the author's noted alternative
    anonymeter_n_neighbors=10,
    anonymeter_n_jobs=-1,
    # Two column groups passed as the attack's auxiliary columns.
    anonymeter_aux_cols=[
        ['age', 'fnlwgt', 'race', 'gender', 'native-country'],
        ['workclass', 'education', 'capital-gain', 'capital-loss', 'hours-per-week']
    ]
)
linkability_evaluator.eval()
# Last expression of the cell: displays the evaluation dict.
linkability_evaluator.Evaluator.evaluation


Evaluator (Anonymeter - Linkability): Now is Linkability Evaluator
Evaluator (Anonymeter - Linkability): aux_cols are [age, fnlwgt, race, gender, native-country]
                                      and [workclass, education, capital-gain, capital-loss, hours-per-week].
Evaluator (Anonymeter - Linkability): Evaluator time: 0.001 sec.
Evaluator (Anonymeter): Evaluating  Linkability.
Evaluator (Anonymeter): Evaluating Linkability spent 60.1813 sec.


{'Risk': 0.0,
 'Risk_CI_btm': 0.0,
 'Risk_CI_top': 0.6928102649963914,
 'Attack_Rate': 0.32880988624667346,
 'Attack_Rate_err': 0.32880988624667346,
 'Baseline_Rate': 0.32880988624667346,
 'Baseline_Rate_err': 0.32880988624667346,
 'Control_Rate': 0.32880988624667346,
 'Control_Rate_err': 0.32880988624667346}

In [55]:
# Anonymeter inference evaluation with 'age' as the secret column.
# The evaluator gets its own name instead of `eval`, which would shadow the
# built-in eval() for the rest of the kernel session.
inference_evaluator = PETsARD.Evaluator(
    evaluating_method='anonymeter-inference',
    data={
        'ori': splitter.data[1]['train'],
        'syn': postproc_data,
        'control': splitter.data[1]['validation']
    },
    anonymeter_n_attacks=2,  # tiny demo value; 2000 is the author's noted alternative
    anonymeter_n_jobs=-1,
    anonymeter_secret='age'
)
inference_evaluator.eval()
# Last expression of the cell: displays the evaluation dict.
inference_evaluator.Evaluator.evaluation


Evaluator (Anonymeter - Inference): Now is Inference Evaluator
Evaluator (Anonymeter - Inference): Evaluator time: 0.001 sec.
Evaluator (Anonymeter): Evaluating  Inference.
Evaluator (Anonymeter): Evaluating Inference spent 0.7485 sec.


{'Risk': 0.0,
 'Risk_CI_btm': 0.0,
 'Risk_CI_top': 0.029909810191960658,
 'Attack_Rate': 0.08518697431292883,
 'Attack_Rate_err': 0.024167884844459902,
 'Baseline_Rate': 0.07923272035569816,
 'Baseline_Rate_err': 0.02336369331827033,
 'Control_Rate': 0.09312597958923642,
 'Control_Rate_err': 0.025183431700097014}

### SDMetrics

In [40]:
# SDMetrics single-table quality report: original vs. synthetic data.
# The evaluator gets its own name instead of `eval`, which would shadow the
# built-in eval() for the rest of the kernel session.
quality_evaluator = PETsARD.Evaluator(
    evaluating_method='sdmetrics-single_table-qualityreport',
    data={
        'ori': splitter.data[1]['train'],
        'syn': postproc_data
    }
)
quality_evaluator.eval()
# Last expression of the cell: displays the evaluation dict.
quality_evaluator.Evaluator.evaluation

Evaluator (SDMetrics): Evaluating QualityReport.
Generating report ...
(1/2) Evaluating Column Shapes: : 100%|██████████| 15/15 [00:00<00:00, 32.56it/s]
(2/2) Evaluating Column Pair Trends: : 100%|██████████| 105/105 [00:07<00:00, 14.31it/s]

Overall Score: 73.53%

Properties:
- Column Shapes: 90.48%
- Column Pair Trends: 56.58%
Evaluator (SDMetrics): Evaluating QualityReport spent 7.8112 sec.


{'score': 0.7352610230168115,
 'properties': {'Column Shapes': {'Score': 0.904767942490236},
  'Column Pair Trends': {'Score': 0.5657541035433872}},
 'details': {'Column Shapes':              Column        Metric     Score
  0               age  KSComplement  0.939545
  1         workclass  TVComplement  0.997249
  2            fnlwgt  KSComplement  0.949285
  3         education  TVComplement  0.444169
  4   educational-num  KSComplement  0.794142
  5    marital-status  TVComplement  0.970994
  6        occupation  TVComplement  0.988192
  7      relationship  TVComplement  0.962895
  8              race  TVComplement  0.997614
  9            gender  TVComplement  0.999087
  10     capital-gain  KSComplement  0.917411
  11     capital-loss  KSComplement  0.953600
  12   hours-per-week  KSComplement  0.681621
  13   native-country  TVComplement  0.994843
  14           income  TVComplement  0.980872,
  'Column Pair Trends':            Column 1         Column 2                 Metric   

In [51]:
# SDMetrics single-table diagnostic report: original vs. synthetic data.
# The evaluator gets its own name instead of `eval`, which would shadow the
# built-in eval() for the rest of the kernel session.
diagnostic_evaluator = PETsARD.Evaluator(
    evaluating_method='sdmetrics-single_table-diagnosticreport',
    data={
        'ori': splitter.data[1]['train'],
        'syn': postproc_data
    }
)
diagnostic_evaluator.eval()
# Last expression of the cell: displays the evaluation dict.
diagnostic_evaluator.Evaluator.evaluation

Evaluator (SDMetrics): Evaluating DiagnosticReport.
Generating report ...
(1/2) Evaluating Data Validity: :   0%|          | 0/15 [00:00<?, ?it/s]

(1/2) Evaluating Data Validity: : 100%|██████████| 15/15 [00:00<00:00, 149.84it/s]
(2/2) Evaluating Data Structure: : 100%|██████████| 1/1 [00:00<00:00, 427.34it/s]

Overall Score: 100.0%

Properties:
- Data Validity: 100.0%
- Data Structure: 100.0%
Evaluator (SDMetrics): Evaluating DiagnosticReport spent 0.1076 sec.


{'score': 1.0,
 'properties': {'Data Validity': {'Score': 1.0},
  'Data Structure': {'Score': 1.0}},
 'details': {'Data Validity':              Column             Metric  Score
  0               age  BoundaryAdherence    1.0
  1         workclass  CategoryAdherence    1.0
  2            fnlwgt  BoundaryAdherence    1.0
  3         education  CategoryAdherence    1.0
  4   educational-num  BoundaryAdherence    1.0
  5    marital-status  CategoryAdherence    1.0
  6        occupation  CategoryAdherence    1.0
  7      relationship  CategoryAdherence    1.0
  8              race  CategoryAdherence    1.0
  9            gender  CategoryAdherence    1.0
  10     capital-gain  BoundaryAdherence    1.0
  11     capital-loss  BoundaryAdherence    1.0
  12   hours-per-week  BoundaryAdherence    1.0
  13   native-country  CategoryAdherence    1.0
  14           income  CategoryAdherence    1.0,
  'Data Structure':            Metric  Score
  0  TableStructure    1.0}}

# Executor

## run()

In [2]:
import os
from pprint import pprint
import sys

# NOTE(review): absolute paths on one Windows machine; edit before running
# elsewhere.
sys.path.append('D:\\Dropbox\\89_其他應用\\GitHub\\PETsARD')
os.chdir('D:\\Dropbox\\89_其他應用\\GitHub\\PETsARD\\demo')

import PETsARD


# Whole-pipeline description for the Executor. Each stage maps a label to
# that stage's settings; the labels are reused in the result key further down.
para_Executor = {
    'Loader': {
        'adult': {
            'filepath': 'benchmark://adult',
            # '?' is registered as the NA marker for these three columns.
            'na_values': {k: '?' for k in [
                'workclass',
                'occupation',
                'native-country'
            ]}
        }
    },
    'Splitter': {
        '0.8': {
            'num_samples': 2,
            'train_split_ratio': 0.8,
        }
    },
    'Processor': {
        # NOTE(review): "stanard" looks like a typo for "standard"; the label
        # is also used as a lookup key below, so both must change together.
        'drop-IQR-stanard-label': {
            'missingist': {
                'method': 'missingist_drop',
                'all': True
            },
            #'method': , # ValueError: y contains previously unseen labels:
            'encoder': [
                {'method': 'encoder_label',
                 'include': ['education','marital-status','relationship','gender']
                },
                {'method': 'encoder_uniform',
                 'include': ['workclass', 'occupation', 'race', 'native-country', 'income']
                }
            ],
            'outlierist': {
                'method': 'outlierist_iqr',
                'include': 'hours-per-week'
            },
            'scaler': {
                'method': 'scaler_standard',
                'exclude': ['hours-per-week',
                    'workclass', 'education', 'marital-status',
                    'occupation', 'relationship', 'race', 'gender',
                    'native-country', 'income'
                ]
            }
        }
    },
    'Synthesizer': {
        'GaussianCopula': {
            'synthesizing_method': 'sdv-singletable-gaussiancopula'
        }
    },
    'Evaluator': {
        'anonymeter-SinglingOut': {
            'evaluating_method': 'anonymeter-singlingout-univariate',
            'anonymeter_n_attacks': 1,  # tiny demo value; 2000 is the author's noted alternative
            'anonymeter_num_samples': 2
        }
    }
}

# Run every stage sequentially.
executor_single = PETsARD.Executor(**para_Executor)
executor_single.run()
# Results are keyed by a tuple built from the stage labels above plus two
# integers -- presumably the split sample number and the evaluator trial
# number; TODO confirm against the Executor implementation.
pprint(
    executor_single.evaluator[(
        'adult',
        '0.8',
        1,
        'drop-IQR-stanard-label',
        'GaussianCopula',
        'anonymeter-SinglingOut',
        1
    )].Evaluator.evaluation
)

Loader - Benchmarker: file benchmark\adult.csv already exist and match SHA-256.
                      PETsARD will ignore download and use local data directly.
Executor - Loader: adult loading time: 8.8062 sec.
Executor - Splitter: 0.8 splitting time: 0.0843 sec.
No self-defined config passed.  Generate a config automatically.
Executor - Processor (preprocessing): drop-IQR-stanard-label processing time: 0.5596 sec.
Synthesizer (SDV - SingleTable): Metafile loading time: 0.0237 sec.
Synthesizer (SDV - SingleTable): Fitting GaussianCopula.
Synthesizer (SDV - SingleTable): Fitting  GaussianCopula spent 8.7439 sec.
Synthesizer (SDV - SingleTable): Sampling GaussianCopula # 21577 rows (same as raw) in 1.3954 sec.
Executor - Synthesizer: GaussianCopula synthesizing time: 10.163 sec.
Executor - Processor (postprocessing): drop-IQR-stanard-label processing time: 0.0267 sec.
Evaluator (Anonymeter - SinglingOut - Univariate): Now is SinglingOut - Univariate Evaluator
Evaluator (Anonymeter - Sing

In [1]:
import os
from pprint import pprint
import sys

# NOTE(review): absolute paths on one Windows machine; edit before running
# elsewhere.
sys.path.append('D:\\Dropbox\\89_其他應用\\GitHub\\PETsARD')
os.chdir('D:\\Dropbox\\89_其他應用\\GitHub\\PETsARD\\demo')

import PETsARD


# Whole-pipeline description for the Executor. Each stage maps a label to
# that stage's settings; the labels are reused in the result key further down.
para_Executor = {
    'Loader': {
        'adult': {
            'filepath': 'benchmark://adult',
            # '?' is registered as the NA marker for these three columns.
            'na_values': {k: '?' for k in [
                'workclass',
                'occupation',
                'native-country'
            ]}
        }
    },
    'Splitter': {
        '0.8': {
            'num_samples': 2,
            'train_split_ratio': 0.8,
        }
    },
    'Processor': {
        # NOTE(review): "stanard" looks like a typo for "standard"; the label
        # is also used as a lookup key below, so both must change together.
        'drop-IQR-stanard-label': {
            'missingist': {
                'method': 'missingist_drop',
                'all': True
            },
            #'method': , # ValueError: y contains previously unseen labels:
            'encoder': [
                {'method': 'encoder_label',
                 'include': ['education','marital-status','relationship','gender']
                },
                {'method': 'encoder_uniform',
                 'include': ['workclass', 'occupation', 'race', 'native-country', 'income']
                }
            ],
            'outlierist': {
                'method': 'outlierist_iqr',
                'include': 'hours-per-week'
            },
            'scaler': {
                'method': 'scaler_standard',
                'exclude': ['hours-per-week',
                    'workclass', 'education', 'marital-status',
                    'occupation', 'relationship', 'race', 'gender',
                    'native-country', 'income'
                ]
            }
        }
    },
    'Synthesizer': {
        'GaussianCopula': {
            'synthesizing_method': 'sdv-singletable-gaussiancopula'
        }
    },
    'Evaluator': {
        'anonymeter-SinglingOut': {
            'evaluating_method': 'anonymeter-singlingout-univariate',
            'anonymeter_n_attacks': 1,  # tiny demo value; 2000 is the author's noted alternative
            'anonymeter_num_samples': 2
        }
    }
}

# Known issue: Processor creates a lambda in its __init__, and Python cannot
# pickle a local lambda, so run_parallel() has not worked since the Processor
# migration (see the AttributeError in this cell's output).
executor_parallel = PETsARD.Executor(**para_Executor)
executor_parallel.run_parallel()
# Results are keyed by a tuple built from the stage labels above plus two
# integers -- presumably the split sample number and the evaluator trial
# number; TODO confirm against the Executor implementation.
pprint(
    executor_parallel.evaluator[(
        'adult',
        '0.8',
        1,
        'drop-IQR-stanard-label',
        'GaussianCopula',
        'anonymeter-SinglingOut',
        1
    )].Evaluator.evaluation
)

Loading:   0%|          | 0/1 [00:00<?, ?it/s]
Processing:   0%|          | 0/1 [00:20<?, ?it/s]s/it]
Splitting: 100%|██████████| 1/1 [00:20<00:00, 20.74s/it]
Loading: 100%|██████████| 1/1 [00:20<00:00, 20.74s/it]


AttributeError: Can't pickle local object 'Processor.__init__.<locals>.<lambda>'

# Unarranged

In [None]:
import pandas as pd


def _follow_attr_path(obj, steps):
    """Walk `steps` starting from `obj` and return the final value.

    Each step is one of:
      * 'name()'  -- call the zero-argument method `name`;
      * 'name[i]' -- read attribute `name` (must be a list, dict or tuple)
                     and take item `i` (an integer);
      * 'name'    -- read attribute `name`.

    Raises:
        Exception: any lookup, call, type or index failure is propagated so
            the caller can decide on the fallback value.
    """
    for step in steps:
        if '()' in step:
            # A missing attribute raises AttributeError, a non-callable one
            # raises TypeError when called.
            obj = getattr(obj, step.split('(')[0])()
        elif '[' in step:
            attr_name, _, index_text = step.partition('[')
            index = int(index_text.rstrip(']'))
            container = getattr(obj, attr_name)
            if not isinstance(container, (list, dict, tuple)):
                raise TypeError(
                    f"{attr_name!r} is not a list, dict or tuple")
            obj = container[index]
        else:
            obj = getattr(obj, step)
    return obj


def Result(__evaluator):
    """Collect risk and rate figures from an evaluator into a flat dict.

    The evaluator is expected to offer `risk()` (with `.value` and `.ci`)
    and `results()` (with `attack_rate`, `baseline_rate`, `control_rate`,
    each having `.value` and `.error`).

    Args:
        __evaluator: the object to read from.

    Returns:
        dict: the nine keys 'Risk', 'Risk_CI_btm', 'Risk_CI_top',
        'Attack_Rate', 'Attack_Rate_err', 'Baseline_Rate',
        'Baseline_Rate_err', 'Control_Rate', 'Control_Rate_err', in that
        order. A value that cannot be resolved is reported as NaN.
    """
    import numpy as np

    fields = [
        ('Risk', ['risk()', 'value']),
        ('Risk_CI_btm', ['risk()', 'ci[0]']),
        ('Risk_CI_top', ['risk()', 'ci[1]']),
        ('Attack_Rate', ['results()', 'attack_rate', 'value']),
        ('Attack_Rate_err', ['results()', 'attack_rate', 'error']),
        ('Baseline_Rate', ['results()', 'baseline_rate', 'value']),
        ('Baseline_Rate_err', ['results()', 'baseline_rate', 'error']),
        ('Control_Rate', ['results()', 'control_rate', 'value']),
        ('Control_Rate_err', ['results()', 'control_rate', 'error']),
    ]

    result = {}
    for key, steps in fields:
        try:
            result[key] = _follow_attr_path(__evaluator, steps)
        except Exception:
            # Deliberate best effort: one unreadable figure must not abort
            # the whole summary. The NaN is final -- previously a later
            # assignment replaced it with a half-resolved object.
            result[key] = np.nan
    return result


# Imported once, ahead of the loop, instead of on every iteration.
from anonymeter.evaluators import SinglingOutEvaluator

# Shared filename stem of the exported trial CSVs.
trial_prefix = "PETsARD[20231224-085805]_Trial"

# Run a univariate singling-out evaluation directly with Anonymeter on the
# exported original / synthetic / control CSVs of each trial.
for i in ['01', '02', '03']:  # trials '04', '05', '06' are currently left out
    print(f"{trial_prefix}[{i}][Ori].csv")
    evaluator = SinglingOutEvaluator(
        ori=pd.read_csv(f"{trial_prefix}[{i}][Ori].csv"),
        syn=pd.read_csv(f"{trial_prefix}[{i}-1-1]Postproc.csv"),
        control=pd.read_csv(f"{trial_prefix}[{i}][Ctrl].csv"),
        n_attacks=2000
    )
    try:
        evaluator.evaluate(mode='univariate')
        print(Result(evaluator))
    except RuntimeError as ex:
        # Each fragment ends with a space so the sentences do not run
        # together when the adjacent literals are concatenated.
        print(f"Singling out evaluation failed with {ex}. "
              "Please re-run this cell. "
              "For more stable results increase `n_attacks`. Note that this will "
              "make the evaluation slower.")


In [None]:
import itertools

import pandas as pd

# Check whether any two trials exported identical data, separately for the
# original, control and post-processed synthetic files.
# Each suffix also closes the "Trial[" bracket opened in the filename below.
# The loop variable is `suffix`, not `type`, so the built-in type() stays
# usable in later cells.
for suffix in ['][Ori]', '][Ctrl]', '-1-1]Postproc']:
    # Read every trial's CSV once, rather than once per compared pair.
    frames = {
        trial: pd.read_csv(
            f"PETsARD[20231224-085805]_Trial[{trial}{suffix}.csv")
        for trial in ['01', '02', '03', '04', '05', '06']
    }
    # Iterating the dict yields the trial ids in insertion order.
    for combo in itertools.combinations(frames, 2):
        if frames[combo[0]].equals(frames[combo[1]]):
            print(suffix+': '+str(combo))
            print("They're same!!??")
        # Pairs that differ are intentionally not reported.
print('done.')


In [None]:
import pandas as pd


# NOTE(review): an identical `Result` is also defined in an earlier cell of
# this notebook; one of the two definitions should eventually be removed.
def _follow_attr_path(obj, steps):
    """Walk `steps` starting from `obj` and return the final value.

    Each step is one of:
      * 'name()'  -- call the zero-argument method `name`;
      * 'name[i]' -- read attribute `name` (must be a list, dict or tuple)
                     and take item `i` (an integer);
      * 'name'    -- read attribute `name`.

    Raises:
        Exception: any lookup, call, type or index failure is propagated so
            the caller can decide on the fallback value.
    """
    for step in steps:
        if '()' in step:
            # A missing attribute raises AttributeError, a non-callable one
            # raises TypeError when called.
            obj = getattr(obj, step.split('(')[0])()
        elif '[' in step:
            attr_name, _, index_text = step.partition('[')
            index = int(index_text.rstrip(']'))
            container = getattr(obj, attr_name)
            if not isinstance(container, (list, dict, tuple)):
                raise TypeError(
                    f"{attr_name!r} is not a list, dict or tuple")
            obj = container[index]
        else:
            obj = getattr(obj, step)
    return obj


def Result(__evaluator):
    """Collect risk and rate figures from an evaluator into a flat dict.

    The evaluator is expected to offer `risk()` (with `.value` and `.ci`)
    and `results()` (with `attack_rate`, `baseline_rate`, `control_rate`,
    each having `.value` and `.error`).

    Args:
        __evaluator: the object to read from.

    Returns:
        dict: the nine keys 'Risk', 'Risk_CI_btm', 'Risk_CI_top',
        'Attack_Rate', 'Attack_Rate_err', 'Baseline_Rate',
        'Baseline_Rate_err', 'Control_Rate', 'Control_Rate_err', in that
        order. A value that cannot be resolved is reported as NaN.
    """
    import numpy as np

    fields = [
        ('Risk', ['risk()', 'value']),
        ('Risk_CI_btm', ['risk()', 'ci[0]']),
        ('Risk_CI_top', ['risk()', 'ci[1]']),
        ('Attack_Rate', ['results()', 'attack_rate', 'value']),
        ('Attack_Rate_err', ['results()', 'attack_rate', 'error']),
        ('Baseline_Rate', ['results()', 'baseline_rate', 'value']),
        ('Baseline_Rate_err', ['results()', 'baseline_rate', 'error']),
        ('Control_Rate', ['results()', 'control_rate', 'value']),
        ('Control_Rate_err', ['results()', 'control_rate', 'error']),
    ]

    result = {}
    for key, steps in fields:
        try:
            result[key] = _follow_attr_path(__evaluator, steps)
        except Exception:
            # Deliberate best effort: one unreadable figure must not abort
            # the whole summary. The NaN is final -- previously a later
            # assignment replaced it with a half-resolved object.
            result[key] = np.nan
    return result


# Imported once, ahead of the loop, instead of on every iteration.
from anonymeter.evaluators import SinglingOutEvaluator

# Shared filename stem of the exported trial CSVs.
trial_prefix = "PETsARD[20231224-085805]_Trial"

# Run a univariate singling-out evaluation directly with Anonymeter on the
# exported original / synthetic / control CSVs of each trial.
for i in ['01', '02', '03']:  # trials '04', '05', '06' are currently left out
    print(f"{trial_prefix}[{i}][Ori].csv")
    evaluator = SinglingOutEvaluator(
        ori=pd.read_csv(f"{trial_prefix}[{i}][Ori].csv"),
        syn=pd.read_csv(f"{trial_prefix}[{i}-1-1]Postproc.csv"),
        control=pd.read_csv(f"{trial_prefix}[{i}][Ctrl].csv"),
        n_attacks=2000
    )
    try:
        evaluator.evaluate(mode='univariate')
        print(Result(evaluator))
    except RuntimeError as ex:
        # Each fragment ends with a space so the sentences do not run
        # together when the adjacent literals are concatenated.
        print(f"Singling out evaluation failed with {ex}. "
              "Please re-run this cell. "
              "For more stable results increase `n_attacks`. Note that this will "
              "make the evaluation slower.")


Executor - Loader: adult loading time: 6.8097 sec.
Executor - Splitter: 0.8 splitting time: 0.339 sec.
Preprocessor - Outlierist (IQR): Dropped  1060 rows on fnlwgt         . Kept [-63981.5, 419234.5] only.
Preprocessor - Outlierist (IQR): Dropped   227 rows on educational-num. Kept [3.0, 19.0] only.
Preprocessor - Outlierist (IQR): Dropped  1705 rows on capital-loss   . Kept [0.0, 0.0] only.
Preprocessor - Outlierist (IQR): Dropped  9432 rows on hours-per-week . Kept [32.5, 52.5] only.
Preprocessor - Outlierist (IQR): Dropped   214 rows on age            . Kept [-0.5, 75.5] only.
Preprocessor - Outlierist (IQR): Dropped  3030 rows on capital-gain   . Kept [0.0, 0.0] only.
Preprocessor - Outlierist (IQR): Totally Dropped 13932 in 36207 rows.
Preprocessor - Encoder (Label): Column native-country  been labelized from 0 to 39.
Preprocessor - Encoder (Label): Column gender          been labelized from 0 to  1.
Preprocessor - Encoder (Label): Column race            been labelized from 0 to 

Found 765 failed queries out of 2000. Check DEBUG messages for more details.


Executor - Evaluator: anonymeter-SinglingOut at 1 trials evaluating time: 131.365 sec.
Evaluator (Anonymeter - SinglingOut - Univariate): Now is SinglingOut - Univariate Evaluator
Evaluator (Anonymeter - SinglingOut - Univariate): Evaluator time: 0.0322 sec.
Evaluator (Anonymeter): Evaluating  SinglingOut - Univariate.


Found 802 failed queries out of 2000. Check DEBUG messages for more details.


Executor - Evaluator: anonymeter-SinglingOut at 2 trials evaluating time: 131.1331 sec.
Evaluator (Anonymeter - SinglingOut - Univariate): Now is SinglingOut - Univariate Evaluator
Evaluator (Anonymeter - SinglingOut - Univariate): Evaluator time: 0.0336 sec.
Evaluator (Anonymeter): Evaluating  SinglingOut - Univariate.


Found 830 failed queries out of 2000. Check DEBUG messages for more details.


Executor - Evaluator: anonymeter-SinglingOut at 3 trials evaluating time: 131.5346 sec.
Evaluator (Anonymeter - SinglingOut - Univariate): Now is SinglingOut - Univariate Evaluator
Evaluator (Anonymeter - SinglingOut - Univariate): Evaluator time: 0.0356 sec.
Evaluator (Anonymeter): Evaluating  SinglingOut - Univariate.


Found 794 failed queries out of 2000. Check DEBUG messages for more details.


Executor - Evaluator: anonymeter-SinglingOut at 4 trials evaluating time: 131.4821 sec.
Evaluator (Anonymeter - SinglingOut - Univariate): Now is SinglingOut - Univariate Evaluator
Evaluator (Anonymeter - SinglingOut - Univariate): Evaluator time: 0.0351 sec.
Evaluator (Anonymeter): Evaluating  SinglingOut - Univariate.


Found 821 failed queries out of 2000. Check DEBUG messages for more details.


Executor - Evaluator: anonymeter-SinglingOut at 5 trials evaluating time: 132.587 sec.
Evaluator (Anonymeter - SinglingOut - Univariate): Now is SinglingOut - Univariate Evaluator
Evaluator (Anonymeter - SinglingOut - Univariate): Evaluator time: 0.036 sec.
Evaluator (Anonymeter): Evaluating  SinglingOut - Univariate.


Found 800 failed queries out of 2000. Check DEBUG messages for more details.


Executor - Evaluator: anonymeter-SinglingOut at 6 trials evaluating time: 131.8783 sec.
Evaluator (Anonymeter - SinglingOut - Univariate): Now is SinglingOut - Univariate Evaluator
Evaluator (Anonymeter - SinglingOut - Univariate): Evaluator time: 0.0352 sec.
Evaluator (Anonymeter): Evaluating  SinglingOut - Univariate.


Found 799 failed queries out of 2000. Check DEBUG messages for more details.


In [None]:
import pandas as pd

# Same singling-out evaluation as above, but through the PETsARD wrapper and
# with fewer attacks. The evaluator is named `trial_evaluator` rather than
# `eval`, so the built-in eval() is not shadowed.
for i in ['01', '02', '03']:  # trials '04', '05', '06' are currently left out
    trial_evaluator = PETsARD.Evaluator(
        evaluating_method='anonymeter-singlingout-univariate',
        data={
            'ori': pd.read_csv(f"PETsARD[20231224-085805]_Trial[{i}][Ori].csv"),
            'syn': pd.read_csv(f"PETsARD[20231224-085805]_Trial[{i}-1-1]Postproc.csv"),
            'control': pd.read_csv(f"PETsARD[20231224-085805]_Trial[{i}][Ctrl].csv")
        },
        anonymeter_n_attacks=500
    )
    trial_evaluator.eval()
    print(trial_evaluator.Evaluator.evaluation)


In [None]:
import itertools

import pandas as pd

# Check whether any two trials exported identical data, separately for the
# original, control and post-processed synthetic files.
# Each suffix also closes the "Trial[" bracket opened in the filename below.
# The loop variable is `suffix`, not `type`, so the built-in type() stays
# usable in later cells.
for suffix in ['][Ori]', '][Ctrl]', '-1-1]Postproc']:
    # Read every trial's CSV once, rather than once per compared pair.
    frames = {
        trial: pd.read_csv(
            f"PETsARD[20231224-085805]_Trial[{trial}{suffix}.csv")
        for trial in ['01', '02', '03', '04', '05', '06']
    }
    # Iterating the dict yields the trial ids in insertion order.
    for combo in itertools.combinations(frames, 2):
        if frames[combo[0]].equals(frames[combo[1]]):
            print(suffix+': '+str(combo))
            print("They're same!!??")
        # Pairs that differ are intentionally not reported.
print('done.')


Executor - Evaluator: anonymeter-SinglingOut at 7 trials evaluating time: 131.5421 sec.
Evaluator (Anonymeter - SinglingOut - Univariate): Now is SinglingOut - Univariate Evaluator
Evaluator (Anonymeter - SinglingOut - Univariate): Evaluator time: 0.0354 sec.
Evaluator (Anonymeter): Evaluating  SinglingOut - Univariate.


In [None]:
# Loader settings for an NHANES CSV that has no header row, with the twelve
# column names supplied explicitly.
# NOTE(review): this literal is not assigned to any name, so the cell only
# displays it -- presumably a draft for an Executor 'Loader' section.
{
    'Loader': {
        'NHANES': {
            'filepath': '../[sunset]/data/[NHANES] B.csv',
            'header_exist': False,
            'header_names': ['gen', 'age', 'race', 'edu', 'mar', 'bmi', 'dep', 'pir', 'gh', 'mets', 'qm', 'dia']
        }
    }
}
