In [1]:
import numpy as np
import pandas as pd
from synthpop.metrics import (
    MetricsReport,
    EfficacyMetrics,
    DisclosureProtection
)

In [2]:
# Toy "real" dataset: one column per column kind used in this demo
# (numeric, categorical, datetime, boolean). The numeric column carries a
# single missing value.
real_df = pd.DataFrame(
    dict(
        numeric_col=[1, 2, 3, 4, 5, np.nan],
        categorical_col=list("abacbb"),
        datetime_col=pd.date_range("2023-01-01", periods=6),
        boolean_col=[True, False, True, False, True, False],
    )
)

# Matching "synthetic" dataset with the same columns. It is hand-written
# here; in practice it can come from one of the SDG methods.
synthetic_df = pd.DataFrame(
    dict(
        numeric_col=[1.1, 2.1, 2.9, 3.8, 5.2, np.nan],
        categorical_col=list("abbcdb"),
        datetime_col=pd.date_range("2023-01-01", periods=6),
        boolean_col=[True, True, True, False, True, False],
    )
)

# Optional column-name -> column-kind mapping.
# It can also be obtained from the missing data handler.
metadata = dict(
    numeric_col="numerical",
    categorical_col="categorical",
    datetime_col="datetime",
    boolean_col="boolean",
)

In [None]:
# Instantiate and generate a diagnostic report.
# MetricsReport is given the real frame, the synthetic frame, and the
# column metadata mapping, in that order.
report = MetricsReport(real_df, synthetic_df, metadata)
# NOTE(review): presumably returns a tabular object (likely a DataFrame) --
# confirm against the synthpop.metrics documentation.
report_df = report.generate_report()
print("=== Diagnostic Report ===")
# display() rather than print() so the notebook uses the object's rich repr.
display(report_df)

## 2. DEMO: EfficacyMetrics

In [None]:
# EXAMPLE A: Regression
# Both frames are drawn independently from the same distributions:
# two Gaussian features plus a Gaussian target.
#
# A cell-local, explicitly seeded generator is used instead of the global
# np.random state so the generated frames are identical on every
# Restart & Run All and do not depend on which cells ran earlier.
reg_rng = np.random.default_rng(42)
n_reg_rows = 100

real_reg = pd.DataFrame({
    "feat1": reg_rng.normal(0, 1, n_reg_rows),
    "feat2": reg_rng.normal(5, 2, n_reg_rows),
    "target": reg_rng.normal(10, 3, n_reg_rows)
})
synthetic_reg = pd.DataFrame({
    "feat1": reg_rng.normal(0, 1, n_reg_rows),
    "feat2": reg_rng.normal(5, 2, n_reg_rows),
    "target": reg_rng.normal(10, 3, n_reg_rows)
})

# Evaluate the real/synthetic pair on the regression task for "target".
# NOTE(review): whether the reported metrics are themselves deterministic
# depends on EfficacyMetrics internals -- confirm if exact reproducibility of
# the printed numbers matters.
reg_efficacy = EfficacyMetrics(task='regression', target_column="target")
reg_metrics = reg_efficacy.evaluate(real_reg, synthetic_reg)
print("=== Regression Efficacy Metrics ===")
print(reg_metrics)

In [None]:
# EXAMPLE B: Classification
# Both frames are drawn independently: two Gaussian features plus a label
# sampled uniformly from {"A", "B"}.
#
# A cell-local, explicitly seeded generator is used instead of the global
# np.random state so the generated frames are identical on every
# Restart & Run All and do not depend on which cells ran earlier.
clf_rng = np.random.default_rng(43)
n_clf_rows = 100

real_clf = pd.DataFrame({
    "feat1": clf_rng.normal(0, 1, n_clf_rows),
    "feat2": clf_rng.normal(5, 2, n_clf_rows),
    "target": clf_rng.choice(["A", "B"], size=n_clf_rows)
})
synthetic_clf = pd.DataFrame({
    "feat1": clf_rng.normal(0, 1, n_clf_rows),
    "feat2": clf_rng.normal(5, 2, n_clf_rows),
    "target": clf_rng.choice(["A", "B"], size=n_clf_rows)
})

# Evaluate the real/synthetic pair on the classification task for "target".
# NOTE(review): whether the reported metrics are themselves deterministic
# depends on EfficacyMetrics internals -- confirm if exact reproducibility of
# the printed numbers matters.
clf_efficacy = EfficacyMetrics(task='classification', target_column="target")
clf_metrics = clf_efficacy.evaluate(real_clf, synthetic_clf)
print("\n=== Classification Efficacy Metrics ===")
print(clf_metrics)

## 3. DEMO: Privacy metrics

In [None]:
# Example numeric real vs. synthetic data.
# The "synthetic" frame is the real frame plus element-wise Gaussian noise
# (mean 0, std 0.3), so every synthetic row sits close to a real row.
#
# A cell-local, explicitly seeded generator is used instead of the global
# np.random state so both the sample and the noise are identical on every
# Restart & Run All and do not depend on which cells ran earlier.
privacy_rng = np.random.default_rng(44)
n_privacy_rows = 100

real_privacy = pd.DataFrame({
    "col1": privacy_rng.normal(0, 1, n_privacy_rows),
    "col2": privacy_rng.normal(5, 2, n_privacy_rows)
})
# The noise array has the same shape as the frame, so the addition is
# element-wise and keeps the original index and column labels.
synthetic_privacy = real_privacy + privacy_rng.normal(0, 0.3, real_privacy.shape)

dp = DisclosureProtection(real_privacy, synthetic_privacy)
dp_score = dp.score()
dp_report = dp.report()

print("\n=== Disclosure Protection ===")
# The ".3f" format requires score() to return a real number.
print(f"Score: {dp_score:.3f}")
print("Detailed Report:", dp_report)