<a href="https://colab.research.google.com/github/juaml/PrettYharmonize/blob/master/PrettYharmonize_usage_example.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Example: setting up and using PrettYharmonize

## Installation

In [None]:
import numpy as np
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split


In [None]:
!git clone https://github.com/juaml/PrettYharmonize.git -q
from prettyharmonize import PrettYharmonizeClassifier

In [None]:
def _simulate_multisite_data(n_samples, n_features, site_shift, seed):
    """Draw random features/labels and add a constant per-site offset to the features."""
    # Seed the global NumPy RNG so the three draws below are reproducible.
    np.random.seed(seed)

    # Random features & labels (labels are drawn independently of the features)
    X = np.random.randn(n_samples, n_features)
    y = np.random.randint(0, 2, size=n_samples)

    # Assign every sample to one of 3 sites
    sites = np.random.choice(["Site_A", "Site_B", "Site_C"], size=n_samples)

    # Artificial site effects: Site A is shifted up, Site B is shifted down,
    # Site C is left untouched.
    X[sites == "Site_A"] += site_shift
    X[sites == "Site_B"] -= site_shift

    return X, y, sites


def run_demo(
    n_samples: int = 300,
    n_features: int = 20,
    site_shift: float = 2.0,
    test_size: float = 0.3,
    seed: int = 42,
):
    """Compare an SVC with and without PrettYharmonize on simulated multi-site data.

    The demo simulates a data set whose samples come from three sites, two of
    which carry a constant additive offset, splits it into train and test
    partitions (stratified on the label), and reports the test accuracy of

    * a ``PrettYharmonizeClassifier`` wrapping an ``SVC`` (site labels are
      passed to both ``fit`` and ``predict``), and
    * a plain ``SVC`` trained on the raw, unharmonized features.

    Note that features and labels are drawn independently of each other, so
    there is no class signal to learn: both accuracies are expected to hover
    around chance level. The demo illustrates the API usage rather than a
    performance gain.

    Parameters
    ----------
    n_samples : int, default=300
        Total number of simulated samples.
    n_features : int, default=20
        Number of simulated features per sample.
    site_shift : float, default=2.0
        Offset added to every feature of ``Site_A`` samples and subtracted
        from every feature of ``Site_B`` samples.
    test_size : float, default=0.3
        Passed to ``train_test_split`` as ``test_size``.
    seed : int, default=42
        Seed used for the NumPy global RNG, the train/test split and both
        ``SVC`` estimators.

    Returns
    -------
    tuple of (float, float)
        ``(harmonized_accuracy, raw_accuracy)`` on the held-out test split.
        The same two numbers are also printed.
    """
    # ---------------------------------------------------------
    # 1. Generate Simulated Multi-Site Data
    # ---------------------------------------------------------
    print("Generating synthetic multi-site data...")
    X, y, sites = _simulate_multisite_data(n_samples, n_features, site_shift, seed)

    # Split into Train and Test; the site labels are split alongside X and y
    # so every partition keeps its per-sample site assignment.
    X_train, X_test, y_train, y_test, sites_train, sites_test = (
        train_test_split(
            X, y, sites, test_size=test_size, stratify=y, random_state=seed
        )
    )

    # ---------------------------------------------------------
    # 2. Run PrettYharmonize
    # ---------------------------------------------------------
    print("\nRunning PrettYharmonize...")
    model = PrettYharmonizeClassifier(estimator=SVC(random_state=seed))

    # Note: We pass 'sites' to both fit and predict
    model.fit(X_train, y_train, sites=sites_train)
    y_pred = model.predict(X_test, sites=sites_test)

    # ---------------------------------------------------------
    # 3. Evaluate
    # ---------------------------------------------------------
    acc = accuracy_score(y_test, y_pred)
    print(f"✅ Accuracy with Harmonization: {acc:.2f}")

    # Contrast with Raw Data: same estimator, no site information.
    raw_svc = SVC(random_state=seed)
    raw_svc.fit(X_train, y_train)
    raw_acc = accuracy_score(y_test, raw_svc.predict(X_test))
    print(f"⚠️ Accuracy on Raw Data:        {raw_acc:.2f}")

    return acc, raw_acc


# Notebook cells execute with __name__ == "__main__", so this guard runs the
# demo whenever the cell is executed, while staying inert if the code is ever
# imported from a module.
if __name__ == "__main__":
    run_demo()