# Diabetes fusion example: fusing two tables that share a set of overlap features


In [None]:
from sklearn.datasets import load_diabetes
import numpy as np
import pandas as pd
from datafusion_ml import fuse_datasets

# Load the scikit-learn diabetes dataset and wrap it in pandas objects.
data = load_diabetes()
X = pd.DataFrame(data=data.data, columns=data.feature_names)
y = pd.Series(data.target, name="disease_progression")

# Columns that both tables will have in common.
overlap = ["bmi", "bp", "s1", "s2"]

# Table A: overlap features plus the dataset's own target column.
# `assign` returns a new frame, so X itself is left untouched.
A = X[overlap].assign(disease_progression=y)

# Table B: same overlap features plus a synthetic binary label.
# The label comes from a fixed weighted sum of the overlap columns with
# Gaussian noise added; the seed keeps the draw reproducible.
rng = np.random.default_rng(7)
features = X[overlap].to_numpy()
weights = np.array([0.6, 0.2, 0.1, 0.1])
noise = rng.normal(0, 0.3, size=len(X))
score = features @ weights + noise

# Splitting at the median marks the rows strictly above it as 1, the rest as 0.
threshold = np.median(score)
B = X[overlap].assign(high_risk=(score > threshold).astype(int))

# Fuse the two tables with the PyCaret preference switched off.
# NOTE(review): the exact semantics of `prefer_pycaret` and the shape of the
# returned object are defined by datafusion_ml - confirm against its docs.
res = fuse_datasets(A, B, prefer_pycaret=False)

# Last expression of the cell: preview the first rows of the fused result.
res.fused.head()
            
