# DBKNN Examples

Example usage of the DBKNN library for solute cluster detection.

## 1. Synthetic example

In [None]:
import numpy as np

from dbknn import DBKNN
from dbknn.evaluation import evaluate_f1

# Synthetic point cloud: a tight Gaussian blob (the "cluster") followed by a
# uniformly scattered background (the "matrix"). The generator is seeded so
# the example is reproducible.
rng = np.random.default_rng(42)
cluster = rng.normal(loc=[20, 20, 20], scale=1.0, size=(80, 3))
matrix = rng.uniform(0, 50, size=(400, 3))
positions = np.concatenate((cluster, matrix), axis=0)

# Ground truth in the same row order as `positions`:
# 1 for every cluster row, 0 for every matrix row.
truth = np.repeat([1, 0], [len(cluster), len(matrix)])

model = DBKNN(eps=8.0, min_samples=5, k=5)
model.fit(positions)

# Score the fitted labels against the known truth and report the threshold
# stored on the fitted model.
metrics = evaluate_f1(model.labels_, truth)
print(f"threshold={model.threshold_:.4f}, F1={metrics['f1']:.4f}")

## 2. XYZ dataset

In [None]:
from dbknn import DBKNN
from dbknn.io import load_apt_data, save_labeled_xyz
from dbknn.plotting import plot_results_3d, plot_scores

filepath = "dump_156.xyz"  # your dump file
# Named once because the loader and the writer below receive the same list.
solute_species = ["Ni", "Mn", "Cu"]

solute_pos, all_pos, ground_truth = load_apt_data(
    filepath,
    solute_species=solute_species,
)

model = DBKNN(eps=8.0, min_samples=15, k=10)
model.fit(solute_pos)

# Pull the fitted results out once; they feed both the plots and the export.
labels = model.labels_
scores = model.hybrid_scores_

plot_scores(scores, model.threshold_, output_path="scores.png")
plot_results_3d(solute_pos, labels, output_path="results_3d.png")

# Write the input file back out with the per-solute labels and scores.
save_labeled_xyz(
    filepath,
    "labeled.xyz",
    solute_species=solute_species,
    solute_labels=labels,
    solute_scores=scores,
)

## 3. CSV dataset (mass-to-charge ranging)

In [None]:
from dbknn import DBKNN
from dbknn.io import load_apt_csv, save_csv_as_xyz, save_labeled_csv
from dbknn.plotting import plot_results_3d, plot_scores

csv_filepath = "apt_data.csv"
matrix_species = ["Fe"]

# Mass-to-charge windows per species, each given as a pair of bounds.
# NOTE(review): some windows overlap -- Fe and Mn both list (26.50, 28.50),
# and Mn (54.00, 55.50) lies inside Fe (53.50, 56.90). Which species an ion
# in an overlap is assigned to depends on the ranging code; confirm this is
# intended.
mc_ranges = {
    "Fe": [(23.30, 24.50), (26.50, 28.50), (53.50, 56.90)],
    "Ni": [(28.50, 30.60), (57.50, 60.60)],
    "Mn": [(26.50, 28.50), (54.00, 55.50)],
    "Cu": [(31.00, 32.50), (63.00, 65.50)],
}

solute_pos, all_pos, _ = load_apt_csv(
    csv_filepath,
    mc_ranges,
    matrix_species=matrix_species,
)

model = DBKNN(eps=8.0, min_samples=15, k=10)
model.fit(solute_pos)

# Pull the fitted results out once; they feed the plots and both exports.
labels = model.labels_
scores = model.hybrid_scores_

plot_scores(scores, model.threshold_, output_path="csv_scores.png")
plot_results_3d(solute_pos, labels, output_path="csv_results_3d.png")

# Export the labelled data twice: once as CSV, once as XYZ.
save_labeled_csv(
    csv_filepath,
    "csv_labeled.csv",
    mc_ranges,
    matrix_species=matrix_species,
    solute_labels=labels,
    solute_scores=scores,
)
save_csv_as_xyz(
    csv_filepath,
    "csv_labeled.xyz",
    mc_ranges,
    matrix_species=matrix_species,
    solute_labels=labels,
    solute_scores=scores,
)

## 4. Manual threshold

Atoms with a score <= the threshold are classified as cluster atoms.

In [None]:
from dbknn import DBKNN
from dbknn.io import load_apt_data

filepath = "dump_156.xyz"
solute_species = ["Ni", "Mn", "Cu"]

# Only the solute positions are needed here; the other two return values
# are discarded.
solute_pos, _, _ = load_apt_data(
    filepath,
    solute_species=solute_species,
)

# Pass `threshold` explicitly to fix the score cutoff at 5.0.
model = DBKNN(
    eps=8.0,
    min_samples=15,
    k=10,
    threshold=5.0,
)
model.fit(solute_pos)

print(f"threshold={model.threshold_:.4f}, cluster={int(model.labels_.sum())}")

## 5. Custom DBSCAN multiplier weights

`hybrid_score = kNN_distance * multiplier`, where `multiplier = cluster_weight` (default 0.5) for DBSCAN cluster atoms and `noise_weight` (default 1.5) for noise atoms.

In [None]:
from dbknn import DBKNN
from dbknn.io import load_apt_data

filepath = "dump_156.xyz"
solute_species = ["Ni", "Mn", "Cu"]

# Only the solute positions are needed here; the other two return values
# are discarded.
solute_pos, _, _ = load_apt_data(
    filepath,
    solute_species=solute_species,
)

# Override both DBSCAN multiplier weights instead of relying on the defaults.
model = DBKNN(
    eps=8.0,
    min_samples=15,
    k=10,
    cluster_weight=0.3,
    noise_weight=2.0,
)
model.fit(solute_pos)

print(f"threshold={model.threshold_:.4f}, cluster={int(model.labels_.sum())}")

## 6. Cluster volume and composition analysis (XYZ)

In [None]:
import numpy as np
from ase.io import read as ase_read

from dbknn import DBKNN
from dbknn.analysis import compute_cluster_stats, save_cluster_stats_csv
from dbknn.io import load_apt_data
from dbknn.plotting import plot_cluster_stats

filepath = "dump_156.xyz"
solute_species = ["Ni", "Mn", "Cu"]

solute_pos, _, _ = load_apt_data(filepath, solute_species=solute_species)

# Get per-solute-atom species labels by re-reading the file with ASE and
# keeping only the symbols that belong to the solute species.
atoms = ase_read(filepath, format="extxyz")
all_symbols = np.array(atoms.get_chemical_symbols())
is_solute = np.isin(all_symbols, solute_species)
species = all_symbols[is_solute]

# The labels come from a second, independent read of the file, so verify
# they line up one-to-one with the positions returned by load_apt_data.
# A mismatch would otherwise attribute species to the wrong atoms.
if len(species) != len(solute_pos):
    raise ValueError(
        f"species labels ({len(species)}) do not match solute positions "
        f"({len(solute_pos)}) for {filepath!r}"
    )

model = DBKNN(eps=8.0, min_samples=15, k=10)
model.fit(solute_pos)

# Per-cluster statistics from the DBSCAN labels, the final labels and the
# per-atom species, written to CSV and plotted.
stats = compute_cluster_stats(
    solute_pos, model.dbscan_labels_, model.labels_, species=species,
)
save_cluster_stats_csv(stats, output="cluster_stats.csv")
plot_cluster_stats(stats, output_path="cluster_stats.png")

## 7. Cluster volume and composition analysis (CSV)

In [None]:
import numpy as np

from dbknn import DBKNN
from dbknn.analysis import compute_cluster_stats, save_cluster_stats_csv
from dbknn.io import load_apt_csv, mc_to_species
from dbknn.plotting import plot_cluster_stats

csv_filepath = "apt_data.csv"
# Mass-to-charge windows per species, each given as a pair of bounds.
# NOTE(review): some windows overlap -- Fe and Mn both list (26.50, 28.50),
# and Mn (54.00, 55.50) lies inside Fe (53.50, 56.90). Which species an ion
# in an overlap is assigned to depends on the ranging code; confirm this is
# intended.
mc_ranges = {
    "Fe": [(23.30, 24.50), (26.50, 28.50), (53.50, 56.90)],
    "Ni": [(28.50, 30.60), (57.50, 60.60)],
    "Mn": [(26.50, 28.50), (54.00, 55.50)],
    "Cu": [(31.00, 32.50), (63.00, 65.50)],
}
matrix_species = ["Fe"]

solute_pos, _, _ = load_apt_csv(csv_filepath, mc_ranges, matrix_species=matrix_species)

# Get per-solute-atom species labels from mc ranging.
# NOTE(review): assumes the CSV is purely numeric (no header row) and that
# column index 3 holds the mass-to-charge ratio -- confirm against the file
# layout load_apt_csv expects.
raw = np.loadtxt(csv_filepath, delimiter=",")
all_species = mc_to_species(raw[:, 3], mc_ranges)
is_matrix = np.isin(all_species, matrix_species)
species = all_species[~is_matrix]

# The labels are derived independently of load_apt_csv, so verify they line
# up one-to-one with the returned positions. A mismatch would otherwise
# attribute species to the wrong atoms.
if len(species) != len(solute_pos):
    raise ValueError(
        f"species labels ({len(species)}) do not match solute positions "
        f"({len(solute_pos)}) for {csv_filepath!r}"
    )

model = DBKNN(eps=8.0, min_samples=15, k=10)
model.fit(solute_pos)

# Per-cluster statistics from the DBSCAN labels, the final labels and the
# per-atom species, written to CSV and plotted.
stats = compute_cluster_stats(
    solute_pos, model.dbscan_labels_, model.labels_, species=species,
)
save_cluster_stats_csv(stats, output="csv_cluster_stats.csv")
plot_cluster_stats(stats, output_path="csv_cluster_stats.png")