In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from numpy import mean
from sklearn.datasets import load_wine
from sklearn.model_selection import cross_val_score, RepeatedKFold
from sklearn.neighbors import KNeighborsClassifier
from sklearn.pipeline import Pipeline

from beaverfe import auto_feature_pipeline, evaluate_transformations

# 1. Get the dataset

In [None]:
# Fetch the wine dataset that ships with scikit-learn.
data = load_wine()

# Build one frame holding every feature column plus the class label, stored
# under the "target" column.
df = pd.DataFrame(data.data, columns=data.feature_names).assign(target=data.target)

# Separate the label vector from the feature matrix used by the cells below.
y = df["target"]
x = df.drop(columns="target")

# 2. Define the model

In [None]:
# Metric name and optimisation direction passed to the beaverfe calls further
# down in the notebook.
scoring, direction = "accuracy", "maximize"

# Estimator under study, constructed with scikit-learn's default arguments.
model = KNeighborsClassifier()

# 3. Evaluate base model

In [None]:
# Baseline: cross-validate the bare model on the raw features so there is a
# reference score to compare the transformed features against.
pipe = Pipeline(steps=[("m", model)])

# 10-fold CV repeated 3 times; the fixed random_state keeps the splits
# reproducible between runs.
cv = RepeatedKFold(n_splits=10, n_repeats=3, random_state=23)

# Use the shared `scoring` setting instead of a hard-coded metric name, so the
# baseline stays on the same metric as the beaverfe calls if `scoring` changes.
base_scores = cross_val_score(pipe, x, y, scoring=scoring, cv=cv, n_jobs=-1)
base_score = mean(base_scores)
print(f"{base_score:.3f}")

# 4. Run auto transformations

In [None]:
# Ask beaverfe's auto_feature_pipeline for transformations, given the data,
# the model, the metric and its optimisation direction, with verbose output
# turned off.
transformations = auto_feature_pipeline(
    x,
    y,
    model,
    scoring,
    direction,
    verbose=False,
)

# 5. Evaluate

In [None]:
# Score the transformations obtained above with the same model and metric.
# NOTE(review): cv=3 here differs from the RepeatedKFold (10 splits x 3
# repeats) used for the baseline score, so the two are not measured on the
# same splits — confirm whether evaluate_transformations accepts a splitter
# object before aligning them.
scores = evaluate_transformations(
    transformations,
    x,
    y,
    model,
    scoring,
    cv=3,
    plot_file=None,
)

In [None]:
# Plot the score recorded for each entry in `scores`, in order.
names = [s["name"] for s in scores]
values = [s["score"] for s in scores]

# Prefix each name with its position so repeated names stay distinguishable
# on the axis.
display_names = [f"{i} - {name}" for i, name in enumerate(names)]

# Tick positions get their own name: reusing `x` here would overwrite the
# feature matrix defined earlier and break re-running the cells that use it.
steps = np.arange(len(values))

fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(steps, values, marker="o", linestyle="-", linewidth=2)
ax.set_title("Score evolution by transformation")
ax.set_xlabel("Step")
ax.set_ylabel("Score")
ax.grid(True, linestyle="--", alpha=0.6)

# Right-align the rotated labels so each one ends under its own tick.
ax.set_xticks(steps)
ax.set_xticklabels(display_names, rotation=45, ha="right")
fig.tight_layout()
plt.show()