In [None]:
import json

import pandas as pd
from numpy import mean
from sklearn.datasets import load_wine
from sklearn.model_selection import cross_val_score, RepeatedKFold
from sklearn.neighbors import KNeighborsClassifier
from sklearn.pipeline import Pipeline

from beaverfe import auto_feature_pipeline, BeaverPipeline

# 1. Get the dataset

In [None]:
# Load the wine dataset; the returned object exposes `data`, `target`
# and `feature_names`, which are used to build the frame below.
data = load_wine()

# One frame holding every feature column plus a trailing "target" column.
df = pd.DataFrame(data.data, columns=data.feature_names).assign(target=data.target)

# Split the frame back into the label vector and the feature matrix.
y = df["target"]
x = df.drop(columns="target")

# 2. Define the model

In [None]:
# Estimator under test, constructed with its default arguments.
model = KNeighborsClassifier()

# Metric name and optimisation direction; both are handed to
# auto_feature_pipeline later in the notebook.
scoring, direction = "accuracy", "maximize"

# 3. Evaluate base model

In [None]:
# Baseline: the bare model wrapped in a single-step pipeline, scored with
# repeated 10-fold cross-validation (3 repeats, fixed seed for repeatable
# splits).
pipe = Pipeline(steps=[("m", model)])
cv = RepeatedKFold(n_splits=10, n_repeats=3, random_state=23)

# Score with the shared `scoring` setting instead of a hard-coded metric so
# the baseline always uses the same metric that is passed to
# auto_feature_pipeline.
base_scores = cross_val_score(pipe, x, y, scoring=scoring, cv=cv, n_jobs=-1)
base_score = mean(base_scores)
print(f"{base_score:.3f}")

# 4. Run auto transformations

In [None]:
# Ask beaverfe for a set of transformations given the data, the model, the
# metric name and the optimisation direction (verbose output disabled).
# NOTE(review): arguments are kept positional because the keyword names of
# auto_feature_pipeline are not visible here.
transformations = auto_feature_pipeline(
    x,
    y,
    model,
    scoring,
    direction,
    verbose=False,
)

# Wrap the returned transformations in a BeaverPipeline so they can be used
# as a step inside a scikit-learn Pipeline.
transformer = BeaverPipeline(transformations)

In [None]:
# Render the selected transformations as indented JSON for inspection.
transformations_json = json.dumps(transformations, indent=4)
print(transformations_json)

# 5. Evaluate the transformed pipeline

In [None]:
# Transformed pipeline: apply the beaverfe transformer, then fit the model.
pipe = Pipeline(steps=[("t", transformer), ("m", model)])

# Same splitter settings and seed as the baseline run, so both scores are
# computed on identical folds.
cv = RepeatedKFold(n_splits=10, n_repeats=3, random_state=23)

# Score with the shared `scoring` setting instead of a hard-coded metric so
# the evaluation matches the metric passed to auto_feature_pipeline.
# NOTE(review): `transformations` was derived from the full x/y before this
# cross-validation, so the reported score may be optimistic — confirm how
# auto_feature_pipeline selects them.
scores = cross_val_score(pipe, x, y, scoring=scoring, cv=cv, n_jobs=-1)
score = mean(scores)
print(f"Improved: {base_score:.3f} -> {score:.3f}")