# Test Pipeline

In [4]:
import os
import sys
# Append the parent directory of the current working directory to sys.path.
# NOTE(review): presumably this is what lets the `src.*` imports below resolve
# when the notebook is launched from a subfolder of the project — confirm.
sys.path.append(os.path.dirname(os.path.abspath(os.getcwd())))
from sklearn.datasets import load_breast_cancer
from src.preprocessor import Preprocessor
from src.model_selector import ModelSelector
from src.evaluator import Evaluator

In [5]:
# --- Data -------------------------------------------------------------
# scikit-learn's bundled breast-cancer dataset: features in X, labels in y.
data = load_breast_cancer()
X = data.data
y = data.target

# --- Preprocessing ----------------------------------------------------
# The project Preprocessor handles both the train/test split and the scaling.
prep = Preprocessor()
X_train, X_test, y_train, y_test = prep.split_data(X, y)
X_train_scaled, X_test_scaled = prep.scale(X_train, X_test)

# --- Training & evaluation --------------------------------------------
selector = ModelSelector()
evaluator = Evaluator()

# Maps each model name to whatever evaluator.evaluate() returns for it.
# Every candidate from the selector is fitted on the scaled training split
# and then evaluated on those same training arrays.
# NOTE(review): X_test_scaled / y_test are produced above but never used in
# this cell — confirm whether a held-out evaluation was intended.
results = {}
for name, model in selector.get_models().items():
    model.fit(X_train_scaled, y_train)
    metrics = evaluator.evaluate(model, X_train_scaled, y_train)
    results.update({name: metrics})

In [11]:
# Render the per-model metrics collected in `results` by the training loop.
display(results)

{'LogisticRegression': {'mean_score': np.float64(0.9736263736263737),
  'std': np.float64(0.01785502946073838),
  'all_scores': array([0.97802198, 0.96703297, 1.        , 0.97802198, 0.94505495])},
 'RandomForest': {'mean_score': np.float64(0.9604395604395604),
  'std': np.float64(0.01490621974313245),
  'all_scores': array([0.97802198, 0.94505495, 0.97802198, 0.95604396, 0.94505495])},
 'XGBoost': {'mean_score': np.float64(0.964835164835165),
  'std': np.float64(0.014578570507056717),
  'all_scores': array([0.95604396, 0.96703297, 0.98901099, 0.96703297, 0.94505495])}}