In [1]:
import pickle
import mlflow

import os
import sys

sys.path.append(os.path.abspath(os.path.join(os.path.dirname(os.getcwd()), "")))
from src.models.train import train_and_log_model


from sklearn.model_selection import train_test_split

In [2]:
# Load processed data
with open("../data/processed/split_data_selected.pkl", "rb") as f:
    X_train, X_test, y_train, y_test = pickle.load(f)

In [3]:
# Models
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from interpret.glassbox import ExplainableBoostingClassifier
import xgboost as xgb

In [4]:
mlflow.set_tracking_uri("mlruns")
mlflow.set_experiment("Breast Cancer Prediction")

<Experiment: artifact_location='file:///c:/Users/mrosk/OneDrive/Desktop/breast_cancer_prediction/notebooks/mlruns/822727107593033544', creation_time=1748421783420, experiment_id='822727107593033544', last_update_time=1748421783420, lifecycle_stage='active', name='Breast Cancer Prediction', tags={}>

In [6]:
# Train and log each model
train_and_log_model("Logistic Regression", LogisticRegression(max_iter=1000), X_train, y_train, X_test, y_test, "../models")
train_and_log_model("Decision Tree", DecisionTreeClassifier(), X_train, y_train, X_test, y_test, "../models")
train_and_log_model("Random Forest", RandomForestClassifier(), X_train, y_train, X_test, y_test, "../models")
train_and_log_model("EBM", ExplainableBoostingClassifier(), X_train, y_train, X_test, y_test, "../models")
train_and_log_model("XGBoost", xgb.XGBClassifier(use_label_encoder=False, eval_metric='logloss'), X_train, y_train, X_test, y_test, "../models")



Model Logistic Regression trained and logged successfully.




Model Decision Tree trained and logged successfully.




Model Random Forest trained and logged successfully.


Parameters: { "use_label_encoder" } are not used.



Model EBM trained and logged successfully.




Model XGBoost trained and logged successfully.


In [8]:
# Load full feature split from pickle
with open("../data/processed/split_data_full.pkl", "rb") as f:
    X_train_full, X_test_full, y_train, y_test = pickle.load(f)

# Train and log on full feature set
train_and_log_model("Logistic Regression (Full)", LogisticRegression(max_iter=1000), X_train_full, y_train, X_test_full, y_test, "../models")
train_and_log_model("Decision Tree (Full)", DecisionTreeClassifier(), X_train_full, y_train, X_test_full, y_test, "../models")
train_and_log_model("Random Forest (Full)", RandomForestClassifier(), X_train_full, y_train, X_test_full, y_test, "../models")
train_and_log_model("EBM (Full)", ExplainableBoostingClassifier(), X_train_full, y_train, X_test_full, y_test, "../models")
train_and_log_model("XGBoost (Full)", xgb.XGBClassifier(use_label_encoder=False, eval_metric="logloss"), X_train_full, y_train, X_test_full, y_test, "../models")



Model Logistic Regression (Full) trained and logged successfully.




Model Decision Tree (Full) trained and logged successfully.




Model Random Forest (Full) trained and logged successfully.


Parameters: { "use_label_encoder" } are not used.



Model EBM (Full) trained and logged successfully.




Model XGBoost (Full) trained and logged successfully.
