# Task 08 - Simulação de um fluxo (pipeline) de retreino automático

In [7]:
from data_access_handler import DataAccessHandler
from feature_selector import FeatureSelector
from model import Model,ModelSelector,ModelOptimizer
from utils import f1_score_micro

# --- Filesystem locations ---------------------------------------------------
# Directory handed to DataAccessHandler(main_path=...) and to the
# FeatureSelector save/load calls in the cells below.
DATA_PATH: str = "./data/"
# Directory handed to Model.save(...) / Model.load(...) in the cells below.
MODEL_PATH: str = "./models/"

# --- Optimisation / reproducibility knobs -----------------------------------
# NOTE(review): none of these three values is referenced by the cells visible
# in this notebook; presumably they are meant for the Optuna-based optimizer
# (cross-validation folds, number of trials, seed) — confirm where they are
# consumed.
RANDOM_STATE: int = 42
OPTUNA_N_TRIALS: int = 10
OPTUNA_CV: int = 5

In [4]:
# ---------------------------------------------------------------------------
# Automatic retraining pipeline:
#   load training data -> select features -> optimize the candidate models ->
#   persist them -> reload them from disk -> pick a winner on the test split ->
#   persist the winner as "winner_model".
# ---------------------------------------------------------------------------

# 1. Load the training split.
print("Loading training data into memory...")
access_handler = DataAccessHandler(main_path=DATA_PATH)
df_train = access_handler.load(dataset_type="train")
print("Data loaded!\n")

target = 'fetal_health'
# Feature selection is run on every column except the target.
X, y = df_train.drop(columns=target), df_train[target].values.ravel()

# 2. Select the features and persist the selection under DATA_PATH.
print("Selecting best features for training...")
feature_selector = FeatureSelector()
feature_selector.select_best_features(X=X, y=y)
feature_selector.save_best_features(path=DATA_PATH)
# Accessed without a call, exactly as elsewhere in this notebook
# (presumably a property — confirm in feature_selector.py).
features = feature_selector.get_selected_features
print("Best features selected!\n")

# From here on only the selected columns are used; y is unchanged, so it is
# not recomputed.
X = df_train[features]

# 3. Optimize the candidate models on the reduced training matrix.
print("Optimizing available ML models...")
model_optimizer = ModelOptimizer()
model_optimizer.optimize_all_models(X, y)
lr_best, rf_best, lgbm_best = model_optimizer.get_optimized_models
print("All models optimized!\n")

# 4. Wrap each optimized estimator in Model and write it to MODEL_PATH.
lr_model = Model(model=lr_best)
lr_model.save(path=MODEL_PATH, model_name="logistic_regression")

rf_model = Model(model=rf_best)
rf_model.save(path=MODEL_PATH, model_name="random_forest")

# The wrapper gets its own name (lgbm_model). Rebinding lgbm_best to the
# wrapper made the `del` below list the same name twice, which raises
# NameError on the second occurrence and aborts the cell.
lgbm_model = Model(model=lgbm_best)
lgbm_model.save(path=MODEL_PATH, model_name="light_gbm")

# Drop every in-memory estimator and wrapper; the objects used for model
# selection below are the ones read back from the files just written.
del lr_best, rf_best, lgbm_best, lr_model, rf_model, lgbm_model
print("All optimized models saved!\n")

# 5. Reload the persisted models.
print("Loading saved models into memory...")
lr_model = Model()
lr_model.load(path=MODEL_PATH, model_name="logistic_regression")

rf_model = Model()
rf_model.load(path=MODEL_PATH, model_name="random_forest")

lgbm_model = Model()
lgbm_model.load(path=MODEL_PATH, model_name="light_gbm")
print("All optimized models loaded into memory!\n")


# 6. Load the test split, restricted to the selected features.
# X and y are rebound here: from this point they hold the test split.
print("Loading test set for model selection...")
df_test = access_handler.load(dataset_type="test")
X, y = df_test[features], df_test[target].values.ravel()
print("Data loaded into memory!\n")

# 7. Choose the final model among the reloaded candidates.
# NOTE(review): the winner is chosen on the test split, so that split no
# longer gives an unbiased estimate of the winner's performance.
print("Choosing final model...")
model_selector = ModelSelector(
    models=[lr_model, rf_model, lgbm_model],
    model_names=["logistic_regression", "random_forest", "light_gbm"],
)
model_selector.select_best_model(X, y)

# 8. Persist the winner under the name loaded by the inference flow.
print("Saving final model...")
model_selector.get_winner_model.save(path=MODEL_PATH, model_name="winner_model")
print("Final model saved!")

del model_selector, df_test, access_handler

# Task 09 - Simulação de um Fluxo de Inferência

In [9]:
# ---------------------------------------------------------------------------
# Inference flow: load a few samples, restrict them to the persisted feature
# selection, load the persisted winner model and predict.
# ---------------------------------------------------------------------------

# NOTE(review): the message says "training" but the *test* split is loaded on
# the next lines; the text is kept as-is so it matches the recorded cell
# output — consider rewording it on the next re-run.
print("Loading training data into memory...")
access_handler = DataAccessHandler(main_path=DATA_PATH)
df = access_handler.load(dataset_type="test").iloc[:10]  # simulate 10 samples for inference
print("Data loaded!\n")

# Reload the feature selection that was saved under DATA_PATH.
feature_selector = FeatureSelector()
feature_selector.load_best_features(path=DATA_PATH)
features = feature_selector.get_selected_features
print("Best features selected!\n")

target = 'fetal_health'  # not used in this cell; only the feature columns are needed
X = df[features]

# Report success only after the model has actually been loaded; previously
# the message was printed before Model() / load() ran, so a failing load
# would still have announced "Model loaded".
model = Model()
model.load(path=MODEL_PATH, model_name="winner_model")
print("Model loaded into memory!\n")

print("Prediction for provided sample:")
# Last expression of the cell: the predictions are shown as the cell output.
model.predict(X)

Loading training data into memory...
Data loaded!

Best features selected!

Model loaded into memory!

Prediction for provided sample:


array([1., 1., 1., 1., 1., 3., 1., 3., 2., 2.])