In [1]:
# Importaciones necesarias
import sys
import os
import pandas as pd
import joblib
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
from sklearn.linear_model import LinearRegression, ElasticNet, Ridge
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import GradientBoostingRegressor, StackingRegressor

# Make the project's custom modules under ../src importable.
# Guarded so that re-running this cell does not keep appending duplicate entries.
if '../src' not in sys.path:
    sys.path.append('../src')

# Importar funciones utilitarias y de modelado
from utils.utils import load_data
from production.model_deployment import (
    split_data,
    scale_data,
    train_random_forest,
    train_gradient_boosting,
    train_extra_trees,
    train_and_evaluate_stacking,
    save_model,
    load_model
)

def main(
    data_path='../data/processed/prediction_edad_marcha.csv',
    model_dir='../src/models/age_prediction_models',
):
    """Train and save the base regressors, then evaluate stacking variants.

    Steps, in order:
      1. Create ``model_dir`` if it does not exist.
      2. Load the dataset from ``data_path`` with ``load_data`` and keep the
         selected columns.
      3. Split the data with ``split_data`` (passing ``'edad'`` as the second
         argument) and call ``scale_data`` on the resulting splits.
      4. Train each base model (``train_random_forest``,
         ``train_gradient_boosting``, ``train_extra_trees``) on the unscaled
         training split and save it under ``model_dir`` with ``save_model``.
      5. Call ``train_and_evaluate_stacking`` once per candidate final
         estimator, passing the three base models as base estimators.

    Args:
        data_path: Path of the CSV file handed to ``load_data``. Defaults to
            the processed gait/age dataset originally hard-coded here.
        model_dir: Directory where models are written. Defaults to the
            location originally hard-coded here.

    Returns:
        None.
    """
    # Make sure the output directory exists before any model is saved
    os.makedirs(model_dir, exist_ok=True)

    # Load and prepare the data
    df = load_data(data_path)
    # 'edad' is included here and also named in split_data below
    # (presumably the target column — verify against split_data).
    cols = ['edad', 'step length_walk', 'total force rate_walk', 'footstrike type_walk', 'stance excursion (mp->to)_walk', 'imc']
    X_train, X_test, y_train, y_test = split_data(df[cols], 'edad')
    # NOTE(review): the scaled splits below are never used in this function;
    # every model is trained on the unscaled X_train. The call is kept in case
    # scale_data has side effects — confirm, then remove or use the outputs.
    (X_train_ss, X_test_ss), (X_train_mm, X_test_mm) = scale_data(X_train, X_test)

    # Train and save the individual models. Each entry is
    # (stacking key, training function, output file name); the order matters
    # because it defines the order of base_estimators.
    base_model_specs = [
        ('rf', train_random_forest, 'rf_model.pkl'),
        ('gbr', train_gradient_boosting, 'gbr_model.pkl'),
        ('et', train_extra_trees, 'extraTrees_model.pkl'),
    ]
    base_estimators = []
    for key, train_fn, filename in base_model_specs:
        model = train_fn(X_train, y_train)
        save_model(model, filename, model_dir)
        base_estimators.append((key, model))

    # Candidate final estimators for the stacking regressor
    final_estimators = [
        ('Linear Regression', LinearRegression()),
        ('Ridge', Ridge()),
        ('ElasticNet', ElasticNet()),
        ('Decision Tree', DecisionTreeRegressor(max_depth=5)),
        ('Gradient Boosting', GradientBoostingRegressor(n_estimators=100))
    ]

    # Evaluate one stacking configuration per final estimator
    for name, estimator in final_estimators:
        train_and_evaluate_stacking(X_train, y_train, X_test, y_test, base_estimators, estimator, name, model_dir)

# Entry-point guard: run the pipeline only when this code executes as the
# top-level module (a script run directly, or a notebook cell, where
# __name__ is "__main__"); importing it elsewhere does not trigger training.
if __name__ == "__main__":
    main()

2024-05-10 15:55:31,103 - INFO - Testing with final estimator: Linear Regression
2024-05-10 15:55:31,137 - INFO - Linear Regression - Train R2: 0.8659, Test R2: 0.5221
2024-05-10 15:55:33,575 - INFO - Testing with final estimator: Ridge
2024-05-10 15:55:33,607 - INFO - Ridge - Train R2: 0.8659, Test R2: 0.5221
2024-05-10 15:55:36,022 - INFO - Testing with final estimator: ElasticNet
2024-05-10 15:55:36,054 - INFO - ElasticNet - Train R2: 0.8635, Test R2: 0.5203
2024-05-10 15:55:38,476 - INFO - Testing with final estimator: Decision Tree
2024-05-10 15:55:38,508 - INFO - Decision Tree - Train R2: 0.7751, Test R2: 0.4389
2024-05-10 15:55:40,959 - INFO - Testing with final estimator: Gradient Boosting
2024-05-10 15:55:40,992 - INFO - Gradient Boosting - Train R2: 0.8239, Test R2: 0.4524


In [2]:
# Reload the saved Extra Trees model from the models directory and display it.
# The file name and directory match the ones main() passes to save_model.
loaded_model = load_model(
    'extraTrees_model.pkl',
    '../src/models/age_prediction_models',
)
loaded_model