## 📦 1. Importar Librerías

sklearn, XGBoost, LightGBM y CatBoost para modelado de regresión y evaluación.

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime
import os
import warnings
warnings.filterwarnings('ignore')

from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor, ExtraTreesRegressor, AdaBoostRegressor
from sklearn.linear_model import Ridge, Lasso, ElasticNet
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import xgboost as xgb
import lightgbm as lgb
from catboost import CatBoostRegressor
import pickle

# Plot appearance: matplotlib style sheet first, then the seaborn palette
# (same order as before, since both touch the global plotting defaults).
plt.style.use('seaborn-v0_8')
sns.set_palette('husl')

# Create the output folders up front; exist_ok keeps the cell re-runnable.
# NOTE(review): nothing visible in this notebook writes to '../E_reports';
# every artefact is saved under '../reports'. Confirm this path is intended.
for output_dir in ('../E_reports', '../reports/figures'):
    os.makedirs(output_dir, exist_ok=True)

print('‚úÖ Librer√≠as cargadas', 'üìÅ Directorios de reportes creados', sep='\n')

## 📂 2. Cargar Datos Procesados

Carga de X_train, X_val, y_train, y_val desde data/02_processed/

In [None]:
# Directory holding the processed CSV files, and the directory where the
# trained models are written later in the notebook.
PROCESSED_PATH = '../data/02_processed/'
MODELS_PATH = '../models/'


def _read_processed(filename):
    """Read one CSV file located in PROCESSED_PATH into a DataFrame."""
    return pd.read_csv(f'{PROCESSED_PATH}{filename}')


# Feature matrices and their targets; the target files are reduced to the
# single 'Weekly_Sales' column.
X_train = _read_processed('X_train.csv')
X_val = _read_processed('X_val.csv')
y_train = _read_processed('y_train.csv')['Weekly_Sales']
y_val = _read_processed('y_val.csv')['Weekly_Sales']

# Full processed frames; later cells read Store, Dept, Weekly_Sales and
# IsHoliday from them.
train_full = _read_processed('train_processed.csv')
val_full = _read_processed('val_processed.csv')

print(f'X_train: {X_train.shape}, y_train: {y_train.shape}')
print(f'X_val: {X_val.shape}, y_val: {y_val.shape}')
print(f'Features: {X_train.shape[1]}')

## 🎯 3. Implementar Métrica WMAE

Métrica de evaluación: las semanas festivas pesan 5x, las semanas normales 1x (según la competencia de Kaggle).

In [None]:
def wmae(y_true, y_pred, is_holiday):
    """
    Weighted Mean Absolute Error (WMAE).

    Rows flagged as holidays get weight 5 and every other row weight 1:
    ``sum(w_i * |y_i - yhat_i|) / sum(w_i)``.

    All three inputs are converted to NumPy arrays before any arithmetic so
    that values are always paired by position. Without this, subtracting two
    pandas Series with different indexes would align them by label (yielding
    NaN or a shape error against the positional weights), and plain Python
    lists could not be subtracted at all.

    Parameters:
    - y_true: observed values (array-like)
    - y_pred: predicted values (array-like, broadcastable against y_true)
    - is_holiday: truthy/falsy flags marking the holiday rows (array-like)

    Returns:
    - wmae_score: the WMAE value; NaN for empty inputs, because the total
      weight is then zero
    """
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)
    weights = np.where(np.asarray(is_holiday), 5, 1)
    weighted_abs_error = np.abs(y_true - y_pred) * weights
    return np.sum(weighted_abs_error) / np.sum(weights)

print('‚úÖ Funci√≥n WMAE implementada')
print('\nF√≥rmula: WMAE = Œ£(w_i * |y_i - ≈∑_i|) / Œ£(w_i)')
print('donde w_i = 5 si festivo, 1 si normal')

## 📊 4. Modelo Baseline

Media histórica por Store-Dept como modelo de referencia.

In [None]:
# Mean historical Weekly_Sales for every (Store, Dept) pair seen in training.
baseline_means = train_full.groupby(['Store','Dept'])['Weekly_Sales'].mean().to_dict()

def baseline_predict(df):
    """
    Predict each row of ``df`` with the training mean of its (Store, Dept).

    Pairs that never appear in ``baseline_means`` fall back to the overall
    mean of ``train_full['Weekly_Sales']``. That fallback is computed once
    per call rather than once per row, and the rows are walked by zipping
    the two key columns instead of ``DataFrame.iterrows``.

    Parameters:
    - df: DataFrame with 'Store' and 'Dept' columns

    Returns:
    - NumPy array with one prediction per row of ``df``, in row order
    """
    fallback = train_full['Weekly_Sales'].mean()
    return np.array([
        baseline_means.get(pair, fallback)
        for pair in zip(df['Store'], df['Dept'])
    ])

# Holiday flags of the validation rows; later cells reuse them as WMAE weights.
is_holiday_val = val_full['IsHoliday'].values
y_pred_baseline = baseline_predict(val_full)

# Reference scores that every trained model is compared against.
baseline_wmae = wmae(y_val, y_pred_baseline, is_holiday_val)
baseline_mae = mean_absolute_error(y_val, y_pred_baseline)
baseline_mse = mean_squared_error(y_val, y_pred_baseline)
baseline_rmse = np.sqrt(baseline_mse)
baseline_r2 = r2_score(y_val, y_pred_baseline)

banner = '='*70
print(banner)
print('üìä BASELINE MODEL (Media Hist√≥rica)')
print(banner)
print(f'WMAE: ${baseline_wmae:,.2f}')
print(f'MAE: ${baseline_mae:,.2f}')
print(f'RMSE: ${baseline_rmse:,.2f}')
print(f'R¬≤: {baseline_r2:.4f}')

## 🌳 5. Random Forest

100 árboles, max_depth=20, n_jobs=-1 para paralelización.

In [None]:
print('üå≥ Entrenando Random Forest...\n')

rf_model = RandomForestRegressor(
    n_estimators=100,
    max_depth=20,
    min_samples_split=10,
    min_samples_leaf=5,
    random_state=42,
    n_jobs=-1,
    verbose=1
)

rf_model.fit(X_train, y_train)
y_pred_rf = rf_model.predict(X_val)

rf_wmae = wmae(y_val, y_pred_rf, is_holiday_val)
rf_mae = mean_absolute_error(y_val, y_pred_rf)
rf_rmse = np.sqrt(mean_squared_error(y_val, y_pred_rf))
rf_r2 = r2_score(y_val, y_pred_rf)

print('\n' + '='*70)
print('üå≥ RANDOM FOREST')
print('='*70)
print(f'WMAE: ${rf_wmae:,.2f} | Mejora: {(baseline_wmae-rf_wmae)/baseline_wmae*100:.2f}%')
print(f'MAE: ${rf_mae:,.2f}')
print(f'RMSE: ${rf_rmse:,.2f}')
print(f'R¬≤: {rf_r2:.4f}')

## ⚡ 6. XGBoost

200 estimadores, learning_rate=0.1, early_stopping para prevenir overfitting.

In [None]:
# Pair every training column with the importance reported by the fitted
# Random Forest and rank them from most to least important.
feature_importance_rf = (
    pd.DataFrame({
        'Feature': X_train.columns,
        'Importance': rf_model.feature_importances_
    })
    .sort_values('Importance', ascending=False)
)
top_features = feature_importance_rf.head(20)

print('\nüìä Top 20 Features m√°s importantes (Random Forest):\n')
print(top_features)

# Horizontal bar chart of the same 20 rows; the y axis is inverted so the
# most important feature ends up at the top.
bar_positions = range(len(top_features))
plt.figure(figsize=(12, 8))
plt.barh(bar_positions, top_features['Importance'])
plt.yticks(bar_positions, top_features['Feature'])
plt.xlabel('Importance')
plt.title('Top 20 Feature Importance - Random Forest')
plt.gca().invert_yaxis()
plt.tight_layout()
plt.show()

## 💡 7. LightGBM

200 estimadores, learning_rate=0.1, optimizado para datasets grandes.

In [None]:
print('‚ö° Entrenando XGBoost...\n')

# early_stopping_rounds is configured on the estimator rather than passed to
# fit(): the fit() keyword was deprecated in XGBoost 1.6 and removed in 2.0,
# where it raises a TypeError. The constructor form needs XGBoost >= 1.6.
xgb_model = xgb.XGBRegressor(
    n_estimators=200,
    max_depth=8,
    learning_rate=0.1,
    subsample=0.8,
    colsample_bytree=0.8,
    random_state=42,
    n_jobs=-1,
    verbosity=1,
    early_stopping_rounds=20
)

# NOTE(review): the validation set drives early stopping and is also the set
# the scores below are computed on, so those scores are likely optimistic.
xgb_model.fit(
    X_train, y_train,
    eval_set=[(X_val, y_val)],
    verbose=10
)

y_pred_xgb = xgb_model.predict(X_val)

# Validation scores; WMAE uses the holiday flags of the validation rows.
xgb_wmae = wmae(y_val, y_pred_xgb, is_holiday_val)
xgb_mae = mean_absolute_error(y_val, y_pred_xgb)
xgb_rmse = np.sqrt(mean_squared_error(y_val, y_pred_xgb))
xgb_r2 = r2_score(y_val, y_pred_xgb)

print('\n' + '='*70)
print('‚ö° XGBOOST')
print('='*70)
print(f'WMAE: ${xgb_wmae:,.2f} | Mejora: {(baseline_wmae-xgb_wmae)/baseline_wmae*100:.2f}%')
print(f'MAE: ${xgb_mae:,.2f}')
print(f'RMSE: ${xgb_rmse:,.2f}')
print(f'R¬≤: {xgb_r2:.4f}')

## 📊 8. Comparación de Modelos

Tabla comparativa de WMAE, MAE, RMSE, R² y tiempo de entrenamiento.

In [None]:
print('üöÄ Entrenando LightGBM...\n')

lgb_model = lgb.LGBMRegressor(
    n_estimators=200,
    max_depth=8,
    learning_rate=0.1,
    num_leaves=31,
    subsample=0.8,
    colsample_bytree=0.8,
    random_state=42,
    n_jobs=-1,
    verbose=1
)

lgb_model.fit(
    X_train, y_train,
    eval_set=[(X_val, y_val)],
    eval_metric='mae',
    callbacks=[lgb.early_stopping(20), lgb.log_evaluation(10)]
)

y_pred_lgb = lgb_model.predict(X_val)

lgb_wmae = wmae(y_val, y_pred_lgb, is_holiday_val)
lgb_mae = mean_absolute_error(y_val, y_pred_lgb)
lgb_rmse = np.sqrt(mean_squared_error(y_val, y_pred_lgb))
lgb_r2 = r2_score(y_val, y_pred_lgb)

print('\n' + '='*70)
print('üöÄ LIGHTGBM')
print('='*70)
print(f'WMAE: ${lgb_wmae:,.2f} | Mejora: {(baseline_wmae-lgb_wmae)/baseline_wmae*100:.2f}%')
print(f'MAE: ${lgb_mae:,.2f}')
print(f'RMSE: ${lgb_rmse:,.2f}')
print(f'R¬≤: {lgb_r2:.4f}')

In [None]:
# Fit a further set of regressors so the comparison table is complete.
print('\nüöÄ Entrenando modelos adicionales...\n')

additional_models = {
    'CatBoost': CatBoostRegressor(
        n_estimators=200, max_depth=8, learning_rate=0.1,
        random_state=42, verbose=0,
    ),
    'Gradient Boosting': GradientBoostingRegressor(
        n_estimators=100, max_depth=8, learning_rate=0.1, random_state=42,
    ),
    'Extra Trees': ExtraTreesRegressor(
        n_estimators=100, max_depth=20, random_state=42, n_jobs=-1,
    ),
    'Ridge': Ridge(alpha=1.0, random_state=42),
    'Lasso': Lasso(alpha=1.0, random_state=42, max_iter=2000),
    'ElasticNet': ElasticNet(alpha=1.0, random_state=42, max_iter=2000),
}

# Per-model predictions, validation scores, elapsed seconds and the fitted
# estimator, keyed by display name.
additional_results = {}

for name, model in additional_models.items():
    print(f'  üîπ Entrenando {name}...')

    # The measured time covers both fitting and predicting on validation.
    fit_started = datetime.now()
    model.fit(X_train, y_train)
    val_pred = model.predict(X_val)
    elapsed_time = (datetime.now() - fit_started).total_seconds()

    model_wmae = wmae(y_val, val_pred, is_holiday_val)
    model_mae = mean_absolute_error(y_val, val_pred)
    model_rmse = np.sqrt(mean_squared_error(y_val, val_pred))
    model_r2 = r2_score(y_val, val_pred)

    additional_results[name] = {
        'predictions': val_pred,
        'wmae': model_wmae,
        'mae': model_mae,
        'rmse': model_rmse,
        'r2': model_r2,
        'time': elapsed_time,
        'model': model
    }

    print(f'     WMAE: ${model_wmae:,.2f} | Time: {elapsed_time:.2f}s')

print('\n‚úÖ Todos los modelos entrenados')

## 🏆 9. Seleccionar Mejor Modelo

Selección del modelo con menor WMAE (métrica de competencia).

In [None]:
# Assemble one row per model for the comparison table. The baseline and the
# three models trained in their own cells were not timed, so their 'Time'
# is recorded as 0.
primary_scores = [
    ('Baseline', baseline_wmae, baseline_mae, baseline_rmse, baseline_r2),
    ('Random Forest', rf_wmae, rf_mae, rf_rmse, rf_r2),
    ('XGBoost', xgb_wmae, xgb_mae, xgb_rmse, xgb_r2),
    ('LightGBM', lgb_wmae, lgb_mae, lgb_rmse, lgb_r2),
]

all_results = [
    {'Model': label, 'WMAE': wmae_value, 'MAE': mae_value,
     'RMSE': rmse_value, 'R¬≤': r2_value, 'Time': 0}
    for label, wmae_value, mae_value, rmse_value, r2_value in primary_scores
]
all_results.extend(
    {'Model': name, 'WMAE': res['wmae'], 'MAE': res['mae'],
     'RMSE': res['rmse'], 'R¬≤': res['r2'], 'Time': res['time']}
    for name, res in additional_results.items()
)

# Lowest WMAE first, so the first row is the best model.
results_df = pd.DataFrame(all_results).sort_values('WMAE')

# Persist the table as CSV.
results_df.to_csv('../reports/model_comparison_results.csv', index=False)
print('‚úÖ Resultados guardados: reports/model_comparison_results.csv')

wide_banner = '='*80
print('\n' + wide_banner)
print('üìä COMPARACI√ìN DE TODOS LOS MODELOS (10 MODELOS)')
print(wide_banner)
print(results_df.to_string(index=False))

best_row = results_df.iloc[0]
best_model_name = best_row['Model']
best_wmae = best_row['WMAE']
best_gain_pct = (baseline_wmae-best_wmae)/baseline_wmae*100
print(f'\nüèÜ MEJOR MODELO: {best_model_name}')
print(f'   WMAE: ${best_wmae:,.2f}')
print(f'   Mejora vs Baseline: {best_gain_pct:.2f}%')

## 📈 10. Visualización de Resultados

Gráficos: real vs predicho, importancia de features, distribución de residuos.

In [None]:
# Comparison figure: five metric panels plus one real-vs-predicted panel.
fig, axes = plt.subplots(2, 3, figsize=(20, 12))

# (column in results_df, panel title, bar colour) for the five metric panels.
panels = [
    ('WMAE', 'WMAE (Lower is Better)', '#e74c3c'),
    ('MAE', 'MAE (Lower is Better)', '#f39c12'),
    ('RMSE', 'RMSE (Lower is Better)', '#9b59b6'),
    ('R¬≤', 'R¬≤ Score (Higher is Better)', '#2ecc71'),
    ('Time', 'Training Time (s)', '#1abc9c'),
]
currency_metrics = ('WMAE', 'MAE', 'RMSE')

for idx, (metric, title, color) in enumerate(panels):
    # Panels fill the 2x3 grid row by row; the sixth slot is used below.
    ax = axes.flat[idx]
    # Ascending for every metric except R¬≤, which is sorted descending.
    data = results_df.sort_values(metric, ascending=(metric != 'R¬≤'))

    ax.barh(data['Model'], data[metric], color=color, edgecolor='black', alpha=0.7)
    ax.set_xlabel('Value', fontsize=11)
    ax.set_title(title, fontsize=13, fontweight='bold')
    ax.grid(axis='x', alpha=0.3)

    # Annotate each bar with its value, placed slightly right of the bar end.
    for i, v in enumerate(data[metric]):
        if metric in currency_metrics:
            x_pos, label = v + max(data[metric])*0.01, f'${v:,.0f}'
        elif metric == 'Time':
            x_pos, label = v + 0.05, f'{v:.2f}s'
        else:
            x_pos, label = v + 0.01, f'{v:.3f}'
        ax.text(x_pos, i, label, va='center', fontsize=9)

# Validation predictions of the best model: the individually trained models
# first, then the additional ones, otherwise the baseline predictions.
primary_predictions = {
    'Random Forest': y_pred_rf,
    'XGBoost': y_pred_xgb,
    'LightGBM': y_pred_lgb,
}
if best_model_name in primary_predictions:
    y_pred_best = primary_predictions[best_model_name]
elif best_model_name in additional_results:
    y_pred_best = additional_results[best_model_name]['predictions']
else:
    y_pred_best = y_pred_baseline

# Sixth panel: real vs predicted, with the identity line as reference.
ax = axes[1, 2]
sales_range = [y_val.min(), y_val.max()]
ax.scatter(y_val, y_pred_best, alpha=0.3, s=10, color='#3498db')
ax.plot(sales_range, sales_range, 'r--', lw=2)
ax.set_xlabel('Real Sales ($)', fontsize=11)
ax.set_ylabel('Predicted Sales ($)', fontsize=11)
ax.set_title(f'Real vs Predicted - {best_model_name}', fontsize=13, fontweight='bold')
ax.grid(alpha=0.3)

plt.suptitle('üìä An√°lisis Comparativo Completo - 10 Modelos de Regresi√≥n', 
             y=1.002, fontsize=16, fontweight='bold')
plt.tight_layout()
plt.savefig('../reports/figures/01_model_comparison.png', dpi=300, bbox_inches='tight')
print('\n‚úÖ Figura guardada: reports/figures/01_model_comparison.png')
plt.show()

In [None]:
# Residual diagnostics for the best model, laid out on a 2x2 grid.
from scipy import stats

fig, axes = plt.subplots(2, 2, figsize=(15, 12))
(ax_hist, ax_qq), (ax_resid, ax_abs) = axes

# Residual = observed minus predicted on the validation set.
residuals = y_val - y_pred_best

# Top-left: histogram of residuals with a reference line at zero.
ax_hist.hist(residuals, bins=50, edgecolor='black', alpha=0.7, color='skyblue')
ax_hist.axvline(0, color='red', linestyle='--', lw=2)
ax_hist.set_xlabel('Residuals ($)', fontsize=11)
ax_hist.set_ylabel('Frequency', fontsize=11)
ax_hist.set_title('Distribution of Residuals', fontsize=12, fontweight='bold')
ax_hist.grid(alpha=0.3)

# Top-right: Q-Q plot of the residuals against a normal distribution.
stats.probplot(residuals, dist="norm", plot=ax_qq)
ax_qq.set_title('Q-Q Plot', fontsize=12, fontweight='bold')
ax_qq.grid(alpha=0.3)

# Bottom-left: residuals against predictions, with a zero reference line.
ax_resid.scatter(y_pred_best, residuals, alpha=0.3, s=10, color='purple')
ax_resid.axhline(0, color='red', linestyle='--', lw=2)
ax_resid.set_xlabel('Predicted Sales ($)', fontsize=11)
ax_resid.set_ylabel('Residuals ($)', fontsize=11)
ax_resid.set_title('Residuals vs Predictions', fontsize=12, fontweight='bold')
ax_resid.grid(alpha=0.3)

# Bottom-right: absolute residuals against predictions.
ax_abs.scatter(y_pred_best, np.abs(residuals), alpha=0.3, s=10, color='orange')
ax_abs.set_xlabel('Predicted Sales ($)', fontsize=11)
ax_abs.set_ylabel('Absolute Residuals ($)', fontsize=11)
ax_abs.set_title('Absolute Residuals vs Predictions', fontsize=12, fontweight='bold')
ax_abs.grid(alpha=0.3)

plt.suptitle(f'üìä An√°lisis de Residuos - {best_model_name}', 
             y=1.001, fontsize=14, fontweight='bold')
plt.tight_layout()
plt.savefig('../reports/figures/02_residual_analysis.png', dpi=300, bbox_inches='tight')
print('‚úÖ Figura guardada: reports/figures/02_residual_analysis.png')
plt.show()

## 💾 11. Guardar Modelo

Exportación del mejor modelo a models/ en formato pickle.

In [None]:
import os
os.makedirs(MODELS_PATH, exist_ok=True)

# Map the winning display name to its fitted estimator: the individually
# trained models first, then the additional ones.
primary_models = {
    'LightGBM': lgb_model,
    'XGBoost': xgb_model,
    'Random Forest': rf_model,
}
if best_model_name in primary_models:
    best_model = primary_models[best_model_name]
elif best_model_name in additional_results:
    best_model = additional_results[best_model_name]['model']
else:
    # NOTE(review): this branch is taken when 'Baseline' wins; the Random
    # Forest is then pickled under a file name derived from
    # best_model_name. Confirm that this default is intended.
    best_model = rf_model

# File name: lower-cased model name with spaces replaced by underscores.
model_slug = best_model_name.lower().replace(" ","_")
model_file = MODELS_PATH + f'best_model_{model_slug}.pkl'
with open(model_file, 'wb') as f:
    pickle.dump(best_model, f)
print(f'‚úÖ Modelo guardado: {model_file}')

# Metadata describing the saved model. The remaining metrics are looked up
# once from the results_df row(s) whose 'Model' matches the best model.
best_metrics = results_df.loc[
    results_df['Model']==best_model_name, ['MAE', 'RMSE', 'R¬≤']
]
trained_at = datetime.now().strftime('%Y-%m-%d %H:%M:%S')

model_info = {
    'model_name': best_model_name,
    'model_type': type(best_model).__name__,
    'wmae': best_wmae,
    'mae': best_metrics['MAE'].values[0],
    'rmse': best_metrics['RMSE'].values[0],
    'r2': best_metrics['R¬≤'].values[0],
    'feature_names': list(X_train.columns),
    'n_features': X_train.shape[1],
    'training_date': trained_at
}

# Stored next to the pickled model.
model_info_file = MODELS_PATH + 'model_info.pkl'
with open(model_info_file, 'wb') as f:
    pickle.dump(model_info, f)
print(f'‚úÖ Informaci√≥n del modelo guardada: {model_info_file}')

# Build the plain-text training report as a list of lines and write it out
# in a single call.
report_path = '../reports/model_training_report.txt'

rule = "="*80
report_gain_pct = (baseline_wmae-best_wmae)/baseline_wmae*100

report_lines = [
    # Title block
    rule + "\n",
    "üìä REPORTE DE ENTRENAMIENTO DE MODELOS\n",
    "Walmart Recruiting - Store Sales Forecasting\n",
    rule + "\n\n",
    f"üìÖ Fecha: {model_info['training_date']}\n",
    "üë§ Autor: Miguel Antonio Ben√≠tez Gonz√°lez\n",
    "üìß Email: mbenitezg01@gmail.com\n\n",
    # Evaluated models, one line each in results_df order
    rule + "\n",
    f"ü§ñ MODELOS EVALUADOS: {len(results_df)}\n",
    rule + "\n\n",
]
report_lines.extend(
    f"  ‚Ä¢ {row['Model']:<20s} - WMAE: ${row['WMAE']:>10,.2f}\n"
    for _, row in results_df.iterrows()
)
report_lines += [
    # Best model and its validation metrics
    "\n" + rule + "\n",
    f"üèÜ MEJOR MODELO: {best_model_name}\n",
    rule + "\n\n",
    f"WMAE (Weighted MAE):  ${model_info['wmae']:,.2f}\n",
    f"MAE:                  ${model_info['mae']:,.2f}\n",
    f"RMSE:                 ${model_info['rmse']:,.2f}\n",
    f"R¬≤:                   {model_info['r2']:.4f}\n",
    f"\nMejora vs Baseline:   {report_gain_pct:.2f}%\n",
    # Dataset summary; the store and department counts are hard-coded text
    "\n" + rule + "\n",
    "üìà INFORMACI√ìN DE DATOS\n",
    rule + "\n\n",
    f"Tama√±o conjunto entrenamiento: {len(X_train):,} semanas\n",
    f"Tama√±o conjunto validaci√≥n:    {len(X_val):,} semanas\n",
    f"N√∫mero de features:            {model_info['n_features']}\n",
    "Tiendas:                       45\n",
    "Departamentos:                 ~81\n",
    # Generated artefacts
    "\n" + rule + "\n",
    "üìÅ ARCHIVOS GENERADOS\n",
    rule + "\n\n",
    "Modelos:\n",
    f"  ‚Ä¢ {model_file}\n",
    f"  ‚Ä¢ {model_info_file}\n\n",
    "Reportes:\n",
    "  ‚Ä¢ reports/model_comparison_results.csv\n",
    "  ‚Ä¢ reports/model_training_report.txt\n\n",
    "Figuras:\n",
    "  ‚Ä¢ reports/figures/01_model_comparison.png\n",
    "  ‚Ä¢ reports/figures/02_residual_analysis.png\n\n",
    # Closing banner
    rule + "\n",
    "‚úÖ ENTRENAMIENTO COMPLETADO CON √âXITO\n",
    rule + "\n",
]

with open(report_path, 'w', encoding='utf-8') as f:
    f.writelines(report_lines)

print(f'‚úÖ Reporte completo guardado: {report_path}')

## 📤 12. Generar Predicciones para Test

Predicciones finales sobre test set para submission (formato Kaggle).

In [None]:
test_full = pd.read_csv(PROCESSED_PATH + 'test_processed.csv')

# Select the test features by the training column list instead of dropping a
# hard-coded set of columns. This ties the column set and order to what the
# models were fitted on, so an extra or reordered column in the test file
# cannot break or silently distort the predictions.
feature_columns = list(X_train.columns)
missing_features = [col for col in feature_columns if col not in test_full.columns]
if missing_features:
    raise KeyError(
        f'test_processed.csv is missing training features: {missing_features}'
    )
X_test = test_full[feature_columns]

print(f'Test set: {X_test.shape}')

y_pred_test = best_model.predict(X_test)

# Submission rows are identified as '<Store>_<Dept>_<Date>'.
submission = pd.DataFrame({
    'Id': test_full['Store'].astype(str) + '_' + test_full['Dept'].astype(str) + '_' + test_full['Date'].astype(str),
    'Weekly_Sales': y_pred_test
})

submission_file = '../reports/submission.csv'
submission.to_csv(submission_file, index=False)

print(f'\n‚úÖ Predicciones generadas: {len(submission)} filas')
print(f'‚úÖ Archivo guardado: {submission_file}')
print(f'\nüìä Estad√≠sticas de predicciones:')
print(f'   Media: ${y_pred_test.mean():,.2f}')
print(f'   Min: ${y_pred_test.min():,.2f}')
print(f'   Max: ${y_pred_test.max():,.2f}')
print(f'   Std: ${y_pred_test.std():,.2f}')

## ✅ Resumen Final

### 🎯 Resultados Obtenidos

**Mejor Modelo:** {best_model_name}

**Métricas de Validación:**
- WMAE: ${best_wmae:,.2f}
- Mejora vs Baseline: {(baseline_wmae-best_wmae)/baseline_wmae*100:.2f}%
- R²: {results_df.iloc[0]['R²']:.4f}

### 📦 Archivos Generados

1. **models/best_model_*.pkl** - Modelo entrenado
2. **models/model_info.pkl** - Métricas y metadatos del mejor modelo
3. **reports/model_training_report.txt** - Reporte completo de entrenamiento
4. **reports/submission.csv** - Predicciones para Kaggle

### 🚀 Próximos Pasos

1. **Despliegue API (api/):**
   - FastAPI endpoint para predicciones en tiempo real
   - Validación de inputs
   - Documentación Swagger

2. **Interfaz Web (web/):**
   - Streamlit dashboard interactivo
   - Visualización de predicciones
   - Upload de datos custom

3. **Dockerización (docker/):**
   - Dockerfile para API
   - docker-compose.yml
   - Despliegue containerizado

---

**🎉 PROYECTO COMPLETADO CON ÉXITO**