Dependencias 

In [1]:
import numpy as np
from sklearn.metrics import mean_squared_error,  mean_absolute_percentage_error
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
import pandas as pd
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.linear_model import LinearRegression
import os
from sklearn.compose import ColumnTransformer
import joblib
from sklearn.linear_model import Ridge 
from sklearn.model_selection import GridSearchCV

In [2]:
possible_paths = [
    '../analisis_exploratorios_tp3/',  
    '../../analisis_exploratorios_tp3/'  
]
data_path = None
for path in possible_paths:
    if os.path.exists(f"{path}train.csv"):
        data_path = path
        break

if data_path:
    print(f" Archivos CSV encontrados en: {data_path}")
    train = pd.read_csv(f'{data_path}train.csv', index_col=0, parse_dates=True)
    validation = pd.read_csv(f'{data_path}validation.csv', index_col=0, parse_dates=True)
    test = pd.read_csv(f'{data_path}test.csv', index_col=0, parse_dates=True)
else:
    print(" No se encontraron los archivos CSV en ninguna ruta")

print(f" Datos cargados exitosamente:")
print(f"   Train: {train.shape}")
print(f"   Validation: {validation.shape}")
print(f"   Test: {test.shape}")

 Archivos CSV encontrados en: ../analisis_exploratorios_tp3/
 Datos cargados exitosamente:
   Train: (1110, 48)
   Validation: (370, 48)
   Test: (370, 48)


In [3]:
mapper2 = ColumnTransformer([
    
    ('scale_btc_h', StandardScaler(), ['btc_h']),
    ('scale_btc_l', StandardScaler(), ['btc_l']),
    ('scale_btc_v', StandardScaler(), ['btc_v']),
    ('scale_btc_prev_c1', StandardScaler(), ['btc_prev_c1']),
    ('scale_btc_prev_c2', StandardScaler(), ['btc_prev_c2']),
    ('scale_btc_pct_prev1', StandardScaler(), ['btc_pct_prev1']),
    ('scale_fng_value', StandardScaler(), ['fng_value']),
    ('scale_bitcoin', StandardScaler(), ['bitcoin']),
    ('scale_is_month_end', StandardScaler(), ['is_month_end']),
    ('ohe_year', OneHotEncoder(handle_unknown='ignore'), ['Year']),
    ('scale_RSI_14', StandardScaler(), ['RSI_14']),
    ('scale_BBL_20_2.0_2.0', StandardScaler(), ['BBL_20_2.0_2.0']),
    ('scale_BBM_20_2.0_2.0', StandardScaler(), ['BBM_20_2.0_2.0']),
    ('scale_BBU_20_2.0_2.0', StandardScaler(), ['BBU_20_2.0_2.0']),
    ('scale_BBB_20_2.0_2.0', StandardScaler(), ['BBB_20_2.0_2.0']),
])

mapper2.fit(train)
train_transformed = mapper2.transform(train)
validation_transformed = mapper2.transform(validation)
test_transformed = mapper2.transform(test)
train_df = pd.DataFrame(train_transformed, columns=mapper2.get_feature_names_out(), index=train.index)

In [4]:
ridge_models_optimized = {} 
output_dir = 'src/models'
os.makedirs(output_dir, exist_ok=True)
targets = [f"NextClose_BTC{i}" for i in range(1, 8)] 

param_grid_ridge = {    
    'regressor__alpha': [0.1, 1.0, 10.0, 100.0, 500.0, 1000.0] 
}
print(" Iniciando BÚSQUEDA DE HIPERPARÁMETROS (Alpha) para 7 modelos Ridge...")
for target in targets:
    print(f"\n--- Optimizando y Entrenando {target} ---")
    
    train_cleaned = train.dropna(subset=[target])
    
    ridge_pipeline = Pipeline([
        ('mapper', mapper2), 
        ('imputer', SimpleImputer(strategy='mean')),
        ('regressor', Ridge(random_state=42)),
    ])
    
    grid_search = GridSearchCV(
        estimator=ridge_pipeline, 
        param_grid=param_grid_ridge, 
        scoring='neg_mean_absolute_error', 
        cv=3, 
        n_jobs=-1, 
        verbose=0
    )
    
    X_train = train_cleaned.drop(columns=targets, errors='ignore') 
    y_train = train_cleaned[target]
    grid_search.fit(X_train, y_train)
    best_ridge_model = grid_search.best_estimator_
    ridge_models_optimized[target] = best_ridge_model
    
    X_val = validation.drop(columns=targets, errors='ignore') 
    y_true_full = validation[target]
    y_pred = best_ridge_model.predict(X_val)
    y_pred_series = pd.Series(y_pred, index=validation.index)
    
    comparison_df = pd.DataFrame({'y_true': y_true_full, 'y_pred': y_pred_series})
    comparison_df.dropna(subset=['y_true'], inplace=True)
    
    y_true_aligned = comparison_df['y_true']
    y_pred_aligned = comparison_df['y_pred']
    
    rmse = np.sqrt(mean_squared_error(y_true_aligned, y_pred_aligned))
    mape = mean_absolute_percentage_error(y_true_aligned, y_pred_aligned) * 100
    
    print(f"    MEJOR ALPHA: {grid_search.best_params_['regressor__alpha']:.1f}")
    print(f"    Mejor Puntuación de CV (MAE): {-grid_search.best_score_:.2f} USD")
    print(f"    RMSE en Validación: {rmse:,.2f} USD, MAPE: {mape:.2f}%")

 Iniciando BÚSQUEDA DE HIPERPARÁMETROS (Alpha) para 7 modelos Ridge...

--- Optimizando y Entrenando NextClose_BTC1 ---
    MEJOR ALPHA: 0.1
    Mejor Puntuación de CV (MAE): 1056.22 USD
    RMSE en Validación: 1,743.67 USD, MAPE: 2.46%

--- Optimizando y Entrenando NextClose_BTC2 ---
    MEJOR ALPHA: 0.1
    Mejor Puntuación de CV (MAE): 1515.32 USD
    RMSE en Validación: 2,227.57 USD, MAPE: 3.17%

--- Optimizando y Entrenando NextClose_BTC3 ---
    MEJOR ALPHA: 0.1
    Mejor Puntuación de CV (MAE): 1854.15 USD
    RMSE en Validación: 2,775.82 USD, MAPE: 3.99%

--- Optimizando y Entrenando NextClose_BTC4 ---
    MEJOR ALPHA: 0.1
    Mejor Puntuación de CV (MAE): 2134.91 USD
    RMSE en Validación: 3,159.63 USD, MAPE: 4.59%

--- Optimizando y Entrenando NextClose_BTC5 ---
    MEJOR ALPHA: 0.1
    Mejor Puntuación de CV (MAE): 2368.50 USD
    RMSE en Validación: 3,442.60 USD, MAPE: 5.07%

--- Optimizando y Entrenando NextClose_BTC6 ---
    MEJOR ALPHA: 0.1
    Mejor Puntuación de CV (M

In [5]:
for target in targets:
    test_cleaned = test.dropna(subset=[target])
    
    if target in ridge_models_optimized:
        model = ridge_models_optimized[target]
        
        y_test_pred = model.predict(test_cleaned)
        y_test_true = test_cleaned[target]
        
        rmse_test = np.sqrt(mean_squared_error(y_test_true, y_test_pred))
        mape_test = mean_absolute_percentage_error(y_test_true, y_test_pred) * 100
        
        best_alpha = model.named_steps['regressor'].alpha
        
        print(f"Target: {target:15} | Alpha: {best_alpha:<5} | RMSE: {rmse_test:,.2f} | MAPE: {mape_test:.2f}%")
    else:
        print(f"Modelo para {target} no encontrado en 'ridge_models_optimized'.")

#  Guardar los modelos finales
output_dir = 'src/models'
os.makedirs(output_dir, exist_ok=True)
joblib.dump(ridge_models_optimized, os.path.join(output_dir, 'ridge_models_optimizados.joblib'))
print(f"\nModelos guardados en {output_dir}")

Target: NextClose_BTC1  | Alpha: 0.1   | RMSE: 1,692.54 | MAPE: 2.40%
Target: NextClose_BTC2  | Alpha: 0.1   | RMSE: 2,189.67 | MAPE: 3.41%
Target: NextClose_BTC3  | Alpha: 0.1   | RMSE: 2,508.31 | MAPE: 4.05%
Target: NextClose_BTC4  | Alpha: 0.1   | RMSE: 2,936.75 | MAPE: 4.74%
Target: NextClose_BTC5  | Alpha: 0.1   | RMSE: 3,403.75 | MAPE: 5.57%
Target: NextClose_BTC6  | Alpha: 0.1   | RMSE: 3,768.73 | MAPE: 5.96%
Target: NextClose_BTC7  | Alpha: 1.0   | RMSE: 4,182.60 | MAPE: 6.61%

Modelos guardados en src/models
