In [None]:
# 1. Growth indicators: columns named 'growth_*', skipping any whose name
#    mentions 'future' (those are collected separately as targets below).
GROWTH = [
    col for col in df.keys()
    if col.startswith("growth_") and "future" not in col
]

# 2. Raw OHLCV columns
OHLCV = [
    "Open",
    "High",
    "Low",
    "Close",
    "Adj Close_x",
    "Volume",
]

# 3. Targets to predict: every column whose name contains 'future'
TO_PREDICT = [col for col in df.keys() if "future" in col]

# 4. Categorical features
CATEGORICAL = [
    "Month",
    "Weekday",
    "Ticker",
    "ticker_type",
    "month_wom",
]

# 5. Columns to drop: the manually listed ones first, then the categoricals,
#    then the raw OHLCV columns (same order as a plain list concatenation).
TO_DROP = [
    "Year",
    "Date",
    "index_x",
    "index_y",
    "index",
    "Quarter",
    "Adj Close_y",
    *CATEGORICAL,
    *OHLCV,
]

# 6. Custom numerical features
CUSTOM_NUMERICAL = [
    "SMA10", "SMA20",
    "growing_moving_average",
    "high_minus_low_relative",
    "volatility",
    "ln_volume",
]

# 7. Technical indicators (TA-Lib).
#    The element order is significant (it feeds NUMERICAL) and is unchanged;
#    the family sub-headings below are only a reading aid and appear to follow
#    TA-Lib's function groups -- confirm against the feature-engineering code.
TECHNICAL_INDICATORS = [
    # momentum-style indicators
    "adx", "adxr", "apo",
    "aroon_1", "aroon_2", "aroonosc",
    "bop", "cci", "cmo", "dx",
    "macd", "macdsignal", "macdhist",
    "macd_ext", "macdsignal_ext", "macdhist_ext",
    "macd_fix", "macdsignal_fix", "macdhist_fix",
    "mfi", "minus_di", "mom", "plus_di", "dm",
    "ppo", "roc", "rocp", "rocr", "rocr100", "rsi",
    "slowk", "slowd", "fastk", "fastd", "fastk_rsi", "fastd_rsi",
    "trix", "ultosc", "willr",
    # volume-style indicators
    "ad", "adosc", "obv",
    # volatility-style indicators
    "atr", "natr",
    # cycle (Hilbert transform) indicators
    "ht_dcperiod", "ht_dcphase",
    "ht_phasor_inphase", "ht_phasor_quadrature",
    "ht_sine_sine", "ht_sine_leadsine",
    "ht_trendmod",
    # price transforms
    "avgprice", "medprice", "typprice", "wclprice",
]

# 8. Technical patterns: every column whose name contains 'cdl'
#    (presumably TA-Lib candlestick-pattern outputs -- confirm upstream).
TECHNICAL_PATTERNS = [col for col in df.keys() if "cdl" in col]

# 9. Macro-economic features
MACRO = [
    "gdppot_us_yoy",
    "gdppot_us_qoq",
    "cpi_core_yoy",
    "cpi_core_mom",
    "FEDFUNDS",
    "DGS1",
    "DGS5",
    "DGS10",
]

# 10. All numerical features (for preprocessing).
#     The concatenation order fixes the column order of the numerical block:
#     growth, technical indicators, technical patterns, custom, macro.
NUMERICAL = (
    GROWTH
    + TECHNICAL_INDICATORS
    + TECHNICAL_PATTERNS
    + CUSTOM_NUMERICAL
    + MACRO
)

# 11. Any other stray columns, i.e. columns of df that belong to none of the
#     groups above. The set of already-assigned names is built once so each
#     membership test is O(1) instead of re-concatenating four lists and
#     scanning them for every column.
# NOTE(review): TO_PREDICT is not part of the exclusion set, so target columns
# will be reported here unless they also match another group -- confirm that
# this is intended.
_ASSIGNED_COLUMNS = frozenset(OHLCV + CATEGORICAL + NUMERICAL + TO_DROP)
OTHER = [
    k for k in df.keys()
    if k not in _ASSIGNED_COLUMNS
]


In [None]:
import argparse
import os
import json


def parse_args(argv=None):
    """
    Parse command-line arguments.

    argv: optional list of argument strings to parse. When None (the
          default) argparse reads sys.argv[1:], which is the normal CLI
          behavior. Pass an explicit list (e.g. []) when calling from a
          notebook or a test, where sys.argv holds the host program's own
          arguments and would make argparse exit with an error.

    Returns an argparse.Namespace with a single attribute:
      config -- path to the JSON config file (default "config.json").
    """
    parser = argparse.ArgumentParser(description="Train & compare multiple ML models.")
    parser.add_argument(
        "--config", type=str, default="config.json",
        help="Path to JSON config file with hyperparameters and paths"
    )
    # argv=None makes argparse fall back to sys.argv[1:], so existing
    # zero-argument callers keep their behavior.
    return parser.parse_args(argv)




In [None]:

def load_config(path):
    """
    Load JSON configuration.

    path: filesystem path of the JSON file to read.

    Returns the decoded JSON document (whatever json.load produces for the
    file's top-level value).
    Raises OSError if the file cannot be opened and json.JSONDecodeError if
    its content is not valid JSON.
    """
    # Read explicitly as UTF-8: without an encoding, open() uses the
    # platform's locale encoding, which can mis-decode non-ASCII config
    # values on systems whose default is not UTF-8.
    with open(path, "r", encoding="utf-8") as f:
        return json.load(f)

In [None]:
def load_data(data_cfg):
    """
    Load dataset(s).

    data_cfg: data section of the configuration (main() passes cfg["data"]).

    Returns raw_train, raw_val, raw_test

    Not implemented yet: raises NotImplementedError. Silently returning None
    would break the three-way unpacking in the caller with an unrelated
    "cannot unpack" TypeError, so the placeholder fails loudly instead.
    """
    # TODO: implement data loading
    raise NotImplementedError("load_data is not implemented yet")


In [None]:
def preprocess_data(raw, cfg):
    """
    Preprocess raw data (scaling, encoding, windowing, etc.).

    raw: one raw data split, as produced by the data-loading step.
    cfg: preprocessing section of the configuration
         (main() passes cfg["preprocessing"]).

    Returns processed features X, labels y.

    Not implemented yet: raises NotImplementedError. Silently returning None
    would break the (X, y) unpacking in the caller with an unrelated
    "cannot unpack" TypeError, so the placeholder fails loudly instead.
    """
    # TODO: implement preprocessing
    raise NotImplementedError("preprocess_data is not implemented yet")

In [None]:
def get_classical_models(model_cfg):
    """
    Build the classical (non-deep-learning) models to compare.

    model_cfg is expected to be a dict holding one sub-config per model.
    Returns a fresh dict mapping a short model key to its model object,
    e.g. { "xgb": model_obj, ... }.

    No model is registered yet, so the returned dict is currently empty and
    model_cfg is not read.
    """
    # TODO: register the models, e.g.
    # from xgboost import XGBClassifier
    # models["xgb"] = XGBClassifier(**model_cfg["xgb"])
    # from catboost import CatBoostClassifier
    # models["cat"] = CatBoostClassifier(**model_cfg["cat"])
    # from sklearn.ensemble import RandomForestClassifier
    # models["rf"]  = RandomForestClassifier(**model_cfg["rf"])
    # from sklearn.tree import DecisionTreeClassifier
    # models["dt"]  = DecisionTreeClassifier(**model_cfg["dt"])
    registry = dict()
    return registry

In [None]:
def train_model(name, model, X_train, y_train, X_val, y_val, cfg):
    """
    Train a single model.

    name: string key for the model (e.g. "xgb" or "lstm")
    model: the model object to fit
    X_train, y_train: training features and labels
    X_val, y_val: validation features and labels
    cfg: training hyperparameters (epochs, batch_size, etc.)

    Returns trained_model, history (if available)

    Not implemented yet: raises NotImplementedError. Silently returning None
    would break the (trained, hist) unpacking in the caller with an unrelated
    "cannot unpack" TypeError, so the placeholder fails loudly instead.
    """
    # TODO: implement training logic
    raise NotImplementedError("train_model is not implemented yet")

In [None]:

def evaluate_model(name, model, X_test, y_test, cfg):
    """
    Score one trained model on the held-out test split.

    name: string key of the model being evaluated
    model: the trained model object
    X_test, y_test: test features and labels
    cfg: evaluation settings for this model

    Intended to return a dict of metrics (accuracy, precision, recall, etc.).
    Placeholder for now: nothing is computed and None is returned.
    """
    # TODO: implement evaluation
    return None


In [None]:
def compare_models(results):
    """
    Print or plot a side-by-side comparison of the evaluated models.

    results: dict of {model_name: metrics}.

    Placeholder for now: nothing is printed or plotted and None is returned.
    """
    # TODO: implement comparison (e.g., pretty print table or plot)
    return None


In [None]:
def save_artifacts(model, name, save_cfg):
    """
    Persist a model and its artifacts (plots, history) to disk.

    model: the trained model object to store
    name: string key of the model
    save_cfg: save settings for this model

    Placeholder for now: nothing is written and None is returned.
    """
    # TODO: implement model serialization and artifact saving
    return None



In [None]:
def main():
    """
    Run the whole pipeline: configuration, data, models, training,
    evaluation, artifact saving and the final comparison.
    """
    # --- 1. Command line and configuration ---
    cli_args = parse_args()
    cfg = load_config(cli_args.config)

    # --- 2. Data: load the three raw splits, then preprocess each in turn ---
    raw_train, raw_val, raw_test = load_data(cfg["data"])
    X_train, y_train = preprocess_data(raw_train, cfg["preprocessing"])
    X_val, y_val = preprocess_data(raw_val, cfg["preprocessing"])
    X_test, y_test = preprocess_data(raw_test, cfg["preprocessing"])

    # --- 3. Models: classical ones first, then the deep model under "deep" ---
    # NOTE(review): build_deep_model is not defined in this part of the file;
    # confirm it is provided elsewhere.
    candidates = dict(get_classical_models(cfg["models"]["classical"]))
    candidates["deep"] = build_deep_model(cfg["models"]["deep"])

    # --- 4. Train, evaluate and save every candidate ---
    scoreboard = {}
    for name in candidates:
        print(f"\n=== {name.upper()} ===")

        # Per-model settings are optional; a missing entry means "no overrides".
        train_cfg = cfg["training"].get(name, {})
        fitted, _history = train_model(
            name, candidates[name], X_train, y_train, X_val, y_val, train_cfg
        )

        eval_cfg = cfg["evaluation"].get(name, {})
        scoreboard[name] = evaluate_model(name, fitted, X_test, y_test, eval_cfg)

        save_cfg = cfg["save"].get(name, {})
        save_artifacts(fitted, name, save_cfg)

    # --- 5. Side-by-side comparison of all collected metrics ---
    compare_models(scoreboard)


# Run the pipeline only when this file is executed directly, not when it is
# imported as a module.
if __name__ == "__main__":
    main()