In [None]:
import numpy as np
import polars as pl
import pandas as pd
import glob, json, joblib
from pathlib import Path

import sys, os
# os.path.dirname('src') evaluates to '', so os.path.abspath('') resolves to the
# current working directory and the outer dirname() yields its parent directory.
# Appending that parent to sys.path makes `src` importable as a package when the
# kernel's working directory sits directly below the directory containing `src/`.
sys.path.append(os.path.dirname(os.path.abspath(os.path.dirname('src'))))
from src.position import create_position_mapper

In [None]:
# Load everything needed for the fold ensemble: metadata, per-fold models,
# the feature column list, and the optional k/b mapping parameters.
artifacts_dir = Path("../artifacts")
ensemble_meta_path = artifacts_dir / "ensemble_metadata.json"

if ensemble_meta_path.exists():
    # Preferred source: the metadata file lists the model paths explicitly.
    # The context manager closes the handle as soon as parsing is done
    # instead of leaving it open until garbage collection.
    with open(ensemble_meta_path) as meta_file:
        ensemble_metadata = json.load(meta_file)
    return_model_paths = ensemble_metadata.get("return_model_paths", [])
    risk_model_paths = ensemble_metadata.get("risk_model_paths", [])
    fold_params = ensemble_metadata.get("fold_params", [])
else:
    # Fallback: discover the per-fold pickles by filename pattern. Sorting
    # keeps the load order deterministic across runs.
    return_model_paths = sorted(glob.glob(str(artifacts_dir / "return_model_fold*.pkl")))
    risk_model_paths = sorted(glob.glob(str(artifacts_dir / "risk_model_fold*.pkl")))
    fold_params = []

# NOTE: joblib.load unpickles arbitrary Python objects; only point this at
# artifact files that come from a trusted source.
return_models = [joblib.load(p) for p in return_model_paths]
risk_models = [joblib.load(p) for p in risk_model_paths]

with open(artifacts_dir / "feature_cols.json") as feature_file:
    feature_cols = json.load(feature_file)

print(feature_cols)

# Use the fold-averaged k/b values if they were saved; otherwise fall back to
# the defaults k=1.0, b=0.0.
opt_params_path = artifacts_dir / "optimal_params.json"
if opt_params_path.exists():
    with open(opt_params_path) as params_file:
        opt_params = json.load(params_file)
    k_opt, b_opt = opt_params.get("k", 1.0), opt_params.get("b", 0.0)
else:
    k_opt, b_opt = 1.0, 0.0

print(k_opt, b_opt)

In [None]:
# Fold ensemble: average the per-fold model predictions, then map them to positions.

config_path = "../conf/params.yaml"

def predict(test: pl.DataFrame) -> float:
    """Average the fold models' predictions for ``test`` and map them to allocations.

    Args:
        test: Polars frame holding the rows to score. It is converted to pandas
            and reduced to the columns named in the module-level ``feature_cols``
            before being passed to the models.

    Returns:
        The value produced by ``mapper.map_positions`` for the averaged return
        and risk predictions.
        NOTE(review): the ``float`` annotation is kept unchanged for interface
        compatibility; the actual return type depends on ``map_positions`` and
        should be confirmed.
    """
    pdf = test.to_pandas()

    # Feed the models only the columns named in feature_cols, in that order,
    # instead of every column left over after a blanket drop. The two
    # bookkeeping columns are excluded explicitly so they can never reach the
    # models even if they were to appear in feature_cols.
    non_feature_cols = {'is_scored', 'lagged_forward_returns'}
    cols = [c for c in feature_cols if c in pdf.columns and c not in non_feature_cols]
    X = pdf[cols]

    # NOTE(review): the log text below says "Train", but this step only runs
    # inference with the already-loaded return models.
    print("########## ", "1. Train Return Model on train fold")
    r_hat = np.mean([m.predict(X) for m in return_models], axis=0)

    # NOTE(review): same remark as above -- inference only, no training.
    print("########## ", "2. Train Risk Model on train fold")
    sigma_hat = np.mean([m.predict(X) for m in risk_models], axis=0)

    print("r_hat : ", r_hat)
    print("sigma_hat : ", sigma_hat)

    print("########## ", "3. Convert these predictions to positions")
    mapper = create_position_mapper(strategy="sharpe_scaling", config_path=str(config_path))
    print("max_allocation:", mapper.max_allocation)
    # k_opt / b_opt are loaded at module level but intentionally not passed
    # here (left disabled, as in the original call).
    allocations = mapper.map_positions(
        r_hat=r_hat,
        sigma_hat=sigma_hat,
        # k=k_opt,
        # b=b_opt
    )

    print("FINISH Allocations >>>>>>>>> ", allocations)

    return allocations

In [None]:

# Run the ensemble end-to-end on the raw test CSV (the path is relative to the
# kernel's working directory).
df = pd.read_csv("../data/raw/test.csv")
# predict() is annotated to take a Polars frame, so convert the pandas frame first.
df_polars = pl.from_pandas(df)
# Last expression of the cell, so the value returned by predict() is displayed.
predict(df_polars)

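Build `mydata.zip` (bundles `src/`, `scripts/`, `conf/`, and `artifacts/`, excluding the patterns `*.pyc` and `__pycache__/*`):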

``` 
zip -r mydata.zip src/ scripts/ conf/ artifacts/ -x "*.pyc" -x "__pycache__/*" 
```