In [3]:
import pandas as pd
import adata

# Universe of ETF codes whose daily bars are downloaded below.
etfs = ['561300', '159726', '515100', '513500', '161119', '518880', '164824', '159985', '513330', '513100', '513030', '513520']

# Last date requested from the data provider (passed as `end_date` below).
end_date = '2025-06-03'

# Collect one frame per ETF and concatenate once at the end. Growing `df`
# with pd.concat inside the loop re-copies all previously loaded rows on every
# iteration and starts from an empty DataFrame, which pandas warns about.
etf_frames = []
for code in etfs:
    etf_df = adata.fund.market.get_market_etf(code, start_date='2022-01-01', end_date=end_date, k_type=1)
    # Cast the close column to float so it can be used numerically downstream.
    etf_df['close'] = etf_df['close'].astype(float)
    etf_df['symbol'] = code
    # Forward-fill gaps within this ETF's own history only.
    etf_df = etf_df.ffill()
    etf_frames.append(etf_df)

# Long-format frame with the rows of every ETF stacked; stays an empty
# DataFrame if the universe is empty (pd.concat rejects an empty list).
df = pd.concat(etf_frames) if etf_frames else pd.DataFrame()

In [18]:
# Reshape the long-format quotes into a wide table:
# one row per trade_date, one column per fund_code, cells hold the close.
df_pivot = df.pivot(
    index='trade_date',
    columns='fund_code',
    values='close',
)
df_pivot

fund_code,159726,159985,161119,164824,513030,513100,513330,513500,513520,515100,518880,561300
trade_date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
2022-01-04,0.987,1.245,1.522,1.240,1.200,1.081,0.563,1.449,1.191,1.108,3.620,0.957
2022-01-05,0.994,1.262,1.521,1.234,1.194,1.059,0.541,1.443,1.190,1.098,3.635,0.946
2022-01-06,0.990,1.232,1.521,1.217,1.184,1.030,0.535,1.409,1.158,1.117,3.614,0.935
2022-01-07,1.014,1.234,1.519,1.235,1.186,1.038,0.553,1.418,1.158,1.123,3.592,0.934
2022-01-10,1.032,1.237,1.520,1.241,1.183,1.028,0.566,1.408,1.158,1.132,3.599,0.937
...,...,...,...,...,...,...,...,...,...,...,...,...
2025-05-26,1.165,1.915,1.755,1.527,1.890,1.543,0.466,1.991,1.464,1.469,7.425,0.818
2025-05-27,1.170,1.929,1.754,1.522,1.887,1.550,0.468,1.997,1.468,1.471,7.366,0.815
2025-05-28,1.177,1.922,1.756,1.519,1.888,1.568,0.467,2.012,1.455,1.475,7.375,0.816
2025-05-29,1.185,1.922,1.754,1.524,1.900,1.589,0.481,2.032,1.469,1.477,7.300,0.820


In [11]:
from sklearn import set_config
from sklearn.model_selection import (
    GridSearchCV,
    KFold,
    RandomizedSearchCV,
    train_test_split
)
from sklearn.pipeline import Pipeline
from scipy.stats import loguniform
import matplotlib.pyplot as plt

from skfolio import RatioMeasure, RiskMeasure
from skfolio.datasets import load_factors_dataset, load_sp500_dataset
from skfolio.distance import KendallDistance
from skfolio.model_selection import (
    CombinatorialPurgedCV,
    WalkForward,
    cross_val_predict,
)
from skfolio.moments import (
    DenoiseCovariance,
    DetoneCovariance,
    EWMu,
    GerberCovariance,
    ShrunkMu,
    ShrunkCovariance
)
from skfolio.optimization import (
    MeanRisk,
    NestedClustersOptimization,
    ObjectiveFunction,
    RiskBudgeting,
    HierarchicalRiskParity,
    DistributionallyRobustCVaR,
    StackingOptimization,
    MaximumDiversification,
    HierarchicalEqualRiskContribution,
    InverseVolatility,
    ConvexOptimization
)
from skfolio.pre_selection import SelectKExtremes
from skfolio.preprocessing import prices_to_returns
from skfolio.prior import BlackLitterman, EmpiricalPrior, FactorModel
from skfolio.uncertainty_set import BootstrapMuUncertaintySet
from skfolio.portfolio import MultiPeriodPortfolio
from skfolio.cluster import HierarchicalClustering, LinkageMethod

In [19]:
def _shrunk_covariance_prior():
    """Return a fresh EmpiricalPrior whose covariance uses shrinkage=0.9."""
    return EmpiricalPrior(
        covariance_estimator=ShrunkCovariance(shrinkage=0.9)
    )


# Wide close-price table, ordered by trade date, keeping only complete rows.
prices = df_pivot.copy().sort_values(by='trade_date').dropna()

# Convert prices to returns and give them a DatetimeIndex before they are
# handed to the frequency-based ("MS") walk-forward splitter.
X = prices_to_returns(prices)
X.index = pd.to_datetime(X.index)

# Walk-forward split: train_size=7 and test_size=1, counted in "MS" periods.
cv = WalkForward(train_size=7, test_size=1, freq="MS")

# First-level estimator: hierarchical risk parity on CVaR.
model = HierarchicalRiskParity(
    risk_measure=RiskMeasure.CVAR,
    prior_estimator=_shrunk_covariance_prior(),
)

# Final estimator of the stack: risk budgeting on CDaR.
model3 = RiskBudgeting(
    risk_measure=RiskMeasure.CDAR,
    prior_estimator=_shrunk_covariance_prior(),
    portfolio_params={"name": "Risk Parity - Covariance Shrinkage"},
)

# First-level estimators combined by the stacking model.
# NOTE(review): the key "model3" labels MaximumDiversification here, while the
# variable `model3` is the final estimator — the two are unrelated.
estimators = [
    ("model1", model),
    ("model3", MaximumDiversification()),
    ("model2", DistributionallyRobustCVaR(wasserstein_ball_radius=0.01)),
]

model_stacking = StackingOptimization(
    estimators=estimators,
    final_estimator=model3,
)

# Out-of-sample prediction of the stacking model over the walk-forward folds.
pred_stacking = cross_val_predict(
    model_stacking,
    X,
    cv=cv,
    n_jobs=-1,
    portfolio_params={"name": "Stacking"},
)

In [20]:
# Plot the cumulative returns of the walk-forward stacking prediction.
# NOTE(review): `returns` holds the figure object returned by the plot call
# (presumably a Plotly figure — confirm), not return data.
returns = pred_stacking.plot_cumulative_returns()
returns.show(renderer='iframe')

In [21]:
# Plot the composition of the walk-forward stacking prediction and render
# the resulting figure with the 'iframe' renderer.
composition = pred_stacking.plot_composition()
composition.show(renderer='iframe')

In [22]:
# Display the summary statistics (mean, variance, CVaR, drawdowns, ...) of the
# walk-forward stacking prediction.
pred_stacking.summary()

Mean                                     0.037%
Annualized Mean                           9.29%
Variance                               0.00080%
Annualized Variance                       0.20%
Semi-Variance                          0.00045%
Annualized Semi-Variance                  0.11%
Standard Deviation                        0.28%
Annualized Standard Deviation             4.49%
Semi-Deviation                            0.21%
Annualized Semi-Deviation                 3.37%
Mean Absolute Deviation                   0.20%
CVaR at 95%                               0.59%
EVaR at 95%                               1.63%
Worst Realization                         2.83%
CDaR at 95%                               2.22%
MAX Drawdown                              3.30%
Average Drawdown                          0.54%
EDaR at 95%                               2.49%
First Lower Partial Moment                0.10%
Ulcer Index                              0.0081
Gini Mean Difference                    

In [23]:
# Display the weight assigned to each ETF on every out-of-sample date of the
# walk-forward prediction.
pred_stacking.weights_per_observation

Unnamed: 0,159726,159985,161119,164824,513030,513100,513330,513500,513520,515100,518880,561300
2022-09-01,0.019064,0.028930,0.666644,0.053648,0.021688,0.021083,0.012887,0.017917,0.019442,0.017930,0.103605,0.017160
2022-09-02,0.019064,0.028930,0.666644,0.053648,0.021688,0.021083,0.012887,0.017917,0.019442,0.017930,0.103605,0.017160
2022-09-05,0.019064,0.028930,0.666644,0.053648,0.021688,0.021083,0.012887,0.017917,0.019442,0.017930,0.103605,0.017160
2022-09-06,0.019064,0.028930,0.666644,0.053648,0.021688,0.021083,0.012887,0.017917,0.019442,0.017930,0.103605,0.017160
2022-09-07,0.019064,0.028930,0.666644,0.053648,0.021688,0.021083,0.012887,0.017917,0.019442,0.017930,0.103605,0.017160
...,...,...,...,...,...,...,...,...,...,...,...,...
2025-04-24,0.024661,0.063996,0.520277,0.057057,0.038040,0.031914,0.034947,0.030369,0.034288,0.032252,0.078123,0.054076
2025-04-25,0.024661,0.063996,0.520277,0.057057,0.038040,0.031914,0.034947,0.030369,0.034288,0.032252,0.078123,0.054076
2025-04-28,0.024661,0.063996,0.520277,0.057057,0.038040,0.031914,0.034947,0.030369,0.034288,0.032252,0.078123,0.054076
2025-04-29,0.024661,0.063996,0.520277,0.057057,0.038040,0.031914,0.034947,0.030369,0.034288,0.032252,0.078123,0.054076


In [67]:
# Returns for the full price table, indexed by datetime so the windows below
# can be selected with Timestamp arithmetic.
df_c = df_pivot.copy()
df_c = prices_to_returns(df_c)
df_c.index = pd.to_datetime(df_c.index)

# Most recent observation available; the windows are anchored on it.
end_date = df_c.index.max()

# Train on the six months ending one month before `end_date`
# (from end_date - 7 months to end_date - 1 month, both inclusive).
train_start_date = (end_date - pd.DateOffset(months=7))
train_end_date = (end_date - pd.DateOffset(months=1))

train_df = df_c.loc[train_start_date:train_end_date]
# Label slices with .loc include both endpoints, so `df_c.loc[train_end_date:]`
# would put the observation dated `train_end_date` in both the training and the
# prediction set. Start the prediction window strictly after it instead.
predict_df = df_c.loc[df_c.index > train_end_date]

model_stacking.fit(train_df)

# Portfolio predicted on the held-out final month with the freshly fitted model.
pred_weight = model_stacking.predict(predict_df)

In [68]:
# Show the first date of the training window used for the latest fit.
train_start_date

Timestamp('2024-10-30 00:00:00')

In [69]:
# Keep the last 6 rows of the per-date weights from the latest prediction and
# display them rounded to two decimals.
weight_df = pred_weight.weights_per_observation.tail(6)
weight_df.round(2)

Unnamed: 0,159726,159985,161119,164824,513030,513100,513330,513500,513520,515100,518880,561300
2025-05-23,0.02,0.06,0.68,0.03,0.03,0.01,0.03,0.02,0.02,0.04,0.05,0.03
2025-05-26,0.02,0.06,0.68,0.03,0.03,0.01,0.03,0.02,0.02,0.04,0.05,0.03
2025-05-27,0.02,0.06,0.68,0.03,0.03,0.01,0.03,0.02,0.02,0.04,0.05,0.03
2025-05-28,0.02,0.06,0.68,0.03,0.03,0.01,0.03,0.02,0.02,0.04,0.05,0.03
2025-05-29,0.02,0.06,0.68,0.03,0.03,0.01,0.03,0.02,0.02,0.04,0.05,0.03
2025-05-30,0.02,0.06,0.68,0.03,0.03,0.01,0.03,0.02,0.02,0.04,0.05,0.03
