In [15]:
import pandas as pd
import adata

# ETF universe: each fund code is requested individually from adata's ETF market endpoint.
etfs = ['561300', '159726', '515100', '513500', '161119', '518880', '164824', '159985', '513330']

# Last date of the requested price history (the start date is fixed in the call below).
end_date = '2025-02-05'

# Download every ETF and reshape it to a single close-price column named after its
# fund code, indexed by trade_date.
# NOTE(review): k_type=1 is presumably the daily bar type -- confirm against the adata docs.
close_frames = []
for fund_code in etfs:
    etf_df = adata.fund.market.get_market_etf(
        fund_code, start_date='2018-01-01', end_date=end_date, k_type=1
    )
    # Force numeric close prices before pivoting.
    etf_df['close'] = etf_df['close'].astype(float)
    close_frames.append(
        etf_df.pivot(index='trade_date', columns='fund_code', values='close')
    )

# Align all ETFs side by side in one concat (outer join on trade_date) instead of
# growing `df` from an empty DataFrame inside the loop: this avoids re-copying the
# accumulated frame on every iteration and keeps the 'trade_date' index name without
# depending on how pandas treats an empty frame in concat.
df = pd.concat(close_frames, axis=1)

In [16]:
from sklearn import set_config
from sklearn.model_selection import (
    GridSearchCV,
    KFold,
    RandomizedSearchCV,
    train_test_split
)
from sklearn.pipeline import Pipeline
from scipy.stats import loguniform
import matplotlib.pyplot as plt

from skfolio import RatioMeasure, RiskMeasure
from skfolio.datasets import load_factors_dataset, load_sp500_dataset
from skfolio.distance import KendallDistance
from skfolio.model_selection import (
    CombinatorialPurgedCV,
    WalkForward,
    cross_val_predict,
)
from skfolio.moments import (
    DenoiseCovariance,
    DetoneCovariance,
    EWMu,
    GerberCovariance,
    ShrunkMu,
    ShrunkCovariance
)
from skfolio.optimization import (
    MeanRisk,
    NestedClustersOptimization,
    ObjectiveFunction,
    RiskBudgeting,
    HierarchicalRiskParity,
    DistributionallyRobustCVaR,
    StackingOptimization,
    MaximumDiversification,
    HierarchicalEqualRiskContribution,
    InverseVolatility,
    ConvexOptimization
)
from skfolio.pre_selection import SelectKExtremes
from skfolio.preprocessing import prices_to_returns
from skfolio.prior import BlackLitterman, EmpiricalPrior, FactorModel
from skfolio.uncertainty_set import BootstrapMuUncertaintySet
from skfolio.portfolio import MultiPeriodPortfolio
from skfolio.cluster import HierarchicalClustering, LinkageMethod

In [70]:
# --- Data --------------------------------------------------------------------
# Order the price panel by its 'trade_date' index level and keep only the dates on
# which every ETF has a price. Both calls return new frames, so `df` is not modified.
prices = df.sort_values(by='trade_date').dropna()

# Price panel -> returns, with the index parsed to datetimes.
X = prices_to_returns(prices)
X.index = pd.to_datetime(X.index)

# --- Validation scheme -------------------------------------------------------
# Walk-forward split with train_size=7 and test_size=1 on an "MS" frequency
# (presumably 7 months of training followed by 1 month of testing -- confirm against
# the skfolio WalkForward documentation).
cv = WalkForward(train_size=7, test_size=1, freq="MS")

# --- Base optimizers ---------------------------------------------------------
# Hierarchical risk parity on CVaR, with a strongly shrunk covariance in its prior.
hrp_prior = EmpiricalPrior(covariance_estimator=ShrunkCovariance(shrinkage=0.9))
model = HierarchicalRiskParity(
    prior_estimator=hrp_prior,
    risk_measure=RiskMeasure.CVAR,
)

# Risk budgeting on CDaR with its own (separately constructed) shrunk-covariance prior.
# This estimator is defined but not part of the stacking ensemble below.
risk_budgeting_prior = EmpiricalPrior(covariance_estimator=ShrunkCovariance(shrinkage=0.9))
model3 = RiskBudgeting(
    prior_estimator=risk_budgeting_prior,
    risk_measure=RiskMeasure.CDAR,
    portfolio_params={"name": "Risk Parity - Covariance Shrinkage"},
)

# --- Stacking ensemble -------------------------------------------------------
# Note: the "model3" label below names the MaximumDiversification estimator, not the
# `model3` variable defined above.
max_diversification = MaximumDiversification()
robust_cvar = DistributionallyRobustCVaR(wasserstein_ball_radius=0.01)
estimators = [
    ("model1", model),
    ("model3", max_diversification),
    ("model2", robust_cvar),
]

# Final estimator combining the base optimizers: MeanRisk on EVaR with a lower
# weight bound of 0.
meta_optimizer = MeanRisk(risk_measure=RiskMeasure.EVAR, min_weights=0)
model_stacking = StackingOptimization(
    estimators=estimators,
    final_estimator=meta_optimizer,
)

# --- Cross-validated prediction ----------------------------------------------
# Evaluate the stacking model over the walk-forward splits using all available cores.
pred_stacking = cross_val_predict(
    model_stacking,
    X,
    cv=cv,
    n_jobs=-1,
    portfolio_params={"name": "Stacking"},
)

In [71]:
# Plot the cumulative returns of `pred_stacking` (the result of cross_val_predict
# over the walk-forward splits).
returns = pred_stacking.plot_cumulative_returns()
# Display the figure through the 'iframe' renderer.
returns.show(renderer='iframe')

In [72]:
# Plot the weight composition of `pred_stacking` (the cross-validated stacking portfolio).
composition = pred_stacking.plot_composition()
# Display the figure through the 'iframe' renderer.
composition.show(renderer='iframe')

In [73]:
# Show the summary statistics of the cross-validated stacking portfolio
# (last expression of the cell, so it is rendered as the cell output).
pred_stacking.summary()

Mean                                     0.032%
Annualized Mean                           8.06%
Variance                               0.00026%
Annualized Variance                      0.065%
Semi-Variance                          0.00013%
Annualized Semi-Variance                 0.033%
Standard Deviation                        0.16%
Annualized Standard Deviation             2.55%
Semi-Deviation                            0.11%
Annualized Semi-Deviation                 1.82%
Mean Absolute Deviation                   0.12%
CVaR at 95%                               0.33%
EVaR at 95%                               0.49%
Worst Realization                         0.74%
CDaR at 95%                               1.24%
MAX Drawdown                              1.78%
Average Drawdown                          0.31%
EDaR at 95%                               1.43%
First Lower Partial Moment               0.061%
Ulcer Index                              0.0046
Gini Mean Difference                    

In [74]:
# Show the weights held on each observation date of the cross-validated portfolio
# (one row per date, one column per ETF, as rendered in the output below).
pred_stacking.weights_per_observation

Unnamed: 0,561300,159726,515100,513500,161119,518880,164824,159985,513330
2022-08-01,0.009665,0.006464,0.005951,0.008696,0.762314,0.124209,0.059070,0.022082,0.001550
2022-08-02,0.009665,0.006464,0.005951,0.008696,0.762314,0.124209,0.059070,0.022082,0.001550
2022-08-03,0.009665,0.006464,0.005951,0.008696,0.762314,0.124209,0.059070,0.022082,0.001550
2022-08-04,0.009665,0.006464,0.005951,0.008696,0.762314,0.124209,0.059070,0.022082,0.001550
2022-08-05,0.009665,0.006464,0.005951,0.008696,0.762314,0.124209,0.059070,0.022082,0.001550
...,...,...,...,...,...,...,...,...,...
2025-01-21,0.051093,0.000000,0.000000,0.008237,0.764102,0.055569,0.051274,0.061022,0.008702
2025-01-22,0.051093,0.000000,0.000000,0.008237,0.764102,0.055569,0.051274,0.061022,0.008702
2025-01-23,0.051093,0.000000,0.000000,0.008237,0.764102,0.055569,0.051274,0.061022,0.008702
2025-01-24,0.051093,0.000000,0.000000,0.008237,0.764102,0.055569,0.051274,0.061022,0.008702


In [75]:
# Fit the stacking model on the most recent window of returns.
#
# Prices are cleaned the same way as for the walk-forward backtest (chronological
# order, only dates on which every ETF has a price) BEFORE returns are computed:
# returns are differences between consecutive rows and the date slice below needs an
# ordered index, so neither may depend on the row order produced by concat.
df_c = df.copy()
df_c.index = pd.to_datetime(df_c.index)  # parse first so the sort is truly chronological
df_c = prices_to_returns(df_c.sort_index().dropna())
df_c.index = pd.to_datetime(df_c.index)

# Most recent date with a return observation.
# Note: this rebinds `end_date`, which the download cell defines as a string.
end_date = df_c.index.max()

# NOTE(review): the window spans 6 months back from `end_date`, while the variable is
# named "last_7_months" and the walk-forward backtest trains on 7 "MS" periods --
# confirm which window length is intended.
start_date = end_date - pd.DateOffset(months=6)
df_last_7_months = df_c.loc[start_date:end_date]

# fit_predict fits the model on this window and predicts on the same data, so
# `pred_weight` is an in-sample portfolio.
pred_weight = model_stacking.fit_predict(df_last_7_months)

In [76]:
# Plot the cumulative returns of `pred_weight`, the portfolio returned by
# model_stacking.fit_predict on the recent window (fitted and predicted on the same data).
ret = pred_weight.plot_cumulative_returns()
# Display the figure through the 'iframe' renderer.
ret.show(renderer='iframe')

In [78]:
import numpy as np
# Round the fitted stacking weights to two decimals. The values stay fractions of 1
# (0.07 means 7%); no percentage scaling is applied despite the variable name, which
# is kept so existing references to it keep working.
weights_percentage_rounded = np.round(model_stacking.weights_, 2)

# Build a one-row table of the weights. Columns are labelled from the frame the model
# was fitted on (`df_last_7_months`) rather than from the hard-coded `etfs` list, so a
# label can never be attached to the wrong weight if a column was dropped or reordered
# during preprocessing.
df_weights = pd.DataFrame(
    data=[weights_percentage_rounded],
    columns=list(df_last_7_months.columns),
)
df_weights

Unnamed: 0,561300,159726,515100,513500,161119,518880,164824,159985,513330
0,0.07,0.0,0.0,0.02,0.75,0.06,0.05,0.05,0.0
