In [27]:
import pandas as pd
import adata

# ETF universe, identified by fund code. The column order of `df` follows
# the order of this list.
etfs = ['561300', '159726', '515100', '513500', '161119', '518880', '164824', '159985', '513330', '513100', '513030', '513520']

# Upper bound of the requested history, passed to the data source as-is.
end_date = '2025-02-05'

# Download each fund's history and keep a single close-price column per fund.
# The per-fund frames are collected in a list and joined once at the end:
# concatenating onto the growing result inside the loop re-copies everything
# gathered so far on every iteration.
close_frames = []
for code in etfs:
    # NOTE(review): k_type=1 presumably selects daily bars - confirm in the adata docs.
    etf_df = adata.fund.market.get_market_etf(code, start_date='2022-01-01', end_date=end_date, k_type=1)
    # The close column is cast to float before it is pivoted into the price matrix.
    etf_df['close'] = etf_df['close'].astype(float)
    # One row per trade_date, one column named after the fund code.
    pivot_df = etf_df.pivot(index='trade_date', columns='fund_code', values='close')
    close_frames.append(pivot_df)

# Column-wise join on trade_date. A date that is missing for one fund shows up
# as NaN in that fund's column. An empty universe yields an empty frame, as the
# original accumulator did, instead of raising inside pd.concat.
df = pd.concat(close_frames, axis=1) if close_frames else pd.DataFrame()

In [29]:
# Forward-fill gaps in the price matrix: every NaN takes the most recent
# earlier value in the same column. NaNs that precede a column's first
# observation have nothing to copy from and stay NaN.
# NOTE(review): this cell carries execution count 29 but sits above the
# NaN-inspection cell numbered 28, so that cell's saved output was produced
# before this fill ran. On a top-to-bottom run the fill happens first and the
# inspection will most likely no longer list those rows.
df = df.ffill()

In [28]:
# Keep only the dates on which at least one fund has no close price,
# then display them as the cell output.
nan_rows = df.loc[df.isna().any(axis="columns")]
nan_rows

fund_code,561300,159726,515100,513500,161119,518880,164824,159985,513330,513100,513030,513520
trade_date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
2022-01-13,0.93,1.032,1.115,1.415,1.522,3.654,1.264,1.242,0.583,,1.199,1.17
2022-03-29,0.818,0.96,1.101,,1.522,3.849,1.184,1.629,0.48,0.978,1.042,1.079
2025-01-23,0.799,1.03,1.423,2.205,1.754,6.187,1.483,1.919,0.413,1.643,,1.55


In [30]:
from sklearn import set_config
from sklearn.model_selection import (
    GridSearchCV,
    KFold,
    RandomizedSearchCV,
    train_test_split
)
from sklearn.pipeline import Pipeline
from scipy.stats import loguniform
import matplotlib.pyplot as plt

from skfolio import RatioMeasure, RiskMeasure
from skfolio.datasets import load_factors_dataset, load_sp500_dataset
from skfolio.distance import KendallDistance
from skfolio.model_selection import (
    CombinatorialPurgedCV,
    WalkForward,
    cross_val_predict,
)
from skfolio.moments import (
    DenoiseCovariance,
    DetoneCovariance,
    EWMu,
    GerberCovariance,
    ShrunkMu,
    ShrunkCovariance
)
from skfolio.optimization import (
    MeanRisk,
    NestedClustersOptimization,
    ObjectiveFunction,
    RiskBudgeting,
    HierarchicalRiskParity,
    DistributionallyRobustCVaR,
    StackingOptimization,
    MaximumDiversification,
    HierarchicalEqualRiskContribution,
    InverseVolatility,
    ConvexOptimization
)
from skfolio.pre_selection import SelectKExtremes
from skfolio.preprocessing import prices_to_returns
from skfolio.prior import BlackLitterman, EmpiricalPrior, FactorModel
from skfolio.uncertainty_set import BootstrapMuUncertaintySet
from skfolio.portfolio import MultiPeriodPortfolio
from skfolio.cluster import HierarchicalClustering, LinkageMethod

In [44]:
# --- Data ------------------------------------------------------------------
# Work on a copy of the price matrix: order it by trade_date, drop every date
# on which any fund still lacks a price, then convert prices to returns.
prices = df.copy().sort_values(by="trade_date").dropna()
X = prices_to_returns(prices)
X.index = pd.to_datetime(X.index)

# --- Validation scheme -----------------------------------------------------
# Walk-forward split with month-start ("MS") frequency: train_size=7 and
# test_size=1 are expressed in those periods.
cv = WalkForward(train_size=7, test_size=1, freq="MS")

# --- Base estimators -------------------------------------------------------
# Hierarchical risk parity on CVaR, with a heavily shrunk covariance in the prior.
# Options left disabled in this cell: distance_estimator=KendallDistance(),
# hierarchical_clustering_estimator=LinkageMethod().
model = HierarchicalRiskParity(
    prior_estimator=EmpiricalPrior(
        covariance_estimator=ShrunkCovariance(shrinkage=0.9),
    ),
    risk_measure=RiskMeasure.CVAR,
)

# NOTE(review): the label "model3" below names the MaximumDiversification
# base estimator, while the variable `model3` is the RiskBudgeting final
# estimator - two different objects sharing one name.
estimators = [
    ("model1", model),
    ("model3", MaximumDiversification()),
    ("model2", DistributionallyRobustCVaR(wasserstein_ball_radius=0.01)),
]

# --- Final estimator -------------------------------------------------------
# Risk budgeting on CDaR combines the base estimators' portfolios.
# Option left disabled in this cell: MeanRisk(risk_measure=RiskMeasure.EVAR,
# solver="OSQP") as the final estimator.
model3 = RiskBudgeting(
    prior_estimator=EmpiricalPrior(
        covariance_estimator=ShrunkCovariance(shrinkage=0.9),
    ),
    risk_measure=RiskMeasure.CDAR,
    portfolio_params={"name": "Risk Parity - Covariance Shrinkage"},
)

model_stacking = StackingOptimization(
    estimators=estimators,
    final_estimator=model3,
)

# --- Backtest --------------------------------------------------------------
# Out-of-sample predictions over every walk-forward split, using all cores.
pred_stacking = cross_val_predict(
    model_stacking,
    X,
    cv=cv,
    n_jobs=-1,
    portfolio_params={"name": "Stacking"},
)

In [45]:
# Cumulative-return chart of the walk-forward backtest held in `pred_stacking`.
returns = pred_stacking.plot_cumulative_returns()
# Display the figure through the 'iframe' renderer.
# NOTE(review): `returns` is presumably a Plotly figure - confirm.
returns.show(renderer='iframe')

In [46]:
# Chart of the portfolio composition (asset weights) across the backtest.
composition = pred_stacking.plot_composition()
# Display the figure through the 'iframe' renderer.
composition.show(renderer='iframe')

In [47]:
# Show the table of performance and risk measures (mean, variance, CVaR,
# drawdown statistics, ...) for the backtested stacking portfolio.
pred_stacking.summary()

Mean                                     0.037%
Annualized Mean                           9.45%
Variance                               0.00061%
Annualized Variance                       0.15%
Semi-Variance                          0.00032%
Annualized Semi-Variance                 0.080%
Standard Deviation                        0.25%
Annualized Standard Deviation             3.92%
Semi-Deviation                            0.18%
Annualized Semi-Deviation                 2.83%
Mean Absolute Deviation                   0.19%
CVaR at 95%                               0.51%
EVaR at 95%                               0.87%
Worst Realization                         1.40%
CDaR at 95%                               2.17%
MAX Drawdown                              2.70%
Average Drawdown                          0.54%
EDaR at 95%                               2.33%
First Lower Partial Moment               0.096%
Ulcer Index                              0.0080
Gini Mean Difference                    

In [48]:
# Show the asset weights applied on each backtest date: one row per date,
# one column per fund. Rows repeat while the same fitted weights are in force.
pred_stacking.weights_per_observation

Unnamed: 0,561300,159726,515100,513500,161119,518880,164824,159985,513330,513100,513030,513520
2022-09-01,0.015966,0.018084,0.017287,0.017095,0.674129,0.103178,0.053172,0.028077,0.013156,0.020567,0.020616,0.018673
2022-09-02,0.015966,0.018084,0.017287,0.017095,0.674129,0.103178,0.053172,0.028077,0.013156,0.020567,0.020616,0.018673
2022-09-05,0.015966,0.018084,0.017287,0.017095,0.674129,0.103178,0.053172,0.028077,0.013156,0.020567,0.020616,0.018673
2022-09-06,0.015966,0.018084,0.017287,0.017095,0.674129,0.103178,0.053172,0.028077,0.013156,0.020567,0.020616,0.018673
2022-09-07,0.015966,0.018084,0.017287,0.017095,0.674129,0.103178,0.053172,0.028077,0.013156,0.020567,0.020616,0.018673
...,...,...,...,...,...,...,...,...,...,...,...,...
2025-01-21,0.055494,0.016521,0.018030,0.018543,0.637873,0.057489,0.046450,0.060350,0.028169,0.019446,0.027138,0.014498
2025-01-22,0.055494,0.016521,0.018030,0.018543,0.637873,0.057489,0.046450,0.060350,0.028169,0.019446,0.027138,0.014498
2025-01-23,0.055494,0.016521,0.018030,0.018543,0.637873,0.057489,0.046450,0.060350,0.028169,0.019446,0.027138,0.014498
2025-01-24,0.055494,0.016521,0.018030,0.018543,0.637873,0.057489,0.046450,0.060350,0.028169,0.019446,0.027138,0.014498


In [49]:
# Refit the stacking model on the most recent window of data to obtain the
# weights to hold now.
df_c = df.copy()

# Returns are differences between consecutive rows, so the prices must be in
# chronological order first. The backtest cell sorts its prices before calling
# prices_to_returns; this cell previously did not. A sorted datetime index also
# keeps the label slice below well-defined.
df_c.index = pd.to_datetime(df_c.index)
df_c = df_c.sort_index()
df_c = prices_to_returns(df_c)

# Note: this rebinds `end_date` (a date string in the download cell) to a Timestamp.
end_date = df_c.index.max()

# NOTE(review): the window is six months long, while the variable is named
# "last_7_months" and the walk-forward backtest trains on 7 periods. Confirm
# which length is intended.
start_date = end_date - pd.DateOffset(months=6)
df_last_7_months = df_c.loc[start_date:end_date]

# Fit on the window and return the portfolio evaluated on that same window.
pred_weight = model_stacking.fit_predict(df_last_7_months)

In [50]:
# Cumulative-return chart of the refitted portfolio. `pred_weight` comes from
# fit_predict on a single window, so this curve is in-sample: the model is
# evaluated on the data it was fitted on.
ret = pred_weight.plot_cumulative_returns()
# Display the figure through the 'iframe' renderer.
ret.show(renderer='iframe')

In [51]:
import numpy as np

# Round the fitted weights to two decimal places. The values stay fractions
# of 1 (0.06 means 6%); they are not multiplied by 100. Because each weight is
# rounded independently, the row need not sum to exactly 1.
weights_percentage_rounded = np.round(model_stacking.weights_, 2)

# One-row table of the weights. The labels come from the columns the model was
# fitted on, not from the separately maintained `etfs` list, so each weight
# stays attached to its own fund even if the two ever diverge.
weight_labels = list(df_last_7_months.columns)
df_weights = pd.DataFrame(data=[weights_percentage_rounded], columns=weight_labels)
df_weights

Unnamed: 0,561300,159726,515100,513500,161119,518880,164824,159985,513330,513100,513030,513520
0,0.06,0.02,0.02,0.02,0.58,0.07,0.05,0.06,0.03,0.02,0.04,0.02
