In [4]:
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import yfinance as yf
from scipy import stats

In [5]:
# 周期映射: daily/weekly/monthly/3month -> yfinance interval
import time

# Notebook period name -> yfinance `interval` code (looked up by data_prep).
INTERVAL_MAP = {
    'daily': '1d',
    'weekly': '1wk',
    'monthly': '1mo',
    '3month': '3mo',
}
# Notebook period name -> number of daily rows per synthetic bar
# (presumably trading days; matches the 5/20/60 windows used when resampling).
RESAMPLE_DAYS = {
    'daily': 1,
    'weekly': 5,
    'monthly': 20,
    '3month': 60,
}

def _resample_ohlc_n_days(df, n_days):
    """将日线 OHLC 按 n_days 聚合成周期线。df 已按时间降序。Open=周期首日, High=max, Low=min, Close=周期末日。增加 Date 列：起始~结束"""
    ohlc = df[['Open', 'High', 'Low', 'Close', 'Adj Close']].copy()
    if isinstance(ohlc.columns, pd.MultiIndex):
        ohlc.columns = ohlc.columns.get_level_values(-1)
    rows = []
    for i in range(0, len(ohlc), n_days):
        chunk = ohlc.iloc[i:i + n_days]
        if len(chunk) < n_days:
            continue
        dt_start = chunk.index[-1]
        dt_end = chunk.index[0]
        date_str = f"{pd.Timestamp(dt_start).strftime('%Y-%m-%d')} ~ {pd.Timestamp(dt_end).strftime('%Y-%m-%d')}"
        rows.append({
            'Date': date_str,
            'Open': float(chunk['Open'].iloc[-1]),
            'High': float(chunk['High'].max()),
            'Low': float(chunk['Low'].min()),
            'Close': float(chunk['Close'].iloc[0]),
            'Adj Close': float(chunk['Adj Close'].iloc[0]),
        })
    out = pd.DataFrame(rows)
    if len(out) > 0:
        out.index = range(len(out) - 1, -1, -1)
    return out

def _flatten_ohlc_columns(df):
    """统一处理 yfinance MultiIndex 列，返回 Open/High/Low/Close/Adj Close 单层列"""
    try:
        return df[['Open', 'High', 'Low', 'Close', 'Adj Close']].copy()
    except KeyError:
        pass
    if isinstance(df.columns, pd.MultiIndex):
        df = df.copy()
        for lvl in [1, 0]:
            vals = df.columns.get_level_values(lvl)
            if 'Open' in vals:
                df.columns = vals
                return df[['Open', 'High', 'Low', 'Close', 'Adj Close']].copy()
    raise ValueError("无法解析 yfinance 列结构")

def data_prep_multi_interval(ticker, start="2010-01-01", end="2025-12-31", max_retries=3, retry_delay=5):
    """
    Download daily bars once and derive daily/weekly/monthly/3-month indicator frames.

    The weekly, monthly and 3-month frames are not calendar periods: they are
    built by grouping the daily rows into fixed windows whose sizes come from
    RESAMPLE_DAYS (5, 20 and 60 rows), counted backwards from the newest row.

    Args:
        ticker: symbol passed to ``yf.download``.
        start, end: date range passed to ``yf.download``.
        max_retries: number of download attempts; values below 1 are treated
            as a single attempt.
        retry_delay: seconds to sleep between failed attempts.

    Returns:
        (daily_df, weekly_df, monthly_df, df_3month). Each frame carries the
        indicator columns added by ``data_prep``. The resampled frames also have
        a 'Date' range column; the daily frame keeps its dates in the index.
        A period with too few rows for one full window yields an empty frame
        with the indicator column names.

    Raises:
        The exception from the last attempt when every attempt fails, e.g.
        ValueError when yfinance returns no rows.

    Usage: daily, weekly, monthly, df_3month = data_prep_multi_interval("SPY")
    """
    indicator_cols = ['Date', 'Open', 'High', 'Low', 'Close', 'Adj Close',
                      'C-C Returns', 'H-L Returns', 'O-C Returns', 'Randomness']

    # Always try at least once so `raw` can never be left unbound.
    attempts = max(1, max_retries)
    for attempt in range(1, attempts + 1):
        try:
            raw = yf.download(ticker, start=start, end=end, interval='1d', auto_adjust=False, multi_level_index=False)
            if raw is None or (isinstance(raw, pd.DataFrame) and raw.empty):
                raise ValueError(f"yfinance 未返回数据: {ticker}")
            raw = _flatten_ohlc_columns(raw)
            break
        except Exception:
            if attempt == attempts:
                raise  # out of attempts: surface the original error with its traceback
            time.sleep(retry_delay)

    # Newest row first, as the resampler and data_prep expect.
    raw = raw.sort_index(ascending=False)

    def _prep_resampled(n_days):
        """Resample to n_days-row bars and add indicators, or return an empty frame."""
        bars = _resample_ohlc_n_days(raw, n_days)
        if len(bars) == 0:
            return pd.DataFrame(columns=indicator_cols)
        return data_prep("", data=bars)

    daily = data_prep("", data=raw)
    weekly = _prep_resampled(RESAMPLE_DAYS['weekly'])
    monthly = _prep_resampled(RESAMPLE_DAYS['monthly'])
    df_3month = _prep_resampled(RESAMPLE_DAYS['3month'])
    return daily, weekly, monthly, df_3month

def data_prep(ticker, start="2010-01-01", end="2025-12-31", interval="daily", data=None):
    """
    Build the return-indicator table for one instrument.

    Args:
        ticker: symbol to download (only used when ``data`` is None).
        start, end: download date range (only used when ``data`` is None).
        interval: 'daily', 'weekly', 'monthly' or '3month'; any other value is
            passed to yfinance unchanged (e.g. '1d').
        data: optional OHLC DataFrame with Open/High/Low/Close/Adj Close. When
            given, nothing is downloaded; a 'Date' column, if present, is
            carried over as the first column.

    Returns:
        DataFrame sorted by index in descending order with four extra columns:
            C-C Returns: (Adj Close - next row's Adj Close) / next row's Adj Close
            H-L Returns: (High - Low) / Low
            O-C Returns: (Close - Open) / Open
            Randomness:  |C-C Returns - H-L Returns|
    """
    if data is not None:
        df = _flatten_ohlc_columns(data)
        if 'Date' in data.columns:
            df.insert(0, 'Date', data['Date'].values)
    else:
        # Translate the notebook's period name; unknown values go through as-is.
        yf_interval = INTERVAL_MAP.get(interval, interval)
        downloaded = yf.download(
            ticker,
            start=start,
            end=end,
            interval=yf_interval,
            auto_adjust=False,
            multi_level_index=False,
        )
        df = _flatten_ohlc_columns(downloaded)

    # Descending index: the "next" row is the one reached by shift(-1) below.
    df = df.sort_index(ascending=False)

    def _first_column(obj):
        """Collapse a one-column DataFrame selection to a Series."""
        if isinstance(obj, pd.DataFrame):
            return obj.iloc[:, 0]
        return obj

    adj_close, high_px, low_px, open_px, close_px = (
        _first_column(df[name]) for name in ('Adj Close', 'High', 'Low', 'Open', 'Close')
    )

    next_row_adj = adj_close.shift(-1)
    df['C-C Returns'] = (adj_close - next_row_adj) / next_row_adj
    df['H-L Returns'] = (high_px - low_px) / low_px
    df['O-C Returns'] = (close_px - open_px) / open_px
    df['Randomness'] = np.abs(df['C-C Returns'] - df['H-L Returns'])

    return df

In [None]:
# Download SPY once and unpack the four per-period frames.
daily, weekly, monthly, df_3month = data_prep_multi_interval(ticker="SPY")
# Row counts per period, then a preview of each frame.
print(
    "daily:", len(daily),
    "行 | weekly:", len(weekly),
    "行 | monthly:", len(monthly),
    "行 | 3month:", len(df_3month), "行",
)
display(*[frame.head() for frame in (daily, weekly, monthly, df_3month)])

In [6]:
# Short alias -> name of the return column to analyse.
RETURNS_MAP = {
    'cc': 'C-C Returns',
    'hl': 'H-L Returns',
    'oc': 'O-C Returns',
}

def distribution_of_return(prices=None, ticker="SPY", start="2010-01-01", end="2025-12-31", interval="daily",
                           returns_type="cc", merge_bound_left=-0.02, merge_bound_right=0.02, bin_width=0.005,
                           plot=True):
    """
    Analyse the distribution of one return column and print, display and plot the results.

    Steps: descriptive statistics, sign (>0 / <0 / =0) statistics, share of
    observations within 1-3 standard deviations versus a normal distribution,
    a binned frequency table with merged tails, a Jarque-Bera normality test,
    and optionally a histogram with a normal fit plus a Q-Q plot.

    Args:
        prices: DataFrame holding the return columns; when None, data is
            prepared with ``data_prep(ticker, start, end, interval)``.
        ticker, start, end: only used when ``prices`` is None.
        interval: 'daily', 'weekly', 'monthly' or '3month' (only used when
            ``prices`` is None).
        returns_type: 'cc', 'hl' or 'oc', or a literal column name such as
            'C-C Returns'.
        merge_bound_left: values below this bound fall into one merged left-tail bin.
        merge_bound_right: values above this bound fall into one merged right-tail bin.
        bin_width: target width of the bins between the two bounds. The range
            is split into round(range / bin_width) equal bins, so the effective
            width can differ when the range is not a multiple of ``bin_width``.
        plot: whether to draw the histogram and Q-Q plot.

    Returns:
        dict with keys:
            returns        - the analysed Series (NaN removed)
            returns_type   - the resolved column name
            bins           - bin edges including -inf and +inf
            stats_df       - descriptive statistics
            sign_stats     - frequency, share and mean by sign;
                             freq_adj_returns = Mean * Pct(%) / 100
            std_dev_df     - 1/2/3 std bands, actual versus normal share
            freq_df        - non-empty bins with share and cumulative probability
            jb_stat, jb_pval - Jarque-Bera statistic and p-value

    Raises:
        ValueError: if the binning parameters are invalid, the column is
            missing, or the column has no non-NaN observations.
    """
    # Validate binning parameters first so a bad call fails before any download.
    if bin_width <= 0:
        raise ValueError(f"bin_width must be positive, got {bin_width}")
    if merge_bound_left > merge_bound_right:
        raise ValueError(
            f"merge_bound_left ({merge_bound_left}) must not exceed merge_bound_right ({merge_bound_right})"
        )

    if prices is None:
        prices = data_prep(ticker, start=start, end=end, interval=interval)

    # 'cc' -> 'C-C Returns'; anything not in the alias map is used as a column name.
    col = RETURNS_MAP.get(str(returns_type).lower(), returns_type)
    if col not in prices.columns:
        raise ValueError(f"Column '{col}' not found. Use returns_type='cc'|'hl'|'oc' or ensure prices has C-C/H-L/O-C Returns.")

    cc = prices[col].dropna()
    if isinstance(cc, pd.DataFrame):
        # MultiIndex columns can make the selection a one-column frame.
        cc = cc.iloc[:, 0]

    n = len(cc)
    if n == 0:
        # Every share below divides by n; fail clearly instead of emitting NaNs.
        raise ValueError(f"Column '{col}' has no non-NaN observations to analyse.")
    results = {}

    # 1. Descriptive statistics
    stats_df = pd.DataFrame({
        'Statistic': ['Mean', 'Std', 'Skewness', 'Kurtosis', 'Min', '25%', 'Median', '75%', 'Max', 'N'],
        'Value': [cc.mean(), cc.std(), cc.skew(), cc.kurtosis(), cc.min(), cc.quantile(0.25),
                  cc.median(), cc.quantile(0.75), cc.max(), len(cc)]
    })
    results['stats_df'] = stats_df

    # 2. Sign statistics
    cc_gt0, cc_lt0, cc_eq0 = cc > 0, cc < 0, cc == 0
    sign_stats = pd.DataFrame({
        'Cond': ['> 0', '< 0', '= 0'],
        'Freq': [cc_gt0.sum(), cc_lt0.sum(), cc_eq0.sum()],
        'Pct(%)': [(cc_gt0.sum()/n*100).round(4), (cc_lt0.sum()/n*100).round(4), (cc_eq0.sum()/n*100).round(4)],
        'Mean': [cc[cc_gt0].mean() if cc_gt0.any() else np.nan,
                 cc[cc_lt0].mean() if cc_lt0.any() else np.nan,
                 cc[cc_eq0].mean() if cc_eq0.any() else 0.0]
    })
    sign_stats['freq_adj_returns'] = sign_stats['Mean'] * (sign_stats['Pct(%)'] / 100)
    results['sign_stats'] = sign_stats

    # 3. 1-3 Std Dev
    # Use the same mean and sample std as the statistics table and the plotted
    # normal fit, so the band edges agree with the 'Std' value shown to the reader.
    cc_vals = np.asarray(cc).flatten()
    mu, sigma = cc.mean(), cc.std()
    sigma_levels = [1, 2, 3]
    actual_counts = [np.sum((cc_vals >= mu - k*sigma) & (cc_vals <= mu + k*sigma)) for k in sigma_levels]
    actual_pct = [c / n * 100 for c in actual_counts]
    normal_pct = [(2*stats.norm.cdf(k)-1)*100 for k in sigma_levels]
    std_dev_df = pd.DataFrame({
        'Upper Bound': [f'{(mu + k*sigma)*100:.2f}%' for k in sigma_levels],
        'Lower Bound': [f'{(mu - k*sigma)*100:.2f}%' for k in sigma_levels],
        'Actual Count': actual_counts,
        'Actual % Count': [f'{p:.2f}%' for p in actual_pct],
        'Normal % Count': [f'{p:.2f}%' for p in normal_pct],
    }, index=['1 std', '2 std', '3 std'])
    results['std_dev_df'] = std_dev_df

    # 4. Bin distribution (customizable merge bounds and bin width)
    span = merge_bound_right - merge_bound_left
    n_mid = int(round(span / bin_width))
    if span > 0:
        # Keep at least one middle bin so the right bound stays an edge even
        # when the range is narrower than half a bin.
        n_mid = max(n_mid, 1)
    mid_edges = np.linspace(merge_bound_left, merge_bound_right, n_mid + 1)
    bins = np.concatenate([[-np.inf], mid_edges, [np.inf]])
    counts, _ = np.histogram(cc, bins=bins)

    def bin_label(i):
        """Label bin i: merged-tail text for the outer bins, '[lo, hi)' otherwise."""
        lo, hi = bins[i], bins[i+1]
        if hi <= merge_bound_left:
            return f"< {merge_bound_left}"
        elif lo >= merge_bound_right:
            return f"> {merge_bound_right}"
        return f"[{lo:.3f}, {hi:.3f})"

    freq_df = pd.DataFrame({
        'Bin': [bin_label(i) for i in range(len(bins)-1)],
        'Freq': counts, 'Pct(%)': (counts / n * 100).round(4),
        'probability': (counts / n).round(6),
        'cumulative_probability': (counts / n).cumsum().round(6)
    })
    # The cumulative column is computed over all bins before empty ones are hidden.
    freq_df = freq_df[freq_df['Freq'] > 0].reset_index(drop=True)
    results['freq_df'] = freq_df
    results['bins'] = bins
    results['returns'] = cc
    results['returns_type'] = col

    # 5. Normality test
    jb_stat, jb_pval = stats.jarque_bera(cc)
    results['jb_stat'], results['jb_pval'] = jb_stat, jb_pval

    # Print & display
    print("=" * 50, f"\n{col} Descriptive Statistics\n", "=" * 50)
    print(stats_df.to_string(index=False))
    print("\n" + "=" * 50, f"\n{col} Sign Statistics\n", "=" * 50)
    w1, w2, w3, w4, w5 = 6, 8, 12, 14, 18
    print(f"{'Cond':<{w1}}{'Freq':>{w2}}{'Pct(%)':>{w3}}{'Mean':>{w4}}{'freq_adj_returns':>{w5}}")
    for _, r in sign_stats.iterrows():
        avg = r['Mean'] if pd.notna(r['Mean']) else 0
        print(f"{r['Cond']:<{w1}}{int(r['Freq']):>{w2}}{r['Pct(%)']:>{w3}.4f}{avg:>{w4}.6f}{r['freq_adj_returns']:>{w5}.6f}")
    print("\n" + "=" * 70, "\n1-3 Std Dev: Actual vs Normal\n", "=" * 70)
    display(std_dev_df)
    print("\n" + "=" * 80 + f"\nBin Distribution (<{merge_bound_left} merged, >{merge_bound_right} merged, middle {bin_width}/bin)\n" + "=" * 80)
    display(freq_df)
    print("\n" + "=" * 50, "\nNormality Test\n", "=" * 50)
    print(f"Jarque-Bera: stat={jb_stat:.4f}, p-value={jb_pval:.4e}")
    print(f"  -> {'Reject normality' if jb_pval < 0.05 else 'Cannot reject normality'} (alpha=0.05)")

    if plot:
        fig, axes = plt.subplots(1, 2, figsize=(12, 5))
        # matplotlib cannot draw infinite edges: clamp the tails just beyond the data.
        bins_plot = bins.copy()
        bins_plot[bins_plot == -np.inf] = min(cc.min(), merge_bound_left) - 0.01
        bins_plot[bins_plot == np.inf] = max(cc.max(), merge_bound_right) + 0.01
        axes[0].hist(cc, bins=bins_plot, density=True, alpha=0.6, color='steelblue', edgecolor='white', label=col)
        x = np.linspace(cc.min(), cc.max(), 200)
        axes[0].plot(x, stats.norm.pdf(x, cc.mean(), cc.std()), 'r-', lw=2, label='Normal fit')
        axes[0].set_xlabel(col)
        axes[0].set_ylabel('density')
        axes[0].set_title(f'{col} Distribution')
        axes[0].legend()
        axes[0].grid(True, alpha=0.3)
        stats.probplot(cc, dist="norm", plot=axes[1])
        axes[1].set_title('Q-Q Plot')
        axes[1].grid(True, alpha=0.3)
        plt.tight_layout()
        plt.show()

    return results

In [4]:
# Quick start: pick interval = 'daily' | 'weekly' | 'monthly' | '3month'.
# Exactly one of the lines below should be active; the others are alternatives.
# prices = data_prep("SPY", interval="daily")
prices = data_prep("SPY", interval="weekly")   # weekly bars
# prices = data_prep("SPY", interval="monthly") # monthly bars
# prices = data_prep("SPY", interval="3month")  # 3-month bars
prices.head()

[*********************100%***********************]  1 of 1 completed


Price,Open,High,Low,Close,Adj Close,C-C Returns,H-L Returns,O-C Returns,Randomness
Ticker,SPY,SPY,SPY,SPY,SPY,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2
2025-12-26,690.640015,691.659973,681.710022,681.919983,681.919983,-0.009335,0.014596,-0.012626,0.023931
2025-12-19,676.590027,690.830017,676.469971,690.380005,688.346008,0.020563,0.021228,0.020382,0.000665
2025-12-12,688.169983,688.880005,671.200012,676.469971,674.476929,-0.018428,0.026341,-0.017002,0.044769
2025-12-05,685.469971,689.25,681.309998,689.169983,687.139526,0.006984,0.011654,0.005398,0.00467
2025-11-28,680.859985,685.369995,678.73999,684.390015,682.373657,0.00693,0.009768,0.005185,0.002838


In [None]:
# Return-distribution analysis on the `prices` frame prepared above.
# returns_type='cc'|'hl'|'oc' selects the C-C / H-L / O-C Returns column.
results = distribution_of_return(prices=prices, returns_type="cc", plot=True)
# results = distribution_of_return(prices=prices, returns_type="hl", plot=True)  # H-L
# results = distribution_of_return(prices=prices, returns_type="oc", plot=True)  # O-C

# Alternatively, pass a ticker and an interval and let the function prepare the data:
# results = distribution_of_return(ticker="AAPL", interval="weekly", start="2020-01-01", end="2024-12-31", plot=True)
# interval: 'daily', 'weekly', 'monthly' or '3month'

# `results` keys: returns, returns_type, bins, stats_df, sign_stats, std_dev_df, freq_df, jb_stat, jb_pval


In [None]:
# Pass a ticker and an interval directly; here the tails are merged at +/-10% with 2% bins.
results = distribution_of_return(ticker="GOOG", interval="weekly",returns_type="cc", start="2000-01-01", end="2025-12-31", merge_bound_left=-0.1, merge_bound_right=0.1, bin_width=0.02, plot=True)
# interval: 'daily', 'weekly', 'monthly' or '3month'

# `results` keys: returns, returns_type, bins, stats_df, sign_stats, std_dev_df, freq_df, jb_stat, jb_pval

In [7]:
# Asset universe, one row per instrument.
#   Asset: ticker symbol. FX pairs are stored bare (e.g. "EURUSD") and get the
#          "=X" suffix from to_yf_ticker before download.
#   Class: asset-class label, used later as the grouping key for the class summary.
#   Notes: free-text sub-category; None when there is nothing to add.
assets_df = pd.DataFrame([
    # Government Bonds
    ("TLT", "Government Bonds", "20+ yr"),
    ("IEF", "Government Bonds", "7-10 yr"),
    ("IEI", "Government Bonds", "3-7 yr"),
    ("SHY", "Government Bonds", "1-3 yr"),
    # Corporate Bonds
    ("LQD", "Corporate Bonds", "Investment Grade"),
    ("VCIT", "Corporate Bonds", "Investment Grade"),
    ("JNK", "Corporate Bonds", "Junk"),
    ("HYG", "Corporate Bonds", "Junk"),
    # FX Major
    ("EURUSD", "FX Major", None),
    ("USDJPY", "FX Major", None),
    ("GBPUSD", "FX Major", None),
    ("AUDUSD", "FX Major", None),
    # FX EM
    ("USDZAR", "FX EM", None),
    ("USDBRL", "FX EM", None),
    ("USDTRY", "FX EM", None),
    # Equity Index
    ("SPY", "Equity Index", "S&P 500"),
    ("QQQ", "Equity Index", "Nasdaq 100"),
    ("DIA", "Equity Index", "Dow Jones Industrial"),
    ("IWV", "Equity Index", "Russell 3000"),
    ("IWM", "Equity Index", "Russell 2000"),
    # Equity - Mega Cap (FAANMG)
    ("META", "Equity - Mega Cap", "FAANMG"),
    ("AAPL", "Equity - Mega Cap", "FAANMG"),
    ("AMZN", "Equity - Mega Cap", "FAANMG"),
    ("NFLX", "Equity - Mega Cap", "FAANMG"),
    ("MSFT", "Equity - Mega Cap", "FAANMG"),
    ("GOOG", "Equity - Mega Cap", "FAANMG"),
    # Equity - Large Cap
    ("MAR", "Equity - Large Cap", "Discretionary/Industrial"),
    ("LVS", "Equity - Large Cap", "Discretionary/Industrial"),
    ("LEN", "Equity - Large Cap", "Discretionary/Industrial"),
    ("BBY", "Equity - Large Cap", "Discretionary/Industrial"),
    ("ODFL", "Equity - Large Cap", "Discretionary/Industrial"),
    ("LUV", "Equity - Large Cap", "Discretionary/Industrial"),
    ("PCAR", "Equity - Large Cap", "Discretionary/Industrial"),
    ("JCI", "Equity - Large Cap", "Discretionary/Industrial"),
    ("WMT", "Equity - Large Cap", "Discretionary/Industrial/Tech?"),
    ("HSY", "Equity - Large Cap", "F&B, Healthcare, Utilities"),
    ("CPB", "Equity - Large Cap", "F&B, Healthcare, Utilities"),
    ("STZ", "Equity - Large Cap", "F&B, Healthcare, Utilities"),
    ("MNST", "Equity - Large Cap", "F&B, Healthcare, Utilities"),
    ("BIIB", "Equity - Large Cap", "F&B, Healthcare, Utilities"),
    ("ALGN", "Equity - Large Cap", "F&B, Healthcare, Utilities"),
    ("XEL", "Equity - Large Cap", "F&B, Healthcare, Utilities"),
    ("PPL", "Equity - Large Cap", "F&B, Healthcare, Utilities"),
    ("SNDK", "Equity - Large Cap", "Semiconductor"),
    ("MU", "Equity - Large Cap", "Semiconductor"),
    ("TSM", "Equity - Large Cap", "Semiconductor"),
    ("PLTR", "Equity - Large Cap", "AI/Military"),
    # Equity - Mid Cap
    ("WING", "Equity - Mid Cap", None),
    ("FRPT", "Equity - Mid Cap", None),
    ("BILL", "Equity - Mid Cap", None),
    ("PEGA", "Equity - Mid Cap", None),
    ("OLED", "Equity - Mid Cap", None),
    ("XPO", "Equity - Mid Cap", None),
    ("FND", "Equity - Mid Cap", None),
    ("TREX", "Equity - Mid Cap", None),
    ("MOS", "Equity - Mid Cap", None),
    ("OLLI", "Equity - Mid Cap", None),
    ("DKS", "Equity - Mid Cap", None),
    ("VIRT", "Equity - Mid Cap", None),
    ("JBLU", "Equity - Mid Cap", None),
], columns=["Asset", "Class", "Notes"])

In [8]:
# 方法2：每个标的仅下载一次日线，用 data_prep_multi_interval 合成四周期，计算 Std/Mean 拼接至 assets_df
import time

# Show a tqdm progress bar when tqdm is installed; otherwise iterate the plain column.
try:
    from tqdm import tqdm
except ImportError:
    iter_assets = assets_df['Asset']
else:
    iter_assets = tqdm(assets_df['Asset'], desc="C-C std & H-L mean (单次下载)")

# Output column names, one per period, in daily/weekly/monthly/3-month order.
std_cols = [
    'Std_Daily(%)',
    'Std_Weekly(%)',
    'Std_Monthly(%)',
    'Std_3Month(%)',
]
hl_mean_cols = [
    'MeanHL_Daily(%)',
    'MeanHL_Weekly(%)',
    'MeanHL_Monthly(%)',
    'MeanHL_3Month(%)',
]

# Download window shared by every asset, and the pause (seconds) before each download.
start = "2010-01-01"
end = "2025-12-31"
DELAY_SEC = 2

def to_yf_ticker(ticker):
    """Map a notebook ticker to the symbol passed to the downloader.

    The seven listed FX pairs get an "=X" suffix; every other ticker is
    returned unchanged.
    """
    fx_pairs = ('EURUSD', 'USDJPY', 'GBPUSD', 'AUDUSD', 'USDZAR', 'USDBRL', 'USDTRY')
    return f"{ticker}=X" if ticker in fx_pairs else ticker

def _to_scalar(x):
    arr = np.asarray(x).ravel()
    return float(arr[0]) if len(arr) > 0 and np.isfinite(arr[0]) else np.nan

def _std_hl_from_df(df):
    """Return (std of C-C Returns, mean of H-L Returns), both multiplied by 100.

    (NaN, NaN) is returned when ``df`` is None or an empty DataFrame. Each
    value is individually NaN when its column has no non-NaN entries.
    """
    if df is None or (isinstance(df, pd.DataFrame) and len(df) == 0):
        return np.nan, np.nan

    def _clean(column):
        """Drop NaNs and collapse a one-column frame selection to a Series."""
        values = df[column].dropna()
        if isinstance(values, pd.DataFrame):
            return values.iloc[:, 0]
        return values

    cc_returns = _clean('C-C Returns')
    hl_returns = _clean('H-L Returns')

    if len(cc_returns) > 0:
        std_pct = _to_scalar(cc_returns.std()) * 100
    else:
        std_pct = np.nan

    if len(hl_returns) > 0:
        mean_pct = _to_scalar(hl_returns.mean()) * 100
    else:
        mean_pct = np.nan

    return std_pct, mean_pct

# One row per asset, four values per row (daily / weekly / monthly / 3-month).
std_data, hl_mean_data = [], []
# (asset, error text) for every asset whose download or preparation failed, so
# NaN rows in the result can be traced back to a cause instead of vanishing silently.
failed_assets = []
for asset in iter_assets:
    time.sleep(DELAY_SEC)  # pause before every download (presumably to avoid rate limiting)
    try:
        daily, weekly, monthly, df_3month = data_prep_multi_interval(to_yf_ticker(asset), start=start, end=end)
        std_row, hl_row = [], []
        for df in [daily, weekly, monthly, df_3month]:
            s, m = _std_hl_from_df(df)
            std_row.append(s)
            hl_row.append(m)
    except Exception as exc:
        # Best effort: keep going with NaNs for this asset, but record why.
        failed_assets.append((asset, f"{type(exc).__name__}: {exc}"))
        std_row = [np.nan] * len(std_cols)
        hl_row = [np.nan] * len(hl_mean_cols)  # separate list, not an alias of std_row
    std_data.append(std_row)
    hl_mean_data.append(hl_row)

if failed_assets:
    print(f"{len(failed_assets)} asset(s) failed and were filled with NaN:")
    for failed_asset, reason in failed_assets:
        print(f"  {failed_asset}: {reason}")

# Drop result columns left over from a previous run so re-running the cell
# replaces them instead of duplicating them.
all_cols = std_cols + hl_mean_cols
base_df = assets_df.drop(columns=all_cols, errors='ignore')
# Reuse base_df's index so the column-wise concat lines rows up by position
# even if assets_df does not have a default RangeIndex.
assets_df = pd.concat([
    base_df,
    pd.DataFrame(std_data, columns=std_cols, index=base_df.index),
    pd.DataFrame(hl_mean_data, columns=hl_mean_cols, index=base_df.index)
], axis=1)

[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed

1 Failed download:
['SHY']: TypeError("'NoneType' object is not subscriptable")
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************

In [9]:
# Export the per-asset table to CSV. to_csv does not create missing folders,
# so make sure ./export exists first (no-op when it already does).
os.makedirs("./export", exist_ok=True)
assets_df.to_csv("./export/DistributionOfReturns_per_asset.csv", index=False)

In [10]:
# Aggregate by Class: mean of each Std / MeanHL column across the assets in the class.
std_cols = [
    'Std_Daily(%)',
    'Std_Weekly(%)',
    'Std_Monthly(%)',
    'Std_3Month(%)',
]
hl_mean_cols = [
    'MeanHL_Daily(%)',
    'MeanHL_Weekly(%)',
    'MeanHL_Monthly(%)',
    'MeanHL_3Month(%)',
]

if 'Class' not in assets_df.columns:
    raise ValueError("assets_df 缺少 Class 列")

# Only aggregate the metric columns that are actually present.
agg_cols = [name for name in std_cols + hl_mean_cols if name in assets_df.columns]
if not agg_cols:
    raise ValueError("assets_df 缺少 Std/MeanHL 列，请先运行上方计算 assets_df 的 cell")

class_means = assets_df.groupby('Class')[agg_cols].mean()
class_summary_df = class_means.reset_index()

In [11]:
# Export the per-class summary to CSV. to_csv does not create missing folders,
# so make sure ./export exists first (no-op when it already does).
os.makedirs("./export", exist_ok=True)
class_summary_df.to_csv("./export/DistributionOfReturns_per_class.csv", index=False)