In [2]:
import yfinance as yf
import pandas as pd
import numpy as np
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt

# 定义港股代码及时间范围
tickers = ["0700.HK", "0005.HK", "0939.HK", "2318.HK", "0388.HK"]
start_date = "2000-01-01"
end_date = "2024-12-01"

# 下载股票数据并检查有效性
data = {}
for ticker in tickers:
    try:
        stock_data = yf.download(ticker, start=start_date, end=end_date)['Adj Close']
        if stock_data.empty:
            print(f"Warning: {ticker} 没有数据")
        else:
            data[ticker] = stock_data
    except Exception as e:
        print(f"Error downloading {ticker}: {e}")

# 保留有效数据
valid_data = {k: v for k, v in data.items() if not v.empty}

# 构建 DataFrame
if valid_data:
    df = pd.DataFrame(valid_data).interpolate(method='linear').dropna()
    print("下载的数据：")
    print(df.head())

    # 计算对数收益率
    log_returns = np.log(df / df.shift(1)).dropna()

    # 数据标准化并进行 PCA 分析
    scaler = StandardScaler()
    scaled_data = scaler.fit_transform(log_returns)
    pca = PCA()
    pca.fit(scaled_data)

    # 输出 PCA 结果
    explained_variance_ratio = pca.explained_variance_ratio_
    cumulative_variance = np.cumsum(explained_variance_ratio)

    print("\n主成分解释的方差比例：")
    for i, ratio in enumerate(explained_variance_ratio):
        print(f"主成分 {i+1}: {ratio:.4f}")

    # 绘制累计方差比例图
    plt.figure(figsize=(10, 6))
    plt.plot(range(1, len(cumulative_variance) + 1), cumulative_variance, marker='o')
    plt.title("累计方差解释比例")
    plt.xlabel("主成分数量")
    plt.ylabel("累计方差比例")
    plt.grid()
    plt.show()
else:
    print("没有有效的股票数据，无法进行分析。")

[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed


ValueError: If using all scalar values, you must pass an index