In [10]:
import yfinance as yf
import pandas as pd

# 选定股票和市场指数
stock_tickers = ["AAPL", "MSFT", "GOOGL", "AMZN", "TSLA", "NVDA", "JPM", "V", "PG", "DIS"]
market_ticker = "^GSPC"

# 下载数据
start_date = "2019-01-01"
end_date = "2024-01-01"
data = yf.download(stock_tickers + [market_ticker], start=start_date, end=end_date)

# 提取收盘价
adj_close_data = data["Close"]

# **确保数据为数值类型**
adj_close_data = adj_close_data.apply(pd.to_numeric, errors='coerce')

# **修复 `pct_change()` 计算 NaN 问题**
returns = adj_close_data.pct_change().ffill().dropna()

# **保存 CSV**
adj_close_data.to_csv("adj_close_data.csv")   # 保存原始收盘价数据
returns.to_csv("returns_data.csv")            # 保存每日收益率数据

# **打印前 5 行，确认数据结构**
print("Adjusted Close Data:")
print(adj_close_data.head())

print("\nReturns Data:")
print(returns.head())


[*********************100%***********************]  11 of 11 completed

Adjusted Close Data:
Ticker           AAPL       AMZN         DIS      GOOGL        JPM       MSFT  \
Date                                                                            
2019-01-02  37.667191  76.956497  106.340645  52.483086  82.916229  95.119827   
2019-01-03  33.915260  75.014000  103.764351  51.029533  81.737831  91.620560   
2019-01-04  35.363071  78.769501  106.965195  53.647011  84.751137  95.881752   
2019-01-07  35.284367  81.475502  107.892273  53.540031  84.810043  96.004036   
2019-01-08  35.956993  82.829002  108.731529  54.010281  84.650116  96.700134   

Ticker          NVDA         PG       TSLA           V        ^GSPC  
Date                                                                 
2019-01-02  3.377779  77.998947  20.674667  127.274918  2510.030029  
2019-01-03  3.173704  77.452087  20.024000  122.688324  2447.889893  
2019-01-04  3.377035  79.032898  21.179333  127.973900  2531.939941  
2019-01-07  3.555818  78.716743  22.330667  130.281525  2549.




In [5]:
import statsmodels.api as sm
import numpy as np
import pandas as pd

# **读取 CSV，确保数据格式正确**
returns = pd.read_csv("returns_data.csv")

# **去除列名空格**
returns.columns = returns.columns.str.strip()

# **转换日期格式**
returns.rename(columns={"Date": "date"}, inplace=True)  # 确保日期列正确
returns["date"] = pd.to_datetime(returns["date"])  # 转换日期格式

# **设置索引为日期**
returns.set_index("date", inplace=True)

# **转换所有数据为数值类型，防止 OLS 报错**
returns = returns.apply(pd.to_numeric, errors='coerce')

# **去除 NaN 值**
returns.dropna(inplace=True)

# **定义无风险收益率（Rf）**
Rf = 0.05  # 5%

# **分离市场收益率和个股收益率**
market_ticker = "^GSPC"  # S&P 500 作为市场指数
market_returns = returns[market_ticker]  # 市场收益率
stock_returns = returns.drop(columns=[market_ticker])  # 个股收益率

# **存储 CAPM 计算结果**
beta_values = {}
alpha_values = {}
idiosyncratic_variance = {}

# **对每只股票进行 CAPM 计算**
for stock in stock_returns.columns:
    Y = stock_returns[stock] - Rf  # **计算超额收益** Ri - Rf
    X = market_returns - Rf  # **市场超额收益** Rm - Rf

    # **确保数据为数值类型**
    Y = pd.to_numeric(Y, errors='coerce')
    X = pd.to_numeric(X, errors='coerce')

    # **去除 NaN/无效数据**
    valid_idx = Y.notna() & X.notna()
    Y = Y[valid_idx]
    X = X[valid_idx]

    # **转换为 NumPy 数组，防止 statsmodels 误判数据类型**
    Y = np.asarray(Y, dtype=np.float64)
    X = np.asarray(X, dtype=np.float64)

    # **添加常数项 α**
    X = sm.add_constant(X)

    # **执行回归**
    model = sm.OLS(Y, X).fit()
    alpha, beta = model.params  # 获取 α 和 β
    residuals = model.resid  # 获取 ϵi

    # **计算个体风险（idiosyncratic variance）**
    idiosyncratic_variance[stock] = np.var(residuals)

    # **存储 α 和 β**
    alpha_values[stock] = alpha
    beta_values[stock] = beta

# **计算市场平均收益**
market_mean_return = market_returns.mean()

# **计算每只股票的预期收益**
expected_returns = {stock: Rf + beta * (market_mean_return - Rf) for stock, beta in beta_values.items()}

# **保存 CAPM 计算结果到 CSV**
results_df = pd.DataFrame({
    "Beta": beta_values,
    "Expected Return": expected_returns,
    "Idiosyncratic Variance": idiosyncratic_variance
})

results_df.to_csv("capm_results.csv")  # 保存结果

# **打印 CAPM 计算结果**
print("CAPM 计算完成，结果已保存到 capm_results.csv")
print(results_df)

ValueError: zero-size array to reduction operation maximum which has no identity

In [None]:
from cvxopt import matrix, solvers

# 计算市场方差 σ²m
market_variance = np.var(market_returns)

# 计算协方差矩阵 Σ
num_assets = len(stock_returns.columns)
covariance_matrix = np.zeros((num_assets, num_assets))
stock_list = stock_returns.columns

for i in range(num_assets):
    for j in range(num_assets):
        if i == j:
            covariance_matrix[i, j] = beta_values[stock_list[i]] ** 2 * market_variance + idiosyncratic_variance[stock_list[i]]
        else:
            covariance_matrix[i, j] = beta_values[stock_list[i]] * beta_values[stock_list[j]] * market_variance

# 将协方差矩阵转换为 QP 需要的形式
P = matrix(covariance_matrix)
q = matrix(np.zeros(num_assets))

# 目标收益率 μp
target_returns = [0.08, 0.12, 0.15]  # 只用 3 个目标收益（作业要求）

# 存储最优权重
optimal_weights = {}

# 依次计算不同目标收益率的最优投资组合
for mu_p in target_returns:
    G = matrix(-np.identity(num_assets))  # 约束 w ≥ 0
    h = matrix(np.zeros(num_assets))
    A = matrix(np.vstack((np.ones(num_assets), [expected_returns[stock] for stock in stock_list])), (2, num_assets))
    b = matrix([1.0, mu_p])

    sol = solvers.qp(P, q, G, h, A, b)  # 求解 QP 问题
    weights = np.array(sol['x']).flatten()

    optimal_weights[mu_p] = weights
    print(f"Optimal Weights for μp={mu_p}: {weights}")


In [None]:
import matplotlib.pyplot as plt

risks = []

for mu_p, weights in optimal_weights.items():
    portfolio_variance = weights.T @ covariance_matrix @ weights
    portfolio_std = np.sqrt(portfolio_variance)
    risks.append(portfolio_std)

# 绘制有效前沿
plt.figure(figsize=(8, 6))
plt.plot(risks, target_returns, marker='o', linestyle='-')
plt.xlabel("Portfolio Risk (Standard Deviation)")
plt.ylabel("Expected Return")
plt.title("Efficient Frontier")
plt.grid(True)
plt.show()