# 基金组合回测

### 导入包，登录

In [None]:
from jqdatasdk import *
import matplotlib.pyplot as plt 
import pandas as pd
import seaborn as sns
import numpy as np

In [None]:
# Log in to the JoinQuant data service. An account with data permission is
# required (per the original note, free for one year).
# NOTE(review): the two empty strings are presumably username and password --
# confirm against the jqdatasdk documentation and fill them in before running.
auth('','')

### 准备基金代码列表

In [None]:
# Fund codes to analyse. They are kept as strings, which preserves the leading
# zeros of codes such as '007119'.
code_list = '519736,163402,163406,007119,161005,519712,206018,000948'.split(',')

### 函数定义

In [None]:
def get_data(code, limit=100):
    """Fetch the most recent net-value rows of a single fund.

    Queries the ``day`` and ``sum_value`` columns of
    ``finance.FUND_NET_VALUE`` for ``code``, ordered newest first and capped
    at ``limit`` rows.

    Returns a DataFrame indexed by ``day`` (parsed to datetimes) whose value
    column is renamed from ``sum_value`` to ``code``. Rows are left in query
    order (newest first); callers sort if they need chronological order.
    """
    table = finance.FUND_NET_VALUE
    q = (
        query(table.day, table.sum_value)
        .filter(table.code == code)
        .order_by(table.day.desc())
        .limit(limit)
    )
    # Name the value column after the fund so several funds can be merged
    # side by side later.
    frame = finance.run_query(q).rename(columns={'sum_value': code})
    frame['day'] = pd.to_datetime(frame['day'], format='%Y-%m-%d')
    return frame.set_index('day')

In [None]:
def get_all_data(code_list, limit=100):
    """Fetch net-value data for several funds and join them on ``day``.

    Each fund is loaded with ``get_data(code, limit)`` and merged onto the
    running result with ``pd.merge(..., on='day')``, so only days present for
    every fund survive. Returns ``None`` when ``code_list`` is empty.
    """
    merged = None
    for code in code_list:
        frame = get_data(code, limit)
        merged = frame if merged is None else pd.merge(merged, frame, on='day')
    return merged

In [None]:
def abs_return_pcnt(series):
    """Return the total change from the first to the last value of ``series``.

    The result is a fraction of the first value (0.25 means +25%); callers
    multiply by 100 to obtain a percentage.
    """
    start = series.iloc[0]
    end = series.iloc[-1]
    return (end - start) / start

def sharpe_ratio(series, freq='daily'):
    """Return mean / standard deviation of the period returns of ``series``.

    Period returns are ``series[t] / series[t-1] - 1`` with missing values
    dropped. No risk-free rate is subtracted.

    ``freq`` selects the annualisation factor applied to the ratio:
    ``'daily'`` multiplies by sqrt(252), ``'monthly'`` by sqrt(12), and any
    other value leaves the ratio unscaled.
    """
    period_returns = (series / series.shift(1) - 1).dropna()
    ratio = period_returns.mean() / period_returns.std()
    if freq == 'daily':
        return ratio * np.sqrt(252)
    if freq == 'monthly':
        return ratio * np.sqrt(12)
    return ratio

def maximum_drawdown(series):
    """Return the maximum drawdown of ``series`` as a non-positive fraction.

    The trough is the point with the largest absolute gap below the running
    maximum; the peak is the highest value before that trough. The result is
    ``(trough - peak) / peak``, e.g. ``-0.5`` for a 50% fall. Callers multiply
    by 100 to obtain a percentage.

    Returns ``0.0`` when the series has fewer than two points or never falls
    below an earlier peak (previously this raised ``ValueError``).
    """
    values = np.asarray(series, dtype=float)
    if values.size < 2:
        return 0.0
    running_peak = np.maximum.accumulate(values)
    trough = int(np.argmax(running_peak - values))
    if trough == 0:
        # The largest gap below the running maximum is at index 0, i.e. it is
        # zero everywhere: there is no drawdown and no prefix to search.
        return 0.0
    peak = int(np.argmax(values[:trough]))
    return float((values[trough] - values[peak]) / values[peak])

### 获取数据，查看净值曲线

In [None]:
# Load the latest 500 rows per fund, put them in chronological order and plot
# every fund's net-value curve.
df = get_all_data(code_list, 500).sort_index()
df.plot(figsize=[18,5])

### 等仓位组合净值曲线、均线

In [None]:
# Equal-position portfolio: the row-wise sum of the funds' net values.
# A vectorized column sum replaces the per-row Python loop; skipna=False keeps
# a missing value as NaN, matching what the built-in sum() produced.
df['sum'] = df[[str(code) for code in code_list]].sum(axis=1, skipna=False)
df.plot(y='sum', figsize=(18,5))
# Overlay short and long moving averages of the portfolio curve.
df['sum'].rolling(5).mean().plot(label='5 day moving average')
# The 20-day curve was previously mislabelled as the 5-day one.
df['sum'].rolling(20).mean().plot(label='20 day moving average')
plt.legend()

### 计算绝对收益，最大回撤，夏普率

In [None]:
# One line per column: total return (%), maximum drawdown (%) and Sharpe ratio.
# NOTE(review): freq='yearly' matches neither 'daily' nor 'monthly' in
# sharpe_ratio, so the ratio printed here is not annualised -- confirm intent.
print('code\treturn\tdrawdown\tsharpe')
_row_template = '{}\t{:.2f}\t{:.2f}\t\t{:.2f}'
for code in df.columns:
    _col = df[code]
    print(_row_template.format(
        code,
        abs_return_pcnt(_col)*100,
        maximum_drawdown(_col)*100,
        sharpe_ratio(_col, freq='yearly'),
    ))

### 计算日收益波动

In [None]:
# Daily returns of every column; NaN entries (such as the first row, which has
# no predecessor) are replaced with 0.
daily = df.pct_change().fillna(0)
plt.figure(figsize=[18,5])
daily['sum'].plot()

In [None]:
# Summary statistics of the portfolio's daily returns; look at the mean and
# the standard deviation.
daily['sum'].describe()

In [None]:
# Histogram of the portfolio's daily returns, using 50 bins.
daily['sum'].hist(bins=50)

### 计算日，月，季度累计收益

In [None]:
# Cumulative daily return: compound (1 + r) over time, then subtract 1.
cum_daily_return = daily.add(1).cumprod().sub(1)
cum_daily_return[['sum']].plot(figsize=(18,5))
plt.title('cum_daily_return')
plt.show()

In [None]:
# Cumulative return as of the last observation in each business-month ('BM')
# bin. Resampler.last() replaces apply(lambda x: x[-1]): the lambda relied on
# positional [] access on a datetime-indexed Series and raised on an empty
# bin, whereas last() yields NaN for a bin without data.
monthly = cum_daily_return['sum'].resample('BM').last()
plt.figure(figsize=[18,5])
monthly.plot()

In [None]:
# Resample the portfolio's cumulative return into 3-month ('3M') bins and take
# the mean of each bin as the quarterly figure.
quarter = cum_daily_return['sum'].resample('3M').mean()
plt.figure(figsize=[18,5])
quarter.plot()

### 按月、年分组查看数据

In [None]:
# Median of every column for each (year, month) of the date index.
monthly_df = df.groupby([df.index.year, df.index.month]).median()
monthly_df

In [None]:
# Median of every column for each year of the date index.
yearly_df = df.groupby([df.index.year]).median()
yearly_df

### 计算各基金相关度

In [None]:
# Month-over-month percentage change of the monthly medians; the first month
# has no predecessor, so its NaN is replaced with 0.
monthly_ret_df = monthly_df.pct_change(1).fillna(0)
# Correlation of the monthly returns between columns, shown as a colour
# gradient with 3 decimals. Styler.set_precision was removed in pandas 2.0, so
# prefer Styler.format(precision=...) and fall back only on old pandas.
_corr_styler = monthly_ret_df.corr().style.background_gradient()
try:
    _corr_styler = _corr_styler.format(precision=3)
except TypeError:
    # Older pandas: Styler.format has no `precision` keyword.
    _corr_styler = _corr_styler.set_precision(3)
_corr_styler

### 收益排序

In [None]:
# Final cumulative return of every column (last row), highest first.
cum_daily_return.iloc[-1].sort_values(ascending=False)

In [None]:
# Compound the monthly returns: add 1 to each, take the running product down
# the rows, then subtract 1 to get the cumulative return.
monthly_cum_ret_df = (1 + monthly_ret_df).cumprod() - 1
# Rank the columns by cumulative return as of (year=2020, month=5).
# NOTE(review): the month is hard-coded; presumably this fails once the loaded
# window no longer contains 2020-05 -- confirm and adjust as needed.
monthly_cum_ret_df.loc[2020,5].sort_values(ascending=False)

### 最优化仓位训练

一个投资组合所承担的风险，不一定能换来相应的最大收益。换句话说，你的投资组合也许承担了较大的风险，却只能获得较小的预期收益。原因可能在于某只表现较差的股票被赋予了过大的权重，或者组合里的股票数量太少，而每一只股票自身的风险又都很大。

最优投资组合建立在一条称为有效前沿（Efficient Frontier）的曲线之上：在这条曲线上，每一个给定的风险水平都对应着可获得的最高预期收益。

In [None]:
# Number of funds in the universe; used below to size the random weight
# vectors and the number of simulated portfolios.
num_stocks = len(code_list)

In [None]:
def portfolio_annualised_performance(weights, mean_returns, cov_matrix):
    """Return ``(volatility, return)`` of a weighted portfolio, annualised.

    ``mean_returns`` and ``cov_matrix`` are scaled with 252 periods per year:
    the return is ``sum(mean_returns * weights) * 252`` and the volatility is
    ``sqrt(w' C w) * sqrt(252)``. Note the order of the returned pair:
    standard deviation first, return second.
    """
    trading_days = 252
    annual_return = np.sum(mean_returns * weights) * trading_days
    period_variance = np.dot(weights.T, np.dot(cov_matrix, weights))
    annual_std = np.sqrt(period_variance) * np.sqrt(trading_days)
    return annual_std, annual_return

def random_portfolios(num_portfolios, mean_returns, cov_matrix, risk_free_rate):
    """Simulate random fully-invested portfolios and record their statistics.

    For each of ``num_portfolios`` draws, non-negative random weights are
    normalised to sum to 1 and evaluated with
    ``portfolio_annualised_performance``.

    Returns ``(results, weights_record)`` where ``results`` has shape
    ``(3, num_portfolios)``: row 0 is the annualised volatility, row 1 the
    annualised return and row 2 the Sharpe ratio
    ``(return - risk_free_rate) / volatility``. ``weights_record[i]`` is the
    weight vector of portfolio ``i``.
    """
    # Derive the asset count from the inputs rather than a notebook global,
    # and allocate exactly the three rows that are filled. Sizing the array by
    # the asset count raised IndexError with fewer than three assets.
    num_assets = len(mean_returns)
    results = np.zeros((3, num_portfolios))
    weights_record = []
    for i in range(num_portfolios):
        weights = np.random.random(num_assets)
        weights /= np.sum(weights)
        weights_record.append(weights)
        portfolio_std_dev, portfolio_return = portfolio_annualised_performance(weights, mean_returns, cov_matrix)
        results[0, i] = portfolio_std_dev  # volatility objective
        results[1, i] = portfolio_return
        results[2, i] = (portfolio_return - risk_free_rate) / portfolio_std_dev  # Sharpe objective
    return results, weights_record

In [None]:
# Net values of the individual funds only (derived columns such as 'sum' are
# left out).
all_df = df[code_list]
# Daily percentage change of every fund.
returns = all_df.pct_change()
# Mean daily return per fund.
mean_returns = returns.mean()
# Covariance of the funds' daily returns.
cov_matrix = returns.cov()
# How many random portfolios to simulate (10,000 per fund). More portfolios
# get closer to the true optimum but cost more CPU time.
num_portfolios = int(25000 / 2.5 * num_stocks)
# Risk-free rate used in the Sharpe ratio.
risk_free_rate = 0.027

In [None]:
def _allocation_table(weights, index):
    """Return a one-row table of ``weights`` as percentages rounded to 2 dp."""
    table = pd.DataFrame(weights, index=index, columns=['allocation'])
    table.allocation = [round(w * 100, 2) for w in table.allocation]
    return table.T


def display_simulated_ef_with_random(mean_returns, cov_matrix, num_portfolios, risk_free_rate):
    """Simulate random portfolios, report and plot the two notable ones.

    Runs ``random_portfolios`` and picks the portfolio with the highest Sharpe
    ratio and the one with the lowest volatility. Their annualised return,
    volatility and allocation are printed, and all simulated portfolios are
    drawn as a volatility/return scatter coloured by Sharpe ratio with the two
    picks highlighted.

    Returns ``(max_sharpe_allocation, min_vol_allocation)``: one-row
    DataFrames (row label ``'allocation'``) holding each asset's weight in
    percent.
    """
    results, weights = random_portfolios(num_portfolios, mean_returns, cov_matrix, risk_free_rate)

    # Label the allocations with the asset names carried by mean_returns
    # instead of depending on a notebook-level global frame.
    asset_index = mean_returns.index if isinstance(mean_returns, pd.Series) else None

    max_sharpe_idx = np.argmax(results[2])
    sdp, rp = results[0, max_sharpe_idx], results[1, max_sharpe_idx]
    max_sharpe_allocation = _allocation_table(weights[max_sharpe_idx], asset_index)

    min_vol_idx = np.argmin(results[0])
    sdp_min, rp_min = results[0, min_vol_idx], results[1, min_vol_idx]
    min_vol_allocation = _allocation_table(weights[min_vol_idx], asset_index)

    print("-"*80)
    print("最大夏普比率投资组合配置\n")
    print("年化收益:", round(rp,2))
    print("年化波动:", round(sdp,2))
    print("\n")
    print(max_sharpe_allocation)
    print("-"*80)
    print("最小波动率投资组合配置\n")
    # Label typo fixed so that both sections print the same heading.
    print("年化收益:", round(rp_min,2))
    print("年化波动:", round(sdp_min,2))
    print("\n")
    print(min_vol_allocation)

    plt.figure(figsize=(10, 7))
    plt.scatter(results[0,:],results[1,:],c=results[2,:],cmap='YlGnBu', marker='o', s=10, alpha=0.3)
    plt.colorbar()
    plt.scatter(sdp,rp,marker='*',color='r',s=500, label='Maximum Sharpe ratio')
    plt.scatter(sdp_min,rp_min,marker='*',color='g',s=500, label='Minimum volatility')
    plt.title('Simulated Portfolio Optimization based on Efficient Frontier')
    plt.xlabel('annualised volatility')
    plt.ylabel('annualised returns')
    plt.legend(labelspacing=0.8)

    return max_sharpe_allocation, min_vol_allocation

In [None]:
# Run the simulation; keep the max-Sharpe and min-volatility allocation tables
# (weights in percent) for the comparison below.
max_sharpe_alloc, min_vol_alloc = display_simulated_ef_with_random(mean_returns, cov_matrix, num_portfolios, risk_free_rate)

### 比较等权，最大夏普率和最小波动率组合

In [None]:
# (fund code, weight) pairs of the max-Sharpe portfolio; the table stores
# percentages, so divide by 100 to get fractions.
weights = list(zip(max_sharpe_alloc.columns, max_sharpe_alloc.values[0]/100))
# Show the pairs with the largest weight first.
sorted(weights, key=lambda pair: pair[1], reverse=True)

In [None]:
# Net-value curve of the max-Sharpe portfolio: weighted sum of the fund columns.
df['max_sharpe'] = df.apply(
    lambda row: sum(row[fund] * share for fund, share in weights),
    axis=1,
)

In [None]:
# Same construction for the minimum-volatility portfolio: turn the percentage
# table into (fund code, fractional weight) pairs, then build its curve.
weights = list(zip(min_vol_alloc.columns, min_vol_alloc.values[0]/100))
df['min_vol'] = df.apply(
    lambda row: sum(row[fund] * share for fund, share in weights),
    axis=1,
)

In [None]:
# Recompute daily and cumulative returns now that df also holds the
# 'max_sharpe' and 'min_vol' columns.
daily = df.pct_change().fillna(0)
cum_daily_return = daily.add(1).cumprod().sub(1)

In [None]:
# Compare the cumulative return of the equal-position, minimum-volatility and
# max-Sharpe portfolios on one chart.
cum_daily_return[['sum', 'min_vol','max_sharpe']].plot(figsize=(18,5))

In [None]:
# Summary table: total return (%), maximum drawdown (%) and Sharpe ratio for
# every column of df, with the three portfolio columns given readable names.
_display_names = {
    'sum': '等权组合',
    'max_sharpe': '最大夏普率组合',
    'min_vol': '最小波动组合',
}
data = {'基金':[], '收益率':[],'最大回撤':[], '夏普率':[]}
for code in df.columns:
    _col = df[code]
    data['收益率'].append(abs_return_pcnt(_col)*100)
    data['最大回撤'].append(maximum_drawdown(_col)*100)
    data['夏普率'].append(sharpe_ratio(_col, freq='yearly'))
    # Fund codes are kept as they are; only the portfolio columns are renamed.
    code = _display_names.get(code, code)
    data['基金'].append(code)
df2 = pd.DataFrame(data).set_index('基金')

In [None]:
# Show the summary table ordered by total return, best first.
df2.sort_values('收益率', ascending=False)