In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy.optimize import minimize
from WindPy import w
from sklearn.linear_model import LinearRegression
import datetime
%matplotlib inline

In [None]:
# Open the WindPy session on the imported `w` object before any data request is issued.
w.start()

In [None]:
# Sample window: a fixed start date and an end date two days before the current moment.
start_date = '2019-11-01'
end_date = datetime.datetime.today() - datetime.timedelta(days=2)

# 获取债券指数历史收益率

In [None]:
# Comma-separated codes of the five bond indices requested below.
# NOTE(review): presumably one index per maturity bucket (0-1 ... 7-10 years) - confirm.
codes = 'CBA00111.CS, CBA00121.CS, CBA00131.CS, CBA00141.CS, CBA00151.CS'

In [154]:
# Request the 'close' field for the index codes between start_date and end_date via w.wsd.
data = w.wsd(codes, 'close', start_date, end_date)

In [155]:
data

.ErrorCode=0
.Codes=[CBA00111.CS,CBA00121.CS,CBA00131.CS,CBA00141.CS,CBA00151.CS]
.Fields=[CLOSE]
.Times=[20191101,20191104,20191105,20191106,20191107,20191108,20191111,20191112,20191113,20191114,...]
.Data=[[173.8878,173.9304,173.9492,173.9643,173.9801,173.9994,174.0458,174.0717,174.071,174.0729,...],[189.6161,189.6429,189.6648,189.7324,189.7648,189.7956,189.8523,189.8894,189.9037,189.926,...],[202.8688,202.8424,202.9523,203.0435,203.0818,203.1123,203.259,203.2941,203.3555,203.3767,...],[209.7281,209.6211,209.8975,210.0217,210.1693,210.1673,210.5002,210.6639,210.7372,210.7793,...],[196.8349,196.6795,197.1234,197.1317,197.1489,197.2316,197.5256,197.5199,197.7198,197.7868,...]]

In [156]:
# Fail loudly instead of building a frame from an error payload: a successful
# request reports ErrorCode=0 on the returned object.
if data.ErrorCode != 0:
    raise RuntimeError('w.wsd request for index closes failed, ErrorCode=%s' % data.ErrorCode)

# data.Data holds one list per code, so transpose to get dates as rows and codes as columns.
df = pd.DataFrame(np.array(data.Data).T, index=data.Times, columns=data.Codes)

In [157]:
df.head()

Unnamed: 0,CBA00111.CS,CBA00121.CS,CBA00131.CS,CBA00141.CS,CBA00151.CS
2019-11-01,173.8878,189.6161,202.8688,209.7281,196.8349
2019-11-04,173.9304,189.6429,202.8424,209.6211,196.6795
2019-11-05,173.9492,189.6648,202.9523,209.8975,197.1234
2019-11-06,173.9643,189.7324,203.0435,210.0217,197.1317
2019-11-07,173.9801,189.7648,203.0818,210.1693,197.1489


In [131]:
# Turn index levels into row-over-row fractional changes; dropna() removes every row
# that contains a NaN, including the first row, which has no previous value.
rdf = df.pct_change().dropna()

In [132]:
rdf.tail()

Unnamed: 0,CBA00111.CS,CBA00121.CS,CBA00131.CS,CBA00141.CS,CBA00151.CS
2019-11-18,0.000327,0.000611,0.001103,0.001837,0.002483
2019-11-19,0.000156,0.000423,0.000751,0.0011,0.001942
2019-11-20,0.000157,0.000292,0.000395,0.000341,-6.6e-05
2019-11-21,0.000106,0.000336,0.000608,0.000509,0.000459
2019-11-22,0.000142,0.000128,0.000241,0.000253,0.000134


# 获取债券基金历史收益率

In [133]:
# Load the bond fund pool workbook; its code and name columns are used in later cells.
fund_df = pd.read_excel(u'../data/债券基金池.xlsx')

In [134]:
fund_df

Unnamed: 0,代码,名称
0,002549.OF,嘉实稳祥纯债A
1,004544.OF,嘉实稳华纯债
2,070037.OF,嘉实纯债A
3,519723.OF,交银双轮动AB
4,000147.OF,易方达高等级信用债A
5,003358.OF,易方达7-10年国开行
6,070009.OF,嘉实超短债


In [135]:
# NOTE: rebinds `codes` - until here the comma-separated index-code string - to the
# list of fund codes taken from the pool table.
codes = fund_df[u'代码'].tolist()

In [136]:
# Request the 'nav' field for every fund code over the same date window (rebinds `data`).
data = w.wsd(codes, 'nav', start_date, end_date)

In [137]:
# Fail loudly instead of building a frame from an error payload: a successful
# request reports ErrorCode=0 on the returned object.
if data.ErrorCode != 0:
    raise RuntimeError('w.wsd request for fund NAVs failed, ErrorCode=%s' % data.ErrorCode)

# data.Data holds one list per fund, so transpose to get dates as rows and funds as columns.
fdf = pd.DataFrame(np.array(data.Data).T, index=data.Times, columns=data.Codes)

In [138]:
fdf.head()

Unnamed: 0,002549.OF,004544.OF,070037.OF,519723.OF,000147.OF,003358.OF,070009.OF
2019-01-02,1.1504,1.103,1.121,1.08,1.25,1.0372,1.0516
2019-01-03,1.1518,1.105,1.122,1.081,1.252,1.0394,1.0521
2019-01-04,1.1518,1.1054,1.123,1.082,1.254,1.0388,1.0524
2019-01-07,1.1525,1.1065,1.124,1.084,1.256,1.0391,1.0529
2019-01-08,1.154,1.1072,1.125,1.085,1.258,1.0411,1.0533


In [139]:
# Turn fund NAVs into row-over-row fractional changes; dropna() removes every row
# in which any fund has a NaN, so one missing NAV drops that date for all funds.
rfdf = fdf.pct_change().dropna()

In [140]:
# Compare the shapes of the fund and index return tables. print is called as a
# function so the cell runs unchanged on both Python 2 and Python 3.
print(rfdf.shape)
print(rdf.shape)

(216, 7)
(216, 5)


# 非负线性优化求解

In [141]:
# Empty result table: one row per fund code, one column per bucket label.
decomp = pd.DataFrame(
    index=pd.Index(codes, name=u'基金代码'),
    columns=[u'0-1', u'1-3', u'3-5', u'5-7', u'7-10'],
)

In [142]:
def objective(coef, args, l1_penalty=1.0):
    """Penalised least-squares loss for a weight vector.

    Computes ``(sum((X.dot(coef) - y) ** 2) + l1_penalty * ||coef||_1) / coef.size``.

    Parameters
    ----------
    coef : numpy.ndarray
        Candidate weight vector, one entry per column of ``X``.
    args : sequence
        Two-element sequence ``[X, y]`` holding the regressor matrix and the
        target vector (this is how ``decomposition`` passes them).
    l1_penalty : float, optional
        Multiplier of the L1 (lasso) term. The default of 1.0 reproduces the
        previously hard-coded behaviour; 0 gives plain least squares.

    Returns
    -------
    float
        The penalised error divided by the number of coefficients (not by
        the number of observations).

    Notes
    -----
    When the weights are constrained to be non-negative and to sum to one,
    as in ``decomposition``, the L1 norm equals 1 for every feasible point,
    so the penalty only shifts the loss by a constant.
    """
    X, y = args[0], args[1]
    residual = np.dot(X, coef) - y
    squared_error = np.sum(np.square(residual))
    penalty = l1_penalty * np.linalg.norm(coef, 1)
    return (squared_error + penalty) / coef.size

def decomposition(code):
    """Estimate non-negative index weights that replicate one fund's returns.

    Minimises ``objective`` over weights constrained to sum to one and to be
    non-negative, using the index returns ``rdf`` as regressors and the fund
    returns ``rfdf[code]`` as target. Both tables are read from the
    notebook's global namespace.

    Parameters
    ----------
    code : str
        Column label of the fund in ``rfdf``.

    Returns
    -------
    numpy.ndarray
        One weight per column of ``rdf``, in column order. Negative solver
        round-off is clipped to zero.

    Raises
    ------
    ValueError
        If the fund has no observation on any date present in ``rdf``.
    """
    import warnings  # local import: the notebook's import cell does not provide it

    TOLERANCE = 1e-10
    n = rdf.shape[1]
    init_weights = np.array([1. / n] * n)

    # Pair observations by index label instead of by row position: the two
    # tables are cleaned independently, so their rows need not line up.
    fund_returns = rfdf[code].reindex(rdf.index)
    has_obs = fund_returns.notnull().values
    X = rdf.values[has_obs]
    y = fund_returns.values[has_obs]
    if y.size == 0:
        raise ValueError('no overlapping observations between rdf and rfdf[%r]' % (code,))

    constraints = ({'type': 'eq', 'fun': lambda x: np.sum(x) - 1.0},
                   {'type': 'ineq', 'fun': lambda x: x})

    # args is a one-element tuple so that objective receives [X, y] as its
    # single extra argument; SLSQP is the method minimize selects for
    # constrained problems and is named here for clarity.
    opt_res = minimize(fun=objective,
                       x0=init_weights,
                       args=([X, y],),
                       method='SLSQP',
                       constraints=constraints,
                       tol=TOLERANCE,
                       options={'disp': True})

    if not opt_res.success:
        # Keep the best-effort result but make the failure visible.
        warnings.warn('decomposition(%r) did not converge: %s' % (code, opt_res.message))

    # The solver can return values such as -2.8e-17 for weights on the bound.
    return np.maximum(opt_res.x, 0.0)

In [143]:
decomposition(codes[0])

Optimization terminated successfully.    (Exit mode 0)
            Current function value: 0.200001366011
            Iterations: 9
            Function evaluations: 63
            Gradient evaluations: 9


array([0.36303812, 0.3250906 , 0.21192331, 0.09994796, 0.        ])

In [144]:
# Fill one row of the result table per fund with its fitted index weights.
for fund_code in codes:
    decomp.loc[fund_code] = decomposition(fund_code)

Optimization terminated successfully.    (Exit mode 0)
            Current function value: 0.200001366011
            Iterations: 9
            Function evaluations: 63
            Gradient evaluations: 9
Optimization terminated successfully.    (Exit mode 0)
            Current function value: 0.200293332152
            Iterations: 9
            Function evaluations: 63
            Gradient evaluations: 9
Optimization terminated successfully.    (Exit mode 0)
            Current function value: 0.200007529681
            Iterations: 8
            Function evaluations: 56
            Gradient evaluations: 8
Optimization terminated successfully.    (Exit mode 0)
            Current function value: 0.200077910677
            Iterations: 9
            Function evaluations: 63
            Gradient evaluations: 9
Optimization terminated successfully.    (Exit mode 0)
            Current function value: 0.201722553251
            Iterations: 14
            Function evaluations: 98
          

In [145]:
decomp

Unnamed: 0_level_0,0-1,1-3,3-5,5-7,7-10
基金代码,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
002549.OF,0.363038,0.325091,0.211923,0.099948,0.0
004544.OF,0.467622,0.363526,0.124106,0.0,0.0447461
070037.OF,0.344936,0.319649,0.224466,0.110948,0.0
519723.OF,0.332097,0.306938,0.193671,0.0915937,0.0757011
000147.OF,0.38181,0.336458,0.281732,-2.77556e-17,-1.46172e-16
003358.OF,-5.00383e-17,-2.49678e-17,1.94601e-16,6.93889e-17,1.0
070009.OF,0.375719,0.319886,0.205384,0.0990114,0.0


In [146]:
# Duration column: weighted sum of one year value per bucket. The weight columns are
# selected by name so the cell still works when re-run after more columns were added.
# NOTE(review): 8 is used for the 7-10 bucket although its midpoint is 8.5 - confirm intended.
decomp[u'久期'] = decomp[[u'0-1', u'1-3', u'3-5', u'5-7', u'7-10']].dot(np.array([0.5, 2, 4, 6, 8]))

In [147]:
# Attach the fund names of the pool rows whose code is in `codes`; values are assigned by position.
decomp[u'基金名称'] = fund_df.loc[fund_df[u'代码'].isin(codes), u'名称'].tolist()

In [148]:
decomp

Unnamed: 0_level_0,0-1,1-3,3-5,5-7,7-10,久期,基金名称
基金代码,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
002549.OF,0.363038,0.325091,0.211923,0.099948,0.0,2.27908,嘉实稳祥纯债A
004544.OF,0.467622,0.363526,0.124106,0.0,0.0447461,1.81526,嘉实稳华纯债
070037.OF,0.344936,0.319649,0.224466,0.110948,0.0,2.37532,嘉实纯债A
519723.OF,0.332097,0.306938,0.193671,0.0915937,0.0757011,2.70978,交银双轮动AB
000147.OF,0.38181,0.336458,0.281732,-2.77556e-17,-1.46172e-16,1.99075,易方达高等级信用债A
003358.OF,-5.00383e-17,-2.49678e-17,1.94601e-16,6.93889e-17,1.0,8.0,易方达7-10年国开行
070009.OF,0.375719,0.319886,0.205384,0.0990114,0.0,2.24323,嘉实超短债


In [149]:
# Move the fund-name column to the front by name rather than by position, so that
# re-running the cell does not rotate a different column into first place.
decomp = decomp[[u'基金名称'] + decomp.columns.drop(u'基金名称').tolist()]

In [150]:
decomp

Unnamed: 0_level_0,基金名称,0-1,1-3,3-5,5-7,7-10,久期
基金代码,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
002549.OF,嘉实稳祥纯债A,0.363038,0.325091,0.211923,0.099948,0.0,2.27908
004544.OF,嘉实稳华纯债,0.467622,0.363526,0.124106,0.0,0.0447461,1.81526
070037.OF,嘉实纯债A,0.344936,0.319649,0.224466,0.110948,0.0,2.37532
519723.OF,交银双轮动AB,0.332097,0.306938,0.193671,0.0915937,0.0757011,2.70978
000147.OF,易方达高等级信用债A,0.38181,0.336458,0.281732,-2.77556e-17,-1.46172e-16,1.99075
003358.OF,易方达7-10年国开行,-5.00383e-17,-2.49678e-17,1.94601e-16,6.93889e-17,1.0,8.0
070009.OF,嘉实超短债,0.375719,0.319886,0.205384,0.0990114,0.0,2.24323


In [151]:
# Write the decomposition table to an Excel workbook next to the input data.
decomp.to_excel(u'../data/债券基金收益率分解.xlsx')