In [1]:
'''
(c) 2011, 2012 Georgia Tech Research Corporation
This source code is released under the New BSD license.  Please see
http://wiki.quantsoftware.org/index.php?title=QSTK_License
for license details.

Created on January, 24, 2013

@author: Sourabh Bajaj
@contact: sourabhbajaj@gatech.edu
@summary: Example tutorial code.
'''

# QSTK Imports
import QSTK.qstkutil.qsdateutil as du # 日期工具
import QSTK.qstkutil.tsutil as tsu #根据股价自动计算日收益率
import QSTK.qstkutil.DataAccess as da #数据获取工具

# Third Party Imports
import datetime as dt
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np


def main():
    ''' Main Function

    Reads (symbol, allocation) pairs from 'tutorial3portfolio.csv', drops the
    symbols that the Yahoo data source does not list, loads roughly three
    years of closing prices ending 2011-01-01, and saves a plot of the
    cumulative returns of every component and of the weighted portfolio to
    'tutorial3.pdf'.
    '''
    # Reading the portfolio as a 1-D array of structured records, one
    # (symbol, allocation) record per CSV row, which is convenient to loop
    # over. np.atleast_1d keeps a single-row file iterable: loadtxt squeezes
    # such a file down to a 0-d array.
    na_portfolio = np.atleast_1d(
        np.loadtxt('tutorial3portfolio.csv', dtype='S5,f4',
                   delimiter=',', comments="#", skiprows=1))
    print(na_portfolio)

    # Sorting the portfolio by symbol name (first field of each record).
    na_portfolio = sorted(na_portfolio, key=lambda x: x[0])
    print(na_portfolio)

    # Create two parallel lists for symbol names and allocations.
    ls_port_syms = [port[0] for port in na_portfolio]
    lf_port_alloc = [port[1] for port in na_portfolio]

    # Creating an object of the dataaccess class with Yahoo as the source.
    c_dataobj = da.DataAccess('Yahoo')
    ls_all_syms = c_dataobj.get_all_symbols()
    # Bad symbols are symbols present in portfolio but not in all syms.
    # Sorted so the reported list does not depend on set iteration order.
    ls_bad_syms = sorted(set(ls_port_syms) - set(ls_all_syms))

    if ls_bad_syms:
        # Two spaces after the colon reproduce the output of the original
        # Python 2 print statement.
        print("Portfolio contains bad symbols :  %s" % (ls_bad_syms,))

        # Drop every occurrence of a bad symbol together with its allocation,
        # keeping both lists aligned.
        # NOTE: the allocations of dropped symbols are not redistributed, so
        # the remaining weights may sum to less than 1.
        set_bad_syms = set(ls_bad_syms)
        lt_kept = [(s_sym, f_alloc)
                   for s_sym, f_alloc in zip(ls_port_syms, lf_port_alloc)
                   if s_sym not in set_bad_syms]
        ls_port_syms = [s_sym for s_sym, _ in lt_kept]
        lf_port_alloc = [f_alloc for _, f_alloc in lt_kept]

    # Reading the historical data.
    dt_end = dt.datetime(2011, 1, 1)
    dt_start = dt_end - dt.timedelta(days=1095)  # Three years back
    # We need closing prices so the timestamp should be hours=16.
    dt_timeofday = dt.timedelta(hours=16)

    # Get a list of trading days between the start and the end.
    ldt_timestamps = du.getNYSEdays(dt_start, dt_end, dt_timeofday)

    # Keys to be read from the data, it is good to read everything in one go.
    ls_keys = ['open', 'high', 'low', 'close', 'volume', 'actual_close']

    # Reading the data, now d_data is a dictionary with the keys above.
    # Timestamps and symbols are the ones that were specified before.
    ldf_data = c_dataobj.get_data(ldt_timestamps, ls_port_syms, ls_keys)
    d_data = dict(zip(ls_keys, ldf_data))

    # Copying close price into separate dataframe to find rets.
    df_rets = d_data['close'].copy()
    # Filling the gaps: forward fill, then backward fill, and finally 1.0
    # for anything that is still missing.
    df_rets = df_rets.fillna(method='ffill')
    df_rets = df_rets.fillna(method='bfill')
    df_rets = df_rets.fillna(1.0)

    # Numpy matrix of filled data values (plain ndarray, no DataFrame
    # features any more).
    na_rets = df_rets.values
    # returnize0 works on ndarray and not dataframes. Its result is not
    # captured, so it is relied on to turn na_rets into daily returns
    # in place.
    tsu.returnize0(na_rets)

    # Estimate portfolio returns: weight each column by its allocation and
    # sum across symbols for every day.
    na_portrets = np.sum(na_rets * lf_port_alloc, axis=1)
    na_port_total = np.cumprod(na_portrets + 1)  # cumulative portfolio return
    na_component_total = np.cumprod(na_rets + 1, axis=0)  # per-symbol cumulative return

    # Plotting the results
    plt.clf()
    fig = plt.figure()
    fig.add_subplot(111)
    plt.plot(ldt_timestamps, na_component_total, alpha=0.4)
    plt.plot(ldt_timestamps, na_port_total)
    # Build a new list for the legend so ls_port_syms is not mutated.
    ls_names = ls_port_syms + ['Portfolio']
    plt.legend(ls_names)
    plt.ylabel('Cumulative Returns')
    plt.xlabel('Date')
    fig.autofmt_xdate(rotation=45)
    plt.savefig('tutorial3.pdf', format='pdf')

# Run the tutorial only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == '__main__':
    main()


  return pd.TimeSeries(index=dates, data=dates)


[('SPY', 0.30000001192092896) ('GABBA', 0.20000000298023224)
 ('GLD', 0.30000001192092896) ('7ABBA', 0.20000000298023224)]
[('7ABBA', 0.20000000298023224), ('GABBA', 0.20000000298023224), ('GLD', 0.30000001192092896), ('SPY', 0.30000001192092896)]
Portfolio contains bad symbols :  ['7ABBA', 'GABBA']
