In [6]:
'''
(c) 2011, 2012 Georgia Tech Research Corporation
This source code is released under the New BSD license.  Please see
http://wiki.quantsoftware.org/index.php?title=QSTK_License
for license details.

Created on January 24, 2013

@author: Sourabh Bajaj
@contact: sourabhbajaj@gatech.edu
@summary: Example tutorial code.
'''

# QSTK Imports
import QSTK.qstkutil.qsdateutil as du
import QSTK.qstkutil.tsutil as tsu
import QSTK.qstkutil.DataAccess as da

# Third Party Imports
import datetime as dt
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import cPickle


def _random_weights(n_assets):
    ''' Draw one random allocation row.

    @param n_assets: number of symbols (columns) to allocate across.
    @return: 1-D float array of length n_assets whose entries sum to 1.
    '''
    na_vals = np.random.randint(0, 1000, n_assets)
    # The draws are ints, so divide by an explicit float to get fractional
    # weights instead of truncated integer division.
    return na_vals / float(na_vals.sum())


def main():
    ''' Main Function

    Builds a random portfolio allocation that is re-drawn on the first
    trading day of every month between 2004-01-01 and 2009-12-31, for the
    first 20 symbols of the 'sp5002012' list plus a '_CASH' column, and
    pickles the resulting DataFrame to 'allocation.pkl' in the current
    working directory.
    '''

    # Start and End date of the charts
    dt_start = dt.datetime(2004, 1, 1)
    dt_end = dt.datetime(2009, 12, 31)

    # We need closing prices so the timestamp should be hours=16.
    dt_timeofday = dt.timedelta(hours=16)

    # Get a list of trading days between the start and the end.
    ldt_timestamps = du.getNYSEdays(dt_start, dt_end, dt_timeofday)

    # Creating an object of the dataaccess class with Yahoo as the source.
    c_dataobj = da.DataAccess('Yahoo')

    # List of symbols - first 20 of the 'sp5002012' list, plus a cash column.
    ls_symbols = c_dataobj.get_symbols_from_list('sp5002012')
    ls_symbols = ls_symbols[:20]
    ls_symbols.append('_CASH')

    # The first trading day always gets an allocation row.
    ldt_rebalance = [ldt_timestamps[0]]
    l_rows = [_random_weights(len(ls_symbols))]

    dt_last_date = ldt_timestamps[0]
    # Walk the remaining trading days; whenever the month number changes we
    # are on the first trading day of a new month, so draw fresh weights.
    for dt_date in ldt_timestamps[1:]:
        if dt_last_date.month != dt_date.month:
            ldt_rebalance.append(dt_date)
            l_rows.append(_random_weights(len(ls_symbols)))
        dt_last_date = dt_date

    # Build the allocation DataFrame once (rows = rebalance dates, columns =
    # symbols) instead of appending to a DataFrame inside the loop, which
    # copies the whole frame on every iteration.
    df_alloc = pd.DataFrame(np.vstack(l_rows), index=ldt_rebalance,
                            columns=ls_symbols)

    # Create the output pickle file for the dataframe. The context manager
    # guarantees the file is flushed and closed even if dumping fails.
    with open('allocation.pkl', 'wb') as output:
        cPickle.dump(df_alloc, output)

# Run the tutorial only when executed as a script, not when imported.
if __name__ == "__main__":
    main()


                            A        AA      AAPL       ABC       ABT  \
2004-01-02 16:00:00  0.015662  0.038945  0.017920  0.042260  0.035558   
2004-02-02 16:00:00  0.030843  0.044803  0.088191  0.021034  0.058385   
2004-03-01 16:00:00  0.002671  0.033063  0.069994  0.056272  0.044668   
2004-04-01 16:00:00  0.066080  0.076830  0.042076  0.014829  0.013253   
2004-05-03 16:00:00  0.023295  0.002926  0.049629  0.047378  0.021607   
2004-06-01 16:00:00  0.019120  0.062068  0.012779  0.057168  0.017198   
2004-07-01 16:00:00  0.045136  0.079036  0.029963  0.027178  0.021127   
2004-08-02 16:00:00  0.043309  0.027019  0.035993  0.085447  0.002536   
2004-09-01 16:00:00  0.061480  0.067818  0.069566  0.033217  0.063010   
2004-10-01 16:00:00  0.075954  0.049245  0.078291  0.050580  0.071613   
2004-11-01 16:00:00  0.031242  0.021710  0.009267  0.074309  0.076516   
2004-12-01 16:00:00  0.028591  0.095439  0.030088  0.042342  0.043567   
2005-01-03 16:00:00  0.066739  0.058178  0.042719  