In [None]:
%pylab inline 

'''
(c) 2011, 2012 Georgia Tech Research Corporation
This source code is released under the New BSD license.  Please see
http://wiki.quantsoftware.org/index.php?title=QSTK_License
for license details.

Created on October, 4, 2013

@author: Sourabh Bajaj
@contact: sourabhbajaj@gatech.edu
@summary: Example tutorial code.
'''

import pandas as pd
import datetime as dt
import numpy as np

## Tutorial on using Pandas in QSTK
# One timestamp per calendar day of April and May 2010, each at 16:00.
ldt_timestamps = [dt.datetime(2010, i_month, i_day, 16)
                  for i_month, i_last_day in ((4, 30), (5, 31))
                  for i_day in range(1, i_last_day + 1)]

# Single-argument print(...) emits the same text under Python 2's print
# statement and Python 3's print function; print("") is the blank line.
print("The index we created has the following dates : ")
print(ldt_timestamps)
print("")

## TimeSeries
# A pandas Series indexed by datetimes; the scalar 0.0 is repeated for
# every entry of the index.
ts_single_value = pd.Series(0.0, index=ldt_timestamps)
print("A timeseries initialized to one single value : ")
print(ts_single_value)
print("")

# One integer per timestamp: 0, 1, 2, ...
na_vals = np.arange(len(ldt_timestamps))
print("Dummy initialized array : ")
print(na_vals)
print("")

ts_array = pd.Series(na_vals, index=ldt_timestamps)
print("A timeseries initialized using a numpy array : ")
print(ts_array)
print("")

# Look a value up by its timestamp label (second entry of the index).
print("Reading the timeseries for a particular date")
print("Date :   %s" % ldt_timestamps[1])
print("Value :  %s" % ts_array[ldt_timestamps[1]])
print("")

print("Initializing a list of symbols : ")
ls_symbols = ['AAPL', 'GOOG', 'MSFT', 'IBM']
print(ls_symbols)
print("")

# Rows are timestamps, columns are symbols; the empty frame starts out as
# all-NaN and is then filled with a single value.
print("Initializing a dataframe with one value : ")
df_single = pd.DataFrame(index=ldt_timestamps, columns=ls_symbols)
df_single = df_single.fillna(0.0)
print(df_single)
print("")

# One row of random normal draws per timestamp, one column per symbol.
print("Initializing a dataframe with a numpy array : ")
na_vals_2 = np.random.randn(len(ldt_timestamps), len(ls_symbols))
df_vals = pd.DataFrame(na_vals_2, index=ldt_timestamps, columns=ls_symbols)
print(df_vals)
print("")

# Column selection: df[column] returns that symbol's series.
print("Access the timeseries of a particular symbol : ")
print(df_vals[ls_symbols[0]])
print("")

# Row selection by label: .loc[row] returns all symbols for that date.
print("Access the timeseries of a particular date : ")
print(df_vals.loc[ldt_timestamps[1]])
print("")

# Scalar selection in one step with .loc[row, column] instead of chaining
# a column lookup and a row lookup.
print("Access the value for a specific symbol on a specific date: ")
print(df_vals.loc[ldt_timestamps[1], ls_symbols[1]])
print("")

print("Reindexing the dataframe")
# Two of the new dates exist in df_vals (which covers April-May 2010) and
# one does not; likewise 'AAPL' and 'IBM' exist while 'XOM' does not. The
# result therefore shows both carried-over values and NaN-filled cells.
ldt_new_dates = [dt.datetime(2010, 4, 5, 16),
                 dt.datetime(2010, 5, 5, 16),
                 dt.datetime(2011, 1, 7, 16)]
ls_new_symbols = ['AAPL', 'IBM', 'XOM']
df_new = df_vals.reindex(index=ldt_new_dates, columns=ls_new_symbols)
print(df_new)
print("Observe that reindex carried over whatever values it could find and set the rest to NAN")
print("")

print("For pandas rolling statistics please refer : http://pandas.pydata.org/pandas-docs/dev/computation.html#moving-rolling-statistics-moments")



In [None]:
# Bollinger value of one symbol: how far the price sits from its rolling
# mean, expressed in rolling standard deviations.
# NOTE: ls_symbols is reassigned by the event-study cell further down, so
# this cell must run before that one (or after re-running the first cell).
i_lookback = 20  # rolling window length, in rows of df_vals

price = df_vals[ls_symbols[1]]
rolling_mean = pd.rolling_mean(price, i_lookback)
rolling_std = pd.rolling_std(price, i_lookback)
Bollinger_val = (price - rolling_mean) / rolling_std

# The first i_lookback - 1 points of each curve are NaN (window not full).
plot(rolling_mean, label='rolling mean (%d)' % i_lookback)
plot(rolling_std, label='rolling std (%d)' % i_lookback)
plot(Bollinger_val, label='Bollinger value')
legend(loc='best')
Bollinger_val

In [None]:
'''
(c) 2011, 2012 Georgia Tech Research Corporation
This source code is released under the New BSD license.  Please see
http://wiki.quantsoftware.org/index.php?title=QSTK_License
for license details.

Created on January, 23, 2013

@author: Sourabh Bajaj
@contact: sourabhbajaj@gatech.edu
@summary: Event Profiler Tutorial
'''


import pandas as pd
import numpy as np
import math
import copy
import QSTK.qstkutil.qsdateutil as du
import datetime as dt
import QSTK.qstkutil.DataAccess as da
import QSTK.qstkutil.tsutil as tsu
import QSTK.qstkstudy.EventProfiler as ep

"""
Accepts a list of symbols along with a dictionary of price data ('close' is used)
Returns the Event Matrix which is a pandas DataFrame
Event matrix has the following structure :
    |IBM |GOOG|XOM |MSFT| GS | JP |
(d1)|nan |nan | 1  |nan |nan | 1  |
(d2)|nan | 1  |nan |nan |nan |nan |
(d3)| 1  |nan | 1  |nan | 1  |nan |
(d4)|nan |  1 |nan | 1  |nan |nan |
...................................
...................................
Also, d1 = start date
nan = no information about any event.
1 = status bit(positively confirms the event occurrence)
"""


def find_events(ls_symbols, d_data, s_market_sym='SPY',
                f_sym_threshold=-0.03, f_market_threshold=0.02):
    ''' Finding the event dataframe

    An event is flagged for a symbol on a timestamp when the symbol's
    close-to-close return is <= f_sym_threshold while the market's
    close-to-close return on the same timestamp is >= f_market_threshold.
    The first timestamp can never hold an event because it has no
    previous close to compare against.

    @param ls_symbols: columns of d_data['close'] to scan for events
    @param d_data: dictionary whose 'close' entry is the closing-price
        dataframe (rows are timestamps, columns are symbols); it must
        contain the s_market_sym column
    @param s_market_sym: column used as the market reference
    @param f_sym_threshold: symbol return at or below which the symbol
        counts as down (default: down 3%)
    @param f_market_threshold: market return at or above which the
        market counts as up (default: up 2%)
    @return: dataframe with the index and columns of d_data['close'],
        holding 1 at every event and NaN everywhere else
    @raise KeyError: if 'close', s_market_sym or a requested symbol is
        missing
    '''
    df_close = d_data['close']

    print("Finding Events")

    # Returns of every column at once: price / previous row's price - 1.
    # The first row is NaN, and NaN never satisfies a comparison.
    df_returns = df_close / df_close.shift(1) - 1
    ts_market_up = df_returns[s_market_sym] >= f_market_threshold

    # All-NaN frame with the labels of df_close. The multiplication builds
    # a new frame, so the input data is left untouched.
    df_events = df_close * np.nan

    for s_sym in ls_symbols:
        # Event is found if the symbol is down more than 3% while the
        # market is up more than 2% (with the default thresholds)
        ts_is_event = (df_returns[s_sym] <= f_sym_threshold) & ts_market_up
        # Whole-column assignment instead of writing single cells through
        # a chained df[col].ix[row] lookup.
        df_events[s_sym] = np.where(ts_is_event, 1.0, np.nan)

    return df_events


# Driver for the event study (runs at cell level, not inside a main()).
# Timestamps for the 2008-2009 window come from du.getNYSEdays, offset by
# 16 hours -- presumably the NYSE trading days at the close; confirm in QSTK.
dt_start = dt.datetime(2008, 1, 1)
dt_end = dt.datetime(2009, 12, 31)
ldt_timestamps = du.getNYSEdays(dt_start, dt_end, dt.timedelta(hours=16))

# Symbol universe: the 'sp5002012' list from the 'Yahoo' data source, plus
# SPY, which find_events and the profiler use as the market column.
dataobj = da.DataAccess('Yahoo')
ls_symbols = dataobj.get_symbols_from_list('sp5002012')
ls_symbols.append('SPY')

# get_data's results are paired with ls_keys positionally, so each frame
# ends up stored under the key it was requested with.
ls_keys = ['open', 'high', 'low', 'close', 'volume', 'actual_close']
ldf_data = dataobj.get_data(ldt_timestamps, ls_symbols, ls_keys)
d_data = dict(zip(ls_keys, ldf_data))

# Fill gaps forward, then backward, and set anything still missing to 1.0.
for s_key in ls_keys:
    df_filled = d_data[s_key].fillna(method='ffill')
    df_filled = df_filled.fillna(method='bfill')
    d_data[s_key] = df_filled.fillna(1.0)

df_events = find_events(ls_symbols, d_data)

print("Creating Study")
ep.eventprofiler(
    df_events,
    d_data,
    i_lookback=20,
    i_lookforward=20,
    s_filename='MyEventStudy.pdf',
    b_market_neutral=True,
    b_errorbars=True,
    s_market_sym='SPY',
)


In [None]:

# Show the frames returned by the data access layer (one per key in ls_keys).
print(ldf_data)

# creating an event matrix
# find_events performs the per-symbol, per-day scan. df_close and ts_market
# are local to that function, so the scan is not repeated inline here:
# doing so fails with NameError on a fresh kernel.
df_events = find_events(ls_symbols, d_data)

# plotting the study
print("Creating Study")
ep.eventprofiler(df_events, d_data, i_lookback=20, i_lookforward=20,
                s_filename='MyEventStudy.pdf', b_market_neutral=True, b_errorbars=True,
                s_market_sym='SPY')
