# Table of Contents
 <p><div class="lev1 toc-item"><a href="#Event-Profiler" data-toc-modified-id="Event-Profiler-1"><span class="toc-item-num">1&nbsp;&nbsp;</span>Event Profiler</a></div><div class="lev1 toc-item"><a href="#Event-Profiler-Function" data-toc-modified-id="Event-Profiler-Function-2"><span class="toc-item-num">2&nbsp;&nbsp;</span>Event Profiler Function</a></div><div class="lev1 toc-item"><a href="#Find-Events" data-toc-modified-id="Find-Events-3"><span class="toc-item-num">3&nbsp;&nbsp;</span>Find Events</a></div><div class="lev1 toc-item"><a href="#Main" data-toc-modified-id="Main-4"><span class="toc-item-num">4&nbsp;&nbsp;</span>Main</a></div>

# Event Profiler

The QSTK event profiler works from a matrix (the event matrix) of dates by stocks: put a 1 wherever an event occurs and NaN everywhere else, then feed the matrix into the profiler.
```
# Event is found if the symbol is down more than 3% while the
# market is up more than 2%
if f_symreturn_today <= -0.03 and f_marketreturn_today >= 0.02:
    df_events[s_sym].ix[ldt_timestamps[i]] = 1
```
```
ep.eventprofiler(df_events, d_data, i_lookback=20, i_lookforward=20,
            s_filename='MyEventStudy.pdf', b_market_neutral=True, b_errorbars=True,
            s_market_sym='SPY')
```
>df_events: events matrix  
d_data: stock data  
i_lookback, i_lookforward: number of days to include before and after each event.  Note that events in the first `i_lookback` days (20 by default) and the last `i_lookforward` days are dropped because there is not enough data around them.  
b_market_neutral: Look at returns with the market return subtracted out (i.e. assuming beta = 1)  
s_market_sym='SPY': Symbol to use for the market overall.

http://wiki.quantsoftware.org/index.php?title=QSTK_Tutorial_9
* * *

In [1]:
import pandas as pd
import numpy as np
import math
import copy
import QSTK.qstkutil.qsdateutil as du
import datetime as dt
import QSTK.qstkutil.DataAccess as da
import QSTK.qstkutil.tsutil as tsu
import matplotlib.pyplot as plt

%matplotlib inline

# Debug marker confirming that the import cell finished running.
# The call form prints the same text on both Python 2 and Python 3.
print("HERE")

HERE


# Event Profiler Function
>df_events: events matrix  
d_data: stock data  
i_lookback, i_lookforward: number of days to include before and after each event.  Note that events in the first `i_lookback` days (20 by default) and the last `i_lookforward` days are dropped because there is not enough data around them.  
b_market_neutral: Look at returns with the market return subtracted out (i.e. assuming beta = 1)  
s_market_sym='SPY': Symbol to use for the market overall.

In [2]:
'''
(c) 2011, 2012 Georgia Tech Research Corporation
This source code is released under the New BSD license.  Please see
http://wiki.quantsoftware.org/index.php?title=QSTK_License
for license details.

Created on Jan 16, 2013

@author: Sourabh Bajaj
@contact: sourabh@sourabhbajaj.com
@summary: EventProfiler

'''
def eventprofiler(df_events_arg, d_data, i_lookback=20, i_lookforward=20,
                s_filename='study', b_market_neutral=True, b_errorbars=True,
                s_market_sym='SPY'):
    ''' Event Profiler for an event matrix

    Collects the returns in a window around every event, averages the
    cumulative return (rebased to 1.0 on the event day) across all events
    and saves the resulting chart as a PDF.

    @param df_events_arg: event matrix (rows = dates, columns = symbols)
        holding 1 where an event occurred and NaN elsewhere. Not modified.
    @param d_data: dict of data frames; only d_data['close'] is read.
    @param i_lookback: number of rows before the event included in the window.
    @param i_lookforward: number of rows after the event included in the window.
    @param s_filename: path handed to plt.savefig (written in PDF format).
    @param b_market_neutral: if true, subtract the market symbol's return
        from every symbol's return and drop the market symbol from the study.
    @param b_errorbars: if true, draw standard-deviation error bars from the
        event day onwards.
    @param s_market_sym: column name of the market symbol.
    @return: None. The chart is written to s_filename.
    @raise AssertionError: when no event survives the edge trimming.
    '''
    df_close = d_data['close']

    # Work on a private, writable float copy of the prices. returnize0 is
    # called purely for its in-place effect (its return value was never used
    # here); handing it df.values directly silently does nothing whenever
    # pandas returns a copy instead of a view.
    # NOTE(review): presumably returnize0 turns prices into daily returns
    # with the first row set to 0 - confirm against QSTK tsutil.
    na_rets = np.array(df_close.values, dtype=float)
    tsu.returnize0(na_rets)
    df_rets = pd.DataFrame(na_rets, index=df_close.index,
                           columns=df_close.columns)

    # Do not modify the original event dataframe.
    df_events = df_events_arg.copy()

    if b_market_neutral:
        # axis=0 aligns the market series on the row index. A plain
        # `frame - series` aligns on column labels instead and yields NaN.
        df_rets = df_rets.sub(df_rets[s_market_sym], axis=0)
        del df_rets[s_market_sym]
        del df_events[s_market_sym]

    # Removing the starting and the end events: they do not have a full
    # window of data around them.
    df_events.iloc[:i_lookback, :] = np.nan
    if i_lookforward > 0:
        # Guarded because a slice of `-0:` would select (and wipe) every row.
        df_events.iloc[-i_lookforward:, :] = np.nan

    # Number of events
    i_no_events = int(np.logical_not(np.isnan(df_events.values)).sum())
    assert i_no_events > 0, "Zero events in the event matrix"

    # Looking for the events and collecting one return window per event.
    l_windows = []
    for s_sym in df_events.columns:
        na_sym_rets = df_rets[s_sym].values
        for i_row in np.flatnonzero(df_events[s_sym].values == 1):
            l_windows.append(
                na_sym_rets[i_row - i_lookback:i_row + 1 + i_lookforward])
    assert l_windows, "Zero events in the event matrix"
    na_event_rets = np.vstack(l_windows)

    # Computing cumulative returns, rebased so the event day equals 1.0
    na_event_rets = np.cumprod(na_event_rets + 1, axis=1)
    na_event_rets = (na_event_rets.T / na_event_rets[:, i_lookback]).T

    # Study Params
    na_mean = np.mean(na_event_rets, axis=0)
    na_std = np.std(na_event_rets, axis=0)
    li_time = np.arange(-i_lookback, i_lookforward + 1)

    # Plotting the chart
    plt.clf()
    # axhline's xmin/xmax are axes fractions (0..1), not data coordinates;
    # the defaults already span the full width of the plot.
    plt.axhline(y=1.0, color='k')
    if b_errorbars:
        plt.errorbar(li_time[i_lookback:], na_mean[i_lookback:],
                    yerr=na_std[i_lookback:], ecolor='#AAAAFF',
                    alpha=0.1)
    plt.plot(li_time, na_mean, linewidth=3, label='mean', color='b')
    plt.xlim(-i_lookback - 1, i_lookforward + 1)
    if b_market_neutral:
        plt.title('Market Relative mean return of ' +
                str(i_no_events) + ' events')
    else:
        plt.title('Mean return of ' + str(i_no_events) + ' events')
    plt.xlabel('Days')
    plt.ylabel('Cumulative Returns')
    plt.savefig(s_filename, format='pdf')

# Find Events

This function only uses one CPU; how can it be parallelized?

In [3]:
def find_events(ls_symbols, d_data, s_market_sym='SPY',
                f_sym_drop=-0.03, f_market_rise=0.02):
    ''' Finding the event dataframe

    An event is flagged for a symbol on a given row when the symbol's
    close-to-close return is at or below f_sym_drop while the market
    symbol's return on the same row is at or above f_market_rise.

    @param ls_symbols: symbols to scan; each must be a column of
        d_data['close'].
    @param d_data: dict of data frames; only d_data['close'] is read and it
        is not modified.
    @param s_market_sym: column name of the market symbol.
    @param f_sym_drop: symbol return threshold (default: down 3%).
    @param f_market_rise: market return threshold (default: up 2%).
    @return: float data frame with the same index and columns as
        d_data['close'], holding 1 at every event and NaN elsewhere. The
        first row never holds an event because it has no previous close.
    @raise KeyError: if the market symbol or a requested symbol is not a
        column of d_data['close'].
    '''
    df_close = d_data['close']

    print("Finding Events")

    ls_missing = [s_sym for s_sym in ls_symbols
                  if s_sym not in df_close.columns]
    if ls_missing:
        raise KeyError('Symbols missing from close data: %s' % ls_missing)

    # Close-to-close returns for every column at once. Working by row
    # position makes this independent of the index type (strings, datetimes).
    df_rets = df_close / df_close.shift(1) - 1

    # Event is found if the symbol is down more than 3% while the
    # market is up more than 2% (with the default thresholds).
    # Comparisons against NaN are False, so the first row is never an event.
    na_market_up = (df_rets[s_market_sym] >= f_market_rise).values
    na_sym_down = (df_rets <= f_sym_drop).values

    # Only the requested symbols may carry events.
    na_requested = np.asarray(df_close.columns.isin(ls_symbols))

    na_is_event = (na_sym_down
                   & na_market_up[:, np.newaxis]
                   & na_requested[np.newaxis, :])

    return pd.DataFrame(np.where(na_is_event, 1.0, np.nan),
                        index=df_close.index, columns=df_close.columns)

# Main

In [4]:
# Study window: 1 Jan 2008 through 31 Dec 2009.
dt_start = dt.datetime(2008, 1, 1)
dt_end = dt.datetime(2009, 12, 31)
# Timestamps returned by QSTK's getNYSEdays for that window. The timedelta
# argument is 16 hours - presumably the 16:00 market close; TODO confirm.
ldt_timestamps = du.getNYSEdays(dt_start, dt_end, dt.timedelta(hours=16))

# Symbol universe: the 'sp5002012' list from the 'Yahoo' data source, plus
# SPY, which the study below uses as the market symbol.
dataobj = da.DataAccess('Yahoo')
ls_symbols = dataobj.get_symbols_from_list('sp5002012')
ls_symbols.append('SPY')

In [5]:
# Data fields requested for every symbol.
ls_keys = ['open', 'high', 'low', 'close', 'volume', 'actual_close']

# get_data returns a list; it is zipped with ls_keys below, so it is expected
# to hold one entry per key, in the same order.
ldf_data = dataobj.get_data(ldt_timestamps, ls_symbols, ls_keys)

# Map each field name to its data so later cells can use d_data['close'].
d_data = dict(zip(ls_keys, ldf_data))

In [None]:
# Only the last expression of a cell is displayed, so this first type() call
# produces no visible output.
type(d_data['close'].index)
# Replace the 'close' frame's index with its string form. This mutates d_data
# in place; the frames under the other keys keep their original index.
d_data['close'].index = d_data['close'].index.astype(str)
type(d_data['close'].index)


pandas.indexes.base.Index

In [None]:
# Fill gaps in every field: carry the last known value forward, back-fill
# whatever is still missing at the start, and fall back to 1.0 for anything
# that is still missing. ffill()/bfill() replace the deprecated
# fillna(method=...) spelling.
for s_key in ls_keys:
    d_data[s_key] = d_data[s_key].ffill().bfill().fillna(1.0)

# Progress markers use the call form, which prints the same text on both
# Python 2 and Python 3.
print("HERE")

df_events = find_events(ls_symbols, d_data)
print("Creating Study")
eventprofiler(df_events, d_data, i_lookback=20, i_lookforward=20,
            s_filename='MyEventStudy.pdf', b_market_neutral=True, b_errorbars=True,
            s_market_sym='SPY')

HERE
Finding Events
