In [61]:
"""
Template for implementing StrategyLearner  (c) 2016 Tucker Balch
"""

import datetime as dt
import QLearner as ql
import pandas as pd
import util as ut
import numpy as np

class StrategyLearner(object):
    """Q-learning trading strategy driven by EMA and momentum indicators.

    ``addEvidence`` trains a tabular Q-learner on one symbol's price history
    and ``testPolicy`` replays the learned policy to produce share orders.
    A state is encoded as ``ema_bin * 100 + momentum_bin * 10 + holding``,
    where both indicators are discretized into ten bins (0-9) and ``holding``
    is one of the holding codes below (1-3), so states fit in 0..999.
    """

    # Action codes exchanged with the Q-learner.
    _BUY, _SELL, _WAIT = 0, 1, 2
    # Holding codes; they double as the last digit of the state.
    _LONG, _SHORT, _FLAT = 1, 2, 3
    # Signed share count that corresponds to each holding code.
    _SHARES = {1: 200, 2: -200, 3: 0}
    # Look-back (in rows) handed to indicators() by both public entry points.
    _LOOKBACK = 10

    def author(self):
        """Return the author's user id."""
        return 'llee81'

    @staticmethod
    def indicators(sd=dt.datetime(2008, 1, 1), ed=dt.datetime(2009, 1, 1),
                   syms=None, n=10, gen_plot=False, verbose=False):
        """Compute price indicators for the first symbol in ``syms``.

        Parameters
        ----------
        sd, ed : datetime
            First and last day of the window the caller is interested in.
        syms : list of str, optional
            Symbols to load; only the first one is used for the indicators.
            Defaults to ``['AAPL']``.
        n : int
            Look-back in rows for SMA/EMA; momentum looks back ``2 * n`` rows.
        gen_plot, verbose : bool
            Accepted for interface compatibility; not used.

        Returns
        -------
        pandas.DataFrame
            Rows from ``sd`` onward with columns ``price``, ``normal_price``,
            ``sma``, ``ema``, ``momentum`` and a ``*_zscore`` column for each
            of the three indicators.
        """
        if syms is None:
            syms = ['AAPL']
        # Load 3*n extra calendar days before sd so the look-back windows have
        # history to work with on the first requested day.
        warmup_start = sd - dt.timedelta(days=3 * n)
        prices_all = ut.get_data(syms, pd.date_range(warmup_start, ed))
        return StrategyLearner._indicator_frame(prices_all[syms], n, sd)

    @staticmethod
    def _indicator_frame(prices, n, first_day):
        """Build the indicator table from a price frame (first column only).

        ``prices`` must include the warm-up rows; rows before ``first_day``
        are dropped after the indicators are computed.  Values that are
        undefined because the look-back window is not filled yet are set to 0.
        """
        if n < 1:
            raise ValueError('n must be a positive number of rows')
        price = prices.iloc[:, 0].astype(float)
        days = price.shape[0]
        if days == 0:
            raise ValueError('no price data to compute indicators from')

        rolling_mean = price.rolling(window=n, center=False).mean()

        frame = pd.DataFrame(index=prices.index)
        frame['price'] = price
        frame['normal_price'] = price / price.iloc[0]
        frame['sma'] = price / rolling_mean - 1
        frame['momentum'] = price / price.shift(2 * n) - 1.

        # EMA is seeded with the first full simple moving average and then
        # updated recursively; rows before the seed stay NaN.
        ema_raw = np.full(days, np.nan)
        if days >= n:
            weight = 2 / (n + 1.)
            values = price.values
            ema_raw[n - 1] = rolling_mean.iloc[n - 1]
            for i in range(n, days):
                ema_raw[i] = (values[i] - ema_raw[i - 1]) * weight + ema_raw[i - 1]
        frame['ema'] = price.values / ema_raw - 1.

        frame = frame.fillna(0)
        # .copy() so the column assignments below do not write into a slice.
        frame = frame.loc[first_day:].copy()
        if frame.empty:
            raise ValueError('no price data on or after %s' % first_day)

        for name in ('sma', 'ema', 'momentum'):
            column = frame[name]
            frame[name + '_zscore'] = (column - column.mean()) / column.std()
        # Re-base so the normalized price is 1.0 on the first returned row.
        frame['normal_price'] = frame['normal_price'] / frame['normal_price'].iloc[0]

        return frame[['price', 'normal_price', 'sma', 'ema', 'momentum',
                      'sma_zscore', 'ema_zscore', 'momentum_zscore']]

    # constructor
    def __init__(self, verbose=False):
        """Create an untrained learner; ``verbose`` enables progress printing."""
        self.verbose = verbose
        self.learner = None

    @staticmethod
    def _quantile_thresholds(values, level=10):
        """Return ``level`` upper bin edges holding equal counts of ``values``.

        Edge ``k`` is the ``step * (k + 1)``-th smallest value, where
        ``step = len(values) // level``.
        """
        ordered = np.sort(np.asarray(values, dtype=float).ravel())
        if ordered.shape[0] == 0:
            raise ValueError('cannot discretize an empty series')
        step = ordered.shape[0] // level
        return ordered[step * np.arange(1, level + 1) - 1]

    @staticmethod
    def _bin_index(thresholds, value):
        """Return the index of the first threshold strictly above ``value``.

        Values at or above every threshold fall into the last bin.
        """
        above = np.flatnonzero(np.asarray(thresholds) > value)
        if above.shape[0] == 0:
            return len(thresholds) - 1
        return int(above[0])

    @staticmethod
    def _indicator_states(indicator_frame, index):
        """Return ``ema_bin * 100 + momentum_bin * 10`` for every label in ``index``.

        Bin edges are derived from the whole ``indicator_frame``.  A label
        missing from the frame raises ``KeyError`` (on current pandas).
        """
        ema_edges = StrategyLearner._quantile_thresholds(indicator_frame['ema'])
        mom_edges = StrategyLearner._quantile_thresholds(indicator_frame['momentum'])
        aligned = indicator_frame.loc[index, ['ema', 'momentum']]
        to_bin = StrategyLearner._bin_index
        return [to_bin(ema_edges, ema) * 100 + to_bin(mom_edges, mom) * 10
                for ema, mom in zip(aligned['ema'].values,
                                    aligned['momentum'].values)]

    @staticmethod
    def _next_holding(holding, action):
        """Return the holding code that results from ``action`` in ``holding``.

        An opposing action while long or short only closes the position; it
        takes a second action to open the opposite side.
        """
        cls = StrategyLearner
        if holding == cls._LONG:
            return cls._LONG if action in (cls._BUY, cls._WAIT) else cls._FLAT
        if holding == cls._SHORT:
            return cls._SHORT if action in (cls._SELL, cls._WAIT) else cls._FLAT
        if holding == cls._FLAT:
            if action == cls._BUY:
                return cls._LONG
            if action == cls._SELL:
                return cls._SHORT
            return cls._FLAT
        raise ValueError('unknown holding code: %r' % (holding,))

    @staticmethod
    def _step_position(prices, i, holding, action):
        """Apply ``action`` and return ``(new_holding, reward)`` for row ``i``.

        The reward is the per-share price change from row ``i - 1`` to row
        ``i`` as seen by the resulting position: positive moves pay a long,
        negative moves pay a short, and a flat position earns 0.
        """
        cls = StrategyLearner
        new_holding = cls._next_holding(holding, action)
        if new_holding == cls._LONG:
            return new_holding, prices[i] - prices[i - 1]
        if new_holding == cls._SHORT:
            return new_holding, prices[i - 1] - prices[i]
        return new_holding, 0

    def _load(self, symbol, sd, ed):
        """Return ``(index, prices, base_states)`` for ``symbol`` over sd..ed.

        ``prices`` is a float array of the symbol's prices and ``base_states``
        holds the indicator part of the state for each row of ``index``.
        """
        prices_all = ut.get_data([symbol], pd.date_range(sd, ed))
        series = prices_all[symbol]
        indicator_frame = self.indicators(sd=sd, ed=ed, syms=[symbol],
                                          n=self._LOOKBACK, gen_plot=False,
                                          verbose=False)
        base_states = self._indicator_states(indicator_frame, series.index)
        return series.index, series.values.astype(float), base_states

    # this method should create a QLearner, and train it for trading
    def addEvidence(self, symbol="IBM",
                    sd=dt.datetime(2008, 1, 1),
                    ed=dt.datetime(2009, 1, 1),
                    sv=10000):
        """Create a fresh Q-learner and train it on ``symbol`` over sd..ed.

        Each epoch walks the price history once.  Training stops after at
        most 50 epochs, or earlier once the total reward of two consecutive
        epochs (past the third) differs by less than 0.005.  ``sv`` is
        accepted for interface compatibility and is not used.

        Raises
        ------
        ValueError
            If no price rows are available for the requested window.
        """
        self.learner = ql.QLearner(num_states=1000,
                                   num_actions=3,
                                   alpha=0.2,
                                   gamma=0.9,
                                   rar=0.98,
                                   radr=0.999,
                                   dyna=0,
                                   verbose=False)

        # Prices and indicator bins do not change between epochs, so they are
        # computed once up front.
        _, prices, base_states = self._load(symbol, sd, ed)
        days = len(base_states)
        if days == 0:
            raise ValueError('no price data for %s between %s and %s'
                             % (symbol, sd, ed))

        max_epochs = 50
        previous_total = None
        for epoch in range(max_epochs):
            total_reward = 0
            holding = self._FLAT
            action = self.learner.querysetstate(base_states[0] + holding)

            for i in range(1, days):
                holding, reward = self._step_position(prices, i, holding, action)
                total_reward = total_reward + reward
                action = self.learner.query(base_states[i] + holding, reward)

            if self.verbose:
                print(total_reward)
            if epoch > 2 and abs(total_reward - previous_total) < 0.005:
                if self.verbose:
                    print('total run:  %s' % epoch)
                break
            previous_total = total_reward

    # this method should use the existing policy and test it against new data
    def testPolicy(self, symbol="AAPL",
                   sd=dt.datetime(2009, 1, 1),
                   ed=dt.datetime(2010, 1, 1),
                   sv=100000):
        """Replay the trained policy on ``symbol`` over sd..ed.

        Returns a single-column DataFrame named ``order`` indexed by trading
        day, holding the signed number of shares traded that day (+200, -200
        or 0).  ``sv`` is accepted for interface compatibility and is not
        used.

        Raises
        ------
        RuntimeError
            If ``addEvidence`` has not been called first.
        """
        learner = getattr(self, 'learner', None)
        if learner is None:
            raise RuntimeError('addEvidence must be called before testPolicy')

        # NOTE(review): the indicator bin edges are re-derived from this
        # evaluation window rather than reused from training, so the same
        # indicator value can map to a different state than it did while
        # learning -- confirm this is intended.
        index, _, base_states = self._load(symbol, sd, ed)
        days = len(base_states)
        orders = np.zeros(days)
        if days == 0:
            return pd.DataFrame({'order': orders}, index=index)

        holding = self._FLAT
        action = learner.querysetstate(base_states[0] + holding)
        for i in range(days):
            new_holding = self._next_holding(holding, action)
            # The order is whatever moves the share count from the old
            # position to the new one.
            orders[i] = self._SHARES[new_holding] - self._SHARES[holding]
            holding = new_holding
            action = learner.querysetstate(base_states[i] + holding)

        return pd.DataFrame({'order': orders}, index=index)
    
            
# instantiate the strategy learner
learner = StrategyLearner(verbose=True)

# training window: AAPL from the start of 2008 to the end of 2009
sym = "AAPL"
stdate = dt.datetime(2008, 1, 1)
enddate = dt.datetime(2009, 12, 31)

# train the learner
learner.addEvidence(symbol=sym, sd=stdate, ed=enddate, sv=10000)

# NOTE: the policy is replayed over the same window it was trained on, so the
# trades printed below are an in-sample result.
df_trades = learner.testPolicy(symbol=sym, sd=stdate, ed=enddate, sv=100000)

# print() with a single argument behaves the same on Python 2 and 3
print(df_trades)

-200.63
42.67
185.83
156.01
245.29
207.25
306.33
322.49
364.38
346.49
313.12
342.15
346.62
346.62
total run:  13
            order
2008-01-02  200.0
2008-01-03 -200.0
2008-01-04    0.0
2008-01-07 -200.0
2008-01-08    0.0
2008-01-09  200.0
2008-01-10    0.0
2008-01-11 -200.0
2008-01-14  200.0
2008-01-15 -200.0
2008-01-16  200.0
2008-01-17    0.0
2008-01-18    0.0
2008-01-22    0.0
2008-01-23    0.0
2008-01-24    0.0
2008-01-25    0.0
2008-01-28    0.0
2008-01-29    0.0
2008-01-30    0.0
2008-01-31    0.0
2008-02-01    0.0
2008-02-04    0.0
2008-02-05    0.0
2008-02-06    0.0
2008-02-07    0.0
2008-02-08    0.0
2008-02-11    0.0
2008-02-12 -200.0
2008-02-13  200.0
...           ...
2009-11-18 -200.0
2009-11-19 -200.0
2009-11-20  200.0
2009-11-23    0.0
2009-11-24 -200.0
2009-11-25    0.0
2009-11-27    0.0
2009-11-30  200.0
2009-12-01    0.0
2009-12-02    0.0
2009-12-03    0.0
2009-12-04 -200.0
2009-12-07  200.0
2009-12-08 -200.0
2009-12-09  200.0
2009-12-10    0.0
2009-12-11    0.0
2009-

In [34]:
# Scratch check: a comparison that matches nothing selects zero elements.
signal = np.array([1, 2, 3, 4, 4, 3, 2, 1, 0, 3, 2, 1, 0, 0, 1, 1, 4, 8, 7, 6, 5, 0])
# print() with a single argument behaves the same on Python 2 and 3
print(np.count_nonzero(signal > 99))


0


In [57]:
"""
Test a Strategy Learner.  (c) 2016 Tucker Balch
"""

import pandas as pd
import datetime as dt
import util as ut
import StrategyLearner as sl

def test_code(verb=True):
    """Train a StrategyLearner on AAPL 2008-2009 and sanity-check its 2010-2011 trades.

    Problems are reported by printing a message; nothing is raised or
    returned.  When ``verb`` is true the learner runs verbosely and the
    resulting trades are printed.
    """

    # instantiate the strategy learner
    # NOTE(review): the module is imported as `sl` above, but the class is
    # referenced by its bare name, so this only works where `StrategyLearner`
    # is already bound in the namespace -- confirm which one should be tested.
    learner = StrategyLearner(verbose=verb)

    # set parameters for training the learner
    sym = "AAPL"
    stdate = dt.datetime(2008, 1, 1)
    enddate = dt.datetime(2009, 12, 31)

    # train the learner
    learner.addEvidence(symbol=sym, sd=stdate, ed=enddate, sv=100000)

    # set parameters for testing
    sym = "AAPL"
    stdate = dt.datetime(2010, 1, 1)
    enddate = dt.datetime(2011, 12, 31)

    # get some data for reference
    syms = [sym]
    dates = pd.date_range(stdate, enddate)
    prices_all = ut.get_data(syms, dates)
    prices = prices_all[syms]  # only portfolio symbols

    # test the learner
    df_trades = learner.testPolicy(symbol=sym, sd=stdate, ed=enddate, sv=100000)

    # a few sanity checks
    # df_trades should be a single column DataFrame (not a series) with one
    # row per price row, and the running share total must stay within
    # +/-500 shares
    if not isinstance(df_trades, pd.DataFrame):
        print("Returned result is not a DataFrame")
    if prices.shape != df_trades.shape:
        print("Returned result is not the right shape")
    holdings = abs(df_trades.cumsum()).values
    if (holdings > 500).any():
        print("Returned result violates holding restrictions (more than 500 shares)")

    if verb:
        print('df_trades')
        print(df_trades)

# Run the sanity check only when this code is executed directly.
if "__main__" == __name__:
    test_code(verb=True)


-104.15
df_trades
            order
2010-01-04  200.0
2010-01-05    0.0
2010-01-06    0.0
2010-01-07    0.0
2010-01-08 -200.0
2010-01-11  200.0
2010-01-12    0.0
2010-01-13    0.0
2010-01-14    0.0
2010-01-15    0.0
2010-01-19 -200.0
2010-01-20 -200.0
2010-01-21    0.0
2010-01-22    0.0
2010-01-25  200.0
2010-01-26 -200.0
2010-01-27    0.0
2010-01-28    0.0
2010-01-29  200.0
2010-02-01 -200.0
2010-02-02  200.0
2010-02-03    0.0
2010-02-04 -200.0
2010-02-05    0.0
2010-02-08    0.0
2010-02-09  200.0
2010-02-10 -200.0
2010-02-11    0.0
2010-02-12    0.0
2010-02-16    0.0
...           ...
2011-11-17    0.0
2011-11-18  200.0
2011-11-21 -200.0
2011-11-22  200.0
2011-11-23 -200.0
2011-11-25  200.0
2011-11-28  200.0
2011-11-29    0.0
2011-11-30 -200.0
2011-12-01 -200.0
2011-12-02    0.0
2011-12-05    0.0
2011-12-06    0.0
2011-12-07    0.0
2011-12-08    0.0
2011-12-09    0.0
2011-12-12  200.0
2011-12-13  200.0
2011-12-14    0.0
2011-12-15    0.0
2011-12-16    0.0
2011-12-19    0.0
2011-12-20