In [73]:
"""
Template for implementing StrategyLearner  (c) 2016 Tucker Balch
"""

import datetime as dt
import QLearner as ql
import pandas as pd
import util as ut
import numpy as np
from datetime import datetime, timedelta

class StrategyLearner(object):
    """Q-learning based trading strategy for a single symbol.

    The learner discretises two indicators (named 'ema' and 'momentum' by
    ``indicators``) into ten buckets each and combines them with the current
    holding code into one integer state:

        state = ema_bucket * 100 + momentum_bucket * 10 + holding

    Action codes (returned by the Q-learner):  0 = BUY, 1 = SELL, 2 = NOTHING.
    Holding codes (tracked by this class):     1 = LONG, 2 = SHORT, 3 = NONE.
    """

    # Action codes produced by the Q-learner.
    ACTION_BUY = 0
    ACTION_SELL = 1
    ACTION_NOTHING = 2

    # Holding codes used as the last digit of the state.
    HOLD_LONG = 1
    HOLD_SHORT = 2
    HOLD_NONE = 3

    TRADE_SIZE = 200         # shares moved by a single order
    MAX_EPOCHS = 50          # upper bound on training passes over the data
    CONVERGENCE_TOL = 0.005  # stop once the total reward changes less than this

    def testPolicy(self, symbol = "AAPL", sd=dt.datetime(2009,1,1), ed=dt.datetime(2010,1,1),sv = 100000):
        """Replay the learned policy over [sd, ed] and return the orders.

        Parameters:
            symbol: ticker to trade.
            sd, ed: first and last calendar day of the test window.
            sv: starting portfolio value; accepted for interface
                compatibility but not used by this implementation.

        Returns:
            A single-column DataFrame named 'order', indexed by trading day,
            holding +TRADE_SIZE, -TRADE_SIZE or 0 for every day.

        Raises:
            AttributeError: if addEvidence() has not been called first.
        """
        if getattr(self, 'learner', None) is None:
            raise AttributeError(
                "StrategyLearner has no trained learner; "
                "call addEvidence() before testPolicy()")

        # NOTE(review): this recomputes the discretisation thresholds from the
        # test window, replacing the ones derived during training. Behaviour is
        # kept as in the original; confirm whether that is intended.
        prices, df_ema, df_momentum = self._load_features(symbol, sd, ed)

        current_hold = self.HOLD_NONE
        state = self.get_state(prices.index[0], current_hold, df_ema, df_momentum)
        action = self.learner.querysetstate(state)

        # Orders are collected in a float array instead of being written
        # row by row into a slice of the price frame.
        orders = np.zeros(prices.shape[0])
        hold_amount = 0
        for i, day in enumerate(prices.index):
            if action == self.ACTION_BUY and hold_amount != self.TRADE_SIZE:
                hold_amount += self.TRADE_SIZE
                orders[i] = self.TRADE_SIZE
            elif action == self.ACTION_SELL and hold_amount != -self.TRADE_SIZE:
                hold_amount -= self.TRADE_SIZE
                orders[i] = -self.TRADE_SIZE
            # otherwise the order for this day stays 0

            # The reward is irrelevant while testing; only the holding matters.
            current_hold, _reward = self.make_trade(prices, i, current_hold, action)
            state = self.get_state(day, current_hold, df_ema, df_momentum)
            action = self.learner.querysetstate(state)

        return pd.DataFrame({'order': orders}, index=prices.index)

    def author(self):
        """Return the author's user id."""
        return 'llee81'

    # constructor
    def __init__(self, verbose = False):
        """Create an untrained learner.

        Parameters:
            verbose: when True, addEvidence prints the total reward of every
                training pass and the pass at which it stopped early.
        """
        self.verbose = verbose
        # Populated by addEvidence() / _load_features().
        self.learner = None
        self.thres_ema = None
        self.thres_momentum = None

    # this method should create a QLearner, and train it for trading
    def addEvidence(self, symbol = "IBM", \
        sd=dt.datetime(2008,1,1), \
        ed=dt.datetime(2009,1,1), \
        sv = 10000):
        """Create a fresh QLearner and train it on `symbol` over [sd, ed].

        Each training pass walks the whole price series once. Training stops
        after MAX_EPOCHS passes, or earlier once (after the third pass) the
        total reward differs from the previous pass by less than
        CONVERGENCE_TOL.

        Parameters:
            symbol: ticker to train on.
            sd, ed: first and last calendar day of the training window.
            sv: starting portfolio value; accepted for interface
                compatibility but not used by this implementation.
        """
        # Actions: BUY, SELL, NOTHING - 0,1,2
        self.learner = ql.QLearner(num_states=1000,
                                   num_actions=3,
                                   alpha=0.2,
                                   gamma=0.9,
                                   rar=0.98,
                                   radr=0.999,
                                   dyna=0,
                                   verbose=False)

        prices, df_ema, df_momentum = self._load_features(symbol, sd, ed)

        previous_total = None
        for iteration in range(self.MAX_EPOCHS):
            total_reward = 0
            current_hold = self.HOLD_NONE
            state = self.get_state(prices.index[0], current_hold, df_ema, df_momentum)
            action = self.learner.querysetstate(state)

            # Start at 1: the reward is the price change since the previous day.
            for i in range(1, prices.shape[0]):
                current_hold, r = self.make_trade(prices, i, current_hold, action)
                state = self.get_state(prices.index[i], current_hold, df_ema, df_momentum)
                total_reward = total_reward + r
                action = self.learner.query(state, r)

            if self.verbose:
                print(total_reward)

            converged = (iteration > 2 and
                         abs(total_reward - previous_total) < self.CONVERGENCE_TOL)
            previous_total = total_reward
            if converged:
                if self.verbose:
                    print('total run:  %d' % iteration)
                break

    def _load_features(self, symbol, sd, ed):
        """Load prices and indicators for `symbol` and refresh the thresholds.

        Returns:
            (prices, df_ema, df_momentum): the single-column price frame for
            `symbol` and one single-column frame per indicator. As a side
            effect self.thres_ema and self.thres_momentum are recomputed from
            the loaded indicator values.
        """
        dates = pd.date_range(sd, ed)
        prices_all = ut.get_data([symbol], dates)  # automatically adds SPY
        # Copy so the caller never works on a view of prices_all.
        prices = prices_all[[symbol]].copy()

        df_indicators = self.indicators(sd=sd, ed=ed, syms=[symbol],
                                        gen_plot=False, n=10, verbose=False)
        df_ema = df_indicators[['ema']]
        df_momentum = df_indicators[['momentum']]
        self.thres_ema = self.discretize(df_ema)
        self.thres_momentum = self.discretize(df_momentum)
        return prices, df_ema, df_momentum

    # convert the discretize values
    def discretize(self,values,level=10):
        """Compute `level` bucket thresholds from the first column of `values`.

        The column is sorted ascending and split into `level` chunks of
        ``len(values) // level`` rows; threshold k is the last value of
        chunk k. Rows are selected by position, so the result does not depend
        on the frame's index labels.

        Parameters:
            values: DataFrame whose first column holds the indicator values.
            level: number of thresholds to produce.

        Returns:
            A float numpy array of length `level`. When `values` has fewer
            rows than `level`, the chunk size is 0 and every threshold is the
            largest value (position -1).
        """
        step_size = values.shape[0] // level
        ordered = values.iloc[:, 0].sort_values().values
        positions = step_size * np.arange(1, level + 1) - 1
        return ordered[positions].astype(float)

    # convert the discretize values
    def get_discrete(self,thresholds,my_value):
        """Map `my_value` to a bucket index using `thresholds`.

        Returns the index of the first threshold strictly greater than
        `my_value`, or the last index when no threshold exceeds it.
        """
        above = np.where(thresholds > my_value)[0]
        if len(above) == 0:
            return len(thresholds) - 1
        return above[0]

    #convert indicator value to a state
    def get_state(self,idx,my_holding,df_ema,df_momentum):
        """Encode the indicators at row label `idx` plus the holding as a state.

        Parameters:
            idx: index label of the row to read in both indicator frames.
            my_holding: current holding code (1 = LONG, 2 = SHORT, 3 = NONE).
            df_ema, df_momentum: frames whose first column holds the indicator.

        Returns:
            ema_bucket * 100 + momentum_bucket * 10 + my_holding, using
            self.thres_ema and self.thres_momentum for the bucketing.
        """
        ema = df_ema.loc[idx, df_ema.columns[0]]
        momentum = df_momentum.loc[idx, df_momentum.columns[0]]
        ema_bucket = self.get_discrete(self.thres_ema, ema)
        momentum_bucket = self.get_discrete(self.thres_momentum, momentum)
        return ema_bucket * 100 + momentum_bucket * 10 + my_holding

    def indicators(self,sd = dt.datetime(2008,1,1), ed = dt.datetime(2009,1,1),syms = ['AAPL'], n=10, gen_plot=False, verbose=False):
        """Build the indicator frame for the first symbol in `syms`.

        Data is loaded from ``3 * n`` calendar days before `sd` up to `ed`.

        Parameters:
            sd, ed: requested window.
            syms: list of symbols; both indicators use the first one.
            n: look-back size, used only to extend the start of the window.
            gen_plot, verbose: accepted for interface compatibility; unused.

        Returns:
            DataFrame indexed like the loaded prices with two columns:
            'ema'      - NOTE: despite its name this is the raw Volume series,
                         not an exponential moving average.
            'momentum' - 3-row percentage change of the price, with the
                         leading undefined rows set to 0.
        """
        padded_sd = sd + timedelta(days=-3 * n)
        dates = pd.date_range(padded_sd, ed)

        prices = ut.get_data(syms, dates)[syms]  # get_data automatically adds SPY
        volume = ut.get_data(syms, dates, colname="Volume")[syms]

        momentum = prices.iloc[:, 0].pct_change(periods=3).fillna(0)
        return pd.DataFrame({'ema': volume.iloc[:, 0], 'momentum': momentum},
                            index=prices.index,
                            columns=['ema', 'momentum'])

    @staticmethod
    def _price_change(data, i):
        """Return price[i] - price[i-1] from the first column of `data`.

        Row 0 has no previous row, so its change is defined as 0.0 instead of
        wrapping around to the last row.
        """
        if i == 0:
            return 0.0
        return data.iloc[i, 0] - data.iloc[i - 1, 0]

    def make_trade(self,data,i,my_holding,my_action):
        """Apply `my_action` to `my_holding` at row position `i`.

        Parameters:
            data: frame whose first column holds the price.
            i: row position (not label) of the current day.
            my_holding: holding code (1 = LONG, 2 = SHORT, 3 = NONE).
            my_action: action code (0 = BUY, 1 = SELL, 2 = NOTHING).

        Returns:
            (new_holding, reward). The reward is the day-over-day price
            change for a long position, its negation for a short position,
            and 0 when a position is closed or nothing is held. An unknown
            holding code yields (None, None).
        """
        if my_holding == self.HOLD_LONG:
            if my_action in (self.ACTION_BUY, self.ACTION_NOTHING):
                return self.HOLD_LONG, self._price_change(data, i)
            return self.HOLD_NONE, 0  # selling closes the long position

        if my_holding == self.HOLD_SHORT:
            if my_action in (self.ACTION_SELL, self.ACTION_NOTHING):
                return self.HOLD_SHORT, -self._price_change(data, i)
            return self.HOLD_NONE, 0  # buying closes the short position

        if my_holding == self.HOLD_NONE:
            if my_action == self.ACTION_BUY:
                return self.HOLD_LONG, self._price_change(data, i)
            if my_action == self.ACTION_SELL:
                return self.HOLD_SHORT, -self._price_change(data, i)
            return self.HOLD_NONE, 0

        return None, None
            
# Driver: train a verbose StrategyLearner on the synthetic series and then
# replay the learned policy over the very same window (in-sample check).
learner = StrategyLearner(verbose=True)

# Symbol and two-year window shared by training and testing.
sym = "SINE_FAST_NOISE"
stdate = dt.datetime(2008, 1, 1)
enddate = dt.datetime(2009, 12, 31)

# Training pass; prints the total reward of every epoch because verbose=True.
learner.addEvidence(symbol=sym, sd=stdate, ed=enddate, sv=10000)

# Replay the policy and show the resulting per-day orders.
df_trades = learner.testPolicy(symbol=sym, sd=stdate, ed=enddate, sv=100000)

print(df_trades)

238.96455319
833.1658987
1227.04707429
1747.84621213
2019.84353968
2175.46589423
2216.18964739
2301.57348611
2201.63457342
2292.64458743
2230.23936667
2289.92428758
2302.15747652
2289.50159225
2302.15747652
2302.15747652
total run:  15
            order
2008-01-02  200.0
2008-01-03 -200.0
2008-01-04    0.0
2008-01-07    0.0
2008-01-08  200.0
2008-01-09 -200.0
2008-01-10 -200.0
2008-01-11  200.0
2008-01-14 -200.0
2008-01-15    0.0
2008-01-16  200.0
2008-01-17  200.0
2008-01-18 -200.0
2008-01-22  200.0
2008-01-23 -200.0
2008-01-24    0.0
2008-01-25 -200.0
2008-01-28    0.0
2008-01-29  200.0
2008-01-30    0.0
2008-01-31 -200.0
2008-02-01  200.0
2008-02-04    0.0
2008-02-05 -200.0
2008-02-06    0.0
2008-02-07    0.0
2008-02-08    0.0
2008-02-11    0.0
2008-02-12    0.0
2008-02-13  200.0
...           ...
2009-11-18    0.0
2009-11-19  200.0
2009-11-20 -200.0
2009-11-23  200.0
2009-11-24    0.0
2009-11-25 -200.0
2009-11-27 -200.0
2009-11-30  200.0
2009-12-01    0.0
2009-12-02    0.0
2009-12-

In [68]:

# Draw 20 values from np.random.rand() and print each on its own line.
for i in range(20):
    sample = np.random.rand()
    print(sample)

0.263392992137
0.758408126408
0.335236089218
0.770362848713
0.206508329258
0.783025760477
0.603920035055
0.240142022738
0.677365329826
0.272943496269
0.708151613045
0.287974873952
0.00730979428806
0.145710120306
0.0895434855684
0.42097291134
0.518748686985
0.101215410014
0.185229840202
0.671916873507


In [58]:
"""
Test a Strategy Learner.  (c) 2016 Tucker Balch
"""

import pandas as pd
import datetime as dt
import util as ut
import StrategyLearner as sl

def test_code(verb = True):
    """Train a StrategyLearner in-sample, test it out-of-sample, sanity-check it.

    The learner is trained on SINE_FAST_NOISE for 2008-2009 and its policy is
    replayed on 2010-2011. The returned trades are then checked for type,
    shape and a maximum absolute holding of 500 shares; every violation is
    reported with a printed message (nothing is raised).

    Parameters:
        verb: passed to the learner as `verbose`; when true, the trades are
            also printed together with a running 'total' holding column.
    """
    # NOTE(review): this uses the bare name StrategyLearner, i.e. the class
    # already defined in the running session, not the imported module `sl`.
    # As a standalone script this would need sl.StrategyLearner - confirm.
    learner = StrategyLearner(verbose=verb)

    # training window: two years of in-sample data
    sym = "SINE_FAST_NOISE"
    stdate = dt.datetime(2008, 1, 1)
    enddate = dt.datetime(2009, 12, 31)

    # train the learner
    learner.addEvidence(symbol=sym, sd=stdate, ed=enddate, sv=100000)

    # testing window: the following two years
    sym = "SINE_FAST_NOISE"
    stdate = dt.datetime(2010, 1, 1)
    enddate = dt.datetime(2011, 12, 31)

    # get some data for reference
    syms = [sym]
    dates = pd.date_range(stdate, enddate)
    prices_all = ut.get_data(syms, dates)  # automatically adds SPY
    prices = prices_all[syms]  # only portfolio symbols

    # test the learner
    df_trades = learner.testPolicy(symbol=sym, sd=stdate, ed=enddate, sv=100000)

    # a few sanity checks
    # df_trades should be a single column DataFrame (not a series)
    # and the cumulative position must never exceed 500 shares either way
    max_holding = 500
    if not isinstance(df_trades, pd.DataFrame):
        print("Returned result is not a DataFrame")
    if prices.shape != df_trades.shape:
        print("Returned result is not the right shape")
    holdings = df_trades.cumsum().abs().values
    if (holdings > max_holding).any():
        print("Returned result violates holding restrictions (more than 500 shares)")

    if verb:
        # Running position after each day's order.
        df_trades['total'] = df_trades['order'].cumsum()
        print('df_trades')
        print(df_trades)


if __name__=="__main__":
    test_code(verb = True)


29.5054827277
512.464598224
681.134529762
1114.24138217
1393.7430349
1970.05210667
2219.32310421
2177.56155554
2329.32390686
2342.60662319
2424.23191004
2418.99402618
2420.52345771
2415.05419381
2420.52345771
2420.52345771
total run:  15
df_trades
            order  total
2010-01-04 -200.0 -200.0
2010-01-05    0.0 -200.0
2010-01-06    0.0 -200.0
2010-01-07    0.0 -200.0
2010-01-08  200.0    0.0
2010-01-11  200.0  200.0
2010-01-12 -200.0    0.0
2010-01-13    0.0    0.0
2010-01-14  200.0  200.0
2010-01-15    0.0  200.0
2010-01-19    0.0  200.0
2010-01-20    0.0  200.0
2010-01-21    0.0  200.0
2010-01-22    0.0  200.0
2010-01-25 -200.0    0.0
2010-01-26  200.0  200.0
2010-01-27    0.0  200.0
2010-01-28    0.0  200.0
2010-01-29 -200.0    0.0
2010-02-01 -200.0 -200.0
2010-02-02  200.0    0.0
2010-02-03    0.0    0.0
2010-02-04    0.0    0.0
2010-02-05    0.0    0.0
2010-02-08  200.0  200.0
2010-02-09 -200.0    0.0
2010-02-10    0.0    0.0
2010-02-11    0.0    0.0
2010-02-12    0.0    0.0
20