In [60]:
"""
Template for implementing StrategyLearner  (c) 2016 Tucker Balch
"""

import datetime as dt
import QLearner as ql
import pandas as pd
import util as ut
import numpy as np
from datetime import datetime, timedelta

class StrategyLearner(object):
    """Q-learning trading strategy for a single symbol.

    The learner state is ``bin * 10 + holding``: ``bin`` is the discretised
    value of the first indicator column (labelled 'ema') and ``holding`` is
    one of the HOLD_* codes below.  Actions are the ACTION_* codes.

    Call ``addEvidence`` to create and train the Q-learner before calling
    ``testPolicy``; ``testPolicy`` reads ``self.learner``.
    """

    # Action codes exchanged with the Q-learner.
    ACTION_BUY = 0
    ACTION_SELL = 1
    ACTION_NOTHING = 2

    # Position codes folded into the state number.
    HOLD_LONG = 1
    HOLD_SHORT = 2
    HOLD_NONE = 3

    # Shares moved by a single order; net holdings stay within +/- this size.
    TRADE_SIZE = 200

    # constructor
    def __init__(self, verbose = False):
        """Store the verbosity flag; the Q-learner is built in addEvidence."""
        self.verbose = verbose

    def author(self):
        """Return the author's user id."""
        return 'llee81'

    def _prepare_data(self, symbol, sd, ed):
        """Load prices and indicator frames for ``symbol`` and refresh the bins.

        Returns ``(trades, df_ema, df_momentum)``: ``trades`` is an independent
        copy of the symbol's price column, so callers may add columns to it
        without touching the frame returned by ``ut.get_data``.

        Side effect: overwrites ``self.thres_ema`` and ``self.thres_momentum``
        with thresholds computed from the requested date range.
        """
        dates = pd.date_range(sd, ed)
        prices_all = ut.get_data([symbol], dates)  # per the template, also adds SPY
        trades = prices_all[[symbol]].copy()  # only the portfolio symbol

        df_indicators = self.indicators(sd=sd, ed=ed, syms=[symbol],
                                        gen_plot=False, n=10, verbose=False)
        df_ema = df_indicators[['ema']]
        df_momentum = df_indicators[['momentum']]
        self.thres_ema = self.discretize(df_ema)
        self.thres_momentum = self.discretize(df_momentum)
        return trades, df_ema, df_momentum

    def testPolicy(self, symbol = "AAPL", sd=dt.datetime(2009,1,1), ed=dt.datetime(2010,1,1),sv = 100000):
        """Apply the trained policy to ``symbol`` between ``sd`` and ``ed``.

        Returns a single-column DataFrame named 'order', indexed like the price
        data, holding +TRADE_SIZE (buy), -TRADE_SIZE (sell) or 0 per day.
        ``sv`` is accepted for interface compatibility and is not used.

        The learner is only queried through ``querysetstate`` so that
        evaluation does not keep training on the test period.
        NOTE(review): this assumes ``querysetstate`` leaves the Q-table
        untouched, as in the course's QLearner template -- confirm.
        NOTE(review): the discretisation thresholds are recomputed from the
        test window here (as before); consider reusing the training thresholds.
        """
        trades, df_ema, df_momentum = self._prepare_data(symbol, sd, ed)

        current_hold = self.HOLD_NONE
        state = self.get_state(trades.index[0], current_hold, df_ema, df_momentum)
        action = self.learner.querysetstate(state)

        hold_amount = 0  # net shares held, kept within +/- TRADE_SIZE
        orders = []
        for i in range(trades.shape[0]):
            if action == self.ACTION_BUY and hold_amount != self.TRADE_SIZE:
                hold_amount += self.TRADE_SIZE
                orders.append(self.TRADE_SIZE)
            elif action == self.ACTION_SELL and hold_amount != -self.TRADE_SIZE:
                hold_amount -= self.TRADE_SIZE
                orders.append(-self.TRADE_SIZE)
            else:
                orders.append(0)

            # Only the position transition matters here; the reward is not fed
            # back to the learner during evaluation.
            current_hold, _ = self.make_trade(trades, i, current_hold, action)
            state = self.get_state(trades.index[i], current_hold, df_ema, df_momentum)
            action = self.learner.querysetstate(state)

        # Float dtype matches what the previous row-by-row assignment produced.
        trades['order'] = np.array(orders, dtype=float)
        return trades[['order']]

    # this method should create a QLearner, and train it for trading
    def addEvidence(self, symbol = "IBM", \
        sd=dt.datetime(2008,1,1), \
        ed=dt.datetime(2009,1,1), \
        sv = 10000):
        """Create a fresh Q-learner and train it on ``symbol`` over [sd, ed].

        Runs up to 30 passes over the price history.  After the third pass it
        stops early once the total reward changes by less than 0.005 between
        consecutive passes.  ``sv`` is accepted for interface compatibility
        and is not used.
        """
        #Actions: BUY, SELL, NOTHING - 0,1,2
        self.learner = ql.QLearner(num_states=10000,
                                   num_actions=3,
                                   alpha=0.2,
                                   gamma=0.9,
                                   rar=0.999,
                                   radr=0.999,
                                   dyna=0,
                                   verbose=False)

        trades, df_ema, df_momentum = self._prepare_data(symbol, sd, ed)

        iterations = 30
        scores = np.zeros(iterations)
        for iteration in range(iterations):
            total_reward = 0
            current_hold = self.HOLD_NONE
            state = self.get_state(trades.index[0], current_hold, df_ema, df_momentum)
            action = self.learner.querysetstate(state)

            # Start at day 1 so every reward has a real previous-day price.
            for i in range(1, trades.shape[0]):
                current_hold, r = self.make_trade(trades, i, current_hold, action)
                state = self.get_state(trades.index[i], current_hold, df_ema, df_momentum)
                total_reward = total_reward + r
                action = self.learner.query(state, r)

            if self.verbose:
                print(total_reward)
            scores[iteration] = total_reward
            if iteration > 2 and abs(scores[iteration] - scores[iteration - 1]) < 0.005:
                if self.verbose:
                    print('total run:  %d' % iteration)
                break

    # convert the discretize values
    def discretize(self,values,level=500):
        """Return ``level`` ascending bin thresholds for the first column of ``values``.

        With ``n`` rows and ``step = n // level``, threshold ``i`` is the sorted
        value at position ``step * (i + 1) - 1``.  When there are fewer rows
        than ``level`` the step is clamped to 1 and the tail is padded with the
        largest value, instead of every threshold collapsing onto the maximum.

        Raises ValueError when ``values`` has no rows.
        """
        n_rows = values.shape[0]
        if n_rows == 0:
            raise ValueError("cannot discretize an empty frame")
        step_size = max(n_rows // level, 1)
        ordered = np.sort(values.iloc[:, 0].values)  # NaN, if any, sorts last
        positions = np.minimum(step_size * np.arange(1, level + 1) - 1, n_rows - 1)
        return ordered[positions].astype(float)

    # convert the discretize values
    def get_discrete(self,thresholds,my_value):
        """Return the index of the first threshold strictly above ``my_value``.

        Values at or above every threshold map to the last bin.
        """
        above = np.flatnonzero(np.asarray(thresholds) > my_value)
        if above.size == 0:
            return len(thresholds) - 1
        return int(above[0])

    #convert indicator value to a state
    def get_state(self,idx,my_holding,df_ema,df_momentum):
        """Encode the indicator bin at label ``idx`` and the holding as one int.

        Only the first column of ``df_ema`` is used; ``df_momentum`` is kept in
        the signature but does not contribute to the state.  Requires
        ``self.thres_ema`` to be set.
        """
        ema = df_ema.loc[idx, df_ema.columns[0]]
        return self.get_discrete(self.thres_ema, ema) * 10 + my_holding

    def indicators(self,sd = dt.datetime(2008,1,1), ed = dt.datetime(2009,1,1),syms = None, n=10, gen_plot=False, verbose=False):
        """Build the indicator frame (columns 'ema' and 'momentum') for ``syms``.

        Data is requested from ``3 * n`` calendar days before ``sd`` through
        ``ed``; the extra leading rows are part of the returned frame.
        ``syms`` defaults to ``['AAPL']``; only its first symbol feeds the
        indicators.  ``gen_plot`` and ``verbose`` are accepted but unused.

        NOTE(review): the 'ema' column holds the raw "Volume" series of the
        first symbol, not an exponential moving average -- confirm the intent.
        'momentum' is the 21-row percentage change of the first symbol's
        price, with the undefined leading rows filled with 0.
        """
        if syms is None:
            syms = ['AAPL']
        dates = pd.date_range(sd - dt.timedelta(days=3 * n), ed)
        prices = ut.get_data(syms, dates)[syms]  # only portfolio symbols
        volume = ut.get_data(syms, dates, colname = "Volume")[syms]

        features = pd.DataFrame(index=prices.index)
        features['ema'] = volume.iloc[:, 0]
        features['momentum'] = prices.iloc[:, 0].pct_change(periods=21).fillna(0)
        return features[['ema', 'momentum']]

    def make_trade(self,data,i,my_holding,my_action):
        """Apply ``my_action`` on row ``i`` and return ``(new_holding, reward)``.

        ``data`` holds prices in its first column and ``i`` is a row position.
        The reward is the one-day price change for a long position, its
        negative for a short position, and 0 when flat or when the action
        closes the position.  At ``i == 0`` there is no previous day, so the
        price change is taken as 0 rather than wrapping to the last row.
        An unrecognised ``my_holding`` yields ``(None, None)``.
        """
        #Action BUY, SELL, NOTHING - 0,1,2
        #Holding BUY, SELL, NOTHING - 1,2,3
        today = data.iloc[i, 0]
        yesterday = data.iloc[i - 1, 0] if i > 0 else today

        if my_holding == self.HOLD_LONG:
            if my_action in (self.ACTION_BUY, self.ACTION_NOTHING):  # stay long
                return self.HOLD_LONG, today - yesterday
            return self.HOLD_NONE, 0  # sell closes the long

        if my_holding == self.HOLD_SHORT:
            if my_action in (self.ACTION_SELL, self.ACTION_NOTHING):  # stay short
                return self.HOLD_SHORT, yesterday - today
            return self.HOLD_NONE, 0  # buy closes the short

        if my_holding == self.HOLD_NONE:
            if my_action == self.ACTION_BUY:
                return self.HOLD_LONG, today - yesterday
            if my_action == self.ACTION_SELL:
                return self.HOLD_SHORT, yesterday - today
            return self.HOLD_NONE, 0

        return None, None
            
# instantiate the strategy learner
learner = StrategyLearner(verbose = True)

# set parameters for training the learner
sym = "SINE_FAST_NOISE"
stdate =dt.datetime(2008,1,1)
enddate =dt.datetime(2009,12,31) # just a few days for "shake out"

# train the learner
learner.addEvidence(symbol = sym, sd = stdate, \
    ed = enddate, sv = 10000) 

df_trades = learner.testPolicy(symbol = sym, sd = stdate, \
    ed = enddate, sv = 100000)

df_trades.ix[:,'Symbol']=sym
df_trades.ix[:,'Shares']=200
for i in df_trades.index:
    if df_trades.ix[i,'order']==200:
        df_trades.ix[i,'Order']= 'BUY' 
    if df_trades.ix[i,'order']==-200:
        df_trades.ix[i,'Order']= 'SELL'

df_trades = df_trades[df_trades.order!=0]
df_trades = df_trades.drop('order',1)
df_trades.ix[:,'Date'] = df_trades.index
df_trades.to_csv('qqq.csv',index=False)

print df_trades

-117.743352608
752.230109986
1260.8591388
1693.25542521
1845.9675065
1984.09646589
1983.53480299
2038.28841102
2062.20376135
2065.7482576
2083.57912506
2083.57912506
total run:  11
                     Symbol  Shares Order       Date
2008-01-04  SINE_FAST_NOISE     200  SELL 2008-01-04
2008-01-08  SINE_FAST_NOISE     200   BUY 2008-01-08
2008-01-10  SINE_FAST_NOISE     200  SELL 2008-01-10
2008-01-11  SINE_FAST_NOISE     200   BUY 2008-01-11
2008-01-15  SINE_FAST_NOISE     200  SELL 2008-01-15
2008-01-17  SINE_FAST_NOISE     200   BUY 2008-01-17
2008-01-22  SINE_FAST_NOISE     200   BUY 2008-01-22
2008-01-23  SINE_FAST_NOISE     200  SELL 2008-01-23
2008-01-24  SINE_FAST_NOISE     200   BUY 2008-01-24
2008-01-25  SINE_FAST_NOISE     200  SELL 2008-01-25
2008-01-28  SINE_FAST_NOISE     200   BUY 2008-01-28
2008-01-29  SINE_FAST_NOISE     200  SELL 2008-01-29
2008-01-30  SINE_FAST_NOISE     200  SELL 2008-01-30
2008-02-01  SINE_FAST_NOISE     200   BUY 2008-02-01
2008-02-04  SINE_FAST_NO

In [13]:

#signal_merge.to_csv('order_manual.csv',index=False)


# Display the current df_trades frame. It is not built in this cell, so it is
# whatever an earlier notebook cell left in scope.
print df_trades

           Symbol  Shares Order
2008-01-11   AAPL     200  SELL
2008-01-17   AAPL     200   BUY
2008-01-18   AAPL     200  SELL
2008-01-22   AAPL     200   BUY
2008-01-25   AAPL     200  SELL
2008-01-28   AAPL     200   BUY
2008-02-01   AAPL     200   BUY
2008-02-04   AAPL     200  SELL
2008-02-05   AAPL     200  SELL
2008-02-06   AAPL     200   BUY
2008-02-11   AAPL     200  SELL
2008-02-13   AAPL     200   BUY
2008-02-14   AAPL     200   BUY
2008-02-20   AAPL     200  SELL
2008-02-22   AAPL     200   BUY
2008-02-25   AAPL     200  SELL
2008-02-26   AAPL     200   BUY
2008-02-28   AAPL     200  SELL
2008-03-03   AAPL     200   BUY
2008-03-04   AAPL     200  SELL
2008-03-05   AAPL     200  SELL
2008-03-06   AAPL     200   BUY
2008-03-07   AAPL     200  SELL
2008-03-10   AAPL     200   BUY
2008-03-11   AAPL     200  SELL
2008-03-12   AAPL     200   BUY
2008-03-13   AAPL     200  SELL
2008-03-17   AAPL     200   BUY
2008-03-19   AAPL     200  SELL
2008-03-20   AAPL     200   BUY
...     

In [40]:
"""
Test a Strategy Learner.  (c) 2016 Tucker Balch
"""

import pandas as pd
import datetime as dt
import util as ut
import StrategyLearner as sl

def test_code(verb = True):
    """Train a StrategyLearner in-sample, run it out-of-sample, sanity-check it.

    Trains on SINE_FAST_NOISE for 2008-2009, calls testPolicy for 2010-2011,
    and prints a message for each failed check: the result is not a DataFrame,
    its shape differs from the price frame, or the cumulative position exceeds
    500 shares in absolute value.  With ``verb`` true, a running 'total'
    column is added to the trades and the frame is printed.

    NOTE(review): ``StrategyLearner`` is referenced by bare name, so it must
    already be defined in the session (e.g. by an earlier notebook cell); the
    ``sl`` module alias imported above is not used here.
    """

    # instantiate the strategy learner
    learner = StrategyLearner(verbose = verb)

    # set parameters for training the learner
    sym = "SINE_FAST_NOISE"
    stdate = dt.datetime(2008,1,1)
    enddate = dt.datetime(2009,12,31)  # two-year training window

    # train the learner
    learner.addEvidence(symbol = sym, sd = stdate, \
        ed = enddate, sv = 100000)

    # set parameters for testing
    sym = "SINE_FAST_NOISE"
    stdate = dt.datetime(2010,1,1)
    enddate = dt.datetime(2011,12,31)

    # get some data for reference
    syms = [sym]
    dates = pd.date_range(stdate, enddate)
    prices_all = ut.get_data(syms, dates)  # automatically adds SPY
    prices = prices_all[syms]  # only portfolio symbols

    # test the learner
    df_trades = learner.testPolicy(symbol = sym, sd = stdate, \
        ed = enddate, sv = 100000)

    # a few sanity checks
    # df_trades should be a single column DataFrame (not a series) whose
    # running sum never exceeds 500 shares in absolute value
    if not isinstance(df_trades, pd.DataFrame):
        print("Returned result is not a DataFrame")
    if prices.shape != df_trades.shape:
        print("Returned result is not the right shape")
    # Flag every day on which the absolute net position is above the cap.
    tradecheck = abs(df_trades.cumsum()).values
    tradecheck[tradecheck<=500] = 0
    tradecheck[tradecheck>0] = 1
    if tradecheck.sum() > 0:
        print("Returned result violoates holding restrictions (more than 500 shares)")

    if verb:
        # Running net position after each day's order.
        df_trades['total'] = df_trades['order'].cumsum()
        print('df_trades')
        print(df_trades)
       

# Run the sanity check with verbose output when executed as the main module.
if __name__=="__main__":
    test_code(verb = True)


 self.thres_ema
[  61.91025217   63.79569939   70.34869437   70.89377268   70.94981314
   71.40994484   71.65931816   73.24176952   73.85783843   73.9827732
   74.03200901   74.44402979   76.08420975   76.21776625   76.29315831
   76.41387094   76.45708401   77.12905819   77.47017914   77.73469829
   77.76617513   77.80598353   78.05856222   78.14540286   78.2932464
   78.44615174   78.65974179   78.92681191   80.00620745   80.23691487
   80.45671829   80.68141018   80.89128716   80.93185712   81.43013928
   81.43744421   81.53646948   81.61491212   81.64065506   81.67947987
   81.80745143   82.07817446   82.11233791   82.23175716   82.25017443
   82.35374527   82.40533404   82.50204971   82.5426697    82.59847998
   82.63180501   82.71426766   82.7320993    82.76040608   82.86314045
   82.98616944   82.999287     83.01189534   83.07189654   83.13409313
   83.15556453   83.21342776   83.34139155   83.37325403   83.46267552
   83.52254766   83.56512408   83.79039266   83.81422209   83.8