In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [2]:
# Load the closing-price table indexed by date, drop rows that are entirely
# empty, then drop every ticker column that still contains a missing value.
df = (
  pd.read_csv('sp500_closefull.csv', index_col=0, parse_dates=True)
  .dropna(axis=0, how='all')
  .dropna(axis=1, how='any')
)

In [3]:
# Preview the first rows of the cleaned price table.
df.head()

Unnamed: 0_level_0,CSCO,UAL,TROW,ISRG,PRGO,TPR,DVN,MRO,BA,VRTX,...,M,CRM,PGR,WAT,BWA,LRCX,NWL,UAA,BLK,PPL
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2010-01-04,24.690001,12.8,54.400002,102.923332,40.349998,36.310001,76.57,19.153616,56.18,44.240002,...,17.059999,18.705,18.030001,61.630001,16.889999,39.880001,15.2,3.51,238.580002,30.242558
2010-01-05,24.58,13.91,55.009998,102.459999,38.790001,36.75,76.650002,19.171511,58.02,42.779999,...,16.860001,18.625,17.969999,60.790001,17.695,39.610001,15.11,3.615,239.610001,29.85137
2010-01-06,24.42,13.27,54.150002,103.946663,38.299999,37.470001,76.419998,19.595024,59.779999,42.029999,...,17.1,18.592501,17.790001,60.900002,18.344999,39.43,15.38,3.695,234.669998,29.916569
2010-01-07,24.530001,13.55,54.110001,103.556664,37.990002,37.490002,75.970001,19.475725,62.200001,41.5,...,17.49,18.51,17.549999,61.16,18.594999,39.360001,15.82,3.65125,237.25,29.627834
2010-01-08,24.66,13.33,53.900002,102.986664,37.779999,37.27,76.120003,19.50555,61.599998,40.669998,...,16.92,18.5375,17.709999,61.209999,18.254999,40.349998,15.77,3.64375,238.919998,29.534695


In [4]:
ticker = 'CSCO'

# make features: two trailing simple moving averages of the ticker's close;
# the first (window - 1) rows of each average are NaN
close = df[ticker]
fast_window, slow_window = 16, 33
df['FastSMA'] = close.rolling(window=fast_window).mean()
df['SlowSMA'] = close.rolling(window=slow_window).mean()
feats = ['FastSMA', 'SlowSMA']

In [5]:
# Per-row log return of the ticker: log(price_t) - log(price_{t-1}).
# The first row has no predecessor, so its LogReturn is NaN.
df['LogReturn'] = np.log(df[ticker]).diff()
df.head()

Unnamed: 0_level_0,CSCO,UAL,TROW,ISRG,PRGO,TPR,DVN,MRO,BA,VRTX,...,WAT,BWA,LRCX,NWL,UAA,BLK,PPL,FastSMA,SlowSMA,LogReturn
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2010-01-04,24.690001,12.8,54.400002,102.923332,40.349998,36.310001,76.57,19.153616,56.18,44.240002,...,61.630001,16.889999,39.880001,15.2,3.51,238.580002,30.242558,,,
2010-01-05,24.58,13.91,55.009998,102.459999,38.790001,36.75,76.650002,19.171511,58.02,42.779999,...,60.790001,17.695,39.610001,15.11,3.615,239.610001,29.85137,,,-0.004465
2010-01-06,24.42,13.27,54.150002,103.946663,38.299999,37.470001,76.419998,19.595024,59.779999,42.029999,...,60.900002,18.344999,39.43,15.38,3.695,234.669998,29.916569,,,-0.006531
2010-01-07,24.530001,13.55,54.110001,103.556664,37.990002,37.490002,75.970001,19.475725,62.200001,41.5,...,61.16,18.594999,39.360001,15.82,3.65125,237.25,29.627834,,,0.004494
2010-01-08,24.66,13.33,53.900002,102.986664,37.779999,37.27,76.120003,19.50555,61.599998,40.669998,...,61.209999,18.254999,40.349998,15.77,3.64375,238.919998,29.534695,,,0.005286


In [6]:
# chronological split: the final Ntest rows form the test set and every
# earlier row goes to training; both are copies, independent of df
Ntest = 1000
train_data = df.head(-Ntest).copy()
test_data = df.tail(Ntest).copy()

In [7]:
class Env:
  """Minimal single-asset trading environment driven by a DataFrame.

  Each row of the frame is one time step. The observation for a step is the
  row's feature values; the reward is the row's 'LogReturn' while the
  environment is invested and 0 otherwise. The sum of every stepped row's
  'LogReturn' is tracked in `total_buy_and_hold` as a baseline.

  Args:
    df: DataFrame containing the feature columns and a 'LogReturn' column.
    feature_cols: column selection used as the observation. When omitted, the
      notebook-level `feats` list is used, so the original one-argument call
      `Env(df)` behaves exactly as before.
  """

  # action codes understood by step(); any other value leaves the position as is
  BUY, SELL, HOLD = 0, 1, 2

  def __init__(self, df, feature_cols=None):
    if feature_cols is None:
      # fall back to the feature list defined in the feature-engineering cell
      feature_cols = feats

    self.df = df
    self.n = len(df)
    self.current_idx = 0
    self.action_space = [self.BUY, self.SELL, self.HOLD]
    self.invested = 0

    # pre-extract arrays once so step() only does positional lookups
    self.states = self.df[feature_cols].to_numpy()
    self.rewards = self.df['LogReturn'].to_numpy()
    self.total_buy_and_hold = 0

  def reset(self):
    """Rewind to the first row, clear the position and the baseline.

    Returns:
      The feature values of the first row.
    """
    self.current_idx = 0
    self.total_buy_and_hold = 0
    self.invested = 0
    return self.states[self.current_idx]

  def step(self, action):
    """Apply `action`, advance one row and return (next_state, reward, done).

    The position is updated first, then the reward is read from the row being
    stepped INTO, so a BUY decided on row t earns the return recorded on
    row t + 1.

    Raises:
      Exception: if there is no further row to advance to. The environment
        is left unchanged in that case.
    """
    # validate before advancing so a rejected call does not move the index
    if self.current_idx + 1 >= self.n:
      raise Exception("Episode already done")
    self.current_idx += 1

    if action == self.BUY:
      self.invested = 1
    elif action == self.SELL:
      self.invested = 0

    # compute reward: only an invested position collects the row's return
    if self.invested:
      reward = self.rewards[self.current_idx]
    else:
      reward = 0

    # state transition
    next_state = self.states[self.current_idx]

    # baseline: what always being invested would have earned
    self.total_buy_and_hold += self.rewards[self.current_idx]

    # done flag: true once the last row has been reached
    done = (self.current_idx == self.n - 1)
    return next_state, reward, done

In [8]:
class Agent:
  """Rule-based moving-average crossover trader.

  Keeps its own record of whether it holds a position and emits
  0 (buy), 1 (sell) or 2 (do nothing).
  """

  def __init__(self):
    self.is_invested = False

  def act(self, state):
    """Choose an action from a two-element state ordered (fast, slow)."""
    assert(len(state) == 2)
    fast, slow = state[0], state[1]

    if not self.is_invested:
      # flat: enter only when the fast average is strictly above the slow one
      if fast > slow:
        self.is_invested = True
        return 0 # Buy
      return 2 # Do nothing

    # holding: exit only when the fast average is strictly below the slow one
    if fast < slow:
      self.is_invested = False
      return 1 # sell

    return 2 # Do nothing

In [9]:
def play_one_episode(agent, env):
  """Run `agent` through `env` from reset until done; return the summed reward.

  The agent's `is_invested` flag is cleared after the environment is reset so
  every episode starts without a position.
  """
  observation = env.reset()
  agent.is_invested = False
  episode_return = 0

  while True:
    chosen = agent.act(observation)
    observation, step_reward, finished = env.step(chosen)
    episode_return += step_reward
    if finished:
      break

  return episode_return

In [10]:
# One environment per split, so each keeps its own row index and
# buy-and-hold total.
train_env = Env(train_data)
test_env = Env(test_data)

In [11]:
# The crossover agent takes no constructor arguments; it starts uninvested.
agent = Agent()

In [12]:
# Total reward collected by the agent over the training split.
train_reward = play_one_episode(agent, train_env)

In [13]:
# Total reward collected by the agent over the held-out test split.
test_reward = play_one_episode(agent, test_env)

In [14]:
# (agent total reward, buy-and-hold total) on the training split
train_reward, train_env.total_buy_and_hold

(0.0016875816564572155, 0.1081512582688231)

In [15]:
# (agent total reward, buy-and-hold total) on the test split
test_reward, test_env.total_buy_and_hold

(-0.19240634206874718, 0.43116065624283184)