In [None]:
!pip install alpaca-trade-api
!pip install transformers
!pip install nltk
!pip install ipython ipykernel --upgrade
# !pip install pandas-profiling

In [None]:
import alpaca_trade_api as alp
from transformers import pipeline
import keras
import tensorflow as tf
import matplotlib.pyplot as plt
import sys, os, glob, random, time
import numpy as np
import sklearn as sk
import pandas as pd
import re
import nltk
from collections import deque

Retrieve market data 


In [None]:
from alpaca_trade_api.rest import REST, TimeFrame, TimeFrameUnit
# Alpaca credentials are read from the environment so they never have to be typed
# into (or committed with) the notebook. The secret key must be updated together
# with the key id whenever the key id is regenerated.
# If a variable is unset the value falls back to '' (the previous hardcoded value).
api_key = os.environ.get('APCA_API_KEY_ID', '')
secret_key = os.environ.get('APCA_API_SECRET_KEY', '')

# api = REST(api_key, secret_key)
# start_date = '2021-01-01'; end_date = '2022-01-01'
# btc_bars = api.get_crypto_bars('BTCUSD', TimeFrame.Day, start_date, end_date).df # get_trades, get_quotes, ETHUSD
# btc_bars
# stock_choice = 'TSLA' # 'AAPL','AMD','AMZN','FB','GOOG','GOOGL','MSFT','NFLX','NVDA'
# stock_bars = api.get_bars(stock_choice, TimeFrame.Day, start_date, end_date).df
# stock_bars

Retrieve news data

In [None]:
from alpaca_trade_api.stream import Stream
# For historic data use the REST class, for live data use the Stream class
# Both clients are built from the api_key / secret_key pair defined in the previous cell.
rest_client = REST(api_key, secret_key)  # used further down for get_bars and get_news
stream_client = Stream(api_key, secret_key)  # only referenced by commented-out streaming code in this notebook

Transformer sentiment analysis using a (HuggingFace) BERT-based network

In [None]:
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords

# Download the 'punkt' and 'stopwords' NLTK resources (word_tokenize and stopwords
# are the two NLTK helpers imported above).
for nltk_resource in ('punkt', 'stopwords'):
  nltk.download(nltk_resource)

# Two candidate HuggingFace sentiment pipelines are loaded; `classifier` is the one
# the rest of the notebook calls.
classifier1 = pipeline(
    'sentiment-analysis',
    model = 'distilbert-base-uncased-finetuned-sst-2-english',
)
classifier2 = pipeline(
    'sentiment-analysis',
    model = 'finiteautomata/bertweet-base-sentiment-analysis',
)
classifier = classifier2

def news_data_processor(news):
  """Clean one news item and score its sentiment.

  The headline and summary are joined, tokenised, stripped of non-alphabetic
  tokens and of English stop words, and the remaining words are passed to the
  module-level `classifier`.

  Args:
    news: object exposing `created_at`, `summary` and `headline` attributes.

  Returns:
    Tuple `(sentence, sentiment, date)`: the cleaned text, the raw classifier
    output for that text, and the first 10 characters of `str(news.created_at)`
    (presumably a YYYY-MM-DD date; verify against the news source).
  """
  date = str(news.created_at)[:10]

  # Skip missing/empty fields instead of failing on `None + str`.
  text = ' '.join(part for part in (news.headline, news.summary) if part)
  # sentiment = classifier(text)
  words = [word for word in word_tokenize(text) if word.isalpha()]

  # The NLTK stop-word list is lowercase, so compare case-insensitively; otherwise
  # capitalised stop words such as 'The' survive the filter. The original casing
  # of the kept words is preserved for the classifier.
  stop_words = set(stopwords.words('english'))
  filtered_words = [w for w in words if w.lower() not in stop_words]

  sentence = ' '.join(filtered_words)
  sentiment = classifier(sentence)

  return sentence, sentiment, date

# for i in range(0):
#   test = news_data_loader(news[i])[0]
#   word_list = word_tokenize(test)
#   words = [word for word in word_list if word.isalpha()]

#   stop_words = set(stopwords.words('english'))
#   filtered_words = [w for w in words if not w in stop_words]
#   print(word_list)
#   print(test)
#   print(classifier(test))
#   print(classifier(word_list))
#   print(words)
#   print(classifier(words))
#   print(filtered_words)
#   print(classifier(filtered_words))
#   joint_sentence = ' '.join(words)
#   print(joint_sentence)
#   print(classifier(joint_sentence))

In [None]:
class DQRL_Trader():
  """Deep Q-learning trading agent with epsilon-greedy action selection.

  The agent owns a Keras network that maps a state of shape
  (state_size, n_data_inputs) to one Q-value per action, a bounded replay
  memory of `[state, action, reward, next_state, done]` transitions, and the
  exploration schedule (epsilon, its floor and its decay factor).
  """

  def __init__(self, state_size, action_space=3):
    """Create the agent.

    Args:
      state_size: number of time steps in one state (first input dimension).
      action_space: number of discrete actions the network scores.
    """
    self.state_size = state_size
    self.action_space = action_space
    self.memory = deque(maxlen=1500)  # replay memory; the deque drops the oldest entries when full
    self.inventory = []
    # Features per time step: open, open_avg_diff_5_days, open_avg_diff_50_days,
    # portfolio_value, number_of_stocks (=6 if including sentiment)
    self.n_data_inputs = 5

    self.gamma = 0.95           # discount applied to the best next-state Q-value
    self.epsilon = 1.0          # probability of taking a random action
    self.epsilon_min = 0.1      # epsilon is not decayed below this value
    self.epsilon_decay = 0.995  # multiplicative decay applied once per batch_Q_learn call
    self.learning_rate = 0.001

    self.model = self.model_builder()

  def model_builder(self):
    """Build and compile the Q-network (three Dense(64) layers, Flatten, linear output)."""
    init = tf.keras.initializers.HeUniform(seed=1)
    model = tf.keras.models.Sequential([
        tf.keras.layers.Dense(64, activation='relu', input_shape=(self.state_size, self.n_data_inputs), kernel_initializer=init),
        tf.keras.layers.Dense(64, activation='relu', kernel_initializer=init),
        tf.keras.layers.Dense(64, activation='relu', kernel_initializer=init),
        # tf.keras.layers.Dense(64, activation='relu', kernel_initializer=init),
        tf.keras.layers.Flatten(),
        tf.keras.layers.Dense(self.action_space, activation='linear', kernel_initializer=init)
    ])
    model.compile(loss=tf.keras.losses.Huber(), optimizer=tf.keras.optimizers.Adam(learning_rate=self.learning_rate))
    return model

  @staticmethod
  def _as_batch(state):
    """Return `state` with a leading batch axis of size 1 when it is a single 2-D state.

    The network is built with input_shape=(state_size, n_data_inputs), so predict/fit
    need a 3-D (batch, state_size, n_data_inputs) array. Arrays that are not 2-D are
    returned unchanged.
    """
    state = np.asarray(state)
    return state[np.newaxis, ...] if state.ndim == 2 else state

  def trade(self, state):
    """Pick an action for `state` with an epsilon-greedy policy.

    Args:
      state: one state, with or without a leading batch axis.

    Returns:
      A random action index with probability epsilon, otherwise the index of
      the largest predicted Q-value.
    """
    if random.random() <= self.epsilon:
      return random.randrange(self.action_space)

    options = self.model.predict(self._as_batch(state), verbose=0)
    # print('options', options)
    return np.argmax(options[0])

  def batch_Q_learn(self, batch_size):
    """Fit the network on the most recent `batch_size` transitions in memory.

    For every transition the target for the taken action is the reward, plus
    the discounted best next-state Q-value when the episode is not done.
    Epsilon is decayed once per call, after the batch has been processed.

    Args:
      batch_size: number of most recent transitions to train on; if the memory
        holds fewer, all of them are used.
    """
    mem = len(self.memory)
    if batch_size <= 0 or mem == 0:
      return

    # Clamp at 0 so a short memory never produces negative (wrap-around) indices.
    first = max(0, mem - batch_size)
    mini_batch = [self.memory[i] for i in range(first, mem)]

    for state, action, reward, next_state, done in mini_batch:
      state = self._as_batch(state)
      target = reward
      if not done:
        next_q_values = self.model.predict(self._as_batch(next_state), verbose=0)[0]
        target = reward + self.gamma * np.amax(next_q_values)

      # Only the Q-value of the action actually taken is moved towards the target.
      target_func = self.model.predict(state, verbose=0)
      target_func[0][action] = target
      self.model.fit(state, target_func, epochs=1, verbose=0)

    # Decay exploration once per learning step, not once per replayed sample.
    if self.epsilon > self.epsilon_min:
      self.epsilon *= self.epsilon_decay


In [None]:
def price_format(p):
  """Render a number as a dollar amount with two decimals, e.g. '$3.14' or '-$2.00'."""
  prefix = '-$' if p < 0 else '$'
  return prefix + format(abs(p), '.2f')

def sigmoid(x):
  """Logistic function 1 / (1 + e^(-x)), applied element-wise to scalars or arrays."""
  denominator = np.exp(-x) + 1
  return 1 / denominator

In [None]:
# N-day state representation ending at time t
def create_state(data, time_step, window_size, portfolio, n_stocks):
  """Build the state for `time_step` from the last `window_size` rows of `data`.

  When fewer than `window_size` rows exist before `time_step`, the window is
  left-padded with copies of `data[0]`. Each state row is the sigmoid of the
  difference between two consecutive window rows, followed by `portfolio` and
  `n_stocks`, so the state has `window_size - 1` rows.

  Returns:
    float32 NumPy array of the stacked state rows.
  """
  first = time_step - window_size + 1
  if first >= 0:
    window = data[first : time_step+1]
  else:
    # Not enough history yet: repeat the first row to fill the missing days.
    padding = -first * [data[0]]
    window = padding + list(data[0 : time_step+1])

  rows = [
      [*sigmoid(window[k+1] - window[k]).flatten(), portfolio, n_stocks]
      for k in range(len(window) - 1)
  ]
  return np.array(rows).astype('float32')

In [None]:
# Date range and ticker used for both the price bars and the news query.
start_date = '2021-01-01'
end_date = '2022-01-01'
stock_choice = 'TSLA' # 'AAPL','AMD','AMZN','FB','GOOG','GOOGL','MSFT','NFLX','NVDA'

# Daily bars for the chosen ticker, as a DataFrame.
stock_bars = rest_client.get_bars(
    stock_choice, TimeFrame.Day, start_date, end_date
).df

# Live news streaming (disabled):
# async def live_news_data(news):
# 	print(news)
#
# stream_client.subscribe_news(live_news_data, stock_choice)
# stream_client.run()

# print(stock_bars)
# stock_dates = str(stock_bars.index[i][:10])

# Opening prices are the base series for every price feature computed below.
stock_open = stock_bars['open']
# print(stock_open)

# Compute the difference between the opening price of a day with the mean opening price over a previous set of days e.g. 5 and 50
def avg_open_diff(open_price_data, n_days, start_index):
  """Difference between one opening price and the mean of the prices before it.

  Args:
    open_price_data: opening prices (pandas Series, NumPy array or list);
      always accessed by position, never by index label.
    n_days: number of preceding prices to average. When fewer are available,
      all prices before `start_index` are averaged instead.
    start_index: position of the price to compare.

  Returns:
    `open_price_data[start_index]` minus the mean of the up-to-`n_days` prices
    preceding it. NaN when there are no preceding prices.

  Raises:
    IndexError: if `start_index` is past the end of the data.
  """
  # Work on a plain float array so integer positions never get interpreted as
  # index labels and lists are accepted as well.
  prices = np.asarray(open_price_data, dtype=float)
  if start_index >= len(prices):
    raise IndexError('Failed to compute moving average difference: invalid request')

  # Clamp the window start at 0 so a short history cannot produce a negative,
  # wrap-around slice.
  window_start = max(start_index - n_days, 0)
  prior_prices = prices[window_start:start_index]
  if prior_prices.size == 0:
    return float('nan')  # nothing to average against (same NaN result as before)

  return prices[start_index] - prior_prices.mean()

def create_avg_diff_data(data, n):
  """Return, for every position in `data`, its difference to the prior `n`-item average.

  The first entry is fixed at 0; every later entry is `avg_open_diff(data, n, i)`.
  """
  later_diffs = [avg_open_diff(data, n, day) for day in range(1, len(data))]
  return np.array([0] + later_diffs)  # leading 0: avg diff = 0 for first open price

stock_avg_diff_5 = create_avg_diff_data(stock_open, 5)
stock_avg_diff_50 = create_avg_diff_data(stock_open, 50)

# One row per day: [open, open - 5-day mean, open - 50-day mean]
stock_data = np.transpose([stock_open, stock_avg_diff_5, stock_avg_diff_50])#.astype(np.float16)

news = rest_client.get_news(stock_choice, start_date, end_date, limit=10)
print(type(news))
N_news = len(news)

# Tally the sentiment labels over the retrieved news items.
NEG, POS, NEU = 0, 0, 0
for i in range(N_news):
  # Process each item exactly once: every call runs the sentiment classifier,
  # so calling it per returned field doubles the inference cost.
  _, sentiment_output, news_date = news_data_processor(news[i])
  sentim = sentiment_output[0]
  if sentim['label'] == 'NEG':
    NEG+=1
  elif sentim['label'] == 'POS':
    POS+=1
  elif sentim['label'] == 'NEU':
    NEU+=1
# print(NEG, POS, NEU)


In [None]:
# Training configuration for the trading agent.
window_size = 10 # time steps per state (passed to DQRL_Trader as state_size); the training loop calls create_state with window_size+1
epochs = 1000 # number of passes over the price data in the training loop
batch_size = 32 # passed to trader.batch_Q_learn once the replay memory holds more than this many transitions
data = stock_data # per-day feature rows built in the previous cell
N_data = len(data)-1 # steps per epoch; one less than the row count because each step also reads row t+1

trader = DQRL_Trader(window_size)
trader.model.summary()

Model: "sequential_24"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_107 (Dense)           (None, 10, 64)            384       
                                                                 
 dense_108 (Dense)           (None, 10, 64)            4160      
                                                                 
 dense_109 (Dense)           (None, 10, 64)            4160      
                                                                 
 flatten_9 (Flatten)         (None, 640)               0         
                                                                 
 dense_110 (Dense)           (None, 3)                 1923      
                                                                 
Total params: 10,627
Trainable params: 10,627
Non-trainable params: 0
_________________________________________________________________


In [None]:
# Train the agent: every epoch replays the whole price series from a fresh account.
for epoch in range(epochs):
  print('Epoch {}/{} ...'.format(epoch+1, epochs))

  # Reset the per-episode bookkeeping.
  trader.inventory = []
  total_profit = 0
  n_stocks_owned = 10
  initial_funds = 1000
  portfolio_value = (n_stocks_owned * data[0][0]) + initial_funds

  state = create_state(data, 0, window_size+1, portfolio_value, n_stocks_owned)

  for t in range(N_data):
    price = data[t][0]  # opening price of the current step
    action = trader.trade(state)
    reward = 0

    if portfolio_value < price or n_stocks_owned == 0: # punish inability to buy/sell stock
      reward = -10

    if portfolio_value < price and n_stocks_owned == 0: # huge penalty for bankruptcy
      reward = -100

    # NOTE(review): the sit and sell branches below overwrite the penalties set
    # above -- confirm that this is the intended reward shaping.
    if action == 0: # punish repeated sitting over time
      print('Sitting...')
      reward = -0.1

    elif action == 1 and portfolio_value >= price: # buy
      n_stocks_owned += 1
      portfolio_value -= price
      trader.inventory.append(data[t])
      print('Trader bought a stock for {}'.format(price_format(price)))
      print('Total portfolio value: {} | Number of stocks: {}'.format(price_format(portfolio_value), n_stocks_owned))

    elif action == 2 and len(trader.inventory) > 0: # sell
      bought_price = trader.inventory.pop(0)  # oldest purchase is sold first
      n_stocks_owned -= 1
      portfolio_value += price
      transaction_profit = price - bought_price[0]
      # Reward only the sign of the trade outcome: +1 profit, 0 break-even, -1 loss.
      if transaction_profit > 0:
        reward = 1
      elif transaction_profit == 0:
        reward = 0
      else:
        reward = -1

      total_profit += transaction_profit
      print('Trader sold one stock at {} | Transaction profit: {}'.format(price_format(price), price_format(transaction_profit)))
      print('Total portfolio value: {} | Number of stocks: {}'.format(price_format(portfolio_value), n_stocks_owned))

    # Build the next state only after the action has been applied, so it carries
    # the updated portfolio value and stock count rather than the stale ones.
    next_state = create_state(data, t+1, window_size+1, portfolio_value, n_stocks_owned)

    done = t == N_data-1

    # The full replay memory is intentionally not printed here: dumping up to 1500
    # transitions on every step floods the notebook output.
    trader.memory.append([state, action, reward, next_state, done])

    state = next_state

    if done:
      print('\n Total profit = {} \n'.format(price_format(total_profit)))

    if len(trader.memory) > batch_size:
      trader.batch_Q_learn(batch_size)

In [None]:
# Split the replay memory into per-transition states and actions for inspection.
state_data = [np.squeeze(transition[0]) for transition in trader.memory]
action_data = [np.squeeze(transition[1]) for transition in trader.memory]

# Show the state of the fifth stored transition.
print(np.array(state_data[4]))