## CHEME 5660: Building a Trading Bot using Model-Free Reinforcement Learning

### Introduction

### Example setup

In [1]:
# Activate the project environment in the current directory, resolve it, and install
# whatever dependencies it lists that are not available yet.
import Pkg
Pkg.activate(".")
Pkg.resolve()
Pkg.instantiate();

[32m[1m  Activating[22m[39m project at `~/Desktop/julia_work/CHEME-5660-Markets-Mayhem-Example-Notebooks/jupyter-notebooks/CHEME-5660-Q-Learning-TradeBot-notebook`
[32m[1m  No Changes[22m[39m to `~/Desktop/julia_work/CHEME-5660-Markets-Mayhem-Example-Notebooks/jupyter-notebooks/CHEME-5660-Q-Learning-TradeBot-notebook/Project.toml`
[32m[1m  No Changes[22m[39m to `~/Desktop/julia_work/CHEME-5660-Markets-Mayhem-Example-Notebooks/jupyter-notebooks/CHEME-5660-Q-Learning-TradeBot-notebook/Manifest.toml`


In [2]:
# load req packages -
using DataFrames
using Dates
using FileIO
using JLD2
using PrettyTables
using Distributions
using Statistics

# setup paths -
# _ROOT is the working directory at the time this cell runs; data files are looked up
# in its "data" subdirectory.
const _ROOT = pwd();
const _PATH_TO_DATA = joinpath(_ROOT, "data");

In [3]:
# load the example code library that accompanies this notebook -
# NOTE(review): presumably this file defines TransactionModel, partition, initialize,
# vwap, price, build and confirm, which are used below but not defined here - confirm.
include("CHEME-5660-Example-CodeLib.jl");

### Setup constants and other resources

In [4]:
# initial position: how many leading periods are used to build it, and with what size -
d = 12;          # periods in the initialization window (passed to `initialize` as the first stop index)
lotsize = 10.0;  # shares per lot while the initial position is established
nₐ = 1.0;        # shares traded by each buy or sell action

# where does our training data start? (right at the end of the initialization window)
cursor = d;

# episode layout -
number_of_episodes = 6;             # number of episodes
# trading steps per episode; episodes are two hours long
# (presumably counting the d-period initialization window - confirm)
number_of_steps_per_episodes = 12;

# action codes: 1 = buy, 2 = sell, 3 = hold
actions = collect(1:3);

# container holding one ledger (timestamp => transaction) per episode -
episodes = Vector{Dict{DateTime,TransactionModel}}();

In [5]:
# what is my prior data -
# The probabilities are ordered like the action codes (1 = buy, 2 = sell, 3 = hold),
# i.e. P(buy) = 0.60, P(sell) = 0.10, P(hold) = 0.30.
action_distribution = Categorical([0.60,0.10,0.30]); # when exploring, we are biased toward buying

#### Load and partition the OHLC price data set

In [6]:
# read the portfolio price data stored under the "dd" key of the JLD2 file -
price_data_dictionary = load(
    joinpath(_PATH_TO_DATA, "CHEME-5660-Portfolio-Q-learning-5min-11-20-22.jld2")
)["dd"];

# sorted list of the ticker symbols (the dictionary keys) in our data set -
ticker_symbol_array = sort(collect(keys(price_data_dictionary)));

# split the data into a training set (first result) and a prediction set (second result) -
price_training_dict, price_prediction_dict = partition(price_data_dictionary; fraction=0.80);

#### Establish an initial position

In [7]:
# ticker we want to explore -
ticker_symbol = "AMD";

# training-set price table for that ticker -
df_training = price_training_dict[ticker_symbol];

# what are my timestamps? (the :timestamp column, in ascending order)
timestamp_array = collect(sort(df_training[:, :timestamp]));

#### Run a purely random agent

In [8]:
# Roll out `number_of_episodes` episodes of an exploring agent whose actions are sampled
# from `action_distribution`. Each episode:
#   1. builds a ledger from the training data via `initialize` (bars start:stop),
#   2. prints the vwap of that ledger,
#   3. samples `number_of_steps_per_episodes` actions on the bars that follow the
#      initialization window, storing every trade that `confirm` accepts in the ledger
#      under the timestamp of the bar on which it was opened,
#   4. shifts the initialization window and appends the ledger to `episodes`.
# NOTE: `episodes` is not emptied here, so re-running this cell appends more ledgers.

# setup -
initialize_start_index = 1;
initialize_stop_index = d; # d = 12 = 1hr

# main loop -
for episode_index = 1:number_of_episodes

    # the window indices are updated at the bottom of this loop; declaring them global
    # makes the cell behave the same in a notebook/REPL and when run as a plain script -
    global initialize_start_index, initialize_stop_index

    # the last step of an episode reads the bar *after* the one it trades on, so this is
    # the largest index into timestamp_array the episode will touch. Stop cleanly instead
    # of failing with a BoundsError halfway through an episode -
    last_required_index = initialize_stop_index + number_of_steps_per_episodes + 1;
    if (last_required_index > length(timestamp_array))
        @warn "Not enough training data left; stopping before episode $(episode_index)" last_required_index available = length(timestamp_array)
        break
    end

    # initialize -
    ledger = initialize(df_training; lotsize = lotsize, start=initialize_start_index, stop=initialize_stop_index);

    # compute the vwap -
    Sᵢ = vwap(ledger);

    println("vwap Sᵢ = $(Sᵢ)")

    # steps per episode -
    for step_index ∈ 1:number_of_steps_per_episodes

        # pick a random action (1 = buy, 2 = sell, 3 = hold) -
        aᵢ = rand(action_distribution);

        # grab the time stamp and price data for this bar and the next one -
        current_cursor = initialize_stop_index + step_index;
        next_cursor = current_cursor + 1;
        event_open_timestamp = timestamp_array[current_cursor];
        event_close_timestamp = timestamp_array[next_cursor];
        p̂₁ = price(df_training, event_open_timestamp);
        p̂₂ = price(df_training, event_close_timestamp);

        # translate the action code into a trade sense: buy => +1, sell => -1, hold => 0
        sense_flag = 1;
        if (aᵢ == 2)
            sense_flag = -1;
        elseif (aᵢ == 3)
            sense_flag = 0;
        end

        # build a proposed trade model -
        proposed_trade = build(TransactionModel, volume = nₐ, sense = sense_flag, p₁ = p̂₁, p₂ = p̂₂);

        # only trades accepted by `confirm` are recorded -
        if (confirm(ledger, proposed_trade) == true)
            ledger[event_open_timestamp] = proposed_trade;
        end
    end

    # update the cursor positions -
    # NOTE(review): the next initialization window begins 3 bars after the current one
    # ends, i.e. inside the bars this episode just traded on - confirm that this overlap
    # between consecutive episodes is intended.
    initialize_start_index = (initialize_stop_index + 3);
    initialize_stop_index = (initialize_start_index + d - 1);

    # grab -
    push!(episodes, ledger);
end

vwap Sᵢ = 73.89213144925395
vwap Sᵢ = 74.12140255322464
vwap Sᵢ = 74.41017107796586
vwap Sᵢ = 74.76991945234694
vwap Sᵢ = 74.77735251190552
vwap Sᵢ = 73.93148130854148


In [19]:
# inspect the first ledger stored in `episodes` -
Z = episodes[1]

Dict{DateTime, TransactionModel} with 24 entries:
  DateTime("2022-11-14T10:10:00") => TransactionModel(1, 74.2557, 1)
  DateTime("2022-11-14T09:05:00") => TransactionModel(10, 73.7098, 1)
  DateTime("2022-11-14T09:55:00") => TransactionModel(10, 74.007, 1)
  DateTime("2022-11-14T09:20:00") => TransactionModel(10, 74.2279, 1)
  DateTime("2022-11-14T09:25:00") => TransactionModel(10, 73.8785, 1)
  DateTime("2022-11-14T10:55:00") => TransactionModel(1, 74.2917, 0)
  DateTime("2022-11-14T10:20:00") => TransactionModel(1, 74.1568, 1)
  DateTime("2022-11-14T10:50:00") => TransactionModel(1, 74.1829, 0)
  DateTime("2022-11-14T09:30:00") => TransactionModel(10, 73.8522, 1)
  DateTime("2022-11-14T10:00:00") => TransactionModel(10, 73.9681, 1)
  DateTime("2022-11-14T09:40:00") => TransactionModel(10, 73.9802, 1)
  DateTime("2022-11-14T10:15:00") => TransactionModel(1, 74.2442, 1)
  DateTime("2022-11-14T10:25:00") => TransactionModel(1, 74.12, 1)
  DateTime("2022-11-14T09:50:00") => TransactionM