## CHEME 5660: Building a Trading Bot using Model-Free Reinforcement Learning

### Introduction

### Example setup

In [1]:
# activate the project in the current directory, then resolve and install its dependencies -
import Pkg
Pkg.activate(".")
Pkg.resolve()
Pkg.instantiate();

[32m[1m  Activating[22m[39m project at `~/Desktop/julia_work/CHEME-5660-Markets-Mayhem-Example-Notebooks/jupyter-notebooks/CHEME-5660-Q-Learning-TradeBot-notebook`
[32m[1m  No Changes[22m[39m to `~/Desktop/julia_work/CHEME-5660-Markets-Mayhem-Example-Notebooks/jupyter-notebooks/CHEME-5660-Q-Learning-TradeBot-notebook/Project.toml`
[32m[1m  No Changes[22m[39m to `~/Desktop/julia_work/CHEME-5660-Markets-Mayhem-Example-Notebooks/jupyter-notebooks/CHEME-5660-Q-Learning-TradeBot-notebook/Manifest.toml`


In [2]:
# load req packages -
using DataFrames
using Dates
using FileIO
using JLD2
using PrettyTables
using Distributions
using Statistics
using DataFrames
using Plots
using Colors

# paths: the working directory is the root; the data files live in its "data" subdirectory -
const _ROOT = pwd()
const _PATH_TO_DATA = joinpath(_ROOT, "data");

In [3]:
# load the example code library -
# NOTE(review): presumably this file defines the helpers used below (TransactionModel,
# partition, initialize, vwap, price, build, confirm, results, π) — confirm
include("CHEME-5660-Example-CodeLib.jl");

### Setup constants and other resources

In [4]:
# per-period (5-min) risk-free rate: r̄ compounded down over 19656 periods -
# NOTE(review): presumably r̄ is an annual rate and 19656 = 252 trading days x 78
# five-minute bars per day — confirm
r̄ = 0.0403
risk_free_rate = (1 + r̄)^(1 / 19656) - 1

# initial position and trade size -
d = 12          # number of periods used to build the initial position (12 periods = 1hr)
lotsize = 10.0  # lot size handed to initialize(...) when the initial position is built
nₐ = 1.0        # volume of each proposed buy/sell trade during an episode

# where does our training data start? (right after the first d periods)
cursor = d

# episode layout -
number_of_episodes = 12            # number of episodes
number_of_steps_per_episodes = 12  # trading steps per episode, after the d initial periods

# setup action states -
actions = [1, 2, 3] # buy, sell, hold

# setup my episodes container: one timestamp => transaction ledger per episode -
episodes = Vector{Dict{DateTime,TransactionModel}}();

In [5]:
# prior over actions used by the random agent: P(buy) = 0.40, P(sell) = 0.30, P(hold) = 0.30 -
action_distribution = Categorical([0.40, 0.30, 0.30]);

#### Load and partition the OHLC price data set

In [6]:
# read the JLD2 portfolio file and pull out the entry stored under the "dd" key -
price_data_dictionary = load(
    joinpath(_PATH_TO_DATA, "CHEME-5660-Portfolio-Q-learning-5min-11-20-22.jld2")
)["dd"];

# the ticker symbols in the data set are the dictionary keys; keep them sorted -
ticker_symbol_array = sort(collect(keys(price_data_dictionary)));

# split the data into a training set and a prediction set (fraction = 0.90 goes to partition) -
price_training_dict, price_prediction_dict = partition(price_data_dictionary; fraction = 0.90);

#### Establish an initial position

In [7]:
# ticker we explore, and its training data -
ticker_symbol = "AMD";
df_training = price_training_dict[ticker_symbol];

# timestamps of the training rows, in ascending order -
timestamp_array = collect(sort(df_training[!, :timestamp]));

#### Establish state classes

In [8]:
# maximum-likelihood fit of a Normal distribution to the training vwap column -
vwap_data = df_training.volume_weighted_average_price;
Nd = fit_mle(Normal, vwap_data);

# parameters of the fitted distribution -
θ = params(Nd);

#### Run a purely random agent

In [9]:
# setup -
# index window (into the training data) used to build each episode's initial position;
# the window is advanced at the end of every episode -
initialize_start_index = 1;
initialize_stop_index = d; # d = 12 = 1hr

# main loop: build one ledger per episode with a purely random agent -
for episode_index ∈ 1:number_of_episodes

    # guard: the last step of this episode reads timestamp_array at
    # initialize_stop_index + number_of_steps_per_episodes + 1. Stop cleanly (keeping the
    # episodes already completed) instead of failing mid-episode with a BoundsError -
    last_required_index = initialize_stop_index + number_of_steps_per_episodes + 1;
    if (last_required_index > length(timestamp_array))
        @warn "Not enough training data for episode $(episode_index); stopping early" last_required_index number_of_timestamps = length(timestamp_array)
        break
    end

    # initialize the ledger from the start:stop window of the training data -
    ledger = initialize(df_training; lotsize = lotsize, start = initialize_start_index, stop = initialize_stop_index);

    # compute the vwap of the initial ledger -
    Sᵢ = vwap(ledger);

    println("vwap Sᵢ = $(Sᵢ)")

    # steps per episode (own index name, so the episode index is not shadowed) -
    for step_index ∈ 1:number_of_steps_per_episodes

        # pick a random action: 1 = buy, 2 = sell, 3 = hold -
        aᵢ = rand(action_distribution);

        # grab the time stamps and prices at the open and close of this step -
        current_cursor = initialize_stop_index + step_index;
        next_cursor = current_cursor + 1;
        event_open_timestamp = timestamp_array[current_cursor];
        event_close_timestamp = timestamp_array[next_cursor];
        p̂₁ = price(df_training, event_open_timestamp);
        p̂₂ = price(df_training, event_close_timestamp);

        # map the action to a trade sense: buy => 1 (default), sell => -1, hold => 0 -
        sense_flag = 1;
        if (aᵢ == 2)
            sense_flag = -1;
        elseif (aᵢ == 3)
            sense_flag = 0;
        end

        # build a proposed trade model -
        proposed_trade = build(TransactionModel, volume = nₐ, sense = sense_flag, p₁ = p̂₁, p₂ = p̂₂);

        # record the trade only if the ledger confirms it -
        if (confirm(ledger, proposed_trade) == true)
            ledger[event_open_timestamp] = proposed_trade;
        end
    end

    # update the cursor positions: the next window starts 3 rows after the current stop
    # and again spans d rows, so it overlaps the rows traded in this episode -
    # NOTE(review): confirm that this stride/overlap is intended
    initialize_start_index = (initialize_stop_index + 3);
    initialize_stop_index = (initialize_start_index + d - 1);

    # store the ledger for this episode -
    push!(episodes, ledger);
end

vwap Sᵢ = 73.8596719060785
vwap Sᵢ = 74.1200044907203
vwap Sᵢ = 74.44399361689884
vwap Sᵢ = 74.6823695271466
vwap Sᵢ = 74.74497432902115
vwap Sᵢ = 73.99885067442507
vwap Sᵢ = 74.17668389700987
vwap Sᵢ = 74.63260172033549
vwap Sᵢ = 74.92335400809586
vwap Sᵢ = 74.66382513441171
vwap Sᵢ = 73.761865697035
vwap Sᵢ = 73.80226746283965


In [10]:
# inspect the ledger stored for the second episode -
Z = episodes[2]

Dict{DateTime, TransactionModel} with 24 entries:
  DateTime("2022-11-14T11:05:00") => TransactionModel(10, 74.1, 74.1, 1)
  DateTime("2022-11-14T11:45:00") => TransactionModel(1, 74.3846, 74.4835, -1)
  DateTime("2022-11-14T12:00:00") => TransactionModel(1, 74.3409, 74.6121, 0)
  DateTime("2022-11-14T10:55:00") => TransactionModel(10, 74.1653, 74.1653, 1)
  DateTime("2022-11-14T11:40:00") => TransactionModel(1, 74.3863, 74.3634, -1)
  DateTime("2022-11-14T10:20:00") => TransactionModel(10, 74.2596, 74.2596, 1)
  DateTime("2022-11-14T10:50:00") => TransactionModel(10, 74.0091, 74.0091, 1)
  DateTime("2022-11-14T11:10:00") => TransactionModel(10, 73.9508, 73.9508, 1)
  DateTime("2022-11-14T12:10:00") => TransactionModel(1, 74.6864, 74.6475, -1)
  DateTime("2022-11-14T10:15:00") => TransactionModel(10, 74.2507, 74.2507, 1)
  DateTime("2022-11-14T10:25:00") => TransactionModel(10, 74.2415, 74.2415, 1)
  DateTime("2022-11-14T11:20:00") => TransactionModel(1, 74.0387, 73.9933, 1)
  DateTime

In [11]:
# table of results for all episodes; the columns :s, :a and :r are read below -
R = results(episodes, Nd);

# initialize -
𝒮 = [1,2,3];
𝒜 = [1,-1,0]; # buy, sell, hold

# build Q array: Q_array[i,j] is the mean reward recorded for state 𝒮[i] and action 𝒜[j].
# Start from zeros so that a (state, action) pair with no samples keeps 0.0 instead of
# the NaN that mean(...) returns for an empty vector -
Q_array = zeros(Float64, length(𝒮), length(𝒜));

for state_index ∈ eachindex(𝒮), action_index ∈ eachindex(𝒜)

    # ok, so get all the rewards for this state and action -
    rewards_vector = filter(
        [:s,:a] => (x,y) -> (x == 𝒮[state_index] && y == 𝒜[action_index]), R
    )[:,:r];

    # only overwrite the default when there is something to average -
    if (!isempty(rewards_vector))
        Q_array[state_index, action_index] = mean(rewards_vector);
    end
end

In [12]:
# apply π (not defined in this section of the notebook) to the Q table -
# NOTE(review): presumably returns the index of the best action for each state — confirm
idx = π(Q_array)

3-element Vector{Int64}:
 3
 3
 3

In [13]:
# display the Q table (rows: states in 𝒮, columns: actions in 𝒜) -
Q_array

3×3 Matrix{Float64}:
 -0.333333  -0.75      1.0
 -0.142857   0.2       1.0
 -0.333333  -0.692308  1.0

In [14]:
# spot check: rewards recorded for state 1 under the hold action (a == 0) -
rewards_vector = filter([:s, :a] => (s, a) -> (s == 1 && a == 0), R)[:, :r]

7-element Vector{Float64}:
 1.0
 1.0
 1.0
 1.0
 1.0
 1.0
 1.0

In [15]:
# average of the rewards held in rewards_vector -
mean(rewards_vector)

1.0