## CHEME 5660: Building a Trading Bot using Model-Free Reinforcement Learning

### Introduction

### Example setup

In [1]:
# activate the project environment in the current directory, then resolve and
# instantiate it so the packages recorded in Project.toml/Manifest.toml are available -
import Pkg; Pkg.activate("."); Pkg.resolve(); Pkg.instantiate();

[32m[1m  Activating[22m[39m project at `~/Desktop/julia_work/CHEME-5660-Markets-Mayhem-Example-Notebooks/jupyter-notebooks/CHEME-5660-Q-Learning-TradeBot-notebook`
[32m[1m  No Changes[22m[39m to `~/Desktop/julia_work/CHEME-5660-Markets-Mayhem-Example-Notebooks/jupyter-notebooks/CHEME-5660-Q-Learning-TradeBot-notebook/Project.toml`
[32m[1m  No Changes[22m[39m to `~/Desktop/julia_work/CHEME-5660-Markets-Mayhem-Example-Notebooks/jupyter-notebooks/CHEME-5660-Q-Learning-TradeBot-notebook/Manifest.toml`


In [2]:
# load req packages -
using DataFrames
using Dates
using FileIO
using JLD2
using PrettyTables
using Distributions
using Statistics
using DataFrames
using Plots
using Colors

# setup paths -
# _ROOT is the working directory at the time this cell runs (pwd), so the notebook must be
# started from its own folder for the "data" sub-directory below to resolve correctly -
const _ROOT = pwd();
const _PATH_TO_DATA = joinpath(_ROOT, "data");

In [3]:
include("CHEME-5660-Example-CodeLib.jl");

### Setup constants and other resources

In [4]:
# 5-min risk-free rate: the rate r̄ is compounded down to a single period -
# NOTE(review): 19656 = 252 × 78, which looks like 252 trading days × 78 five-minute bars per
# session (i.e. r̄ would be an annual rate) — confirm
r̄ = 0.0403;
risk_free_rate = ((1+r̄)^(1/19656) - 1);

# position sizing parameters -
d = 1;         # number of periods over which nₐ shares are bought at the open of the training data (TODO confirm: not used in the cells shown here)
lotsize = 10.0; # how many shares do we buy at the open? NOTE(review): the opening trade further down is hard-coded to 100.0 shares and does not read lotsize — confirm
nₐ = 1.0;       # how many shares are recorded per transaction at each step of the training loop

# setup actions and states -
actions = [1,2,3]  ; # 1 = buy, 2 = sell, 3 = hold
states = [1,2,3,4] ; # four discrete price states; the mapping from price to state is done by state(...) in the training loop

# start with an empty trade ledger: one row per transaction, holding the timestamp,
# the number of shares, the price, and the action code (1 = buy, 2 = sell, 3 = hold) -
ledger_df = DataFrame(
    :time => DateTime[],
    :n => Float64[],
    :price => Float64[],
    :action => Int64[],
);

# initialize Q (rows: states, columns: actions) -
# Start from zeros instead of `undef`: an `undef` array holds arbitrary memory, and only four
# of its entries are seeded below. Every other entry would otherwise be read as garbage
# (possibly huge, NaN or Inf) by anything that consumes Q, e.g. π(QMODEL.Q) during training -
Q_array = zeros(Float64, length(states), length(actions));

# seed the table: prefer action 2 (sell) in states 1-2 and action 1 (buy) in states 3-4 -
Q_array[1:2,2] .= 100.0;
Q_array[3:4,1] .= 100.0;

# build Q model -
# QLearningModel comes from the included code library; its fields are filled in one by one.
# NOTE(review): γ and α are presumably the discount factor and the learning rate used by
# update! — confirm against the library definition
QMODEL = QLearningModel();
QMODEL.γ = 0.75;
QMODEL.α = 0.05;
QMODEL.𝒮 = states;
QMODEL.𝒜 = actions;
QMODEL.Q = Q_array; # Q_array printed after training shows learned values, so the model appears to share (not copy) this array

#### Load and partition the OHLC price data set

In [5]:
# read the saved 5-min price data; the data set is stored under the "dd" key of the JLD2 file -
price_data_dictionary = load(
    joinpath(_PATH_TO_DATA, "CHEME-5660-Portfolio-Q-learning-5min-11-20-22.jld2")
)["dd"];

# sorted list of the ticker symbols available in the data set -
ticker_symbol_array = price_data_dictionary |> keys |> collect |> sort;

# split into a training and a prediction set using the library helper `partition`
# (fraction = 0.90 is presumably the share of rows kept for training — TODO confirm) -
price_training_dict, price_prediction_dict = partition(price_data_dictionary; fraction=0.90);

#### Establish an initial position

In [6]:
# ticker to study -
ticker_symbol = "AMD";

# training-set price table for that ticker -
df_training = price_training_dict[ticker_symbol];

# timestamps of the training rows, in ascending order -
timestamp_array = collect(sort(df_training.timestamp));

#### Establish state classes

In [7]:
# maximum-likelihood Normal fit to the volume weighted average price of the training set -
vwap_data = df_training.volume_weighted_average_price;
Nd = fit_mle(Normal, vwap_data);

# parameters of the fitted Normal; θ[1] and θ[2] are passed on as μ and σ when prices
# are mapped to states in the training loop -
θ = Nd |> params;


In [8]:
# open the position -
# a single buy (action = 1) of 100 shares at the first price in the training data.
# NOTE(review): the share count is hard-coded here; `lotsize` (10.0) is not used — confirm
transaction = (
    time = df_training[1, :timestamp],
    n = 100.0,
    action = 1,
    price = price(df_training, 1),
);
push!(ledger_df, transaction)

# vwap of the ledger after the opening trade -
vwap_best = ledger_df |> vwap;

In [9]:
# exploration probability for the ϵ-greedy training run -
ϵ = 0.30;

# categorical distribution used to draw an exploratory action:
# P(1 = buy) = 0.5, P(2 = sell) = 0.25, P(3 = hold) = 0.25 -
action_distribution = Categorical([0.5,0.25,0.25]);

# main ϵ-greedy training loop -
# Row 1 is used by the opening position, so we start at row 2; we stop one row early because
# every step also needs the price of the following row.
for i ∈ 2:(nrow(df_training) - 1)

    # price now and one period ahead -
    p = price(df_training, i);
    p′ = price(df_training, i+1);

    # map both prices to their discrete states using the fitted Normal parameters -
    s = state(p; μ = θ[1], σ = θ[2]);
    s′ = state(p′; μ = θ[1], σ = θ[2]);

    # choose the action: explore with probability ϵ, otherwise follow the current policy -
    if (rand() < ϵ)

        # explore: draw a random action (always 1, 2 or 3 for a three-outcome Categorical) -
        aᵢ = rand(action_distribution);
    else

        # exploit: the policy is recomputed from the current Q on every step because
        # update! below changes the model as we go -
        policy = π(QMODEL.Q);
        aᵢ = policy[s];
    end

    # record the chosen action in the ledger; the entry is the same for buy, sell and hold
    # apart from the action code.
    # NOTE(review): a hold (3) is also written with n = nₐ; how vwap/liquidate treat action 3
    # is defined in the code library — verify
    transaction = (
        time = df_training[i,:timestamp],
        n = nₐ,
        action = aᵢ,
        price = p
    );
    push!(ledger_df, transaction)

    # reward: value returned by liquidate for the updated ledger at the current price -
    Rᵢ = liquidate(ledger_df, p);

    # update the QMODEL with the (state, action, reward, next state) tuple -
    update!(QMODEL,s,aᵢ,Rᵢ,s′);
end

In [10]:
# show the Q table after training (rows: states, columns: actions) -
Q_array

4×3 Matrix{Float64}:
  8.88445  60.3486   10.6282
 15.8871   16.2576   14.2771
 65.5999    0.0       7.02071
  4.13045   6.58439   6.33831

In [11]:
# show the policy derived from the trained Q table: one action per state
# (the output below matches the column of the largest entry in each row of Q_array) -
π(Q_array)

4-element Vector{Int64}:
 2
 2
 1
 2

In [15]:
# all buy transactions (action == 1) recorded in the ledger -
# despite its name, idx is a DataFrame holding the matching rows, not a vector of indices
idx = filter(:action => ==(1), ledger_df)

Row,time,n,price,action
Unnamed: 0_level_1,DateTime,Float64,Float64,Int64
1,2022-11-14T09:00:00,100.0,72.944,1
2,2022-11-14T09:05:00,1.0,73.6489,1
3,2022-11-14T09:10:00,1.0,73.7095,1
4,2022-11-14T09:15:00,1.0,73.8939,1
5,2022-11-14T09:20:00,1.0,74.2044,1
6,2022-11-14T09:25:00,1.0,73.996,1
7,2022-11-14T09:30:00,1.0,73.8233,1
8,2022-11-14T09:35:00,1.0,73.9176,1
9,2022-11-14T09:40:00,1.0,73.9937,1
10,2022-11-14T09:55:00,1.0,74.0238,1
