Import Data

Uses QuantConnect

In [None]:
# QuantBook is the QuantConnect research-notebook entry point.
# Docs: https://www.quantconnect.com/docs/research/overview
qb = QuantBook()
# Register the two equities used below as the legs of the MSFT/SPY spread.
spy = qb.AddEquity('SPY')
msft = qb.AddEquity('MSFT')
# Alternative (disabled): request an explicit date range instead of a bar count.
# start_time = datetime(2017, 1, 1)
# end_time = datetime(2021, 1, 1)
# history = qb.History(qb.Securities.Keys, start_time, end_time, Resolution.Daily)
# Request 360 bars at daily resolution for every key in qb.Securities.
history = qb.History(qb.Securities.Keys, 360, Resolution.Daily)

# Indicator Analysis (disabled example: Bollinger Bands on SPY)
# bbdf = qb.Indicator(BollingerBands(30, 2), spy.Symbol, 360, Resolution.Daily)
# bbdf.drop('standarddeviation', 1).plot()

In [None]:
# Keep only the 'close' column and pivot index level 0 into columns.
# NOTE(review): presumably level 0 of the History index is the symbol, giving
# one close-price column per security -- confirm against the QuantBook output.
history = history['close'].unstack(level = 0)

In [None]:
# Spread between the two legs: MSFT close minus SPY close.
# NOTE(review): the column labels are hard-coded symbol identifiers; confirm
# they match the labels produced by the unstack above in this environment.
history['Spread'] = history['MSFT R735QTJ8XC9X'] - history ['SPY R735QTJ8XC9X']
# Replace the identifier labels with plain tickers ('MSFT', 'SPY').
history = history.rename(columns = {'MSFT R735QTJ8XC9X': 'MSFT','SPY R735QTJ8XC9X': 'SPY'})

Create Environment

In [None]:
import gym
from stable_baselines3.common.vec_env import DummyVecEnv
from stable_baselines3 import A2C

import numpy as np
import pandas as pd
from matplotlib import pyplot as plt

In [None]:
class StockEnv(gym.Env):
    """MSFT-vs-SPY pair-trading environment replaying a fixed price history.

    Actions (``gym.spaces.Discrete(3)``):

    * ``0`` -- "Long":  long MSFT / short SPY.
    * ``1`` -- "Hold":  leave the portfolio untouched.
    * ``2`` -- "Short": short MSFT / long SPY.

    Switching direction first liquidates the current positions at the current
    prices, then opens the new pair sized by the available cash (whole shares
    only).  A flat per-share fee is charged on every share traded.

    Args:
        history: DataFrame with ``'MSFT'`` and ``'SPY'`` price columns; rows
            are consumed in positional order, one per step.
        render_mode: ``None`` or one of ``metadata["render_modes"]``.  No
            rendering is implemented; the value is only validated and stored.
    """

    metadata = {"render_modes": ["human", "rgb_array"], "render_fps": 4}

    # Commission charged per share bought or sold.
    FEE_PER_SHARE = .005
    # Cash balance at the start of every episode.
    STARTING_CASH = 100_000

    def __init__(self, history, render_mode=None):

        # Observations are dictionaries with the MSFT's and SPY's prices.
        # For this purpose, assume prices cannot go above 1,000.
        self.observation_space = gym.spaces.Dict(
            {
                "msft": gym.spaces.Box(low = 0, high = 1_000, shape = (1,), dtype=np.float32),
                "spy": gym.spaces.Box(low = 0, high = 1_000, shape = (1,), dtype=np.float32),
            }
        )

        self._msft = history.loc[:, 'MSFT']
        self._spy = history.loc[:, 'SPY']

        # Positional index of the current row in the price series.
        self.date = 0

        self.portfolio = {'MSFT': 0, 'SPY': 0, 'Cash': self.STARTING_CASH}
        # Cash that was available when the current positions were opened.
        self.cost_basis = 0

        # We have 3 actions, corresponding to "Long", "Hold", "Short"
        self.action_space = gym.spaces.Discrete(3)

        # Maps the abstract actions from `self.action_space` to the trade
        # direction they stand for.
        self._action_to_direction = {
            0: 'Long',
            1: 'Hold',
            2: 'Short'
        }

        assert render_mode is None or render_mode in self.metadata["render_modes"]
        self.render_mode = render_mode

        # Placeholders for a human-mode window and frame-rate clock.  Nothing
        # in this class assigns them; they stay `None`.
        self.window = None
        self.clock = None

    def _get_obs(self):
        """Return the prices at the current date as ``{"msft": ..., "spy": ...}``."""
        return {"msft": self._msft.iloc[self.date], "spy": self._spy.iloc[self.date]}

    def _get_info(self):
        """Return the live portfolio dict (the same object, not a copy)."""
        return self.portfolio

    def _position_value(self, observation):
        """Return the mark-to-market value of the share positions (shorts negative)."""
        return (self.portfolio['MSFT'] * observation['msft']
                + self.portfolio['SPY'] * observation['spy'])

    def _trade_fee(self):
        """Return the fee for trading every share currently held.

        Uses absolute share counts: a long leg and a short leg are both
        trades and must not cancel each other out.
        """
        return self.FEE_PER_SHARE * (abs(self.portfolio['MSFT']) + abs(self.portfolio['SPY']))

    def _rebalance(self, observation, msft_sign):
        """Liquidate the current pair and open a new one; return total fees paid.

        Args:
            observation: current prices as returned by `_get_obs`.
            msft_sign: ``+1`` for long MSFT / short SPY, ``-1`` for the reverse.
        """
        # 1) Close whatever is held: position value flows back into cash.
        close_fee = self._trade_fee()
        self.portfolio['Cash'] += self._position_value(observation) - close_fee

        # 2) Size both legs from the same cash budget, in whole shares.  The
        #    sign is applied after flooring so both legs round toward zero.
        budget = self.portfolio['Cash']
        self.cost_basis = budget
        self.portfolio['MSFT'] = msft_sign * (budget // observation['msft'])
        self.portfolio['SPY'] = -msft_sign * (budget // observation['spy'])

        # 3) Pay for the new positions: buying shares consumes cash, shorting
        #    releases it, so cash moves opposite to the position value.
        open_fee = self._trade_fee()
        self.portfolio['Cash'] -= self._position_value(observation) + open_fee

        return close_fee + open_fee

    def reset(self, seed=None, options=None):
        """Rewind to the first row with a fresh all-cash portfolio.

        `seed` and `options` are accepted but unused: the
        ``super().reset(seed=seed)`` call that would seed ``self.np_random``
        is disabled.

        Returns:
            The observation at the first row.
        """
        # super().reset(seed=seed)
        self.date = 0
        observation = self._get_obs()
        self.portfolio = {'MSFT': 0, 'SPY': 0, 'Cash': self.STARTING_CASH}
        self.cost_basis = 0

        return observation

    def step(self, action):
        """Apply `action` at the current date and advance one row.

        Returns:
            ``(observation, reward, terminated, info)`` where

            * ``observation`` is the observation the action was taken on (NOT
              the post-step one; callers in this notebook record it next to
              the action, so this pairing is kept),
            * ``reward`` is the one-step P&L of the positions held after the
              action, minus the fees paid this step,
            * ``terminated`` becomes True once ``date`` exceeds
              ``len(prices) - 3``,
            * ``info`` is the live portfolio dict.
        """
        observation = self._get_obs()

        fees = 0
        direction = self._action_to_direction[action]
        # Only trade when the requested direction differs from the one held.
        if direction == 'Long' and not self.portfolio['MSFT'] > 0:
            fees = self._rebalance(observation, msft_sign=1)
        elif direction == 'Short' and not self.portfolio['MSFT'] < 0:
            fees = self._rebalance(observation, msft_sign=-1)

        self.date += 1

        terminated = self.date > self._msft.shape[0] - 3

        next_observation = self._get_obs()
        reward = (self.portfolio['MSFT'] * (next_observation['msft'] - observation['msft'])
                  + self.portfolio['SPY'] * (next_observation['spy'] - observation['spy'])
                  - fees)

        return observation, reward, terminated, self._get_info()

In [None]:
# Single shared environment instance built from the MSFT/SPY history frame.
env = StockEnv(history)

In [None]:
import warnings
warnings.filterwarnings('ignore')

def run_pass(predictions, epsilon, env, max_steps=500):
    """Play one episode with an epsilon-greedy policy and log every step.

    Args:
        predictions: sequence of actions indexed by step number, or ``None``
            to always sample a random action.
        epsilon: probability of sampling a random action instead of using
            ``predictions``; any value >= 1 makes every step random.
        env: environment exposing ``reset()``, ``step(action)`` and
            ``action_space.sample()``.
        max_steps: upper bound on the number of steps played.

    Returns:
        DataFrame with one row per step and the columns 'MSFT Price',
        'SPY Price', 'Portfolio-MSFT', 'Portfolio-SPY', 'Portfolio-Cash',
        'reward' and 'action'.
    """
    env.reset()
    record = {}
    for i in range(max_steps):
        # Draw first so the random stream does not depend on `predictions`.
        explore = np.random.rand() < epsilon
        if explore or predictions is None:
            # Without predictions there is nothing to exploit: stay random
            # instead of failing on `None[i]`.
            action = env.action_space.sample()
        else:
            action = predictions[i]
        n_state, reward, done, info = env.step(action)
        record[i] = {
            'MSFT Price': n_state['msft'],
            'SPY Price': n_state['spy'],
            'Portfolio-MSFT': info['MSFT'],
            'Portfolio-SPY': info['SPY'],
            'Portfolio-Cash': info['Cash'],
            'reward': reward,
            'action': action,
        }
        if done:
            break
    return pd.DataFrame(record).T


In [None]:
import warnings
warnings.filterwarnings('ignore')

def run_pass_w_warmup(predictions, epsilon, env, sequence_length):
    """Play one episode, holding until a full input window has elapsed.

    Steps before ``sequence_length - 1`` and steps after
    ``predictions.shape[0]`` take action 1 and are not logged.  In between,
    the action is epsilon-greedy over ``predictions`` (offset so the first
    prediction lines up with the first policy-driven step) and the step is
    logged.

    Returns:
        DataFrame indexed by step number with the columns 'MSFT Price',
        'SPY Price', 'Portfolio-MSFT', 'Portfolio-SPY', 'Portfolio-Cash',
        'reward' and 'action' (policy-driven steps only).
    """
    env.reset()
    warmup_steps = sequence_length - 1
    rows = {}
    for step in range(500):
        policy_active = warmup_steps <= step <= predictions.shape[0]
        if not policy_active:
            # Outside the prediction range: hold, and keep nothing but `done`.
            _, _, finished, _ = env.step(1)
        else:
            if np.random.rand() < epsilon:
                chosen = env.action_space.sample()
            else:
                chosen = predictions[step - warmup_steps]
            prices, gain, finished, holdings = env.step(chosen)
            rows[step] = {
                'MSFT Price': prices['msft'],
                'SPY Price': prices['spy'],
                'Portfolio-MSFT': holdings['MSFT'],
                'Portfolio-SPY': holdings['SPY'],
                'Portfolio-Cash': holdings['Cash'],
                'reward': gain,
                'action': chosen,
            }
        if finished:
            break
    return pd.DataFrame(rows).T

Create First Pass

In [None]:
# Fully random episode: with epsilon = 1000 the `np.random.rand() < epsilon`
# test in run_pass is always true, so `predictions` (None) is never read.
this_pass = run_pass(None, 1000, env)

In [None]:
# Hindsight labels: go long (0) when the MSFT-minus-SPY spread is about to
# rise, otherwise go short (2).  The final row has no successor, so its
# change is NaN and it falls into the "short" branch.
price_spread = this_pass['MSFT Price'] - this_pass['SPY Price']
next_spread_change = price_spread.shift(-1) - price_spread
ideal_action = next_spread_change.apply(lambda x: 0 if x > 0 else 2).to_numpy()

Learn with MLP

In [None]:
# Estimate per-column normalisation statistics from repeated random episodes.
# Each episode contributes one column of means and one column of standard
# deviations; both come from the SAME episode, and the columns are joined
# once at the end instead of re-concatenating inside the loop.
N_BOOTSTRAP_PASSES = 101
pass_means = []
pass_stds = []
for epochs in range(N_BOOTSTRAP_PASSES):
    sample_pass = run_pass(None, 1000, env)
    pass_means.append(sample_pass.mean())
    pass_stds.append(sample_pass.std())
bootstrap_mean = pd.concat(pass_means, axis = 1)
bootstrap_std = pd.concat(pass_stds, axis = 1)
# (mean, std) used to standardise the price features; only the two price
# columns are kept.
bootstrap = (bootstrap_mean.mean(axis = 1)[['MSFT Price', 'SPY Price']], bootstrap_std.mean(axis = 1)[['MSFT Price', 'SPY Price']])

Learn as Unsequenced State

In [None]:
# q = MLPRegressor(learning_rate_init = .01)
# q_target = MLPRegressor(learning_rate_init = .01)

# init = True
# this_pass = run_pass(None, 40)
# average_output = np.zeros((this_pass.shape[0], 3))
# counter = np.ones((this_pass.shape[0], 3))
# for epochs in range(100):
#     this_output = np.zeros((this_pass.shape[0], 3))
#     this_pass = run_pass(q.predict(x)[:, :3].argmax(1) if not init else ideal_action, 20 / (epochs + 1))
#     x = this_pass.loc[:, ['MSFT Price', 'SPY Price', 'Portfolio-MSFT', 'Portfolio-SPY', 'Portfolio-Cash']]
#     mean = bootstrap[0]
#     std = bootstrap[1]
#     x = (x - mean) / std
#     for index in this_pass.index:
#         this_output[index, int(this_pass.loc[index, 'action'])] = this_pass.loc[index, 'reward']
#         this_counter = np.zeros((this_pass.shape[0], 3))
#         this_counter[index, int(this_pass.loc[index, 'action'])] = 1
#         counter = counter + this_counter
#     average_output = ((counter - 1) * average_output + this_output) / counter
    
#     if not init:
#         q_target.coefs_ = q.coefs_
#     else:
#         y_init = 100 * np.random.rand(this_pass.shape[0] - 1, 3 + x.shape[1])
#         q_target.partial_fit(x[:-1], y_init)
#         init = False
#     y_target = np.concatenate(((average_output + .000009 * q_target.predict(q_target.predict(x)[:, 3:])[:, :3].max(1).reshape(-1, 1) / 1.0003)[:-1], x.to_numpy()[1:]), axis = 1)
#     for i in range(5):
#         q.partial_fit(x[:-1], y_target)

Learn with MLP as Sequences

In [None]:
# Fitted Q-iteration with an MLP over sliding windows of standardised prices.
# Each network outputs 5 values per window: columns [:3] are the action
# values (Long / Hold / Short), columns [3:] the predicted next price pair.
# NOTE(review): MLPRegressor is not imported in any visible cell; presumably
# `from sklearn.neural_network import MLPRegressor` ran elsewhere -- confirm.
q = MLPRegressor(learning_rate_init = .01)
q_target = MLPRegressor(learning_rate_init = .01)

sequence_length = 15
init = True
this_pass = run_pass(None, 40, env)
average_output = np.zeros((this_pass.shape[0], 3))
counter = np.zeros((this_pass.shape[0], 3))
for epochs in range(30):
    this_pass = run_pass(None, 40 / (epochs + 1), env)
    x = this_pass.loc[:, ['MSFT Price', 'SPY Price']]
    mean = bootstrap[0]
    std = bootstrap[1]
    x = (x - mean) / std

    # Scatter this episode's rewards into a (step, action) table.  The
    # visit mask is allocated once per episode so every step is counted.
    this_output = np.zeros((this_pass.shape[0], 3))
    this_counter = np.zeros((this_pass.shape[0], 3))
    for index in this_pass.index:
        taken = int(this_pass.loc[index, 'action'])
        this_output[index, taken] = this_pass.loc[index, 'reward']
        this_counter[index, taken] = 1
    counter = counter + this_counter
    # Running mean of the reward per (step, action); cells not visited in
    # this episode keep their previous average instead of being shrunk.
    average_output = np.where(
        this_counter > 0,
        average_output + (this_output - average_output) / np.maximum(counter, 1),
        average_output,
    )

    # All windows x[i : i + sequence_length] for i in 0 .. N - sequence_length - 1.
    x_values = x.to_numpy()
    batched = np.stack([x_values[i: i + sequence_length] for i in range(x_values.shape[0] - sequence_length)])

    flattened_batched = batched.reshape(batched.shape[0], -1)

    predicted_batches = batched.copy()

    if not init:
        # Snapshot the online network's weights.  Copies are taken so later
        # training of `q` cannot change the target network through shared
        # arrays, and the intercepts are synchronised as well.
        q_target.coefs_ = [w.copy() for w in q.coefs_]
        q_target.intercepts_ = [b.copy() for b in q.intercepts_]
    else:
        # partial_fit on random targets only to initialise q_target's weights.
        y_init = 100 * np.random.rand(flattened_batched.shape[0], 3 + x.shape[1])
        q_target.partial_fit(flattened_batched, y_init)
        init = False

    # Replace the last row of each window with the target network's predicted
    # state from the previous window.
    predicted_batches[1:, -1, :] = q_target.predict(flattened_batched)[:-1, 3:]

    # Bootstrapped target: average reward plus the discounted best ACTION
    # value (columns [:3] only), followed by the true next state.
    next_q = q_target.predict(predicted_batches.reshape(batched.shape[0], -1))[1:, :3].max(1).reshape(-1, 1)
    y_target = np.concatenate(((average_output[sequence_length: -1] + .9 * next_q / 1.0003), x.to_numpy()[sequence_length + 1:]), axis = 1)

    for i in range(5):
        q.partial_fit(flattened_batched[:-1], y_target)

Learn Q-Table

# Tabular estimate of the average reward per (step, action), then a
# comparison of its greedy policy against the hindsight labels.
init = False
this_pass = run_pass(None, 40, env)
# Not used by the loop below; kept for any later cell that may read it.
all_outputs = np.zeros((this_pass.shape[0], 3))
average_output = np.zeros((this_pass.shape[0], 3))
# Counts start at one, i.e. every cell carries one implicit zero-reward
# observation (this also keeps the division below well-defined).
counter = np.ones((this_pass.shape[0], 3))
for epochs in range(10):
    this_pass = run_pass(average_output.argmax(1), 100000 / (epochs + 1), env)
    this_output = np.zeros((this_pass.shape[0], 3))
    this_counter = np.zeros((this_pass.shape[0], 3))
    for index in this_pass.index:
        taken = int(this_pass.loc[index, 'action'])
        this_output[index, taken] = this_pass.loc[index, 'reward']
        this_counter[index, taken] = 1
    counter = counter + this_counter
    # Update the running mean only where an action was actually taken;
    # unvisited cells keep their previous average instead of decaying.
    average_output = np.where(
        this_counter > 0,
        ((counter - 1) * average_output + this_output) / counter,
        average_output,
    )
# Notebook display: (number of steps where the greedy action matches the
# hindsight label, total number of steps).
(average_output.argmax(1) == ideal_action).sum(), len(ideal_action)

Learn with Linear Regression

In [None]:
# from sklearn.linear_model import LinearRegression

# q = LinearRegression()
# q_target = LinearRegression()

# scores = []
# init = True
# this_pass = run_pass(None, 40)
# average_output = np.zeros((this_pass.shape[0], 3))
# counter = np.ones((this_pass.shape[0], 3))
# for epochs in range(10000):
#     this_output = np.zeros((this_pass.shape[0], 3))
#     this_pass = run_pass(q.predict(x)[:, :3].argmax(1) if not init else ideal_action, 20 / (epochs + 1))
#     x = this_pass.loc[:, ['MSFT Price', 'SPY Price', 'Portfolio-MSFT', 'Portfolio-SPY', 'Portfolio-Cash']]
#     for index in this_pass.index:
#         this_output[index, int(this_pass.loc[index, 'action'])] = this_pass.loc[index, 'reward']
#         this_counter = np.zeros((this_pass.shape[0], 3))
#         this_counter[index, int(this_pass.loc[index, 'action'])] = 1
#         counter = counter + this_counter
#     average_output = ((counter - 1) * average_output + this_output) / counter
    
#     if not init:
#         q_target.coef_ = q.coef_
#     else:
#         y_init = 100 * np.random.rand(this_pass.shape[0] - 1, 3 + x.shape[1])
#         q_target.fit(x[:-1], y_init)
#         init = False
#     y_target = np.concatenate(((average_output + .9 * q_target.predict(q_target.predict(x)[:, 3:])[:, :3].max(1).reshape(-1, 1) / 1.0003)[:-1], x.to_numpy()[1:]), axis = 1)
#     if epochs % 1 == 0:
#         q.fit(x[:-1], y_init)

#     scores.append(q.score(x[:-1], y_init))

Learn with Transformer

In [None]:
import torch.nn as nn
import torch

In [None]:
class PositionalEncoding(nn.Module):
    r"""Append sinusoidal position features to the input features.

    Unlike the usual additive encoding, the position features are
    CONCATENATED to the input so that the result has ``d_model`` features:
    an input with ``f`` features receives the first ``d_model - f`` columns of
    the sinusoidal table.

    .. math::
        \text{PosEncoder}(pos, 2i) = \sin(pos / 10000^{2i / d\_model})
        \text{PosEncoder}(pos, 2i+1) = \cos(pos / 10000^{2i / d\_model})

    where ``pos`` is the position in the sequence and ``i`` the embedding index.

    Args:
        d_model: total feature size after concatenation (required).
        dropout: the dropout value (default=0.1).
        max_len: the max. length of the incoming sequence (default=5000).
    Examples:
        >>> pos_encoder = PositionalEncoding(d_model)
    """

    def __init__(self, d_model, dropout=0.1, max_len=5000):
        # Local import: `math` is not imported by any visible cell.
        import math

        super().__init__()
        self.dropout = nn.Dropout(p=dropout)

        pe = torch.zeros(max_len, d_model)
        position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-math.log(10000.0) / d_model))
        pe[:, 0::2] = torch.sin(position * div_term)
        # For odd d_model there is one fewer cosine column than sine column.
        pe[:, 1::2] = torch.cos(position * div_term[: d_model // 2])
        # Stored as (max_len, 1, d_model).
        pe = pe.unsqueeze(0).transpose(0, 1)
        self.register_buffer('pe', pe)

    def forward(self, x):
        r"""Concatenate position features along the last dimension.

        Args:
            x: a single sequence ``[sequence length, features]`` or a batch
               ``[batch size, sequence length, features]``.
        Returns:
            Tensor ``[batch size, sequence length, d_model]`` (batch size 1
            for a single sequence), after dropout.
        Raises:
            ValueError: if ``x`` has more than ``d_model`` features or is
                longer than ``max_len``.
        Examples:
            >>> output = pos_encoder(x)
        """
        if x.dim() == 2:
            # Treat a lone sequence as a batch of one.
            x = x.unsqueeze(0)
        batch_size, seq_len, n_features = x.shape

        n_position = self.pe.size(-1) - n_features
        if n_position < 0:
            raise ValueError(
                f"input has {n_features} features but d_model is {self.pe.size(-1)}"
            )
        if seq_len > self.pe.size(0):
            raise ValueError(
                f"sequence length {seq_len} exceeds max_len {self.pe.size(0)}"
            )

        # Index the table by position WITHIN the sequence (dim 1), so every
        # sequence in the batch receives the same position features.
        position_features = self.pe[:seq_len, 0, :n_position].unsqueeze(0).expand(batch_size, -1, -1)
        return self.dropout(torch.cat((x, position_features), dim=-1))

In [None]:
class TransformerModel(nn.Module):
    """Transformer encoder over a fixed-length window, followed by two linear layers.

    The forward pass is: positional encoding -> masked `nn.TransformerEncoder`
    -> flatten -> `hidden1` -> `hidden2`, giving 5 outputs per window.  Callers
    in this notebook read columns ``[:3]`` as action values and ``[3:]`` as the
    predicted next state.

    `decoder`, `norm1`, `linear1` and `linear2` are constructed (and
    `linear1`/`linear2` initialised) but not used by `forward`; they are kept
    so the parameter set and state dict stay unchanged.

    Args:
        length: window length; `hidden1` expects exactly ``ninp * length``
            inputs, so sequences must have this length.
        ninp: feature size inside the transformer (``d_model``).
        nhead: number of attention heads.
        nhid: accepted for interface compatibility; not used.
        nlayers: number of encoder (and decoder) layers.
        dropout: dropout of the positional encoder only.
    """

    def __init__(self, length, ninp, nhead, nhid, nlayers, dropout=0.5):
        super().__init__()
        try:
            from torch.nn import Transformer, TransformerEncoder, TransformerEncoderLayer
        except ImportError as e:
            raise ImportError('TransformerEncoder module does not exist in PyTorch 1.1 or '
                              'lower.') from e
        self.model_type = 'Transformer'
        self.src_mask = None
        self.pos_encoder = PositionalEncoding(ninp, dropout)

        encoder_layer = nn.TransformerEncoderLayer(
            d_model=ninp,
            nhead=nhead,
            batch_first=True
            )

        self.encoder = nn.TransformerEncoder(
            encoder_layer=encoder_layer,
            num_layers=nlayers,
            norm=None
            )

        decoder_layer = nn.TransformerDecoderLayer(
            d_model=ninp,
            nhead=nhead,
            batch_first=True
            )

        self.decoder = nn.TransformerDecoder(
            decoder_layer=decoder_layer,
            num_layers=nlayers,
            norm=None
            )

        self.norm1 = nn.LayerNorm(
            ninp
        )

        self.hidden1= nn.Linear(
            in_features = ninp * length,
            out_features = 100
        )

        self.hidden2 = nn.Linear(
            in_features = 100,
            out_features = 5
        )

        self.linear1 = nn.Linear(
            in_features=length,
            out_features=1
            )

        self.linear2 = nn.Linear(
            in_features = ninp,
            out_features = 3
        )

        self.init_weights()

    def _generate_square_subsequent_mask(self, sz):
        """Return an ``sz x sz`` float mask: 0 on and below the diagonal, -inf above."""
        mask = (torch.triu(torch.ones(sz, sz)) == 1).transpose(0, 1)
        mask = mask.float().masked_fill(mask == 0, float('-inf')).masked_fill(mask == 1, float(0.0))
        return mask

    def init_weights(self):
        """Zero the biases and uniformly initialise the weights of `linear1`/`linear2`."""
        initrange = 0.1
        self.linear1.bias.data.zero_()
        self.linear1.weight.data.uniform_(-initrange, initrange)
        self.linear2.bias.data.zero_()
        self.linear2.weight.data.uniform_(-initrange, initrange)

    def forward(self, src, has_mask=True):
        """Map a window (or batch of windows) to 5 outputs per window.

        Args:
            src: ``[sequence length, features]`` or
                ``[batch size, sequence length, features]``.
            has_mask: apply the subsequent-position attention mask when True.
        Returns:
            Tensor ``[batch size, 5]``.
        """
        if has_mask:
            seq_len = src.size(0) if src.dim() == 2 else src.size(1)
            # The mask is seq_len x seq_len, so the cache must be keyed on the
            # sequence length (not the batch size) and live on src's device.
            if (self.src_mask is None
                    or self.src_mask.size(0) != seq_len
                    or self.src_mask.device != src.device):
                self.src_mask = self._generate_square_subsequent_mask(seq_len).to(src.device)
        else:
            self.src_mask = None

        src = self.pos_encoder(src)
        encoder_output = self.encoder(src, mask = self.src_mask)
        # Flatten each encoded window to (batch, seq_len * ninp) for the head.
        output = self.hidden1(encoder_output.reshape(encoder_output.size(0), -1))
        output = self.hidden2(output)
        return output

In [None]:
import torch


def train_one_epoch(q, flattened_batched, y_target):
    """Take one optimiser step of `q` on a single batch.

    Relies on the module-level `optimizer` and `loss_fn`.

    Args:
        q: model being trained.
        flattened_batched: input batch passed to `q`.
        y_target: regression targets compared against the model output.

    Returns:
        ``(loss, q, outputs)``: the loss tensor for this batch, the same model
        object, and the model output computed before the weight update.
    """
    # Clear gradients left over from the previous step.
    optimizer.zero_grad()

    predictions = q(flattened_batched)

    batch_loss = loss_fn(predictions, y_target)
    batch_loss.backward()

    # Apply the gradient update.
    optimizer.step()

    return batch_loss, q, predictions

In [None]:
def train_one_pass(this_pass, average_output, counter, q, q_target, init, sequence_length=15):
    """Update the reward table from one episode and train `q` on its windows.

    Relies on the module-level `bootstrap` (normalisation statistics) and
    `train_one_epoch`.

    Args:
        this_pass: episode log as produced by `run_pass`; it is used as given
            (no new episode is simulated here).
        average_output: ``(steps, 3)`` tensor, running mean reward per
            (step, action).
        counter: ``(steps, 3)`` tensor of visit counts; updated in place.
        q: online network being trained.
        q_target: target network used to build the regression targets.
        init: passed through unchanged.
        sequence_length: window length; must match the `length` the networks
            were built with.

    Returns:
        ``(loss, average_output, counter, outputs, y_target, q, q_target, init)``
        where `loss` and `outputs` come from the last of the 5 training steps.
    """
    x = this_pass.loc[:, ['MSFT Price', 'SPY Price']]
    mean = bootstrap[0]
    std = bootstrap[1]
    x = torch.from_numpy(((x - mean) / std).to_numpy()).to(torch.float32)

    # NOTE(review): q_target is evaluated in whatever mode it is currently in;
    # if it is in training mode its dropout makes the targets noisy.
    with torch.no_grad():
        this_output = torch.zeros((this_pass.shape[0], 3))
        this_counter = torch.zeros((this_pass.shape[0], 3))
        for index in this_pass.index:
            taken = int(this_pass.loc[index, 'action'])
            this_output[index, taken] = this_pass.loc[index, 'reward']
            this_counter[index, taken] = 1
        counter += this_counter
        # Running mean, updated only for the (step, action) cells visited in
        # this episode; unvisited cells keep their previous average.
        average_output = torch.where(
            this_counter > 0,
            average_output + (this_output - average_output) / counter.clamp(min=1),
            average_output,
        )

        # All windows x[i : i + sequence_length], i in 0 .. N - sequence_length - 1.
        batched = torch.stack([x[i: i + sequence_length] for i in range(x.shape[0] - sequence_length)])

        # Replace the last row of each window with the target network's
        # predicted state (output columns [3:]) from the previous window.
        predicted_batches = batched.detach().clone()
        predicted_batches[1:, -1, :] = q_target(batched)[:-1, 3:]

        # Bootstrapped target: average reward plus the discounted best ACTION
        # value (output columns [:3]), followed by the true next state.
        next_q = q_target(predicted_batches)[1:, :3].max(1).values.reshape(-1, 1)
        y_target = torch.cat(((average_output[sequence_length: -1] + .9 * next_q / 1.0003), x[sequence_length + 1:]), dim = 1)

    for _ in range(5):
        loss, q, outputs = train_one_epoch(q, batched[:-1], y_target)

    # Synchronise the target network with the freshly trained online network.
    q_target.load_state_dict(q.state_dict())

    return loss, average_output, counter, outputs, y_target, q, q_target, init

In [None]:
def prep_average_output(average_output, counter):
    """Pre-fill the reward table from 50 random episodes.

    Relies on the module-level `run_pass`, `ideal_action` and `env`.  With
    epsilon = 1000 every action is sampled randomly, so `ideal_action` is
    never actually consulted.

    Args:
        average_output: ``(steps, 3)`` NumPy array, running mean reward per
            (step, action).
        counter: ``(steps, 3)`` NumPy array of visit counts; not modified in
            place, and the updated counts are not returned.

    Returns:
        The updated `average_output` array.
    """
    for passes in range(50):
        this_pass = run_pass(ideal_action, 1000, env)
        this_output = np.zeros((this_pass.shape[0], 3))
        this_counter = np.zeros((this_pass.shape[0], 3))
        for index in this_pass.index:
            taken = int(this_pass.loc[index, 'action'])
            this_output[index, taken] = this_pass.loc[index, 'reward']
            this_counter[index, taken] = 1
        counter = counter + this_counter
        # Update the running mean only for cells visited in this episode;
        # unvisited cells keep their previous average.
        average_output = np.where(
            this_counter > 0,
            average_output + (this_output - average_output) / np.maximum(counter, 1),
            average_output,
        )
    return average_output

In [None]:
# Online and target networks: window length 15, d_model 6, 6 heads, 2 layers.
q = TransformerModel(15, 6, 6, 6, 2)
q_target = TransformerModel(15, 6, 6, 6, 2)

loss_fn = nn.MSELoss()

from torch.optim import SGD, Adam

# Optimizers specified in the torch.optim package
# optimizer = SGD(q.parameters(), lr=0.001, momentum=0.9)
optimizer = Adam(q.parameters(), lr=0.01)

epoch_number = 0

EPOCHS = 75

best_vloss = 1_000_000.

init = True
# Run the reference episode first so the table shapes below come from this
# cell rather than from a `this_pass` left over by an earlier cell.
this_pass = run_pass(ideal_action, 10000, env)
average_output = torch.zeros((this_pass.shape[0], 3))
counter = torch.zeros((this_pass.shape[0], 3))
# average_output = prep_average_output(average_output, counter)
this_output = torch.zeros((this_pass.shape[0], 3))
x = torch.from_numpy(this_pass.loc[:, ['MSFT Price', 'SPY Price', 'Portfolio-MSFT', 'Portfolio-SPY', 'Portfolio-Cash']].to_numpy()).to(torch.float32)
for epoch in range(EPOCHS):

    # Make sure gradient tracking is on, and do a pass over the data
    q.train(True)

    # NOTE(review): `epoch > EPOCHS` is never true inside this loop, so
    # `ideal_action` is always passed; and since 1000 / (epoch + 1) stays
    # above 1 for all 75 epochs, every action is sampled randomly anyway.
    this_pass = run_pass(None if epoch > EPOCHS else ideal_action, 1000 / (epoch + 1), env)
    avg_loss, average_output, counter, outputs, y_target, q, q_target, init = train_one_pass(this_pass, average_output, counter, q, q_target, init)
    print(f'EPOCH {epoch_number + 1}: {avg_loss}')

    epoch_number += 1