In [1]:
!apt-get update -y -qq && apt-get install -y -qq cmake libopenmpi-dev python3-dev zlib1g-dev libgl1-mesa-glx swig

Selecting previously unselected package libgl1-mesa-glx:amd64.
(Reading database ... 120882 files and directories currently installed.)
Preparing to unpack .../libgl1-mesa-glx_23.0.4-0ubuntu1~22.04.1_amd64.deb ...
Unpacking libgl1-mesa-glx:amd64 (23.0.4-0ubuntu1~22.04.1) ...
Selecting previously unselected package swig4.0.
Preparing to unpack .../swig4.0_4.0.2-1ubuntu1_amd64.deb ...
Unpacking swig4.0 (4.0.2-1ubuntu1) ...
Selecting previously unselected package swig.
Preparing to unpack .../swig_4.0.2-1ubuntu1_all.deb ...
Unpacking swig (4.0.2-1ubuntu1) ...
Setting up libgl1-mesa-glx:amd64 (23.0.4-0ubuntu1~22.04.1) ...
Setting up swig4.0 (4.0.2-1ubuntu1) ...
Setting up swig (4.0.2-1ubuntu1) ...
Processing triggers for man-db (2.10.2-1) ...


In [2]:
%%capture
!pip install -q condacolab wrds swig
!pip install lightning
!pip install cohere openai tiktoken
# import condacolab
# condacolab.install()
!pip install -U git+https://github.com/AI4Finance-Foundation/FinRL.git

In [3]:
import torch
import lightning.pytorch as pl
import finrl
from finrl import config
from lightning.pytorch.loggers import TensorBoardLogger
from finrl.meta.preprocessor.yahoodownloader import YahooDownloader
from finrl.meta.preprocessor.preprocessors import FeatureEngineer, data_split
from finrl.meta.env_stock_trading.env_stocktrading import StockTradingEnv
from finrl.agents.stablebaselines3.models import DRLAgent, DRLEnsembleAgent
from finrl.plot import backtest_stats, backtest_plot, get_daily_return, get_baseline
from torch.utils.data.dataset import IterableDataset
from torch.utils.data import DataLoader
from typing import Iterator, List, Tuple

class FinRLLightning(pl.LightningModule):
    def __init__(self, start_date, end_date, ticker_list, total_timesteps):
        super(FinRLLightning, self).__init__()

        # Initialize and configure the custom environment
        self.df = YahooDownloader(start_date=start_date, end_date=end_date, ticker_list=ticker_list).fetch_data()
        self.fe = FeatureEngineer(
            use_technical_indicator=True,
            tech_indicator_list=config.INDICATORS,
            use_turbulence=False,
            user_defined_feature=False,
        )
        self.df = self.fe.preprocess_data(self.df)

        # Split data into training and evaluation sets
        self.trade = data_split(self.df, start_date, end_date)

        stock_dimension = len(self.trade.tic.unique())
        state_space = 1 + 2 * stock_dimension + len(config.INDICATORS) * stock_dimension
        self.env_kwargs = {
            "hmax": 100,
            "num_stock_shares": [0] * stock_dimension,
            "initial_amount": 1000000,
            "buy_cost_pct": 0.001,
            "sell_cost_pct": 0.001,
            "state_space": state_space,
            "stock_dim": stock_dimension,
            "tech_indicator_list": config.INDICATORS,
            "action_space": stock_dimension,
            "reward_scaling": 1e-4,
        }
        self.e_train_gym = StockTradingEnv(df=self.trade, **self.env_kwargs)

        # Initialize and configure the DRL agent
        self.agent = DRLAgent(model_name="ddpg", env=self.e_train_gym)

        self.total_timesteps = total_timesteps

    # def train_dataloader(self):
    #     # Implement your data loading logic here.
    #     # Load and preprocess the training data.
    #     train_data = self.trade  # Assuming 'self.train' contains your training data
    #     dataset = Dataset(train_data)
    #     dataloader = torch.utils.data.DataLoader(dataset, batch_size=batch_size, shuffle=True)
    #     return dataloader

    def forward(self, x):
        # Define forward pass (if needed)
        pass

    def training_step(self, batch, batch_idx):
        # Define the training logic
        trained_model = self.agent.train_model("ppo", total_timesteps=self.total_timesteps)
        return None

    def configure_optimizers(self):
        # You don't need an optimizer for reinforcement learning typically
        return []

class RLDataset(IterableDataset):
    """Iterable Dataset containing the ExperienceBuffer which will be updated with new experiences during training.

    Args:
        buffer: replay buffer
        sample_size: number of experiences to sample at a time
    """

    def __init__(self, buffer, sample_size: int = 1) -> None:
        self.buffer = buffer
        self.sample_size = sample_size

    def __iter__(self) -> Iterator[Tuple]:
        states, actions, rewards, dones, new_states = self.buffer.sample(batch_size=self.sample_size)
        for i in range(len(dones)):
            yield states[i], actions[i], rewards[i], new_states[i], dones[i]

# Create a lightning module to train the agent
class StockTradingModule(pl.LightningModule):
    def __init__(self, start_date, end_date, ticker_list, total_timesteps):
        super(StockTradingModule, self).__init__()
        torch.multiprocessing.set_start_method('fork',force=True)

        # Initialize and configure the custom environment
        self.df = YahooDownloader(start_date=start_date, end_date=end_date, ticker_list=ticker_list).fetch_data()
        self.fe = FeatureEngineer(
            use_technical_indicator=True,
            tech_indicator_list=config.INDICATORS,
            use_turbulence=False,
            user_defined_feature=False,
        )
        self.df = self.fe.preprocess_data(self.df)

        # Split data into training and evaluation sets
        self.trade = data_split(self.df, start_date, end_date)
        stock_dimension = len(self.trade.tic.unique())
        state_space = 1 + 2 * stock_dimension + len(config.INDICATORS) * stock_dimension
        self.env_kwargs = {
            "hmax": 100,
            "num_stock_shares": [0] * stock_dimension,
            "initial_amount": 1000000,
            "buy_cost_pct": 0.001,
            "sell_cost_pct": 0.001,
            "state_space": state_space,
            "stock_dim": stock_dimension,
            "tech_indicator_list": config.INDICATORS,
            "action_space": stock_dimension,
            "reward_scaling": 1e-4,
        }
        self.e_train_gym = StockTradingEnv(df=self.trade, **self.env_kwargs)

        # Initialize and configure the DRL agent
        self.agent = DRLAgent(env=self.e_train_gym)
        self.model = self.agent.get_model(model_name="ddpg")
        self.total_timesteps = total_timesteps
        self.model.replay_buffer.full = True

        self.automatic_optimization = False

        display(dir(self.model.env))

    def forward(self, state):
        # Return the action given the state
        return self.model.act(state)

    def training_step(self, batch, batch_idx):
        # Perform one step of training
        state, action, reward, next_state, done = batch
        display(self.model.env.step(action))
        loss = self.model.learn(total_timesteps=5000,)
        self.log('train_loss', loss, on_step=True, on_epoch=True, prog_bar=True, logger=True)
        return loss

    def configure_optimizers(self):
        # Return the optimizers for actor and critic networks
        return [self.model.actor.optimizer, self.model.critic.optimizer]

    # def configure_optimizers(self):
    #     # You don't need an optimizer for reinforcement learning typically
    #     return []

    def train_dataloader(self):
        # Return a dataloader for training
        return DataLoader(
            dataset=RLDataset(buffer=self.model.replay_buffer),
            batch_size=self.model.batch_size,
            num_workers=0,)


if __name__ == '__main__':
    # Set your parameters here
    start_date = "2000-01-01"
    end_date = "2022-01-01"
    ticker_list = ["AAPL", "MSFT"]
    total_timesteps = 20000

    # Create the Lightning model and trainer
    # model = FinRLLightning(start_date, end_date, ticker_list, total_timesteps)
    # trainer = pl.Trainer(accelerator="gpu", devices=1, max_epochs=1)

    # Train the model
    # trainer.fit(model)

    # Run backtest and evaluate the model
    # e_trade_gym = StockTradingEnv(df=model.trade, turbulence_threshold=250.0, **model.env_kwargs)
    # df_account_value, df_actions = DRLAgent.DRL_prediction(model=model.agent, environment=e_trade_gym)
    # Continue with backtesting analysis...
    # Create a trainer object
    trainer = pl.Trainer(max_epochs=10,  # number of epochs to train
                        accelerator="gpu",  # number of GPUs to use
                        logger=TensorBoardLogger('logs/'),  # logger for tensorboard
                        callbacks=[pl.callbacks.ModelCheckpoint('checkpoints/'),
                                   pl.callbacks.TQDMProgressBar(refresh_rate=3)],  # checkpoint callback for saving models
                        )  # refresh rate of progress bar

    # Train the agent
    trainer.fit(StockTradingModule(start_date, end_date, ticker_list, total_timesteps))

  from tensorflow.tsl.python.lib.core import pywrap_ml_dtypes
  PANDAS_VERSION = LooseVersion(pd.__version__)
INFO: GPU available: True (cuda), used: True
INFO:lightning.pytorch.utilities.rank_zero:GPU available: True (cuda), used: True
INFO: TPU available: False, using: 0 TPU cores
INFO:lightning.pytorch.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO: IPU available: False, using: 0 IPUs
INFO:lightning.pytorch.utilities.rank_zero:IPU available: False, using: 0 IPUs
INFO: HPU available: False, using: 0 HPUs
INFO:lightning.pytorch.utilities.rank_zero:HPU available: False, using: 0 HPUs


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
Shape of DataFrame:  (11072, 8)
Successfully added technical indicators
{'batch_size': 128, 'buffer_size': 50000, 'learning_rate': 0.001}
Using cuda device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.


['__abstractmethods__',
 '__annotations__',
 '__class__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__le__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__sizeof__',
 '__slots__',
 '__str__',
 '__subclasshook__',
 '__weakref__',
 '_abc_impl',
 '_get_indices',
 '_get_target_envs',
 '_obs_from_buf',
 '_options',
 '_reset_options',
 '_reset_seeds',
 '_save_obs',
 '_seeds',
 'action_space',
 'buf_dones',
 'buf_infos',
 'buf_obs',
 'buf_rews',
 'close',
 'env_is_wrapped',
 'env_method',
 'envs',
 'get_attr',
 'get_images',
 'getattr_depth_check',
 'keys',
 'metadata',
 'num_envs',
 'observation_space',
 'render',
 'render_mode',
 'reset',
 'reset_infos',
 'seed',
 'set_attr',
 'set_options',
 'step',
 'step_async',
 'step_wait',
 'unwrapped']

INFO: LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:lightning.pytorch.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO: 
  | Name | Type | Params
------------------------------
------------------------------
0         Trainable params
0         Non-trainable params
0         Total params
0.000     Total estimated model params size (MB)
INFO:lightning.pytorch.callbacks.model_summary:
  | Name | Type | Params
------------------------------
------------------------------
0         Trainable params
0         Non-trainable params
0         Total params
0.000     Total estimated model params size (MB)
/usr/local/lib/python3.10/dist-packages/lightning/pytorch/trainer/connectors/data_connector.py:441: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=1` in the `DataLoader` to improve performance.


Training: |          | 0/? [00:00<?, ?it/s]

RuntimeError: ignored

In [None]:
da