<h1>Using Existing Technical Indicators</h1>

In [46]:
import pandas as pd
import numpy as np
import talib as ta

class TechnicalIndicators:
    def __init__(self, data):
        self.data = data

    def add_momentum_indicators(self):
        self.data['RSI'] = ta.RSI(self.data['Close'], timeperiod=14)
        self.data['MACD'], self.data['MACD_signal'], self.data['MACD_hist'] = ta.MACD(self.data['Close'], fastperiod=12, slowperiod=26, signalperiod=9)
        self.data['Stoch_k'], self.data['Stoch_d'] = ta.STOCH(self.data['High'], self.data['Low'], self.data['Close'],
                                                              fastk_period=14, slowk_period=3, slowd_period=3)

    def add_volume_indicators(self):
        self.data['OBV'] = ta.OBV(self.data['Close'], self.data['Volume'])

    def add_volatility_indicators(self):
        self.data['Upper_BB'], self.data['Middle_BB'], self.data['Lower_BB'] = ta.BBANDS(self.data['Close'], timeperiod=20)
        self.data['ATR_1'] = ta.ATR(self.data['High'], self.data['Low'], self.data['Close'], timeperiod=1)
        self.data['ATR_2'] = ta.ATR(self.data['High'], self.data['Low'], self.data['Close'], timeperiod=2)
        self.data['ATR_5'] = ta.ATR(self.data['High'], self.data['Low'], self.data['Close'], timeperiod=5)
        self.data['ATR_10'] = ta.ATR(self.data['High'], self.data['Low'], self.data['Close'], timeperiod=10)
        self.data['ATR_20'] = ta.ATR(self.data['High'], self.data['Low'], self.data['Close'], timeperiod=20)

    def add_trend_indicators(self):
        self.data['ADX'] = ta.ADX(self.data['High'], self.data['Low'], self.data['Close'], timeperiod=14)
        self.data['+DI'] = ta.PLUS_DI(self.data['High'], self.data['Low'], self.data['Close'], timeperiod=14)
        self.data['-DI'] = ta.MINUS_DI(self.data['High'], self.data['Low'], self.data['Close'], timeperiod=14)
        self.data['CCI'] = ta.CCI(self.data['High'], self.data['Low'], self.data['Close'], timeperiod=5)

    def add_other_indicators(self):
        self.data['DLR'] = np.log(self.data['Close'] / self.data['Close'].shift(1))
        self.data['TWAP'] = self.data['Close'].expanding().mean()
        self.data['VWAP'] = (self.data['Volume'] * (self.data['High'] + self.data['Low']) / 2).cumsum() / self.data['Volume'].cumsum()

    def add_all_indicators(self):
        self.add_momentum_indicators()
        self.add_volume_indicators()
        self.add_volatility_indicators()
        self.add_trend_indicators()
        self.add_other_indicators()
        return self.data

In [2]:
data = pd.read_csv('xnas-itch-20230703.tbbo.csv')

# Preprocessing to create necessary columns
data['price']=data['price']/1e9
data['bid_px_00']=data['bid_px_00']/1e9
data['ask_px_00']=data['ask_px_00']/1e9

data['Close'] = data['price']
data['Volume'] = data['size']
data['High'] = data[['bid_px_00', 'ask_px_00']].max(axis=1)
data['Low'] = data[['bid_px_00', 'ask_px_00']].min(axis=1)
data['Open'] = data['Close'].shift(1).fillna(data['Close'])


ti = TechnicalIndicators(data)
df_with_indicators = ti.add_all_indicators()

In [3]:
df_with_indicators

Unnamed: 0,ts_recv,ts_event,rtype,publisher_id,instrument_id,action,side,depth,price,size,...,ATR_5,ATR_10,ATR_20,ADX,+DI,-DI,CCI,DLR,TWAP,VWAP
0,1688371200660869841,1688371200660704717,1,2,32,T,B,0,194.12,1,...,,,,,,,,,194.120000,193.875000
1,1688371201201402566,1688371201201237816,1,2,32,T,B,0,194.11,2,...,,,,,,,,-0.000052,194.115000,193.961667
2,1688371201233688992,1688371201233524761,1,2,32,T,B,0,194.11,8,...,,,,,,,,0.000000,194.113333,193.993182
3,1688371201317556361,1688371201317392163,1,2,32,T,B,0,194.11,2,...,,,,,,,,0.000000,194.112500,193.995000
4,1688371201478520666,1688371201478356044,1,2,32,T,B,0,194.00,7,...,,,,,,,-74.468085,-0.000567,194.090000,193.979250
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
59266,1688417954514485218,1688417954514320323,1,2,32,T,B,0,192.44,6,...,0.031258,0.028636,0.027592,15.474528,6.612534,3.765166,118.055556,0.000208,192.701135,192.722211
59267,1688417961020718430,1688417961020553920,1,2,32,T,B,0,192.44,1,...,0.033006,0.029772,0.028212,16.329018,5.953254,3.389772,83.333333,0.000000,192.701131,192.722211
59268,1688417973297905504,1688417973297741235,1,2,32,T,A,0,192.40,5,...,0.038405,0.032795,0.029802,19.013869,9.751295,2.919558,12.820513,-0.000208,192.701125,192.722211
59269,1688417996889779362,1688417996889614660,1,2,32,T,B,0,192.45,3,...,0.040724,0.034515,0.030812,21.506945,8.671762,2.596344,100.000000,0.000260,192.701121,192.722211


<h1>Creating Transformer Model</h1>

In [4]:
import warnings
warnings.filterwarnings("ignore")
warnings.filterwarnings("ignore", category=UserWarning)
import torch
from transformers import BertTokenizer, BertForSequenceClassification, Trainer, TrainingArguments
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")
# Load the pre-trained BERT model and tokenizer
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
model = BertForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=3)  # 3 labels for Buy, Sell, Hold
model.to(device)

def preprocess_data(data):
    texts = data['text'].tolist()
    labels = data['label'].tolist()
    encodings = tokenizer(texts, truncation=True, padding=True, max_length=128)
    return encodings, labels

def derive_action(row):
    if row['side'] == 'B':  # Buy aggressor
        return 0  # Buy
    elif row['side'] == 'A':  # Sell aggressor
        return 1  # Sell
    else:
        return 2  # Hold (for 'N' or any other scenario)

# Add a derived action column
df_with_indicators['trade_action'] = df_with_indicators.apply(derive_action, axis=1)

data = {
    'text': [],
    'label': []
}

# Example: Loop over the DataFrame and create text data
for idx, row in df_with_indicators.iterrows():
    text = (
        f"Price is {row['Close']}, RSI is {row['RSI']}, MACD is {row['MACD']}, "
        f"Stochastic K is {row['Stoch_k']}, OBV is {row['OBV']}, "
        f"Upper BB is {row['Upper_BB']},DI+ is {row['+DI']}, DI- is {row['-DI']}, "
        f"CCI is {row['CCI']}"
    )
    action = row['trade_action']  # Assuming this column holds actions: 0 for Buy, 1 for Sell, 2 for Hold
    data['text'].append(text)
    data['label'].append(action)

# Convert to DataFrame
fine_tuning_data = pd.DataFrame(data)


encodings, labels = preprocess_data(fine_tuning_data)

class TradingDataset(torch.utils.data.Dataset):
    def __init__(self, encodings, labels):
        self.encodings = encodings
        self.labels = labels

    def __getitem__(self, idx):
        item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
        item['labels'] = torch.tensor(self.labels[idx])
        return item

    def __len__(self):
        return len(self.labels)

# Create a dataset and dataloader
dataset = TradingDataset(encodings, labels)
dataloader = torch.utils.data.DataLoader(dataset, batch_size=2, shuffle=True)


Using device: cuda


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


<h1>Training the transformer Model</h1>

In [5]:
from transformers import Trainer, TrainingArguments, EarlyStoppingCallback

training_args = TrainingArguments(
    output_dir='./results',
    num_train_epochs=5,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    warmup_steps=500,
    weight_decay=0.01,
    logging_dir='./logs',
    logging_steps=10,
    eval_strategy="epoch",
    save_strategy="epoch",
    load_best_model_at_end=True,
    learning_rate=2e-5,
    lr_scheduler_type="linear",
)


trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=dataset,
    eval_dataset=dataset,
    callbacks=[EarlyStoppingCallback(early_stopping_patience=3)],
)

trainer.train()


Epoch,Training Loss,Validation Loss
1,0.463,0.465123
2,0.4112,0.351153
3,0.2466,0.308569
4,0.2245,0.262368
5,0.2409,0.225595


TrainOutput(global_step=18525, training_loss=0.38521907868816463, metrics={'train_runtime': 7622.5066, 'train_samples_per_second': 38.879, 'train_steps_per_second': 2.43, 'total_flos': 1.949374422769536e+16, 'train_loss': 0.38521907868816463, 'epoch': 5.0})

In [6]:
# Save the fine-tuned model
model.save_pretrained('./fine-tuned-model_v4')
tokenizer.save_pretrained('./fine-tuned-model_v4')

('./fine-tuned-model_v4\\tokenizer_config.json',
 './fine-tuned-model_v4\\special_tokens_map.json',
 './fine-tuned-model_v4\\vocab.txt',
 './fine-tuned-model_v4\\added_tokens.json')

<h1>Using Existing Training Environment to Integrate with Transformer Environment</h1>

In [7]:
import gym
from gym import spaces
import numpy as np
import pandas as pd

class TradingEnvironment(gym.Env):
    metadata = {'render.modes': ['human']}

    def __init__(self, data, daily_trading_limit):
        super(TradingEnvironment, self).__init__()
        self.data = data
        self.daily_trading_limit = daily_trading_limit
        self.current_step = 0

        # Extract state columns
        self.state_columns = ['Close', 'Volume', 'RSI', 'MACD', 'MACD_signal', 'MACD_hist', 'Stoch_k', 'Stoch_d',
                              'OBV', 'Upper_BB', 'Middle_BB', 'Lower_BB', 'ATR_1', 'ADX', '+DI', '-DI', 'CCI']

        # Initialize balance, shares held, and total shares traded
        self.balance = 10_000_000.0  # $10 million
        self.shares_held = 0
        self.total_shares_traded = 0

        # Define action space: [Hold, Buy, Sell]
        self.action_space = spaces.Discrete(3)

        # Define observation space based on state columns
        self.observation_space = spaces.Box(
            low=-np.inf, high=np.inf, shape=(len(self.state_columns),), dtype=np.float32
        )

    def reset(self):
        self.current_step = 0
        self.balance = 10_000_000.0  # $10 million
        self.shares_held = 0
        self.total_shares_traded = 0
        self.cumulative_reward = 0
        self.trades = []
        return self._next_observation()

    def _next_observation(self):
        return self.data[self.state_columns].iloc[self.current_step].values

    def step(self, action):
        expected_price = self.data.iloc[self.current_step]['ask_px_00']
        actual_price = self.data.iloc[self.current_step]['price']
        transaction_time = self.data.iloc[self.current_step]['ts_in_delta']
        self._take_action(action)
        reward = 0
        
        if self.current_step >= len(self.data) - 1:
            self.current_step = 0
        if action != 0:
            transaction_cost= self._calculate_transaction_cost(self.data.iloc[self.current_step]['Volume'], 0.3, self.data['Volume'].mean())
            reward = self._calculate_reward(expected_price, actual_price, transaction_time, transaction_cost)
            self.cumulative_reward += reward
            if self.trades:
                self.trades[-1]['reward'] = reward
                self.trades[-1]['transaction_cost'] = transaction_cost
                self.trades[-1]['slippage'] = expected_price - actual_price
                self.trades[-1]['time_penalty'] = 100*transaction_time/1e9
        done = self.current_step == len(self.data) - 1
        obs = self._next_observation()
        info = {
        'step': self.current_step,
        'action': action,
        'price': actual_price,
        'shares': self.trades[-1]['shares'] if self.trades else 0
    }
        self.current_step += 1

        return obs, reward, done, info

    def _take_action(self, action):
        current_price = self.data.iloc[self.current_step]['Close']
        current_time = pd.to_datetime(self.data.iloc[self.current_step]['ts_event'])
        trade_info = {'step': self.current_step, 'timestamp': current_time, 'action': action, 'price': current_price, 'shares': 0, 'reward': 0, 'transaction_cost': 0, 'slippage': 0, 'time_penalty': 0}

        if action == 1: # and self.total_shares_traded < self.daily_trading_limit:  # Buy
            shares_bought = (self.balance * np.random.uniform(0.001, 0.005)) // current_price
            self.balance -= shares_bought * current_price
            self.shares_held += shares_bought
            self.total_shares_traded += shares_bought
            trade_info['shares'] = shares_bought
            if(shares_bought>0):
                self.trades.append(trade_info)
        elif action == 2: # and self.total_shares_traded < self.daily_trading_limit:  # Sell
            shares_sold = min((self.balance * np.random.uniform(0.001, 0.005)) // current_price, self.shares_held)
            self.balance += shares_sold * current_price
            self.shares_held -= shares_sold
            self.total_shares_traded -= shares_sold
            trade_info['shares'] = shares_sold
            if(shares_sold>0):
                self.trades.append(trade_info)

    def _calculate_reward(self, expected_price, actual_price, transaction_time, transaction_cost):
        slippage = expected_price - actual_price
        time_penalty = 100*transaction_time/1e9
        reward = - (slippage + time_penalty + transaction_cost)
        return reward
    
    def _calculate_transaction_cost(self, volume, volatility, daily_volume):
        return volatility * np.sqrt(volume / daily_volume)
    
    def run(self):
        self.reset()
        for _ in range(len(self.data)):
            self.step()
        return self.cumulative_reward, self.trades

    def render(self, mode='human', close=False):
        print(f'Step: {self.current_step}')
        print(f'Balance: {self.balance}')
        print(f'Shares held: {self.shares_held}')
        print(f'Total shares traded: {self.total_shares_traded}')
        print(f'Total portfolio value: {self.balance + self.shares_held * self.data.iloc[self.current_step]["Close"]}')
        print(f'Cumulative reward: {self.cumulative_reward}')
        self.print_trades()

    def print_trades(self):
        # download all trades in a pandas dataframe using .csv
        trades_df = pd.DataFrame(self.trades)
        # Save a csv
        trades_df.to_csv('trades_ppo_v4.csv', index=False)
        for trade in self.trades:
            print(f"Step: {trade['step']}, Timestamp: {trade['timestamp']}, Action: {trade['action']}, Price: {trade['price']}, Shares: {trade['shares']}, Reward: {trade['reward']}, Transaction Cost: {trade['transaction_cost']}, Slippage: {trade['slippage']}, Time Penalty: {trade['time_penalty']}")

<h1>Creating TradingEnvironment With Transformer</h1>

In [8]:
import torch
from transformers import BertTokenizer, BertForSequenceClassification

# Load the fine-tuned transformer model and tokenizer
transformer_model_path = './fine-tuned-model_v4'
tokenizer = BertTokenizer.from_pretrained(transformer_model_path)
transformer_model = BertForSequenceClassification.from_pretrained(transformer_model_path)

class TradingEnvironmentWithTransformer(TradingEnvironment):
    def __init__(self, data, daily_trading_limit, transformer_model, tokenizer):
        super().__init__(data, daily_trading_limit)
        self.transformer_model = transformer_model
        self.tokenizer = tokenizer

    def _take_action(self, action):
        # Example logic for integrating Transformer prediction with PPO action
        current_price = self.data.iloc[self.current_step]['Close']
        text = (
        f"Price is {current_price}, RSI is {self.data.iloc[self.current_step]['RSI']}, MACD is {self.data.iloc[self.current_step]['MACD']}, "
        f"Stochastic K is {self.data.iloc[self.current_step]['Stoch_k']}, OBV is {self.data.iloc[self.current_step]['OBV']}, "
        f"Upper BB is {self.data.iloc[self.current_step]['Upper_BB']},DI+ is {self.data.iloc[self.current_step]['+DI']}, DI- is {self.data.iloc[self.current_step]['-DI']}, "
        f"CCI is {self.data.iloc[self.current_step]['CCI']}"
    )

        inputs = self.tokenizer(text, return_tensors='pt')
        outputs = self.transformer_model(**inputs)
        transformer_prediction = torch.argmax(outputs.logits, dim=1).item()

        # Example of combining PPO action with Transformer prediction
        if transformer_prediction == 0:  # Transformer suggests Buy
            final_action = 1  # Override PPO to Buy
        elif transformer_prediction == 1:  # Transformer suggests Sell
            final_action = 2  # Override PPO to Sell
        else:
            final_action = action  # Keep PPO's decision if Hold

        # Execute the final action
        super()._take_action(final_action)

    def step(self, action):
        expected_price = self.data.iloc[self.current_step]['ask_px_00']
        actual_price = self.data.iloc[self.current_step]['price']
        transaction_time = self.data.iloc[self.current_step]['ts_in_delta']
        self._take_action(action)
        reward = 0
        
        if self.current_step >= len(self.data) - 1:
            self.current_step = 0
        if action != 0:
            transaction_cost= self._calculate_transaction_cost(self.data.iloc[self.current_step]['Volume'], 0.3, self.data['Volume'].mean())
            reward = self._calculate_reward(expected_price, actual_price, transaction_time, transaction_cost)
            self.cumulative_reward += reward
            if self.trades:
                self.trades[-1]['reward'] = reward
                self.trades[-1]['transaction_cost'] = transaction_cost
                self.trades[-1]['slippage'] = expected_price - actual_price
                self.trades[-1]['time_penalty'] = 100*transaction_time/1e9
        done = self.current_step == len(self.data) - 1
        obs = self._next_observation()
        info = {
        'step': self.current_step,
        'action': action,
        'price': actual_price,
        'shares': self.trades[-1]['shares'] if self.trades else 0
    }
        self.current_step += 1

        return obs, reward, done, info




<h1>Integrate Transformer with PPO </h1>

In [9]:
# Continue training the PPO model with the new environment
from stable_baselines3 import PPO
market_features_df = df_with_indicators[35:]
daily_trading_limit = 1000
ticker = 'AAPL'  # Specify the ticker you want to trade
ticker_data = market_features_df[market_features_df['symbol'] == ticker]

env = TradingEnvironmentWithTransformer(ticker_data, daily_trading_limit, transformer_model, tokenizer)
best_hyperparameters = {'learning_rate': 0.0009931989008886031,'n_steps': 512,'batch_size': 128, 'gamma': 0.9916829193042708,'clip_range': 0.21127653449387027,'n_epochs': 6}
model = PPO('MlpPolicy', env, verbose=1, **best_hyperparameters)
model.learn(total_timesteps=10000)

Using cuda device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
----------------------------
| time/              |     |
|    fps             | 3   |
|    iterations      | 1   |
|    time_elapsed    | 131 |
|    total_timesteps | 512 |
----------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 3           |
|    iterations           | 2           |
|    time_elapsed         | 261         |
|    total_timesteps      | 1024        |
| train/                  |             |
|    approx_kl            | 0.012462247 |
|    clip_fraction        | 0.151       |
|    clip_range           | 0.211       |
|    entropy_loss         | -1.09       |
|    explained_variance   | -0.21       |
|    learning_rate        | 0.000993    |
|    loss                 | 0.123       |
|    n_updates            | 6           |
|    policy_gradient_loss | -0.0179     |
|    value_loss           | 0.

<stable_baselines3.ppo.ppo.PPO at 0x29056c11970>

In [10]:
# Save the model
model.save("trading_agent_transformer_v4")

In [11]:
model = PPO.load("trading_agent_transformer_v4",device=device)

# Evaluate the model
obs = env.reset()
for _ in range(len(ticker_data)):
    action, _states = model.predict(obs)
    obs, rewards, done, info = env.step(action)
    if done:
        break

# Render the final state
env.render()

Step: 1
Balance: 3610311.9199999445
Shares held: 33164.0
Total shares traded: 33164.0
Total portfolio value: 10045786.119999945
Cumulative reward: -11746.744797677315


IOPub data rate exceeded.
The Jupyter server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--ServerApp.iopub_data_rate_limit`.

Current values:
ServerApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
ServerApp.rate_limit_window=3.0 (secs)



Step: 59215, Timestamp: 2023-07-03 20:42:10.970147539, Action: 1, Price: 192.41, Shares: 81.0, Reward: -0.05192281820429248, Transaction Cost: 0.025398218204301576, Slippage: 0.009999999999990905, Time Penalty: 0.0165246
Step: 59216, Timestamp: 2023-07-03 20:47:40.259304386, Action: 2, Price: 192.41, Shares: 45.0, Reward: -0.051884318204292484, Transaction Cost: 0.025398218204301576, Slippage: 0.009999999999990905, Time Penalty: 0.0164861
Step: 59217, Timestamp: 2023-07-03 20:48:11.839440549, Action: 2, Price: 192.42, Shares: 43.0, Reward: -0.04190091820430157, Transaction Cost: 0.025398218204301576, Slippage: 0.0, Time Penalty: 0.0165027
Step: 59218, Timestamp: 2023-07-03 20:48:32.097485966, Action: 2, Price: 192.41, Shares: 62.0, Reward: -0.05187601820429248, Transaction Cost: 0.025398218204301576, Slippage: 0.009999999999990905, Time Penalty: 0.0164778
Step: 59219, Timestamp: 2023-07-03 20:54:25.380638480, Action: 1, Price: 192.42, Shares: 19.0, Reward: -0.04186601820430158, Transac

In [95]:
model_res = pd.read_csv('trades_ppo_v4.csv')

In [98]:
model_res['action'] = model_res['action'].replace({1: 'BUY', 2: 'SELL'})
model_res.tail()

Unnamed: 0,step,timestamp,action,price,shares,reward,transaction_cost,slippage,time_penalty
53000,59231,2023-07-03 20:59:14.514320323,SELL,192.44,74.0,-0.078702,0.062213,0.0,0.01649
53001,59232,2023-07-03 20:59:21.020553920,SELL,192.44,28.0,-0.041849,0.025398,0.0,0.016451
53002,59233,2023-07-03 20:59:33.297741235,SELL,192.4,32.0,-0.133219,0.056792,0.06,0.016427
53003,59234,2023-07-03 20:59:56.889614660,BUY,192.45,36.0,-0.060461,0.043991,0.0,0.01647
53004,59235,2023-07-03 20:59:58.907265922,BUY,192.45,48.0,-0.196062,0.179593,0.0,0.016469
