# Libraries

In [1]:
import torch
import torch.nn as nn
import pandas as pd
import numpy as np 
import tqdm 
import pickle 
import random 
import datetime 

from tensordict import TensorDict, TensorDictBase

In [2]:
from PortfolioConstructor import PortfolioConstructor

In [3]:
if torch.cuda.is_available():
    torch.set_default_device('cuda') 
    torch.get_default_device()
    device = 'cuda'
else:
    device = 'cpu'

# Reading data

In [4]:
dataset_ = pd.read_csv("/home/naradaw/dev/Charles_Schwab/data/historical/2024_09_05/dataset_20_2024_09_05_14_49.csv")
dataset_.timestamp = pd.to_datetime(dataset_.timestamp)
dataset_.timestamp = dataset_.timestamp.apply(lambda x: x.date())
dataset_.head(5)

Unnamed: 0,symbol,timestamp,open,high,low,close,volume,trade_count,vwap,target_,return_
0,AAPL,2019-09-12,54.31,54.7,53.84,53.89,138908008.0,295050.0,54.24,55.59,0.031546
1,AAPL,2019-09-13,53.15,53.34,52.43,52.85,166028864.0,481648.0,52.89,57.07,0.079849
2,AAPL,2019-09-16,52.6,53.18,52.56,53.12,91632552.0,241286.0,52.99,56.98,0.072666
3,AAPL,2019-09-17,53.14,53.35,52.94,53.32,79440556.0,154466.0,53.18,56.85,0.066204
4,AAPL,2019-09-18,53.41,53.84,53.01,53.82,109082328.0,214155.0,53.56,56.62,0.052025


In [5]:
dates = np.sort(dataset_.timestamp.unique())
dates

array([datetime.date(2019, 9, 12), datetime.date(2019, 9, 13),
       datetime.date(2019, 9, 16), ..., datetime.date(2024, 8, 2),
       datetime.date(2024, 8, 5), datetime.date(2024, 8, 6)], dtype=object)

In [6]:
seq_len = (3 * 20)
time_horizon = 20

today_date = datetime.date(2023, 1, 30)
print(f"today date : {today_date}")

today date : 2023-01-30


In [7]:
start_date = dates[np.where(dates==today_date)[0][0] - seq_len]
print(f"start date : {start_date}")

target_date = dates[np.where(dates == today_date)[0][0] + time_horizon]
print(f"target date : {target_date}")

start date : 2022-11-01
target date : 2023-02-28


In [8]:
insample_df = dataset_.loc[(dataset_.timestamp <= today_date) & (dataset_.timestamp > start_date)]

In [9]:
test_symbol_uni = random.choices(list(insample_df.symbol.unique()), k = 20)
print(f"test_symbol_uni : \n{test_symbol_uni}")

test_symbol_uni : 
['WFC', 'INCY', 'LVS', 'MOH', 'EMR', 'BEN', 'ENPH', 'IFF', 'F', 'EMR', 'BEN', 'VTRS', 'CF', 'INCY', 'CTAS', 'CTRA', 'EXC', 'LKQ', 'ABNB', 'MMC']


In [10]:
historical_price_sqs = np.array([np.array(insample_df.loc[insample_df.symbol == symbol].close.values) for symbol in test_symbol_uni])
target_df = dataset_.loc[(dataset_.symbol.isin(test_symbol_uni) & (dataset_.timestamp == target_date))]

In [11]:
historical_price_sqs

array([[ 44.25,  43.27,  44.41, ...,  43.52,  43.82,  43.98],
       [ 77.15,  76.9 ,  77.3 , ...,  86.01,  85.7 ,  83.65],
       [ 37.39,  37.07,  39.41, ...,  57.17,  57.71,  56.69],
       ...,
       [ 49.75,  49.29,  49.35, ...,  55.23,  55.26,  55.51],
       [ 94.41,  92.02,  96.09, ..., 109.42, 115.94, 109.48],
       [155.12, 155.09, 156.39, ..., 171.89, 168.43, 168.24]])

In [12]:
portfolio_constructor = PortfolioConstructor(
    device= device,
    symbol_universe = test_symbol_uni,
    seq_length = seq_len,
    multihead_dim = 2,
    num_transformer_layers = 2
)

historical_price_tensor = torch.from_numpy(historical_price_sqs).to(device).to(torch.float32)
portfolio_symbols, allocations = portfolio_constructor(historical_price_tensor)



In [13]:
portfolio_symbols, allocations

(['LVS', 'MOH', 'BEN', 'ENPH', 'F', 'BEN', 'VTRS', 'CTAS', 'CTRA', 'MMC'],
 tensor([0.0000, 0.0000, 0.1000, 0.1000, 0.0000, 0.1000, 0.1000, 0.0000, 0.1000,
         0.0000, 0.1000, 0.1000, 0.0000, 0.0000, 0.1001, 0.1000, 0.0000, 0.0000,
         0.0000, 0.1000], device='cuda:0', grad_fn=<SoftmaxBackward0>))

# Config

In [14]:
funds = 20000

# Utility Functions

In [15]:
# Old

''' 
inputs -> symbols and there allocations

intermediate -> allocate funds based on total funds and allocation persentage of portfolio

outputs -> returns of the portfolio
'''

def get_returns(symbols, allocations, date):

    current_df = dataset_.loc[(dataset_.timestamp == date)&(dataset_.symbol.isin(symbols))].sort_values('symbol')[['symbol','close','return_']]
    current_df['allocation'] = allocations
    current_df['return_pnl'] = current_df.apply(lambda row : funds * row['allocation']*row['return_'], axis = 1)
    portfolio_return = current_df['return_pnl'].sum()/funds
    
    return portfolio_return

In [16]:
#New

def get_returns(allocations, t_, data):

    return (data[t_, : , -2] * data[t_, : , -1] * allocations.detach().cpu().numpy()).sum()

# get_state()

In [17]:
"""" 
this functions returns the state (price values when given date)
"""

'" \nthis functions returns the state (price values when given date)\n'

In [18]:
#old

def get_state(date, dates_index, lookback_window, symbol_galaxy):
    
    start_date = dates_index[np.where(dates_index==date)[0][0] - lookback_window]
    insample_df = dataset_.loc[(dataset_.timestamp <= today_date) & (dataset_.timestamp > start_date)]
    
    state_ = np.array([np.array(insample_df.loc[insample_df.symbol == symbol].close.values) for symbol in symbol_galaxy])

    return state_

In [19]:
#new

def get_state(data, t):
    price_seq = data[t , : , :-1]
    # return_ = data[t , : , -1]

    return price_seq

# reset()

In [20]:
def reset(data, mode, split_, transaction_cost = 1e-7):

    if mode == 'train':
        t_ = random.randint(0, split_)
        end_t_ = split_
    elif mode == 'test':
        t_ = split_
        end_t_ = len(data)-1

    init_state = np.zeros((len(data.shape[1])))

    return t_, end_t_, init_state, transaction_cost

# _step()

In [21]:
'''
The _step() method should do the following:

Read the input keys (such as "action") and execute the simulation based on these;

Retrieve observations, done state and reward;

Write the set of observation values along with the reward and done state at the corresponding entries in a new TensorDict.
'''

'\nThe _step() method should do the following:\n\nRead the input keys (such as "action") and execute the simulation based on these;\n\nRetrieve observations, done state and reward;\n\nWrite the set of observation values along with the reward and done state at the corresponding entries in a new TensorDict.\n'

In [22]:
''' 

'''
def is_end(t_, split_):

    return t_ > split_


def step(data, t_, allocations, holding_period, split_):

    return_ = get_returns(allocations, t_ , data)
    t_ = t_ + holding_period
    state_ = get_state(data, t_)

    return state_, t_, return_, is_end(t_, split_)

In [23]:
is_end(100, 99)

True

# Environment

In [24]:
''' 
to do : consider transaction cost when calculating return
'''


class MarketEnvironment:
    
    def __init__(
            self,
            data_path,
            holding_period,
            train_test_split,
            symbol_universe,
            ):
    
        self.holding_period = 20 * holding_period #Days
        
        with open(data_path, 'rb') as f:
            self.data_dict = pickle.load(f)
        
        self.data = (np.array([self.data_dict[symbol] for symbol in symbol_universe])).transpose(1,0,2)
        self.split_ = int(self.data.shape[0] * train_test_split)
        self.t_ = 0

    def reset(self, mode, transaction_cost = 1e-7):

        if mode == 'train':
            self.t_ = random.randint(0, self.split_)
            self.end_t_ = self.split_

        elif mode == 'test':
            self.t_ = self.split_
            self.end_t_ = len(self.data)-1

        self.current_allocations = np.zeros((self.data.shape[1]))
        self.transaction_cost = transaction_cost

        return self
    
    def get_return(self, allocations):
        
        return (self.data[self.t_, : , -2] * self.data[self.t_, : , -1] * allocations.detach().cpu().numpy()).sum()

    def get_state(self):
        
        return torch.from_numpy(self.data[self.t_ , : , :-1]).to(device).to(torch.float32)

    def is_end(self):
        return self.t_ > self.end_t_

    def step(self, allocations):

        return_ = self.get_return(allocations)
        self.t_ = self.t_ + self.holding_period
        state_ = self.get_state()
        is_end = self.is_end()

        return state_, return_, is_end, self.transaction_cost
       

# Sandbox

In [25]:
pkl_fpath = '/home/naradaw/dev/Charles_Schwab/data/historical/2024_09_11/historical_price_seq_2024_09_11_12_04.pkl'

with open(pkl_fpath, 'rb') as f:
    price_sqs_dict = pickle.load(f)

In [26]:
price_sqs_dict['A'].shape

(1174, 61)

In [27]:
test_symbol_uni = random.choices(list(price_sqs_dict.keys()), k = 20)

In [28]:
portfolio_constructor_sb = PortfolioConstructor(
    device= device,
    symbol_universe = test_symbol_uni,
    seq_length = 60,
    multihead_dim = 2,
    num_transformer_layers = 2
)

In [29]:
portfolio_constructor_sb.train()

PortfolioConstructor(
  (SREM): SREM(
    (transformer_encoder_layer): TransformerEncoderLayer(
      (self_attn): MultiheadAttention(
        (out_proj): NonDynamicallyQuantizableLinear(in_features=60, out_features=60, bias=True)
      )
      (linear1): Linear(in_features=60, out_features=2048, bias=True)
      (dropout): Dropout(p=0.1, inplace=False)
      (linear2): Linear(in_features=2048, out_features=60, bias=True)
      (norm1): LayerNorm((60,), eps=1e-05, elementwise_affine=True)
      (norm2): LayerNorm((60,), eps=1e-05, elementwise_affine=True)
      (dropout1): Dropout(p=0.1, inplace=False)
      (dropout2): Dropout(p=0.1, inplace=False)
    )
    (transformer_encoder): TransformerEncoder(
      (layers): ModuleList(
        (0-1): 2 x TransformerEncoderLayer(
          (self_attn): MultiheadAttention(
            (out_proj): NonDynamicallyQuantizableLinear(in_features=60, out_features=60, bias=True)
          )
          (linear1): Linear(in_features=60, out_features=2048,

In [30]:
market_env = MarketEnvironment(
    data_path = pkl_fpath,
    holding_period = 1,
    train_test_split= 0.8,
    symbol_universe = test_symbol_uni
    )

In [31]:
market_env.data.shape

(1174, 20, 61)

In [32]:
rewards = []
transaction_costs = []
market_env.reset(mode = 'train')
state_ = market_env.get_state()
is_end_ = False

In [33]:
while not is_end_:
    chosen, portfolio = portfolio_constructor_sb(state_)
    state_, reward_, is_end_, trans_cost = market_env.step(portfolio)
    rewards.append(reward_)
    transaction_costs.append(trans_cost)

In [None]:
rewards

In [35]:
rewards_tt = np.array(rewards.copy())
rewards_tt

array([ -2.58316117,  -7.28032087,   7.57444846,   4.73028908,
        10.07012079,   0.09037463,  -1.45279456,   1.17449401,
         1.96075105,  -3.17935164,  -2.4959386 ,   5.44669044,
         5.5739951 ,   1.94999782,  -2.57326119, -10.90192575,
        16.67943423,  -9.38908936,  10.05504363,  -1.42166629,
        10.41073215,  -4.1050796 ,  -7.88177713,  12.97902375,
        18.0587627 ,  -2.22916579,   1.25358838,   0.4022629 ,
        -3.25438022,  11.33583267,  -1.95157867,   1.73196586,
         3.21365214,  -5.21944285,   3.01302067])

In [36]:
mean = rewards_tt.mean()
mean

1.765301335596612

# Train

In [38]:
rewards = torch.Tensor(rewards_tt)

In [39]:
''' 
sharpe ratio measures the excess return of the portfolio over the 
volatility of it -> risk adjusted performance
'''


def sharp_ratio_(rewards, tran_costs):
	rewards = [r.detach().cpu().numpy() for r in rewards]
	mean = sum(rewards) / len(rewards)
	At = sum(r - t for r, t in zip(rewards, tran_costs)) / len(rewards)
	vol = sum((r - mean) ** 2 for r in rewards) / len(rewards)
	vol = vol ** 0.5

	return (At - 1e-7) / (vol + 1e-9)

In [40]:
sharp_ratio_(rewards, transaction_costs)

0.25358556713127106

In [42]:
port_creator_model = PortfolioConstructor(
    device= device,
    symbol_universe = test_symbol_uni,
    seq_length = 60,
    multihead_dim = 2,
    num_transformer_layers = 2
)

port_creator_model.train()




PortfolioConstructor(
  (SREM): SREM(
    (transformer_encoder_layer): TransformerEncoderLayer(
      (self_attn): MultiheadAttention(
        (out_proj): NonDynamicallyQuantizableLinear(in_features=60, out_features=60, bias=True)
      )
      (linear1): Linear(in_features=60, out_features=2048, bias=True)
      (dropout): Dropout(p=0.1, inplace=False)
      (linear2): Linear(in_features=2048, out_features=60, bias=True)
      (norm1): LayerNorm((60,), eps=1e-05, elementwise_affine=True)
      (norm2): LayerNorm((60,), eps=1e-05, elementwise_affine=True)
      (dropout1): Dropout(p=0.1, inplace=False)
      (dropout2): Dropout(p=0.1, inplace=False)
    )
    (transformer_encoder): TransformerEncoder(
      (layers): ModuleList(
        (0-1): 2 x TransformerEncoderLayer(
          (self_attn): MultiheadAttention(
            (out_proj): NonDynamicallyQuantizableLinear(in_features=60, out_features=60, bias=True)
          )
          (linear1): Linear(in_features=60, out_features=2048,

In [44]:
market_env = MarketEnvironment(
    data_path = pkl_fpath,
    holding_period = 1,
    train_test_split= 0.8,
    symbol_universe = test_symbol_uni
    )

market_env.reset(mode = 'train')

<__main__.MarketEnvironment at 0x7fa1800068e0>