In [109]:
import torch as T
import torch.nn as nn
import torch.optim as optim
import os 
import numpy as np
import pandas as pd
import random
from dateutil.relativedelta import relativedelta
from datetime import datetime, timedelta
from collections import namedtuple 
from collections import deque
from sklearn.preprocessing import StandardScaler
import torch.nn.functional as F
from sklearn.preprocessing import MinMaxScaler

STATE_SPACE = 36
ACTION_SPACE = 3

ACTION_LOW = -1
ACTION_HIGH = 1

GAMMA = 0.9995
TAU = 1e-3
EPS_START = 1.0
EPS_END = 0.1
EPS_DECAY = 0.9

MEMORY_LEN = 10000
MEMORY_THRESH = 500
BATCH_SIZE = 200

LR_DQN = 5e-4

LEARN_AFTER = MEMORY_THRESH
LEARN_EVERY = 3
UPDATE_EVERY = 9

COST = 3e-4
CAPITAL = 100000
NEG_MUL = 2
DEVICE = T.device('cuda' if T.cuda.is_available() else 'cpu')

MONTHLY_DATA = ['2023-02-28 23:15:00','2023-03-31']

MONTH = '03'
DAY = '15' 

TT_WINDOW = 10
TRADE_DAY = f"2023-{MONTH}-{DAY} 00:00:00" 

POSITION_LIMIT_COEF = 3 

train_start_date = datetime.strptime(TRADE_DAY, '%Y-%m-%d %H:%M:%S') - timedelta(days=TT_WINDOW, minutes=44)
train_start_date1 = datetime.strptime(TRADE_DAY, '%Y-%m-%d %H:%M:%S') - timedelta(days=TT_WINDOW,minutes=4)
TRAIN_START = train_start_date1.strftime('%Y-%m-%d %H:%M:%S')
START = train_start_date.strftime('%Y-%m-%d %H:%M:%S')
TRAIN_END = TRADE_DAY
print(TRAIN_START)
print(TRAIN_END)
trade_end_date = datetime.strptime(TRADE_DAY, '%Y-%m-%d %H:%M:%S') + timedelta(days=1)
TRADE_END = trade_end_date.strftime('%Y-%m-%d %H:%M:%S')
print(TRADE_END)


### Trading Environment 

class TradingEnvironment():
    
    def __init__(self, asset_data, bank=100_000, trans_coef=3e-4, portofolio_position=0,  
                position_limit_coef=POSITION_LIMIT_COEF, store_flag=1):
        
        self.scaler = MinMaxScaler()
        
        ### Trading Variables
        self.pnl = bank
        
        self.position = portofolio_position
        self.position_limit_coef = position_limit_coef
        self.trans_coef = trans_coef
        self.bank = bank
        self.running_cap = bank
        self.portofolio = bank
        self.profit = bank
          ### data variables
        self.asset_data = asset_data
        self.terminal_idx = len(self.asset_data) - 1   


         ### pointers, actions, rewards
        
        self.pointer = 0
        self.next_return, self.current_state = 0, None
        self.prev_position = 0
        self.prev_act = 0
        self.current_act = 0
        self.current_reward = 0
        self.current_price = self.asset_data.iloc[self.pointer, :]['close']
        self.done = False

        self.store_flag = store_flag
        if self.store_flag == 1:
            self.store = {"action_store": [],
                          "reward_store": [],
                          "pnl": [],
                          "position": [],
                          "portofolio":[]
                         }

    def reset(self):
        self.pnl = self.bank
        self.position = 0
        self.portofolio = self.bank
        self.reward = 0
        self.profit = self.bank
        
        self.pointer = 0
        self.next_return, self.current_state = self.get_state()
        self.prev_position = 0
        self.prev_act = 0
        self.current_act = 0
        self.current_reward = 0
        self.current_price = self.asset_data.iloc[self.pointer, :]['close']
        self.done = False

        if self.store_flag == 1:
            self.store = {"action_store": [],
                        "reward_store": [],
                        "pnl": [],
                        "position": [],
                        "portofolio":[]
                        }

        return self.current_state
        
    def step(self, action):
        self.current_act = action
        self.current_price = self.asset_data.iloc[self.pointer, :]['close']
        self.stc_pointer = self.asset_data.iloc[self.pointer, :]['stc']
        self.current_reward = self.calculate_reward()
        self.prev_position = self.position
        self.prev_act = self.current_act
        self.pointer += 1
        self.next_return, self.current_state = self.get_state()
        self.done = self.check_terminal()
        
        #         auto htan apo katw to efera edw giati kanei append to teleutaio act enw den to 8eloume 
        if self.store_flag:
#             print('apo panw triggered index ', self.pointer)
            self.store["action_store"].append(self.current_act)
            self.store["reward_store"].append(self.current_reward)
            info = self.store
        else:
            info = None
            
        if self.done:

            reward_offset = 0

            if self.store_flag:

                if self.position > 0:
                    self.portofolio += self.position * self.current_price + (self.position * self.current_price) * self.trans_coef 
                    self.store["action_store"].append(-1)
                    self.store["reward_store"].append(-self.position * self.next_return * 100)
                    self.profit = self.portofolio
                elif self.position < 0:
                    self.portofolio -= abs(self.position) * self.current_price + (abs(self.position) * self.current_price) * self.trans_coef 
                    self.store["action_store"].append(1)
                    self.store["reward_store"].append(self.position * self.next_return * 100)
                    self.profit = self.portofolio
                else:
                    self.store["action_store"].append(0)
                    self.store["reward_store"].append(0)
        
                self.store["position"].append(0)
                
                self.store["pnl"].append(self.portofolio)
                self.store["portofolio"].append(self.portofolio)
                
                ret = (self.store['portofolio'][-1]/100_000) - 1
                print(f"{self.store['portofolio'][-1]}/100_000 - 1")
                print(ret)
                reward_offset =  ret
                self.current_reward += reward_offset
        
        return self.current_state, self.current_reward, self.done, info

    
    def calculate_reward(self):
#         self.order_size, _ = divmod((0.02*self.bank),self.current_day_open_price)
        self.order_size = 1
        investment = self.order_size * self.current_price 
        trans_cost = investment * self.trans_coef
#         print(self.pointer)
        total_cost = investment + trans_cost
        
        self.position_limit = self.order_size * self.position_limit_coef
        
        reward = 0 
        reward_offset = 0
        prev_port = self.portofolio
        prev_pnl = self.pnl
        prev_cap = self.running_cap
        limit_up_flag = False
        limit_down_flag = False
        time_flag = False
        

        if self.position >= self.position_limit:
            limit_up_flag = True
            
        elif self.position <= -self.position_limit:
            limit_down_flag = True

        
        if self.current_act == 1:
            if not limit_up_flag:
                self.position += self.order_size
                self.portofolio -= total_cost
                self.pnl = (self.portofolio + self.position * self.current_price) 
                
                if self.prev_position < 0:
                    self.profit += prev_pnl - self.pnl
            else:
                if self.current_act == self.prev_act:
                    reward_offset += -0.1
                    
        elif self.current_act == -1:
            if not limit_down_flag:
                self.position -= self.order_size
                self.portofolio += investment - trans_cost
                self.pnl = (self.portofolio + self.position * self.current_price) 
                
                if self.prev_position > 0:
                    self.profit += prev_pnl - self.pnl
            else:
                if self.current_act == self.prev_act:
                    reward_offset += -0.1

        else:
            if self.current_act == self.prev_act:
                reward_offset += -0.1
#         print(self.current_act, self.profit, self.current_price)
        reward = 100*(self.next_return) * self.current_act - np.abs(self.current_act - self.prev_act) * self.trans_coef
        
         
        if self.store_flag==1:
            self.store["position"].append(self.position)
            self.store['pnl'].append(self.pnl)
            self.store['portofolio'].append(self.portofolio)
        
        if reward < 0:
            reward *= NEG_MUL  # To make the Agent more risk averse towards negative returns.
            
        reward += reward_offset
        self.reward = reward
        return self.reward
    
    
    def check_terminal(self):
        if self.pointer == self.terminal_idx:
            return True
        else:
            return False
    
    
    def get_state(self):
        
        state = []
        observation = ['sig-5','sig-10','sig-20','sig-40','v-1', 'r-1', 'v-2', 'r-2',
       'v-5', 'r-5', 'v-10', 'r-10', 'v-20', 'r-20', 'v-40', 'r-40',
       'bollinger', 'low_bollinger', 'high_bollinger', 'rsi', 'macd_lmw',
       'macd_smw', 'macd_bl', 'macd', 'macd_signal', 'macd_histogram', 'stc',
       'stc_smoothed', 'TR', 'ATR_11', '%K', '%D']
        observation = [obs + '_norm' for obs in observation]
#             na valw to profit mono tou
        port_state = [
            self.profit/self.pnl,
            self.pnl/self.bank,
            self.position * self.current_price/self.bank,
            self.prev_act]
        
        
        for column in observation:
            state.append(self.asset_data.loc[self.asset_data.index[self.pointer], column])
        state.extend(port_state)
        state = np.array(state)
        next_ret = self.asset_data['next_state_return'].iloc[self.pointer]
#         print(state)
        return next_ret, state

2023-03-04 23:56:00
2023-03-15 00:00:00
2023-03-16 00:00:00


In [2]:
Transition = namedtuple("Transition", ["States", "Actions", "Rewards", "NextStates", "Dones"])

class ReplayMemory():
    
    def __init__(self, capacity=MEMORY_LEN):
        self.memory = deque(maxlen=capacity)
        
    def store(self, t):
        
        self.memory.append(t)
        
    def sample(self, n):
        a = random.sample(self.memory, n)
        return a
    
    def __len__(self):
        return len(self.memory)
    
class DuellingDQN(nn.Module):
    
    def __init__(self, input_dim=STATE_SPACE, output_dim=ACTION_SPACE):
        super(DuellingDQN, self).__init__()
        self.input_dim = input_dim
        self.output_dim = output_dim
        
        self.lstm1 = nn.LSTM(input_size=input_dim, hidden_size=128)
        self.layer_norm1 = nn.LayerNorm(128)
        
        
        self.lstm2 = nn.LSTM(input_size=128, hidden_size=64)
        self.layer_norm2 = nn.LayerNorm(64)
        
        
        self.lstm3 = nn.LSTM(input_size=64, hidden_size=32)
        self.layer_norm3 = nn.LayerNorm(32)
        
        
        self.V = nn.Linear(32, 1)
        self.A = nn.Linear(32, self.output_dim)
       
        
        self.relu = nn.ReLU()
        
    def forward(self, state):
        
        lstm1_output, _ = self.lstm1(state)
        x = self.layer_norm1(self.relu(lstm1_output))

        lstm2_output, _ = self.lstm2(x)
        x = self.layer_norm2(self.relu(lstm2_output))

        lstm3_output, _ = self.lstm3(x)
        x = self.layer_norm3(self.relu(lstm3_output))
        
        V = self.relu(self.V(x))
        A = self.relu(self.A(x))
        
        x = V + A - A.mean()
        
        return x
        
class DQNAgent():
    def __init__(self, actor_net=DuellingDQN, memory= ReplayMemory()):
        
        self.actor_online = actor_net(STATE_SPACE, ACTION_SPACE).to(DEVICE)
        self.actor_target = actor_net(STATE_SPACE, ACTION_SPACE).to(DEVICE)
        self.actor_target.load_state_dict(self.actor_online.state_dict())
        
        self.memory = memory
        
        self.actor_criterion = nn.MSELoss()
        self.actor_op = optim.Adam(self.actor_online.parameters(), lr=LR_DQN)
        
        self.t_step = 0
        
    def act(self, state, eps=0.):
        self.t_step += 1
        
        state = T.from_numpy(state).float().to(DEVICE).view(1, 1, -1)
        
        self.actor_online.eval()
        with T.no_grad():
            actions = self.actor_online(state)
        self.actor_online.train()
        
        if random.random() > eps:
            act = np.argmax(actions.cpu().data.numpy())
        else:
            act = random.choice(np.arange(ACTION_SPACE))
        return int(act)
    
    def learn(self):
        if len(self.memory) <= MEMORY_THRESH:
            return 0
        ## Sample experiences from the MEMORY
        if self.t_step > LEARN_AFTER and self.t_step % LEARN_EVERY == 0:
            
            
            batch = self.memory.sample(BATCH_SIZE)
            
            states = np.vstack([t.States for t in batch])
            states = T.from_numpy(states).float().to(DEVICE)
            
            actions = np.vstack([t.Actions for t in batch])
            actions = T.from_numpy(actions).float().to(DEVICE)
            
            rewards = np.vstack([t.Rewards for t in batch])
            rewards = T.from_numpy(rewards).float().to(DEVICE)
            
            next_states = np.vstack([t.NextStates for t in batch])
            next_states = T.from_numpy(next_states).float().to(DEVICE)
            
            dones = np.vstack([t.Dones for t in batch])
            dones = T.from_numpy(dones).float().to(DEVICE)

            ## Actor update 
            ## compute next state actions and state values 
            
            next_state_values = self.actor_target(next_states).max(1)[0].unsqueeze(1)
            y = rewards + (1-dones) * GAMMA * next_state_values
            state_values = self.actor_online(states).gather(1, actions.type(T.int64))
            ## td error
            
            
             ## Compute Actor loss
            actor_loss = self.actor_criterion(y, state_values)
             ## Minize Actor loss
                
            self.actor_op.zero_grad()
            actor_loss.backward()
            self.actor_op.step()
            
            if self.t_step % UPDATE_EVERY == 0:
                self.soft_update(self.actor_online, self.actor_target)

    def td_errors(self, y , state_values):
        td_errors = y - state_values
        return td_errors
                
    def soft_update(self, local_model, target_model, tau=TAU):
        
        for target_param, local_param in zip(target_model.parameters(), local_model.parameters()):
              target_param.data.copy_(tau*local_param.data + (1.0-tau)*target_param.data)
                

            

In [4]:
coin = 'ethusd'
class crypto_reader():
    def __init__(self, coin, train_start=TRAIN_START, train_end= TRAIN_END, 
                 start = START,
                 trade_end=TRADE_END,
                 trade_day=TRADE_DAY,
                 timeframes=[1, 2, 5, 10, 20, 40]):
        
        self.timeframes = timeframes 
        
        self.coin_path = rf'\Users\steli\OneDrive\Desktop\Thesis\{coin}.csv'
        self.data = None
        
        
        self.scalar = StandardScaler()
        
        self.start = start
        
        self.train_start = train_start
        self.train_end = train_end 
        
        self.trade_start = trade_day
        self.trade_end = trade_end
        print('self.train_start',self.train_start)
        print('self.train_end',self.train_end)
        print('self.test_start',self.trade_start)
        print('self.test_end',self.trade_end)
        
        
    def read_csv_file(self):
        self.data = pd.read_csv(self.coin_path)
        self.data['DateTime'] = pd.to_datetime(self.data['time'], unit='ms')
        self.data.set_index('DateTime', inplace=True)
        
        self.data = self.data.loc[self.start:self.trade_end]
        for i in self.timeframes:
            self.data[f"v-{i}"] = self.data['volume'].pct_change(i)
            self.data[f"r-{i}"] = self.data['close'].pct_change(i)
        
        # Volatility
        for i in [5, 10, 20, 40]:
            self.data[f'sig-{i}'] = np.log(1 + self.data["r-1"]).rolling(i).std()
        #         Relative Strength Indicator (RSI)
        
        # Bollinger Bands
        self.bollinger_lback = 10
        self.data["bollinger"] = self.data["r-1"].ewm(self.bollinger_lback).mean()
        self.data["low_bollinger"] = self.data["bollinger"] - 2 * self.data["r-1"].rolling(self.bollinger_lback).std()
        self.data["high_bollinger"] = self.data["bollinger"] + 2 * self.data["r-1"].rolling(self.bollinger_lback).std()

        self.rsi_lb = 5
        self.pos_gain = self.data["r-1"].where(self.data["r-1"] > 0, 0).ewm(self.rsi_lb).mean()
        self.neg_gain = self.data["r-1"].where(self.data["r-1"] < 0, 0).ewm(self.rsi_lb).mean()
        self.rs = np.abs(self.pos_gain/self.neg_gain)
        self.data["rsi"] = 100 * self.rs/(1 + self.rs)
        
        # Moving Average Convergence Divergence (MACD)
        self.data["macd_lmw"] = self.data["r-1"].ewm(span=20, adjust=False).mean()
        self.data["macd_smw"] = self.data["r-1"].ewm(span=12, adjust=False).mean()
        self.data["macd_bl"] = self.data["r-1"].ewm(span=9, adjust=False).mean()
        self.data["macd"] = self.data["macd_smw"] - self.data["macd_lmw"]
        
        self.data["macd_signal"] = self.data["macd"].ewm(span=9, adjust=False).mean()
        self.data["macd_histogram"] = self.data["macd"] - self.data["macd_signal"]
        
        # Calculate Schaff Trend Cycle (STC)
        macd_range = self.data["macd"].rolling(window=9).max() - self.data["macd"].rolling(window=9).min()
        self.data["stc"] = 100 * (self.data["macd"] - self.data["macd"].rolling(window=9).min()) / macd_range

        # Additional smoothing (optional)
        self.data["stc_smoothed"] = self.data["stc"].rolling(window=3).mean()
        
        # Calculate True Range (TR)
        self.data['HL'] = self.data['high'] - self.data['low']
        self.data['HC'] = abs(self.data['high'] - self.data['close'].shift(-1))
        self.data['LC'] = abs(self.data['high'] - self.data['close'].shift(-1))
        self.data['TR'] = self.data[['HL', 'HC', 'LC']].max(axis=1)
        self.data.drop(['HL', 'HC', 'LC'], axis=1, inplace=True)
        
#         stochastic oscillator
        
        self.data['Lowest_Low'] = self.data['low'].rolling(window=14).min()
        self.data['Highest_High'] = self.data['high'].rolling(window=14).max()
        self.data['%K'] = ((self.data['close']-self.data['Lowest_Low'])/(self.data['Highest_High'] - self.data['Lowest_Low']))*100

        # Calculate %D (3-day SMA of %K)
        self.data['%D'] = self.data['%K'].rolling(window=3).mean()

        # Calculate Average True Range (ATR) for 11 periods
        self.atr_period = 11
        self.data['ATR_11'] = self.data['TR'].rolling(window=self.atr_period).mean()
        
        #its previous not next 
        self.data['next_state_return'] = self.data['close'].pct_change().shift(-1)
        
        self.train_days = self.data.loc[self.train_start:self.train_end]
        self.trade_day = self.data.loc[self.trade_start:self.trade_end]
        
        self.train_mean = self.train_days.mean()
        self.train_std = self.train_days.std()
        
        for column in self.train_days.columns[4:]:
            if column == 'next_state_return':
                pass
            else:
                self.train_days[f"{column}_norm"] = self.train_days[column]
                self.trade_day[f"{column}_norm"] =  self.trade_day[column]
        
        for i in self.train_days.index:
            for c in self.train_days.columns[6:]:
                if c[-4:] == 'norm':
                    self.train_days.loc[i, c] = (self.train_days.loc[i, c]-self.train_mean[c[:-5]])/self.train_std[c[:-5]]
        
        for i in self.trade_day.index:
            for c in self.trade_day.columns[6:]:
                if c[-4:] == 'norm':
                    self.trade_day.loc[i, c] = (self.trade_day.loc[i, c]-self.train_mean[c[:-5]])/self.train_std[c[:-5]]
                
        
eth = crypto_reader(coin=coin)
eth.read_csv_file()  

print('Environment Done')

self.train_start 2023-03-04 23:56:00
self.train_end 2023-03-15 00:00:00
self.test_start 2023-03-15 00:00:00
self.test_end 2023-03-16 00:00:00


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.train_days[f"{column}_norm"] = self.train_days[column]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.trade_day[f"{column}_norm"] =  self.trade_day[column]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.train_days[f"{column}_norm"] = self.train_days[column]
A value is trying to be se

Environment Done


In [5]:
env = TradingEnvironment(eth.train_days)
test_env = TradingEnvironment(eth.trade_day)

In [110]:
import time

start_time = time.time()        



env = TradingEnvironment(eth.train_days)
test_env = TradingEnvironment(eth.trade_day)
## Agent
memory = ReplayMemory()
agent = DQNAgent(actor_net=DuellingDQN, memory=memory)

N_EPISODES = 20 # No of episodes/epochs
scores = []
eps = EPS_START
act_dict = {0:-1, 1:1, 2:0}

te_score_min = -np.Inf

test_decisions = []
train_decisions = []
for episode in range(1, 1+ N_EPISODES):
    start_time1 = time.time()  
    print(f'episode {episode} start', start_time1)
    counter = 0 
    
    episode_score = 0 
    episode_score2 = 0 
    test_score = 0 
    score = 0 
    
    state = env.reset()
    
    state = state.reshape(-1, STATE_SPACE)
    
    while True:
        actions = agent.act(state, eps)
        action = act_dict[actions]

        next_state, reward, done, _ = env.step(action)
        
        next_state = next_state.reshape(-1, STATE_SPACE)
        
        t = Transition(state, actions, reward, next_state, done)
        
        agent.memory.store(t)
        agent.learn()

        state = next_state
        score += reward
        counter += 1

        if done:
            break
    train_decisions.append(_)
    
    episode_score += score
    episode_score2 += (env.store['pnl'][-1])
    
     # Print episode information
    print(f"Episode {episode}: Score: {episode_score}, Counter: {counter}")
    print(f"Episode {episode}: Score2: {episode_score2}, Counter: {counter}")
    
    
    scores.append(episode_score)
    eps = max(EPS_END, EPS_DECAY * eps)
    
    state = test_env.reset()
    done = False
    score_te = 0
    scores_te = [score_te]
    
    test_score = 0
    test_score2 = 0
    
    end_time1 = time.time()
    ex1 = end_time1 - start_time1
    minutes, seconds = divmod(ex1, 60)
    seconds, milliseconds = divmod(seconds, 1)

    print(f"Train {episode}Execution Time: {int(minutes)} minutes, {int(seconds)} seconds")
    while True: 
        
        actions = agent.act(state)
        action = act_dict[actions]
    
        next_state, reward, done, _ = test_env.step(action)
        
        
        next_state = next_state.reshape(-1, STATE_SPACE)
        state = next_state
        score_te += reward
        scores_te.append(score_te)
        if done:
            break
    print(_['action_store'])        
    test_decisions.append(_)
    test_score += score_te
    test_score2 += (test_env.store['pnl'][-1])
        
    print(f"Episode: {episode}, Train Score: {episode_score:.5f}, Validation Score: {test_score:.5f}")
    print(f"Episode: {episode}, Train Value: ${episode_score2:.5f}, Validation Value: ${test_score2:.5f}", "\n")
    end_time2 = time.time()
    ex2 = end_time2 - start_time1
    minutes, seconds = divmod(ex2, 60)
    seconds, milliseconds = divmod(seconds, 1)
    print(f"Train {episode}Execution Time: {int(minutes)} minutes, {int(seconds)} seconds")
end_time = time.time()

execution_time = end_time - start_time
print(f"Execution Time: {execution_time} seconds")

episode 1 start 1711568857.1234102
96458.43013999988/100_000 - 1
-0.035415698600001155
Episode 1: Score: -495.4382059501943, Counter: 13771
Episode 1: Score2: 96458.43013999988, Counter: 13771
Train 1Execution Time: 1 minutes, 50 seconds
100154.46825/100_000 - 1
0.0015446825000000608
[-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, -1, 0, -1, -1, 0, -1, -1, -1, -1, -1, -1, 0, -1, 0, 0, -1, -1, -1, -1, 0, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1

96558.32707999922/100_000 - 1
-0.03441672920000782
Episode 3: Score: -564.9787629835979, Counter: 13771
Episode 3: Score2: 96558.32707999922, Counter: 13771
Train 3Execution Time: 1 minutes, 55 seconds
100170.25127000001/100_000 - 1
0.0017025127000001028
[0, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1, -1, 0, 0, 0, -1, 0, 0, 0, 0, 0, 0, 0, -1, 0, -1, -1, 0, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, -1, -1, -1, -1, -1, -1, -1, -1, 0, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, 0, -1, -1, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, 0, -1, 0, 0, -1, 0, 0, 0, 0, 0, -1, -1, -1, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, 0, -1, 0, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, -1, -1, -1, -1, -1, -1, -1, 0, 0, -1, 0, -1, -1, -1

96760.4273299991/100_000 - 1
-0.03239572670000901
Episode 5: Score: -486.09588101322447, Counter: 13771
Episode 5: Score2: 96760.4273299991, Counter: 13771
Train 5Execution Time: 1 minutes, 56 seconds
100151.57984/100_000 - 1
0.0015157984000000457
[0, -1, 0, -1, -1, -1, -1, 0, -1, 0, -1, 0, -1, 0, -1, -1, 0, -1, 0, -1, 0, -1, -1, -1, -1, -1, 0, -1, -1, -1, -1, 0, 0, 0, 0, 0, -1, 0, 0, -1, -1, -1, -1, 0, -1, 0, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, -1, 0, 0, 0, 0, 0, 0, -1, 0, 0, -1, 0, -1, -1, -1, 0, -1, -1, 0, -1, 0, -1, 0, -1, 0, -1, -1, -1, -1, 0, 0, -1, -1, -1, 0, -1, -1, 0, -1, -1, -1, -1, 0, -1, -1, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, 0, -1, 0, -1, -1, -1, -1, -1, -1, 0, -1, -1, 0, -1, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, -1, -1, -1, 0, -1, 0, -1, 0, -1, 0, -1, -1, -1, 0, -1, -1, -1, -1, -1, -1, -1, 0, -1, 0, -1, 0, -1, 0, -1, -1, -1, -1, -1, -1, -1, -1, 0, -1, 0, -1

97861.20525999973/100_000 - 1
-0.021387947400002716
Episode 7: Score: -491.2281240679061, Counter: 13771
Episode 7: Score2: 97861.20525999973, Counter: 13771
Train 7Execution Time: 1 minutes, 55 seconds
100153.42923000001/100_000 - 1
0.0015342923000001285
[-1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, -1, 0, -1, 0, -1, 0, 0, -1, 0, -1, 0, -1, 0, -1, -1, -1, -1, 0, -1, 0, -1, 0, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, -1, 0, -1, -1, -1, -1, -1, 0, -1, 0, -1, 0, -1, 0, 0, 0, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, -1, -1, -1, 0, -1, 0, -1, 0, -1, -1, -1, -1, -1, -1, -1, -1, 0, -1, 0, -1, 0, -1, 0, -1, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1

98193.13389999913/100_000 - 1
-0.0180686610000087
Episode 9: Score: -424.1997762803223, Counter: 13771
Episode 9: Score2: 98193.13389999913, Counter: 13771
Train 9Execution Time: 1 minutes, 55 seconds
99961.25441000008/100_000 - 1
-0.0003874558999992228
[-1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, -1, 0, -1, 0, -1, 0, -1, -1, -1, -1, -1, 0, -1, 0, -1, -1, -1, 1, 0, 0, 0, 0, 1, -1, 1, 0, -1, 0, -1, 0, -1, 1, -1, 1, -1, 1, -1, -1, -1, 0, 0, -1, 1, 1, -1, -1, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, -1, 1, -1, 0, -1, 0, -1, 0, -1, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, 1, -1, 0, -1, 0, -1, -1, -1, -1, 0, -1, 0, -1, 0, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, -1, 0, -1, 0, -1, 0, -1, 0, -1, -1, 0, -1, 0, -1, -1, -1, -1, -1, 0, -1, 0, -1, 0, -1, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, 

98402.7330399998/100_000 - 1
-0.015972669600001987
Episode 11: Score: -279.052795777958, Counter: 13771
Episode 11: Score2: 98402.7330399998, Counter: 13771
Train 11Execution Time: 1 minutes, 53 seconds
99737.67462000018/100_000 - 1
-0.002623253799998171
[-1, 0, -1, 0, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 1, -1, 0, -1, 0, -1, 0, 0, 0, -1, 0, -1, 0, -1, -1, 0, 0, -1, -1, -1, 0, -1, 1, -1, -1, 1, -1, 1, -1, 1, -1, 0, 1, 0, 0, -1, 1, 0, -1, 1, -1, -1, -1, 1, -1, 1, -1, 0, 0, 1, 1, 0, -1, 0, -1, 0, 0, 1, 0, 0, -1, 0, 0, 0, 0, 0, 0, 1, -1, -1, 0, 0, -1, 1, 0, -1, 0, 0, -1, 0, -1, -1, 0, -1, 0, -1, 0, 0, -1, 1, -1, 0, -1, 1, 0, -1, 0, -1, 0, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 1, -1, 0, 1, 0, -1, 1, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 1, -1, 1, -1, 1, -1, 1, -1, 0, -1, 1, -1, 1, -1, -1, 1, -1, 1, -1, 0, -1, 0, -1, 0, -1, 1, -1, 1, -1, 1, -1, 1, -1, 1, -1, 1, -1, 1, -1, 1, -1, 0, -1, 1, -1, 1, -1, 1, -1, 1, -1, 0, -1, 1, -1, 1, -1, 1, -1, 1,

97873.31964999961/100_000 - 1
-0.021266803500003872
Episode 13: Score: -189.8995722136684, Counter: 13771
Episode 13: Score2: 97873.31964999961, Counter: 13771
Train 13Execution Time: 1 minutes, 53 seconds
99911.10480000009/100_000 - 1
-0.0008889519999991657
[-1, 1, -1, 0, 0, -1, 0, -1, 0, -1, 0, -1, 1, -1, 0, 0, 0, -1, 1, -1, 0, -1, 1, 1, 0, 0, 0, 0, 0, -1, 1, -1, -1, 1, 1, 1, -1, -1, 1, -1, 1, -1, 1, 1, 0, -1, 1, 1, 1, 1, 1, 1, -1, 1, -1, -1, -1, 1, -1, 1, -1, 1, 0, -1, -1, 1, -1, -1, -1, -1, 0, 1, -1, 1, -1, 1, 0, 1, 0, -1, 0, 0, -1, -1, -1, -1, -1, 1, -1, 0, 1, 1, -1, 0, 0, 1, 0, -1, 0, -1, 0, 0, -1, 1, 0, -1, 0, -1, 0, 0, 0, -1, 0, -1, 0, -1, -1, 0, -1, 1, -1, 0, -1, 1, -1, 0, 1, 0, 1, 1, -1, 0, -1, 1, 0, 0, 1, -1, 0, -1, 1, -1, 0, -1, 0, -1, 0, -1, 0, -1, 1, -1, 0, -1, 0, -1, 1, -1, 1, -1, 1, -1, 1, -1, 0, -1, 1, -1, 0, -1, -1, 1, -1, 0, -1, 0, -1, 1, -1, 1, -1, 1, -1, 1, -1, 1, -1, 1, -1, 1, -1, 1, -1, 1, 1, -1, -1, 0, -1, 1, -1, 1, -1, 1, -1, 1, 1, 0, -1, 1, -1, 1, -1, 0, -1, 1

98923.49773999889/100_000 - 1
-0.01076502260001111
Episode 15: Score: -160.1375119530752, Counter: 13771
Episode 15: Score2: 98923.49773999889, Counter: 13771
Train 15Execution Time: 1 minutes, 53 seconds
100149.82506000015/100_000 - 1
0.0014982506000014162
[-1, 0, 0, 0, -1, -1, -1, 1, 0, -1, 0, 1, 1, 0, -1, 0, -1, 0, -1, 1, 0, -1, 1, 1, -1, 1, 0, -1, -1, -1, -1, 0, -1, -1, -1, 0, -1, -1, -1, -1, 1, -1, -1, -1, -1, -1, 0, 1, 1, -1, 1, 1, 1, 1, -1, 0, -1, 1, -1, 1, -1, -1, 1, 0, 0, 1, -1, -1, -1, 0, -1, 1, 1, -1, 0, 1, 1, 1, 0, 0, 0, 0, -1, -1, -1, -1, -1, 1, 0, -1, 0, -1, 0, -1, -1, 1, 1, -1, 0, 0, 0, 0, -1, 1, -1, 0, -1, 1, 0, 0, 0, -1, 1, -1, 0, -1, 1, -1, 1, -1, 0, 0, -1, 0, -1, 1, -1, 0, 1, 1, -1, 0, -1, 1, 1, 1, 1, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 1, 0, -1, 1, -1, 1, -1, 0, -1, 1, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, 0, -1, 1, -1, 1, -1, 1, -1, 0, -1, 0, -1, 1, -1, 1, -1, 1, -1, 1, -1, 0, -1, 1, -1, 0, -1, 0, -1, -1, 1, -1, 1, -1, 1, -1, 1, 1, 0, 0, 1, 0, -1, 1, -1, 0, 

99844.79746999919/100_000 - 1
-0.0015520253000080864
Episode 17: Score: -110.49610267499924, Counter: 13771
Episode 17: Score2: 99844.79746999919, Counter: 13771
Train 17Execution Time: 1 minutes, 53 seconds
99867.25156999996/100_000 - 1
-0.0013274843000004033
[-1, 1, -1, 0, -1, 0, -1, 0, 0, -1, 0, 1, 1, -1, 0, 0, 0, -1, 1, 1, 0, -1, 0, 1, 0, 0, 0, 0, 0, -1, -1, 1, -1, -1, 1, 0, -1, -1, -1, -1, -1, -1, -1, 1, 1, -1, 1, 1, -1, -1, 1, 1, -1, 0, -1, -1, -1, 1, -1, 1, -1, -1, 1, 0, 0, 1, -1, 1, -1, 0, -1, 1, -1, 0, -1, 1, 1, 1, 0, -1, 0, 1, 0, -1, -1, 0, -1, 1, 1, -1, 0, 0, -1, 0, -1, 1, 1, -1, 0, -1, 0, 0, -1, 0, 0, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, 1, 1, -1, 1, -1, 0, -1, 1, -1, 0, -1, 1, -1, 1, -1, 0, -1, 1, 1, 0, 1, -1, 0, -1, 0, -1, 0, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 1, -1, 0, 0, -1, 1, -1, 1, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 1, -1, 1, -1, 0, -1, 1, 1, -1, 0, 0, 0, -1, 1, 1, -1, 0, 1, 1, 1, 1, -1, 0, 0, -1, 1, 1, -1, 0, -1,

100859.13803999932/100_000 - 1
0.008591380399993298
Episode 19: Score: -114.72999904510442, Counter: 13771
Episode 19: Score2: 100859.13803999932, Counter: 13771
Train 19Execution Time: 1 minutes, 53 seconds
100054.94439999998/100_000 - 1
0.000549443999999788
[-1, 0, -1, 0, 1, -1, 0, -1, 0, -1, 0, 1, 1, -1, 0, -1, -1, 0, -1, 0, -1, -1, 0, 1, 0, -1, 0, 0, -1, -1, 0, 1, -1, -1, 1, 0, -1, 1, -1, 1, 1, 1, -1, -1, 1, -1, 1, 1, 1, 1, 1, -1, 1, -1, -1, -1, -1, 1, -1, -1, 0, -1, 1, 1, -1, 1, 0, -1, 0, -1, 0, -1, 0, -1, 0, 1, 1, 1, 0, -1, 0, 1, 0, -1, -1, -1, 0, 1, -1, 0, -1, 0, -1, 0, -1, 0, 1, -1, 0, 1, -1, 0, -1, 0, -1, 0, 0, 0, -1, 0, -1, -1, -1, 0, 0, 1, 1, -1, 0, -1, 1, -1, 0, 1, 1, -1, 0, 1, -1, 1, -1, 0, -1, 1, 1, 1, 1, -1, 0, -1, 0, -1, 0, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, 1, 1, -1, 0, -1, 0, -1, 0, -1, -1, 0, -1, 0, -1, 0, -1,

In [None]:
trains_scores = [-495.4382059501943,-500.77437,-564.97876,-603.17632,-486.09588,-499.11414,-422.19360,-424.1997762803223]
validation_scores = [-181.36337,-188.87937,-156.06216,-76.02382,-84.82654,-62.06406,-56.47560]

In [656]:
# Specify the file path including the filename where you want to save the CSV file
csv_file_path = r'C:\Users\steli\OneDrive\Desktop\Thesis\eth.trade_day1.csv'

# Use the to_csv method to save the DataFrame to a CSV file
eth.trade_day.to_csv(csv_file_path, index=False)  # Set index=False to exclude row numbers in the CSV

print(f"CSV file '{csv_file_path}' has been created.")

CSV file 'C:\Users\steli\OneDrive\Desktop\Thesis\eth.trade_day1.csv' has been created.


In [128]:
import matplotlib.pyplot as plt
%matplotlib qt
buy_actions = []
buy_timestamps = []

sell_actions = []
sell_timestamps = []


pnl = []
position = []
action_list = []
reward_list = []

test_number = 19

train_flag = False
test_flag = True

eth.trade_day['actions'] = test_decisions[test_number]['action_store']
eth.trade_day['pnl']= test_decisions[test_number]['pnl']
eth.trade_day['position'] = test_decisions[test_number]['position']
eth.trade_day['reward'] = test_decisions[test_number]['reward_store']

eth.train_days['actions'] = train_decisions[test_number]['action_store']
eth.train_days['pnl']= train_decisions[test_number]['pnl']
eth.train_days['position'] = train_decisions[test_number]['position']
eth.train_days['reward'] = train_decisions[test_number]['reward_store']
# eth.train_days['portofolio'] = train_decisions[test_number]['portofolio'].append(0)


train_decisions[test_number]['action_store']
train_decisions[test_number]['pnl']
train_decisions[test_number]['position']
train_decisions[test_number]['reward_store']

if test_flag == True:        

    for i in range(len(eth.trade_day['actions'])):
        if i > 0:
            prev_position = eth.trade_day['position'][i-1]
            current_position = eth.trade_day['position'][i]
            if current_position > prev_position:
                buy_actions.append(eth.trade_day['close'][i])
                buy_timestamps.append(eth.trade_day.index[i])
            elif current_position < prev_position:
                sell_actions.append(eth.trade_day['close'][i])
                sell_timestamps.append(eth.trade_day.index[i])
        else:
            current_position = eth.trade_day['position'][i]
            if current_position > 0:
                buy_actions.append(eth.trade_day['close'][i])
                buy_timestamps.append(eth.trade_day.index[i])
            elif current_position < 0:
                sell_actions.append(eth.trade_day['close'][i])
                sell_timestamps.append(eth.trade_day.index[i])
        

    # Plot multiple lists in the same figure
    fig, axes = plt.subplots(nrows=3, ncols=1, figsize=(10, 8), sharex = True)

    # Plot on the first subplot (top-left)
#     axes[0].plot(eth.trade_day['close'], label='ETH 2023-03-15', color='blue')
#     axes[0].set_title('trade_day_close')
#     axes[0].scatter(buy_timestamps, buy_actions, color='green', marker='^', label='Buy Actions')
#     axes[0].scatter(sell_timestamps, sell_actions, color='red', marker='v', label='Sell Actions')


#     # Plot on the third subplot (bottom-left)
#     axes[1].plot(eth.trade_day['pnl'], label='pnl', color='blue', drawstyle='steps-post')
#     axes[1].set_title('pnl')

#     axes[2].plot(eth.trade_day['position'], label='position', color='blue', drawstyle='steps-post')
#     axes[2].set_title('position')
    
    axes[0].plot(eth.trade_day['close'], label='ETH 2023-03-15', color='blue')
    axes[0].set_title('trade_day_close')
    axes[0].scatter(buy_timestamps, buy_actions, color='green', marker='^', label='Buy Actions')
    axes[0].scatter(sell_timestamps, sell_actions, color='red', marker='v', label='Sell Actions')


    # Plot on the third subplot (bottom-left)
    axes[2].plot(eth.trade_day['pnl'], label='pnl', color='blue', drawstyle='steps-post')
    axes[2].set_title('pnl')

    axes[1].plot(eth.trade_day['position'], label='position', color='blue', drawstyle='steps-post')
    axes[1].set_title('position')

#     axes[3].plot(eth.trade_day['next_state_return'], label='return', color='blue',drawstyle='steps-post')
#     axes[3].axhline(y=0.0005, color='red', linestyle='--', label='Zero Line')
#     axes[3].axhline(y=-0.0005, color='red', linestyle='--', label='Zero Line')
#     axes[3].set_title('next_state_return')

#     axes[4].plot(eth.trade_day['reward'], label='reward', color='blue',drawstyle='steps-post')
#     axes[4].set_title('reward')
    
#     axes[5].plot(eth.trade_day['portofolio'], label='portofolio', color='blue',drawstyle='steps-post')
#     axes[5].set_title('portofolio')
    # Add legend to the last subplot
    axes[0].legend()
    axes[1].legend()
    axes[2].legend()
#     axes[3].legend()

    # Adjust layout for better spacing
    plt.tight_layout()

    # Show the plot
    plt.show()


if train_flag == True:
    for i in range(len(eth.train_days['actions'])):
        if i > 0:
            prev_position = eth.train_days['position'][i-1]
            current_position = eth.train_days['position'][i]
            if current_position > prev_position:
                buy_actions.append(eth.train_days['close'][i])
                buy_timestamps.append(eth.train_days.index[i])
            elif current_position < prev_position:
                sell_actions.append(eth.train_days['close'][i])
                sell_timestamps.append(eth.train_days.index[i])
        else:
            current_position = eth.train_days['position'][i]
            if current_position > 0:
                buy_actions.append(eth.train_days['close'][i])
                buy_timestamps.append(eth.train_days.index[i])
            elif current_position < 0:
                sell_actions.append(eth.train_days['close'][i])
                sell_timestamps.append(eth.train_days.index[i])
    # Plot multiple lists in the same figure
    fig, axes = plt.subplots(nrows=5, ncols=1, figsize=(10, 8),sharex=True)

    # Plot on the first subplot (top-left)
    axes[0].plot(eth.train_days['close'], label='ETH 2023-03-15', color='blue')
    axes[0].set_title('trade_day_close')
    axes[0].scatter(buy_timestamps, buy_actions, color='green', marker='^', label='Buy Actions')
    axes[0].scatter(sell_timestamps, sell_actions, color='red', marker='v', label='Sell Actions')


    # Plot on the third subplot (bottom-left)
    axes[1].plot(eth.train_days['pnl'], label='pnl', color='blue', drawstyle='steps-post')
    axes[1].set_title('pnl')

    axes[2].plot(eth.train_days['position'], label='position', color='blue', drawstyle='steps-post')
    axes[2].set_title('position')
    
    

#     axes[3].plot(eth.train_days['next_state_return'], label='return', color='blue',drawstyle='steps-post')
#     axes[3].axhline(y=0.0005, color='red', linestyle='--', label='Zero Line')
#     axes[3].axhline(y=-0.0005, color='red', linestyle='--', label='Zero Line')
#     axes[3].set_title('next_state_return')

#     axes[4].plot(eth.train_days['reward'], label='reward', color='blue',drawstyle='steps-post')
#     axes[4].set_title('reward')
    
#     axes[5].plot(eth.train_days['portofolio'], label='portofolio', color='blue',drawstyle='steps-post')
#     axes[5].set_title('portofolio')
    # Add legend to the last subplot
    axes[0].legend()
    axes[1].legend()
    axes[2].legend()
    axes[3].legend()

    # Adjust layout for better spacing
    plt.tight_layout()

    # Show the plot
    plt.show()
    

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  eth.trade_day['actions'] = test_decisions[test_number]['action_store']
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  eth.trade_day['pnl']= test_decisions[test_number]['pnl']
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  eth.trade_day['position'] = test_decisions[test_number]['position']
A value i

In [122]:
eth.trade_day

Unnamed: 0_level_0,time,open,close,high,low,volume,v-1,r-1,v-2,r-2,...,TR_norm,Lowest_Low_norm,Highest_High_norm,%K_norm,%D_norm,ATR_11_norm,actions,pnl,position,reward
DateTime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2023-03-15 00:00:00,1678838400000,1702.7,1703.1,1703.2,1702.1,5.197723,7.433952,-0.000059,-0.696319,0.000294,...,-0.171213,1.873792,1.838032,-0.206162,-0.547464,-0.149926,0,100000.00000,0,-0.100000
2023-03-15 00:01:00,1678838460000,1703.1,1703.7,1704.0,1703.1,46.462498,7.939010,0.000352,74.391184,0.000294,...,-0.120559,1.873792,1.838032,0.176525,-0.152555,-0.125087,-1,99999.48889,-1,0.052526
2023-03-15 00:02:00,1678838520000,1703.4,1702.8,1703.4,1701.0,10.445923,-0.775175,-0.000528,1.009711,-0.000176,...,0.487291,1.872615,1.831030,-0.197050,-0.081191,-0.100249,0,99999.48889,-1,-0.000600
2023-03-15 00:03:00,1678838580000,1703.0,1702.2,1704.2,1701.8,15.174564,0.452678,-0.000352,-0.673402,-0.000880,...,0.487291,1.872615,1.831030,-0.625295,-0.231230,-0.019525,0,99999.48889,-1,-0.100000
2023-03-15 00:04:00,1678838640000,1701.7,1705.0,1705.0,1701.5,74.683191,3.921604,0.001645,6.149506,0.001292,...,1.044487,1.872615,1.831030,1.373180,0.197141,0.117086,-1,99997.67739,-2,0.052486
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-03-15 23:56:00,1678924560000,1650.7,1651.0,1651.3,1650.7,0.030880,-0.930761,-0.000242,-0.983563,0.000727,...,-0.323176,1.254262,1.220704,0.221462,0.136170,-0.062992,-1,100178.66931,-3,-0.134453
2023-03-15 23:57:00,1678924620000,1650.9,1652.1,1652.1,1650.8,3.737542,120.032752,0.000666,7.380135,0.000424,...,-0.069905,1.254262,1.220704,0.970890,0.634106,-0.069201,1,100174.87368,-2,-0.001200
2023-03-15 23:58:00,1678924680000,1652.1,1652.1,1652.1,1652.1,0.155597,-0.958369,0.000000,4.038704,0.000666,...,-0.728409,1.254262,1.220704,0.970890,0.774341,-0.162345,-1,100174.37805,-3,-0.001200
2023-03-15 23:59:00,1678924740000,1652.1,1652.1,1652.1,1651.9,3.352914,20.548666,0.000000,-0.102909,0.000000,...,-0.627101,1.254262,1.220704,0.970890,1.042616,-0.218231,0,100174.37805,-3,-0.000600


In [171]:
for i in range(len(eth.trade_day['position'])):
    if i > 0:
        prev_position = eth.trade_day['position'][i-1]
        current_position = eth.trade_day['position'][i]
        if current_position > prev_position:
            buy_actions.append(i)
            buy_timestamps.append(eth.trade_day.index[i])
        elif current_position < prev_position:
            sell_actions.append(i)
            sell_timestamps.append(eth.trade_day.index[i])

  prev_position = eth.trade_day['position'][i-1]
  current_position = eth.trade_day['position'][i]


In [None]:
import matplotlib.pyplot as plt
%matplotlib qt
buy_actions = []
buy_timestamps = []

sell_actions = []
sell_timestamps = []


pnl = []
position = []
action_list = []
reward_list = []
    


    
ax1_signal = eth.trade_day['stc']

run = 19
eth.trade_day['actions'] = test_decisions[run]['action_store']
eth.trade_day['pnl']= test_decisions[run]['pnl']
eth.trade_day['position'] = test_decisions[run]['position']
eth.trade_day['reward'] = test_decisions[run]['reward_store']
counter = 0 
for i in range(len(eth.trade_day['actions'])):
    if counter < POSITION_LIMIT_COEF:
        if eth.trade_day['actions'][i] == 1:
            counter += 1
            buy_actions.append(eth.trade_day['close'][i])
            buy_timestamps.append(eth.trade_day.index[i])
    if counter > -POSITION_LIMIT_COEF:
        if eth.trade_day['actions'][i] == -1:
            counter -= 1
            sell_actions.append(eth.trade_day['close'][i])
            sell_timestamps.append(eth.trade_day.index[i])
#    
# 
# Plot multiple lists in the same figure
fig, axes = plt.subplots(nrows=5, ncols=1, figsize=(10, 8),sharex=True)

# Plot on the first subplot (top-left)
axes[0].plot(eth.trade_day['close'], label='trade_day_close', color='blue')
axes[0].set_title('trade_day_close')
axes[0].scatter(buy_timestamps, buy_actions, color='green', marker='^', label='Buy Actions')
axes[0].scatter(sell_timestamps, sell_actions, color='red', marker='v', label='Sell Actions')


# Plot on the third subplot (bottom-left)
axes[1].plot(ax1_signal, label='stc', color='blue', drawstyle='steps-post')
axes[1].set_title('stc')

axes[2].plot(eth.trade_day['position'], label='position', color='blue', drawstyle='steps-post')
axes[2].set_title('position')

axes[3].plot(eth.trade_day['next_state_return'], label='return', color='blue',drawstyle='steps-post')
axes[3].axhline(y=0.0008, color='red', linestyle='--', label='Zero Line')
axes[3].axhline(y=-0.0008, color='red', linestyle='--', label='Zero Line')
axes[3].set_title('next_state_return')

axes[4].plot(eth.trade_day['position'], label='position', color='blue',drawstyle='steps-post')
axes[4].set_title('position')
# Add legend to the last subplot
axes[0].legend()
axes[1].legend()
axes[2].legend()
axes[3].legend()

# Adjust layout for better spacing
plt.tight_layout()

# Show the plot
plt.show()

In [None]:
# #         IF NEXT_RETURN > BUY COEF
#         if self.next_return >= 0.0005:
#             if self.current_act == 1:
#                 reward = 100 * self.next_return * self.current_act 
#                 if not limit_up_flag:
#                     self.position += self.order_size
#                     self.portofolio -= total_cost
#                 else:
#                     self.position = self.position_limit
#                     self.portofolio = prev_port

#             elif self.current_act == -1:
#                 reward = -0.01
#                 if not limit_down_flag:
#                     self.position -= self.order_size
#                     self.portofolio += investment - trans_cost 
#                 else:
#                     self.position = -self.position_limit
#                     self.portofolio = prev_port

#             else:
#                 reward = -0.01
                
# #       ELIF NEXT_RETURN < TRADE COEF
#         elif self.next_return <= -0.0005:
#             if self.current_act == -1:
#                 reward = 100 * self.next_return * self.current_act 
#                 if not limit_down_flag:
#                     self.position -= self.order_size
#                     self.portofolio += investment - trans_cost 
#                 else:
#                     self.position = -self.position_limit
#                     self.portofolio = prev_port
#                     if self.current_act == self.prev_act:
#                         reward_offset += -0.1
                        
#             elif self.current_act == 1:
#                 reward = -0.01
#                 if not limit_up_flag:
#                     self.position += self.order_size
#                     self.portofolio -= total_cost
                    
#                 else:
#                     self.position = self.position_limit
#                     self.portofolio = prev_port
#                     if self.current_act == self.prev_act:
#                         reward_offset += -0.1
                    
#             else:
#                 reward = -0.01
#         else:
#             if self.current_act == 1:
#                 reward = -0.01
#                 if not limit_up_flag:
#                     self.position += self.order_size
#                     self.portofolio -= total_cost
#                 else:
#                     self.position = self.position_limit
#                     self.portofolio = prev_port
#                     if self.current_act == self.prev_act:
#                         reward_offset += -0.1

#             elif self.current_act == -1:
#                 reward = -0.01
#                 if not limit_down_flag:
#                     self.position -= self.order_size
#                     self.portofolio += investment - trans_cost 
                    
#                 else:
#                     self.position = -self.position_limit
#                     self.portofolio = prev_port
#                     if self.current_act == self.prev_act:
#                         reward_offset += -0.1
       
#             else:
#                 reward = 0

NameError: name 'eth' is not defined