In [178]:
class CustomEnv(gym.Env):
    # A custom Bitcoin trading environment
    def __init__(self, df, initial_balance=1000, lookback_window_size=50):
        # Define action space and state size and other custom parameters
        self.df = df.dropna().reset_index()
        self.df_total_steps = len(self.df)-1
        self.initial_balance = initial_balance
        self.lookback_window_size = lookback_window_size

        # Action space from 0 to 3, 0 is hold, 1 is buy, 2 is sell
        self.action_space = gym.spaces.Discrete(3)

        # Orders history contains the balance, net_worth, crypto_bought, crypto_sold, crypto_held values for the last lookback_window_size steps
        self.orders_history = deque(maxlen=self.lookback_window_size)
        
        # Market history contains the OHCL values for the last lookback_window_size prices
        self.market_history = deque(maxlen=self.lookback_window_size)

        # State size contains Market+Orders history for the last lookback_window_size steps
        self.observation_space = gym.spaces.Box(low=-np.inf, high=np.inf, shape=(self.lookback_window_size, 10), dtype=np.float64)
        
    # Reset the state of the environment to an initial state
    def reset(self, env_steps_size = 0):
        self.balance = self.initial_balance
        self.net_worth = self.initial_balance
        self.prev_net_worth = self.initial_balance
        self.crypto_held = 0
        self.crypto_sold = 0
        self.crypto_bought = 0
        if env_steps_size > 0: # used for training dataset
            self.start_step = random.randint(self.lookback_window_size, self.df_total_steps - env_steps_size)
            self.end_step = self.start_step + env_steps_size
        else: # used for testing dataset
            self.start_step = self.lookback_window_size
            self.end_step = self.df_total_steps
            
        self.current_step = self.start_step

        for i in reversed(range(self.lookback_window_size)):
            current_step = self.current_step - i
            self.orders_history.append([self.balance, self.net_worth, self.crypto_bought, self.crypto_sold, self.crypto_held])
            self.market_history.append([self.df.loc[current_step, 'Open'],
                                        self.df.loc[current_step, 'High'],
                                        self.df.loc[current_step, 'Low'],
                                        self.df.loc[current_step, 'Close'],
                                        self.df.loc[current_step, 'Volume']
                                        ])

        state = np.concatenate((self.market_history, self.orders_history), axis=1)
        print('---', state.shape)
        print('--00---', self.observation_space.shape)
        return state

    # Get the data points for the given current_step
    def _next_observation(self):
        self.market_history.append([self.df.loc[self.current_step, 'Open'],
                                    self.df.loc[self.current_step, 'High'],
                                    self.df.loc[self.current_step, 'Low'],
                                    self.df.loc[self.current_step, 'Close'],
                                    self.df.loc[self.current_step, 'Volume']
                                    ])
        obs = np.concatenate((self.market_history, self.orders_history), axis=1)
        return obs

    # Execute one time step within the environment
    def step(self, action):
        self.crypto_bought = 0
        self.crypto_sold = 0
        self.current_step += 1

        # Set the current price to a random price between open and close
        current_price = random.uniform(
            self.df.loc[self.current_step, 'Open'],
            self.df.loc[self.current_step, 'Close'])
        
        if action == 0: # Hold
            pass
        
        elif action == 1 and self.balance > 0:
            # Buy with 100% of current balance
            self.crypto_bought = self.balance / current_price
            self.balance -= self.crypto_bought * current_price
            self.crypto_held += self.crypto_bought

        elif action == 2 and self.crypto_held>0:
            # Sell 100% of current crypto held
            self.crypto_sold = self.crypto_held
            self.balance += self.crypto_sold * current_price
            self.crypto_held -= self.crypto_sold

        self.prev_net_worth = self.net_worth
        self.net_worth = self.balance + self.crypto_held * current_price

        self.orders_history.append([self.balance, self.net_worth, self.crypto_bought, self.crypto_sold, self.crypto_held])

        # Calculate reward
        reward = self.net_worth - self.prev_net_worth

        if self.net_worth <= self.initial_balance/2:
            done = True
        else:
            done = False

        obs = self._next_observation()
        info = {}
        return obs, reward, done, info

    # render environment
    def render(self):
        print(f'Step: {self.current_step}, Net Worth: {self.net_worth}')

In [None]:
class CustomEnv:
	# A custom Bitcoin trading environment
	def __init__(self, df, df_normalized, initial_balance=1000, stocks=['USDCUSDT','BTCUSDT','BNBBTC','BNBBTC'],lookback_window_size=50, model=''):
		# Define action space and state size and other custom parameters
		self.xarray = df_normalized
		self.df = df
		self.df_total_steps = self.xarray.shape[0]
		self.initial_balance = initial_balance
		self.lookback_window_size = lookback_window_size
		self.normalize_value = 40000
		self.weights = [1]+[0]*(self.xarray.shape[2]-1)
		self.quants = [0]*self.xarray.shape[2]
		self.quants_ubah = [0]*self.xarray.shape[2]
		self.short_sell = [1]*self.xarray.shape[2]
		self.cash = 0
		self.stocks =  stocks
		self.market_state = dict.fromkeys(self.stocks)
		self.model = model
		self.ubah = initial_balance
		self.errors = 0
		self.acerto = 0
		self.tax = 0
        self.action_space = gym.spaces.Box(low=-1,
                               high=1,
                               dtype=np.float32)

		#print('model1',self.model, model)
        self.observation_space = gym.spaces.Box(low=-np.inf, high=np.inf, shape=(self.lookback_window_size, 10), dtype=np.float64)

		self.orders_history = deque(maxlen=self.lookback_window_size)
		
		
		# Market history contains the OHCL values for the last lookback_window_size prices
		self.market_history = deque(maxlen=self.lookback_window_size)

		# State size contains Market+Orders history for the last lookback_window_size steps
		#create tensorboard writer
		
	# Reset the state of the environment to an initial state
	def reset(self, env_steps_size = 0):
		
		self.balance = self.initial_balance
		self.net_worth = self.initial_balance
		self.prev_net_worth = self.initial_balance
		self.weights = [1]+[0]*(self.xarray.shape[2]-1)
		self.quants = [0]*self.xarray.shape[2]
		self.quants_ubah = [0]*self.xarray.shape[2]
		self.short_sell = [1]*self.xarray.shape[2]
		self.cash = self.initial_balance
		self.ubah = self.initial_balance
		self.errors = 0
		self.acerto = 0
		#print('weights', self.weights, self.quants)
		
		
		#Quando ele rebalanceia o portfolio, ele incrementa com +1 %% Responder pq sempre 500, o modelo sempre faz o rebalanceio?
		


		if env_steps_size > 0: # used for training dataset
		   #Ele pega o step inicial, que esté contido entre a janela de observação e o tamanho do dataset - o tamanho dos seteps
			
			self.start_step = random.randint(self.lookback_window_size, self.df_total_steps -env_steps_size-100)
			
			self.end_step = self.start_step + env_steps_size
		else: # used for testing dataset
			self.start_step = random.randint(self.lookback_window_size, self.df_total_steps -env_steps_size)
			#print('start', self.start_step)
			self.end_step = self.start_step + env_steps_size
			
			
		self.current_step = self.start_step
		
		self.quants_ubah = [(self.initial_balance/len(self.weights))/ np.array([self.df[self.current_step,2,x] for x in range(0,len(self.stocks))])]

		self.quants[0] = self.initial_balance/self.df[self.current_step,2,0]
		

		for i in reversed(range(self.lookback_window_size)):
			current_step = self.current_step - i
			self.orders_history.append([self.net_worth/self.normalize_value,
										self.cash/self.normalize_value] +
										[number for number in self.quants] +
										[number for number in self.weights])

		self.current_step = self.start_step
		

		
		#Append for each stock, need to end, Cria um dict com o market history de cada ativo
		
		for j in range(0,len(self.stocks)):
			
			
			self.market_state[str(j)] = deque(maxlen=self.lookback_window_size)
				 
			for i in reversed(range(self.lookback_window_size)):
				
				current_step = self.current_step - i

				self.market_state[str(j)].append(self.xarray[current_step, :,j])
		#print('order', np.array(self.market_state),np.array(self.market_state))
		state = np.concatenate(([self.market_state[str(x)] for x in range(0,len(self.stocks))]), axis=1) 
		state = np.concatenate((state, self.orders_history) , axis=1)
		
		return state

	# Get the data points for the given current_step
	def _next_observation(self):
		
		
		#Nesse passo, ele atualiza o estado com o ponto mais recent que foi utilizado em step, por exemplo, no Step ele pega o ponto seguinte após o market history, logo se o market history vai até t, no step ele pega o ponto t+1, no next observation, ele da append desse ponto. Porém como tamanho máximo é 10, quando ele da append
		# ele perde o ponto mais antigo, e o novo é adicionado. No step, ele adiciona +1 no self.current_step, devido a isso a janela vai andando.

		for j in range(0, len(self.stocks)):
			
			
		   
			self.market_state[str(j)].append(self.xarray[self.current_step, :, j])
			
			
			
		
		obs = np.concatenate(([self.market_state[str(x)] for x in range(0,len(self.stocks))]), axis=1) 
		obs = np.concatenate((obs, self.orders_history) , axis=1)
		 
		return obs

	# Execute one time step within the environment
	def step(self, prediction):
		#pegamos o preço anterior, para calcular a taxa, a taxa é a diferença da quantidade que temos que compramos hoje menos a do dia anterior
		prices_ant =  np.array([self.df[self.current_step,2,x] for x in range(0,len(self.stocks))])
		#Da um passo no Env/One step on env
		self.current_step += 1
		prediction = np.array(softmax(prediction))


		#Pega os preços no passo atual/ Get the prices in the current step
		prices = np.array([self.df[self.current_step,2,x] for x in range(0,len(self.stocks))])


		#Calcula o balanço considerando as quantidades compradas no passo anteriors, e os preços no momento atual
		#Calculates the balance considering the quantities purchased in the previous step, and the prices at the current time
		self.cash = self.quants[0]*prices[0]

		self.balance = self.cash + np.dot(prices[1:],self.quants[1:])

		
		#Utlizado para calcular a taxa de transação/Use to calculate the transactions fee
		quants_ant = self.quants

		#Pega as quantidades, considerando os valores atuais e o balanço da transação anterior
		#Get the quantities, considering the current values and the balance of the previous transaction
		self.quants = [self.balance*prediction[x]/prices[x] for x in range(0,len(self.stocks))]
		
		#Calcule o imposto de compra e venda, 10% da diferença entre quantos dos períodos
		# 0,1% é a fonte do imposto da binance: https://www.binance.com/en/fee/schedule
		#Calculate the tax of buying and selling, 10% of the difference between quants of the periods
		# 0,1% is the binance tax source:	https://www.binance.com/en/fee/schedule
		tax = np.sum(abs(np.dot(np.array(self.quants),prices) - np.dot(np.array(quants_ant),prices_ant)))*0.001


		#Ve o valor do termo cash(Moeda estável, no futuro considerar se essa abordagem é válida)
		#See the value of the cash term(Stable currency, in the future consider whether this approach is valid)
		

		
		#Salva o valor do portfólio anterior
		#Save the previous net worth
		self.prev_net_worth = self.net_worth
		
		#Calcula o novo valor do portfólio
		#Calculate the new portfolio value
		self.net_worth = np.dot(self.quants,prices) - tax
		
		#Apenda os valores de transção ao deque
		#Append the transactions values to deque
		self.orders_history.append([self.net_worth/self.normalize_value, 
									self.cash/self.normalize_value] + 
									[number/self.normalize_value for number in self.quants] + prediction.tolist())

		# Calculate reward

		
		reward = np.log(self.net_worth/self.prev_net_worth)
		#reward = self.net_worth - self.prev_net_worthh
		
		if self.net_worth <= self.initial_balance/2:
			done = True
		else:
			done = False
		obs = self._next_observation() 

	
		return obs, reward, done

	# render environment
	def render(self):
		print(f'Step: {self.current_step}, Net Worth: {self.net_worth}')

In [179]:
import gym
import pandas as pd
import numpy as np
import random
from collections import deque

df = pd.read_csv('/home/fernando/Downloads/RL-Bitcoin-trading-bot-main/RL-Bitcoin-trading-bot_4/pricedata.csv')
df = df.iloc[:,1:]
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4300 entries, 0 to 4299
Data columns (total 5 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   Open    4300 non-null   float64
 1   High    4300 non-null   float64
 2   Low     4300 non-null   float64
 3   Close   4300 non-null   float64
 4   Volume  4300 non-null   float64
dtypes: float64(5)
memory usage: 168.1 KB


In [180]:
env = CustomEnv(df)

In [1]:
from stable_baselines3.common.env_checker import check_env
check_env(env)

ModuleNotFoundError: No module named 'stable_baselines3'

In [5]:
import random

print(random.randint(3, 9))

8


In [6]:
from collections import deque
deque(50)

TypeError: 'int' object is not iterable

In [None]:
!pip install stable-baselines3[extra]

Collecting stable-baselines3[extra]
  Using cached stable_baselines3-1.7.0-py3-none-any.whl (171 kB)
Collecting torch>=1.11
  Downloading torch-1.13.1-cp39-cp39-manylinux1_x86_64.whl (887.4 MB)
[2K     [38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m887.4/887.4 MB[0m [31m2.8 MB/s[0m eta [36m0:00:00[0mm eta [36m0:00:01[0m[36m0:00:01[0m
[?25hCollecting gym==0.21
  Using cached gym-0.21.0.tar.gz (1.5 MB)
  Preparing metadata (setup.py) ... [?25ldone
Collecting importlib-metadata~=4.13
  Using cached importlib_metadata-4.13.0-py3-none-any.whl (23 kB)
Collecting autorom[accept-rom-license]~=0.4.2
  Using cached AutoROM-0.4.2-py3-none-any.whl (16 kB)
Collecting rich
  Using cached rich-13.3.1-py3-none-any.whl (239 kB)
Collecting opencv-python
  Downloading opencv_python-4.7.0.72-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (61.8 MB)
[2K     [38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m61.8/61.8 MB[0m [31m20.1 MB/s[0m eta [

Collecting mdurl~=0.1
  Using cached mdurl-0.1.2-py3-none-any.whl (10.0 kB)
Collecting libtorrent
  Using cached libtorrent-2.0.7-cp39-cp39-manylinux_2_12_x86_64.manylinux2010_x86_64.whl (8.6 MB)
Building wheels for collected packages: gym, AutoROM.accept-rom-license
  Building wheel for gym (setup.py) ... [?25ldone
[?25h  Created wheel for gym: filename=gym-0.21.0-py3-none-any.whl size=1616800 sha256=7d1ad3a789303fdf61304c4478386550a948ee48f33cb0eaa08d3b096e759ac5
  Stored in directory: /home/fernando/.cache/pip/wheels/b3/50/6c/0a82c1358b4da2dbd9c1bb17e0f89467db32812ab236dbf6d5
  Building wheel for AutoROM.accept-rom-license (pyproject.toml) ... [?25l-

In [None]:
from stable_baselines3.common.env_checker import check_env
check_env(env)