In [2]:
pip install gym

Collecting gym
  Downloading gym-0.26.2.tar.gz (721 kB)
     ---------------------------------------- 0.0/721.7 kB ? eta -:--:--
      --------------------------------------- 10.2/721.7 kB ? eta -:--:--
     ---- -------------------------------- 81.9/721.7 kB 919.0 kB/s eta 0:00:01
     ---------------------- --------------- 419.8/721.7 kB 3.3 MB/s eta 0:00:01
     -------------------------------------- 721.7/721.7 kB 4.1 MB/s eta 0:00:00
  Installing build dependencies: started
  Installing build dependencies: finished with status 'done'
  Getting requirements to build wheel: started
  Getting requirements to build wheel: finished with status 'done'
  Preparing metadata (pyproject.toml): started
  Preparing metadata (pyproject.toml): finished with status 'done'
Collecting cloudpickle>=1.2.0 (from gym)
  Obtaining dependency information for cloudpickle>=1.2.0 from https://files.pythonhosted.org/packages/48/41/e1d85ca3cab0b674e277c8c4f678cf66a91cd2cecf93df94353a606fe0db/cloudpickle-3.1.


[notice] A new release of pip is available: 23.2.1 -> 24.3.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [3]:
import gym
from gym import spaces
import numpy as np
import pandas as pd
import random

# Simulating a simple environment for portfolio management
class StockPortfolioEnv(gym.Env):
    def __init__(self, stock_data, risk_tolerance=0.5, budget=10000):
        super(StockPortfolioEnv, self).__init__()

        # Stock data should be a pandas DataFrame with the columns: 
        # 'Date', 'Open', 'High', 'Low', 'Close', 'Volume', 'Adj Close'
        self.stock_data = stock_data
        self.current_step = 0
        self.risk_tolerance = risk_tolerance
        self.budget = budget
        
        # Define action and observation space
        # Action space: [0 = Sell, 1 = Hold, 2 = Buy]
        self.action_space = spaces.Discrete(3)
        
        # Observation space: [stock price, portfolio value, available cash]
        self.observation_space = spaces.Box(low=np.array([0.0, 0.0, 0.0]), 
                                             high=np.array([np.inf, np.inf, np.inf]), 
                                             dtype=np.float32)
        
        # Portfolio state: holding stocks and cash
        self.current_cash = self.budget
        self.stock_held = 0
        self.current_price = 0
        self.portfolio_value = self.budget

    def reset(self):
        # Reset environment to initial state
        self.current_step = 0
        self.current_cash = self.budget
        self.stock_held = 0
        self.portfolio_value = self.budget
        self.current_price = self.stock_data.loc[self.current_step, 'Close']
        
        return np.array([self.current_price, self.portfolio_value, self.current_cash], dtype=np.float32)
    
    def step(self, action):
        # Execute one time step within the environment
        
        self.current_price = self.stock_data.loc[self.current_step, 'Close']
        
        # Action logic
        if action == 0:  # Sell
            if self.stock_held > 0:
                self.current_cash += self.stock_held * self.current_price
                self.stock_held = 0
        
        elif action == 1:  # Hold
            pass
        
        elif action == 2:  # Buy
            max_buyable = int(self.current_cash // self.current_price)
            if max_buyable > 0:
                # Buy stocks, update portfolio and cash
                self.stock_held += max_buyable
                self.current_cash -= max_buyable * self.current_price
        
        # Update portfolio value
        self.portfolio_value = self.current_cash + self.stock_held * self.current_price
        
        # Increment step
        self.current_step += 1
        
        # Check if we've reached the end of the stock data
        done = False
        if self.current_step >= len(self.stock_data) - 1:
            done = True
        
        # Reward is portfolio value after the action
        reward = self.portfolio_value - self.budget  # Profit/loss relative to initial investment
        
        # Return the new state, reward, done, and info
        return np.array([self.current_price, self.portfolio_value, self.current_cash], dtype=np.float32), reward, done, {}

    def render(self):
        # Render the environment (print current state)
        print(f"Step: {self.current_step}")
        print(f"Stock Price: {self.current_price}")
        print(f"Portfolio Value: {self.portfolio_value}")
        print(f"Cash: {self.current_cash}")
        print(f"Stocks Held: {self.stock_held}")
        print("-" * 20)
        
    def set_risk_tolerance(self, risk_tolerance):
        # Adjust risk tolerance if needed (this can affect buying/selling decisions)
        self.risk_tolerance = risk_tolerance

# Example usage:
if __name__ == "__main__":
    # Example stock data (you can use your actual stock dataset here)
    # Sample data with 'Date', 'Open', 'High', 'Low', 'Close', 'Volume'
    stock_data = pd.DataFrame({
        'Date': pd.date_range(start='2023-01-01', periods=100, freq='D'),
        'Open': np.random.rand(100) * 100,
        'High': np.random.rand(100) * 100 + 10,
        'Low': np.random.rand(100) * 100 - 10,
        'Close': np.random.rand(100) * 100,
        'Volume': np.random.randint(1000, 10000, size=100),
        'Adj Close': np.random.rand(100) * 100
    })
    
    # Initialize the environment
    env = StockPortfolioEnv(stock_data)
    
    # Reset the environment and observe the initial state
    state = env.reset()
    print(f"Initial State: {state}")
    
    # Example of interacting with the environment
    for _ in range(10):  # Run for 10 steps
        action = env.action_space.sample()  # Random action (0=Sell, 1=Hold, 2=Buy)
        state, reward, done, _ = env.step(action)
        env.render()  # Display the current state
        if done:
            break


Initial State: [1.0699822e+00 1.0000000e+04 1.0000000e+04]
Step: 1
Stock Price: 1.0699822135973869
Portfolio Value: 10000.0
Cash: 10000
Stocks Held: 0
--------------------
Step: 2
Stock Price: 21.209433539867774
Portfolio Value: 10000.0
Cash: 10.356802722279099
Stocks Held: 471
--------------------
Step: 3
Stock Price: 23.422011368705043
Portfolio Value: 11042.124157382354
Cash: 10.356802722279099
Stocks Held: 471
--------------------
Step: 4
Stock Price: 47.87356018190692
Portfolio Value: 22558.803648400437
Cash: 10.356802722279099
Stocks Held: 471
--------------------
Step: 5
Stock Price: 13.724475785422573
Portfolio Value: 6474.584897656311
Cash: 6474.584897656311
Stocks Held: 0
--------------------
Step: 6
Stock Price: 93.79723557782637
Portfolio Value: 6474.584897656311
Cash: 6474.584897656311
Stocks Held: 0
--------------------
Step: 7
Stock Price: 94.70930036136036
Portfolio Value: 6474.584897656311
Cash: 34.35247308380622
Stocks Held: 68
--------------------
Step: 8
Stock Price