In [1]:
!pip install gym
!pip install tensorflow



In [None]:
import gym
import numpy as np
import tensorflow
# from tensorflow.keras import layers
from typing import Tuple

In [None]:
class StockMarketEnv(gym.Env):
    """Minimal all-in / all-out trading environment over a price matrix.

    Actions:
        0 -- Buy: spend the whole cash balance, split equally across columns.
        1 -- Sell: liquidate every holding into cash.
        anything else (``2`` in ``action_space``) -- Hold.

    Observations are the last ``window_size`` rows of ``data`` that precede
    the current timestep.

    The reward returned by ``step`` is the portfolio's *cumulative* profit
    relative to ``initial_balance`` (not the per-step change).

    NOTE(review): every column of ``data`` is treated as the price of a
    tradable asset, because the arithmetic divides and multiplies by whole
    rows. Confirm this matches the data pipeline (e.g. no volume columns).
    """

    def __init__(self, data: np.ndarray, window_size: int = 30, initial_balance: float = 10000):
        """Store the price history and put the environment in its start state.

        Args:
            data: 2-D array shaped ``(timesteps, columns)``.
            window_size: Number of past rows exposed in each observation.
            initial_balance: Cash available at the start of every episode.

        Raises:
            ValueError: If ``data`` is not 2-D, or ``window_size`` is not in
                ``[1, number of rows - 1]``.
        """
        super().__init__()

        prices = np.asarray(data, dtype=np.float64)
        if prices.ndim != 2:
            raise ValueError(
                f"data must be 2-D (timesteps, columns), got ndim={prices.ndim}"
            )
        # At least one row must remain after the first window, otherwise the
        # very first step() would have no price to trade at.
        if not 0 < window_size < prices.shape[0]:
            raise ValueError(
                f"window_size must be in [1, {prices.shape[0] - 1}], got {window_size}"
            )

        self.data = prices
        self.window_size = window_size
        self.initial_balance = initial_balance

        # Standard Gym metadata so generic agents/wrappers can introspect the env.
        self.action_space = gym.spaces.Discrete(3)
        self.observation_space = gym.spaces.Box(
            low=-np.inf,
            high=np.inf,
            shape=(window_size, prices.shape[1]),
            dtype=np.float64,
        )

        self.reset()

    def reset(self):
        """Start a new episode and return the first observation window.

        Cash is restored to ``initial_balance``, all holdings are cleared and
        the clock is placed right after the first ``window_size`` rows.
        """
        self.balance = float(self.initial_balance)
        self.position = np.zeros(self.data.shape[1])
        self.timestep = self.window_size
        return self._get_obs()

    def step(self, action: int) -> Tuple[np.ndarray, float, bool, dict]:
        """Apply ``action`` at the current prices and advance one timestep.

        Args:
            action: ``0`` to buy, ``1`` to sell, any other value to hold.

        Returns:
            ``(observation, reward, done, info)`` where ``reward`` is the
            portfolio value (cash + holdings) minus ``initial_balance`` and
            ``info`` is an empty dict.

        Raises:
            IndexError: If called after the episode has finished without an
                intervening ``reset()``.
        """
        n_steps = self.data.shape[0]
        if self.timestep >= n_steps:
            raise IndexError(
                "step() called after the episode ended; call reset() first"
            )

        prices = self.data[self.timestep]

        if action == 0:  # Buy
            # Only trade when there is cash; a repeated buy must not replace
            # existing holdings with 0 / price.
            if self.balance > 0:
                # Split the cash equally so the total spent equals the balance
                # instead of spending the full balance on every column.
                cash_per_asset = self.balance / prices.size
                self.position = self.position + cash_per_asset / prices
                self.balance = 0.0
        elif action == 1:  # Sell
            # Add proceeds to existing cash so selling with no holdings is a
            # no-op rather than zeroing the balance.
            self.balance += float(np.sum(self.position * prices))
            self.position = np.zeros(prices.size)

        self.timestep += 1
        done = self.timestep >= n_steps

        # After the final row there is no newer price, so value any open
        # holdings at the last available row.
        mark_prices = self.data[min(self.timestep, n_steps - 1)]
        equity = self.balance + float(np.sum(self.position * mark_prices))
        reward = equity - self.initial_balance

        return self._get_obs(), reward, done, {}

    def _get_obs(self) -> np.ndarray:
        """Return a copy of the ``window_size`` rows preceding ``timestep``."""
        start = self.timestep - self.window_size
        # Copy so in-place edits by the caller cannot corrupt the price data.
        return self.data[start:self.timestep].copy()
