In [5]:
import yfinance as yf

play_list = ["AAPL", "SPY", "^VIX", "^SOX"]

df = yf.download("AAPL", period="10y", interval="1d", ignore_tz=True)
df.to_csv("aapl_data.csv", index=True)

[*********************100%***********************]  1 of 1 completed


In [6]:
import numpy as np
import talib
import pandas as pd

# Load the CSV file
aapl_df = pd.read_csv("aapl_data.csv")

# Ensure the Date column is in datetime format
if 'Date' in aapl_df.columns:
    aapl_df['Date'] = pd.to_datetime(aapl_df['Date'])

# Sort by date
aapl_df = aapl_df.sort_values(by='Date', ascending=True) if 'Date' in aapl_df.columns else aapl_df

### 1. Candlestick Pattern Recognition ###
patterns = {
    "Doji": talib.CDLDOJI,
    "Engulfing": talib.CDLENGULFING,
    "Hammer": talib.CDLHAMMER,
    "Morning Star": talib.CDLMORNINGSTAR,
    "Evening Star": talib.CDLEVENINGSTAR,
}

for pattern_name, pattern_func in patterns.items():
    aapl_df[pattern_name] = pattern_func(aapl_df['Open'], aapl_df['High'], aapl_df['Low'], aapl_df['Close'])

### 2. Technical Indicators ###
aapl_df['SMA_10'] = talib.SMA(aapl_df['Close'], timeperiod=10)
aapl_df['SMA_50'] = talib.SMA(aapl_df['Close'], timeperiod=50)
aapl_df['EMA_10'] = talib.EMA(aapl_df['Close'], timeperiod=10)
aapl_df['EMA_50'] = talib.EMA(aapl_df['Close'], timeperiod=50)
aapl_df['RSI_14'] = talib.RSI(aapl_df['Close'], timeperiod=14)
aapl_df['MACD'], aapl_df['MACD_Signal'], aapl_df['MACD_Hist'] = talib.MACD(aapl_df['Close'], fastperiod=12, slowperiod=26, signalperiod=9)

### 3. Normalization ###
price_columns = ['Open', 'High', 'Low', 'Close', 'Adj Close', 'Volume', 'SMA_10', 'SMA_50', 'EMA_10', 'EMA_50', 'RSI_14', 'MACD', 'MACD_Signal', 'MACD_Hist']
aapl_df[price_columns] = (aapl_df[price_columns] - aapl_df[price_columns].min()) / (aapl_df[price_columns].max() - aapl_df[price_columns].min())

### 4. Windowed Representation ###
window_size = 3  
feature_columns = ['Close', 'Volume', 'SMA_10', 'SMA_50', 'RSI_14', 'MACD', 'MACD_Signal', 'MACD_Hist']

for col in feature_columns:
    for i in range(1, window_size + 1):
        aapl_df[f"{col}_lag{i}"] = aapl_df[col].shift(i)

aapl_df = aapl_df.dropna().reset_index(drop=True)

# Save the processed data
aapl_df.to_csv("processed_aapl_data.csv", index=False)

print("Feature engineering completed! Processed data saved as 'processed_aapl_data.csv'.")

Feature engineering completed! Processed data saved as 'processed_aapl_data.csv'.


In [18]:
import numpy as np
import gymnasium as gym
from gymnasium import spaces

class StockTradingEnv(gym.Env):
    def __init__(self, df, initial_balance=10000, trading_fee=0.001, window_size=252):
        super(StockTradingEnv, self).__init__()
        
        # Load market data
        self.df = df.copy()
        self.initial_balance = initial_balance
        self.trading_fee = trading_fee
        self.window_size = window_size
        self.current_step = 0
        self.done = False
        
        # Normalize prices using rolling min-max scaling
        self._normalize_prices()
        
        # Define state space (features from the dataset)
        self.feature_columns = [
            'Close', 'Volume', 'SMA_10', 'SMA_50', 'RSI_14', 
            'MACD', 'MACD_Signal', 'MACD_Hist', 
            'Doji', 'Engulfing', 'Hammer'
        ]
        self.state_size = len(self.feature_columns)
        
        # Define action space (Buy, Sell, Hold)
        self.action_space = spaces.Discrete(3)
        self.observation_space = spaces.Box(low=0, high=1, shape=(self.state_size,), dtype=np.float32)
        
        # Portfolio state variables
        self.balance = initial_balance
        self.shares_held = 0
        self.portfolio_value = initial_balance
        self.last_portfolio_value = initial_balance
        self.returns = []
        
    def _normalize_prices(self):
        """Apply rolling min-max normalization to price features and standard min-max to other features."""
        price_columns = ['Close', 'SMA_10', 'SMA_50', 'MACD', 'MACD_Signal', 'MACD_Hist']
        non_price_columns = ['Volume', 'RSI_14', 'Doji', 'Engulfing', 'Hammer']
        
        # Rolling min-max normalization for price features
        for col in price_columns:
            self.df[f'{col}_norm'] = (self.df[col] - self.df[col].rolling(self.window_size, min_periods=1).min()) / \
                                    (self.df[col].rolling(self.window_size, min_periods=1).max() - self.df[col].rolling(self.window_size, min_periods=1).min())

        # Standard min-max normalization for non-price features (fixed range 0-1)
        for col in non_price_columns:
            self.df[f'{col}_norm'] = (self.df[col] - self.df[col].min()) / (self.df[col].max() - self.df[col].min())
        
    def reset(self):
        """Resets the environment to the initial state."""
        self.current_step = 0
        self.done = False
        self.balance = self.initial_balance
        self.shares_held = 0
        self.portfolio_value = self.initial_balance
        self.returns = []
        
        return self._next_observation()
    
    def _next_observation(self):
        """Returns the current market state as a feature vector."""
        return np.array(self.df.iloc[self.current_step][[f'{col}_norm' for col in self.feature_columns]], dtype=np.float32)
    
    def step(self, action):
        """Executes the given action and moves the environment forward."""
        current_price = self.df.iloc[self.current_step]['Close']
        
        if action == 1:  # Buy
            if self.balance > 0:
                shares_to_buy = self.balance / (current_price * (1 + self.trading_fee))
                self.shares_held = shares_to_buy
                self.balance -= shares_to_buy * current_price * (1 + self.trading_fee)
        elif action == 2:  # Sell
            if self.shares_held > 0:
                self.balance += self.shares_held * current_price * (1 - self.trading_fee)
                self.shares_held = 0
        
        self.portfolio_value = self.balance + (self.shares_held * current_price)
        
        # Compute returns=
        self.returns.append((self.portfolio_value - self.last_portfolio_value) / self.last_portfolio_value)
        
        # Compute Sharpe Ratio (risk-adjusted reward)
        if len(self.returns) > 1:
            mean_return = np.mean(self.returns)
            std_return = np.std(self.returns) if np.std(self.returns) > 0 else 1
            sharpe_ratio = mean_return / std_return
        else:
            sharpe_ratio = 0
        
        reward = sharpe_ratio
        
        self.last_portfolio_value = self.portfolio_value

        # Move to the next step
        self.current_step += 1
        if self.current_step >= len(self.df) - 1:
            self.done = True
        
        return self._next_observation(), reward, self.done, {}
    
    def render(self):
        """Displays the current portfolio state."""
        print(f'Step: {self.current_step}, Balance: {self.balance}, Shares Held: {self.shares_held}, Portfolio Value: {self.portfolio_value}, Sharpe Ratio: {reward}')


In [19]:
import numpy as np
import pandas as pd
import gymnasium as gym

# Load the processed AAPL stock data
processed_aapl_df = pd.read_csv("processed_aapl_data.csv")

# Ensure Date is in datetime format
if 'Date' in processed_aapl_df.columns:
    processed_aapl_df['Date'] = pd.to_datetime(processed_aapl_df['Date'])

# Sort the dataset
processed_aapl_df = processed_aapl_df.sort_values(by='Date', ascending=True)


# Initialize the environment
env = StockTradingEnv(processed_aapl_df)

# Reset the environment
state = env.reset()

# Run a small test episode (10 steps)
num_steps = 10
print("\n--- Running Test Episode ---\n")
for _ in range(num_steps):
    action = env.action_space.sample()  # Take a random action (Buy, Sell, Hold)
    next_state, reward, done, _ = env.step(action)  # Step the environment
    print(f"action: {action}")
    env.render()  # Print current portfolio state
    
    if done:
        print("Episode ended early.")
        break  # Stop if episode is done

print("\n--- Test Completed ---")



--- Running Test Episode ---

action: 1
Step: 1, Balance: 0.0, Shares Held: 244702.1884529423, Portfolio Value: 9990.00999000999, Sharpe Ratio: 0
action: 2
Step: 2, Balance: 9915.399287050188, Shares Held: 0, Portfolio Value: 9915.399287050188, Sharpe Ratio: -1.3088326176051965
action: 1
Step: 3, Balance: -1.8189894035458565e-12, Shares Held: 264185.5478652883, Portfolio Value: 9905.493793256932, Sharpe Ratio: -1.0346732384268766
action: 1
Step: 4, Balance: -1.8189894035458565e-12, Shares Held: 264185.5478652883, Portfolio Value: 9684.812809506204, Sharpe Ratio: -0.9131020050407773
action: 1
Step: 5, Balance: -1.8189894035458565e-12, Shares Held: 264185.5478652883, Portfolio Value: 9754.64848292546, Sharpe Ratio: -0.4978461450837492
action: 0
Step: 6, Balance: -1.8189894035458565e-12, Shares Held: 264185.5478652883, Portfolio Value: 10413.897410500489, Sharpe Ratio: 0.2519741501919127
action: 0
Step: 7, Balance: -1.8189894035458565e-12, Shares Held: 264185.5478652883, Portfolio Value: