In [1]:
import yfinance as yf
import os
import pandas as pd

In [2]:
tickers = ["AAPL", "MSFT", "INTC", "AMZN", "TSLA", "GOOGL", "NFLX", "DIS"]
data = yf.download(tickers, start="2010-01-01", end="2025-01-01")
if data.empty:
    print("No data found for the specified ticker and date range.")
    exit()
else:
    os.makedirs('../data/raw', exist_ok=True)

    for ticker in tickers:
        tickers_data = data.xs(ticker, level=1, axis=1)
        tickers_data.to_csv(f'../data/raw/{ticker}.csv')

print(data.head())

YF.download() has changed argument auto_adjust default to True


[*********************100%***********************]  8 of 8 completed


Price          Close                                                      \
Ticker          AAPL    AMZN        DIS      GOOGL       INTC       MSFT   
Date                                                                       
2010-01-04  6.440331  6.6950  27.593046  15.609804  13.519422  23.254053   
2010-01-05  6.451466  6.7345  27.524214  15.541064  13.512949  23.261568   
2010-01-06  6.348847  6.6125  27.377943  15.149294  13.467624  23.118807   
2010-01-07  6.337109  6.5000  27.386551  14.796625  13.338130  22.878382   
2010-01-08  6.379241  6.6760  27.429565  14.993881  13.487059  23.036165   

Price                          High          ...      Open          Volume  \
Ticker          NFLX TSLA      AAPL    AMZN  ...      NFLX TSLA       AAPL   
Date                                         ...                             
2010-01-04  7.640000  NaN  6.455077  6.8305  ...  7.931429  NaN  493729600   
2010-01-05  7.358571  NaN  6.487879  6.7740  ...  7.652857  NaN  601904800   
2

In [3]:
def preprocess_data(tickers, data_dir='../data/raw/'):
    preprocess_data = {}

    for ticker in tickers:
        df = pd.read_csv(f'{data_dir}{ticker}.csv', index_col='Date', parse_dates=True)

        df.fillna(method='ffill', inplace=True)
        df['Close'] = df['Close'] / df['Close'].iloc[0]
        df['SMA_50'] = df['Close'].rolling(window=50).mean()

        preprocess_data[ticker] = df
    
    return preprocess_data

preprocessed_data = preprocess_data(tickers)

  df.fillna(method='ffill', inplace=True)
  df.fillna(method='ffill', inplace=True)
  df.fillna(method='ffill', inplace=True)
  df.fillna(method='ffill', inplace=True)
  df.fillna(method='ffill', inplace=True)
  df.fillna(method='ffill', inplace=True)
  df.fillna(method='ffill', inplace=True)
  df.fillna(method='ffill', inplace=True)


In [4]:
import gym
from gym import spaces
import numpy as np

In [11]:
class TradingEnv(gym.Env):
    def __init__(self, data, window_size=30):
        super(TradingEnv, self).__init__()

        self.data = data
        self.window_size = window_size
        self.current_step = 0
        self.stock_price = self.data['Close'].values

        # Track agent's portfolio (initially no stock, and all cash)
        self.stock_held = 0
        self.balance = 1000  # starting balance (cash)
        
        self.action_space = spaces.Discrete(3)  # 0 - sell, 1 - hold, 2 - buy
        self.observation_space = spaces.Box(
            low=0, high=np.inf, shape=(self.window_size, len(self.data.columns)), dtype=np.float32
        )
    
    def reset(self):
        self.current_step = self.window_size
        self.stock_held = 0
        self.balance = 1000
        return self._next_observation()  # Corrected here

    
    def _next_observation(self):
        return np.array(self.data.iloc[self.current_step - self.window_size:self.current_step].values)
    
    def step(self, action):
        current_price = self.stock_price[self.current_step]
        prev_price = self.stock_price[self.current_step - 1]
        reward = 0

        if action == 0:  # Sell
            if self.stock_held > 0:  # Only sell if we have stock
                reward = (current_price - prev_price) * self.stock_held
                self.balance += reward
                self.stock_held = 0  # Selling all stock
        elif action == 1:  # Hold
            reward = 0  # No reward for holding, just keeping balance intact
        elif action == 2:  # Buy
            if self.balance >= current_price:  # Only buy if we have enough balance
                self.stock_held += self.balance // current_price  # Buy as many shares as possible
                self.balance -= self.stock_held * current_price  # Deduct cash for the purchase
                reward = -self.stock_held * current_price  # Negative reward to penalize buy action

        self.current_step += 1
        done = self.current_step >= len(self.data)  # End when data is finished

        return self._next_observation(), reward, done, {}

    def render(self, mode='human', close=False):
        # You can add a way to visualize the current portfolio or balance here if needed
        print(f"Step: {self.current_step}, Stock Held: {self.stock_held}, Balance: {self.balance}")


env = TradingEnv(preprocessed_data['AAPL'])

In [12]:
# Test the trained agent
obs = env.reset()
for _ in range(1000):
    action, _states = model.predict(obs)  # Predict action from the model
    obs, reward, done, info = env.step(action)  # Take action in the environment
    print(f"Action: {action}, Reward: {reward}, Balance: {env.balance}")  # Print reward and balance
    if done:
        break

Action: 0, Reward: 0, Balance: 1000
Action: 0, Reward: 0, Balance: 1000
Action: 0, Reward: 0, Balance: 1000
Action: 0, Reward: 0, Balance: 1000
Action: 0, Reward: 0, Balance: 1000
Action: 0, Reward: 0, Balance: 1000
Action: 0, Reward: 0, Balance: 1000
Action: 0, Reward: 0, Balance: 1000
Action: 0, Reward: 0, Balance: 1000
Action: 0, Reward: 0, Balance: 1000
Action: 0, Reward: 0, Balance: 1000
Action: 0, Reward: 0, Balance: 1000
Action: 0, Reward: 0, Balance: 1000
Action: 0, Reward: 0, Balance: 1000
Action: 0, Reward: 0, Balance: 1000
Action: 0, Reward: 0, Balance: 1000
Action: 0, Reward: 0, Balance: 1000
Action: 0, Reward: 0, Balance: 1000
Action: 0, Reward: 0, Balance: 1000
Action: 0, Reward: 0, Balance: 1000
Action: 0, Reward: 0, Balance: 1000
Action: 0, Reward: 0, Balance: 1000
Action: 0, Reward: 0, Balance: 1000
Action: 0, Reward: 0, Balance: 1000
Action: 0, Reward: 0, Balance: 1000
Action: 0, Reward: 0, Balance: 1000
Action: 0, Reward: 0, Balance: 1000
Action: 0, Reward: 0, Balanc