In [5]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [13]:
!pip install yfinance TA-lib

Collecting TA-lib
  Downloading ta_lib-0.6.8-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl.metadata (23 kB)
Downloading ta_lib-0.6.8-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl (4.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m4.1/4.1 MB[0m [31m56.5 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: TA-lib
Successfully installed TA-lib-0.6.8


In [1]:
import gymnasium as gym
from gymnasium import spaces
import numpy as np
import talib
import yfinance as yf
import os
import pandas as pd

In [2]:
class SingleStockTradingEnv(gym.Env):
    metadata = {"render_modes": ["human"]}

    def __init__(self, prices, indicators, max_k, initial_cash=1e5):
        super().__init__()
        self.prices = prices      # np.array shape (T, 5) -> [O,H,L,C,V]
        self.indicators = indicators  # np.array shape (T, K) -> [MACD, RSI, ...]
        self.max_k = max_k
        self.initial_cash = initial_cash

        obs_dim = 3 + self.prices.shape[1] + self.indicators.shape[1]
        self.observation_space = spaces.Box(
            low=-np.inf, high=np.inf, shape=(obs_dim,), dtype=np.float32
        )

        self.action_space = spaces.Discrete(2 * max_k + 1)  # {-k,...,0,...,k}

    def _decode_action(self, action_idx):
      # maps 0..2k -> -k..k
      a = action_idx - self.max_k
      return int(a)

    def reset(self, seed=None, options=None):
      super().reset(seed=seed)
      self.t = 0
      self.cash = self.initial_cash
      self.position = 0  # shares
      self._update_portfolio_value()
      obs = self._get_obs()
      return obs, {}

    def step(self, action_idx):
        a = self._decode_action(action_idx)

        price = self.prices[self.t, 3]  # close price

        # max buy/sell given constraints
        max_buy = int(self.cash // price)
        max_sell = self.position

        if a > 0:
            size = min(a, max_buy)
        elif a < 0:
            size = -min(-a, max_sell)
        else:
            size = 0

        # execute trade
        self.cash -= size * price
        self.position += size

        old_value = self.portfolio_value
        self._update_portfolio_value()

        self.t += 1
        terminated = self.t >= len(self.prices) - 1
        truncated = False  # or limit max_steps

        reward = (self.portfolio_value - old_value) / self.portfolio_value  # r = (v' - v)/v'

        obs = self._get_obs()
        info = {"portfolio_value": self.portfolio_value}

        return obs, reward, terminated, truncated, info

    def _update_portfolio_value(self):
        price = self.prices[self.t, 3]
        self.portfolio_value = self.cash + self.position * price

    def _get_obs(self):
        price_feats = self.prices[self.t]    # [O,H,L,C,V]
        ind_feats = self.indicators[self.t]  # [MACD, RSI, ...]
        cash_norm = self.cash / self.initial_cash
        pos_norm = self.position / self.max_k
        val_norm = self.portfolio_value / self.initial_cash
        obs = np.concatenate([
            np.array([cash_norm, pos_norm, val_norm], dtype=np.float32),
            price_feats.astype(np.float32),
            ind_feats.astype(np.float32),
        ])
        return obs




In [4]:
class MAG7TradingEnv(gym.Env):
    metadata = {"render_modes": ["human"]}

    def __init__(self, prices, indicators, max_k=100, initial_cash=1e6):
        """
        prices: np.array shape (T, 7, 5)  # [O,H,L,C,V] per asset
        indicators: np.array shape (T, 7, K)  # e.g. [RSI, MACD, MACD_signal, MACD_hist]
        """
        super().__init__()
        self.prices = prices
        self.indicators = indicators
        self.T, self.n_assets, self.n_price_feats = prices.shape
        _, _, self.n_ind_feats = indicators.shape

        self.max_k = max_k
        self.initial_cash = initial_cash

        # obs: cash_norm(1) + value_norm(1) + positions_norm(7) +
        #      prices_flat(7*5) + ind_flat(7*K)
        obs_dim = 2 + self.n_assets + self.n_assets * (self.n_price_feats + self.n_ind_feats)
        self.observation_space = spaces.Box(
            low=-np.inf, high=np.inf, shape=(obs_dim,), dtype=np.float32
        )

    def _decode_action(self, action_vec):
        # action_vec in {0..2k}, map to [-k..k]
        a = action_vec.astype(int) - self.k
        return a  # shape (7,)

    def reset(self, seed=None, options=None):
        super().reset(seed=seed)
        self.t = 0
        self.cash = self.initial_cash
        self.positions = np.zeros(self.n_assets, dtype=np.int32)
        self._update_portfolio_value()
        obs = self._get_obs()
        return obs, {}  # no info at reset

    def step(self, action):
        action = np.asarray(action)
        a = self._decode_action(action)  # [-k..k]^7

        prices_t = self.prices[self.t, :, 3]  # close prices, shape (7,)

        # execute asset-by-asset with constraints
        for i in range(self.n_assets):
            desired = int(a[i])
            if desired == 0:
                continue

            price = prices_t[i]

            if desired > 0:
                # buy
                max_buy = int(self.cash // price)
                size = min(desired, max_buy)
            else:
                # sell
                max_sell = self.positions[i]
                size = -min(-desired, max_sell)

            if size != 0:
                self.cash -= size * price
                self.positions[i] += size

        old_value = self.portfolio_value
        self._update_portfolio_value()

        # advance time
        self.t += 1
        terminated = self.t >= self.T - 1
        truncated = False

        # normalized reward: (v' - v) / v
        if old_value > 0:
            reward = (self.portfolio_value - old_value) / old_value
        else:
            reward = 0.0

        obs = self._get_obs()
        info = {
            "portfolio_value": float(self.portfolio_value),
            "step_return": float(reward),
        }
        return obs, reward, terminated, truncated, info

    def _update_portfolio_value(self):
        prices_t = self.prices[self.t, :, 3]
        self.portfolio_value = self.cash + float(np.dot(self.positions, prices_t))

    def _get_obs(self):
        prices_t = self.prices[self.t]        # (7, 5)
        inds_t = self.indicators[self.t]      # (7, K)

        cash_norm = self.cash / self.initial_cash
        value_norm = self.portfolio_value / self.initial_cash
        pos_norm = self.positions / self.max_k

        obs = np.concatenate([
            np.array([cash_norm, value_norm], dtype=np.float32),
            pos_norm.astype(np.float32),
            prices_t.reshape(-1).astype(np.float32),
            inds_t.reshape(-1).astype(np.float32),
        ])
        return obs


In [8]:
# Define tickers and output directory
tickers = ['AAPL', 'MSFT', 'GOOGL', 'AMZN', 'NVDA', 'TSLA', 'META']
output_dir = '/content/drive/MyDrive/MAG7_Data'
os.makedirs(output_dir, exist_ok=True)

# Fetch and save data
start_date = '2024-11-01'
end_date = '2025-10-31'

print(f"Fetching data from {start_date} to {end_date}...")

for ticker in tickers:
    try:
        # Download data using yfinance
        # auto_adjust=False ensures we get OHLCV columns similar to standard APIs if preferred,
        # but default is fine. Let's use standard download.
        df = yf.download(ticker, start=start_date, end=end_date, progress=False)

        if not df.empty:
            # Reset index to keep Date as a column in CSV
            df = df.reset_index()

            # Construct file path
            file_path = os.path.join(output_dir, f"{ticker}.csv")

            # Save to CSV
            df.to_csv(file_path, index=False)
            print(f"Successfully saved {ticker}.csv ({len(df)} rows)")
        else:
            print(f"No data found for {ticker}")

    except Exception as e:
        print(f"Error processing {ticker}: {e}")

# Verify files
print("\nFiles in output directory:")
if os.path.exists(output_dir):
    print(os.listdir(output_dir))

Fetching data from 2024-11-01 to 2025-10-31...


  df = yf.download(ticker, start=start_date, end=end_date, progress=False)


Successfully saved AAPL.csv (249 rows)
Successfully saved MSFT.csv (249 rows)


  df = yf.download(ticker, start=start_date, end=end_date, progress=False)
  df = yf.download(ticker, start=start_date, end=end_date, progress=False)


Successfully saved GOOGL.csv (249 rows)
Successfully saved AMZN.csv (249 rows)


  df = yf.download(ticker, start=start_date, end=end_date, progress=False)
  df = yf.download(ticker, start=start_date, end=end_date, progress=False)


Successfully saved NVDA.csv (249 rows)
Successfully saved TSLA.csv (249 rows)


  df = yf.download(ticker, start=start_date, end=end_date, progress=False)
  df = yf.download(ticker, start=start_date, end=end_date, progress=False)


Successfully saved META.csv (249 rows)

Files in output directory:
['AAPL.csv', 'MSFT.csv', 'GOOGL.csv', 'AMZN.csv', 'NVDA.csv', 'TSLA.csv', 'META.csv']


In [12]:
# Define configuration
data_dir = '/content/drive/MyDrive/MAG7_Data'
output_file = os.path.join(data_dir, 'ohlcv.csv')

all_dfs = []
for ticker in tickers:
    file_path = os.path.join(data_dir, f"{ticker}.csv")
    if os.path.exists(file_path):
        try:
            # Read CSV. Based on previous output, headers might be multi-level.
            # We try reading with header=[0, 1] first to handle (Price, Ticker) structure.
            df = pd.read_csv(file_path, header=[0, 1])

            # Check if it's actually a MultiIndex and flatten it
            if isinstance(df.columns, pd.MultiIndex):
                # Flatten columns: Keep level 0 ('Date', 'Open', etc.) and ignore level 1 ('Ticker')
                # However, 'Date' might have an empty string in level 1.
                new_columns = []
                for col in df.columns:
                    # col is a tuple like ('Close', 'AAPL') or ('Date', 'Unnamed...')
                    col_name = col[0]
                    if 'Date' in str(col_name):
                         new_columns.append('Date')
                    else:
                         new_columns.append(col_name)
                df.columns = new_columns

            df['Date'] = pd.to_datetime(df['Date'])
            df['symbol'] = ticker

            all_dfs.append(df)
            print(f"Loaded {ticker}: {len(df)} rows")

        except Exception as e:
            print(f"Error processing {ticker}: {e}")
    else:
        print(f"File not found: {file_path}")

# Concatenate and save
if all_dfs:
    consolidated_df = pd.concat(all_dfs, ignore_index=True)
    consolidated_df.sort_values('Date', inplace=True)

    consolidated_df.to_csv(output_file, index=False)
    print(f"\nSuccessfully saved consolidated data to {output_file}")
    print(f"Total rows: {len(consolidated_df)}")
    display(consolidated_df.head())
else:
    print("No data consolidated.")

Loaded AAPL: 249 rows
Loaded MSFT: 249 rows
Loaded GOOGL: 249 rows
Loaded AMZN: 249 rows
Loaded NVDA: 249 rows
Loaded TSLA: 249 rows
Loaded META: 249 rows

Successfully saved consolidated data to /content/drive/MyDrive/MAG7_Data/ohlcv.csv
Total rows: 1743
           Date       Close        High         Low        Open    Volume  \
0    2024-11-01  221.662506  224.088853  219.037281  219.733361  65276700   
1494 2024-11-01  565.382202  571.871806  560.796645  565.830804  15303200   
249  2024-11-01  406.551147  411.633413  403.707860  405.203818  24230400   
1245 2024-11-01  248.979996  254.000000  246.630005  252.039993  57544800   
747  2024-11-01  197.929993  200.500000  197.020004  199.000000  99687800   

     symbol  
0      AAPL  
1494   META  
249    MSFT  
1245   TSLA  
747    AMZN  


In [23]:
# Sort by symbol and Date to ensure contiguous time series for each stock
consolidated_df.sort_values(['symbol', 'Date'], inplace=True)

def calculate_indicators(group):
    # TA-Lib requires inputs as float (double)
    close = group['Close'].values.astype(float)

    # Calculate RSI
    group['RSI_14'] = talib.RSI(close, timeperiod=14)

    # Calculate MACD
    macd, macd_signal, macd_hist = talib.MACD(
        close,
        fastperiod=12,
        slowperiod=26,
        signalperiod=9
    )
    group['MACD'] = macd
    group['MACD_signal'] = macd_signal
    group['MACD_hist'] = macd_hist

    return group

# Apply indicators per symbol
consolidated_df = consolidated_df.groupby('symbol', group_keys=False).apply(calculate_indicators)

# Sort back by Date as per the original requirement
consolidated_df.sort_values('Date', inplace=True)

  consolidated_df = consolidated_df.groupby('symbol', group_keys=False).apply(calculate_indicators)


In [24]:
# Verify that we have values now (first 14 rows per symbol will still be NaN)
print("Sample data with indicators:")
display(consolidated_df[['Date', 'symbol', 'Close', 'RSI_14', 'MACD']].dropna().head())

print("\nDataframe Tail:")
display(consolidated_df.tail())

Sample data with indicators:


Unnamed: 0,Date,symbol,Close,RSI_14,MACD
1029,2024-12-19,NVDA,130.65152,41.861937,-3.498057
780,2024-12-19,AMZN,223.289993,57.616766,5.92236
282,2024-12-19,MSFT,433.829712,53.57748,7.485169
33,2024-12-19,AAPL,248.665344,69.774785,6.885883
531,2024-12-19,GOOGL,187.897415,59.451302,4.173787



Dataframe Tail:


Unnamed: 0,Date,Close,High,Low,Open,Volume,symbol,RSI_14,MACD,MACD_signal,MACD_hist
995,2025-10-30,222.860001,228.440002,222.75,227.059998,102252900,AMZN,50.072024,0.57289,-0.702921,1.275811
1244,2025-10-30,202.889999,206.160004,201.410004,205.149994,178864400,NVDA,67.073018,4.808148,2.653951,2.154197
248,2025-10-30,271.137146,273.874513,268.219991,271.726571,69886500,AAPL,71.085551,6.170689,5.053562,1.117127
1742,2025-10-30,666.469971,680.960022,650.169983,669.150024,88440100,META,30.102469,-3.918098,-3.209426,-0.708672
1493,2025-10-30,440.100006,455.059998,439.609985,451.049988,72447900,TSLA,52.343908,10.72496,11.883125,-1.158165


In [27]:
consolidated_df.to_csv(output_file, index=False)
print(f"\nSuccessfully saved consolidated data to {output_file}")
print(f"Total rows: {len(consolidated_df)}")


Successfully saved consolidated data to /content/drive/MyDrive/MAG7_Data/ohlcv.csv
Total rows: 1743


In [26]:
symbols = ["AAPL", "MSFT", "GOOGL", "AMZN", "META", "TSLA", "NVDA"]
price_list = []
ind_list = []

for sym in symbols:
    # Filter data for the specific symbol
    df_sym = consolidated_df[consolidated_df['symbol'] == sym]

    # Append Price features (T, 5)
    price_list.append(df_sym[["Open","High","Low","Close","Volume"]].to_numpy())

    # Append Indicator features (T, 4)
    ind_list.append(df_sym[["RSI_14","MACD","MACD_signal","MACD_hist"]].to_numpy())

# Stack along axis 1 to form (Time, Assets, Features)
prices = np.stack(price_list, axis=1)      # (T, 7, 5)
indicators = np.stack(ind_list, axis=1)    # (T, 7, 4)

print(f"Prices shape: {prices.shape}")
print(f"Indicators shape: {indicators.shape}")

Prices shape: (249, 7, 5)
Indicators shape: (249, 7, 4)
