In [69]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import gym
from gym import spaces
import tensorflow as tf
from tensorflow.keras import layers
import torch
import torch.nn as nn
import torch.optim as optim
import random
import yfinance as yf
from sklearn.preprocessing import MinMaxScaler
from stable_baselines3 import DDPG
from stable_baselines3.common.vec_env import DummyVecEnv
import ta


In [74]:
def set_seed(seed=42):
    np.random.seed(seed)
    random.seed(seed)
    tf.random.set_seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)

set_seed()

In [77]:
# Define parameters such as:
START_DATE = '2010-01-01'
END_DATE = '2020-12-31'
STOCKS = ['AAPL', 'TSLA', 'AMZN', 'GOOGL', 'MSFT', 'META','NVDA' ]  # Mega 7 stocks
TRAIN_TEST_SPLIT = 0.8
WINDOW_SIZE = 60  # Number of past days to consider
INITIAL_BALANCE = 100000  # Starting cash

In [91]:
def download_data(tickers, start, end):
    data = {}
    for ticker in tickers:
        data[ticker] = yf.download(ticker, start=start, end=end)
    return data

data = download_data(STOCKS, START_DATE, END_DATE)


[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed


In [92]:
print(data)

{'AAPL': Price        Adj Close       Close        High         Low        Open  \
Ticker            AAPL        AAPL        AAPL        AAPL        AAPL   
Date                                                                     
2010-01-04    6.447412    7.643214    7.660714    7.585000    7.622500   
2010-01-05    6.458559    7.656429    7.699643    7.616071    7.664286   
2010-01-06    6.355828    7.534643    7.686786    7.526786    7.656429   
2010-01-07    6.344078    7.520714    7.571429    7.466071    7.562500   
2010-01-08    6.386255    7.570714    7.571429    7.466429    7.510714   
...                ...         ...         ...         ...         ...   
2020-12-23  128.059860  130.960007  132.429993  130.779999  132.160004   
2020-12-24  129.047516  131.970001  133.460007  131.100006  131.320007   
2020-12-28  133.662994  136.690002  137.339996  133.509995  133.990005   
2020-12-29  131.883286  134.869995  138.789993  134.339996  138.050003   
2020-12-30  130.758743  133.7

In [95]:
def clean_data(data):
    cleaned_data = {}
    for ticker, df in data.items():
        df = df.dropna()
        cleaned_data[ticker] = df
    return cleaned_data

data = clean_data(data)


In [96]:
print(data)

{'AAPL': Price        Adj Close       Close        High         Low        Open  \
Ticker            AAPL        AAPL        AAPL        AAPL        AAPL   
Date                                                                     
2010-01-04    6.447412    7.643214    7.660714    7.585000    7.622500   
2010-01-05    6.458559    7.656429    7.699643    7.616071    7.664286   
2010-01-06    6.355828    7.534643    7.686786    7.526786    7.656429   
2010-01-07    6.344078    7.520714    7.571429    7.466071    7.562500   
2010-01-08    6.386255    7.570714    7.571429    7.466429    7.510714   
...                ...         ...         ...         ...         ...   
2020-12-23  128.059860  130.960007  132.429993  130.779999  132.160004   
2020-12-24  129.047516  131.970001  133.460007  131.100006  131.320007   
2020-12-28  133.662994  136.690002  137.339996  133.509995  133.990005   
2020-12-29  131.883286  134.869995  138.789993  134.339996  138.050003   
2020-12-30  130.758743  133.7

In [97]:
def compute_RSI(series, period=14):
    delta = series.diff()

    gain = delta.clip(lower=0)
    loss = -delta.clip(upper=0)

    avg_gain = gain.rolling(window=period, min_periods=period).mean()[:period+1]
    avg_loss = loss.rolling(window=period, min_periods=period).mean()[:period+1]

    rsi = pd.Series(index=series.index, dtype='float64')

    # Initialize RSI values
    for i in range(period, len(series)):
        if i == period:
            current_avg_gain = avg_gain.iloc[-1]
            current_avg_loss = avg_loss.iloc[-1]
        else:
            current_avg_gain = (current_avg_gain * (period - 1) + gain.iloc[i]) / period
            current_avg_loss = (current_avg_loss * (period - 1) + loss.iloc[i]) / period

        if current_avg_loss == 0:
            rs = np.inf
            rsi.iloc[i] = 100
        else:
            rs = current_avg_gain / current_avg_loss
            rsi.iloc[i] = 100 - (100 / (1 + rs))

    return rsi

def compute_MACD(series, span_short=12, span_long=26, span_signal=9):
    ema_short = series.ewm(span=span_short, adjust=False).mean()
    ema_long = series.ewm(span=span_long, adjust=False).mean()
    macd = ema_short - ema_long
    signal = macd.ewm(span=span_signal, adjust=False).mean()
    macd_diff = macd - signal
    return macd, signal, macd_diff

In [101]:
def add_technical_indicators(df):
    df['Close'] = df['Close'].squeeze()
    # Moving Averages
    df['MA50'] = ta.trend.sma_indicator(df['Close'], window=50)
    df['MA200'] = ta.trend.sma_indicator(df['Close'], window=200)
    
    # RSI
    df['RSI'] = ta.momentum.RSIIndicator(df['Close'], window=14).rsi()
    
    # MACD
    macd = ta.trend.MACD(df['Close'])
    df['MACD'] = macd.macd()
    df['MACD_Signal'] = macd.macd_signal()
    df['MACD_Diff'] = macd.macd_diff()
    
    # Drop rows with NaN values resulting from indicator calculations
    df = df.dropna()
    
    return df

for ticker in STOCKS:
    # Replace with your data fetching method
    # data[ticker] = fetch_stock_data(ticker)
    
    # Ensure 'Close' is correctly formatted
    data[ticker]['Close'] = data[ticker]['Close'].squeeze()
    
    # Add technical indicators
    data[ticker] = add_technical_indicators(data[ticker])

# Now, `data` contains DataFrames with technical indicators for each stock


ValueError: Data must be 1-dimensional, got ndarray of shape (2768, 1) instead

In [71]:

scalers = {}
for ticker in STOCKS:
    scaler = MinMaxScaler()
    data[ticker][['Open', 'High', 'Low', 'Close', 'Volume', 'MA50', 'MA200', 'RSI', 'MACD']] = scaler.fit_transform(
        data[ticker][['Open', 'High', 'Low', 'Close', 'Volume', 'MA50', 'MA200', 'RSI', 'MACD']])
    scalers[ticker] = scaler


KeyError: "['MA50' 'MA200' 'RSI' 'MACD'] not in index"