In [20]:
import yfinance as yf
import pandas_ta as ta
import pandas as pd
import statsmodels.api as sm
from sklearn.metrics import mean_absolute_error, mean_squared_error
from plotly.subplots import make_subplots
import plotly.graph_objects as go

In [21]:

# Load Ticker data
df = yf.download('NVDA', start="2022-10-25", end="2024-10-25")
df = df[['Open', 'High', 'Low', 'Close', 'Volume']]

  df = yf.download('NVDA', start="2022-10-25", end="2024-10-25")
[*********************100%***********************]  1 of 1 completed


In [22]:
# Shift data backward by one day to ensure no data leakage
df['Previous_Close'] = df['Close'].shift(1)  # Add previous day's close as a feature
df['Close_shifted'] = df['Close'].shift(1)
df['Open_shifted'] = df['Open'].shift(1)
df['High_shifted'] = df['High'].shift(1)
df['Low_shifted'] = df['Low'].shift(1)

df

Price,Open,High,Low,Close,Volume,Previous_Close,Close_shifted,Open_shifted,High_shifted,Low_shifted
Ticker,NVDA,NVDA,NVDA,NVDA,NVDA,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2
2022-10-25,12.678767,13.284039,12.648802,13.245086,505482000,,,,,
2022-10-26,12.853558,13.371934,12.692751,12.880525,532953000,13.245086,13.245086,12.678767,13.284039,12.648802
2022-10-27,13.613643,13.821394,13.106253,13.160188,583113000,12.880525,12.880525,12.853558,13.371934,12.692751
2022-10-28,13.088276,13.833382,13.045328,13.817400,521040000,13.160188,13.160188,13.613643,13.821394,13.106253
2022-10-31,13.761467,13.821396,13.281045,13.480804,486341000,13.817400,13.817400,13.088276,13.833382,13.045328
...,...,...,...,...,...,...,...,...,...,...
2024-10-18,138.630240,138.860170,137.240639,137.960434,176090200,136.890747,136.890747,139.300060,140.849619,136.830767
2024-10-21,138.090406,143.668808,137.960438,143.668808,264554500,137.960434,137.960434,138.630240,138.860170,137.240639
2024-10-22,142.869046,144.378607,141.739365,143.548843,226311600,143.668808,143.668808,138.090406,143.668808,137.960438
2024-10-23,141.989282,142.389161,137.420600,139.519989,285930000,143.548843,143.548843,142.869046,144.378607,141.739365


In [None]:
# Calculate technical indicators on the shifted data

# Simple Moving Average (SMA): Average price over the last 50 periods
df['SMA_50'] = ta.sma(df['Close_shifted'], length=50)

# Exponential Moving Average (EMA): Weighted average that reacts faster to recent price changes, using 50 periods
df['EMA_50'] = ta.ema(df['Close_shifted'], length=50)

# Relative Strength Index (RSI): Momentum indicator that measures the magnitude of recent price changes to evaluate overbought/oversold conditions, using a 14-period lookback
df['RSI'] = ta.rsi(df['Close_shifted'], length=14)

# Moving Average Convergence Divergence (MACD): Trend-following momentum indicator, using 12 and 26 periods for the fast and slow EMAs and a 9-period signal line
macd = ta.macd(df['Close_shifted'], fast=12, slow=26, signal=9)
df['MACD'] = macd['MACD_12_26_9']        # MACD line
df['Signal_Line'] = macd['MACDs_12_26_9'] # Signal line

# Bollinger Bands: Volatility indicator using a 20-period moving average and 2 standard deviations
bollinger = ta.bbands(df['Close_shifted'], length=20, std=2)
df['BB_Upper'] = bollinger['BBM_20_2.0_2.0']  # Upper Bollinger Band
df['BB_Middle'] = bollinger['BBM_20_2.0_2.0'] # Middle Band (20-period SMA)
df['BB_Lower'] = bollinger['BBL_20_2.0_2.0']  # Lower Bollinger Band

# Stochastic Oscillator: Momentum indicator comparing closing prices to price ranges over 14 periods with a 3-period %D moving average
stoch = ta.stoch(df['High_shifted'], df['Low_shifted'], df['Close_shifted'], k=14, d=3)
df['%K'] = stoch['STOCHk_14_3_3'] # %K line (main line)
df['%D'] = stoch['STOCHd_14_3_3'] # %D line (3-period moving average of %K)

# Average True Range (ATR): Volatility indicator measuring the average range of price movement over the last 14 periods
df['ATR'] = ta.atr(df['High_shifted'], df['Low_shifted'], df['Close_shifted'], length=14)

KeyError: 'BBM_20_2.0'

In [28]:
# Drop rows with missing values due to shifting and indicator calculation
df.dropna(inplace=True)

In [29]:
df

Price,Open,High,Low,Close,Volume,Previous_Close,Close_shifted,Open_shifted,High_shifted,Low_shifted,...,EMA_50,RSI,MACD,Signal_Line,BB_Upper,BB_Middle,BB_Lower,%K,%D,ATR
Ticker,NVDA,NVDA,NVDA,NVDA,NVDA,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,...,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
2023-01-06,14.460331,14.995825,14.020747,14.844968,405044000,14.251527,14.251527,14.477313,14.550244,14.134637,...,15.401304,38.758795,-0.399859,-0.140036,18.466825,15.844122,13.221419,13.225559,13.496120,0.721143
2023-01-09,15.269565,16.040836,15.126700,15.613240,504231000,14.844968,14.844968,14.460331,14.995825,14.020747,...,15.379487,44.506894,-0.380759,-0.188180,18.437771,15.781132,13.124492,20.591767,16.102218,0.739281
2023-01-10,15.492355,15.946926,15.457389,15.893976,384101000,15.613240,15.613240,15.269565,16.040836,15.126700,...,15.388654,50.928351,-0.300168,-0.210578,18.281483,15.704154,13.126825,35.184089,23.000472,0.771894
2023-01-11,15.825039,16.012861,15.548301,15.985887,353285000,15.893976,15.893976,15.492355,15.946926,15.457389,...,15.408470,53.065591,-0.211212,-0.210705,18.158058,15.649606,13.141154,56.096068,37.290641,0.751726
2023-01-12,16.084794,16.621286,15.477368,16.495405,551409000,15.985887,15.985887,15.825039,16.012861,15.548301,...,15.431114,53.775409,-0.131778,-0.194919,17.930126,15.572978,13.215831,71.527562,54.269240,0.731214
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2024-10-18,138.630240,138.860170,137.240639,137.960434,176090200,136.890747,136.890747,139.300060,140.849619,136.830767,...,122.499805,64.340687,4.968088,3.961316,141.440153,126.580206,111.720259,78.684069,81.946485,5.159512
2024-10-21,138.090406,143.668808,137.960438,143.668808,264554500,137.960434,137.960434,138.630240,138.860170,137.240639,...,123.106104,65.339756,5.118997,4.192852,142.487880,127.679890,112.871900,85.845110,82.084812,4.931649
2024-10-22,142.869046,144.378607,141.739365,143.548843,226311600,143.668808,143.668808,138.090406,143.668808,137.960438,...,123.912485,70.146566,5.634263,4.481134,144.464770,129.051996,113.639222,91.132661,85.220613,4.987129
2024-10-23,141.989282,142.389161,137.420600,139.519989,285930000,143.548843,143.548843,142.869046,144.378607,141.739365,...,124.682538,69.927086,5.964183,4.777744,146.378831,130.187671,113.996511,95.109933,90.695901,4.819423


In [30]:
# Parameters
window_size = 20  # 4 weeks of trading days (5 days per week * 4)

# List of indicators to test, including Previous_Close
indicators = ['SMA_50', 'EMA_50', 'RSI', 'MACD', 'Signal_Line', '%K', '%D', 'ATR', 'Close_shifted', 'Previous_Close']

# Initialize a dictionary to store predictions, actuals, daily MAE for each indicator
results = {indicator: {'predictions': [], 'actual': [], 'daily_mae': []} for indicator in indicators}

In [31]:
# Sequentially predict the actual close price using a rolling 4 weeks window, set by window_size
for i in range(window_size, len(df) - 1):
    train_df = df.iloc[i - window_size:i]  # Training window 
    test_index = i + 1  # Index of next day's prediction
    actual_close_price = df['Close'].iloc[test_index]  # Next day's actual closing price

    # Individual indicators as predictors (plus Previous_Close)
    for indicator in indicators[:-1]:  # Exclude Previous_Close from standalone tests
        X_train = train_df[[indicator, 'Previous_Close']]
        y_train = train_df['Close']
        X_train = sm.add_constant(X_train)  # Add constant for intercept

        model = sm.OLS(y_train, X_train).fit()
        X_test = pd.DataFrame({indicator: [df[indicator].iloc[test_index]], 'Previous_Close': [df['Previous_Close'].iloc[test_index]]})
        X_test = sm.add_constant(X_test, has_constant='add')  # Add constant for prediction

        prediction = model.predict(X_test)[0]
        results[indicator]['predictions'].append(prediction)
        results[indicator]['actual'].append(actual_close_price)
        
        daily_mae = mean_absolute_error([actual_close_price], [prediction])
        results[indicator]['daily_mae'].append(daily_mae)

In [22]:
# Calculate accuracy metrics (MAE, MSE) for each individual indicator and the combined model
accuracy_data = {
    'Indicator': [],
    'MAE': [],
    'MSE': []
}

for indicator in indicators[:-1]:  # Exclude Previous_Close from standalone tests in accuracy table
    if results[indicator]['actual']:  # Check if there are results for this indicator
        mae = mean_absolute_error(results[indicator]['actual'], results[indicator]['predictions'])
        mse = mean_squared_error(results[indicator]['actual'], results[indicator]['predictions'])
        accuracy_data['Indicator'].append(indicator)
        accuracy_data['MAE'].append(mae)
        accuracy_data['MSE'].append(mse)


# Create accuracy DataFrame
accuracy_df = pd.DataFrame(accuracy_data).sort_values(by='MAE').reset_index(drop=True)
accuracy_df

Unnamed: 0,Indicator,MAE,MSE
0,Close_shifted,1.824421,7.758742
1,MACD,1.953043,9.282104
2,EMA_50,1.975697,8.985387
3,%D,1.999099,9.482705
4,Signal_Line,2.00565,10.229199
5,%K,2.03322,9.881244
6,RSI,2.091683,10.421504
7,ATR,2.118336,10.859924
8,SMA_50,2.17582,11.648777


In [23]:
from plotly.subplots import make_subplots
import plotly.graph_objects as go

# Create faceted plot with each indicator's daily MAE
fig = make_subplots(rows=len(indicators), cols=1, shared_xaxes=True, vertical_spacing=0.02,
                    subplot_titles=[f"{indicator} Daily MAE" for indicator in indicators[:-1]])

# Find the global y-axis range across all indicators
y_values = [results[indicator]['daily_mae'] for indicator in indicators[:-1]]
y_min = min(min(y) for y in y_values)
y_max = max(max(y) for y in y_values)

# Add each individual indicator's daily MAE
for idx, indicator in enumerate(indicators[:-1]):
    fig.add_trace(
        go.Scatter(
            x=df.index[window_size + 1:],  # Start date after the initial window
            y=results[indicator]['daily_mae'],
            mode='lines',
            name=f'{indicator} Daily MAE'
        ),
        row=idx + 1, col=1
    )

# Update layout with shared y-axis range and individual x-axis labels
fig.update_yaxes(range=[y_min, y_max])  # Apply the common y-axis range across all subplots
fig.update_xaxes(title_text="Date", row=len(indicators), col=1)  # Add x-axis label for the last row

# Final layout adjustments
fig.update_layout(
    height=150 * (len(indicators)),  # Adjust height for the combined model
    title="Daily MAE of Each Technical Indicator on NVDA Closing Price",
    yaxis_title="Daily MAE",
    showlegend=False,
    template="plotly_white"
)

fig.show()

In [27]:
# Create the figure
fig = go.Figure()

# Add Close price
fig.add_trace(go.Scatter(x=df.index, y=df['Close'], mode='lines', name='Close Price', line=dict(color='white', width=1)))

# Add SMA, EMA
fig.add_trace(go.Scatter(x=df.index, y=df['SMA_50'], mode='lines', name='SMA 50', line=dict(color='yellow', width=1)))
fig.add_trace(go.Scatter(x=df.index, y=df['EMA_50'], mode='lines', name='EMA 50', line=dict(color='orange', width=1)))


# Add MACD and Signal Line
fig.add_trace(go.Scatter(x=df.index, y=df['MACD'], mode='lines', name='MACD', line=dict(color='cyan', width=1)))
fig.add_trace(go.Scatter(x=df.index, y=df['Signal_Line'], mode='lines', name='Signal Line', line=dict(color='purple', width=1)))

# Configure layout
fig.update_layout(
    title="Overlay of Technical Indicators on NVDA Close Price",
    xaxis_title="Date",
    yaxis_title="Price",
    template="plotly_dark",
    plot_bgcolor='black',
    paper_bgcolor='black',
    font=dict(color="white"),
    width=800,  # Width of the slide, adjust as needed
    height=600   # Height of the slide, adjust as needed
)

fig.show()