# Vanguard S&P 500 ETF Forecast

&emsp;This project builds a **predictive model** for the share price of the *Vanguard S&P 500 ETF* (ticker: VOO).

In [11]:
import random
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.dates as mdates

import yfinance as yf
from sklearn.preprocessing import MinMaxScaler

import tensorflow as tf
from tensorflow.keras import layers

from sklearn.metrics import r2_score

# Fix every random number generator used here (NumPy, the stdlib `random`
# module and TensorFlow) to one shared seed value.
SEED = 1
np.random.seed(SEED)
random.seed(SEED)
tf.random.set_seed(SEED)

# Create Windowed Dataset Function

In [None]:
def create_dataset(orig_dataset, window_size=1):
    """Build a supervised (window -> next value) dataset from the first column.

    Each sample consists of ``window_size`` consecutive values taken from
    column 0 of ``orig_dataset``; its target is the value that immediately
    follows that window.

    Parameters
    ----------
    orig_dataset : pandas.DataFrame
        Frame whose first column (position 0) holds the ordered series.
    window_size : int, default 1
        Number of consecutive observations in each input window (>= 1).

    Returns
    -------
    data_x : numpy.ndarray
        Input windows, shape ``(n_samples, window_size)``.
    data_y : numpy.ndarray
        Targets, shape ``(n_samples,)``.

    ``n_samples`` is ``max(len(orig_dataset) - window_size, 0)``.

    Raises
    ------
    ValueError
        If ``window_size`` is smaller than 1.
    """
    if window_size < 1:
        raise ValueError(f"window_size must be >= 1, got {window_size}")

    series = orig_dataset.iloc[:, 0].to_numpy()
    n_samples = len(series) - window_size

    if n_samples <= 0:
        # Not enough rows for even one (window, target) pair. Keep data_x
        # two-dimensional so callers can still reshape/concatenate it.
        empty_x = np.empty((0, window_size), dtype=series.dtype)
        empty_y = np.empty((0,), dtype=series.dtype)
        return empty_x, empty_y

    # sliding_window_view produces len - window_size + 1 windows; the final
    # one has no following value to use as a target, so it is dropped.
    windows = np.lib.stride_tricks.sliding_window_view(series, window_size)
    windows = windows[:n_samples]
    targets = series[window_size:]

    # Copy because the strided view is read-only and shares memory with the
    # source column; callers get independent, writable arrays.
    return windows.copy(), targets.copy()

# Import Data

In [33]:
# Define the ticker symbol of the instrument to download
ticker_symbol = "VOO"

# Create a yfinance Ticker object for that symbol
ticker = yf.Ticker(ticker_symbol)

# Fetch historical market data. period='max' requests the longest history
# available, not a single year (the rows displayed below begin in 2010).
voo = ticker.history(period='max')
# Report the frame's dimensions, then preview its first rows
print(f"Rows: {voo.shape[0]}\nColumns: {voo.shape[1]}")
voo.head()

Rows: 3626
Columns: 8


Unnamed: 0_level_0,Open,High,Low,Close,Volume,Dividends,Stock Splits,Capital Gains
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2010-09-09 00:00:00-04:00,78.626477,78.626477,77.583237,77.721313,26500,0.0,0.0,0.0
2010-09-10 00:00:00-04:00,77.997496,78.135572,77.706005,78.074203,8600,0.0,0.0,0.0
2010-09-13 00:00:00-04:00,78.979376,79.117452,78.626516,79.056084,33750,0.0,0.0,0.0
2010-09-14 00:00:00-04:00,78.887312,79.378254,78.534453,79.040733,59400,0.0,0.0,0.0
2010-09-15 00:00:00-04:00,78.718531,79.301513,78.54977,79.24015,9250,0.0,0.0,0.0


In [28]:
# Summarize column dtypes and non-null counts to check for missing values
voo.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 3625 entries, 2010-09-09 00:00:00-04:00 to 2025-02-05 00:00:00-05:00
Data columns (total 8 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   Open           3625 non-null   float64
 1   High           3625 non-null   float64
 2   Low            3625 non-null   float64
 3   Close          3625 non-null   float64
 4   Volume         3625 non-null   int64  
 5   Dividends      3625 non-null   float64
 6   Stock Splits   3625 non-null   float64
 7   Capital Gains  3625 non-null   float64
dtypes: float64(7), int64(1)
memory usage: 254.9 KB


&emsp;Every column's non-null count equals the number of index entries, so there are no missing values.