In [1]:
# Use pip install yfinance pandas to get yfinance

import yfinance as yf
import pandas as pd
from datetime import datetime, timedelta
from sklearn.preprocessing import MinMaxScaler
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
import numpy as np
from datetime import datetime, timedelta
from pandas.tseries.holiday import USFederalHolidayCalendar

In [2]:
# Function pulling date from Yahoo Finance 

def fetch_save_data(ticker, start_date, end_date, file_name):
    
    data = yf.download(ticker, start=start_date, end=end_date)
    
    # Sort the DataFrame by date in descending order
    data_sorted = data.sort_index(ascending=False)
    
    # Print the head of the sorted data
    print(data_sorted.head())
    
    data.to_csv(file_name)


In [3]:
# Check function works:

end_date = datetime.today().strftime('%Y-%m-%d')
fetch_save_data("INTC", start_date="1990-01-01", end_date=end_date, file_name="intel_data.csv")

[*********************100%%**********************]  1 of 1 completed
                 Open       High        Low      Close  Adj Close    Volume
Date                                                                       
2023-10-12  36.799999  37.220001  36.389999  36.840000  36.840000  23616000
2023-10-11  36.619999  37.029999  36.380001  36.880001  36.880001  19262600
2023-10-10  36.139999  36.740002  35.810001  36.430000  36.430000  24044100
2023-10-09  35.700001  36.169998  35.500000  36.060001  36.060001  19376100
2023-10-06  35.869999  36.459999  35.180000  36.189999  36.189999  33252300


In [4]:
# Loading Data in order to scale it from -1 to 1

data = pd.read_csv('intel_data.csv', parse_dates=True, index_col='Date')

In [5]:
# Scaling the Data from -1 to 1
# Question: How much does scaling matter when almost all variables are based in dollars? Volume is not on the same scale, but everything else is.

scaler = MinMaxScaler(feature_range=(-1, 1))
scaled_data = pd.DataFrame(scaler.fit_transform(data), columns=data.columns, index=data.index)
print(scaled_data.head())

                Open      High       Low     Close  Adj Close    Volume
Date                                                                   
1990-01-02 -0.995399 -0.994993 -0.994845 -0.994191  -0.996109 -0.778739
1990-01-03 -0.994145 -0.994784 -0.993986 -0.995036  -0.996675 -0.758721
1990-01-04 -0.994772 -0.994993 -0.994416 -0.994402  -0.996251 -0.797626
1990-01-05 -0.994354 -0.994993 -0.993771 -0.994613  -0.996392 -0.873334
1990-01-08 -0.994354 -0.994784 -0.993557 -0.994191  -0.996109 -0.851205


In [9]:
# Reads the file
# Tells pd to parse any columns that look like dates into DateTime objects
# Specify that Date is the index for the df
data = pd.read_csv('intel_data.csv', parse_dates=True, index_col='Date')

# Create previous time steps that include the last 5 days data
lags = 5
# Loop to create lagged features ranging from 1 to 5
for i in range(1, lags + 1):
    data[f'lag_{i}'] = data['Close'].shift(i)

# Drop rows with missing values
data = data.dropna()

# Dropping all values that aren't independent variables
# Assigning 'Close' as the dependent variable
X = data.drop(['Close', 'Adj Close'], axis=1)  # Using all columns as features except Close and Adj Close
y = data['Close']

# Split data and assign 20% of the data for testing and 80% for training
# Make sure the data is not being randomly shuffled
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)

# Initializing the regression with the assigned training data
model = LinearRegression()
model.fit(X_train, y_train)

# Predict what the X_test value is
y_pred = model.predict(X_test)

# Evaluating the MSE between the true (y_yest) values and predicted (y_pred) values
mse = mean_squared_error(y_test, y_pred)
print(f'Mean Squared Error: {mse}')


Mean Squared Error: 0.12256577050149399


In [8]:
# 1. Get the latest `lags` closing prices
latest_closing_prices = data['Close'][-lags:].values

# 2. Construct a feature vector for the next day
next_day_data = data.iloc[-1].drop(['Close', 'Adj Close']).copy()

for i, price in enumerate(reversed(latest_closing_prices)):
    next_day_data[f'lag_{i+1}'] = price

# Convert to DataFrame to maintain feature names
next_day_features = pd.DataFrame([next_day_data.values], columns=next_day_data.index)

# 3. Predict the closing price using your trained model
predicted_next_day_close = model.predict(next_day_features)

print(f"Predicted closing price for the next trading day: ${predicted_next_day_close[0]:.2f}")



Predicted closing price for the next trading day: $36.79
