In [18]:
# Data Collection
import yfinance as yf

# Download historical data for Apple (AAPL) from 2010 to 2021
data = yf.download('AAPL', start='2010-01-01', end='2021-01-01')

# Display the first few rows of the data
print(data.head())


[*********************100%%**********************]  1 of 1 completed

                Open      High       Low     Close  Adj Close     Volume
Date                                                                    
2010-01-04  7.622500  7.660714  7.585000  7.643214   6.461977  493729600
2010-01-05  7.664286  7.699643  7.616071  7.656429   6.473149  601904800
2010-01-06  7.656429  7.686786  7.526786  7.534643   6.370184  552160000
2010-01-07  7.562500  7.571429  7.466071  7.520714   6.358408  477131200
2010-01-08  7.510714  7.571429  7.466429  7.570714   6.400679  447610800





In [19]:
# Data Preprocessing

# Calculate the 50-day Simple Moving Average (SMA)
data['SMA_50'] = data['Close'].rolling(window=50).mean()

# Drop rows with missing values
data = data.dropna()

# Display the first few rows of the updated data
print(data.head())


                Open      High       Low     Close  Adj Close     Volume  \
Date                                                                       
2010-03-16  8.006429  8.035000  7.946786  8.016071   6.777211  446908000   
2010-03-17  8.032143  8.087500  7.973929  8.004286   6.767245  450956800   
2010-03-18  8.003571  8.035714  7.950357  8.023214   6.783249  342109600   
2010-03-19  8.028214  8.044286  7.901071  7.937500   6.710782  559445600   
2010-03-22  7.873929  8.071429  7.862500  8.026786   6.786268  456419600   

              SMA_50  
Date                  
2010-03-16  7.391336  
2010-03-17  7.398557  
2010-03-18  7.405893  
2010-03-19  7.413950  
2010-03-22  7.424071  


In [20]:
# Feature Selection

# Features: 'Close' price and 'SMA_50'
# Simple Moving Average (SMA): A commonly used technical indicator that smooths out price data.
# rolling(window=50).mean(): Calculates the 50-day moving average of the 'Close' price.
features = data[['Close', 'SMA_50']]

# Target: Next day's 'Close' price, shifted by -1 to align with the current day
target = data['Close'].shift(-1)

# Drop the last row of features and target since the target has NaN
features = features[:-1]
target = target.dropna()

# Input features are the pieces of information that you give to the model to help it make predictions.
# e.g. features that describe different aspects of a house: size, number of bedrooms, number of bathrooms

# The target variable is what you want the model to predict. It's the final outcome or the result you're interested in.
# e.g. the predicted value of the house

print(features.head())
print(target.head())


               Close    SMA_50
Date                          
2010-03-16  8.016071  7.391336
2010-03-17  8.004286  7.398557
2010-03-18  8.023214  7.405893
2010-03-19  7.937500  7.413950
2010-03-22  8.026786  7.424071
Date
2010-03-16    8.004286
2010-03-17    8.023214
2010-03-18    7.937500
2010-03-19    8.026786
2010-03-22    8.155714
Name: Close, dtype: float64


In [21]:
# Model Training

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression

# Split data into training and testing sets (80% train, 20% test)
X_train, X_test, y_train, y_test = train_test_split(features, target, test_size=0.2, random_state=42)

# Initialize and train a Linear Regression model
# X_train: This represents the training set's input features.
# Each row corresponds to a single training example, and each column corresponds to a feature used to make predictions.
# y_train: This represents the training set's target variable.
# Each value in y_train corresponds to the target outcome for the corresponding row in X_train.
model = LinearRegression()
model.fit(X_train, y_train)


In [23]:
# Evaluation

from sklearn.metrics import mean_squared_error

# Make predictions on the test set
predictions = model.predict(X_test)

# Calculate Mean Squared Error (MSE)
mse = mean_squared_error(y_test, predictions)

print(f'Mean Squared Error: {mse}')


Mean Squared Error: 0.9345793666817325
