In [1]:
import yfinance as yf
import pandas as pd
import numpy as np
#setup
#code
name = 'MSFT'
ticker = yf.Ticker(name) #importÂ data
aapl_df = ticker.history(period="5y") #get data from 5 year period in dataframe
aapl_df.drop(['High','Close','Volume','Dividends','Stock Splits'], axis=1, inplace=True) #only have two columns, not seven
open = np.empty(shape = (1259), dtype = float)
close = np.empty(shape = (1259), dtype = float)

open=aapl_df[['Open']].to_numpy()

In [2]:
X = np.zeros((1260,3))
Y = [0]*1260
#sliding window with a fixed window of 3 days, x is the 3 days, y is the next day outside the window (our prediction)
for i in range(1250):
  X[i] = [open[i][0],open[i+1][0],open[i+2][0]]
  Y[i] = open[i+3][0]

#gives you 3 consecutive days as input and the next day as output
print(X[0], Y[0])

[194.01234671 192.57519545 195.83266627] 202.65425777868654


In [3]:
from sklearn.model_selection import train_test_split
from sklearn import linear_model

#shuffle so that we won't be biased and we want the general pattern of the last 5 years
X_train, X_test, y_train, y_test = train_test_split(
    X, Y, test_size=0.2, random_state=42)

In [4]:
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.metrics import mean_squared_error
import numpy as np

# Example: Convert numpy arrays to PyTorch tensors
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
Y_train_tensor = torch.tensor(y_train, dtype=torch.float32)

X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
Y_test_tensor = torch.tensor(y_test, dtype=torch.float32)

# Ensure Y tensors are 2D (N, 1)
if Y_train_tensor.ndim == 1:
    Y_train_tensor = Y_train_tensor.unsqueeze(1)
if Y_test_tensor.ndim == 1:
    Y_test_tensor = Y_test_tensor.unsqueeze(1)

In [8]:
#multi layer perceptron (MLP) used for regression
class MLPRegressor(nn.Module):
    def __init__(self, input_dim, hidden_dim=64):
        super(MLPRegressor, self).__init__()
        self.model = nn.Sequential(
            nn.Linear(input_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, 1)  # Output 1 value for regression
        )

    def forward(self, x):
        return self.model(x)

# Instantiate model
input_dim = X_train.shape[1]
model = MLPRegressor(input_dim)
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [11]:
# Training loop
n_epochs = 200
for epoch in range(n_epochs):
    model.train()
    optimizer.zero_grad()
    outputs = model(X_train_tensor)
    loss = criterion(outputs, Y_train_tensor)
    loss.backward()
    optimizer.step()
    if epoch % 10 == 0:
        print(f"Epoch {epoch}, Loss: {loss.item():.4f}")

#Convergence : Model loss has stopped decreasing, reaching a plateau

Epoch 0, Loss: 44.3579
Epoch 10, Loss: 45.7850
Epoch 20, Loss: 44.2877
Epoch 30, Loss: 44.3659
Epoch 40, Loss: 44.1931
Epoch 50, Loss: 44.1390
Epoch 60, Loss: 44.0953
Epoch 70, Loss: 44.0466
Epoch 80, Loss: 44.0069
Epoch 90, Loss: 43.9669
Epoch 100, Loss: 43.9286
Epoch 110, Loss: 43.8910
Epoch 120, Loss: 43.8536
Epoch 130, Loss: 43.8162
Epoch 140, Loss: 43.7787
Epoch 150, Loss: 43.7412
Epoch 160, Loss: 43.7036
Epoch 170, Loss: 43.6659
Epoch 180, Loss: 43.6280
Epoch 190, Loss: 43.5896


To decide what activation functions to use in our neural network (MLP Regressor), just do trial and error - here are some we can choose from:
- ReLU
- Tanh
- Softmax
- Sigmoid

In [12]:
# Prediction
model.eval()
with torch.no_grad():
    predictions = model(X_test_tensor).squeeze().numpy()

# Evaluation
mse = mean_squared_error(y_test, predictions)
print(f"Test MSE: {mse:.4f}")

Test MSE: 47.5388
