In [None]:
!pip install lightning
!pip install torch
!pip install torchmetrics

Collecting lightning
  Downloading lightning-2.2.1-py3-none-any.whl (2.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.1/2.1 MB[0m [31m11.7 MB/s[0m eta [36m0:00:00[0m
Collecting lightning-utilities<2.0,>=0.8.0 (from lightning)
  Downloading lightning_utilities-0.10.1-py3-none-any.whl (24 kB)
Collecting torchmetrics<3.0,>=0.7.0 (from lightning)
  Downloading torchmetrics-1.3.1-py3-none-any.whl (840 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m840.4/840.4 kB[0m [31m18.1 MB/s[0m eta [36m0:00:00[0m
Collecting pytorch-lightning (from lightning)
  Downloading pytorch_lightning-2.2.1-py3-none-any.whl (801 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m801.6/801.6 kB[0m [31m18.3 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: lightning-utilities, torchmetrics, pytorch-lightning, lightning
Successfully installed lightning-2.2.1 lightning-utilities-0.10.1 pytorch-lightning-2.2.1 torchmetrics-1.3.1


In [None]:
!pip install -U scikit-learn

Collecting scikit-learn
  Downloading scikit_learn-1.4.1.post1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (12.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m12.1/12.1 MB[0m [31m22.4 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: scikit-learn
  Attempting uninstall: scikit-learn
    Found existing installation: scikit-learn 1.2.2
    Uninstalling scikit-learn-1.2.2:
      Successfully uninstalled scikit-learn-1.2.2
Successfully installed scikit-learn-1.4.1.post1


In [None]:
from lightning.pytorch import LightningModule
from torchmetrics.regression import MeanSquaredError
import torch.nn as nn

class LinearRegressionModel(LightningModule):
  def __init__(self, num_input_features):
    super(LinearRegressionModel, self).__init__()
    self.linear = nn.Linear(num_input_features, 1)
    self.mse = MeanSquaredError()

  def forward(self, x):
    return self.linear(x)

  def loss(self, y_pred, y_true):
    return nn.functional.mse_loss(y_pred, y_true.view(-1, 1))

  def training_step(self, batch, batch_index):
    inputs, targets = batch
    outputs = self.forward(inputs)
    loss = self.loss(outputs, targets)

    self.log('train_loss', loss)
    self.log('train_acc', self.mse(outputs, targets), prog_bar=True)
    return loss

  def validation_step(self, batch, batch_index):
    inputs, targets = batch
    outputs = self.forward(inputs)
    val_loss = self.loss(outputs, targets)

    # Log validation loss and MSE as "accuracy"
    self.log('val_loss', val_loss, prog_bar = True)
    self.log('val_acc', self.mse(outputs, targets), prog_bar = True)


  def configure_optimizers(self):
    return torch.optim.Adam(self.parameters(), lr=0.01)





In [None]:
import torch
from sklearn.preprocessing import StandardScaler

def pre_process(data):
  # Shift for tommorrows day close and use previous day data as inputs
  data['tmr_close']=data['Close'].shift(-1)
  data - data.iloc[:-1]

  # Converting data into tensors
  input_features = data.drop(columns=['tmr_close', 'Volume'])
  input_tensor = torch.Tensor(input_features.values).type(torch.float32)

  targets = data['tmr_close']
  targets_tensor = torch.Tensor(targets.values).type(torch.float32)


  targets = data['tmr_close']
  targets_tensor = torch.Tensor(targets.values).type(torch.float32).view(-1,1)


  return input_tensor, targets_tensor, input_features, targets


In [None]:
from datetime import datetime
import yfinance as yf

time_period = "max"
start_date = "2000-01-01"
end_date = datetime.now().strftime("%Y-%m-%d")
time_interval = "1d"

appl = yf.Ticker('AAPL')
data = appl.history(period=time_period, interval=time_interval,)
data.index = data.index.date

In [8]:
import torch
from torch.utils.data import (
    TensorDataset,
    DataLoader,
)

input_tensor, targets_tensor, input_features, tragets=pre_process(data)

# Define the percentages for training, validation and test sets
train_percent = 0.7
val_percent = 0.1
test_percent = 0.1

# Calculate the split indices
train_split = int(train_percent * len(input_tensor))
val_split = int((train_percent + val_percent) * len(input_tensor))

# Create datasets
train_dataset = TensorDataset(input_tensor[:train_split], targets_tensor[:train_split])
val_dataset = TensorDataset(input_tensor[train_split:val_split], targets_tensor[train_split:val_split])

batch_size = 32
#dont want to shuffle because the price is sequential data
train_dataloader = DataLoader(train_dataset, shuffle=False, batch_size=batch_size)
val_dataloader = DataLoader(val_dataset, shuffle=False, batch_size=batch_size)


In [9]:
# Training
from lightning.pytorch.callbacks.early_stopping import EarlyStopping
from lightning.pytorch.callbacks import ModelCheckpoint
from lightning.pytorch import Trainer
from lightning.pytorch import LightningModule
from lightning.pytorch.loggers import CSVLogger
import torch.nn as nn

input_features = input_tensor.shape[-1]
model = LinearRegressionModel(input_features)

#Define EarlyStopping callback to stop training based on validation loss
early_stop_callback = EarlyStopping(
    monitor = 'val_loss',
    patience=20,
    verbose=False,
    mode='min',
)

# Define ModelCheckpoint callback to save the best model
checkpoint_callback = ModelCheckpoint(
    dirpath='./checkpoints/linear',
    filename='linear_model',
    monitor='val_loss',
    mode='min',
    save_top_k=1
)

#Define CSVLogger to log the training progress
logger = CSVLogger('./test_logs', name='Linear')

#Define the Lightning Trainer
trainer = Trainer(
    max_epochs=200,
    callbacks=[early_stop_callback, checkpoint_callback],
    logger=logger
)

trainer.fit(
    model,
    train_dataloaders=train_dataloader,
    val_dataloaders=val_dataloader
)
torch.save(model, './models/linear_model.pth')


INFO: GPU available: True (cuda), used: True
INFO:lightning.pytorch.utilities.rank_zero:GPU available: True (cuda), used: True
INFO: TPU available: False, using: 0 TPU cores
INFO:lightning.pytorch.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO: IPU available: False, using: 0 IPUs
INFO:lightning.pytorch.utilities.rank_zero:IPU available: False, using: 0 IPUs
INFO: HPU available: False, using: 0 HPUs
INFO:lightning.pytorch.utilities.rank_zero:HPU available: False, using: 0 HPUs
/usr/local/lib/python3.10/dist-packages/lightning/pytorch/callbacks/model_checkpoint.py:653: Checkpoint directory /content/checkpoints/linear exists and is not empty.
INFO: LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:lightning.pytorch.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO: 
  | Name   | Type             | Params
--------------------------------------------
0 | linear | Linear           | 7     
1 | mse    | MeanSquaredError | 0     
---------------------------------

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

In [11]:
from lightning.pytorch import LightningModule
from torchmetrics.regression import MeanSquaredError
import torch.nn as nn

# MLP MODEL
class myMLPModel(LightningModule):
  def __init__(self, num_input_features, hidden_features):
      super().__init__()
      self.model = nn.Sequential(
          nn.Linear(num_input_features, hidden_featuers),
          nn.ReLU(),
          nn.Linear(hidden_features, 1)
,      )
      self.mse = MeanSquareError()

  def forward(self, x):
      return self.model(x)

  def loss(self, y_pred, y_true):
      return nn.functional.mse_loss(y_pred, y_true.view(-1,1))

  def trainig_step(self, batch, batch_index):
      inputs, targets = batch
      ouputs = self.forward(inputs)
      loss = self.loss(outputs, targets)

      self.log('train_loss', loss)
      self.log('train_acc', self.mse(outputs, targets), prog_bar = True)
      return loss

  def validation_step(self, batch, batch_index):
      inputs, targets = batch
      ouputs = self.forward(inputs)
      loss = self.loss(outputs, targets)

      #log validation loss and MSE as "accuracy"
      self.log('val_loss', val_loss, prog_bar=True)
      self.log('val_acc', self.mse(outputs, targets), prog_bar=True)

  def configure_optimizers(self):
    return torch.optim.Adam(self. parameters(), lr=0.001)

In [12]:
import torch
from sklearn.preprocessing import StandardScaler

def pre_process(data):
  # Shift for tommorrows day close and use previous day data as inputs
  data['tmr_close']=data['Close'].shift(-1)
  data - data.iloc[:-1]

  # Converting data into tensors
  input_features = data.drop(columns=['tmr_close', 'Volume'])
  input_tensor = torch.Tensor(input_features.values).type(torch.float32)

  targets = data['tmr_close']
  targets_tensor = torch.Tensor(targets.values).type(torch.float32).view(-1,1)


  return input_tensor, targets_tensor, input_features, targets


In [13]:
from datetime import datetime
import yfinance as yf

time_period = "max"
start_date = "2000-01-01"
end_date = datetime.now().strftime("%Y-%m-%d")
time_interval = "1d"

appl = yf.Ticker('AAPL')
data = appl.history(period=time_period, interval=time_interval,)
data.index = data.index.date

In [14]:
import torch
from torch.utils.data import (
    TensorDataset,
    DataLoader,
)

input_tensor, targets_tensor, input_features, tragets=pre_process(data)

# Define the percentages for training, validation and test sets
train_percent = 0.7
val_percent = 0.1
test_percent = 0.1

# Calculate the split indices
train_split = int(train_percent * len(input_tensor))
val_split = int((train_percent + val_percent) * len(input_tensor))

# Create datasets
train_dataset = TensorDataset(input_tensor[:train_split], targets_tensor[:train_split])
val_dataset = TensorDataset(input_tensor[train_split:val_split], targets_tensor[train_split:val_split])

batch_size = 32
#dont want to shuffle because the price is sequential data
train_dataloader = DataLoader(train_dataset, shuffle=False, batch_size=batch_size)
val_dataloader = DataLoader(val_dataset, shuffle=False, batch_size=batch_size)


In [15]:
# Training
from lightning.pytorch.callbacks.early_stopping import EarlyStopping
from lightning.pytorch.callbacks import ModelCheckpoint
from lightning.pytorch import Trainer
from lightning.pytorch import LightningModule
from lightning.pytorch.loggers import CSVLogger
import torch.nn as nn

input_features = input_tensor.shape[-1]
model = LinearRegressionModel(input_features)

#Define EarlyStopping callback to stop training based on validation loss
early_stop_callback = EarlyStopping(
    monitor = 'val_loss',
    patience=20,
    verbose=False,
    mode='min',
)

# Define ModelCheckpoint callback to save the best model
checkpoint_callback = ModelCheckpoint(
    dirpath='./checkpoints/mlp',
    filename='mlp_model',
    monitor='val_loss',
    mode='min',
    save_top_k=1
)

#Define CSVLogger to log the training progress
logger = CSVLogger('./test_logs', name='MLP')

#Define the Lightning Trainer
trainer = Trainer(
    max_epochs=200,
    callbacks=[early_stop_callback, checkpoint_callback],
    logger=logger
)

trainer.fit(
    model,
    train_dataloaders=train_dataloader,
    val_dataloaders=val_dataloader
)
torch.save(model, 'models/mlp_model.pth')


INFO: GPU available: True (cuda), used: True
INFO:lightning.pytorch.utilities.rank_zero:GPU available: True (cuda), used: True
INFO: TPU available: False, using: 0 TPU cores
INFO:lightning.pytorch.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO: IPU available: False, using: 0 IPUs
INFO:lightning.pytorch.utilities.rank_zero:IPU available: False, using: 0 IPUs
INFO: HPU available: False, using: 0 HPUs
INFO:lightning.pytorch.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO: LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:lightning.pytorch.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO: 
  | Name   | Type             | Params
--------------------------------------------
0 | linear | Linear           | 7     
1 | mse    | MeanSquaredError | 0     
--------------------------------------------
7         Trainable params
0         Non-trainable params
7         Total params
0.000     Total estimated model params size (MB)
INFO:lightning.pytorch.ca

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

In [16]:
from lightning.pytorch import LightningModule
from torchmetrics.regression import MeanSquaredError
import torch.nn as nn

# MLP MODEL
class SimpleRNN(LightningModule):
  def __init__(self, num_input_features, hidden_features, total_layers, dropout_rate):
      super().__init__()

      self.rnn = nn.RNN(num_input_features, hidden_features, num_layers=total_layers, batch_first=True)
      self.dropout = nn.Dropout(p=dropout_rate)
      self.linear = nn.Linear(hidden_features, 1)

      self.mse = nn.MSELoss()

  def forward(self, x):
      x, _ = self.rnn(x)
      x = self.dropout(x)
      x = self.linear(x[:, -1, :])
      return x

  def loss(self, y_pred, y_true):
      return nn.functional.mse_loss(y_pred, y_true.view(-1,1))

  def trainig_step(self, batch, batch_index):
      inputs, targets = batch
      ouputs = self.forward(inputs)
      loss = self.loss(outputs, targets)

      self.log('train_loss', loss)
      self.log('train_acc', self.mse(outputs, targets), prog_bar = True)
      return loss

  def validation_step(self, batch, batch_index):
      inputs, targets = batch
      ouputs = self.forward(inputs)
      loss = self.loss(outputs, targets)

      #log validation loss and MSE as "accuracy"
      self.log('val_loss', val_loss, prog_bar=True)
      self.log('val_acc', self.mse(outputs, targets), prog_bar=True)

  def configure_optimizers(self):
    return torch.optim.Adam(self. parameters(), lr=0.001)

In [17]:
import torch
from sklearn.preprocessing import StandardScaler

def pre_process(data):
  # Shift for tommorrows day close and use previous day data as inputs
  data['tmr_close']=data['Close'].shift(-1)
  data - data.iloc[:-1]

  # Converting data into tensors
  input_features = data.drop(columns=['tmr_close', 'Volume'])
  input_tensor = torch.Tensor(input_features.values).type(torch.float32)

  targets = data['tmr_close']
  targets_tensor = torch.Tensor(targets.values).type(torch.float32).view(-1,1)


  return input_tensor, targets_tensor, input_features, targets


In [18]:
def create_sequences(input_feature_tensor, targets_tensor, sequence_length):
    # Takes the last 7 days worth of data to predict the price after 7 days
    sequences = []
    target_values = []

    for i in range(len(input_tensor) - sequence_length):
      sequence = input_feature_tensor[i : i + sequence_length]
      target = targets_tensor[i + sequence_length - 1] # Assuming the target is the last value in the sequence
      sequences.append(sequence)
      target_values.append(target)

    # Convert to PyTorch tensors
    input_sequences_tensor = torch.stack(sequences)
    target_value_tensor = torch.stack(target_values)

    return input_sequences_tensor, target_value_tensor

In [19]:
from datetime import datetime
import yfinance as yf

time_period = "max"
start_date = "2000-01-01"
end_date = datetime.now().strftime("%Y-%m-%d")
time_interval = "1d"

appl = yf.Ticker('AAPL')
data = appl.history(period=time_period, interval=time_interval,)
data.index = data.index.date

look_back_days = 10
input_tensor, targets_tensor, input_features, targets = pre_process(data)
input_sequences, target_sequences = create_sequences(input_tensor, targets_tensor, look_back_days)

In [20]:
import torch
from torch.utils.data import (
    TensorDataset,
    DataLoader,
)

train_split = int(0.95 * len(input_sequences))
val_split = int(0.05 * len(input_sequences))

# Create datasets
train_dataset = TensorDataset(input_tensor[:train_split], targets_tensor[:train_split])
val_dataset = TensorDataset(input_tensor[train_split:], targets_tensor[train_split:])

batch_size = 32
#dont want to shuffle because the price is sequential data
train_dataloader = DataLoader(train_dataset, shuffle=False, batch_size=batch_size)
val_dataloader = DataLoader(val_dataset, shuffle=False, batch_size=batch_size)


In [21]:
# Training
from lightning.pytorch.callbacks.early_stopping import EarlyStopping
from lightning.pytorch.callbacks import ModelCheckpoint
from lightning.pytorch import Trainer
from lightning.pytorch import LightningModule
from lightning.pytorch.loggers import CSVLogger
import torch.nn as nn

input_features = input_tensor.shape[-1]
model = LinearRegressionModel(input_features)

#Define EarlyStopping callback to stop training based on validation loss
early_stop_callback = EarlyStopping(
    monitor = 'val_loss',
    patience=20,
    verbose=False,
    mode='min',
)

# Define ModelCheckpoint callback to save the best model
checkpoint_callback = ModelCheckpoint(
    dirpath='./checkpoints/rnn',
    filename='rnn_model',
    monitor='val_loss',
    mode='min',
    save_top_k=1
)

#Define CSVLogger to log the training progress
logger = CSVLogger('./test_logs', name='RNN')

#Define the Lightning Trainer
trainer = Trainer(
    max_epochs=200,
    callbacks=[early_stop_callback, checkpoint_callback],
    logger=logger
)

trainer.fit(
    model,
    train_dataloaders=train_dataloader,
    val_dataloaders=val_dataloader
)
torch.save(model, 'models/rnn_model.pth')


INFO: GPU available: True (cuda), used: True
INFO:lightning.pytorch.utilities.rank_zero:GPU available: True (cuda), used: True
INFO: TPU available: False, using: 0 TPU cores
INFO:lightning.pytorch.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO: IPU available: False, using: 0 IPUs
INFO:lightning.pytorch.utilities.rank_zero:IPU available: False, using: 0 IPUs
INFO: HPU available: False, using: 0 HPUs
INFO:lightning.pytorch.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO: LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:lightning.pytorch.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO: 
  | Name   | Type             | Params
--------------------------------------------
0 | linear | Linear           | 7     
1 | mse    | MeanSquaredError | 0     
--------------------------------------------
7         Trainable params
0         Non-trainable params
7         Total params
0.000     Total estimated model params size (MB)
INFO:lightning.pytorch.ca

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

In [22]:
from lightning.pytorch import LightningModule
import torch.nn as nn
import torch

# LSTM MODEL
class LSTMModel(LightningModule):
  def __init__(self, input_size, num_stacked_layers=1):
      suoer().__init__()
      self.hidden_size = hidden_size
      self.num_stacked_layers = num_stacked_layers

      self.lstm = nn.LSTM(input_size, num_stacked_layers,
                          batch_first=True)

      self.fc = nn.Linear(hidden_size, 1)

  def forward(self, x):
      batch_size = x.size(0)
      h0 = torch.zeros(self.num_stacked_layers, batch_size, self.hidden_size).to(s.device)
      c0 = torch.zeros(self.num_stacked_layers, batch_size, self.hidden_size).to(x.device)

      out, _ = self.lstm(x, (h0, c0))
      out = self.fc(out[:, -1, :])
      return out

  def loss(self, y_pred, y_true):
      return nn.MSELoss()(y_pred, y_true)

  def trainig_step(self, batch, batch_index):
      inputs, targets = batch
      targets = targets[:,0].view(-1,1)
      ouputs = self.forward(inputs)
      loss = self.loss(outputs, targets)

      self.log('train_loss', loss)
      return loss

  def validation_step(self, batch, batch_index):
      inputs, targets = batch
      targets = targets[:,0].view(-1,1)
      ouputs = self.forward(inputs)
      loss = self.loss(outputs, targets)

      #log validation loss
      self.log('val_loss', val_loss, prog_bar=True)

  def configure_optimizers(self):
    return torch.optim.Adam(self. parameters(), lr=0.001)

In [23]:
import torch
from sklearn.preprocessing import StandardScaler

def pre_process(data):
  # Shift for tommorrows day close and use previous day data as inputs
  data['tmr_close']=data['Close'].shift(-1)
  data - data.iloc[:-1]

  # Converting data into tensors
  input_features = data.drop(columns=['tmr_close', 'Volume'])
  input_tensor = torch.Tensor(input_features.values).type(torch.float32)

  targets = data['tmr_close']
  targets_tensor = torch.Tensor(targets.values).type(torch.float32).view(-1,1)


  return input_tensor, targets_tensor, input_features, targets


In [24]:
def create_sequences(input_feature_tensor, targets_tensor, sequence_length):
    # Takes the last 7 days worth of data to predict the price after 7 days
    sequences = []
    target_values = []

    for i in range(len(input_tensor) - sequence_length):
      sequence = input_feature_tensor[i : i + sequence_length]
      target = targets_tensor[i + sequence_length - 1] # Assuming the target is the last value in the sequence
      sequences.append(sequence)
      target_values.append(target)

    # Convert to PyTorch tensors
    input_sequences_tensor = torch.stack(sequences)
    target_value_tensor = torch.stack(target_values)

    return input_sequences_tensor, target_value_tensor

In [25]:
from datetime import datetime
import yfinance as yf

time_period = "max"
start_date = "2000-01-01"
end_date = datetime.now().strftime("%Y-%m-%d")
time_interval = "1d"

appl = yf.Ticker('AAPL')
data = appl.history(period=time_period, interval=time_interval,)
data.index = data.index.date

look_back_days = 10
input_tensor, targets_tensor, input_features, targets = pre_process(data)
input_sequences, target_sequences = create_sequences(input_tensor, targets_tensor, look_back_days)

In [26]:
import torch
from torch.utils.data import (
    TensorDataset,
    DataLoader,
)

train_split = int(0.95 * len(input_sequences))
val_split = int(0.05 * len(input_sequences))

# Create datasets
train_dataset = TensorDataset(input_tensor[:train_split], targets_tensor[:train_split])
val_dataset = TensorDataset(input_tensor[train_split:], targets_tensor[train_split:])

batch_size = 32
#dont want to shuffle because the price is sequential data
train_dataloader = DataLoader(train_dataset, shuffle=False, batch_size=batch_size)
val_dataloader = DataLoader(val_dataset, shuffle=False, batch_size=batch_size)


In [27]:
# Training
from lightning.pytorch.callbacks.early_stopping import EarlyStopping
from lightning.pytorch.callbacks import ModelCheckpoint
from lightning.pytorch import Trainer
from lightning.pytorch import LightningModule
from lightning.pytorch.loggers import CSVLogger
import torch.nn as nn

input_features = input_tensor.shape[-1]
model = LinearRegressionModel(input_features)

#Define EarlyStopping callback to stop training based on validation loss
early_stop_callback = EarlyStopping(
    monitor = 'val_loss',
    patience=20,
    verbose=False,
    mode='min',
)

# Define ModelCheckpoint callback to save the best model
checkpoint_callback = ModelCheckpoint(
    dirpath='./checkpoints/lstm',
    filename='lstm_model',
    monitor='val_loss',
    mode='min',
    save_top_k=1
)

#Define CSVLogger to log the training progress
logger = CSVLogger('./test_logs', name='LSTM')

#Define the Lightning Trainer
trainer = Trainer(
    max_epochs=200,
    callbacks=[early_stop_callback, checkpoint_callback],
    logger=logger
)

trainer.fit(
    model,
    train_dataloaders=train_dataloader,
    val_dataloaders=val_dataloader
)
torch.save(model, 'models/lstm_model.pth')


INFO: GPU available: True (cuda), used: True
INFO:lightning.pytorch.utilities.rank_zero:GPU available: True (cuda), used: True
INFO: TPU available: False, using: 0 TPU cores
INFO:lightning.pytorch.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO: IPU available: False, using: 0 IPUs
INFO:lightning.pytorch.utilities.rank_zero:IPU available: False, using: 0 IPUs
INFO: HPU available: False, using: 0 HPUs
INFO:lightning.pytorch.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO: LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:lightning.pytorch.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO: 
  | Name   | Type             | Params
--------------------------------------------
0 | linear | Linear           | 7     
1 | mse    | MeanSquaredError | 0     
--------------------------------------------
7         Trainable params
0         Non-trainable params
7         Total params
0.000     Total estimated model params size (MB)
INFO:lightning.pytorch.ca

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

In [31]:
from copy import deepcopy as dc
from sklearn.preprocessing import MinMaxScaler
def prepare_dataframe(df, n_steps):
    df = dc(df)

    df.set_index('Date', inplace=True)

    for i in range(1, n_steps+1):
        df[f'Close(t-{i})'] = df['Close'].shift(i)

    df.dropna(inplace=True)

    return df

lookback = 40
shifted_df = prepare_dataframe(data, lookback)
shifted_df = shifted_df.to_numpy()
scaler = MinMaxScaler(feature_range=(-1,1))
shifted_df = scaler.fit_transform(shifted_df)
X = shifted_df[:, 1:]
y = shifted_df[:, 0]
split_index = int(len(X) * 0.95) # split with 95%
#split the data
X_train = X[:split_index]
X_test = X[spit_index:]

y_train = y[:split_index]
y_test = y[split_index:]
X_train = X_train.reshape((-1, lookback, 1))
X_test = X_test.reshape((-1, lookback, 1))

y_train = y_train.reshape((-1,1))
y_test = y_test.reshape((-1,1))

X_train = torch.tensor(X_train).float()
X_test = torch.tensor(X_test).float()
y_train = torch.tensor(y_train).float()
y_test = torch.tensor(y_test).float()


KeyError: "None of ['Date'] are in the columns"