In [1]:
from google.colab import drive
drive.mount('/content/drive', force_remount=True)

Mounted at /content/drive


In [2]:
import torch
from torch.utils.data import Dataset, DataLoader
import pandas as pd
from sklearn.preprocessing import StandardScaler
import math
import torch
import torch.nn as nn
import time
import matplotlib.pyplot as plt
import numpy as np
import os

torch.set_printoptions(sci_mode=False, precision=4, threshold=5000)

num_layers = 6 #Not tuned
num_heads = 8 #Not tuned
dim_feedforward = 512 # Not tuned
input_sequence_length = 2340 #6-day lookback window (assuming non-extended hour data) to ensure a friday can always see price action from the previous friday
prediction_length = 1 #Predict the next minute
NUM_WORKERS = 2 #Not Tuned
BATCH_SIZE = 64 #Not Tuned - change to 1 for inference code at the bottom
LEARNING_RATE = 0.0001


model_save_path = '/content/drive/My Drive/Jackie_Net_Models/model_training_2004_2024_save.pth'
# Path where the model is saved
model_load_path = '/content/drive/My Drive/Jackie_Net_Models/model_training_2008_2024_save.pth'


# Check if CUDA (GPU support) is available
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print("Using device:", device)

# Set torch print options for better readability
torch.set_printoptions(sci_mode=False, precision=4, threshold=5000)

# Function definitions
def scale_data(df):
    # Separate the datetime columns
    datetime_columns = df[['Year', 'Month', 'Day', 'Hour', 'Minute']]
    numerical_data = df.drop(columns=['Year', 'Month', 'Day', 'Hour', 'Minute'])

    # Normalize the numerical data
    scaler = StandardScaler()
    scaled_numerical_data = scaler.fit_transform(numerical_data)
    scaled_numerical_data_df = pd.DataFrame(scaled_numerical_data, columns=numerical_data.columns)

    # Concatenate the datetime columns back
    df_scaled = pd.concat([scaled_numerical_data_df, datetime_columns], axis=1)

    return df_scaled, scaler

#Check function to inspect if descaling is applied correctly

def descale_data(df_scaled, scaler):
    # Assume the last 5 columns are datetime columns for simplicity
    datetime_columns = df_scaled.iloc[:, -5:]
    numerical_data_scaled = df_scaled.iloc[:, :-5]

    # Inverse transform the numerical data
    numerical_data_descaled = scaler.inverse_transform(numerical_data_scaled)
    numerical_data_descaled_df = pd.DataFrame(numerical_data_descaled, columns=numerical_data_scaled.columns)

    # Concatenate the datetime columns back
    df_descaled = pd.concat([numerical_data_descaled_df, datetime_columns], axis=1)

    return df_descaled

def descale_tensor(tensor, scaler, column_names):
    """
    Descales a tensor output from the model, reintegrates datetime columns, and reconstructs a DataFrame
    with an identical structure as the original scaled DataFrame.

    :param tensor: The tensor output from the model.
    :param scaler: The scaler used to scale the data initially.
    :param original_df_columns: The list of all column names from the original DataFrame.
    :param datetime_columns_names: The list of datetime column names in the original DataFrame.
    :return: A DataFrame with descaled data and a structure identical to the scaled DataFrame.
    """
     # Ensure tensor is in CPU and convert it to numpy
    print(f"Original tensor shape: {tensor.shape}")  # Debug: Check the shape before any operation
    data_numpy = tensor.cpu().detach().numpy().reshape(1, -1)  # Reshape the tensor
    print(f"Reshaped array shape: {data_numpy.shape}")  # Debug: Verify the shape after reshaping


    # Separate the last 5 columns as datetime columns for simplicity
    datetime_data = data_numpy[:,-5:]
    numerical_data_scaled = data_numpy[:,:-5]

     # Inverse transform the numerical data
    numerical_data_descaled = scaler.inverse_transform(numerical_data_scaled)

    # Concatenate the descaled numerical data with the datetime data
    descaled_data = np.concatenate([numerical_data_descaled, datetime_data], axis=1)
    # Assuming datetime data were stored or can be reconstructed similarly to how they were handled originally
    # For simplicity, this example doesn't handle the reconstruction of datetime columns
    # You would need to ensure you have this data available or reconstruct it as needed

    # Create a DataFrame with the descaled data and original column names
    df_descaled = pd.DataFrame(descaled_data, columns=column_names)

    return df_descaled


class StockDataset(Dataset):
    def __init__(self, data, input_sequence_length, prediction_length):
        if isinstance(data, pd.DataFrame):
            # Convert DataFrame to NumPy array
            data = data.values
        self.data = torch.tensor(data, dtype=torch.float32)
        self.input_sequence_length = input_sequence_length
        self.prediction_length = prediction_length

    def __len__(self):
        return len(self.data) - self.input_sequence_length - self.prediction_length + 1

    def __getitem__(self, index):
        # Since data is in reverse chronological order, adjust indexing accordingly
        # The target sequence immediately follows the input sequence in chronological order,
        # which means it precedes in the dataset's order
        x_start = index + self.prediction_length
        x_end = x_start + self.input_sequence_length
        y_start = index
        y_end = index + self.prediction_length

        x = self.data[x_start:x_end]
        y = self.data[y_start:y_end]

        # Reverse x to have it in chronological order for processing
        x = x.flip(dims=[0])

        return x, y.squeeze(), index  # Return the index as well


# Example usage
# Read the Parquet file
path_transformer_input = '/content/drive/My Drive/Chrono_Jackie_Net/price_data_pt_2004_2008_pp.parquet'
df = pd.read_parquet(path_transformer_input)
print(df)

# Scale the data
df_scaled, scaler = scale_data(df)
print(df_scaled)

# Get the numerical column names from the original DataFrame, excluding the datetime columns
column_names = df_scaled.columns.tolist()

# Descale the data (for demonstration)
df_descaled = descale_data(df_scaled, scaler)
print(df_descaled)

# Create dataset and dataloader
dataset = StockDataset(df_scaled, input_sequence_length, prediction_length)
dataloader = DataLoader(dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=NUM_WORKERS)

num_batches = len(dataloader)
print(num_batches)

Using device: cuda
        Year  Month  Day  Hour  Minute   QQQ Open  QQQ Volume   SPY Open  \
0       2008      2   29    16       0  37.452999   1748021.0  97.830002   
1       2008      2   29    15      59  37.462002   5252612.0  97.940002   
2       2008      2   29    15      58  37.495998   4343744.0  97.926003   
3       2008      2   29    15      57  37.444000   4551546.0  97.786003   
4       2008      2   29    15      56  37.409000   5352330.0  97.771004   
...      ...    ...  ...   ...     ...        ...         ...        ...   
391852  2004      2    2     9      34  31.857000    413200.0  77.601997   
391853  2004      2    2     9      33  31.849001    259300.0  77.601997   
391854  2004      2    2     9      32  31.823000    215100.0  77.595001   
391855  2004      2    2     9      31  31.813999     95900.0  77.560997   
391856  2004      2    2     9      30  31.823000    650300.0  77.622002   

        SPY Volume  UCO Open  ...   AXP Open  AXP Volume   WBA Open 

In [3]:
class JackieNet(nn.Module):
    def __init__(self, num_features, num_layers, num_heads, dim_feedforward, prediction_length):
        super(JackieNet, self).__init__()
        self.embedding = nn.Linear(num_features, dim_feedforward)
        #self.positional_encoding = PositionalEncoding(dim_feedforward, max_seq_length)

        self.transformer_encoder = nn.TransformerEncoder(
            nn.TransformerEncoderLayer(
                d_model=dim_feedforward,
                nhead=num_heads,
                dim_feedforward=dim_feedforward
            ),
            num_layers=num_layers
        )

        # Initialize the Transformer Decoder
        self.transformer_decoder = nn.TransformerDecoder(
            nn.TransformerDecoderLayer(
                d_model=dim_feedforward,
                nhead=num_heads,
                dim_feedforward=dim_feedforward
            ),
            num_layers=num_layers
        )

        # Final layer to output the prediction for the next timestep
        self.final_layer = nn.Linear(dim_feedforward, prediction_length*num_features)


    def forward(self, src):
        # Embedding and encoding as before
        # src shape: [sequence_length, batch_size, num_features]
        src = self.embedding(src)
        #src = src + self.positional_encoding(src)
        memory = self.transformer_encoder(src)

        # Initialize the decoder input with zeros
        # Shape: [1, batch_size, num_features]
        #decoder_input = torch.zeros(1, src.size(1), src.size(2), device=src.device)
        decoder_input = src[-1, :, :].unsqueeze(0)

        # Transformer Decoder: Process the decoder input
        decoded_output = self.transformer_decoder(decoder_input, memory)

        # Final Forecast: Take the output from the decoder
        # Only take the first timestep (last dimension) from the decoded output
        next_step_forecast = self.final_layer(decoded_output[0])

        # Reshape to [1, batch_size, num_features]
        output = next_step_forecast.unsqueeze(0)

        return output


"""
class PositionalEncoding(nn.Module):
    def __init__(self, d_model, max_len):
        super(PositionalEncoding, self).__init__()
        # Create a long enough positional encoding
        pe = torch.zeros(max_len, d_model)
        position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-math.log(10000.0) / d_model))
        pe[:, 0::2] = torch.sin(position * div_term)
        pe[:, 1::2] = torch.cos(position * div_term)
        self.register_buffer('pe', pe)

    def forward(self, x):
        # Change x to (batch_size, seq_len, features)
        x = x.permute(1, 0, 2)
        # Add positional encoding
        x = x + self.pe[:x.size(1)]
        # Change back to (seq_len, batch_size, features)
        return x.permute(1, 0, 2)
"""

import matplotlib.pyplot as plt

def plot_training_progress(predictions, targets, epoch_losses, epoch):
    fig, axs = plt.subplots(2, 1, figsize=(12, 12))

    # Plot Predictions vs. Targets
    axs[0].plot(predictions, label='Predictions')
    axs[0].plot(targets, label='Targets')
    axs[0].set_title(f'Epoch {epoch}: Predictions vs Targets')
    axs[0].set_xlabel('Sample Index')
    axs[0].set_ylabel('Value')
    axs[0].legend()

    # Plot Training Loss
    axs[1].plot(range(1, epoch + 1), epoch_losses[:epoch], marker='o', linestyle='-')
    axs[1].set_title('Training Loss Over Epochs')
    axs[1].set_xlabel('Epoch')
    axs[1].set_ylabel('Loss')
    axs[1].set_xticks(range(1, epoch + 1))

    plt.tight_layout()
    plt.show()


# Model initialization
num_features = df_scaled.shape[1]

# Initialize the model
model = JackieNet(num_features, num_layers, num_heads, dim_feedforward, prediction_length)
model.to(device)




JackieNet(
  (embedding): Linear(in_features=143, out_features=512, bias=True)
  (transformer_encoder): TransformerEncoder(
    (layers): ModuleList(
      (0-5): 6 x TransformerEncoderLayer(
        (self_attn): MultiheadAttention(
          (out_proj): NonDynamicallyQuantizableLinear(in_features=512, out_features=512, bias=True)
        )
        (linear1): Linear(in_features=512, out_features=512, bias=True)
        (dropout): Dropout(p=0.1, inplace=False)
        (linear2): Linear(in_features=512, out_features=512, bias=True)
        (norm1): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
        (norm2): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
        (dropout1): Dropout(p=0.1, inplace=False)
        (dropout2): Dropout(p=0.1, inplace=False)
      )
    )
  )
  (transformer_decoder): TransformerDecoder(
    (layers): ModuleList(
      (0-5): 6 x TransformerDecoderLayer(
        (self_attn): MultiheadAttention(
          (out_proj): NonDynamicallyQuantizableLin

In [None]:
import os

# Check if the saved model exists and load it
if os.path.isfile(model_load_path):
    model.load_state_dict(torch.load(model_load_path))
    print(f'Model loaded from {model_load_path}')
else:
    print('No saved model found, starting training from scratch.')

loss_fn = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)
num_epochs = 1

for epoch in range(num_epochs):
    model.train()
    total_loss = 0
    checkpoint_loss = 0
    num_batches = 0
    checkpoint_batches = 0
    all_predictions = []
    all_targets = []

    start_time_epoch = time.time()  # Record the start time of the epoch
    last_check = start_time_epoch  # Initialize the last check time

    for input_sequence, target_sequence, start_index in dataloader:
        # Transpose sequences to match expected shape: (seq_len, batch_size, features)
        input_sequence = input_sequence.transpose(0, 1)  # Shape: (4800, batch_size, num_features)
        #target_sequence = target_sequence.transpose(0, 1)  # Ensure target is correctly shaped

        # Transfer to the device (GPU/CPU)
        input_sequence = input_sequence.to(device)
        target_sequence = target_sequence.to(device)


        #print(f"input_sequence.shape: {input_sequence.shape}")
        #print(f"target_sequence.shape: {target_sequence.shape}")
        #print(f"input_sequence: {input_sequence}")
        #print(f"input_sequence[0]: {input_sequence[0]}")
        #print(f"input_sequence[-1]: {input_sequence[-1]}")
        #print(f"target_sequence: {target_sequence}")


        # Forward pass: The forward method of JackieNet takes only the input sequence
        output = model(input_sequence)  # The model internally handles the start token for the decoder
        #print(f"input: {input_sequence.shape}")
        #print(f"output: {output.shape}")
        #print(f"tgt_sequence: {target_sequence.shape}")
        output_squeezed = torch.squeeze(output, 0)  # Removes the first dimension
        #print(f"output_squeezed: {output_squeezed.shape}")

        #print(f"output: {output}")

        #output_descaled = descale_data(output, scaler)
        #print(f"output: {output_descaled}")

        # Compute loss: Compare the model's selected output with the actual selected target features
        loss = loss_fn(output_squeezed, target_sequence)

        # Backward pass and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        total_loss += loss.item()
        checkpoint_loss += loss.item()
        num_batches += 1
        checkpoint_batches += 1

        current_time = time.time()  # Get the current time
        if current_time - last_check >= 900:  # Check if 5 minutes have passed 300
            print(f'Epoch {epoch+1}, {num_batches} batches processed. Checkpoint_loss: {checkpoint_loss}. Checkpoint_batches: {checkpoint_batches}')
            average_loss = checkpoint_loss / checkpoint_batches
            print(f'CheckpointAverage Loss: {average_loss}')
            checkpoint_loss = 0
            checkpoint_batches = 0

            last_check = current_time  # Update the last check time

            torch.save(model.state_dict(), model_save_path)
            print(f'Model saved to {model_save_path}')


    average_total_loss = total_loss / num_batches
    print(f'Epoch [{epoch+1}/{num_epochs}], Average Loss: {average_loss}')

    # Save the model to Google Drive
    epoch_save_path = f'/content/drive/My Drive/Jackie_Net_Models/model_2004_2024_epoch_{epoch+1}.pth'
    torch.save(model.state_dict(), model_save_path)
    print(f'Model saved to {epoch_save_path}')



Model loaded from /content/drive/My Drive/Jackie_Net_Models/model_training_2008_2024_save.pth
Epoch 1, 499 batches processed. Checkpoint_loss: 1723.5361771583557. Checkpoint_batches: 499
CheckpointAverage Loss: 3.453980314946605
Model saved to /content/drive/My Drive/Jackie_Net_Models/model_training_2004_2024_save.pth
Epoch 1, 998 batches processed. Checkpoint_loss: 1742.5525414943695. Checkpoint_batches: 499
CheckpointAverage Loss: 3.4920892615117625
Model saved to /content/drive/My Drive/Jackie_Net_Models/model_training_2004_2024_save.pth
Epoch 1, 1497 batches processed. Checkpoint_loss: 1635.1425139904022. Checkpoint_batches: 499
CheckpointAverage Loss: 3.2768387053915875
Model saved to /content/drive/My Drive/Jackie_Net_Models/model_training_2004_2024_save.pth
Epoch 1, 1996 batches processed. Checkpoint_loss: 1103.3105654716492. Checkpoint_batches: 499
CheckpointAverage Loss: 2.2110432173780543
Model saved to /content/drive/My Drive/Jackie_Net_Models/model_training_2004_2024_save.p

In [None]:
model.load_state_dict(torch.load(model_load_path))

# Prepare the model for inference
model.eval()

for input_sequence, target_sequence, start_index in dataloader:
    break  # This will leave `input_sequence` containing the first batch

# If your model expects the input in a specific shape, ensure to reshape it accordingly
# For example, if it expects (seq_len, batch_size, num_features):
input_sequence = input_sequence.transpose(0, 1)

# Transfer to the device (GPU/CPU)
input_sequence = input_sequence.to(device)
target_sequence = target_sequence.to(device)

# Perform inference
with torch.no_grad():  # No need to track gradients
    output = model(input_sequence)

# Process the output as needed (this step is highly application-specific)
# For example, if you need to descale your output:
# output_descaled = descale_data(output, scaler)

print(output.view(1,-1).shape)  # Or any other post-processing
#adjusted_output = output.squeeze()

descaled_input_last_minute = descale_tensor(input_sequence[-1].view(1,-1), scaler, column_names)
descaled_output = descale_tensor(output.view(1,-1), scaler, column_names)
descaled_target = descale_tensor(target_sequence.view(1, -1), scaler, column_names)  # Reshape target_sequence similarly if needed

print("Descaled Input last minute: ", descaled_input_last_minute)
print("Descaled Output: ",descaled_output)
print("Descaled Target: ", descaled_target)


torch.Size([1, 143])
Original tensor shape: torch.Size([1, 143])
Reshaped array shape: (1, 143)
Original tensor shape: torch.Size([1, 143])
Reshaped array shape: (1, 143)
Original tensor shape: torch.Size([1, 143])
Reshaped array shape: (1, 143)
Descaled Input last minute:       QQQ Open  QQQ Volume    SPY Open  SPY Volume   UCO Open  UCO Volume  \
0  303.192993    114874.0  387.899994    107674.0  21.940001     14518.0   

   CORN Open  CORN Volume  WEAT Open  WEAT Volume  ...    HD Volume  GME Open  \
0  24.712999        707.0      7.047   350.000122  ...  1661.999878     16.74   

    GME Volume     BA Open   BA Volume    Year  Month   Day  Hour  Minute  
0  4058.999023  203.789993  658.000061  2023.0    3.0  20.0  14.0    38.0  

[1 rows x 143 columns]
Descaled Output:       QQQ Open  QQQ Volume    SPY Open     SPY Volume   UCO Open    UCO Volume  \
0  306.891022     62755.5  385.774323  126847.265625  23.590811  21544.390625   

   CORN Open  CORN Volume  WEAT Open  WEAT Volume  .