You will need a `kaggle.json` file to give permissions for getting data sets. Go to https://www.kaggle.com/ , create an account, go to settings -> account ->  'Create New Token'. This will trigger the download of kaggle.json, a file containing your API credentials. Upload the .json below.

The following blocks get kaggle set up and download the stock and etf dataset

In [3]:
! pip install -q kaggle
! pip install numpy pandas matplotlib scikit-learn tensorflow tqdm
from google.colab import files
files.upload()
! mkdir ~/.kaggle
! cp kaggle.json ~/.kaggle/
! chmod 600 ~/.kaggle/kaggle.json
! kaggle datasets download -d jacksoncrow/stock-market-dataset
! kaggle datasets download -d heresjohnnyv/congress-investments
!unzip stock-market-dataset.zip
!unzip congress-investments.zip




[1;30;43mStreaming output truncated to the last 5000 lines.[0m
  inflating: stocks/CAPR.csv         
  inflating: stocks/CAR.csv          
  inflating: stocks/CARA.csv         
  inflating: stocks/CARE.csv         
  inflating: stocks/CARG.csv         
  inflating: stocks/CARO.csv         
  inflating: stocks/CARR#.csv        
  inflating: stocks/CARS.csv         
  inflating: stocks/CARV.csv         
  inflating: stocks/CASA.csv         
  inflating: stocks/CASH.csv         
  inflating: stocks/CASI.csv         
  inflating: stocks/CASS.csv         
  inflating: stocks/CASY.csv         
  inflating: stocks/CAT.csv          
  inflating: stocks/CATB.csv         
  inflating: stocks/CATC.csv         
  inflating: stocks/CATM.csv         
  inflating: stocks/CATO.csv         
  inflating: stocks/CATS.csv         
  inflating: stocks/CATY.csv         
  inflating: stocks/CB.csv           
  inflating: stocks/CBAN.csv         
  inflating: stocks/CBAT.csv         
  inflating: stocks/CBA

In [None]:
## Imports
import numpy as np
import pandas as pd
import os
import random
import collections
from collections import Counter
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.utils import class_weight
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import LSTM, Dense, Input, Dropout, Bidirectional

import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset
from sklearn.preprocessing import MinMaxScaler

from tqdm import tqdm

# === Output folder for every figure this notebook saves ===
plot_dir = "plots"
# exist_ok=True turns this into a no-op when the folder is already present
os.makedirs(name=plot_dir, exist_ok=True)

# Data Processing

First, open all stocks from the Kaggle dataset and store them in a dictionary where the ticker is the key and a pandas DataFrame is the value. All stock data is trimmed to dates on or after 2011-11-29, since this is the earliest recorded date of a politician trade. Then we min-max normalize each stock's price and volume columns so that all stocks operate in the same frame of reference.

Then load in political trades.

In [None]:
# Load every per-ticker CSV into stock_data: {ticker -> DataFrame}
stock_data = {}
for file in os.listdir('/content/stocks'):
    if not file.endswith('.csv'):
        continue
    stock_name = os.path.splitext(file)[0]
    df = pd.read_csv(os.path.join('/content/stocks', file))
    # Convert all dates to pandas timestamps
    df['Date'] = pd.to_datetime(df['Date'])
    # Trim to only after the earliest date of a recorded politician trade.
    # .copy() gives each ticker its own frame, so the column assignments made
    # below (and in later cells) do not write into a slice of `df`.
    stock_data[stock_name] = df[df['Date'] >= pd.Timestamp('2011-11-29')].copy()

# Min-max scale each ticker independently so all tickers share a [0, 1] range
features_to_scale = ['Open', 'High', 'Low', 'Close', 'Volume']
for stock, frame in stock_data.items():
    # Non-numeric entries become NaN instead of raising
    frame[features_to_scale] = frame[features_to_scale].apply(
        pd.to_numeric, errors='coerce'
    ).astype(float)
    if frame.empty:
        # MinMaxScaler cannot be fit on zero rows; leave such tickers as-is
        continue
    scaler = MinMaxScaler()
    frame[features_to_scale] = scaler.fit_transform(frame[features_to_scale])

In [None]:
trade_df = pd.read_csv('/content/SenatorCleaned.csv')
# Convert all dates to pandas timestamps
trade_df['Transaction.Date'] = pd.to_datetime(trade_df['Transaction.Date'])

# Build a dictionary where keys are stock tickers and values are DataFrames
# holding every trade made by politicians on that stock.
#
# Only the date, type (buy or sell) and amount are kept, as these are the
# parts of the data used as features.
trade_columns = trade_df[['Transaction.Date', 'Type', 'Amount']].rename(
    columns={'Transaction.Date': 'Date'}
)

# A single groupby replaces the row-by-row pd.concat, which re-copied the
# growing per-ticker frame on every row. sort=False keeps tickers in order of
# first appearance; rows inside each group keep their file order.
# Rows whose ticker is missing (NaN) are not assigned to any group.
trade_data = {
    ticker: group.reset_index(drop=True)
    for ticker, group in tqdm(trade_columns.groupby(trade_df['Ticker'], sort=False))
}





In [None]:
# Tally how many recorded trades reference each ticker symbol
ticker_counts = Counter(trade_df['Ticker'])
print(ticker_counts)

Counter({'--': 1629, 'AAPL': 269, 'PCLN': 213, 'PFE': 153, 'MSFT': 128, 'T': 116, 'BAC': 109, 'GE': 106, 'DISCA': 102, 'NFLX': 97, 'DD': 87, 'WFC': 85, 'WMT': 82, 'DIS': 77, 'FEYE': 77, 'QCOM': 76, 'CBS': 75, 'WPX': 74, 'XOM': 72, 'CZR': 71, 'PG': 69, 'KN': 69, 'AMZN': 68, 'MRK': 68, 'CSCO': 67, 'FDC': 67, 'URBN': 66, 'BRK-B': 65, 'JNJ': 63, 'CVS': 63, 'JPM': 63, 'FB': 63, 'GM': 61, 'LYV': 61, 'GPK': 60, 'CVX': 60, 'VZ': 58, 'BAX': 57, 'ABBV': 56, 'SLB': 55, 'PEP': 54, 'C': 54, 'V': 53, 'BA': 52, 'SBUX': 51, 'GILD': 51, 'WFM': 51, 'BMY': 50, 'AAN': 50, 'MOS': 49, 'HBI': 48, 'AMGN': 47, 'GLW': 46, 'DVN': 45, 'EMR': 44, 'NVDA': 44, 'M': 43, 'ESRX': 43, 'FOXA': 43, 'ORCL': 42, 'CMCSA': 41, 'SPY': 41, 'DFS': 41, 'OXY': 40, 'GS': 40, 'EMC': 39, 'ENTG': 39, 'IP': 39, 'COST': 39, 'DOW': 39, 'MCK': 39, 'HYH': 38, 'WLK': 38, 'LMT': 38, 'COP': 37, 'HON': 37, 'LGF': 37, 'CIT': 37, 'PFS': 37, 'FDX': 37, 'BWXT': 36, 'PAA': 36, 'WMB': 35, 'RF': 35, 'INTC': 35, 'CELG': 35, 'ADT': 34, 'PSX': 33, 'KO':

We are interested in seeing if the financial activity of politicians can better educate our model on where the stock market is going. We need to make a feature so that we may input this data into the models.

We use the method below to generate a simple politician sentiment score. This score is the net amount of stock purchased within a lookback window, i.e. if one senator purchases 100 shares of some stock and another sells 120, the sentiment score is -20.

In [None]:
def sentimentFeature(stock_name='AAPL', sentimentLookbackDistance=60):
    """Compute a rolling politician-sentiment score for each row of a stock.

    For every date in ``stock_data[stock_name]`` the score is the sum of the
    signed trade amounts in ``trade_data[stock_name]`` whose date falls in the
    half-open window ``[date - lookback, date)``. Purchases count positive,
    full and partial sales count negative, any other trade type counts zero.

    Args:
        stock_name: Ticker to look up in ``stock_data`` and ``trade_data``.
        sentimentLookbackDistance: Window length in days.

    Returns:
        A list with one score per row of ``stock_data[stock_name]``, in that
        frame's own row order so it can be assigned back as a column, or
        ``None`` when the ticker is missing from either dictionary.
    """
    if stock_name not in stock_data or stock_name not in trade_data:
        return None

    trades = trade_data[stock_name].copy()

    # Map 'Type' to a direction; unrecognized types contribute nothing
    direction = trades['Type'].map({
        'Purchase': 1,
        'Sale (Full)': -1,
        'Sale (Partial)': -1
    }).fillna(0)
    trades['SignedAmount'] = trades['Amount'] * direction

    # Sorted trade dates let each window be located by binary search
    trades = trades.sort_values('Date')
    trade_dates = trades['Date'].values
    trade_amounts = trades['SignedAmount'].values

    # Stock dates are deliberately NOT sorted: the result has to line up with
    # the frame's existing rows because callers assign it positionally.
    stock_dates = stock_data[stock_name]['Date'].values
    window = pd.Timedelta(days=sentimentLookbackDistance).to_timedelta64()

    # side='left' on both bounds gives: window start <= trade date < stock date
    window_starts = np.searchsorted(trade_dates, stock_dates - window, side='left')
    window_stops = np.searchsorted(trade_dates, stock_dates, side='left')

    return [trade_amounts[lo:hi].sum() for lo, hi in zip(window_starts, window_stops)]

In [None]:
# Attach the sentiment feature (default lookback window) to every loaded stock
for stock in tqdm(stock_data):
    sentiment_scores = sentimentFeature(stock)
    # sentimentFeature returns None for tickers with no recorded trades; store
    # NaN so the column stays numeric and those rows are removed by dropna()
    stock_data[stock]['SentimentScore'] = (
        np.nan if sentiment_scores is None else sentiment_scores
    )

Before running our model, we calculate the sentiment feature and add it to the input data.

Now we get to the models. We have two models: an LSTM and a time-series attention (Transformer) model. We have two types of tasks: the models can either predict whether the stock will go up or down (binary classification task) or predict the next day's open price (regression task).

In [None]:

def create_sequences(data, labels, look_back, patch_size=None):
    """Slice a time series into fixed-length windows with one label each.

    Window ``k`` covers ``data[k : k + look_back]`` and is paired with
    ``labels[k + look_back - 1]``, i.e. the label that belongs to the window's
    last time step. Both model types use this same alignment; labelling a
    window with ``labels[k]`` would hand the model a target derived from rows
    it already sees.

    Args:
        data: Sequence of per-time-step feature rows.
        labels: Sequence of per-time-step labels, aligned with ``data``.
        look_back: Number of time steps per window.
        patch_size: Patch length for the attention model. Only validated when
            the module-level ``LSTM_MODEL`` flag is false.

    Returns:
        ``(X, y)`` as NumPy arrays, one entry per window.

    Raises:
        ValueError: For the attention model, when ``patch_size`` is missing or
            does not divide ``look_back``.
    """
    if not LSTM_MODEL:
        if not patch_size:
            raise ValueError("patch_size must be a positive integer for the attention model")
        if look_back % patch_size != 0:
            raise ValueError(f"look_back ({look_back}) must be divisible by patch_size ({patch_size})")

    # "+ 1" keeps the final full window, which ends on the last labelled row
    n_windows = min(len(data), len(labels)) - look_back + 1

    X, y = [], []
    for start in range(max(n_windows, 0)):
        stop = start + look_back
        X.append(data[start:stop])
        y.append(labels[stop - 1])  # label matches the last time step
    return np.array(X), np.array(y)


# Binary Classification Task

Below is the code to run the binary classification task.

Hyperparameters you can adjust:


```
SENTIMENT_LOOKBACK_DISTANCE = 60     # How far back (in days) are politician trades accounted for

## LSTM HYPERPARAMETER
LSTM_LOOK_BACK = 120                 # How deep is our LSTM

## ATTENTION HYPERPARAMETER
ATTENTION_LOOK_BACK = 160            # How many tokens go into Attention
PATCH_SIZE = 10                      # How big each token is
# *** IMPORTANT: MAKE SURE ATTENTION_LOOK_BACK IS DIVISIBLE BY PATCH_SIZE! ***

# Choose model and task below
LSTM_MODEL = True                    # Make sure only one of these is true!
ATTENTION_MODEL = False              # Adjust to change whether using attention or LSTM

```



In [None]:
# === Transformer Model ===

class TimeSeriesTransformer(nn.Module):
    """Patch-based Transformer encoder producing one logit per sequence.

    The input sequence is cut into ``seq_len // patch_size`` consecutive
    patches. Each patch is flattened and linearly projected to ``d_model``,
    a learned positional embedding is added, the tokens pass through a
    Transformer encoder, and the mean over tokens is mapped to one logit.

    Args:
        input_dim: Number of features per time step.
        patch_size: Time steps per patch.
        seq_len: Time steps per input sequence.
        d_model: Token embedding width.
        nhead: Number of attention heads.
        num_layers: Number of encoder layers.
        dropout: Dropout rate inside the encoder layers.
    """

    def __init__(self, input_dim, patch_size, seq_len, d_model=64, nhead=4, num_layers=2, dropout=0.1):
        super().__init__()
        self.patch_size = patch_size
        self.num_patches = seq_len // patch_size
        self.d_model = d_model

        # Each flattened patch (patch_size * input_dim values) becomes one token
        self.patch_embed = nn.Linear(patch_size * input_dim, d_model)
        # Learned positional embedding, one vector per patch
        self.pos_embed = nn.Parameter(torch.randn(1, self.num_patches, d_model))

        encoder_layer = nn.TransformerEncoderLayer(d_model=d_model, nhead=nhead, dropout=dropout, batch_first=True)
        self.transformer = nn.TransformerEncoder(encoder_layer, num_layers=num_layers)

        self.classifier = nn.Sequential(
            nn.LayerNorm(d_model),
            nn.Linear(d_model, 1)
        )

    def forward(self, x):
        """Return logits of shape ``(batch,)`` for ``x`` of shape ``(batch, seq_len, features)``."""
        B, T, C = x.shape
        # reshape (not view) also accepts non-contiguous inputs
        x = x.reshape(B, self.num_patches, self.patch_size * C)
        x = self.patch_embed(x) + self.pos_embed
        x = self.transformer(x)
        x = x.mean(dim=1)  # Global average pooling
        # squeeze(-1) removes only the trailing singleton, so a batch of one
        # still yields shape (1,) rather than a 0-d tensor
        return self.classifier(x).squeeze(-1)



# === 1. Prepare data ===

## DATA HYPERPARAMETER
STOCK_TO_PREDICT = 'MSFT'            # Change to try different stocks
SENTIMENT_LOOKBACK_DISTANCE = 60     # How far back are politician trades accounted for

## LSTM HYPERPARAMETER
LSTM_LOOK_BACK = 120                 # How deep is our LSTM

## ATTENTION HYPERPARAMETER
ATTENTION_LOOK_BACK = 160            # How many tokens go into Attention
PATCH_SIZE = 10                      # How big each token is

# Choose model and task below
LSTM_MODEL = True
ATTENTION_MODEL = not LSTM_MODEL     # Derived, so exactly one model is selected

# Make inputs
features = ['Open', 'High', 'Low', 'Close', 'Volume', 'SentimentScore']

# Initialize combined dataset containers
all_X, all_y = [], []

# Draw up to 500 distinct tickers. random.sample cannot loop forever when
# fewer than 500 tickers are loaded, unlike drawing until 500 are collected.
stock_set = random.sample(list(stock_data), min(500, len(stock_data)))

min_required = LSTM_LOOK_BACK if LSTM_MODEL else ATTENTION_LOOK_BACK

# Loop over each stock
for stock in stock_set:
    if stock not in trade_data:
        continue

    # Recompute the sentiment feature on a private copy every run. Reusing a
    # 'SentimentScore' column cached by an earlier cell would silently ignore
    # the SENTIMENT_LOOKBACK_DISTANCE chosen above.
    df = stock_data[stock].copy()
    df['SentimentScore'] = sentimentFeature(stock, SENTIMENT_LOOKBACK_DISTANCE)

    # Drop rows with missing features
    df = df.dropna(subset=features).reset_index(drop=True)

    # Skip if too short
    if len(df) <= min_required:
        continue

    # Normalize features
    scaler = MinMaxScaler()
    data_values = scaler.fit_transform(df[features].values)

    # Create labels: 1 when the next day's Open is higher than today's
    open_prices = df['Open'].values
    labels = (open_prices[1:] > open_prices[:-1]).astype(int)
    data_values = data_values[:-1]  # shift to match label length

    # Create sequences
    X_seq, y_seq = create_sequences(
        data_values, labels,
        min_required,
        PATCH_SIZE
    )

    all_X.append(X_seq)
    all_y.append(y_seq)

if not all_X:
    raise ValueError(
        "No sampled stock has both politician trades and enough history; "
        "re-run the cell or lower the look-back length."
    )

# Combine all stock sequences into one dataset
X = np.concatenate(all_X, axis=0)
y = np.concatenate(all_y, axis=0)

# === 3. Split and train ===

# Chronological split shared by both model types (no shuffle for time series)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, shuffle=False
)

if LSTM_MODEL:
    model = Sequential([
        # An explicit Input layer replaces `input_shape=` on the wrapper layer,
        # which Keras reports as deprecated. The feature count is read from X
        # rather than from a variable left over from the data-prep loop.
        Input(shape=(LSTM_LOOK_BACK, X.shape[2])),
        Bidirectional(LSTM(64)),
        Dropout(0.3),
        Dense(1, activation='sigmoid')
    ])

    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

    # Weight classes inversely to their frequency. Building the dict from the
    # classes actually present avoids an IndexError when only one class occurs.
    present_classes = np.unique(y_train)
    class_weights = class_weight.compute_class_weight(
        'balanced', classes=present_classes, y=y_train
    )
    cw_dict = dict(zip(present_classes.tolist(), class_weights.tolist()))

    model.fit(X_train, y_train, epochs=20, batch_size=16,
              validation_split=0.2, class_weight=cw_dict)

elif ATTENTION_MODEL:
    X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
    X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
    y_train_tensor = torch.tensor(y_train, dtype=torch.float32)
    y_test_tensor = torch.tensor(y_test, dtype=torch.float32)

    train_loader = DataLoader(TensorDataset(X_train_tensor, y_train_tensor), batch_size=32, shuffle=True)
    test_loader = DataLoader(TensorDataset(X_test_tensor, y_test_tensor), batch_size=32)

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = TimeSeriesTransformer(
        input_dim=X.shape[2],
        patch_size=PATCH_SIZE,
        seq_len=ATTENTION_LOOK_BACK,
        d_model=128,
        nhead=8,
        num_layers=4,
        dropout=0.2
    ).to(device)

    # Up-weight positives by the negative/positive ratio. Vectorized counts
    # avoid a Python-level loop over the array, and float32 matches the logits.
    n_negative = float((y_train == 0).sum())
    n_positive = float((y_train == 1).sum())
    pos_weight = torch.tensor(
        [n_negative / max(n_positive, 1.0)], dtype=torch.float32, device=device
    )
    criterion = nn.BCEWithLogitsLoss(pos_weight=pos_weight)
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)

    for epoch in range(10):
        model.train()
        total_loss = 0
        for xb, yb in tqdm(train_loader):
            xb, yb = xb.to(device), yb.to(device)
            optimizer.zero_grad()
            # reshape(-1) keeps the logits 1-D even for a final batch of one,
            # so they always match the shape of yb
            logits = model(xb).reshape(-1)
            loss = criterion(logits, yb)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
        print(f"Epoch {epoch+1} - Loss: {total_loss:.4f}")

# === 4. Predict ===

if LSTM_MODEL:
    # Keras returns probabilities of shape (n, 1); threshold at 0.5
    pred_probs = model.predict(X_test)
    preds = (pred_probs > 0.5).astype(int).flatten()

else:
    model.eval()
    all_preds = []
    with torch.no_grad():
        for xb, _ in test_loader:
            batch_probs = torch.sigmoid(model(xb.to(device)))
            # reshape(-1) guarantees a 1-D tensor even for a batch of one;
            # tolist() stores plain floats rather than 0-d tensors
            all_preds.extend(batch_probs.reshape(-1).cpu().tolist())

    preds = (np.array(all_preds) > 0.5).astype(int)

# Prices for visualization, mapped back through the scaler.
# NOTE(review): `scaler` and `data_values` are whatever the data-prep loop
# left behind, i.e. they belong to the last processed stock only, while
# y_test spans several stocks — confirm this is the intended series to draw.
full_prices = scaler.inverse_transform(data_values)
plot_prices = full_prices[-len(y_test):, 0]

# === 5. Plotting with colored segments ===

# Here the stock's actual values are plotted, where the line is green if our model
# correctly predicted the day to day direction of change (up or down from open to open)
# or red if the model incorrectly predicted the day to day direction

colors = ['green' if pred == actual else 'red' for pred, actual in zip(preds, y_test)]

plt.figure(figsize=(14, 6))
for i in range(1, len(plot_prices)):
    plt.plot(
        [i - 1, i],
        [plot_prices[i - 1], plot_prices[i]],
        color=colors[i]
    )
string = "Many stocks stringed together"
plt.title(f"{STOCK_TO_PREDICT} Open Price – {string} Direction Prediction Accuracy")
plt.xlabel("Days (Test Set)")
plt.ylabel("Open Price")
plt.grid(True)
plt.tight_layout()

# === Save the plot ===
# Save BEFORE plt.show(): once the figure has been shown, a later savefig()
# typically writes an empty image. The file name says "classification" so the
# regression cell, which writes "*_regression_direction_plot.png", does not
# overwrite this figure.
model_type = "LSTM" if LSTM_MODEL else "Attention"
plot_path = os.path.join(plot_dir, f"{model_type}_classification_direction_plot.png")
plt.savefig(plot_path)
plt.show()
plt.close()
print(f"Plot saved to {plot_path}")

print("Predicted class distribution:", collections.Counter(preds))
print("Actual class distribution:", collections.Counter(y_test))

  super().__init__(**kwargs)


Epoch 1/20
[1m5261/5261[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m74s[0m 13ms/step - accuracy: 0.5035 - loss: 0.6946 - val_accuracy: 0.5043 - val_loss: 0.6930
Epoch 2/20
[1m5261/5261[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m72s[0m 14ms/step - accuracy: 0.5112 - loss: 0.6928 - val_accuracy: 0.4961 - val_loss: 0.6905
Epoch 3/20
[1m5261/5261[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m81s[0m 13ms/step - accuracy: 0.5625 - loss: 0.6832 - val_accuracy: 0.7063 - val_loss: 0.6397
Epoch 4/20
[1m5261/5261[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m84s[0m 14ms/step - accuracy: 0.6916 - loss: 0.6229 - val_accuracy: 0.7756 - val_loss: 0.5488
Epoch 5/20
[1m5261/5261[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m72s[0m 14ms/step - accuracy: 0.7535 - loss: 0.5458 - val_accuracy: 0.7611 - val_loss: 0.5114
Epoch 6/20
[1m4842/5261[0m [32m━━━━━━━━━━━━━━━━━━[0m[37m━━[0m [1m4s[0m 12ms/step - accuracy: 0.7795 - loss: 0.4990

# Regression Task

Run this code to use the regression task models.

Same hyperparameters as earlier, but with one new one:


```
USE_SENTIMENT_FEATURE = True                # You can turn on or off the feature we are designing, to see the impact
```



In [1]:
class TimeSeriesTransformer(nn.Module):
    """Patch-based Transformer encoder that regresses one value per sequence.

    The sequence is split into ``seq_len // patch_size`` patches. Each patch
    is flattened and projected to ``d_model``, a learned positional embedding
    (initialized to zeros) is added, the tokens go through a Transformer
    encoder, and the token average is mapped to a single scalar.
    """

    def __init__(self, input_dim, patch_size, seq_len, d_model=128, nhead=8, num_layers=4, dropout=0.1):
        super().__init__()
        self.patch_size = patch_size
        self.num_patches = seq_len // patch_size
        self.d_model = d_model

        # One token per patch: flatten (patch_size x input_dim) -> d_model
        flattened_patch_dim = patch_size * input_dim
        self.patch_embed = nn.Linear(flattened_patch_dim, d_model)

        # Learned position vector for every patch slot
        self.pos_embed = nn.Parameter(torch.zeros(1, self.num_patches, d_model))

        # Stack of identical encoder layers with a 4x-wide feed-forward part
        self.encoder = nn.TransformerEncoder(
            nn.TransformerEncoderLayer(
                d_model=d_model,
                nhead=nhead,
                dim_feedforward=4 * d_model,
                dropout=dropout,
                batch_first=True,
            ),
            num_layers=num_layers,
        )

        # Head that turns the pooled representation into one number
        self.regressor = nn.Sequential(nn.LayerNorm(d_model), nn.Linear(d_model, 1))

    def forward(self, x):
        """Map ``x`` of shape (batch, seq_len, features) to predictions of shape (batch,)."""
        batch_size, _, num_features = x.shape
        # (batch, num_patches, patch_size * features)
        patches = x.view(batch_size, self.num_patches, self.patch_size * num_features)
        positions = self.pos_embed[:, :self.num_patches, :]
        tokens = self.patch_embed(patches) + positions
        encoded = self.encoder(tokens)      # (batch, num_patches, d_model)
        pooled = encoded.mean(dim=1)        # average over patches
        return self.regressor(pooled).squeeze(-1)



# === 1. Prepare data ===

## DATA HYPERPARAMETER
STOCK_TO_PREDICT = 'MSFT'            # Change to try different stocks
SENTIMENT_LOOKBACK_DISTANCE = 60     # How far back are politician trades accounted for

## LSTM HYPERPARAMETER
LSTM_LOOK_BACK = 120                 # How deep is our LSTM

## ATTENTION HYPERPARAMETER
ATTENTION_LOOK_BACK = 160            # How many tokens go into Attention
PATCH_SIZE = 10                      # How big each token is

# Choose model and task below
LSTM_MODEL = True
ATTENTION_MODEL = not LSTM_MODEL

# Test whether sentiment is useful or not
USE_SENTIMENT_FEATURE = True

# Make inputs
features = ['Open', 'High', 'Low', 'Close', 'Volume', 'SentimentScore']

# Initialize combined dataset containers
all_X, all_y = [], []

# Draw up to 200 distinct tickers. random.sample cannot loop forever when
# fewer than 200 tickers are loaded, unlike drawing until 200 are collected.
stock_set = random.sample(list(stock_data), min(200, len(stock_data)))

min_required = LSTM_LOOK_BACK if LSTM_MODEL else ATTENTION_LOOK_BACK

# Data preparation loop
for stock in stock_set:
    if stock not in trade_data:
        continue

    # Build the sentiment column on a private copy every run. Reusing a column
    # cached by an earlier cell would make both USE_SENTIMENT_FEATURE and
    # SENTIMENT_LOOKBACK_DISTANCE have no effect.
    df = stock_data[stock].copy()
    if USE_SENTIMENT_FEATURE:
        df['SentimentScore'] = sentimentFeature(stock, SENTIMENT_LOOKBACK_DISTANCE)
    else:
        # Constant placeholder keeps the input width identical for ablations
        df['SentimentScore'] = 1.0

    df = df.dropna(subset=features).reset_index(drop=True)

    if len(df) <= min_required:
        continue

    scaler = MinMaxScaler()
    data_values = scaler.fit_transform(df[features].values)

    open_prices = df['Open'].values
    labels = open_prices[1:]               # Next day's Open
    data_values = data_values[:-1]         # Align features

    X_seq, y_seq = create_sequences(
        data_values, labels,
        min_required,
        PATCH_SIZE
    )

    all_X.append(X_seq)
    all_y.append(y_seq)

if not all_X:
    raise ValueError(
        "No sampled stock has both politician trades and enough history; "
        "re-run the cell or lower the look-back length."
    )

# Combine all data
X = np.concatenate(all_X, axis=0)
y = np.concatenate(all_y, axis=0)

# === 2. Train and Evaluate ===

# Chronological split shared by both model types (no shuffle for time series)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)

if LSTM_MODEL:
    model = Sequential([
        # An explicit Input layer replaces `input_shape=` on the wrapper layer,
        # which Keras reports as deprecated. The feature count is read from X
        # rather than from a variable left over from the data-prep loop.
        Input(shape=(LSTM_LOOK_BACK, X.shape[2])),
        Bidirectional(LSTM(64)),
        Dropout(0.3),
        Dense(1)  # No activation for regression
    ])

    model.compile(optimizer='adam', loss='mse', metrics=['mae'])
    model.fit(X_train, y_train, epochs=10, batch_size=16, validation_split=0.2)

    preds = model.predict(X_test).flatten()

elif ATTENTION_MODEL:
    X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
    X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
    y_train_tensor = torch.tensor(y_train, dtype=torch.float32)
    y_test_tensor = torch.tensor(y_test, dtype=torch.float32)

    train_loader = DataLoader(TensorDataset(X_train_tensor, y_train_tensor), batch_size=32, shuffle=True)
    test_loader = DataLoader(TensorDataset(X_test_tensor, y_test_tensor), batch_size=32)

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = TimeSeriesTransformer(
        input_dim=X.shape[2],
        patch_size=PATCH_SIZE,
        seq_len=ATTENTION_LOOK_BACK,
        d_model=128,
        nhead=8,
        num_layers=4,
        dropout=0.2
    ).to(device)

    criterion = nn.MSELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)

    for epoch in range(30):
        model.train()
        total_loss = 0
        for xb, yb in tqdm(train_loader):
            xb, yb = xb.to(device), yb.to(device)
            optimizer.zero_grad()
            # Keep predictions and targets both at shape (batch,). Comparing
            # (batch,) predictions with (batch, 1) targets makes MSELoss
            # broadcast to (batch, batch) and average over every pairing.
            batch_preds = model(xb).reshape(-1)
            loss = criterion(batch_preds, yb)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
        print(f"Epoch {epoch+1} - Loss: {total_loss:.4f}")

    # Prediction
    model.eval()
    all_preds = []
    with torch.no_grad():
        for xb, _ in test_loader:
            # reshape(-1) guarantees a 1-D tensor even for a batch of one;
            # tolist() stores plain floats rather than 0-d tensors
            all_preds.extend(model(xb.to(device)).reshape(-1).cpu().tolist())
    preds = np.array(all_preds)

# === 3. Plotting ===

# Map predictions and ground truth back through the scaler. The scaler was
# fit on len(features) columns, so each vector is replicated across all
# columns and column 0 ('Open') is read back.
# NOTE(review): `scaler` is whatever the data-prep loop left behind, i.e. the
# last processed stock's scaler, while the test set spans several stocks —
# confirm this is the intended inverse transform.
plot_pred_prices = scaler.inverse_transform(
    np.column_stack([preds] * len(features))
)[:, 0]

plot_true_prices = scaler.inverse_transform(
    np.column_stack([y_test] * len(features))
)[:, 0]

# A step counts as correct when the predicted and the actual day-over-day
# change have the same sign. Computed once and reused for both the segment
# colors and the accuracy count below.
direction_correct = (np.diff(plot_pred_prices) * np.diff(plot_true_prices)) > 0

# Color based on directional correctness
colors = ['green' if is_correct else 'red' for is_correct in direction_correct]

plt.figure(figsize=(14, 6))
for i in range(1, len(plot_true_prices)):
    plt.plot(
        [i - 1, i],
        [plot_true_prices[i - 1], plot_true_prices[i]],
        color=colors[i - 1]
    )

plt.title(f"{STOCK_TO_PREDICT} Open Price – Regression Prediction Accuracy")
plt.xlabel("Days (Test Set)")
plt.ylabel("Open Price")
plt.grid(True)
plt.tight_layout()

# === Save the plot ===
# Save BEFORE plt.show(): once the figure has been shown, a later savefig()
# typically writes an empty image.
model_type = "LSTM" if LSTM_MODEL else "Attention"
plot_path = os.path.join(plot_dir, f"{model_type}_regression_direction_plot.png")
plt.savefig(plot_path)
plt.show()
plt.close()
print(f"Plot saved to {plot_path}")


plt.figure(figsize=(14, 6))
plt.plot(plot_true_prices, label='Actual Open Price', color='blue')
plt.plot(plot_pred_prices, label='Predicted Open Price', color='orange')
plt.title(f"{STOCK_TO_PREDICT} Open Price – Actual vs Predicted")
plt.xlabel("Days (Test Set)")
plt.ylabel("Open Price")
plt.legend()
plt.grid(True)
plt.tight_layout()

# === Save the plot ===
plot_path = os.path.join(plot_dir, f"{model_type}_regression_plot.png")
plt.savefig(plot_path)
plt.show()
plt.close()
print(f"Plot saved to {plot_path}")

# === 4. Directional Accuracy for Regression ===

correct_direction_count = int(direction_correct.sum())

print(f"\nDirectional Accuracy: {correct_direction_count} out of {len(plot_true_prices)-1} days")



NameError: name 'nn' is not defined