In [1]:
import os
import numpy as np
import re
import pandas as pd
import plotly
import plotly.express as px
import plotly.figure_factory as ff
import plotly.graph_objects as go
import pickle
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from transformers import PatchTSMixerConfig, PatchTSMixerForPrediction
from sklearn.preprocessing import StandardScaler
from torch.optim.lr_scheduler import ReduceLROnPlateau
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

In [2]:
class PatchTSMixerForPrediction(nn.Module):
    def __init__(self, config):
        super(PatchTSMixerForPrediction, self).__init__()
        self.layers = nn.ModuleList()
        self.residual_projection = nn.Linear(config.input_dim, config.hidden_dim)

        # Input layer
        self.layers.append(nn.Linear(config.input_dim, config.hidden_dim))
        self.layers.append(nn.BatchNorm1d(config.hidden_dim))
        self.layers.append(nn.LeakyReLU(negative_slope=0.01))

        # Hidden layers with residual connections
        for _ in range(config.num_hidden_layers):
            self.layers.append(nn.Linear(config.hidden_dim, config.hidden_dim))
            self.layers.append(nn.BatchNorm1d(config.hidden_dim))
            self.layers.append(nn.LeakyReLU(negative_slope=0.01))
            self.layers.append(nn.Linear(config.hidden_dim, config.hidden_dim))  # Residual connection
            self.layers.append(nn.BatchNorm1d(config.hidden_dim))
            self.layers.append(nn.LeakyReLU(negative_slope=0.01))

        # Output layer
        self.layers.append(nn.Linear(config.hidden_dim, config.output_dim))
        self.dropout = nn.Dropout(0.3)
        self.sequence_length = config.sequence_length

    def forward(self, x):
        batch_size, seq_len, num_features = x.size()
        x = x.view(-1, num_features)
        residual = self.residual_projection(x)

        for layer in self.layers[:-1]:
            x = layer(x)
            if isinstance(layer, nn.LeakyReLU):
                x = self.dropout(x)
            if isinstance(layer, nn.Linear):
                x += residual
                residual = x
        x = self.layers[-1](x)
        x = x.view(batch_size, seq_len, -1)
        return x

# Define the config class
class PatchTSMixerConfig:
    def __init__(self, input_dim, hidden_dim, num_hidden_layers, output_dim, sequence_length):
        self.input_dim = input_dim
        self.hidden_dim = hidden_dim
        self.num_hidden_layers = num_hidden_layers
        self.output_dim = output_dim
        self.sequence_length = sequence_length

In [3]:
def preprocess_data():
    dfs = []
    csv_path = "/kaggle/input/stock-market-data/stock_market_data/nasdaq/csv/"
    pattern = re.compile(r"([^/]+)\.csv")

    for filename in os.listdir(csv_path):
        if filename.endswith(".csv"):
            try:
                match = pattern.match(filename)
                company_id = match.group(1) if match else "unknown"

                df = pd.read_csv(os.path.join(csv_path, filename), on_bad_lines='skip')
                df['company_id'] = company_id
                dfs.append(df)
            except pd.errors.ParserError as e:
                print(f"Error reading {filename}: {e}")

    if dfs:
        nasdaq_df = pd.concat(dfs, ignore_index=True)

        nasdaq_df = nasdaq_df.rename(columns={
            "Date": "date",
            "Low": "low",
            "Open": "open",
            "Volume": "volume",
            "High": "high",
            "Close": "close",
            "Adjusted Close": "adjusted_close"
        })

        nasdaq_df.fillna(0, inplace=True)

        feature_cols = ["low", "open", "volume", "high", "close", "adjusted_close"]

        X = nasdaq_df[feature_cols]

        scaler = StandardScaler()
        X_scaled = scaler.fit_transform(X)

        target_scaler = StandardScaler()
        y = target_scaler.fit_transform(nasdaq_df[['close']])

        num_features = X.shape[1]
        sequence_length = 32

        num_sequences = X_scaled.shape[0] // sequence_length

        X_reshaped = X_scaled[:num_sequences * sequence_length].reshape(-1, sequence_length, num_features)
        y_reshaped = y[:num_sequences * sequence_length].reshape(-1, sequence_length)[:, 0]

        X_train, X_val, y_train, y_val = train_test_split(X_reshaped, y_reshaped, test_size=0.2, random_state=42)
        X_val, X_test, y_val, y_test = train_test_split(X_val, y_val, test_size=0.5, random_state=42)

        train_dataset = TensorDataset(torch.tensor(X_train, dtype=torch.float32), torch.tensor(y_train, dtype=torch.float32))
        val_dataset = TensorDataset(torch.tensor(X_val, dtype=torch.float32), torch.tensor(y_val, dtype=torch.float32))
        test_dataset = TensorDataset(torch.tensor(X_test, dtype=torch.float32), torch.tensor(y_test, dtype=torch.float32))

        batch_size = 64
        train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
        val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
        test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

        return train_loader, val_loader, test_loader, num_features, target_scaler
    else:
        raise ValueError("No data files found in the specified directory.")

In [4]:
def train_model(train_loader, val_loader, num_features, target_scaler):
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    sequence_length = 32

    config = PatchTSMixerConfig(
        input_dim=num_features,
        hidden_dim=256,
        num_hidden_layers=10,
        output_dim=1,
        sequence_length=sequence_length
    )

    model = PatchTSMixerForPrediction(config).to(device)
    optimizer = optim.AdamW(model.parameters(), lr=1e-4, weight_decay=1e-5)
    loss_fn = nn.MSELoss()
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=5, verbose=True)
    num_epochs = 25
    clip_value = 1.0

    best_val_loss = float('inf')
    patience_counter = 0
    patience = 10

    train_losses = []
    val_losses = []
    val_maes = []
    epochs = []

    for epoch in range(num_epochs):
        model.train()
        total_train_loss = 0
        total_train_mae = 0

        for X_batch, y_batch in train_loader:
            X_batch, y_batch = X_batch.to(device), y_batch.to(device)
            optimizer.zero_grad()

            outputs = model(X_batch)
            outputs = outputs[:, -1, 0].squeeze()

            loss = loss_fn(outputs, y_batch)
            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), clip_value)
            optimizer.step()

            total_train_loss += loss.item()
            total_train_mae += mean_absolute_error(y_batch.cpu().detach().numpy(), outputs.cpu().detach().numpy())

        model.eval()
        val_loss = 0
        val_mae = 0

        with torch.no_grad():
            for X_val_batch, y_val_batch in val_loader:
                X_val_batch, y_val_batch = X_val_batch.to(device), y_val_batch.to(device)
                outputs = model(X_val_batch)
                outputs = outputs[:, -1, 0].squeeze()

                loss = loss_fn(outputs, y_val_batch)
                val_loss += loss.item()
                val_mae += mean_absolute_error(y_val_batch.cpu().detach().numpy(), outputs.cpu().detach().numpy())

        val_loss /= len(val_loader)
        val_mae /= len(val_loader)

        avg_train_loss = total_train_loss / len(train_loader)
        avg_train_mae = total_train_mae / len(train_loader)
        train_losses.append(avg_train_loss)
        val_losses.append(val_loss)
        val_maes.append(val_mae)
        epochs.append(epoch + 1)

        print(f"Epoch {epoch + 1}/{num_epochs}")
        print(f"Train Loss (MSE): {avg_train_loss:.4f}")
        print(f"Train MAE: {avg_train_mae:.4f}")
        print(f"Validation Loss (MSE): {val_loss:.4f}")
        print(f"Validation MAE: {val_mae:.4f}")

        scheduler.step(val_loss)

        if val_loss < best_val_loss:
            best_val_loss = val_loss
            patience_counter = 0
            # Save the best model using pickle to /kaggle/working/
            model_save_path = '/kaggle/working/patchtsm_model.pkl'
            with open(model_save_path, 'wb') as f:
                pickle.dump(model, f)
            print(f"Model saved as '{model_save_path}'")
        else:
            patience_counter += 1
            if patience_counter >= patience:
                print("Early stopping triggered.")
                break

    # Plot the metrics
    fig = go.Figure()
    fig.add_trace(go.Scatter(x=epochs, y=train_losses, mode='lines+markers', name='Train Loss (MSE)'))
    fig.add_trace(go.Scatter(x=epochs, y=val_losses, mode='lines+markers', name='Validation Loss (MSE)'))
    fig.add_trace(go.Scatter(x=epochs, y=val_maes, mode='lines+markers', name='Validation MAE'))

    fig.update_layout(
        title='Training and Validation Metrics',
        xaxis_title='Epoch',
        yaxis_title='Metric Value',
        legend_title='Metrics',
        template='plotly_dark'
    )
    fig.show()

In [5]:
if __name__ == "__main__":
    train_loader, val_loader, test_loader, num_features, target_scaler = preprocess_data()
    train_model(train_loader, val_loader, num_features, target_scaler)

Epoch 1/25
Train Loss (MSE): 1.2041
Train MAE: 0.0607
Validation Loss (MSE): 0.0032
Validation MAE: 0.0090
Model saved as '/kaggle/working/patchtsm_model.pkl'
Epoch 2/25
Train Loss (MSE): 1.1093
Train MAE: 0.0263
Validation Loss (MSE): 0.0776
Validation MAE: 0.0074
Epoch 3/25
Train Loss (MSE): 1.0219
Train MAE: 0.0194
Validation Loss (MSE): 5.3683
Validation MAE: 0.0246
Epoch 4/25
Train Loss (MSE): 0.9068
Train MAE: 0.0159
Validation Loss (MSE): 0.0014
Validation MAE: 0.0032
Model saved as '/kaggle/working/patchtsm_model.pkl'
Epoch 5/25
Train Loss (MSE): 0.7598
Train MAE: 0.0138
Validation Loss (MSE): 0.0161
Validation MAE: 0.0030
Epoch 6/25
Train Loss (MSE): 0.5780
Train MAE: 0.0112
Validation Loss (MSE): 0.0004
Validation MAE: 0.0010
Model saved as '/kaggle/working/patchtsm_model.pkl'
Epoch 7/25
Train Loss (MSE): 0.4237
Train MAE: 0.0098
Validation Loss (MSE): 0.0004
Validation MAE: 0.0008
Epoch 8/25
Train Loss (MSE): 0.4093
Train MAE: 0.0092
Validation Loss (MSE): 0.0018
Validation 