In [1]:
import torch
import torch.nn as nn
from torch import optim  # For optimizers like SGD, Adam, etc.
import torch.nn.functional as F  # Parameterless functions, like (some) activation functions
from torch.utils.data import Dataset, DataLoader, Subset
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler
from tqdm import tqdm
import matplotlib.pyplot as plt

In [38]:
seq_len = 10     # N days of history to look back
target_var = 'Close'  # The price to predict (next day's close)
ticker = 'Name'          # The column containing the ticker
feature_list = ['Open', 'High', 'Low', 'Close', 'Volume']
batch_size = 64

df = pd.read_csv("all_stocks_2017-01-01_to_2018-01-01.csv")
df = df.ffill()

# ticker mapping
ticker_list = df[ticker].unique()
id_mapping = {id_val: idx for idx, id_val in enumerate(ticker_list)}

# normalizing
def group_scale(group):
    scaler = StandardScaler()
    scaled_data = scaler.fit_transform(group[feature_list].value)
    df_scaled = pd.DataFrame(scaled_data,
                             columns= [col for col in feature_list],
                             index = group.index)
    
df1 = df.groupby(ticker).apply(group_scale)

AttributeError: 'DataFrame' object has no attribute 'value'

In [27]:
seq_len = 10     # N days of history to look back
target_var = 'Close'  # The price to predict (next day's close)
ticker = 'Name'          # The column containing the ticker
feature_list = ['Open', 'High', 'Low', 'Close', 'Volume']
batch_size = 64


def create_sequences_numpy(df, seq_len, target_var, feature_list, ticker, id_mapping):
    X_sequences = []
    y_targets = []
    ticker_indices = []

    # Process data 
    for ticker, group in df.groupby(ticker):
        group = group.sort_values('Date').reset_index(drop=True)

        ticker_idx = id_mapping[ticker]

        features = group[feature_list].values
        target = group[target_var].values

        # Sliding Window Creation
        for i in range(len(group) - seq_len):
            # Input X: sequence of features for seq_len days
            X = features[i : i + seq_len]
            # Target Y: the target value immediately following the sequence
            y = target[i + seq_len]

            X_sequences.append(X)
            y_targets.append(y)
            ticker_indices.append(ticker_idx)

    X_array = np.array(X_sequences)
    y_array = np.array(y_targets)
    ticker_array = np.array(ticker_indices, dtype=np.int64)

    return X_array, y_array, ticker_array


class StockTimeseriesDataset(Dataset):
    def __init__(self, X_array, y_array, ticker_array):
        # Store the NumPy arrays directly
        self.X = X_array
        self.y = y_array
        self.ticker = ticker_array

    def __len__(self):
        return len(self.y)

    def __getitem__(self, idx):
        # Use torch.from_numpy() for zero-copy conversion to tensor
        return torch.from_numpy(self.ticker[idx:idx+1]).squeeze(), \
               torch.from_numpy(self.X[idx]), \
               torch.from_numpy(self.y[idx:idx+1]).squeeze()


# --- 3. EXECUTION PIPELINE ---

# Data Loading and Preprocessing
df = pd.read_csv("all_stocks_2017-01-01_to_2018-01-01.csv")
df = df.ffill()

# Ticker Mapping
ticker_list = df[ticker].unique()
id_mapping = {id_val: idx for idx, id_val in enumerate(ticker_list)}

# Normalization (Fit/Transform)
scaler = MinMaxScaler()
df[feature_list] = scaler.fit_transform(df[feature_list])


X_array, y_array, ticker_array = create_sequences_numpy(
    df, seq_len, target_var, feature_list, ticker, id_mapping
)

# Train/Test Split by ticker
# train_df, test_df = train_test_split(df, test_size=0.2, random_state=123, stratify=df[ticker])

# train_df.head()

# --- Sequence Creation using the NumPy function ---

# print("Starting NumPy sequence creation for Training data...")
# X_train, y_train, ticker_train = create_sequences_numpy(
#     train_df, seq_len, target_var, features, ticker, id_mapping
# )

# print("Starting NumPy sequence creation for Testing data...")
# X_test, y_test, ticker_test = create_sequences_numpy(
#     test_df, seq_len, target_var, features, ticker, id_mapping
# )

# print(f"X_train (Samples, Timesteps, Features): {X_train.shape}")
# print(f"y_train (Targets): {y_train.shape}")
# print(f"ticker_train (IDs): {ticker_train.shape}")


# --- Dataset and DataLoader Creation ---

# train_dataset = StockTimeseriesDataset(X_train, y_train, ticker_train)
# test_dataset = StockTimeseriesDataset(X_test, y_test, ticker_test)

# train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
# test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)

# print(f"\nDataLoaders created. Total training sequences: {len(train_dataset)}")
# print(f"Total testing sequences: {len(test_dataset)}")

# # --- Verification ---
# ticker_idx_batch, X_seq_batch, y_target_batch = next(iter(train_loader))

# print("\n--- Verification of PyTorch Batch Shapes (Final Output) ---")
# print(f"Ticker ID Batch Shape: {ticker_idx_batch.shape} (Type: {ticker_idx_batch.dtype})")
# print(f"X Sequence Batch Shape: {X_seq_batch.shape} (Type: {X_seq_batch.dtype})")
# print(f"Y Target Batch Shape: {y_target_batch.shape} (Type: {y_target_batch.dtype})")

(7471,)

In [39]:
# 1. DATA PREPARATION CLASS
class StockTimeseriesDataset(Dataset):
    def __init__(self, df, seq_len, target_col, feature_cols, id_col, id_mapping):
        self.seq_len = seq_len
        self.X_sequences = []
        self.y_targets = []

        # Group data by Company ID (Name)
        for ticker, group in df.groupby(id_col):
            group = group.sort_values('Date').reset_index(drop=True)

            # Get the Ticker index for embedding
            ticker_idx = id_mapping[ticker]

            # Extract features and target arrays
            features = group[feature_cols].values
            target = group[target_col].values

            # Sliding Window Creation
            for i in range(len(group) - seq_len):
                X = features[i : i + seq_len]
                y = target[i + seq_len]

                self.X_sequences.append((ticker_idx, X))
                self.y_targets.append(y)

    def __len__(self):
        return len(self.X_sequences)

    def __getitem__(self, idx):
        # Return the ticker index, the sequence tensor, and the target
        ticker_idx, X_seq = self.X_sequences[idx]
        y_target = self.y_targets[idx]
        
        return torch.tensor(ticker_idx, dtype=torch.long), \
               torch.tensor(X_seq, dtype=torch.float32), \
               torch.tensor(y_target, dtype=torch.float32)


# 2. GRU MODEL ARCHITECTURE
class GRUModel(nn.Module):
    def __init__(self, num_entities, embed_dim, input_dim, hidden_size, num_layers):
        super(GRUModel, self).__init__()
        
        # 1. Ticker ID Embedding Layer
        self.id_embedding = nn.Embedding(num_entities, embed_dim)
        
        # Total feature size = original features + embedding dimension
        self.total_input_dim = input_dim + embed_dim
        
        # 2. GRU Layer
        self.gru = nn.GRU(
            input_size=self.total_input_dim,
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True,
            dropout=0.2 if num_layers > 1 else 0
        )
        
        # 3. Output Layer (Sequence-to-single value: predict one Close price)
        self.fc = nn.Linear(hidden_size, 1)

    def forward(self, ticker_idx, X_seq):
        # Generate ID embedding
        id_embed = self.id_embedding(ticker_idx)
        
        # Expand embedding across the sequence length
        id_embed = id_embed.unsqueeze(1).repeat(1, X_seq.size(1), 1)
        
        # Concatenate embedding with features at every timestep
        combined_input = torch.cat((X_seq, id_embed), dim=2)
        
        # Pass through GRU
        out, h_n = self.gru(combined_input)
        
        # Use the final hidden state of the last layer
        final_output = self.fc(h_n[-1, :, :])
        
        return final_output


In [40]:
# Load Data
df = pd.read_csv("all_stocks_2017-01-01_to_2018-01-01.csv")


# Handle missing data by filling with the next valid observation (standard for time series)
df = df.fillna(method='ffill')

  df = df.fillna(method='ffill')


In [41]:


# --- CONFIGURATION ---
SEQUENCE_LENGTH = 10     # Look back 10 days
TARGET_COLUMN = 'Close'  # Predict the next day's closing price
EMBEDDING_DIM = 8        # Size of the Ticker ID embedding vector
HIDDEN_SIZE = 64
NUM_LAYERS = 2
BATCH_SIZE = 128
LEARNING_RATE = 0.001
NUM_EPOCHS = 50
# ---------------------

# --- EXECUTION ---



# Identify features and create Ticker mapping
ID_COL = 'Name'
FEATURE_COLS = ['Open', 'High', 'Low', 'Close', 'Volume']
UNIQUE_TICKERS = df[ID_COL].unique()
NUM_ENTITIES = len(UNIQUE_TICKERS)

# Create a mapping from actual Ticker to a zero-indexed integer
id_mapping = {id_val: idx for idx, id_val in enumerate(UNIQUE_TICKERS)}


# Normalize Features (Crucial for deep learning)
scaler = MinMaxScaler()
# Fit and transform the features across the entire dataset
df[FEATURE_COLS] = scaler.fit_transform(df[FEATURE_COLS])


# Split data by Ticker ID (to ensure sequences from one company don't leak into another split)
train_tickers, test_tickers = train_test_split(UNIQUE_TICKERS, test_size=0.2, random_state=42)

train_df = df[df[ID_COL].isin(train_tickers)].copy()
test_df = df[df[ID_COL].isin(test_tickers)].copy()


# Create Datasets and DataLoaders
train_dataset = StockTimeseriesDataset(train_df, SEQUENCE_LENGTH, TARGET_COLUMN, FEATURE_COLS, ID_COL, id_mapping)
test_dataset = StockTimeseriesDataset(test_df, SEQUENCE_LENGTH, TARGET_COLUMN, FEATURE_COLS, ID_COL, id_mapping)

train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)


# Initialize Model, Loss, and Optimizer
INPUT_DIM = len(FEATURE_COLS)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

model = GRUModel(
    num_entities=NUM_ENTITIES,
    embed_dim=EMBEDDING_DIM,
    input_dim=INPUT_DIM,
    hidden_size=HIDDEN_SIZE,
    num_layers=NUM_LAYERS
).to(device)

criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)


# 3. TRAINING LOOP
print(f"\nStarting training on {device}...")
for epoch in range(NUM_EPOCHS):
    model.train()
    total_loss = 0
    
    for ticker_idx, X_seq, y_target in tqdm(train_loader, desc=f"Epoch {epoch+1}/{NUM_EPOCHS}"):
        
        # Move data to device and adjust target shape
        ticker_idx, X_seq, y_target = ticker_idx.to(device), X_seq.to(device), y_target.to(device).unsqueeze(1)
        
        # Forward pass
        output = model(ticker_idx, X_seq)
        loss = criterion(output, y_target)
        
        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        total_loss += loss.item()
        
    avg_loss = total_loss / len(train_loader)
    
    # 4. EVALUATION (Testing)
    model.eval()
    test_loss = 0
    with torch.no_grad():
        for ticker_idx, X_seq, y_target in test_loader:
            ticker_idx, X_seq, y_target = ticker_idx.to(device), X_seq.to(device), y_target.to(device).unsqueeze(1)
            
            output = model(ticker_idx, X_seq)
            loss = criterion(output, y_target)
            test_loss += loss.item()

    avg_test_loss = test_loss / len(test_loader)
    
    print(f"Epoch {epoch+1} Complete. Train Loss: {avg_loss:.4f}, Test Loss: {avg_test_loss:.4f}")

print("\nTraining complete.")


Starting training on cpu...


Epoch 1/50: 100%|██████████| 46/46 [00:00<00:00, 125.34it/s]


Epoch 1 Complete. Train Loss: 0.0096, Test Loss: 0.0025


Epoch 2/50: 100%|██████████| 46/46 [00:00<00:00, 184.44it/s]


Epoch 2 Complete. Train Loss: 0.0007, Test Loss: 0.0046


Epoch 3/50: 100%|██████████| 46/46 [00:00<00:00, 204.12it/s]


Epoch 3 Complete. Train Loss: 0.0005, Test Loss: 0.0041


Epoch 4/50: 100%|██████████| 46/46 [00:00<00:00, 211.31it/s]


Epoch 4 Complete. Train Loss: 0.0003, Test Loss: 0.0046


Epoch 5/50: 100%|██████████| 46/46 [00:00<00:00, 215.07it/s]


Epoch 5 Complete. Train Loss: 0.0002, Test Loss: 0.0044


Epoch 6/50: 100%|██████████| 46/46 [00:00<00:00, 184.94it/s]


Epoch 6 Complete. Train Loss: 0.0002, Test Loss: 0.0044


Epoch 7/50: 100%|██████████| 46/46 [00:00<00:00, 214.32it/s]


Epoch 7 Complete. Train Loss: 0.0002, Test Loss: 0.0038


Epoch 8/50: 100%|██████████| 46/46 [00:00<00:00, 217.67it/s]


Epoch 8 Complete. Train Loss: 0.0002, Test Loss: 0.0033


Epoch 9/50: 100%|██████████| 46/46 [00:00<00:00, 205.75it/s]


Epoch 9 Complete. Train Loss: 0.0001, Test Loss: 0.0045


Epoch 10/50: 100%|██████████| 46/46 [00:00<00:00, 201.05it/s]


Epoch 10 Complete. Train Loss: 0.0001, Test Loss: 0.0037


Epoch 11/50: 100%|██████████| 46/46 [00:00<00:00, 211.22it/s]


Epoch 11 Complete. Train Loss: 0.0001, Test Loss: 0.0047


Epoch 12/50: 100%|██████████| 46/46 [00:00<00:00, 210.30it/s]


Epoch 12 Complete. Train Loss: 0.0001, Test Loss: 0.0029


Epoch 13/50: 100%|██████████| 46/46 [00:00<00:00, 197.42it/s]


Epoch 13 Complete. Train Loss: 0.0001, Test Loss: 0.0028


Epoch 14/50: 100%|██████████| 46/46 [00:00<00:00, 173.13it/s]


Epoch 14 Complete. Train Loss: 0.0001, Test Loss: 0.0035


Epoch 15/50: 100%|██████████| 46/46 [00:00<00:00, 205.88it/s]


Epoch 15 Complete. Train Loss: 0.0001, Test Loss: 0.0036


Epoch 16/50: 100%|██████████| 46/46 [00:00<00:00, 196.15it/s]


Epoch 16 Complete. Train Loss: 0.0001, Test Loss: 0.0023


Epoch 17/50: 100%|██████████| 46/46 [00:00<00:00, 197.85it/s]


Epoch 17 Complete. Train Loss: 0.0001, Test Loss: 0.0024


Epoch 18/50: 100%|██████████| 46/46 [00:00<00:00, 201.08it/s]


Epoch 18 Complete. Train Loss: 0.0001, Test Loss: 0.0022


Epoch 19/50: 100%|██████████| 46/46 [00:00<00:00, 193.70it/s]


Epoch 19 Complete. Train Loss: 0.0001, Test Loss: 0.0014


Epoch 20/50: 100%|██████████| 46/46 [00:00<00:00, 213.13it/s]


Epoch 20 Complete. Train Loss: 0.0001, Test Loss: 0.0016


Epoch 21/50: 100%|██████████| 46/46 [00:00<00:00, 191.83it/s]


Epoch 21 Complete. Train Loss: 0.0001, Test Loss: 0.0020


Epoch 22/50: 100%|██████████| 46/46 [00:00<00:00, 165.65it/s]


Epoch 22 Complete. Train Loss: 0.0001, Test Loss: 0.0023


Epoch 23/50: 100%|██████████| 46/46 [00:00<00:00, 217.52it/s]


Epoch 23 Complete. Train Loss: 0.0001, Test Loss: 0.0013


Epoch 24/50: 100%|██████████| 46/46 [00:00<00:00, 219.35it/s]


Epoch 24 Complete. Train Loss: 0.0001, Test Loss: 0.0015


Epoch 25/50: 100%|██████████| 46/46 [00:00<00:00, 214.93it/s]


Epoch 25 Complete. Train Loss: 0.0001, Test Loss: 0.0011


Epoch 26/50: 100%|██████████| 46/46 [00:00<00:00, 214.82it/s]


Epoch 26 Complete. Train Loss: 0.0001, Test Loss: 0.0013


Epoch 27/50: 100%|██████████| 46/46 [00:00<00:00, 218.84it/s]


Epoch 27 Complete. Train Loss: 0.0001, Test Loss: 0.0011


Epoch 28/50: 100%|██████████| 46/46 [00:00<00:00, 222.77it/s]


Epoch 28 Complete. Train Loss: 0.0001, Test Loss: 0.0011


Epoch 29/50: 100%|██████████| 46/46 [00:00<00:00, 216.22it/s]


Epoch 29 Complete. Train Loss: 0.0001, Test Loss: 0.0010


Epoch 30/50: 100%|██████████| 46/46 [00:00<00:00, 173.44it/s]


Epoch 30 Complete. Train Loss: 0.0001, Test Loss: 0.0009


Epoch 31/50: 100%|██████████| 46/46 [00:00<00:00, 198.89it/s]


Epoch 31 Complete. Train Loss: 0.0001, Test Loss: 0.0011


Epoch 32/50: 100%|██████████| 46/46 [00:00<00:00, 204.90it/s]


Epoch 32 Complete. Train Loss: 0.0001, Test Loss: 0.0010


Epoch 33/50: 100%|██████████| 46/46 [00:00<00:00, 213.70it/s]


Epoch 33 Complete. Train Loss: 0.0001, Test Loss: 0.0009


Epoch 34/50: 100%|██████████| 46/46 [00:00<00:00, 220.89it/s]


Epoch 34 Complete. Train Loss: 0.0001, Test Loss: 0.0009


Epoch 35/50: 100%|██████████| 46/46 [00:00<00:00, 214.11it/s]


Epoch 35 Complete. Train Loss: 0.0001, Test Loss: 0.0011


Epoch 36/50: 100%|██████████| 46/46 [00:00<00:00, 214.56it/s]


Epoch 36 Complete. Train Loss: 0.0001, Test Loss: 0.0012


Epoch 37/50: 100%|██████████| 46/46 [00:00<00:00, 189.43it/s]


Epoch 37 Complete. Train Loss: 0.0001, Test Loss: 0.0013


Epoch 38/50: 100%|██████████| 46/46 [00:00<00:00, 172.47it/s]


Epoch 38 Complete. Train Loss: 0.0001, Test Loss: 0.0012


Epoch 39/50: 100%|██████████| 46/46 [00:00<00:00, 216.58it/s]


Epoch 39 Complete. Train Loss: 0.0001, Test Loss: 0.0011


Epoch 40/50: 100%|██████████| 46/46 [00:00<00:00, 203.32it/s]


Epoch 40 Complete. Train Loss: 0.0001, Test Loss: 0.0017


Epoch 41/50: 100%|██████████| 46/46 [00:00<00:00, 194.25it/s]


Epoch 41 Complete. Train Loss: 0.0001, Test Loss: 0.0011


Epoch 42/50: 100%|██████████| 46/46 [00:00<00:00, 189.10it/s]


Epoch 42 Complete. Train Loss: 0.0001, Test Loss: 0.0011


Epoch 43/50: 100%|██████████| 46/46 [00:00<00:00, 209.55it/s]


Epoch 43 Complete. Train Loss: 0.0001, Test Loss: 0.0016


Epoch 44/50: 100%|██████████| 46/46 [00:00<00:00, 218.04it/s]


Epoch 44 Complete. Train Loss: 0.0001, Test Loss: 0.0013


Epoch 45/50: 100%|██████████| 46/46 [00:00<00:00, 216.84it/s]


Epoch 45 Complete. Train Loss: 0.0001, Test Loss: 0.0015


Epoch 46/50: 100%|██████████| 46/46 [00:00<00:00, 193.50it/s]


Epoch 46 Complete. Train Loss: 0.0001, Test Loss: 0.0007


Epoch 47/50: 100%|██████████| 46/46 [00:00<00:00, 200.35it/s]


Epoch 47 Complete. Train Loss: 0.0001, Test Loss: 0.0011


Epoch 48/50: 100%|██████████| 46/46 [00:00<00:00, 185.66it/s]


Epoch 48 Complete. Train Loss: 0.0001, Test Loss: 0.0010


Epoch 49/50: 100%|██████████| 46/46 [00:00<00:00, 209.26it/s]


Epoch 49 Complete. Train Loss: 0.0000, Test Loss: 0.0017


Epoch 50/50: 100%|██████████| 46/46 [00:00<00:00, 217.69it/s]

Epoch 50 Complete. Train Loss: 0.0000, Test Loss: 0.0013

Training complete.





In [43]:
model.id_embedding.weight

Parameter containing:
tensor([[-2.2826e-01, -7.8797e-01, -6.4987e-01,  6.1657e-01,  5.2846e-01,
          1.5343e-01, -2.1592e+00, -2.8042e-01],
        [ 1.6847e+00,  3.0657e-01, -1.5596e+00,  2.0460e-02, -1.7752e+00,
          2.1215e+00,  1.3236e+00, -2.3348e-01],
        [ 2.7797e-01,  5.8675e-01, -9.0585e-01, -5.5395e-02, -3.4458e-01,
          9.3455e-01, -3.5398e-01, -1.8477e+00],
        [-3.7466e-01,  1.1362e+00,  6.7229e-01,  1.1681e+00, -9.0135e-01,
          4.4223e-01,  1.3015e+00,  3.3303e-01],
        [-1.2875e+00,  1.1090e+00, -8.5827e-02,  3.5956e-01,  1.6598e+00,
         -9.6496e-01,  1.4201e+00, -1.7681e-01],
        [-2.3735e+00, -2.5646e-01,  4.0951e-01, -2.2480e-01,  1.8452e-01,
         -6.4926e-01, -1.2565e+00,  1.4070e+00],
        [ 1.9081e-01,  1.0518e+00, -5.1251e-01,  6.7694e-01,  1.0424e+00,
         -1.3231e+00, -9.7702e-01,  6.8235e-01],
        [ 9.9218e-01,  6.7542e-01, -6.7058e-01, -6.7242e-01, -4.7832e-01,
         -1.3384e+00,  2.1459e+00,  3.3527e

In [44]:
output

tensor([[0.0091],
        [0.0090],
        [0.0089],
        [0.0089],
        [0.0090],
        [0.0091],
        [0.0092],
        [0.0093],
        [0.0093],
        [0.0093],
        [0.0094],
        [0.0095],
        [0.0095],
        [0.0096],
        [0.0095],
        [0.0095],
        [0.0095],
        [0.0095],
        [0.0095],
        [0.0096],
        [0.0096],
        [0.0097],
        [0.0098]])

In [3]:
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
# ... import other libraries (tqdm, numpy, etc.)

# --- Configuration (Same as before) ---
SEQUENCE_LENGTH = 10 
# ...

# 1. DATA PREPARATION (Filtering and Scaling)
df = pd.read_csv("all_stocks_2017-01-01_to_2018-01-01.csv")
df = df.fillna(method='ffill')

# --- Select and Filter for ONLY ONE Stock (e.g., AAPL) ---
SINGLE_STOCK_NAME = 'AAPL'
single_stock_df = df[df['Name'] == SINGLE_STOCK_NAME].copy()

FEATURE_COLS = ['Open', 'High', 'Low', 'Close', 'Volume']
TARGET_COLUMN = 'Close'

# Normalize Features (Fit only on this stock's data)
scaler = MinMaxScaler()
single_stock_df[FEATURE_COLS] = scaler.fit_transform(single_stock_df[FEATURE_COLS])

# 2. Dataset Creation (No ID/Mapping needed)
class SingleStockDataset(torch.utils.data.Dataset):
    def __init__(self, df, seq_len, target_col, feature_cols):
        # ... logic to create X and y sequences (similar to previous class, but without ID)
        self.X_sequences = []
        self.y_targets = []
        
        features = df[feature_cols].values
        target = df[target_col].values
        
        for i in range(len(df) - seq_len):
            self.X_sequences.append(features[i : i + seq_len])
            self.y_targets.append(target[i + seq_len])
            
    def __len__(self):
        return len(self.X_sequences)
    
    def __getitem__(self, idx):
        return torch.tensor(self.X_sequences[idx], dtype=torch.float32), \
               torch.tensor(self.y_targets[idx], dtype=torch.float32)

# Splitting this single stock data (e.g., 80% train, 20% test)
train_split = int(0.8 * len(single_stock_df))
train_df = single_stock_df.iloc[:train_split]
test_df = single_stock_df.iloc[train_split:]

train_dataset_single = SingleStockDataset(train_df, SEQUENCE_LENGTH, TARGET_COLUMN, FEATURE_COLS)
test_dataset_single = SingleStockDataset(test_df, SEQUENCE_LENGTH, TARGET_COLUMN, FEATURE_COLS)

train_loader_single = DataLoader(train_dataset_single, batch_size=64, shuffle=True)
test_loader_single = DataLoader(test_dataset_single, batch_size=64, shuffle=False)


# 3. SINGLE GRU MODEL Architecture
class SingleGRUModel(nn.Module):
    def __init__(self, input_dim, hidden_size, num_layers):
        super(SingleGRUModel, self).__init__()
        
        self.gru = nn.GRU(
            input_size=input_dim, # No embedding added here
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True
        )
        
        self.fc = nn.Linear(hidden_size, 1)

    def forward(self, X_seq):
        # X_seq: (batch_size, seq_len, input_dim)
        out, h_n = self.gru(X_seq)
        final_output = self.fc(h_n[-1, :, :])
        return final_output

# 4. Training Loop (Similar to the shared model loop, but without the 'ID' handling)
# ... Initialize SingleGRUModel and train using train_loader_single

  df = df.fillna(method='ffill')


In [None]:
train_df


Unnamed: 0,Date,Open,High,Low,Close,Volume,Name
502,2017-01-03,0.000000,0.000000,0.000000,0.002152,0.150627,AAPL
503,2017-01-04,0.000843,0.002957,0.016473,0.000000,0.072392,AAPL
504,2017-01-05,0.002023,0.008707,0.017471,0.009768,0.083371,AAPL
505,2017-01-06,0.016523,0.030064,0.028453,0.031291,0.180947,AAPL
506,2017-01-09,0.036250,0.050928,0.052912,0.049172,0.199424,AAPL
...,...,...,...,...,...,...,...
697,2017-10-11,0.677289,0.667817,0.682030,0.671026,0.029390,AAPL
698,2017-10-12,0.683696,0.674224,0.681697,0.661921,0.021421,AAPL
699,2017-10-13,0.690103,0.672745,0.693012,0.678311,0.024169,AAPL
700,2017-10-16,0.709830,0.717431,0.713644,0.726159,0.103052,AAPL
