In [2]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import os
from subprocess import check_output
import seaborn as sns
import matplotlib.pyplot as plt
import warnings
from pandas.plotting import lag_plot
from datetime import datetime
from statsmodels.tsa.arima_model import ARIMA
from sklearn.metrics import mean_squared_error

In [3]:
# Read the minute-bar CSV from the Kaggle input mount
# (presumably NIFTY 50 data, judging by the file name - confirm against the dataset).
data = pd.read_csv(filepath_or_buffer="/kaggle/input/minute/nifty50minute.csv")
# Silence every warning raised from here on; note this also hides deprecation notices.
warnings.filterwarnings(action='ignore')

In [4]:
# Keep only the price columns on a fresh 0..n-1 index.
# The frame is rebound instead of mutated in place and missing columns are
# tolerated, so re-running this cell is harmless (the in-place version raised
# KeyError on a second run because 'volume' and 'Date' were already gone).
data = (
    data
    .reset_index(drop=True)
    .drop(columns=['volume', 'Date'], errors='ignore')
)
# Last expression of the cell so the resulting shape is actually displayed.
data.shape

In [7]:
# Work only with the rows from position 500000 onward (all columns are kept).
df = data.iloc[500000:]

In [11]:
import numpy as np
import torch
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split

# Load OHLC data (ensure df has columns: 'open', 'high', 'low', 'close')

# Fit the min/max scaler on the chronologically first 80% of rows only, then
# apply it to every row. Fitting on the whole frame (as fit_transform(df) did)
# lets the min/max of the held-out period leak into the training inputs.
TRAIN_FRACTION = 0.8  # mirrors the 80/20 chronological split (test_size=0.2) used below
n_fit_rows = int(len(df) * TRAIN_FRACTION)

scaler = MinMaxScaler()
scaler.fit(df.iloc[:n_fit_rows])
# Rows after the fit window can legitimately scale outside [0, 1].
data_scaled = scaler.transform(df)

# Convert data into sequences
def create_sequences(data, seq_len):
    """Cut `data` into overlapping windows, each paired with the following row's column 3.

    Args:
        data: 2-D array indexable as ``data[row, col]``. Column 3 supplies the
            target (the caller passes scaled OHLC rows and treats it as 'close').
        seq_len: Number of consecutive rows in every input window.

    Returns:
        Tuple ``(sequences, targets)`` of NumPy arrays. ``sequences[k]`` is
        ``data[k:k + seq_len]`` and ``targets[k]`` is ``data[k + seq_len, 3]``.
        One pair is produced for each of the ``len(data) - seq_len`` start rows.
    """
    window_starts = range(len(data) - seq_len)
    windows = [data[start:start + seq_len] for start in window_starts]
    next_values = [data[start + seq_len, 3] for start in window_starts]
    return np.array(windows), np.array(next_values)

seq_len = 30  # rows per input window
X, y = create_sequences(data_scaled, seq_len)

# Chronological hold-out: shuffle=False keeps the last 20% of windows as the test set.
X_train, X_test, y_train, y_test = train_test_split(X, y, shuffle=False, test_size=0.2)

# Use the GPU when one is visible, otherwise stay on the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Build float32 tensors and move them to the chosen device.
# Targets get a trailing axis so they become column vectors of shape (n, 1).
X_train_torch = torch.tensor(X_train, dtype=torch.float32).to(device)
y_train_torch = torch.tensor(y_train, dtype=torch.float32).unsqueeze(-1).to(device)

X_test_torch = torch.tensor(X_test, dtype=torch.float32).to(device)
y_test_torch = torch.tensor(y_test, dtype=torch.float32).unsqueeze(-1).to(device)

# Report the resulting tensor shapes.
print("Train set:", X_train_torch.shape, y_train_torch.shape)
print("Test set:", X_test_torch.shape, y_test_torch.shape)


Train set: torch.Size([346332, 30, 4]) torch.Size([346332, 1])
Test set: torch.Size([86584, 30, 4]) torch.Size([86584, 1])


In [15]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from torch.utils.data import DataLoader, TensorDataset

# ========================================
# 1. Setup Device (GPU if available)
# ========================================
# Pick CUDA when PyTorch can see a GPU; fall back to the CPU otherwise.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print("Using device:", device)

# ========================================
# 2. Define Self-Attention Module
# ========================================
class SelfAttention(nn.Module):
    """Single-head scaled dot-product self-attention over the second-to-last axis.

    Learnable parameters are the three ``hidden_dim -> hidden_dim`` linear
    projections ``query``, ``key`` and ``value``; the state_dict therefore
    contains exactly those weights and biases.

    Args:
        hidden_dim: Size of the last (feature) axis of the input.
    """

    def __init__(self, hidden_dim):
        super().__init__()
        self.query = nn.Linear(hidden_dim, hidden_dim)
        self.key = nn.Linear(hidden_dim, hidden_dim)
        self.value = nn.Linear(hidden_dim, hidden_dim)
        # Plain Python float rather than a tensor attribute: a tensor stored
        # this way is neither a parameter nor a buffer, so it would stay on the
        # CPU when the module is moved with .to(device). A float has no device.
        self.scale = float(hidden_dim) ** 0.5

    def forward(self, x):
        """Attend every position of `x` to every other position.

        Args:
            x: Tensor of shape (..., seq_len, hidden_dim); the usual input is
                (batch_size, seq_len, hidden_dim).

        Returns:
            Tuple ``(attn_output, attn_weights)`` with shapes
            (..., seq_len, hidden_dim) and (..., seq_len, seq_len). Each row of
            ``attn_weights`` sums to 1 (softmax over the last axis).
        """
        Q = self.query(x)
        K = self.key(x)
        V = self.value(x)
        # matmul with a (-2, -1) transpose equals bmm for 3-D input and also
        # accepts additional leading batch dimensions.
        scores = torch.matmul(Q, K.transpose(-2, -1)) / self.scale
        attn_weights = F.softmax(scores, dim=-1)
        attn_output = torch.matmul(attn_weights, V)
        return attn_output, attn_weights

# ========================================
# 3. Define the Agent Network (Per-Agent Model)
# ========================================
class AgentNetwork(nn.Module):
    """One ensemble member: LSTM -> self-attention -> flatten -> dense -> scalar head.

    Args:
        input_dim: Number of features per time step (4 for OHLC).
        hidden_dim: Width of the LSTM, the attention layer and the dense layer.
        lstm_layers: Number of stacked LSTM layers.
        seq_len: Length of the input window. The dense layer is sized for
            ``seq_len * hidden_dim`` inputs, so inputs must have exactly this
            many time steps.
    """

    def __init__(self, input_dim, hidden_dim, lstm_layers, seq_len):
        super().__init__()
        self.seq_len = seq_len
        self.hidden_dim = hidden_dim

        # Recurrent encoder over the time axis (batch-first layout).
        self.lstm = nn.LSTM(
            input_size=input_dim,
            hidden_size=hidden_dim,
            num_layers=lstm_layers,
            batch_first=True,
        )

        # Re-weights the LSTM outputs across time steps.
        self.attention = SelfAttention(hidden_dim)

        # Compresses the flattened (seq_len * hidden_dim) vector to hidden_dim.
        self.fc = nn.Linear(in_features=seq_len * hidden_dim, out_features=hidden_dim)

        # Scalar regression head.
        self.out = nn.Linear(in_features=hidden_dim, out_features=1)

    def forward(self, x):
        """Run one window batch through the agent.

        Args:
            x: Tensor of shape (batch_size, seq_len, input_dim).

        Returns:
            Tuple ``(prediction, features)`` with shapes (batch_size, 1) and
            (batch_size, hidden_dim).
        """
        batch_size = x.size(0)
        hidden_states, _ = self.lstm(x)               # (batch_size, seq_len, hidden_dim)
        attended, _ = self.attention(hidden_states)   # (batch_size, seq_len, hidden_dim)

        # Collapse the time and feature axes into one vector per sample.
        flattened = attended.reshape(batch_size, -1)  # (batch_size, seq_len*hidden_dim)
        features = F.relu(self.fc(flattened))         # (batch_size, hidden_dim)

        return self.out(features), features

# ========================================
# 4. Define the Fusion Layer (Meta-Learner)
# ========================================
class FusionLayer(nn.Module):
    """Two-layer MLP that maps the concatenated agent features to one value.

    Args:
        num_agents: Number of agent models whose features are concatenated.
        agent_feature_dim: Length of the feature vector produced by each agent.
        fusion_hidden_dim: Width of the hidden layer.
    """

    def __init__(self, num_agents, agent_feature_dim, fusion_hidden_dim):
        super().__init__()
        concat_dim = num_agents * agent_feature_dim
        self.fc1 = nn.Linear(concat_dim, fusion_hidden_dim)
        self.fc2 = nn.Linear(fusion_hidden_dim, 1)

    def forward(self, features_list):
        """Fuse per-agent features.

        Args:
            features_list: Sequence of tensors, each (batch_size, agent_feature_dim).

        Returns:
            Tensor of shape (batch_size, 1).
        """
        # Join the agents side by side: (batch_size, num_agents*agent_feature_dim).
        joined = torch.cat(features_list, dim=1)
        hidden = F.relu(self.fc1(joined))
        return self.fc2(hidden)

# ========================================
# 5. Define the Multi-Agent Ensemble Model
# ========================================
class MultiAgentEnsemble(nn.Module):
    """Several independent AgentNetwork instances whose features feed one FusionLayer.

    Args:
        input_dim: Number of features per time step (4 for OHLC).
        hidden_dim: Hidden dimension used by each agent.
        lstm_layers: Number of LSTM layers in each agent.
        seq_len: Length of the input sequence.
        num_agents: Number of agents in the ensemble.
        fusion_hidden_dim: Hidden dimension for the fusion layer.
    """

    def __init__(self, input_dim, hidden_dim, lstm_layers, seq_len, num_agents, fusion_hidden_dim):
        super().__init__()
        self.num_agents = num_agents

        # Registered through ModuleList so every agent's parameters are tracked.
        agent_pool = [
            AgentNetwork(input_dim, hidden_dim, lstm_layers, seq_len)
            for _ in range(num_agents)
        ]
        self.agents = nn.ModuleList(agent_pool)

        # Meta-learner consuming the concatenated agent feature vectors.
        self.fusion = FusionLayer(num_agents, hidden_dim, fusion_hidden_dim)

    def forward(self, x):
        """Evaluate every agent on `x` and fuse their features.

        Args:
            x: Tensor of shape (batch_size, seq_len, input_dim).

        Returns:
            Tuple ``(agent_predictions, fusion_output)``: a list with one
            prediction tensor per agent, and the fusion layer's output computed
            from the agents' feature vectors (not from their predictions).
        """
        per_agent = [agent(x) for agent in self.agents]
        agent_predictions = [prediction for prediction, _ in per_agent]
        agent_features = [features for _, features in per_agent]

        fusion_output = self.fusion(agent_features)
        return agent_predictions, fusion_output

# ========================================
# 6. Data Preparation Functions
# ========================================
def create_sequences(data, seq_length, prediction_horizon=5):
    """
    Create sequences of data with a target value that's prediction_horizon steps ahead.

    Sample ``k`` uses rows ``k .. k+seq_length-1`` as input and the column-3
    value of row ``k + seq_length + prediction_horizon - 1`` as target, i.e.
    the target lies ``prediction_horizon`` rows after the last input row.
    Every start row whose target still falls inside ``data`` is used,
    including the final one (the previous loop bound stopped one sample short).

    Args:
        data: Normalized numpy array of OHLC data, shape (n_rows, n_features)
            with the close price in column 3
        seq_length: Length of input sequence
        prediction_horizon: How many steps ahead to predict (default: 5 minutes ahead)

    Returns:
        X: Input sequences, shape (n_samples, seq_length, n_features)
        y: Target values (close prices prediction_horizon steps ahead),
           shape (n_samples, 1)
        where ``n_samples = n_rows - seq_length - prediction_horizon + 1``;
        when that is not positive, both arrays are empty but keep their rank.
    """
    data = np.asarray(data)
    n_samples = len(data) - seq_length - prediction_horizon + 1
    if n_samples <= 0:
        # Not enough rows for a single (window, target) pair.
        empty_X = np.empty((0, seq_length) + data.shape[1:], dtype=data.dtype)
        empty_y = np.empty((0, 1), dtype=data.dtype)
        return empty_X, empty_y

    starts = np.arange(n_samples)
    # Row indices of every window, shape (n_samples, seq_length).
    window_rows = starts[:, None] + np.arange(seq_length)[None, :]
    X = data[window_rows]

    target_rows = starts + seq_length + prediction_horizon - 1
    y = data[target_rows, 3].reshape(-1, 1)  # Index 3 is Close price
    return X, y

def prepare_data(df, seq_length=30, prediction_horizon=5, train_split=0.8, batch_size=32,
                 fit_scaler_on_train_only=False):
    """
    Prepare data for training and testing.

    The OHLC columns are min-max scaled, cut into windows with
    ``create_sequences`` and split chronologically (first ``train_split``
    fraction for training, remainder for testing).

    Args:
        df: Pandas DataFrame with OHLC data
        seq_length: Length of input sequences
        prediction_horizon: How many steps ahead to predict
        train_split: Fraction of data to use for training
        batch_size: Batch size for DataLoader
        fit_scaler_on_train_only: When False (default, the historical
            behaviour) the scaler is fit on ALL rows, so the min/max of the
            test period leaks into the training inputs. Set to True to fit on
            rows that belong to the training windows only. Whoever rebuilds
            the scaler for inference must fit it the same way.

    Returns:
        train_loader: DataLoader for training data
        test_loader: DataLoader for testing data
        scaler: Fitted scaler for inverse transformation

    Raises:
        ValueError: If an OHLC column is missing, or if the data is too short
            to yield at least one training sample.
    """
    ohlc_columns = ['open', 'high', 'low', 'close']

    # Ensure we have OHLC columns
    if not all(col in df.columns for col in ohlc_columns):
        raise ValueError("DataFrame must contain 'open', 'high', 'low', 'close' columns")

    # Extract OHLC data
    data = df[ohlc_columns].values

    # Normalize data
    scaler = MinMaxScaler(feature_range=(0, 1))
    if fit_scaler_on_train_only:
        # Conservative bound: never exceeds the number of training samples,
        # so every fitted row is the first input row of some training window.
        n_fit_rows = int((len(data) - seq_length - prediction_horizon) * train_split)
        if n_fit_rows < 1:
            raise ValueError("Not enough rows to fit the scaler on the training portion")
        scaler.fit(data[:n_fit_rows])
        data_normalized = scaler.transform(data)
    else:
        data_normalized = scaler.fit_transform(data)

    # Create sequences
    X, y = create_sequences(data_normalized, seq_length, prediction_horizon)

    # Train-test split (chronological: no shuffling across the boundary)
    train_size = int(len(X) * train_split)
    if train_size == 0:
        raise ValueError(
            f"Not enough rows ({len(df)}) to build a training sample with "
            f"seq_length={seq_length} and prediction_horizon={prediction_horizon}"
        )
    X_train, X_test = X[:train_size], X[train_size:]
    y_train, y_test = y[:train_size], y[train_size:]

    # Convert to float32 PyTorch tensors
    X_train_tensor = torch.as_tensor(X_train, dtype=torch.float32)
    y_train_tensor = torch.as_tensor(y_train, dtype=torch.float32)
    X_test_tensor = torch.as_tensor(X_test, dtype=torch.float32)
    y_test_tensor = torch.as_tensor(y_test, dtype=torch.float32)

    # Create datasets and dataloaders
    train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
    test_dataset = TensorDataset(X_test_tensor, y_test_tensor)

    # Only the training batches are shuffled; test order stays chronological.
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

    return train_loader, test_loader, scaler

# ========================================
# 7. Training Functions
# ========================================
def train_model(model, train_loader, test_loader, epochs=50, lr=0.001):
    """
    Train the model.

    Optimises MSE between the fusion output and the target with Adam, and
    halves the learning rate when the validation loss stops improving
    (ReduceLROnPlateau, patience 5). Only the fusion output enters the loss;
    the per-agent predictions returned by the model are not used here.
    The model is moved to the module-level ``device``.

    Args:
        model: The MultiAgentEnsemble model
        train_loader: DataLoader for training data
        test_loader: DataLoader for testing data; used as the validation set
            after every epoch. May be empty, in which case the validation
            loss is reported as nan and the scheduler follows the training loss.
        epochs: Number of training epochs
        lr: Learning rate

    Returns:
        model: Trained model (state after the final epoch)
        losses: List of training losses (mean per-batch loss, one per epoch)

    Raises:
        ValueError: If train_loader yields no batches.
    """
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    criterion = nn.MSELoss()
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', patience=5, factor=0.5)

    n_train_batches = len(train_loader)
    n_val_batches = len(test_loader)
    if n_train_batches == 0:
        raise ValueError("train_loader is empty; nothing to train on")

    model.to(device)
    losses = []

    for epoch in range(epochs):
        model.train()
        epoch_loss = 0.0
        for X_batch, y_batch in train_loader:
            X_batch, y_batch = X_batch.to(device), y_batch.to(device)

            # Forward pass (per-agent predictions are ignored)
            _, fusion_pred = model(X_batch)

            # Loss on fusion prediction
            loss = criterion(fusion_pred, y_batch)

            # Backward pass and optimization
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            epoch_loss += loss.item()

        # Validation
        model.eval()
        val_loss = 0.0
        with torch.no_grad():
            for X_val, y_val in test_loader:
                X_val, y_val = X_val.to(device), y_val.to(device)
                _, fusion_pred = model(X_val)
                val_loss += criterion(fusion_pred, y_val).item()

        avg_train_loss = epoch_loss / n_train_batches
        # Guard against an empty validation loader instead of dividing by zero.
        avg_val_loss = val_loss / n_val_batches if n_val_batches else float('nan')
        losses.append(avg_train_loss)

        print(f"Epoch {epoch+1}/{epochs} - Train Loss: {avg_train_loss:.6f}, Val Loss: {avg_val_loss:.6f}")

        # Learning rate scheduling
        scheduler.step(avg_val_loss if n_val_batches else avg_train_loss)

    return model, losses

# ========================================
# 8. Example Usage with Real Data
# ========================================
if __name__ == "__main__":
    # Seed NumPy and PyTorch so the dummy data, the weight initialisation and
    # the batch shuffling are repeatable from one run to the next.
    SEED = 42
    np.random.seed(SEED)
    torch.manual_seed(SEED)

    # Reuse a DataFrame named `df` left behind by an earlier cell; otherwise
    # synthesise one. `df` must have columns: 'open', 'high', 'low', 'close'.
    if 'df' in globals():
        print(f"Using existing dataframe with shape: {df.shape}")
    else:
        print("Creating dummy dataframe for demonstration. Replace with your actual dataframe.")
        n_rows = 10000
        dummy_open = np.random.normal(100, 5, n_rows)
        dummy_close = np.random.normal(100, 5, n_rows)
        # Built column-wise in one shot: high sits above and low below both
        # open and close by a random positive margin.
        df = pd.DataFrame({
            'datetime': pd.date_range(start='2023-01-01', periods=n_rows, freq='1min'),
            'open': dummy_open,
            'high': np.maximum(dummy_open, dummy_close) + np.abs(np.random.normal(0, 1, n_rows)),
            'low': np.minimum(dummy_open, dummy_close) - np.abs(np.random.normal(0, 1, n_rows)),
            'close': dummy_close,
        })

    # Hyperparameters
    input_dim = 4          # OHLC (4 features)
    hidden_dim = 64        # LSTM hidden size
    lstm_layers = 1        # Number of LSTM layers
    seq_len = 30           # Input window: last 30 minutes of data
    num_agents = 3         # Number of agents in the ensemble
    fusion_hidden_dim = 128  # Fusion layer hidden dimension
    batch_size = 32
    epochs = 50
    prediction_horizon = 5  # Predict price 5 minutes ahead

    # Prepare data
    train_loader, test_loader, scaler = prepare_data(
        df=df,
        seq_length=seq_len,
        prediction_horizon=prediction_horizon,
        batch_size=batch_size
    )

    # Initialize model
    model = MultiAgentEnsemble(
        input_dim=input_dim,
        hidden_dim=hidden_dim,
        lstm_layers=lstm_layers,
        seq_len=seq_len,
        num_agents=num_agents,
        fusion_hidden_dim=fusion_hidden_dim
    )

    # Train model
    trained_model, losses = train_model(
        model=model,
        train_loader=train_loader,
        test_loader=test_loader,
        epochs=epochs
    )

    # Save the trained model (weights only). This line is reached only when
    # training runs to completion, so an interrupted run leaves any older
    # checkpoint file untouched.
    torch.save(trained_model.state_dict(), 'multi_agent_trading_model.pth')
    print("Model saved to 'multi_agent_trading_model.pth'")

    # Example of making predictions with the trained model
    trained_model.eval()
    with torch.no_grad():
        # Get a test batch
        X_sample, y_sample = next(iter(test_loader))
        X_sample = X_sample.to(device)

        # Make predictions
        _, predictions = trained_model(X_sample)

        # Convert predictions back to original scale.
        # The scaler was fit on four columns, so the predictions are placed in
        # the close column (index 3) of a zero-filled array before inverting.
        dummy = np.zeros((predictions.shape[0], 4))
        dummy[:, 3] = predictions.cpu().numpy().flatten()

        # Inverse transform
        predictions_original_scale = scaler.inverse_transform(dummy)[:, 3]

        # Print first few predictions
        print("\nSample Predictions (Original Scale):")
        for i, value in enumerate(predictions_original_scale[:5]):
            print(f"Prediction {i+1}: {value:.2f}")

Using device: cuda
Using existing dataframe with shape: (432946, 4)
Epoch 1/50 - Train Loss: 0.000096, Val Loss: 0.000008
Epoch 2/50 - Train Loss: 0.000010, Val Loss: 0.000107
Epoch 3/50 - Train Loss: 0.000008, Val Loss: 0.000027
Epoch 4/50 - Train Loss: 0.000007, Val Loss: 0.000018
Epoch 5/50 - Train Loss: 0.000007, Val Loss: 0.000014
Epoch 6/50 - Train Loss: 0.000006, Val Loss: 0.000025
Epoch 7/50 - Train Loss: 0.000006, Val Loss: 0.000017
Epoch 8/50 - Train Loss: 0.000004, Val Loss: 0.000008
Epoch 9/50 - Train Loss: 0.000004, Val Loss: 0.000007
Epoch 10/50 - Train Loss: 0.000004, Val Loss: 0.000017
Epoch 11/50 - Train Loss: 0.000004, Val Loss: 0.000005
Epoch 12/50 - Train Loss: 0.000004, Val Loss: 0.000005
Epoch 13/50 - Train Loss: 0.000004, Val Loss: 0.000005
Epoch 14/50 - Train Loss: 0.000004, Val Loss: 0.000014
Epoch 15/50 - Train Loss: 0.000004, Val Loss: 0.000006
Epoch 16/50 - Train Loss: 0.000004, Val Loss: 0.000019
Epoch 17/50 - Train Loss: 0.000004, Val Loss: 0.000014
Epoch 

In [16]:
# import torch
# import pandas as pd
# import numpy as np
# from sklearn.preprocessing import MinMaxScaler

# # Define the model classes (need to be identical to the training code)
# import torch.nn as nn
# import torch.nn.functional as F

# class SelfAttention(nn.Module):
#     def __init__(self, hidden_dim):
#         super(SelfAttention, self).__init__()
#         self.query = nn.Linear(hidden_dim, hidden_dim)
#         self.key = nn.Linear(hidden_dim, hidden_dim)
#         self.value = nn.Linear(hidden_dim, hidden_dim)
#         self.scale = torch.sqrt(torch.tensor(hidden_dim, dtype=torch.float))
    
#     def forward(self, x):
#         Q = self.query(x)
#         K = self.key(x)
#         V = self.value(x)
#         scores = torch.bmm(Q, K.transpose(1, 2)) / self.scale
#         attn_weights = F.softmax(scores, dim=-1)
#         attn_output = torch.bmm(attn_weights, V)
#         return attn_output, attn_weights

# class AgentNetwork(nn.Module):
#     def __init__(self, input_dim, hidden_dim, lstm_layers, seq_len):
#         super(AgentNetwork, self).__init__()
#         self.seq_len = seq_len
#         self.hidden_dim = hidden_dim
#         self.lstm = nn.LSTM(input_dim, hidden_dim, num_layers=lstm_layers, batch_first=True)
#         self.attention = SelfAttention(hidden_dim)
#         self.fc = nn.Linear(seq_len * hidden_dim, hidden_dim)
#         self.out = nn.Linear(hidden_dim, 1)
        
#     def forward(self, x):
#         lstm_out, _ = self.lstm(x)
#         attn_out, _ = self.attention(lstm_out)
#         flat = attn_out.contiguous().view(x.size(0), -1)
#         features = F.relu(self.fc(flat))
#         prediction = self.out(features)
#         return prediction, features

# class FusionLayer(nn.Module):
#     def __init__(self, num_agents, agent_feature_dim, fusion_hidden_dim):
#         super(FusionLayer, self).__init__()
#         self.fc1 = nn.Linear(num_agents * agent_feature_dim, fusion_hidden_dim)
#         self.fc2 = nn.Linear(fusion_hidden_dim, 1)
        
#     def forward(self, features_list):
#         fusion_input = torch.cat(features_list, dim=1)
#         x = F.relu(self.fc1(fusion_input))
#         output = self.fc2(x)
#         return output

# class MultiAgentEnsemble(nn.Module):
#     def __init__(self, input_dim, hidden_dim, lstm_layers, seq_len, num_agents, fusion_hidden_dim):
#         super(MultiAgentEnsemble, self).__init__()
#         self.num_agents = num_agents
#         self.agents = nn.ModuleList([
#             AgentNetwork(input_dim, hidden_dim, lstm_layers, seq_len) for _ in range(num_agents)
#         ])
#         self.fusion = FusionLayer(num_agents, hidden_dim, fusion_hidden_dim)
        
#     def forward(self, x):
#         agent_predictions = []
#         agent_features = []
#         for agent in self.agents:
#             pred, feat = agent(x)
#             agent_predictions.append(pred)
#             agent_features.append(feat)
#         fusion_output = self.fusion(agent_features)
#         return agent_predictions, fusion_output

# # Function to predict the next price
# def predict_next_price(model, df, seq_length=30):
#     """Simple function to predict the next price using the last seq_length data points"""
#     device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
#     model.to(device)
#     model.eval()
    
#     # Get the most recent data
#     recent_data = df[['open', 'high', 'low', 'close']].iloc[-seq_length:].values
    
#     # Scale the data
#     scaler = MinMaxScaler(feature_range=(0, 1))
#     recent_data_scaled = scaler.fit_transform(recent_data)
    
#     # Convert to tensor and add batch dimension
#     input_tensor = torch.FloatTensor(recent_data_scaled).unsqueeze(0).to(device)
    
#     # Make prediction
#     with torch.no_grad():
#         _, prediction = model(input_tensor)
    
#     # Create dummy array for inverse scaling
#     dummy = np.zeros((1, 4))
#     dummy[0, 3] = prediction.item()  # Put prediction in close column
    
#     # Inverse transform to get the actual price
#     predicted_price = scaler.inverse_transform(dummy)[0, 3]
    
#     return predicted_price

# # Main execution
# if __name__ == "__main__":
#     # Model hyperparameters (must match the trained model)
#     input_dim = 4
#     hidden_dim = 64
#     lstm_layers = 1
#     seq_len = 30
#     num_agents = 3
#     fusion_hidden_dim = 128
    
#     # Load the model
#     model = MultiAgentEnsemble(
#         input_dim=input_dim,
#         hidden_dim=hidden_dim,
#         lstm_layers=lstm_layers,
#         seq_len=seq_len,
#         num_agents=num_agents,
#         fusion_hidden_dim=fusion_hidden_dim
#     )
    
#     # Load model weights
#     model.load_state_dict(torch.load('/kaggle/working/multi_agent_trading_model.pth'))
#     print("Model loaded successfully.")
    
#     # Predict using your DataFrame (df)
#     predicted_price = predict_next_price(model, df, seq_len)
    
#     print(f"Predicted next price: {predicted_price:.2f}")

Model loaded successfully.
Predicted next price: 23569.48


In [18]:
import torch
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler

# Define the model classes (need to be identical to the training code)
import torch.nn as nn
import torch.nn.functional as F

class SelfAttention(nn.Module):
    """Single-head scaled dot-product self-attention for (batch, seq_len, hidden_dim) input.

    Parameter names (`query`, `key`, `value`) must stay as they are so that a
    saved state_dict of the training-time class loads into this one.
    """

    def __init__(self, hidden_dim):
        super().__init__()
        self.query = nn.Linear(in_features=hidden_dim, out_features=hidden_dim)
        self.key = nn.Linear(in_features=hidden_dim, out_features=hidden_dim)
        self.value = nn.Linear(in_features=hidden_dim, out_features=hidden_dim)
        # sqrt(hidden_dim) kept as a 0-dim float tensor (not a parameter or buffer).
        self.scale = torch.tensor(hidden_dim, dtype=torch.float).sqrt()

    def forward(self, x):
        """Return ``(attn_output, attn_weights)`` for a 3-D input batch `x`."""
        queries = self.query(x)
        keys = self.key(x)
        values = self.value(x)
        # Pairwise similarity between time steps, scaled by sqrt(hidden_dim).
        similarity = torch.bmm(queries, keys.transpose(1, 2)) / self.scale
        attn_weights = F.softmax(similarity, dim=-1)
        return torch.bmm(attn_weights, values), attn_weights

class AgentNetwork(nn.Module):
    """Per-agent model: LSTM, self-attention, a dense layer over the flattened
    sequence and a scalar output head.

    Attribute names (`lstm`, `attention`, `fc`, `out`) must stay as they are so
    that a saved state_dict of the training-time class loads into this one.
    """

    def __init__(self, input_dim, hidden_dim, lstm_layers, seq_len):
        super().__init__()
        self.seq_len = seq_len
        self.hidden_dim = hidden_dim
        self.lstm = nn.LSTM(
            input_size=input_dim,
            hidden_size=hidden_dim,
            num_layers=lstm_layers,
            batch_first=True,
        )
        self.attention = SelfAttention(hidden_dim)
        self.fc = nn.Linear(in_features=seq_len * hidden_dim, out_features=hidden_dim)
        self.out = nn.Linear(in_features=hidden_dim, out_features=1)

    def forward(self, x):
        """Map (batch, seq_len, input_dim) to ``(prediction, features)``."""
        n_batch = x.size(0)
        encoded, _ = self.lstm(x)
        attended, _ = self.attention(encoded)
        # One flat vector of length seq_len * hidden_dim per sample.
        flattened = attended.reshape(n_batch, -1)
        features = F.relu(self.fc(flattened))
        return self.out(features), features

class FusionLayer(nn.Module):
    """Two-layer MLP turning the concatenated agent features into one value.

    Attribute names (`fc1`, `fc2`) must stay as they are so that a saved
    state_dict of the training-time class loads into this one.
    """

    def __init__(self, num_agents, agent_feature_dim, fusion_hidden_dim):
        super().__init__()
        fused_width = num_agents * agent_feature_dim
        self.fc1 = nn.Linear(fused_width, fusion_hidden_dim)
        self.fc2 = nn.Linear(fusion_hidden_dim, 1)

    def forward(self, features_list):
        """Concatenate the per-agent feature tensors along dim 1 and regress."""
        merged = torch.cat(features_list, dim=1)
        activated = F.relu(self.fc1(merged))
        return self.fc2(activated)

class MultiAgentEnsemble(nn.Module):
    """Ensemble of AgentNetwork instances combined by a FusionLayer.

    Attribute names (`agents`, `fusion`) must stay as they are so that a saved
    state_dict of the training-time class loads into this one.
    """

    def __init__(self, input_dim, hidden_dim, lstm_layers, seq_len, num_agents, fusion_hidden_dim):
        super().__init__()
        self.num_agents = num_agents
        members = [
            AgentNetwork(input_dim, hidden_dim, lstm_layers, seq_len)
            for _ in range(num_agents)
        ]
        self.agents = nn.ModuleList(members)
        self.fusion = FusionLayer(num_agents, hidden_dim, fusion_hidden_dim)

    def forward(self, x):
        """Return ``(agent_predictions, fusion_output)`` for input batch `x`."""
        results = [member(x) for member in self.agents]
        agent_predictions = [pred for pred, _ in results]
        agent_features = [feat for _, feat in results]
        # The fusion layer sees the agents' features, not their predictions.
        fusion_output = self.fusion(agent_features)
        return agent_predictions, fusion_output

# Function to predict the next price
def predict_next_price(model, df, scaler, seq_length=30):
    """
    Predict a future close price from the most recent `seq_length` rows of `df`.

    How far ahead the returned price lies is determined by how `model` was
    trained (the training cell of this notebook uses a 5-step horizon), not by
    this function.

    Side effect: `model` is moved to CUDA when available (CPU otherwise) and
    switched to eval mode.

    Args:
        model: Trained model; calling it must return a pair whose second item
            is a single-element tensor holding the scaled prediction
        df: DataFrame with 'open', 'high', 'low', 'close' columns and at least
            `seq_length` rows
        scaler: The SAME scaler used during training (needs `transform` and
            `inverse_transform` over the four OHLC columns)
        seq_length: Length of input sequence

    Returns:
        predicted_price: The predicted close price in original units

    Raises:
        ValueError: If `df` has fewer than `seq_length` rows.
    """
    # Fail early with a clear message; a short window would otherwise surface
    # as a shape mismatch deep inside the model.
    if len(df) < seq_length:
        raise ValueError(
            f"Need at least {seq_length} rows to build the input window, got {len(df)}"
        )

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)
    model.eval()

    # Get the most recent data
    recent_data = df[['open', 'high', 'low', 'close']].iloc[-seq_length:].values

    # Use the same scaler that was used during training
    recent_data_scaled = scaler.transform(recent_data)

    # Convert to a float32 tensor and add the batch dimension: (1, seq_length, 4)
    input_tensor = torch.as_tensor(recent_data_scaled, dtype=torch.float32).unsqueeze(0).to(device)

    # Make prediction
    with torch.no_grad():
        _, prediction = model(input_tensor)

    # The scaler works on four columns, so place the scaled prediction in the
    # close column (index 3) of a zero-filled row before inverting.
    dummy = np.zeros((1, 4))
    dummy[0, 3] = prediction.item()

    # Inverse transform to get the actual price
    predicted_price = scaler.inverse_transform(dummy)[0, 3]

    return predicted_price

# Function to load or create a scaler
def get_scaler(df):
    """Return a (0, 1) MinMaxScaler fitted on all rows of the four OHLC columns of `df`.

    This reproduces the training-time scaling only if training also fit its
    scaler on this same full set of rows.
    """
    ohlc_values = df[['open', 'high', 'low', 'close']].values
    # fit() returns the fitted scaler itself.
    return MinMaxScaler(feature_range=(0, 1)).fit(ohlc_values)

# Main execution
if __name__ == "__main__":
    # Model hyperparameters (must match the trained model)
    input_dim = 4
    hidden_dim = 64
    lstm_layers = 1
    seq_len = 30
    num_agents = 3
    fusion_hidden_dim = 128

    # Rebuild the architecture so the saved weights have somewhere to go
    model = MultiAgentEnsemble(
        input_dim=input_dim,
        hidden_dim=hidden_dim,
        lstm_layers=lstm_layers,
        seq_len=seq_len,
        num_agents=num_agents,
        fusion_hidden_dim=fusion_hidden_dim
    )

    # Load model weights. map_location remaps tensors saved from a GPU run so
    # the checkpoint also loads on a CPU-only machine.
    load_device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    state_dict = torch.load('/kaggle/working/multi_agent_trading_model.pth', map_location=load_device)
    model.load_state_dict(state_dict)
    print("Model loaded successfully.")

    # Get or create the scaler using the entire dataset
    # This is important because we need to use the same scaling as during training
    scaler = get_scaler(df)

    # Predict using your DataFrame (df)
    predicted_price = predict_next_price(model, df, scaler, seq_len)

    print(f"Predicted next price: {predicted_price:.2f}")

    # You can also get the current price for comparison
    current_price = df['close'].iloc[-1]
    print(f"Current price: {current_price:.2f}")
    print(f"Predicted change: {predicted_price - current_price:.2f} ({((predicted_price/current_price)-1)*100:.2f}%)")
    # NOTE(review): the recorded output of this cell shows a prediction about
    # 13% above the last close, which is implausible for a minutes-ahead
    # forecast. The training log above is cut off mid-run, so the checkpoint
    # loaded here may not come from that run - verify before trusting the number.

Model loaded successfully.
Predicted next price: 26728.63
Current price: 23563.15
Predicted change: 3165.48 (13.43%)


In [19]:
# Last close (~23563) as a percentage of the predicted price (~26728), using the
# rounded values printed by the previous cell.
(23563/26728)*100

88.1584854833882