In [None]:
import pandas as pd
import os

In [None]:
# @title
# Run this at the start of the notebook.
def setup_wandb():
    """Make sure Weights & Biases credentials are available for this session.

    The API key is deliberately NOT stored in the notebook.  Credentials are
    taken from, in order: an existing wandb login, the ``WANDB_API_KEY``
    environment variable, or an interactive ``wandb.login()`` prompt.

    NOTE: a real key used to be hard-coded in this cell; that key must be
    revoked/rotated in the W&B account settings because it has been shared
    together with the notebook.

    Returns:
        bool: True when credentials are available, False when wandb could
        not be imported, the login was not completed, or any other error
        occurred (the error is printed, not raised).
    """
    try:
        import wandb

        # An existing login or an exported key is enough; nothing to do then.
        if wandb.api.api_key is None and not os.environ.get("WANDB_API_KEY"):
            # No credentials anywhere: ask wandb to prompt for the key
            # instead of embedding it in source.
            if not wandb.login():
                print("WandB login was not completed.")
                return False

        print("WandB setup complete!")
        return True
    except Exception as e:
        print(f"Error setting up WandB: {str(e)}")
        return False

# Use it in your notebook
setup_wandb()

WandB setup complete!


True

In [None]:
# Load the raw 1-minute bars from CSV
csv_path = "/content/nifty50_1min_2015_to_2024.csv"
base_data = pd.read_csv(csv_path)

# ## creating a sample dataframe so that I can dry run the code with less computation
# sample_point = len(df)//5
# df_sample = df[:sample_point]

# Work on a copy so `base_data` stays exactly as loaded.
# Plain column assignment replaces the column, so it gets a real datetime
# dtype.  Assigning through `df.loc[:, 'date']` writes into the existing
# object column instead, which left `set_index` to infer datetimes from
# Python objects (the deprecated path that produced the warning in this
# cell's output).
df = base_data.copy()
df['date'] = pd.to_datetime(df['date'])
df = df.set_index('date')

# Resample to 5min (preserving timezone); each bar is labelled by its left edge.
# dropna() removes the empty bins created for periods with no rows.
df_5min = df.resample('5min', closed='left', label='left').agg({
    'open': 'first',
    'high': 'max',
    'low': 'min',
    'close': 'last',
    'volume': 'sum'
}).dropna()

# If you need to remove timezone, uncomment the next line:
# df_5min.index = df_5min.index.tz_localize(None)

print("Original shape:", df.shape)
print("New 5-min shape:", df_5min.shape)
print("\nFirst few rows of 5-min data:")
print(df_5min.head())


# Calculate close-to-close returns while preserving the index.
# The first row has no previous close, so its NaN return is dropped.
returns_df = pd.DataFrame()
returns_df['returns'] = df_5min['close'].pct_change()
returns_df = returns_df.dropna()

print("Shape of returns dataframe:", returns_df.shape)
print("\nFirst few returns (should show % changes between the close prices we saw):")
print(returns_df.head())

  return Index(sequences[0], name=names)


Original shape: (890511, 5)
New 5-min shape: (178118, 5)

First few rows of 5-min data:
                              open     high      low    close  volume
date                                                                 
2015-01-09 09:15:00+05:30  8285.45  8301.30  8285.45  8301.20       0
2015-01-09 09:20:00+05:30  8300.50  8303.00  8293.25  8301.00       0
2015-01-09 09:25:00+05:30  8301.65  8302.55  8286.80  8294.15       0
2015-01-09 09:30:00+05:30  8294.10  8295.75  8280.65  8288.50       0
2015-01-09 09:35:00+05:30  8289.10  8290.45  8278.00  8283.45       0
Shape of returns dataframe: (178117, 1)

First few returns (should show % changes between the close prices we saw):
                            returns
date                               
2015-01-09 09:20:00+05:30 -0.000024
2015-01-09 09:25:00+05:30 -0.000825
2015-01-09 09:30:00+05:30 -0.000681
2015-01-09 09:35:00+05:30 -0.000609
2015-01-09 09:40:00+05:30  0.000254


In [None]:
# Keep only the last 4800 rows of the returns frame.
# Note: this rebinds `df`, which previously held the 1-minute bars.
df = returns_df.iloc[-4800:]

In [None]:
# Scratch arithmetic: evaluates to 240; the result is not bound to any name.
# NOTE(review): purpose not evident from the visible cells -- confirm or delete.
48*5

240

In [None]:
import cupy as cp
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from statsmodels.tsa.statespace.sarimax import SARIMAX
from tqdm import tqdm
import multiprocessing
from joblib import Parallel, delayed
import wandb
from typing import Dict, Tuple, List
import warnings
warnings.filterwarnings('ignore')  # Suppress model convergence warnings

# Central run configuration; it is also passed to wandb.init as the run config.
CONFIG = {
    "initial_train_months": 6,
    "rolling_window_months": 6,
    "step_size_months": 1,
    "forecast_horizons": [1, 2, 3],
    "trading_days_per_month": 21,
    "intervals_per_day": 12,
    "models": {
        "arima": {"order": (2, 0, 2)},
        "sarima": {"order": (2, 0, 2), "seasonal_order": (2, 0, 2, 60)}
    },
    "plot_sample_size": 1000,
    # Leave one core free, but never drop below one worker: on a single-core
    # machine `cpu_count() - 1` is 0, which joblib rejects as n_jobs.
    "n_jobs": max(1, multiprocessing.cpu_count() - 1),
    "gpu_batch_size": 4096
}

def period_calculator(months: int) -> int:
    """Convert a span in months to a number of data periods.

    Uses ``trading_days_per_month * intervals_per_day`` from CONFIG and
    truncates the product to an int.

    Args:
        months: Length of the span in months.

    Returns:
        Number of periods covering ``months``.
    """
    return int(months * CONFIG["trading_days_per_month"] * CONFIG["intervals_per_day"])

# Derived window sizes, expressed in periods
CONFIG["rolling_window_periods"] = period_calculator(CONFIG["rolling_window_months"])
CONFIG["step_size_periods"] = period_calculator(CONFIG["step_size_months"])
CONFIG["initial_train_periods"] = period_calculator(CONFIG["initial_train_months"])

class GPUMemoryContext:
    """Context manager that frees CuPy's default memory pool on exit."""

    def __enter__(self):
        """Grab the default pool, record its current usage, and return self."""
        default_pool = cp.get_default_memory_pool()
        self.pool = default_pool
        # Bytes in use at entry; stored for inspection, not read by __exit__.
        self.initial_used = default_pool.used_bytes()
        return self

    def __exit__(self, *args):
        """Free every block of the pool; returns None so exceptions propagate."""
        self.pool.free_all_blocks()

class PortfolioTracker:
    """Tracks portfolio value, running peak and drawdown as float32 CuPy scalars."""

    def __init__(self, initial_capital=1e6):
        """Start with value and peak at ``initial_capital`` and zero drawdown."""
        self.value = cp.asarray(initial_capital, dtype=cp.float32)
        self.drawdown = cp.asarray(0.0, dtype=cp.float32)
        self.peak_value = cp.asarray(initial_capital, dtype=cp.float32)

    def update(self, returns, position_size):
        """Apply one period's return, scaled by the position size.

        The value is compounded in place, the peak is raised if a new high
        was reached, and drawdown is the fractional distance below the peak.
        """
        growth_factor = 1 + returns * position_size
        # In-place compounding keeps the existing array (and its dtype).
        self.value *= growth_factor

        new_peak = cp.maximum(self.peak_value, self.value)
        self.peak_value = new_peak
        shortfall = new_peak - self.value
        self.drawdown = shortfall / new_peak

class BaseModel:
    """SARIMAX wrapper that caches in-sample diagnostics as CuPy arrays.

    The model is estimated by statsmodels on the host; only the in-sample
    residuals / fitted values and the statistics derived from them are kept
    on the GPU.  Keys read from ``model_config``:

    * ``order``          -- ARIMA order, default ``(0, 0, 0)``
    * ``seasonal_order`` -- seasonal order, default ``(0, 0, 0, 0)``
      (no seasonal component)
    """

    def __init__(self, model_config: Dict):
        """Store the config and set up memory-pool bookkeeping.

        The memory threshold used by ``_monitor_memory_usage`` is 80% of the
        total device memory reported by the CUDA runtime.
        """
        self.config = model_config
        self.model = None
        self._last_gpu_residuals = None
        self._last_gpu_fitted = None
        self._memory_pool = cp.get_default_memory_pool()
        self._pinned_pool = cp.get_default_pinned_memory_pool()
        self._peak_memory_usage = 0
        self._current_memory_usage = 0
        total_memory = cp.cuda.runtime.memGetInfo()[1]
        self._memory_threshold = int(0.8 * total_memory)

    def _monitor_memory_usage(self):
        """Track current and peak pool usage; raise once the threshold is passed.

        Raises:
            cp.cuda.memory.OutOfMemoryError: if the pool currently uses more
                bytes than the threshold (caches are released first).
        """
        current = self._memory_pool.used_bytes()
        self._current_memory_usage = current
        self._peak_memory_usage = max(self._peak_memory_usage, current)

        if current > self._memory_threshold:
            self._cleanup_gpu_memory()
            # CuPy's OutOfMemoryError takes (size, total, limit), not a
            # message string; a single string argument raises TypeError
            # instead of the intended error.
            raise cp.cuda.memory.OutOfMemoryError(0, current, self._memory_threshold)

    def _get_available_memory(self) -> int:
        """Get available GPU memory in bytes"""
        free, total = cp.cuda.runtime.memGetInfo()
        return free

    def _cleanup_gpu_memory(self):
        """Drop cached GPU arrays and release both memory pools (best effort)."""
        try:
            # Reset to None rather than deleting the attributes: other
            # methods test `self._last_gpu_residuals is None` and would hit
            # AttributeError if the attribute no longer existed.
            self._last_gpu_residuals = None
            self._last_gpu_fitted = None

            # Force garbage collection so unreferenced arrays return to the pool
            import gc
            gc.collect()

            # Clear memory pools
            self._memory_pool.free_all_blocks()
            self._pinned_pool.free_all_blocks()

            # Synchronize device
            cp.cuda.runtime.deviceSynchronize()

        except Exception as e:
            print(f"Error during memory cleanup: {str(e)}")

    def train(self, data: pd.Series) -> bool:
        """Fit SARIMAX on ``data`` and cache residuals / fitted values on the GPU.

        Args:
            data: Training series; a pandas Series, a CuPy array, or anything
                statsmodels accepts as ``endog``.

        Returns:
            True on success.  On a CuPy out-of-memory error the fit is retried
            on the most recent half of ``data``; any other failure is printed
            and reported as False.
        """
        try:
            # Ensure we're on the right device and manage memory
            with cp.cuda.Device(0):
                # Monitor memory before operations
                self._monitor_memory_usage()
                # Clean up any previous GPU arrays
                self._cleanup_gpu_memory()
                # Forget the previous fit so a failed refit cannot leave a
                # stale model behind for forecast() to use.
                self.model = None

                # statsmodels runs on the host, so device data is copied back once
                if isinstance(data, cp.ndarray):
                    training_data = cp.asnumpy(data)
                elif isinstance(data, pd.Series):
                    training_data = data.values
                else:
                    training_data = data

                # Train the model.  The seasonal default must be a 4-tuple:
                # SARIMAX indexes into seasonal_order, so None cannot be used
                # for configs without a seasonal part (e.g. plain ARIMA).
                self.model = SARIMAX(
                    training_data,
                    order=self.config.get('order', (0, 0, 0)),
                    seasonal_order=self.config.get('seasonal_order', (0, 0, 0, 0)),
                    enforce_stationarity=False,
                    enforce_invertibility=False
                ).fit(disp=False)

                # Store residuals and fitted values on GPU
                try:
                    self._last_gpu_residuals = cp.asarray(self.model.resid, dtype=cp.float32)
                    self._last_gpu_fitted = cp.asarray(self.model.fittedvalues, dtype=cp.float32)
                except cp.cuda.memory.OutOfMemoryError:
                    # If OOM occurs, free what we can and keep only the first half
                    self._cleanup_gpu_memory()
                    chunk_size = len(self.model.resid) // 2
                    self._last_gpu_residuals = cp.asarray(self.model.resid[:chunk_size], dtype=cp.float32)
                    self._last_gpu_fitted = cp.asarray(self.model.fittedvalues[:chunk_size], dtype=cp.float32)

                # Monitor memory after major operations
                self._monitor_memory_usage()

                return True

        except cp.cuda.memory.OutOfMemoryError as e:
            print(f"Out of memory during training: {str(e)}")
            self._cleanup_gpu_memory()
            # Try to recover with smaller batch
            return self._train_with_reduced_batch(data)
        except Exception as e:
            print(f"Training failed: {str(e)}")
            self._cleanup_gpu_memory()
            return False

    def forecast(self, steps: int) -> cp.ndarray:
        """Forecast ``steps`` periods ahead as a float32 CuPy array.

        Returns an array of NaNs of length ``steps`` when no model has been
        fitted or when forecasting fails (the error is printed).
        """
        try:
            with cp.cuda.Device(0):
                if self.model is not None:
                    # Generate forecast on the host
                    forecast_result = self.model.forecast(steps=steps)

                    # Transfer to GPU, retrying once after a cleanup on OOM
                    try:
                        return cp.asarray(forecast_result, dtype=cp.float32)
                    except cp.cuda.memory.OutOfMemoryError:
                        self._cleanup_gpu_memory()
                        return cp.asarray(forecast_result, dtype=cp.float32)
                return cp.full(steps, cp.nan, dtype=cp.float32)

        except Exception as e:
            print(f"Forecast failed: {str(e)}")
            self._cleanup_gpu_memory()
            return cp.full(steps, cp.nan, dtype=cp.float32)

    def _train_with_reduced_batch(self, data):
        """Handle OOM by retrying on the most recent half of ``data``.

        Stops (returning False) once the window cannot be halved any more.
        Without this guard a one-element window would retry forever, because
        ``data[-0:]`` is the whole series again.
        """
        reduced_window = len(data) // 2
        if reduced_window < 1:
            print("Reduced training failed: training window cannot be reduced further")
            return False
        try:
            return self.train(data[-reduced_window:])
        except Exception as e:
            print(f"Reduced training failed: {str(e)}")
            return False

    def get_regression_metrics(self) -> Dict:
        """Summarise fit quality and forecast uncertainty for the decision layer.

        Returns a dict with keys ``r2``, ``aic``, ``bic``, ``llf``, ``mse``,
        ``mae``, ``residual_std``, ``forecast_variances``,
        ``forecast_covariances``, ``model_confidence`` and
        ``prediction_intervals``.  When no fitted model / cached arrays are
        available, or on any error, the NaN/None-filled dict from
        ``_get_empty_metrics`` is returned.

        Notes:
            * statsmodels results have no ``rsquared``; R^2 is computed from
              the cached one-step-ahead fit (actual = fitted + residual).
            * The forecast object exposes per-step variances
              (``var_pred_mean``) but no covariance between horizons, so
              ``forecast_covariances`` is the diagonal matrix of those
              variances.  Treating horizons as uncorrelated is an
              approximation.
        """
        try:
            with cp.cuda.Device(0):
                if (self.model is None
                        or self._last_gpu_residuals is None
                        or self._last_gpu_fitted is None):
                    return self._get_empty_metrics()

                # Reuse cached GPU arrays
                residuals = self._last_gpu_residuals
                fitted = self._last_gpu_fitted

                # Forecast statistics for the configured horizons
                forecast_horizons = CONFIG["forecast_horizons"]
                forecast_results = self.model.get_forecast(steps=max(forecast_horizons))

                try:
                    step_variances = np.asarray(
                        forecast_results.var_pred_mean, dtype=np.float64
                    ).reshape(-1)
                    gpu_variances = cp.asarray(
                        [step_variances[h - 1] for h in forecast_horizons],
                        dtype=cp.float32
                    )
                    gpu_covariance = cp.diag(gpu_variances)

                    # Error metrics on GPU
                    gpu_mse = cp.mean(residuals ** 2)
                    gpu_mae = cp.mean(cp.abs(residuals))
                    gpu_std = cp.std(residuals)

                    # R^2 = 1 - SS_res / SS_tot; NaN for a constant series
                    actual = fitted + residuals
                    ss_res = cp.sum(residuals ** 2)
                    ss_tot = cp.sum((actual - cp.mean(actual)) ** 2)
                    has_variance = ss_tot > 0
                    safe_tot = cp.where(has_variance, ss_tot, cp.ones_like(ss_tot))
                    gpu_rsquared = cp.where(
                        has_variance,
                        1 - ss_res / safe_tot,
                        cp.full_like(ss_tot, cp.nan)
                    )

                    # Confidence calculation
                    confidence_weights = cp.asarray([0.4, 0.3, 0.3], dtype=cp.float32)
                    gpu_aic = cp.asarray(self.model.aic, dtype=cp.float32)

                    # Squash AIC and residual spread into (0, 1]
                    gpu_aic_norm = 1 / (1 + cp.exp(gpu_aic / 1000))
                    gpu_residual_norm = 1 / (1 + gpu_std)

                    # Weighted confidence score; order matches confidence_weights
                    confidence_inputs = cp.stack([
                        gpu_rsquared,
                        gpu_residual_norm,
                        gpu_aic_norm
                    ])
                    gpu_model_confidence = cp.sum(confidence_weights * confidence_inputs)

                    # 95% prediction intervals (host object from statsmodels)
                    pred_intervals = forecast_results.conf_int(alpha=0.05)

                    metrics = {
                        'r2': float(cp.asnumpy(gpu_rsquared)),
                        'aic': float(cp.asnumpy(gpu_aic)),
                        'bic': float(self.model.bic),
                        'llf': float(self.model.llf),
                        'mse': float(cp.asnumpy(gpu_mse)),
                        'mae': float(cp.asnumpy(gpu_mae)),
                        'residual_std': float(cp.asnumpy(gpu_std)),
                        'forecast_variances': cp.asnumpy(gpu_variances),
                        'forecast_covariances': cp.asnumpy(gpu_covariance),
                        'model_confidence': float(cp.asnumpy(gpu_model_confidence)),
                        'prediction_intervals': pred_intervals
                    }

                    return metrics

                except cp.cuda.memory.OutOfMemoryError:
                    self._cleanup_gpu_memory()
                    # Return empty metrics if we can't recover
                    return self._get_empty_metrics()

        except Exception as e:
            print(f"Error calculating regression metrics: {str(e)}")
            self._cleanup_gpu_memory()
            return self._get_empty_metrics()

    def _get_empty_metrics(self) -> Dict:
        """Return the metrics dict with NaN scalars and None for array entries."""
        return {
            'r2': cp.nan,
            'aic': cp.nan,
            'bic': cp.nan,
            'llf': cp.nan,
            'mse': cp.nan,
            'mae': cp.nan,
            'residual_std': cp.nan,
            'forecast_variances': None,
            'forecast_covariances': None,
            'model_confidence': cp.nan,
            'prediction_intervals': None
        }

    def __del__(self):
        """Cleanup when object is destroyed"""
        self._cleanup_gpu_memory()


class PositionSizer:
    """Fractional-Kelly position sizing with volatility, time-of-day and
    drawdown scaling.  State (rolling ATR / volatility windows) lives in CuPy
    arrays and is updated on every ``calculate_position_size`` call.
    """

    def __init__(self,
                 base_kelly_fraction: float = 0.25,
                 max_position_size: float = 0.15,
                 min_position_size: float = 0.02,
                 transaction_cost: float = 0.0003,
                 session_cutoff: str = "15:15:00"):
        """Set sizing limits and initialise the rolling windows.

        Args:
            base_kelly_fraction: Fraction of the raw Kelly size to use.
            max_position_size: Upper bound on position size (fraction of portfolio).
            min_position_size: Lower bound on position size for actual trades.
            transaction_cost: Cost subtracted from the raw Kelly fraction.
            session_cutoff: Time of day ("HH:MM:SS") used as the end of the
                trading window for the time-decay scalar.
        """
        # Initialize parameters on GPU
        self.base_kelly = cp.asarray(base_kelly_fraction, dtype=cp.float32)
        self.max_size = cp.asarray(max_position_size, dtype=cp.float32)
        self.min_size = cp.asarray(min_position_size, dtype=cp.float32)
        self.transaction_cost = cp.asarray(transaction_cost, dtype=cp.float32)
        # Stored as an offset from midnight
        self.session_cutoff = pd.Timedelta(session_cutoff)

        # Initialize rolling volatility window on GPU
        self.volatility_window = cp.zeros(20, dtype=cp.float32)  # 20-period rolling vol
        self.atr_window = cp.zeros(14, dtype=cp.float32)         # 14-period ATR

        # Risk adjustment parameters
        self.volatility_scalar = cp.ones(1, dtype=cp.float32)
        self.time_decay_scalar = cp.ones(1, dtype=cp.float32)
        self.drawdown_scalar = cp.ones(1, dtype=cp.float32)

    @staticmethod
    def _to_float(value) -> float:
        """Return ``value`` as a Python float, copying from the device if needed."""
        return float(value.get()) if hasattr(value, "get") else float(value)

    @staticmethod
    def _ratio_to_mean(window: cp.ndarray) -> cp.ndarray:
        """Latest window entry divided by the window mean (1.0 if the mean is not positive)."""
        mean = cp.mean(window)
        usable = mean > 0
        safe_mean = cp.where(usable, mean, cp.ones_like(mean))
        return cp.where(usable, window[-1] / safe_mean, cp.ones_like(mean))

    def _calculate_time_decay(self, current_time: pd.Timestamp) -> cp.ndarray:
        """Scale positions down as the session cutoff approaches.

        Returns 1.0 with two or more hours left, then decays exponentially,
        floored at 0.1 (also used after the cutoff).
        """
        current_time = pd.Timestamp(current_time)
        # Compare time-of-day with the cutoff: a Timestamp cannot be
        # subtracted from a Timedelta directly.
        elapsed_today = current_time - current_time.normalize()
        time_left = self.session_cutoff - elapsed_today
        # Must be an array: cp.where needs an array condition.
        hours_left = cp.asarray(time_left.total_seconds() / 3600, dtype=cp.float32)

        # Exponential decay in last 2 hours
        decay_factor = cp.where(
            hours_left < 2,
            cp.exp(-2.5 * (2 - hours_left)),
            cp.asarray(1.0, dtype=cp.float32)
        )
        return cp.clip(decay_factor, 0.1, 1.0)

    def _update_volatility(self, ohlc_data: Dict[str, cp.ndarray]) -> None:
        """Push the latest true range and return volatility into the rolling windows.

        Args:
            ohlc_data: Mapping with ``'high'``, ``'low'`` and ``'close'``
                sequences, oldest first.  Empty input leaves the state untouched.
        """
        high = cp.asarray(ohlc_data['high'], dtype=cp.float64)
        low = cp.asarray(ohlc_data['low'], dtype=cp.float64)
        close = cp.asarray(ohlc_data['close'], dtype=cp.float64)
        if close.size == 0:
            return

        if close.size > 1:
            # Arrays have no .shift(); align each bar with the previous close by slicing.
            prev_close = close[:-1]
            high_low = high[1:] - low[1:]
            high_close = cp.abs(high[1:] - prev_close)
            low_close = cp.abs(low[1:] - prev_close)
            true_range = cp.maximum(cp.maximum(high_low, high_close), low_close)
            latest_true_range = true_range[-1]
            # Std of (up to) the last 20 close-to-close log returns
            log_returns = cp.log(close[1:] / prev_close)[-20:]
            latest_volatility = cp.std(log_returns)
        else:
            # A single bar has no previous close: fall back to its range
            latest_true_range = high[-1] - low[-1]
            latest_volatility = cp.asarray(0.0, dtype=cp.float64)

        # Update ATR with exponential smoothing against the previous entry
        self.atr_window = cp.roll(self.atr_window, -1)
        self.atr_window[-1] = 0.8 * self.atr_window[-2] + 0.2 * latest_true_range

        # Update volatility window with the latest return volatility
        self.volatility_window = cp.roll(self.volatility_window, -1)
        self.volatility_window[-1] = latest_volatility

    def _calculate_volatility_scalar(self) -> cp.ndarray:
        """Inverse-volatility scaling factor, clipped to [0.33, 3.0].

        The windows start zero-filled, so ratios against a non-positive mean
        are treated as neutral (1.0) instead of producing 0/0 = NaN.
        """
        atr_ratio = self._ratio_to_mean(self.atr_window)
        vol_ratio = self._ratio_to_mean(self.volatility_window)

        # Combine volatility measures
        combined_vol = 0.7 * vol_ratio + 0.3 * atr_ratio
        combined_vol = cp.where(
            cp.isfinite(combined_vol), combined_vol, cp.ones_like(combined_vol)
        )
        positive = combined_vol > 0
        safe_combined = cp.where(positive, combined_vol, cp.ones_like(combined_vol))
        # Zero current volatility maps to the upper bound, as 1/0 -> inf would after clipping
        inverse = cp.where(positive, 1.0 / safe_combined, cp.full_like(combined_vol, 3.0))
        return cp.clip(inverse, 0.33, 3.0)

    def _calculate_kelly_size(self,
                            win_prob: cp.ndarray,
                            risk_reward: cp.ndarray,
                            confidence: cp.ndarray) -> cp.ndarray:
        """Modified Kelly Criterion with transaction costs.

        The win probability is shrunk towards 0.5 by ``confidence``, the Kelly
        fraction ``(p*b - (1-p)) / b`` is reduced by the transaction cost,
        clipped to [0, 1] and scaled by the base Kelly fraction.
        """
        # Adjust win probability with confidence
        adj_win_prob = 0.5 + (win_prob - 0.5) * confidence

        # Kelly formula with cost adjustment
        numerator = adj_win_prob * risk_reward - (1 - adj_win_prob)
        denominator = risk_reward
        raw_kelly = numerator / denominator - self.transaction_cost

        return self.base_kelly * cp.clip(raw_kelly, 0.0, 1.0)

    def calculate_position_size(self,
                              signal: str,
                              confidence: float,
                              current_time: pd.Timestamp,
                              ohlc_data: Dict[str, cp.ndarray],
                              portfolio_value: float,
                              current_drawdown: float) -> Tuple[float, Dict]:
        """Compute the position value for a signal plus a dict of sizing diagnostics.

        Args:
            signal: 'buy', 'sell' or 'hold'.  A 'hold' always gets a zero
                position; the rolling volatility state is still updated.
            confidence: Signal confidence; clipped to [0, 1] before use.
            current_time: Timestamp of the decision (time of day drives the decay).
            ohlc_data: Mapping with 'high', 'low' and 'close' sequences.
            portfolio_value: Current portfolio value.
            current_drawdown: Current drawdown as a fraction of the peak.

        Returns:
            ``(position_value, metrics)``.  On any error the message is
            printed and ``(0.0, {})`` is returned.
        """
        try:
            # Convert inputs to GPU arrays; keep the confidence a valid probability weight
            confidence_gpu = cp.clip(cp.asarray(confidence, dtype=cp.float32), 0.0, 1.0)
            drawdown_gpu = cp.asarray(current_drawdown, dtype=cp.float32)

            # Update volatility metrics
            self._update_volatility(ohlc_data)

            # Calculate risk scalars on GPU
            self.volatility_scalar = self._calculate_volatility_scalar()
            self.time_decay_scalar = self._calculate_time_decay(current_time)
            # Shrinks linearly with drawdown and bottoms out at 0.5 once the
            # drawdown reaches 10% (0.5 * 0.2).
            self.drawdown_scalar = 1.0 - cp.clip(drawdown_gpu / 0.2, 0.0, 0.5)

            # Calculate position components
            win_prob = confidence_gpu * 0.8 + 0.2  # Map confidence to [0.2-1.0] win probability
            risk_reward = cp.asarray(2.0, dtype=cp.float32)  # Fixed 2:1 risk-reward ratio

            # Calculate base Kelly size
            raw_size = self._calculate_kelly_size(win_prob, risk_reward, confidence_gpu)

            # Apply risk scalars
            adj_size = raw_size * self.volatility_scalar * self.time_decay_scalar * self.drawdown_scalar

            # Convert to percentage of portfolio; no trade means no position
            if signal == 'hold':
                position_pct = 0.0
            else:
                position_pct = self._to_float(cp.clip(adj_size, self.min_size, self.max_size))
            position_value = position_pct * self._to_float(portfolio_value)

            # Prepare detailed metrics
            metrics = {
                'position_size_pct': position_pct,
                'volatility_scalar': self._to_float(self.volatility_scalar),
                'time_decay_scalar': self._to_float(self.time_decay_scalar),
                'drawdown_scalar': self._to_float(self.drawdown_scalar),
                'raw_kelly_size': self._to_float(raw_size),
                'current_atr': self._to_float(self.atr_window[-1]),
                'current_volatility': self._to_float(self.volatility_window[-1])
            }

            return position_value, metrics

        except Exception as e:
            print(f"Position sizing error: {str(e)}")
            return 0.0, {}


class DecisionLayer:
    """Turns multi-horizon forecasts and model diagnostics into buy/sell/hold
    signals, then asks a PositionSizer for the position size.

    ``alpha``, ``beta``, ``min_probability`` and ``risk_aversion`` are mutable
    state: ``adjust_parameters`` rewrites them on every signal.
    """

    def __init__(self, transaction_cost: float = 0.0001, risk_aversion: float = 1.645):
        """Initialise parameters as CuPy scalars.

        Args:
            transaction_cost: Proportional cost per trade leg.
            risk_aversion: Multiplier on the forecast standard deviation in
                the risk penalty; also the base level that
                ``adjust_parameters`` rescales.
        """
        self.position_sizer = PositionSizer()
        self.transaction_cost = cp.asarray(transaction_cost)
        self.risk_aversion = cp.asarray(risk_aversion)

        # Weight per forecast horizon (keys are horizon steps).
        # NOTE(review): the original comments labelled these 30/60/90-min
        # forecasts -- confirm the bar size against the data pipeline.
        self.horizon_weights = {
            1: cp.asarray(0.5),
            2: cp.asarray(0.3),
            3: cp.asarray(0.2)
        }

        # Threshold parameters
        self.alpha = cp.asarray(0.6)  # R² weight
        self.beta = cp.asarray(0.4)   # AIC weight
        self.gamma = cp.asarray(0.1)  # Model confidence scaling
        self.min_probability = cp.asarray(0.7)
        # Base level for the dynamic adjustment.  It must follow the
        # constructor argument, otherwise a custom risk_aversion is lost the
        # first time adjust_parameters runs.
        self.base_risk_aversion = cp.asarray(risk_aversion)

        # Precompute weight vector, ordered by horizon
        self.weight_vector = cp.stack(
            [self.horizon_weights[h] for h in sorted(self.horizon_weights)]
        )

    @staticmethod
    def _to_float(value) -> float:
        """Return ``value`` as a Python float, copying from the device if needed."""
        return float(value.get()) if hasattr(value, "get") else float(value)

    @staticmethod
    def _to_bool(value) -> bool:
        """Return ``value`` as a Python bool, copying from the device if needed."""
        return bool(value.get()) if hasattr(value, "get") else bool(value)

    @staticmethod
    def _normal_cdf(z):
        """Standard normal CDF.  CuPy has no top-level ``erf``; it lives in cupyx."""
        from cupyx.scipy.special import erf
        return 0.5 * (1 + erf(z / (2.0 ** 0.5)))

    def compute_weighted_forecast(self, predictions: Dict[int, cp.ndarray],
                                variances: Dict[int, cp.ndarray],
                                covariance_matrix: cp.ndarray) -> Tuple[cp.ndarray, cp.ndarray]:
        """Combine per-horizon forecasts into one forecast and its variance.

        Args:
            predictions: Forecast per horizon; combined in sorted-key order,
                which must line up with the horizon weights.
            variances: Accepted for interface compatibility; not used.
            covariance_matrix: Forecast-error covariance between horizons.

        Returns:
            ``(weighted_forecast, total_variance)`` where the variance is
            ``w' C w`` for weights ``w`` and covariance ``C``.
        """
        pred_array = cp.stack([cp.asarray(predictions[h]) for h in sorted(predictions)])

        weighted_forecast = cp.sum(self.weight_vector * pred_array)

        total_variance = cp.sum(
            cp.outer(self.weight_vector, self.weight_vector) * covariance_matrix
        )

        return weighted_forecast, total_variance

    def compute_expected_profit(self, current_price: cp.ndarray,
                              weighted_forecast: cp.ndarray,
                              total_variance: cp.ndarray,
                              model_confidence: cp.ndarray,
                              action: str = 'buy') -> cp.ndarray:
        """Risk-adjusted expected profit of ``action`` ('buy', anything else = sell).

        Profit = directional gain - transaction costs - risk_aversion * std
        + gamma * model_confidence.  A buy is charged two cost legs, a sell one.
        """
        std_dev = cp.sqrt(total_variance)
        current_price_gpu = cp.asarray(current_price)

        if action == 'buy':
            model_gain = weighted_forecast - current_price_gpu
            transaction_costs = 2 * self.transaction_cost * current_price_gpu
        else:  # sell
            model_gain = current_price_gpu - weighted_forecast
            transaction_costs = self.transaction_cost * current_price_gpu

        risk_penalty = self.risk_aversion * std_dev
        confidence_boost = self.gamma * model_confidence

        return model_gain - transaction_costs - risk_penalty + confidence_boost

    def compute_trade_probability(self, current_price: cp.ndarray,
                                weighted_forecast: cp.ndarray,
                                total_variance: cp.ndarray,
                                action: str = 'buy') -> cp.ndarray:
        """Probability that ``action`` is profitable after costs.

        The outcome is modelled as normal around the weighted forecast, so
        the probability of clearing price plus costs is ``Phi(z)``, with ``z``
        the cost-adjusted edge in standard deviations.  Both directions use
        ``Phi(z)``; the buy branch previously returned ``1 - Phi(z)``, which
        made strong buy forecasts look improbable.
        """
        std_dev = cp.sqrt(total_variance)
        current_price_gpu = cp.asarray(current_price)

        if action == 'buy':
            tc = 2 * self.transaction_cost * current_price_gpu
            z_score = (weighted_forecast - current_price_gpu - tc) / std_dev
        else:  # sell
            tc = self.transaction_cost * current_price_gpu
            z_score = (current_price_gpu - tc - weighted_forecast) / std_dev

        return self._normal_cdf(z_score)

    def adjust_parameters(self, market_volatility: cp.ndarray,
                         rolling_sharpe: cp.ndarray,
                         window_volatility: cp.ndarray):
        """Adapt risk aversion and thresholds to volatility and recent Sharpe.

        Risk aversion is the base level scaled by the market/window
        volatility ratio (the adjustment is clipped to [-0.5, +1.0]).  A
        non-positive window volatility is treated as a neutral ratio of 1
        instead of dividing by zero.  A rolling Sharpe above 1.5 switches to
        the more aggressive alpha/beta/min_probability set, below 0.5 to the
        more conservative one; otherwise the current values are kept.
        """
        market_vol_gpu = cp.asarray(market_volatility, dtype=cp.float64)
        window_vol_gpu = cp.asarray(window_volatility, dtype=cp.float64)
        rolling_sharpe_gpu = cp.asarray(rolling_sharpe)

        # Volatility ratio, guarded against a zero / missing window volatility
        usable = window_vol_gpu > 0
        safe_window = cp.where(usable, window_vol_gpu, cp.ones_like(window_vol_gpu))
        volatility_scalar = cp.where(
            usable, market_vol_gpu / safe_window, cp.ones_like(window_vol_gpu)
        )
        self.risk_aversion = self.base_risk_aversion * (
            1 + cp.clip(volatility_scalar - 1, -0.5, 1.0)
        )

        strong = rolling_sharpe_gpu > 1.5
        weak = rolling_sharpe_gpu < 0.5

        # Adjust threshold parameters based on performance
        self.alpha = cp.where(
            strong,
            cp.asarray(0.5),  # More aggressive
            cp.where(weak, cp.asarray(0.7), self.alpha)  # More conservative
        )

        self.beta = cp.where(
            strong,
            cp.asarray(0.3),
            cp.where(weak, cp.asarray(0.5), self.beta)
        )

        self.min_probability = cp.where(
            strong,
            cp.asarray(0.65),
            cp.where(weak, cp.asarray(0.75), self.min_probability)
        )

    def generate_trading_signal(self,
                              current_price: float,
                              predictions: Dict[int, float],
                              model_metrics: Dict,
                              market_state: Dict,
                              portfolio_state: Dict) -> Tuple[str, float, Dict]:
        """Produce ``(signal, confidence, detailed_metrics)`` for one decision point.

        Args:
            current_price: Latest observed value the forecasts are compared with.
            predictions: Forecast per horizon.
            model_metrics: Dict with 'forecast_variances',
                'forecast_covariances', 'model_confidence', 'r2' and 'aic'.
            market_state: Optional 'market_volatility', 'rolling_sharpe',
                'window_volatility'; required 'timestamp' and 'ohlc'.
            portfolio_state: Dict with 'value' and 'drawdown'.

        Returns:
            'buy' / 'sell' when the expected profit beats the threshold and
            the trade probability beats ``min_probability``, otherwise
            'hold'.  Missing forecast statistics or any error yield
            ``('hold', 0.0, {})``.
        """
        try:
            # Nothing to decide on when the model produced no forecast statistics
            if (not model_metrics
                    or model_metrics.get('forecast_variances') is None
                    or model_metrics.get('forecast_covariances') is None):
                return 'hold', 0.0, {}

            # Move all inputs to GPU
            current_price_gpu = cp.asarray(current_price)
            predictions_gpu = {k: cp.asarray(v) for k, v in predictions.items()}
            variances_gpu = cp.asarray(model_metrics['forecast_variances'])
            covariance_gpu = cp.asarray(model_metrics['forecast_covariances'])
            model_confidence_gpu = cp.asarray(model_metrics['model_confidence'])

            # Update parameters based on market conditions
            self.adjust_parameters(
                cp.asarray(market_state.get('market_volatility', 0)),
                cp.asarray(market_state.get('rolling_sharpe', 1.0)),
                cp.asarray(market_state.get('window_volatility', 0))
            )

            # Compute weighted forecast and uncertainty
            weighted_forecast, total_variance = self.compute_weighted_forecast(
                predictions_gpu, variances_gpu, covariance_gpu
            )

            # Profit threshold from fit quality (R² and squashed AIC)
            aic_norm = 1 / (1 + cp.exp(cp.asarray(model_metrics['aic']) / 1000))
            threshold = self.alpha * cp.asarray(model_metrics['r2']) + self.beta * aic_norm

            # Compute profits and probabilities for both directions
            buy_profit = self.compute_expected_profit(
                current_price_gpu, weighted_forecast, total_variance,
                model_confidence_gpu, 'buy'
            )
            sell_profit = self.compute_expected_profit(
                current_price_gpu, weighted_forecast, total_variance,
                model_confidence_gpu, 'sell'
            )
            buy_prob = self.compute_trade_probability(
                current_price_gpu, weighted_forecast, total_variance, 'buy'
            )
            sell_prob = self.compute_trade_probability(
                current_price_gpu, weighted_forecast, total_variance, 'sell'
            )

            # Generate signal; buy is checked first
            signal = 'hold'
            confidence_score = cp.asarray(0.0)

            if self._to_bool(buy_profit > threshold) and self._to_bool(buy_prob > self.min_probability):
                signal = 'buy'
                confidence_score = buy_prob * (buy_profit / threshold)
            elif self._to_bool(sell_profit > threshold) and self._to_bool(sell_prob > self.min_probability):
                signal = 'sell'
                confidence_score = sell_prob * (sell_profit / threshold)

            confidence_value = self._to_float(confidence_score)

            # Transfer final results back to CPU
            detailed_metrics = {
                'weighted_forecast': self._to_float(weighted_forecast),
                'forecast_std': self._to_float(cp.sqrt(total_variance)),
                'threshold': self._to_float(threshold),
                'buy_profit': self._to_float(buy_profit),
                'sell_profit': self._to_float(sell_profit),
                'buy_probability': self._to_float(buy_prob),
                'sell_probability': self._to_float(sell_prob),
                'model_confidence': self._to_float(model_confidence_gpu),
                'risk_aversion': self._to_float(self.risk_aversion),
                'confidence_score': confidence_value
            }
            position_size, size_metrics = self.position_sizer.calculate_position_size(
                signal=signal,
                confidence=confidence_value,
                current_time=market_state['timestamp'],
                ohlc_data=market_state['ohlc'],
                portfolio_value=portfolio_state['value'],
                current_drawdown=portfolio_state['drawdown']
            )
            detailed_metrics.update(size_metrics)
            detailed_metrics['position_size'] = position_size

            return signal, confidence_value, detailed_metrics

        except Exception as e:
            print(f"Error generating trading signal: {str(e)}")
            return 'hold', 0.0, {}

def calculate_trading_performance(metrics_df: pd.DataFrame, signals_df: pd.DataFrame) -> Dict:
    """Summarise the performance of the executed trading signals.

    Only rows whose signal is 'buy' or 'sell' count as trades; 'hold' and
    unset (NaN) rows are ignored.  Each trade's return is looked up in
    ``metrics_df`` by timestamp and sign-flipped for sells; trades without a
    known return are skipped.

    The computation runs on the host with NumPy: every output is a scalar,
    the confidence column may be object-typed (not transferable to the GPU),
    and the running maximum needed for the drawdown is not available as a
    CuPy ufunc method.

    Args:
        metrics_df: Frame with 'timestamp' and 'actual_return' columns.
            NOTE(review): if a timestamp occurs more than once the last row
            is used -- confirm this matches how the frame is built.
        signals_df: Frame indexed by timestamp with 'signal' and
            'confidence' columns.

    Returns:
        Dict with total_trades, winning_trades, average_return, return_std,
        sharpe_ratio, avg_confidence, win_rate, profit_factor, max_drawdown
        (largest peak-to-trough loss of the compounded trade returns, as a
        positive fraction) and volatility_annual.  Empty when there are no
        trades or when an error occurs (the error is printed).
    """
    try:
        performance = {}

        trade_mask = signals_df['signal'].isin(['buy', 'sell']).to_numpy()
        if not trade_mask.any():
            return performance

        trades = signals_df.loc[trade_mask]

        # Align realised returns with the trades; unknown timestamps become NaN
        returns_by_time = (
            metrics_df.drop_duplicates(subset='timestamp', keep='last')
            .set_index('timestamp')['actual_return']
        )
        returns = pd.to_numeric(
            returns_by_time.reindex(trades.index), errors='coerce'
        ).to_numpy(dtype=float)
        is_buy = (trades['signal'] == 'buy').to_numpy()
        confidence = pd.to_numeric(trades['confidence'], errors='coerce').to_numpy(dtype=float)

        known = ~np.isnan(returns)
        if not known.any():
            return performance
        returns, is_buy, confidence = returns[known], is_buy[known], confidence[known]

        # A sell profits when the realised return is negative
        adjusted_returns = np.where(is_buy, returns, -returns)

        total_trades = int(adjusted_returns.size)
        winning_trades = int(np.sum(adjusted_returns > 0))

        avg_return = float(np.mean(adjusted_returns))
        return_std = float(np.std(adjusted_returns))
        annualization_factor = float(np.sqrt(252.0))

        # Sharpe ratio is undefined for one trade or zero dispersion
        sharpe_ratio = 0.0
        if total_trades > 1 and return_std > 0:
            sharpe_ratio = avg_return / return_std * annualization_factor

        has_confidence = ~np.isnan(confidence)
        avg_confidence = float(np.mean(confidence[has_confidence])) if has_confidence.any() else float('nan')

        # Profit factor: gross profit over gross loss (inf when nothing was lost)
        gross_profit = float(np.sum(adjusted_returns[adjusted_returns > 0]))
        gross_loss = float(-np.sum(adjusted_returns[adjusted_returns < 0]))
        if gross_loss > 0:
            profit_factor = gross_profit / gross_loss
        else:
            profit_factor = float('inf') if gross_profit > 0 else 0.0

        # Drawdown of the compounded equity curve, starting from 1.0
        equity = np.cumprod(1.0 + adjusted_returns)
        running_peak = np.maximum.accumulate(np.concatenate(([1.0], equity)))[1:]
        max_drawdown = float(np.max((running_peak - equity) / running_peak))

        performance.update({
            'total_trades': total_trades,
            'winning_trades': winning_trades,
            'average_return': avg_return,
            'return_std': return_std,
            'sharpe_ratio': float(sharpe_ratio),
            'avg_confidence': avg_confidence,
            'win_rate': winning_trades / total_trades if total_trades > 0 else 0.0,
            'profit_factor': profit_factor,
            'max_drawdown': max_drawdown,
            'volatility_annual': return_std * annualization_factor if return_std else 0.0
        })

        return performance

    except Exception as e:
        print(f"Error calculating trading performance: {str(e)}")
        return {}



def walk_forward_analysis_with_decisions(df: pd.DataFrame):
    """Run a rolling walk-forward forecast and derive trading signals for the final months.

    Starting at ``CONFIG["initial_train_periods"]`` the cursor advances in
    chunks of ``CONFIG["step_size_periods"]``. At every stop each model in
    ``CONFIG["models"]`` is trained on the trailing
    ``CONFIG["rolling_window_periods"]`` returns and asked for a forecast of
    ``max(CONFIG["forecast_horizons"])`` steps; the per-horizon forecast and
    the matching realised returns are written into GPU arrays. Once the cursor
    is inside the last ``period_calculator(6)`` periods, the model with the
    highest ``model_confidence`` is passed to
    ``DecisionLayer.generate_trading_signal``.

    Args:
        df: Frame with ``close`` and ``returns`` columns. ``open``, ``high``
            and ``low`` are optional: whichever of them are present are
            included in the ``ohlc`` part of the market state handed to the
            decision layer.

    Returns:
        A tuple ``(final_metrics, trading_signals, trading_metrics)``:
        the dict produced by ``calculate_final_metrics`` (extended with the
        trading performance when any signal was generated), a DataFrame of
        ``signal`` / ``confidence`` / ``model_used`` indexed like the decision
        window of ``df``, and a DataFrame with one row of detailed metrics per
        generated signal. If anything raises inside the analysis, the error is
        printed and ``({}, pd.DataFrame(), pd.DataFrame())`` is returned.
    """
    wandb.init(project="gpu_parallel_forecast", config=CONFIG)

    try:
        # 'close' and 'returns' are mandatory inputs of the analysis.
        df_gpu = {
            'close': cp.asarray(df['close'].values, dtype=cp.float32),
            'returns': cp.asarray(df['returns'].values, dtype=cp.float32)
        }
        # The market state also exposes OHLC windows. Only move the columns
        # that actually exist, so a close-only frame does not abort the whole
        # run with a KeyError at the first decision step.
        for column in ('open', 'high', 'low'):
            if column in df.columns:
                df_gpu[column] = cp.asarray(df[column].values, dtype=cp.float32)
        ohlc_columns = [c for c in ('open', 'high', 'low', 'close') if c in df_gpu]

        # Calculate indices
        total_periods = len(df)
        six_months_periods = period_calculator(6)
        last_index = total_periods - 1
        decision_start_index = max(0, last_index - six_months_periods)
        max_horizon = max(CONFIG["forecast_horizons"])
        steps = CONFIG["step_size_periods"]
        model_names = list(CONFIG["models"].keys())

        # Per-model, per-horizon forecast arrays; NaN marks "no forecast yet".
        predictions = {
            model: {h: cp.full(total_periods, cp.nan, dtype=cp.float32)
                    for h in CONFIG["forecast_horizons"]}
            for model in CONFIG["models"]
        }

        # Realised returns aligned with the forecasts above.
        actuals = {
            h: cp.full(total_periods, cp.nan, dtype=cp.float32)
            for h in CONFIG["forecast_horizons"]
        }

        # Look-back lengths (in periods) used for the market-state statistics.
        rolling_window = 500
        volatility_window = 20

        # Initialize trading signals and metrics
        trading_signals = pd.DataFrame(index=df.index[decision_start_index:],
                                       columns=['signal', 'confidence', 'model_used'])
        trading_metrics = []

        # Initialize decision layer
        decision_layer = DecisionLayer()

        def train_and_forecast_with_metrics(config: Dict, data_gpu: cp.ndarray, steps: int) -> Tuple[cp.ndarray, Dict]:
            """Train one model and return its forecast and regression metrics.

            When training fails, an all-NaN forecast and ``None`` metrics are returned.
            """
            model = BaseModel(config)
            success = model.train(data_gpu)
            if success:
                forecast = model.forecast(steps)
                metrics = model.get_regression_metrics()
                return forecast, metrics
            return cp.full(steps, cp.nan, dtype=cp.float32), None

        # Batch process training data
        i = CONFIG["initial_train_periods"]
        with tqdm(total=total_periods, desc="Forecasting Progress") as pbar:
            while i <= total_periods - max_horizon - 1:
                # Never step so far that the longest horizon would run off the data.
                effective_steps = min(steps, total_periods - i - max_horizon)
                if effective_steps <= 0:
                    break

                # Prepare training data on GPU
                train_start = max(0, i - CONFIG["rolling_window_periods"])
                train_data_gpu = cp.asarray(df_gpu['returns'][train_start:i], dtype=cp.float32)

                # Process models in parallel with GPU data
                model_results = Parallel(n_jobs=CONFIG["n_jobs"])(
                    delayed(train_and_forecast_with_metrics)(config, train_data_gpu, max_horizon)
                    for config in CONFIG["models"].values()
                )

                forecasts, metrics_list = zip(*model_results)

                # The h-step forecast made at i is held constant over the whole chunk.
                for model_name, fcst in zip(model_names, forecasts):
                    for h in CONFIG["forecast_horizons"]:
                        if h <= len(fcst):
                            predictions[model_name][h][i:i+effective_steps] = cp.tile(fcst[h-1], effective_steps)

                # Update actuals on GPU
                for h in CONFIG["forecast_horizons"]:
                    actual_start = i + h
                    actual_end = actual_start + effective_steps
                    actuals[h][i:i+effective_steps] = df_gpu['returns'][actual_start:actual_end]

                # Apply decision layer for last 6 months
                if i >= decision_start_index:
                    # max(0, ...) keeps the slice start non-negative, so a short
                    # history yields a shorter window instead of an empty or
                    # wrapped-around slice.
                    returns_buffer = df_gpu['returns'][max(0, i - rolling_window):i]
                    recent_start = max(0, i - volatility_window)
                    recent_returns = df_gpu['returns'][recent_start:i]

                    market_state = {
                        'ohlc': {
                            column: df_gpu[column][recent_start:i]
                            for column in ohlc_columns
                        },
                        'market_volatility': float(cp.std(returns_buffer).get()),
                        'rolling_sharpe': float((cp.mean(returns_buffer) /
                                                 cp.std(returns_buffer) *
                                                 cp.sqrt(cp.array(252.0))).get()),
                        'window_volatility': float(cp.std(recent_returns).get())
                    }

                    if metrics_list:
                        # Models that failed to train have no metrics and get NaN confidence.
                        model_performances = {
                            name: metrics['model_confidence'] if metrics else cp.nan
                            for name, metrics in zip(model_names, metrics_list)
                        }
                        valid_models = {k: v for k, v in model_performances.items() if not np.isnan(v)}

                        # Without a single usable model there is nothing to base
                        # a signal on; leave this row of trading_signals as NaN
                        # rather than looking up a non-existent "None" model.
                        if valid_models:
                            best_model = max(valid_models.items(), key=lambda x: x[1])[0]
                            best_metrics = metrics_list[model_names.index(best_model)]

                            # Keep predictions on GPU for decision layer
                            best_predictions = {
                                h: predictions[best_model][h][i]
                                for h in CONFIG["forecast_horizons"]
                            }

                            # Generate trading signal
                            signal, confidence, detailed_metrics = decision_layer.generate_trading_signal(
                                current_price=float(df_gpu['close'][i].get()),
                                predictions=best_predictions,
                                model_metrics=best_metrics,
                                market_state=market_state
                            )

                            # Store signal and metrics
                            trading_signals.iloc[i-decision_start_index] = [signal, confidence, best_model]
                            detailed_metrics.update({
                                'timestamp': df.index[i],
                                'model_used': best_model,
                                'actual_return': float(df_gpu['returns'][i+1].get()) if i+1 < total_periods else cp.nan
                            })
                            trading_metrics.append(detailed_metrics)

                # Log progress on every third chunk
                if (i // steps) % 3 == 0:
                    log_progress(df, predictions, actuals, i, effective_steps)

                pbar.update(effective_steps)
                i += effective_steps

        # Calculate final metrics
        final_metrics = calculate_final_metrics(predictions, actuals)

        # Calculate trading performance
        if trading_metrics:
            trading_df = pd.DataFrame(trading_metrics)
            trading_performance = calculate_trading_performance(trading_df, trading_signals)
            final_metrics.update(trading_performance)

        wandb.log(final_metrics)
        wandb.finish()

        # Clean up GPU memory
        cp.get_default_memory_pool().free_all_blocks()

        return final_metrics, trading_signals, pd.DataFrame(trading_metrics)

    except Exception as e:
        print(f"Error in walk forward analysis: {str(e)}")
        cp.get_default_memory_pool().free_all_blocks()
        wandb.finish()
        return {}, pd.DataFrame(), pd.DataFrame()



def log_progress(df, predictions, actuals, i, steps):
    """Log windowed forecast errors and an actual-vs-predicted plot to wandb.

    The window covers positions ``max(0, i - CONFIG["plot_sample_size"])`` up
    to ``i + steps``. For every model and horizon with at least one position
    where both prediction and actual are non-NaN, RMSE and MAE are computed
    and logged together with a figure that has one panel per horizon.

    Args:
        df: Frame whose index supplies the x-axis dates for the plot.
        predictions: ``{model: {horizon: cupy array}}`` of forecasts.
        actuals: ``{horizon: cupy array}`` of realised values.
        i: Current walk-forward position; also logged as ``"step"``.
        steps: Number of periods covered by the current chunk.
    """
    horizons = list(CONFIG["forecast_horizons"])
    metrics = {}

    # Calculate start and end indices
    start = max(0, i - CONFIG["plot_sample_size"])
    end = i + steps
    dates = df.index[start:end]

    # Error metrics per horizon and model, computed on the GPU
    for h in horizons:
        actual_data = actuals[h][start:end]

        for model in CONFIG["models"]:
            pred_data = predictions[model][h][start:end]
            valid_mask = ~cp.isnan(pred_data) & ~cp.isnan(actual_data)

            if cp.any(valid_mask):
                actual_gpu = actual_data[valid_mask]
                pred_gpu = pred_data[valid_mask]

                metrics.update({
                    f"{model}_h{h}_rmse": float(cp.sqrt(cp.mean((actual_gpu - pred_gpu)**2)).get()),
                    f"{model}_h{h}_mae": float(cp.mean(cp.abs(actual_gpu - pred_gpu)).get()),
                })

    # One panel per configured horizon instead of a fixed three, so the loop
    # below can never index past the grid. squeeze=False keeps `axes` 2-D even
    # when there is a single horizon.
    fig, axes = plt.subplots(max(1, len(horizons)), 1, figsize=(15, 10), squeeze=False)

    try:
        # Single batch transfer for plotting
        plot_data = {
            'actuals': {h: cp.asnumpy(actuals[h][start:end]) for h in horizons},
            'predictions': {
                model: {h: cp.asnumpy(predictions[model][h][start:end])
                        for h in horizons}
                for model in CONFIG["models"]}
        }

        # Now plot using the transferred data
        for idx, h in enumerate(horizons):
            ax = axes[idx, 0]
            cpu_actuals = plot_data['actuals'][h]

            for model in CONFIG["models"]:
                cpu_preds = plot_data['predictions'][model][h]
                valid_mask = ~np.isnan(cpu_preds) & ~np.isnan(cpu_actuals)

                if np.any(valid_mask):
                    ax.plot(dates[valid_mask], cpu_preds[valid_mask], alpha=0.7, label=model)

            ax.plot(dates, cpu_actuals, 'k-', label='Actual')
            ax.set_title(f"{h*30}-Minute Forecast Horizon")
            ax.legend()

        wandb.log({"forecast_plot": wandb.Image(fig), **metrics, "step": i})
    finally:
        # Close the figure even if plotting or logging fails; this function is
        # called repeatedly and open figures would otherwise accumulate.
        plt.close(fig)

def calculate_final_metrics(predictions, actuals):
    """Compute end-of-run error statistics for every model and horizon.

    For each (model, horizon) pair that has at least one position where both
    the prediction and the actual are non-NaN, the following are computed on
    the GPU and returned as Python floats: RMSE, MAE, correlation, MAPE (in
    percent, over non-zero actuals only) and directional accuracy (sign
    agreement of the ``h``-lagged differences of actuals and predictions).

    Args:
        predictions: ``{model: {horizon: cupy array}}`` of forecasts.
        actuals: ``{horizon: cupy array}`` of realised values.

    Returns:
        Dict keyed ``final_<model>_h<horizon>_<metric>`` with float values.
        Pairs without any valid position contribute no keys.
    """
    horizons = CONFIG["forecast_horizons"]
    model_names = CONFIG["models"]

    # Validity masks for every pair are built first, then consumed below.
    usable = {}
    for model_name in model_names:
        usable[model_name] = {}
        for horizon in horizons:
            has_pred = ~cp.isnan(predictions[model_name][horizon])
            has_actual = ~cp.isnan(actuals[horizon])
            usable[model_name][horizon] = has_pred & has_actual

    results = {}

    for model_name in model_names:
        per_model = {}

        for horizon in horizons:
            keep = usable[model_name][horizon]
            if not cp.any(keep):
                continue

            forecast = predictions[model_name][horizon][keep]
            realised = actuals[horizon][keep]

            # Point-error building blocks
            error = realised - forecast
            abs_error = cp.abs(error)
            sq_error = error ** 2

            # Directional accuracy: do the horizon-lagged changes share a sign?
            realised_change = realised[horizon:] - realised[:-horizon]
            forecast_change = forecast[horizon:] - forecast[:-horizon]
            same_direction = (cp.sign(realised_change) == cp.sign(forecast_change)).astype(float)

            # MAPE is only defined where the actual is non-zero
            nonzero = realised != 0
            if cp.any(nonzero):
                mape = cp.mean(cp.abs(error[nonzero] / realised[nonzero])) * 100
            else:
                mape = cp.array(float('nan'))

            # Pearson-style correlation from population moments
            realised_mean = cp.mean(realised)
            forecast_mean = cp.mean(forecast)
            covariance = cp.mean((realised - realised_mean) * (forecast - forecast_mean))
            realised_std = cp.sqrt(cp.mean((realised - realised_mean) ** 2))
            forecast_std = cp.sqrt(cp.mean((forecast - forecast_mean) ** 2))
            if realised_std != 0 and forecast_std != 0:
                correlation = covariance / (realised_std * forecast_std)
            else:
                correlation = cp.array(0.0)

            # Keep everything on the device until one conversion pass
            on_device = {
                f"final_{model_name}_h{horizon}_rmse": cp.sqrt(cp.mean(sq_error)),
                f"final_{model_name}_h{horizon}_mae": cp.mean(abs_error),
                f"final_{model_name}_h{horizon}_corr": correlation,
                f"final_{model_name}_h{horizon}_mape": mape,
                f"final_{model_name}_h{horizon}_dir_acc": cp.mean(same_direction)
            }
            for metric_name, device_value in on_device.items():
                per_model[metric_name] = float(device_value.get())

        results.update(per_model)

    # Hand cached GPU blocks back to the allocator
    cp.get_default_memory_pool().free_all_blocks()

    return results



if __name__ == "__main__":
    try:
        # No wandb.init() here: walk_forward_analysis_with_decisions opens and
        # finishes its own run, so a second init at this level is redundant.

        # Direct data loading with GPU optimization
        with cp.cuda.Device(0):
            # Read CSV data efficiently
            df = pd.read_csv('/content/nifty50_1min_2015_to_2024.csv')

            # This CSV is read with a 'date' timestamp column elsewhere in the
            # notebook; 'timestamp' is kept as a fallback for differently
            # named exports.
            time_column = 'date' if 'date' in df.columns else 'timestamp'
            df[time_column] = pd.to_datetime(df[time_column])
            df = df.set_index(time_column)

            gpu_data = {
                'close': cp.asarray(df['close'].values, dtype=cp.float32)
            }
            # Simple returns computed on the GPU: (p[t] - p[t-1]) / p[t-1]
            gpu_data['returns'] = cp.diff(gpu_data['close']) / gpu_data['close'][:-1]
            # Prepend NaN so returns line up one-to-one with the close prices
            gpu_data['returns'] = cp.concatenate([cp.array([cp.nan]), gpu_data['returns']])

            # Take last 16000 samples and create DataFrame
            sample_size = 16000
            analysis_columns = {
                'close': cp.asnumpy(gpu_data['close'][-sample_size:]),
                'returns': cp.asnumpy(gpu_data['returns'][-sample_size:])
            }
            # Pass through the remaining OHLC columns when the CSV has them,
            # since the analysis builds an OHLC market state.
            for column in ('open', 'high', 'low'):
                if column in df.columns:
                    analysis_columns[column] = df[column].values[-sample_size:]
            df_analysis = pd.DataFrame(analysis_columns, index=df.index[-sample_size:])

            print(f"\nAnalysis dataset size: {len(df_analysis):,}")

            # Run walk-forward analysis
            results, trading_signals, trading_metrics = walk_forward_analysis_with_decisions(df_analysis)

    except Exception as e:
        print(f"Main execution failed: {str(e)}")
        traceback.print_exc()
    finally:
        # Always release GPU memory and close any wandb run that is still open
        cp.get_default_memory_pool().free_all_blocks()
        wandb.finish()


Optimized dataset size: 30,811


Forecasting Progress:   0%|          | 0/30811 [00:00<?, ?it/s]

ValueError: operands could not be broadcast together with shapes (252,) (67,)

In [None]:
import pandas as pd
# Load only the timestamp and close price of the 1-minute data, indexed by timestamp
df = pd.read_csv('/content/nifty50_1min_2015_to_2024.csv',
                 parse_dates=['date'],
                 index_col='date',
                 usecols=['date', 'close'])

# Resample to 30-minute bars, taking the last close in each bar.
# '30min' replaces the deprecated '30T' alias, which triggers a FutureWarning.
# NOTE(review): ffill() also fills bars with no ticks at all, so periods
# without trading get the previous close and a zero return - confirm this is
# intended.
df_30min = df.resample('30min').last().ffill()

# The first row stays NaN: there is no previous bar to compare against.
# (A trailing .dropna() would have no effect here, because column assignment
# re-aligns on the index and reintroduces the NaN.)
df_30min['returns'] = df_30min['close'].pct_change()

print(f"Total periods in dataset: {len(df_30min):,}")

Total periods in dataset: 168,925


  df_30min = df.resample('30T').last().ffill()


In [None]:
# Display the 1-minute close-price frame (pandas truncates the middle rows)
df

Unnamed: 0_level_0,close
date,Unnamed: 1_level_1
2015-01-09 09:15:00+05:30,8292.10
2015-01-09 09:16:00+05:30,8288.15
2015-01-09 09:17:00+05:30,8293.90
2015-01-09 09:18:00+05:30,8300.65
2015-01-09 09:19:00+05:30,8301.20
...,...
2024-08-28 15:25:00+05:30,25041.45
2024-08-28 15:26:00+05:30,25040.30
2024-08-28 15:27:00+05:30,25042.55
2024-08-28 15:28:00+05:30,25038.90
