In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Print the full path of every file found beneath the Kaggle input directory.
for dirname, _, filenames in os.walk('/kaggle/input'):
    paths_here = (os.path.join(dirname, filename) for filename in filenames)
    print(*paths_here, sep='\n', end='\n' if filenames else '')

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
!pip install --quiet pytorch-forecasting pytorch-lightning --extra-index-url https://download.pytorch.org/whl/cpu


In [None]:
!pip install networkx


In [None]:
# ┌─────────────────────────────────────────────────────────────────────────┐
# │  APEX Expert System - Phase 1 + Phase 2 Co-Existence Implementation    │
# └─────────────────────────────────────────────────────────────────────────┘
import secrets                                     # CSPRNG - replaces random / np.random
from random import SystemRandom                    # convenient alias
_secure = SystemRandom()

import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import pytorch_lightning as pl
from pytorch_forecasting import TemporalFusionTransformer, TimeSeriesDataSet
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression
from sklearn.cluster import KMeans
from statsmodels.tsa.arima.model import ARIMA
from collections import Counter
import warnings
warnings.filterwarnings('ignore')

# —————————————————————— Utility: secure sampling helpers ——————————————————————
def secure_randint(a: int, b: int) -> int:
    """Return an integer drawn from the closed range [a, b] via ``secrets``."""
    span = b - a + 1
    offset = secrets.randbelow(span)
    return a + offset

def secure_choice(seq):
    """Return one element of a non-empty sequence, index chosen via ``secrets``."""
    return seq[secrets.randbelow(len(seq))]

def secure_sample(seq, k):
    """Return k distinct elements of seq (k <= len(seq)) using the SystemRandom instance."""
    picked = _secure.sample(seq, k)
    return picked

# Data loading and preparation (unchanged)
# Read the draw history and expand the short N1..N6 headers to Number1..Number6.
data = pd.read_excel("/kaggle/input/draw-41to44/draw_Updated.xlsx").rename(
    columns={f'N{i}': f'Number{i}' for i in range(1, 7)}
)

# ═══════════════════════════════════════════════════════════════════════════
# PHASE 1 EXPERTS (PROVEN & WORKING - KEEP ALL)
# ═══════════════════════════════════════════════════════════════════════════

class PerformanceTracker:
    """Keeps a list of recorded errors per expert and derives a score from it."""

    def __init__(self):
        # expert name -> errors in the order they were recorded
        self.history = {}

    def update(self, expert, error):
        """Record one more error value for ``expert``."""
        if expert not in self.history:
            self.history[expert] = []
        self.history[expert].append(error)

    def score(self, expert):
        """Return ``1 - mean(errors)`` floored at 0.5; 0.5 when nothing is recorded."""
        errors = self.history.get(expert, [])
        if not errors:
            return 0.5
        return max(0.5, 1 - np.mean(errors))

# Module-level tracker: the expert functions read it via perf.score(...) and
# it is handed to APEXMetaSupervisor, which records errors into it.
perf = PerformanceTracker()

def ensure_valid(pred) -> list:
    """Coerce raw expert output into exactly six distinct integers in 1..39.

    Each value is clipped to [1, 39] and rounded; duplicates are dropped in
    first-seen order; NaN entries are ignored; at most the first six distinct
    values are kept. If fewer than six remain, the gap is filled with unused
    numbers chosen through ``secure_choice`` (CSPRNG).

    Args:
        pred: iterable of numeric values (ints, floats, numpy scalars).

    Returns:
        A sorted list of six distinct Python ints in 1..39.
    """
    seen, out = set(), []
    for raw in pred:
        value = float(raw)
        if np.isnan(value):
            # A NaN carries no information; let the padding step replace it.
            continue
        # Clip before rounding so +/-inf maps to a bound instead of raising.
        # For finite input this equals round-then-clip because both bounds
        # are integers.
        number = int(round(float(np.clip(value, 1, 39))))
        if number in seen:
            continue
        seen.add(number)
        out.append(number)
        if len(out) == 6:
            # Callers index positions 0..5 only; never return more than six.
            break
    while len(out) < 6:
        remaining = sorted(set(range(1, 40)) - seen)
        pick = secure_choice(remaining)
        seen.add(pick)
        out.append(pick)
    return sorted(out)

def fallback(window) -> list:
    """Return the six most frequent numbers in ``window``, padded deterministically.

    All values of columns Number1..Number6 are counted. When fewer than six
    distinct numbers occur, the result is completed with the smallest unused
    numbers from 1..39 (no randomness is involved).

    Args:
        window: DataFrame with columns Number1..Number6.

    Returns:
        A sorted list of six numbers.
    """
    # Flatten column by column; a comprehension avoids the quadratic cost of
    # repeatedly concatenating lists with sum(..., []).
    nums = [n for i in range(1, 7) for n in window[f'Number{i}'].tolist()]
    top6 = [n for n, _ in Counter(nums).most_common(6)]
    if len(top6) < 6:
        # Sort explicitly so the padding does not depend on set iteration order.
        unused = sorted(set(range(1, 40)) - set(top6))
        top6 += unused[: 6 - len(top6)]
    return sorted(top6)

# Phase 1 Expert Functions (Your Current Working System)
def A1_ARIMA_expert(window):
    """Forecast each of Number1..Number6 with ARIMA, sized to the window.

    The ARIMA order and a base confidence are chosen from the number of rows
    in ``window``. Windows with fewer than six rows skip ARIMA and use a
    smoothed level plus a linear trend. If fitting a column fails, that column
    falls back to mean + average step with confidence 0.3.

    Args:
        window: DataFrame with columns Number1..Number6.

    Returns:
        (prediction, confidence): six numbers from ``ensure_valid`` and a float.
    """
    window_size = len(window)
    score = perf.score('A1_ARIMA')

    # Larger windows can support higher-order models.
    if window_size >= 20:
        order, confidence_base = (5, 1, 2), 0.8
    elif window_size >= 15:
        order, confidence_base = (3, 1, 1), 0.7
    elif window_size >= 10:
        order, confidence_base = (2, 1, 1), 0.6
    elif window_size >= 6:
        order, confidence_base = (1, 1, 1), 0.5
    else:
        # Too few rows for ARIMA: blend last value and mean, then add the
        # average per-step change.
        alpha = 0.3
        preds = []
        for i in range(1, 7):
            series = window[f'Number{i}'].values
            if len(series) >= 2:
                trend = (series[-1] - series[0]) / (len(series) - 1)
                preds.append(series[-1] * alpha + (1 - alpha) * np.mean(series) + trend)
            elif len(series) == 1:
                preds.append(np.mean(series))
            # An empty column contributes nothing; ensure_valid pads the rest.
        return ensure_valid(preds), 0.4

    preds, confs = [], []
    for i in range(1, 7):
        series = window[f'Number{i}'].values
        try:
            fitted_model = ARIMA(series, order=order).fit()
            forecast = fitted_model.forecast()[0]

            # The model is fitted on a plain ndarray, so ``params`` is an
            # ndarray too and has no ``.get``; pair it with ``param_names``
            # to look the AR(1) coefficient up by name.
            named_params = dict(zip(fitted_model.param_names, np.asarray(fitted_model.params)))
            trend_weight = 0.5 + 0.5 * abs(named_params.get('ar.L1', 0))

            # NOTE(review): the forecast itself is scaled by trend_weight, which
            # shrinks it whenever |ar.L1| < 1 — confirm this is intended.
            preds.append(forecast * trend_weight)
            conf = confidence_base * (1 + score) * trend_weight
            confs.append(min(0.95, conf))
        except Exception:
            # Fit/forecast failed for this column (e.g. non-convergence):
            # use mean plus average step with a low confidence.
            series_mean = np.mean(series)
            series_trend = (series[-1] - series[0]) / len(series) if len(series) > 1 else 0
            preds.append(series_mean + series_trend)
            confs.append(0.3)

    return ensure_valid(preds), float(np.mean(confs))


def A2_MovingAverage_expert(window):
    """Phase 1 - Adaptive EWMA Expert.

    For each of Number1..Number6, the last (up to) five values are combined as
    ``alpha * last + (1 - alpha) * mean(previous)`` and the slope of a linear
    fit over those values is added. ``alpha`` grows with the standard deviation
    of the values and is capped at 1 so the weight on the history never turns
    negative.

    Args:
        window: DataFrame with columns Number1..Number6.

    Returns:
        (prediction, confidence): six numbers and 0.7, or the frequency
        fallback with 0.3 when the window has fewer than two rows.
    """
    # Every column has len(window) rows, so this check is loop-invariant.
    if len(window) < 2:
        return fallback(window), 0.3

    preds = []
    for i in range(1, 7):
        series = window[f'Number{i}'].values[-5:]
        alpha = min(1.0, 2 / 6 + np.std(series) / 10)
        ewma = series[-1] * alpha + (1 - alpha) * np.mean(series[:-1])
        steps = np.arange(len(series)).reshape(-1, 1)
        slope = LinearRegression().fit(steps, series).coef_[0]
        preds.append(ewma + slope)
    return ensure_valid(preds), 0.7

def B1_RandomForest_expert(window):
    """Phase 1 - Meta-Optimized Random Forest.

    Trains a multi-output random forest that maps features built from the
    rows *before* position ``idx`` to the six numbers at ``idx``, then predicts
    the draw that follows the last row of ``window``.

    Features per position: the six numbers of the three preceding rows, the
    mean of Number1 and the std of Number2 over the five preceding rows, and
    the tracker score scaled by 10.

    Args:
        window: DataFrame with columns Number1..Number6.

    Returns:
        (prediction, confidence): six numbers and 0.85, or the frequency
        fallback with 0.3 when the window has fewer than 15 rows.
    """
    if len(window) < 15:
        return fallback(window), 0.3

    score = perf.score('B1_RandomForest')
    cols = [f'Number{i}' for i in range(1, 7)]

    def feature_row(idx):
        """Features describing the rows that precede position ``idx``."""
        row = []
        for lag in (1, 2, 3):
            row += window.iloc[idx - lag][cols].tolist()
        recent = window.iloc[idx - 5:idx]
        row += [recent['Number1'].mean(), recent['Number2'].std(), score * 10]
        return row

    positions = range(10, len(window))
    feats = [feature_row(idx) for idx in positions]
    tars = [window.iloc[idx][cols].tolist() for idx in positions]

    rf = RandomForestRegressor(
        n_estimators=100,
        max_depth=15 if score > 0.7 else 10,
        random_state=42, n_jobs=-1
    )
    rf.fit(feats, tars)

    # Predict the *next* draw: build the feature row for the position just
    # past the end of the window. Re-using the last training row would only
    # reproduce the last draw the forest has already seen as a target.
    out = rf.predict([feature_row(len(window))])[0]
    return ensure_valid(out), 0.85

def E1_FrequencyAnalysis_expert(window):
    """Phase 1 - hot/cold frequency expert.

    Counts how often each number occurs in ``window``, splits the counts into
    two KMeans clusters, and treats the cluster with the higher centre as
    "hot". Mostly hot numbers are sampled (via the CSPRNG helper), topped up
    with numbers outside the hot set.

    Args:
        window: DataFrame with columns Number1..Number6.

    Returns:
        (prediction, confidence): six numbers and 0.75, or the frequency
        fallback with 0.3 when fewer than two distinct numbers are present.
    """
    nums = [n for i in range(1, 7) for n in window[f'Number{i}'].tolist()]
    freq = Counter(nums)
    if len(freq) < 2:
        # KMeans with two clusters needs at least two samples.
        return fallback(window), 0.3

    keys = list(freq.keys())
    vals = np.array(list(freq.values()))
    z = (vals - vals.mean()) / (vals.std() or 1)

    km = KMeans(n_clusters=2, random_state=42).fit(vals.reshape(-1, 1))
    # Cluster labels are arbitrary; pick the one whose centre is the larger count.
    hot_label = int(np.argmax(km.cluster_centers_.ravel()))
    hot = [k for k, label in zip(keys, km.labels_) if label == hot_label]
    cold = sorted(set(range(1, 40)) - set(hot))

    # Share of picks taken from the hot set: 0.7 plus a bonus for strong outliers.
    hr = 0.7 + (np.count_nonzero(z > 2) / len(keys)) * 0.3
    hot_pick = secure_sample(hot, min(int(6 * hr), len(hot)))
    # Never ask for more than the pool holds; ensure_valid pads any shortfall.
    cold_pick = secure_sample(cold, min(6 - len(hot_pick), len(cold)))
    return ensure_valid(hot_pick + cold_pick), 0.75

def D1_EnsembleStacking_expert(window):
    """Phase 1 - Attention-Stacking Meta-Expert.

    Runs the four base Phase 1 experts, weights each prediction by a softmax
    over ``confidence * tracker score``, and averages position by position.
    A base expert that raises is reported and skipped; if none succeeds the
    frequency fallback is returned with confidence 0.3.

    Args:
        window: DataFrame with columns Number1..Number6.

    Returns:
        (prediction, confidence): six numbers and a float capped at 0.98.
    """
    preds, ws = [], []
    for name in ['A1_ARIMA', 'A2_MovingAverage', 'B1_RandomForest', 'E1_FrequencyAnalysis']:
        try:
            p, c = globals()[f"{name}_expert"](window)
        except Exception as exc:
            # Without this the empty-result guard below could never trigger:
            # one failing base expert would abort the whole stack.
            print(f"❌ Expert {name} failed: {exc}")
            continue
        preds.append(p)
        ws.append(c * perf.score(name))
    if not preds:
        return fallback(window), 0.3

    ws = np.asarray(ws, dtype=float)
    # Max-shifted softmax: same weights, no overflow risk.
    exp_ws = np.exp(ws - ws.max())
    w = exp_ws / exp_ws.sum()
    final = [sum(p[i] * wi for p, wi in zip(preds, w)) for i in range(6)]
    return ensure_valid(final), float(min(0.98, np.dot(w, ws)))

# ═══════════════════════════════════════════════════════════════════════════
# PHASE 2 ADVANCED EXPERTS (NEW CUTTING-EDGE IMPLEMENTATIONS)
# ═══════════════════════════════════════════════════════════════════════════

class HybridLSTMTransformer(pl.LightningModule):
    """Single-layer LSTM followed by a Transformer encoder and an MLP head.

    The network maps a sequence of six-number rows to six real-valued outputs
    taken from the last time step.

    NOTE(review): the class defines no training_step / configure_optimizers and
    nothing in the visible part of this file trains an instance, so
    ``predict_numbers`` appears to run on randomly initialised weights —
    confirm whether training happens elsewhere.

    Args:
        input_size: number of features per time step.
        hidden_size: LSTM hidden size and Transformer model width; must be
            divisible by ``num_heads``.
        num_heads: attention heads per Transformer layer.
        num_layers: number of Transformer encoder layers.
    """
    def __init__(self, input_size=6, hidden_size=64, num_heads=8, num_layers=3):
        super().__init__()
        # No dropout argument: nn.LSTM applies dropout only between stacked
        # layers, so on a single-layer LSTM it has no effect and just warns.
        self.lstm = nn.LSTM(input_size, hidden_size, batch_first=True)

        # Self-attention stack over the LSTM outputs. No explicit positional
        # encoding is added; order information comes from the LSTM.
        encoder_layer = nn.TransformerEncoderLayer(
            d_model=hidden_size,
            nhead=num_heads,
            dim_feedforward=hidden_size*4,
            dropout=0.1,
            activation='gelu',
            batch_first=True
        )
        self.transformer = nn.TransformerEncoder(encoder_layer, num_layers)

        # Plain feed-forward head (Linear -> LayerNorm -> GELU, twice) ending
        # in six outputs.
        self.prediction_head = nn.Sequential(
            nn.Linear(hidden_size, hidden_size*2),
            nn.LayerNorm(hidden_size*2),
            nn.GELU(),
            nn.Dropout(0.1),
            nn.Linear(hidden_size*2, hidden_size),
            nn.LayerNorm(hidden_size),
            nn.GELU(),
            nn.Linear(hidden_size, 6)
        )

        # Placeholder; never written by the methods of this class.
        self.attention_weights = None

    def forward(self, x):
        """Run LSTM then Transformer and predict from the last time step.

        Args:
            x: float tensor; ``predict_numbers`` passes shape (1, rows, 6).

        Returns:
            Tensor with six values per batch element.
        """
        lstm_out, _ = self.lstm(x)
        transformer_out = self.transformer(lstm_out)
        return self.prediction_head(transformer_out[:, -1, :])

    def predict_numbers(self, window_data):
        """Turn a window of draws into six valid numbers.

        Args:
            window_data: DataFrame with columns Number1..Number6.

        Returns:
            Six numbers as produced by ``ensure_valid``.
        """
        self.eval()  # disable dropout for inference
        with torch.no_grad():
            x = torch.tensor(
                window_data[[f'Number{i}' for i in range(1, 7)]].values,
                dtype=torch.float32
            ).unsqueeze(0)  # add the batch dimension

            # Drop only the batch dimension that was added above.
            pred = self(x).squeeze(0).numpy()

            pred = np.clip(pred, 1, 39)
            return ensure_valid(pred)

class TemporalFusionExpert:
    """Wrapper that trains a TemporalFusionTransformer once and predicts with it.

    ``predict`` returns ``(numbers, confidence)``. Any exception raised while
    building the dataset, training, or predicting is caught and replaced by
    the frequency fallback with confidence 0.4.
    """
    def __init__(self):
        # Model instance; created lazily on the first predict() call.
        self.tft = None
        # Set to True after the first successful trainer.fit().
        self.trained = False

    def prepare_dataset(self, window):
        """Build a TimeSeriesDataSet from ``window``.

        Adds a consecutive ``time_idx`` column and a constant ``series`` group
        id, and declares Number1..Number6 both as targets and as
        time-varying unknown reals. The encoder length is capped at 20 and the
        prediction length is 1.
        """
        df = window.reset_index()
        df['time_idx'] = range(len(df))
        df['series'] = 0

        dataset = TimeSeriesDataSet(
            df,
            time_idx='time_idx',
            target=['Number1', 'Number2', 'Number3', 'Number4', 'Number5', 'Number6'],
            group_ids=['series'],
            max_encoder_length=min(20, len(df)-1),
            max_prediction_length=1,
            time_varying_unknown_reals=['Number1', 'Number2', 'Number3', 'Number4', 'Number5', 'Number6'],
            allow_missing_timesteps=True
        )
        return dataset

    def predict(self, window):
        """Train on first use, then predict the next draw from ``window``.

        Returns:
            ``(ensure_valid(pred), 0.92)`` on success, ``(fallback(window), 0.5)``
            when the model returns no predictions, and
            ``(fallback(window), 0.4)`` when any exception occurs.
        """
        try:
            if not self.trained or self.tft is None:
                dataset = self.prepare_dataset(window)
                dataloader = dataset.to_dataloader(train=True, batch_size=1)

                # NOTE(review): the dataset declares six targets while the model
                # is given a scalar output_size=6 and a plain torch
                # nn.MSELoss() — confirm against pytorch-forecasting's
                # multi-target requirements. If this raises, the except below
                # hides it and the expert silently degrades to the fallback.
                self.tft = TemporalFusionTransformer.from_dataset(
                    dataset,
                    learning_rate=1e-3,
                    hidden_size=64,
                    attention_head_size=8,
                    dropout=0.2,
                    hidden_continuous_size=32,
                    output_size=6,
                    loss=nn.MSELoss(),
                    reduce_on_plateau_patience=3
                )

                # Short, quiet training run: no checkpoints, logger, or progress bar.
                trainer = pl.Trainer(
                    max_epochs=20,
                    enable_checkpointing=False,
                    logger=False,
                    enable_progress_bar=False
                )
                trainer.fit(self.tft, dataloader)
                # From here on the fitted model is reused for every later
                # window without retraining.
                self.trained = True

            # Predict on a dataset rebuilt from the same window.
            test_dataset = self.prepare_dataset(window)
            test_dataloader = test_dataset.to_dataloader(train=False, batch_size=1)
            predictions = self.tft.predict(test_dataloader)

            # Take at most six values from the first prediction (or from the
            # prediction container itself when its first item has no length).
            if len(predictions) > 0:
                pred = predictions[0][:6] if hasattr(predictions[0], '__len__') else predictions[:6]
                return ensure_valid(pred), 0.92
            else:
                return fallback(window), 0.5

        except Exception as e:
            # Best-effort: every failure is swallowed without being reported.
            return fallback(window), 0.4

class NeuralDecisionForest:
    """Weighted ensemble of three tree regressors over hand-built window features.

    The models (random forest, gradient boosting, extra trees) are fitted on
    the first ``predict`` call that succeeds and reused afterwards.
    """
    def __init__(self):
        # Fitted regressors, in the order matching the ensemble weights.
        self.models = []
        # True once all three models have been fitted.
        self.trained = False

    def create_advanced_features(self, window):
        """Summarise ``window`` as a flat list of 13 numeric features.

        Features, in order: mean, std, median, 25th and 75th percentile,
        distinct-value count, min and max over all numbers; slope of the
        per-row mean across the last five rows (0 when the window has five
        rows or fewer); number of distinct values and the highest occurrence
        count; mean and std of the gaps between the sorted numbers of the
        last row.
        """
        all_nums = window[[f'Number{i}' for i in range(1, 7)]].values

        features = [
            np.mean(all_nums), np.std(all_nums), np.median(all_nums),
            np.percentile(all_nums, 25), np.percentile(all_nums, 75),
            len(np.unique(all_nums)), np.min(all_nums), np.max(all_nums)
        ]

        if len(window) > 5:
            recent_trend = np.polyfit(range(5), all_nums[-5:].mean(axis=1), 1)[0]
            features.append(recent_trend)
        else:
            features.append(0)

        freq_dist = Counter(all_nums.flatten())
        most_common_freq = freq_dist.most_common(1)[0][1] if freq_dist else 0
        features.extend([len(freq_dist), most_common_freq])

        gaps = np.diff(np.sort(all_nums[-1]))
        features.extend([np.mean(gaps), np.std(gaps)])

        return features

    def predict(self, window):
        """Predict the next draw from ``window``.

        Each training sample pairs the features of ten consecutive rows with
        the six numbers of the row that follows them.

        Returns:
            ``(six numbers, 0.88)`` on success; ``(fallback(window), 0.3)`` when
            the window has fewer than 15 rows, fewer than five samples can be
            built, or any step raises.
        """
        try:
            if len(window) < 15:
                return fallback(window), 0.3

            cols = [f'Number{n}' for n in range(1, 7)]
            X, y = [], []
            for i in range(10, len(window)):
                X.append(self.create_advanced_features(window.iloc[i-10:i]))
                y.append(window.iloc[i][cols].values)

            if len(X) < 5:
                return fallback(window), 0.3

            if not self.trained:
                from sklearn.ensemble import GradientBoostingRegressor, ExtraTreesRegressor
                from sklearn.multioutput import MultiOutputRegressor

                models = [
                    RandomForestRegressor(n_estimators=200, max_depth=20, random_state=42),
                    # GradientBoostingRegressor only accepts a 1-D target, so
                    # fitting it directly on six-column targets raises. Wrap it
                    # to fit one booster per output column.
                    MultiOutputRegressor(
                        GradientBoostingRegressor(n_estimators=100, max_depth=10, random_state=42)
                    ),
                    ExtraTreesRegressor(n_estimators=150, max_depth=15, random_state=42)
                ]
                for model in models:
                    model.fit(X, y)
                # Publish only after every model fitted, so a failure cannot
                # leave a partially fitted ensemble behind.
                self.models = models
                self.trained = True

            test_features = self.create_advanced_features(window.iloc[-10:])
            predictions = [model.predict([test_features])[0] for model in self.models]

            # Fixed ensemble weights; the random forest gets the largest share.
            weights = [0.4, 0.35, 0.25]
            final_pred = np.average(predictions, weights=weights, axis=0)

            return ensure_valid(final_pred), 0.88

        except Exception:
            # Best-effort expert: any failure degrades to the frequency fallback.
            return fallback(window), 0.3

class MultiScalePatternAnalyzer:
    """Blend short-, medium- and long-range views of the window into one prediction."""
    def __init__(self):
        # Not read or written by the methods of this class.
        self.pattern_memory = {}

    def extract_multiscale_patterns(self, window):
        """Collect flattened slices of ``window`` at several time scales.

        Returns:
            dict with ``'long_term'`` (all rows) always present,
            ``'short_term'`` (last 3 rows) when at least 3 rows exist, and
            ``'medium_term'`` (last 7 rows) plus ``'cyclical'`` (mean of every
            7th row per offset) when at least 7 rows exist.
        """
        patterns = {}
        data = window[[f'Number{i}' for i in range(1, 7)]].values

        if len(data) >= 3:
            patterns['short_term'] = data[-3:].flatten()

        if len(data) >= 7:
            patterns['medium_term'] = data[-7:].flatten()

        patterns['long_term'] = data.flatten()

        if len(data) >= 7:
            weekly_pattern = [data[i::7] for i in range(min(7, len(data)))]
            patterns['cyclical'] = np.array([np.mean(p) for p in weekly_pattern if len(p) > 0])

        return patterns

    def predict(self, window):
        """Combine per-scale predictions into six numbers.

        Components (weight before normalisation):
          * short term (0.3): per-position mean of the last three rows;
          * medium term (0.4): last row plus the average step over seven rows;
          * long term (0.3): six numbers sampled from the most frequent
            cluster of numbers, using the CSPRNG helper.

        Returns:
            ``(six numbers, mean component weight + 0.1)``, or
            ``(fallback(window), 0.3)`` if anything raises.
        """
        try:
            patterns = self.extract_multiscale_patterns(window)

            predictions = []
            confidences = []

            if 'short_term' in patterns:
                short_pred = np.mean(patterns['short_term'].reshape(-1, 6), axis=0)
                predictions.append(short_pred)
                confidences.append(0.3)

            if 'medium_term' in patterns:
                med_data = patterns['medium_term'].reshape(-1, 6)
                med_pred = med_data[-1] + (med_data[-1] - med_data[0]) / len(med_data)
                predictions.append(med_pred)
                confidences.append(0.4)

            if 'long_term' in patterns:
                freq = Counter(patterns['long_term'])
                vals = np.array(list(freq.values()))
                if len(vals) > 2:
                    # Three frequency clusters; "hot" is the one with the
                    # highest centre.
                    kmeans = KMeans(n_clusters=3, random_state=42)
                    clusters = kmeans.fit_predict(vals.reshape(-1, 1))
                    hot_cluster = np.argmax(kmeans.cluster_centers_.flatten())
                    hot_numbers = [int(num) for num, cluster in zip(freq.keys(), clusters)
                                   if cluster == hot_cluster]
                else:
                    hot_numbers = [int(num) for num in list(freq.keys())[:20]]

                # Sample through secure_sample so this expert follows the same
                # CSPRNG policy as the rest of the file instead of np.random.
                if len(hot_numbers) >= 6:
                    long_pred = secure_sample(hot_numbers, 6)
                else:
                    remaining = sorted(set(range(1, 40)) - set(hot_numbers))
                    long_pred = hot_numbers + secure_sample(remaining, 6 - len(hot_numbers))

                # The blend below averages position by position, so put the
                # sample in ascending order rather than random sample order.
                predictions.append(sorted(long_pred))
                confidences.append(0.3)

            if not predictions:
                return fallback(window), 0.4

            # Every component has exactly six entries, so a weighted
            # per-position average is always well defined.
            weights = np.array(confidences) / np.sum(confidences)
            final_pred = np.average(predictions, weights=weights, axis=0)
            return ensure_valid(final_pred), float(np.mean(confidences) + 0.1)

        except Exception:
            # Best-effort expert: any failure degrades to the frequency fallback.
            return fallback(window), 0.3

# Initialize Phase 2 experts
# Module-level singletons used by the *_expert_v2 wrapper functions below.
# TemporalFusionExpert and NeuralDecisionForest keep their fitted models on
# the instance, so these objects carry state between calls.
lstm_transformer_expert = HybridLSTMTransformer()
tft_expert = TemporalFusionExpert()
neural_forest_expert = NeuralDecisionForest()
pattern_analyzer_expert = MultiScalePatternAnalyzer()

# Phase 2 Expert Interface Functions
def A1_ARIMA_expert_v2(window):
    """Phase 2 - Hybrid LSTM-Transformer Expert.

    Delegates to ``lstm_transformer_expert.predict_numbers(window)`` and pairs
    the result with a fixed confidence of 0.91.
    """
    # NOTE(review): no code in the visible part of this file trains
    # lstm_transformer_expert before this call — confirm that the constant
    # 0.91 confidence is justified.
    pred = lstm_transformer_expert.predict_numbers(window)
    return pred, 0.91

def A2_MovingAverage_expert_v2(window):
    """Phase 2 - Temporal Fusion Transformer Expert.

    Returns the ``(prediction, confidence)`` pair from ``tft_expert.predict``.
    """
    return tft_expert.predict(window)

def B1_RandomForest_expert_v2(window):
    """Phase 2 - Neural Decision Forest Expert.

    Returns the ``(prediction, confidence)`` pair from
    ``neural_forest_expert.predict``.
    """
    return neural_forest_expert.predict(window)

def E1_FrequencyAnalysis_expert_v2(window):
    """Phase 2 - Multi-Scale Pattern Analyzer.

    Returns the ``(prediction, confidence)`` pair from
    ``pattern_analyzer_expert.predict``.
    """
    return pattern_analyzer_expert.predict(window)

# ═══════════════════════════════════════════════════════════════════════════
# ADVANCED META-SUPERVISOR FOR CO-EXISTENCE
# ═══════════════════════════════════════════════════════════════════════════

# ═══════════════════════════════════════════════════════════════════════════
# CORRECTED ADVANCED META-SUPERVISOR FOR CO-EXISTENCE  
# ═══════════════════════════════════════════════════════════════════════════

class APEXMetaSupervisor:
    """Runs the registered Phase 1 and Phase 2 experts and blends their output.

    Each registry entry names a module-level expert function (looked up via
    ``globals()``), the minimum window length it needs, and its phase. Expert
    predictions are combined with softmax weights built from the expert's
    confidence, its tracker score, and a per-phase weight.

    Args:
        data: DataFrame of draws with columns Number1..Number6.
        perf_tracker: object exposing ``score(name)`` and ``update(name, error)``.
    """

    def __init__(self, data, perf_tracker):
        self.data = data
        self.perf = perf_tracker

        # name -> {status, min_window, phase, func_name}
        self.expert_registry = {
            # Phase 1 experts
            'A1_ARIMA': {'status': 'active', 'min_window': 10, 'phase': 1, 'func_name': 'A1_ARIMA_expert'},
            'A2_MovingAverage': {'status': 'active', 'min_window': 5, 'phase': 1, 'func_name': 'A2_MovingAverage_expert'},
            'B1_RandomForest': {'status': 'active', 'min_window': 15, 'phase': 1, 'func_name': 'B1_RandomForest_expert'},
            'E1_FrequencyAnalysis': {'status': 'active', 'min_window': 10, 'phase': 1, 'func_name': 'E1_FrequencyAnalysis_expert'},
            'D1_EnsembleStacking': {'status': 'active', 'min_window': 20, 'phase': 1, 'func_name': 'D1_EnsembleStacking_expert'},

            # Phase 2 experts (v2 naming)
            'A1_ARIMA_v2': {'status': 'active', 'min_window': 15, 'phase': 2, 'func_name': 'A1_ARIMA_expert_v2'},
            'A2_MovingAverage_v2': {'status': 'active', 'min_window': 20, 'phase': 2, 'func_name': 'A2_MovingAverage_expert_v2'},
            'B1_RandomForest_v2': {'status': 'active', 'min_window': 15, 'phase': 2, 'func_name': 'B1_RandomForest_expert_v2'},
            'E1_FrequencyAnalysis_v2': {'status': 'active', 'min_window': 10, 'phase': 2, 'func_name': 'E1_FrequencyAnalysis_expert_v2'}
        }

        # Per-phase weights; start equal and are nudged by
        # advanced_attention_mechanism.
        self.phase_weights = {'phase_1': 0.5, 'phase_2': 0.5}

    def segment_window(self, end_idx, window_size):
        """Return the last ``window_size`` rows of ``self.data`` ending at ``end_idx`` (inclusive)."""
        start = max(0, end_idx - window_size + 1)
        return self.data.iloc[start:end_idx+1]

    def advanced_attention_mechanism(self, expert_predictions, expert_confidences, phases):
        """Compute one softmax weight per expert.

        Args:
            expert_predictions: dict keyed by expert name (only the keys, in
                order, are used here).
            expert_confidences: confidences in the same order as the keys.
            phases: phase (1 or 2) of each expert, same order.

        Returns:
            numpy array of weights summing to 1.

        Side effect: when both phases are present and one phase's mean
        confidence exceeds the other's by more than 10 %, that phase's entry
        in ``self.phase_weights`` grows by 0.05 (capped at 0.7). The change
        takes effect on the next call, because the phase weights for this
        call are read before the adjustment.
        """
        phase_1_indices = [i for i, p in enumerate(phases) if p == 1]
        phase_2_indices = [i for i, p in enumerate(phases) if p == 2]

        conf_weights = np.array(expert_confidences)

        expert_names = list(expert_predictions.keys())
        perf_weights = np.array([self.perf.score(name) for name in expert_names])

        phase_weight_array = np.array([
            self.phase_weights['phase_1'] if p == 1 else self.phase_weights['phase_2']
            for p in phases
        ])

        if len(phase_1_indices) > 0 and len(phase_2_indices) > 0:
            p1_avg_conf = np.mean([expert_confidences[i] for i in phase_1_indices])
            p2_avg_conf = np.mean([expert_confidences[i] for i in phase_2_indices])

            if p2_avg_conf > p1_avg_conf * 1.1:
                self.phase_weights['phase_2'] = min(0.7, self.phase_weights['phase_2'] + 0.05)
                self.phase_weights['phase_1'] = 1 - self.phase_weights['phase_2']
            elif p1_avg_conf > p2_avg_conf * 1.1:
                self.phase_weights['phase_1'] = min(0.7, self.phase_weights['phase_1'] + 0.05)
                self.phase_weights['phase_2'] = 1 - self.phase_weights['phase_1']

        combined_weights = conf_weights * perf_weights * phase_weight_array

        # Max-shifted softmax for numerical stability.
        exp_weights = np.exp(combined_weights - np.max(combined_weights))
        return exp_weights / np.sum(exp_weights)

    def run_meta_cycle(self, window_size=25):
        """Run every eligible expert on the latest window and blend the results.

        Args:
            window_size: number of most recent rows handed to the experts.

        Returns:
            ``(prediction, details)`` where ``details`` always has the keys
            ``'experts'`` (per-expert pred/conf/weight/phase) and ``'meta'``
            (phase weights, expert counts, ``'ensemble_confidence'``). When no
            expert succeeds the prediction is the frequency fallback,
            ``'experts'`` is empty and ``'ensemble_confidence'`` is 0.0.
        """
        expert_predictions = {}
        expert_confidences = {}
        expert_phases = {}

        end_idx = len(self.data) - 1
        window = self.segment_window(end_idx, window_size)

        for expert_name, config in self.expert_registry.items():
            if config['status'] != 'active' or len(window) < config['min_window']:
                continue

            # Resolve the function outside the try block so that a KeyError
            # raised *inside* an expert is not misreported as a missing function.
            expert_func = globals().get(config['func_name'])
            if expert_func is None:
                print(f"❌ Expert {expert_name} failed: Function '{config['func_name']}' not found")
                continue

            try:
                pred, conf = expert_func(window)

                expert_predictions[expert_name] = pred
                expert_confidences[expert_name] = conf
                expert_phases[expert_name] = config['phase']

                print(f"✅ {expert_name}: {pred} (conf={conf:.3f})")
            except Exception as e:
                print(f"❌ Expert {expert_name} failed: {str(e)}")
                continue

        if not expert_predictions:
            print("⚠️ No experts succeeded, using fallback")
            # Keep the same result shape as the normal path so callers can
            # read details['meta'] unconditionally.
            return fallback(window), {
                'experts': {},
                'meta': {
                    'phase_1_weight': self.phase_weights['phase_1'],
                    'phase_2_weight': self.phase_weights['phase_2'],
                    'total_experts': 0,
                    'phase_1_experts': 0,
                    'phase_2_experts': 0,
                    'ensemble_confidence': 0.0
                }
            }

        names = list(expert_predictions.keys())
        confs = [expert_confidences[name] for name in names]
        phases = [expert_phases[name] for name in names]

        weights = self.advanced_attention_mechanism(expert_predictions, confs, phases)

        # Weighted average per position across all experts.
        final_prediction = ensure_valid([
            float(np.dot([expert_predictions[name][position] for name in names], weights))
            for position in range(6)
        ])

        avg_confidence = np.average(confs, weights=weights)
        # The recorded "error" is 1 - self-reported confidence; no actual
        # outcome is compared here.
        for name in names:
            self.perf.update(name, max(0.0, 1 - expert_confidences[name]))

        expert_details = {
            name: {
                'pred': expert_predictions[name],
                'conf': expert_confidences[name],
                'weight': float(weights[i]),
                'phase': expert_phases[name]
            } for i, name in enumerate(names)
        }

        meta_info = {
            'phase_1_weight': self.phase_weights['phase_1'],
            'phase_2_weight': self.phase_weights['phase_2'],
            'total_experts': len(names),
            'phase_1_experts': len([p for p in phases if p == 1]),
            'phase_2_experts': len([p for p in phases if p == 2]),
            'ensemble_confidence': float(avg_confidence)
        }

        return final_prediction, {'experts': expert_details, 'meta': meta_info}

# ═══════════════════════════════════════════════════════════════════════════
# CORRECTED EXECUTION
# ═══════════════════════════════════════════════════════════════════════════

# Initialize the FIXED meta-supervisor
print("🔧 Initialising secure APEX Meta-Supervisor ...")
meta_supervisor = APEXMetaSupervisor(data, perf)

print("\n🚀 APEX Phase-1 + Phase-2 Co-Existence System (Secure RNG)")
print("=" * 80)
final_prediction, details = meta_supervisor.run_meta_cycle(window_size=25)

print(f"\n🎯 FINAL ENSEMBLE PREDICTION: {final_prediction}")
# The details dict may lack a 'meta' entry when the supervisor had to fall
# back because no expert succeeded; do not crash after printing the result.
ensemble_meta = details.get('meta') if details else None
if ensemble_meta is not None:
    print(f"📊 Ensemble confidence: {ensemble_meta['ensemble_confidence']:.3f}")
else:
    print("📊 Ensemble confidence: n/a (fallback prediction)")


In [None]:
# ─────────────────────────────────────────────────────────────────────────────
# Phase-4 Expert Modules (Secure RNG Edition) – Category E, F, G
# Each expert returns (pred List[int], conf: float)
# All stochastic operations use CSPRNG via `secrets` / `SystemRandom`
# ─────────────────────────────────────────────────────────────────────────────

import secrets
from random import SystemRandom
_secure = SystemRandom()

import numpy as np
from collections import Counter

# ─── Secure helpers ─────────────────────────────────────────────────────────
def s_randint(a: int, b: int) -> int:
    """Return a CSPRNG-drawn integer n with a <= n <= b."""
    width = b - a + 1
    return a + secrets.randbelow(width)

def s_choice(seq):
    """Return one CSPRNG-selected element of a non-empty sequence."""
    position = secrets.randbelow(len(seq))
    return seq[position]

def s_sample(seq, k: int):
    """Return k distinct elements of seq drawn with the SystemRandom instance."""
    chosen = _secure.sample(seq, k)
    return chosen

# unique6 is defined in the Phase-3 cell; reused here
# ---------------------------------------------------------------------------

def E2_Number_Theory_expert(window):
    """Predict from the most common residues modulo 7.

    Counts ``n % 7`` over all numbers in ``window``, takes the two most common
    residues, and returns the six smallest numbers in 1..39 that have one of
    those residues. If fewer than six such numbers exist (a single residue
    with five members), the smallest unused numbers fill the gap.

    Args:
        window: DataFrame with columns Number1..Number6.

    Returns:
        (prediction, confidence): six ascending ints and
        ``min(0.9, 0.5 + share of the top residue)``; the frequency fallback
        with 0.3 when the window holds no numbers.
    """
    nums = [n for i in range(1, 7) for n in window[f'Number{i}'].tolist()]
    if not nums:
        return fallback(window), 0.3

    mod_counts = Counter(n % 7 for n in nums)
    top_res = [r for r, _ in mod_counts.most_common(2)]

    # Enumerate 1..39 directly: the numbers returned really have the selected
    # residues and stay inside the valid range.
    cand = [n for n in range(1, 40) if n % 7 in top_res]
    if len(cand) < 6:
        filler = [n for n in range(1, 40) if n not in cand]
        cand = sorted(cand + filler[: 6 - len(cand)])

    pred = cand[:6]
    freq = mod_counts[top_res[0]] / len(nums)
    return pred, float(min(0.9, 0.5 + freq))

def E6_Sum_Range_Analysis_expert(window):
    """
    Predict six consecutive numbers anchored on the mean draw sum.

    The anchor is int(mean(row sums) // 6), i.e. roughly the average number
    per draw.  Confidence is 0.6 + 0.4 * (1 - std(sums) / (6 * 39)).

    Returns:
        (pred, conf): six consecutive ints inside 1..39 and a float.
    """
    cols = [f'Number{i}' for i in range(1, 7)]
    sums = window[cols].sum(axis=1)
    # Clamp the anchor so base..base+5 always stays inside 1..39; the raw
    # value reaches 35+ for high draws and would otherwise emit 40, 41, ...
    base = min(max(int(sums.mean() // 6), 1), 34)
    pred = [base + offset for offset in range(6)]
    conf = float(0.6 + 0.4 * (1 - np.std(sums) / (6 * 39)))
    return pred, conf

def E9_Gap_Analysis_expert(window):
    """
    Shift the latest draw by the median gap of the two most recent draws.

    The numbers of the last two rows are pooled and sorted; the median of
    their consecutive differences becomes the shift applied (with wrap-around
    into 1..39) to every number of the last row.  Collisions are topped up
    with secure random numbers.  Confidence grows with the share of distinct
    gap values.
    """
    cols = [f'Number{i}' for i in range(1, 7)]
    pooled_sorted = np.sort(window.iloc[-2:][cols].values.flatten())
    gaps = np.diff(pooled_sorted)
    if len(gaps):
        shift = int(np.median(gaps))
    else:
        shift = 0

    shifted = set()
    for n in sorted(window.iloc[-1][cols]):
        shifted.add(((n + shift - 1) % 39) + 1)
    while len(shifted) < 6:
        shifted.add(s_randint(1, 39))

    distinct_ratio = np.unique(gaps).size / max(len(gaps), 1)
    return sorted(shifted), float(0.5 + 0.5 * distinct_ratio)

def E16_Chaos_Theory_expert(window):
    """
    Apply one logistic-map step (r = 3.9) to the last draw.

    The last row is scaled by 1/39, pushed through r * x * (1 - x), and mapped
    back with int(v * 39) + 1.  Collapsed duplicates are refilled with secure
    random numbers.  Confidence is max(0.1, 1 - std of the mapped values).
    """
    cols = [f'Number{i}' for i in range(1, 7)]
    x = window.iloc[-1][cols].values / 39
    growth = 3.9
    mapped = growth * x * (1 - x)

    numbers = set()
    for v in mapped:
        numbers.add(int(v * 39) + 1)
    while len(numbers) < 6:
        numbers.add(s_randint(1, 39))

    spread = np.std(mapped)
    return sorted(numbers), float(max(0.1, 1 - spread))

def F3_Causal_Inference_expert(window):
    """
    Rank the six number columns by mean absolute correlation.

    All six columns are always selected, so the prediction is the last row's
    values in ascending order; the ranking only feeds the confidence, which is
    the mean of the per-column scores capped at 0.9.
    """
    cols = [f'Number{i}' for i in range(1, 7)]
    strength = window[cols].corr().abs().mean()
    ranked = strength.sort_values(ascending=False)
    chosen = ranked.index[:6]

    latest = [int(window[name].iloc[-1]) for name in chosen]
    latest.sort()
    confidence = float(min(0.9, ranked[chosen].mean()))
    return latest, confidence

def F16_Multi_Armed_Bandits_expert(window):
    """
    Bandit-style expert built on the scores in state['hist_match'].

    Six expert names are drawn (with replacement) with probability
    proportional to their score plus 1e-3.  The i-th sampled expert
    contributes element i of its unique6-normalised prediction, and the
    collected numbers go through ensure_valid.  With no history the plain
    fallback is returned with confidence 0.3.
    """
    history = state['hist_match']
    if not history:
        return fallback(window), 0.3

    keys, vals = zip(*history.items())
    probs = np.array(vals) + 1e-3
    probs /= probs.sum()

    namespace = globals()
    picks = []
    for slot, expert_name in enumerate(_secure.choices(keys, probs, k=6)):
        expert_fn = namespace.get(f"{expert_name}_expert", _stub)
        raw, _ = expert_fn(window)
        picks.append(unique6(raw)[slot % 6])

    confidence = float(0.6 + 0.4 * (1 - np.std(vals)))
    return ensure_valid(picks), confidence

def F2_Quantum_ML_expert(window):
    """
    Project the last draw through a random orthogonal 6x6 matrix.

    The matrix is the Q factor of a standard-normal matrix produced by a
    NumPy generator seeded with 128 secure random bits.  Projected values are
    wrapped with (v % 39) + 1 and truncated to ints; duplicates are refilled
    with secure random numbers.  Confidence is fixed at 0.70.
    """
    cols = [f'Number{i}' for i in range(1, 7)]
    draws = window[cols].values.astype(float)

    seed = secrets.randbits(128)
    gaussian = np.random.default_rng(seed).standard_normal((6, 6))
    rotation, _ = np.linalg.qr(gaussian)

    wrapped = (draws[-1] @ rotation) % 39 + 1
    numbers = set()
    for v in wrapped:
        numbers.add(int(v))
    while len(numbers) < 6:
        numbers.add(s_randint(1, 39))
    return sorted(numbers), 0.70

def G1_Auto_ML_expert(window):
    """
    Fit one shallow decision tree per number column against the row index.

    Each tree (max_depth=3, securely chosen random_state) predicts the value
    at the next index; the result is rounded and clipped into 1..39.  The
    per-column confidence is 1 - fitted_depth / 10, and the mean is returned.
    """
    from sklearn.tree import DecisionTreeRegressor

    frame = window[[f'Number{i}' for i in range(1, 7)]]
    timeline = np.arange(len(frame)).reshape(-1, 1)
    next_step = [[len(frame)]]

    numbers, scores = [], []
    for position in range(1, 7):
        tree = DecisionTreeRegressor(max_depth=3, random_state=s_randint(1, 10_000))
        tree.fit(timeline, frame[f'Number{position}'])
        forecast = round(tree.predict(next_step)[0])
        numbers.append(int(np.clip(forecast, 1, 39)))
        scores.append(1 - tree.tree_.max_depth / 10)
    return ensure_valid(numbers), float(np.mean(scores))

def G3_Genetic_Programming_expert(window):
    """
    Extrapolate each number column linearly from its last two values.

    The DEAP import only selects the branch: the imported names are not used
    afterwards.  Without DEAP the rounded mean of the last two draws is
    returned with confidence 0.35; with DEAP each column gets
    last + (last - previous), wrapped into 1..39, with confidence 0.60.
    """
    cols = [f'Number{i}' for i in range(1, 7)]
    try:
        from deap import creator, base, gp, tools
    except ImportError:
        # DEAP unavailable: average the two most recent draws instead
        recent_mean = window[cols].iloc[-2:].mean().round().tolist()
        return ensure_valid(recent_mean), 0.35

    extrapolated = []
    for name in cols:
        series = window[name].tolist()
        step = series[-1] - series[-2] if len(series) > 1 else 0
        extrapolated.append(((series[-1] + step - 1) % 39) + 1)
    return ensure_valid(extrapolated), 0.60

def G19_Catastrophic_Forgetting_expert(window):
    """
    Favour the numbers that appeared least often in the window.

    Only numbers that occur at least once are counted; those sharing the
    minimum count form the "rare" pool.  A pool smaller than six is topped up
    with secure samples from the rest of 1..39, then six numbers are sampled
    from it.  Confidence is 0.4 + 0.6 * len(pool) / 39.
    """
    pooled = []
    for i in range(1, 7):
        pooled.extend(window[f'Number{i}'].tolist())

    tally = Counter(pooled)
    if not tally:
        return fallback(window), 0.3

    floor = min(tally.values())
    rare = [number for number, count in tally.items() if count == floor]
    shortfall = 6 - len(rare)
    if shortfall > 0:
        remainder = [n for n in range(1, 40) if n not in rare]
        rare += s_sample(remainder, shortfall)

    chosen = sorted(s_sample(rare, 6))
    return chosen, float(0.4 + 0.6 * (len(rare) / 39))


In [None]:
# ─────────────────────────────────────────────────────────────────────────────
# Phase-5 Expert Modules – Advanced Cryptanalysis Agents
# Each returns (pred: List[int], conf: float)
# Use aescrypt-like primitives, CRT, differential analysis, and secure RNG
# ─────────────────────────────────────────────────────────────────────────────

import secrets
from random import SystemRandom
_secure = SystemRandom()

import numpy as np
from collections import Counter
from math import gcd
from sympy.ntheory.modular import crt  # Chinese remainder theorem

# Secure RNG helpers (reuse from orchestrator)
def s_randint(a, b):
    """Return a CSPRNG-backed integer from the closed range [a, b]."""
    width = b - a + 1
    return a + secrets.randbelow(width)
def s_choice(seq):
    """Return one securely chosen element of a non-empty sequence."""
    index = secrets.randbelow(len(seq))
    return seq[index]
def s_sample(seq, k):
    """Return k items sampled from seq without replacement via SystemRandom."""
    drawn = _secure.sample(seq, k)
    return drawn

# Expert 1: E20_Fourier_Residue_Expert
def E20_Fourier_Residue_expert(window):
    """
    Derive numbers from the two strongest DFT bins of the pooled draws.

    The mean-removed sequence is transformed with an FFT; for each of the two
    largest-magnitude bins in the lower half of the spectrum the value
    magnitude * cos(phase) is wrapped into 1..39.  The remaining slots are
    filled with secure random numbers.

    Returns:
        (pred, conf): six distinct sorted ints in 1..39, and a confidence
        equal to the weaker selected peak over the mean magnitude, capped at
        0.9.
    """
    nums = sum((window[f'Number{i}'].tolist() for i in range(1, 7)), [])
    vec = np.array(nums) - np.mean(nums)
    fft = np.fft.fft(vec)
    mags = np.abs(fft[:len(fft) // 2])
    # argsort is ascending, so peaks = [second strongest, strongest]
    peaks = mags.argsort()[-2:]

    # Collect into a set and pad until six DISTINCT values exist.  The former
    # list-then-set approach dropped duplicates after padding and could
    # return fewer than six numbers.
    preds = set()
    for pk in peaks:
        phase = np.angle(fft[pk])
        preds.add(int((mags[pk] * np.cos(phase)) % 39) + 1)
    while len(preds) < 6:
        preds.add(s_randint(1, 39))

    conf = float(min(0.9, (mags[peaks[0]] / (np.mean(mags) + 1e-6))))
    return sorted(preds), conf

# Expert 2: E21_CRT_Reconstructor
def E21_CRT_Reconstructor_expert(window):
    """
    Search 1..39 for numbers sharing the last pooled number's residues.

    The residues of the final pooled value modulo 5, 7 and 9 are matched
    against every x in 1..39.  Because 5 * 7 * 9 = 315 exceeds 39, at most one
    x can match, so the remaining slots are filled with secure random numbers.

    Returns:
        (pred, conf): six distinct sorted ints in 1..39, and
        min(0.9, matches / 6).
    """
    nums = sum((window[f'Number{i}'].tolist() for i in range(1, 7)), [])
    # Only the last residue triple is used, so compute it directly instead of
    # building three full residue lists.
    last = nums[-1]
    r5, r7, r9 = last % 5, last % 7, last % 9
    candidates = [
        x for x in range(1, 40)
        if x % 5 == r5 and x % 7 == r7 and x % 9 == r9
    ]

    # Pad through a set so random fill-ins cannot repeat a number, and sort
    # at the end; padding used to be appended unsorted and could duplicate.
    pred = set(candidates[:6])
    while len(pred) < 6:
        pred.add(s_randint(1, 39))

    conf = float(min(0.9, len(candidates) / 6))
    return sorted(pred), conf

# Expert 3: E22_Differential_Entropy
def E22_Differential_Entropy_expert(window):
    """
    Compare the number distributions of the last two draws.

    Builds 39-bin density histograms of the second-to-last and the last draw,
    computes a smoothed KL-divergence proxy between them, and repeats the most
    recent draw (normalised by unique6) as the prediction.

    Returns:
        (pred, conf): sorted six-number list and max(0.1, 1 - kl).
    """
    cols = [f'Number{i}' for i in range(1, 7)]
    # Flatten row by row so every consecutive group of six values is one
    # draw.  Concatenating the columns would make arr[-6:] the tail of the
    # Number6 column instead of the latest draw.
    arr = window[cols].values.astype(float).ravel()

    recent = window.iloc[-1][cols].tolist()
    pred = sorted(unique6(recent))

    if arr.size < 12:
        # Fewer than two draws: the divergence is undefined (NaN), which the
        # max() below already mapped to 0.1; return that directly.
        return pred, 0.1

    h1, _ = np.histogram(arr[-12:-6], bins=39, range=(1, 40), density=True)
    h2, _ = np.histogram(arr[-6:], bins=39, range=(1, 40), density=True)
    # KL divergence proxy with additive smoothing to avoid log(0)
    kl = np.sum(h1 * np.log((h1 + 1e-6) / (h2 + 1e-6)))
    conf = float(max(0.1, 1 - kl))
    return pred, conf

# Expert 4: F23_BlockCipher_LFSR_Hybrid
def F23_BlockCipher_LFSR_expert(window):
    """
    Run a 7-bit LFSR seeded from a toy substitution of the window numbers.

    Every pooled number is mapped through ((x * 45) ^ 0x1F) & 0xFF and the
    results are summed; the low 7 bits seed the register.  Six steps with
    taps at bits 0, 2, 3 and 5 each yield (register % 39) + 1.  Confidence is
    fixed at 0.5.
    """
    def _substitute(x):
        # nonlinear byte mapping standing in for an S-box
        return ((x * 45) ^ 0x1F) & 0xFF

    seed = 0
    for i in range(1, 7):
        for n in window[f'Number{i}'].tolist():
            seed += _substitute(n)

    register = seed & 0x7F
    outputs = []
    for _ in range(6):
        feedback = (register ^ (register >> 2) ^ (register >> 3) ^ (register >> 5)) & 1
        register = ((register >> 1) | (feedback << 6)) & 0x7F
        outputs.append((register % 39) + 1)
    return sorted(unique6(outputs)), 0.5

# Expert 5: F24_Timing_Attack_Simulator
def F24_Timing_Attack_expert(window):
    """
    Time two other experts and derive a prediction from which one is slower.

    A1_ARIMA_expert and A2_MovingAverage_expert are each run once on the
    window.  The 1-based index of the slower one starts a run of six
    consecutive numbers.  Confidence is 0.4 + 0.6 * (slowest / total runtime).
    """
    import time

    durations = []
    for expert in (A1_ARIMA_expert, A2_MovingAverage_expert):
        t0 = time.perf_counter()
        expert(window)
        durations.append(time.perf_counter() - t0)

    first = int(np.argmax(durations)) + 1
    run = list(range(first, first + 6))
    share = max(durations) / sum(durations)
    return sorted(unique6(run)), float(0.4 + 0.6 * share)

# Expert 6: E25_Resampling_Chaos
def E25_Resampling_Chaos_expert(window):
    """
    Shuffle the pooled numbers and sample from points with large step ratios.

    s_sample over the full length draws without replacement, so the
    "resample" is a secure shuffle of the pooled numbers.  For each interior
    point the ratio of the next step size to the previous step size is
    computed; the mean log-ratio serves as a Lyapunov-style proxy.

    Returns:
        (pred, conf): six distinct sorted numbers drawn from the points whose
        ratio exceeds the median (or from all seen numbers when fewer than six
        such points exist), and a confidence of 1 - |proxy| clipped to
        [0.1, 0.9].
    """
    nums = sum((window[f'Number{i}'].tolist() for i in range(1, 7)), [])
    bs = s_sample(nums, len(nums))

    ratios = [
        abs(bs[i + 1] - bs[i]) / (abs(bs[i] - bs[i - 1]) + 1e-6)
        for i in range(1, len(bs) - 1)
    ]
    lyap = np.mean(np.log(np.abs(ratios) + 1e-6))

    # Median hoisted out of the filter (it was recomputed per element).
    threshold = np.median(ratios) if ratios else 0.0
    # ratios[j] was computed around bs[j + 1]; index the matching point.
    high = {bs[j + 1] for j, r in enumerate(ratios) if r > threshold}

    # Sample from distinct values so the prediction cannot repeat a number.
    pool = sorted(high) if len(high) >= 6 else sorted(set(nums))
    if len(pool) < 6:
        present = set(pool)
        pool += [n for n in range(1, 40) if n not in present]
    pred = sorted(s_sample(pool, 6))

    conf = float(max(0.1, min(0.9, 1 - abs(lyap))))
    return pred, conf

# Expert 7: G26_Pruned_Network_Attack
def G26_Pruned_Network_expert(window):
    """
    Fit a tiny 6-6-6 MLP mapping each draw to the following draw.

    The network is trained for 20 Adam steps (lr=0.01) with MSE loss on the
    raw numbers, then applied to the last draw.  The output is normalised by
    unique6.  Confidence is max(0.2, 1 - final training loss).
    """
    import torch

    draws = window[[f'Number{i}' for i in range(1, 7)]].values
    inputs = torch.tensor(draws[:-1], dtype=torch.float32)
    targets = torch.tensor(draws[1:], dtype=torch.float32)

    net = torch.nn.Sequential(
        torch.nn.Linear(6, 6),
        torch.nn.ReLU(),
        torch.nn.Linear(6, 6),
    )
    optimiser = torch.optim.Adam(net.parameters(), lr=0.01)
    criterion = torch.nn.MSELoss()
    for _ in range(20):
        optimiser.zero_grad()
        loss = criterion(net(inputs), targets)
        loss.backward()
        optimiser.step()

    latest = torch.tensor(draws[-1:], dtype=torch.float32)
    forecast = net(latest).detach().numpy().flatten()
    return sorted(unique6(forecast)), float(max(0.2, 1 - loss.item()))

# Expert 8: E27_Fractal_Pattern_Expert
def E27_Fractal_Pattern_expert(window):
    """
    Use Mandelbrot escape counts of the last draw as the predicted numbers.

    Each number n of the last row becomes the complex point
    (n/39 - 1) + (n/39 - 1)j; the number of iterations (at most 20) before
    |z| exceeds 2 is the score.  The scores themselves are passed to unique6.
    Confidence is min(0.8, mean score / 20).
    """
    def _escape_steps(c, maxit=20):
        z = 0
        for step in range(maxit):
            if abs(z) > 2:
                return step
            z = z * z + c
        return maxit

    latest = window.iloc[-1][[f'Number{i}' for i in range(1, 7)]].values
    scores = []
    for n in latest:
        coord = n / 39 - 1
        scores.append(_escape_steps(complex(coord, coord)))
    return sorted(unique6(scores)), float(min(0.8, np.mean(scores) / 20))

# Expert 9: F28_Kolmogorov_Smirnov_Test
def F28_KSTest_expert(window):
    """
    KS-test the pooled numbers against a uniform distribution.

    The prediction is always the six most frequent numbers; the test only
    drives the confidence, 1 - p clipped to [0.1, 0.9].
    NOTE(review): scipy's 'uniform' takes (loc, scale), so args=(1, 39)
    describes the interval [1, 40] - confirm this is the intended support.
    """
    observations = []
    for i in range(1, 7):
        observations.extend(window[f'Number{i}'].tolist())

    from scipy.stats import kstest
    _stat, p_value = kstest(observations, 'uniform', args=(1, 39))

    most_seen = [number for number, _ in Counter(observations).most_common(6)]
    confidence = float(max(0.1, min(0.9, 1 - p_value)))
    return sorted(unique6(most_seen)), confidence

# Expert 10: G29_Entropy_Gradient_Boost
def G29_Entropy_Gradient_Boost_expert(window):
    """
    Predict the next draw from per-column entropy features with LightGBM.

    For every draw after the tenth, the Shannon entropy of each of the six
    columns over the preceding ten draws forms the feature vector and the draw
    itself is the target.  A MultiOutputRegressor wrapping LGBMRegressor is
    fitted and queried with the entropies of the last ten draws.  With no
    training rows the fallback is returned at confidence 0.4; otherwise the
    confidence is the constant 0.88.
    """
    from sklearn.multioutput import MultiOutputRegressor
    import lightgbm as lgb
    import numpy as np

    def _column_entropies(block):
        # Shannon entropy (base 2) of each of the six columns of `block`
        entropies = []
        for col in range(6):
            counts = np.unique(block[:, col], return_counts=True)[1]
            probs = counts / counts.sum()
            entropies.append(-np.sum(probs * np.log2(probs + 1e-6)))
        return entropies

    arr = window[[f'Number{i}' for i in range(1, 7)]].values
    rows = range(10, len(window))
    feats = [_column_entropies(arr[i - 10:i]) for i in rows]
    targ = [arr[i] for i in rows]  # each target has shape (6,)

    if not feats:
        return fallback(window), 0.4

    # One LightGBM regressor per output dimension
    base = lgb.LGBMRegressor(n_estimators=50, random_state=42)
    model = MultiOutputRegressor(base, n_jobs=-1)
    model.fit(feats, targ)

    query = _column_entropies(arr[-10:])
    pred = model.predict([query])[0]
    return sorted(unique6(pred)), 0.88





In [None]:
# ─────────────────────────────────────────────────────────────────────────────
# Phase-5 Counter-AI-RNG Expert Modules – Advanced Cryptanalysis Agents
# Each returns (pred: List[int], conf: float)
# Uses secure RNG (secrets/SystemRandom) and pandas→list conversions for robustness.
# ─────────────────────────────────────────────────────────────────────────────

import secrets
from random import SystemRandom
_secure = SystemRandom()

import numpy as np
import pandas as pd
from collections import Counter
from sympy.ntheory.modular import crt
import torch
from sklearn.multioutput import MultiOutputRegressor
import lightgbm as lgb

# Secure RNG helpers
def s_randint(a: int, b: int) -> int:
    """Return a CSPRNG-backed integer from the closed range [a, b]."""
    choices = b - a + 1
    return a + secrets.randbelow(choices)

def s_uniform(a: float, b: float) -> float:
    """Return a float between a and b drawn from the SystemRandom instance."""
    value = _secure.uniform(a, b)
    return value

def s_sample(seq, k: int):
    """Return k items sampled from seq without replacement via SystemRandom."""
    subset = _secure.sample(seq, k)
    return subset

def unique6(vec):
    """
    Normalise an arbitrary numeric sequence into six distinct lottery numbers.

    Each value is clipped into 1..39 and rounded; the first six distinct
    results are kept in order of appearance, and any shortfall is filled with
    secure random numbers not already present.

    NaN entries are skipped and +/-inf is clipped to the nearest bound.
    Previously either one raised from round(), so a single degenerate model
    output aborted the calling expert.

    Returns:
        Sorted list of six distinct ints in 1..39.
    """
    seen, out = set(), []
    for x in vec:
        value = float(x)
        if value != value:  # NaN is the only value unequal to itself
            continue
        # Clip before rounding so infinities map to a bound; for finite
        # values this equals round-then-clip because the bounds are integers.
        number = int(round(min(39.0, max(1.0, value))))
        if number not in seen:
            seen.add(number)
            out.append(number)
        if len(out) == 6:
            break
    while len(out) < 6:
        r = s_randint(1, 39)
        if r not in seen:
            seen.add(r)
            out.append(r)
    return sorted(out)

def fallback(window):
    """Return the six most frequent numbers of the window, normalised by unique6."""
    pooled = []
    for i in range(1, 7):
        pooled.extend(window[f'Number{i}'].tolist())
    ranked = Counter(pooled).most_common(6)
    return unique6([number for number, _count in ranked])

# Expert 1: E30_MarkovChain_Residuals_expert
def E30_MarkovChain_Residuals_expert(window):
    """
    First-order Markov expert over the pooled number sequence.

    Counts transitions between consecutive pooled values in a 39x39 matrix,
    row-normalises it, and returns the six most probable successors of the
    final value.  Confidence is ten times their mean probability, capped at
    0.9.
    """
    seq = []
    for i in range(1, 7):
        seq.extend(window[f'Number{i}'].tolist())

    counts = np.zeros((39, 39))
    for pos in range(len(seq) - 1):
        counts[seq[pos] - 1, seq[pos + 1] - 1] += 1

    # the epsilon keeps rows with no outgoing transitions at all-zero
    row_totals = counts.sum(axis=1, keepdims=True)
    transition = counts / (row_totals + 1e-9)

    successor_probs = transition[seq[-1] - 1]
    ranking = np.argsort(successor_probs)
    best = ranking[-6:][::-1] + 1
    confidence = float(min(0.9, successor_probs[best - 1].mean() * 10))
    return unique6(best), confidence

# Expert 2: E31_Fractal_Dimension_expert
def E31_Fractal_Dimension_expert(window):
    """
    Score each number 1..39 by -log2 of its relative frequency.

    The six highest scores - the least frequent numbers, including those
    never seen - form the prediction.  Confidence is the standard deviation
    of the scores over their maximum, capped at 0.9.
    """
    pooled = []
    for i in range(1, 7):
        pooled.extend(window[f'Number{i}'].tolist())
    values = np.array(pooled)

    # the epsilon keeps unseen numbers finite instead of log2(0)
    scores = np.array([-np.log2(np.mean(values == n) + 1e-9) for n in range(1, 40)])
    rarest = np.argsort(scores)[-6:] + 1
    confidence = float(min(0.9, np.std(scores) / np.max(scores + 1e-6)))
    return unique6(rarest), confidence

# Expert 3: E32_Poisson_Process_expert
def E32_Poisson_Process_expert(window):
    """
    Rate expert: per-number occurrences divided by the number of draws.

    The six numbers with the highest rate are predicted.  Confidence is the
    mean rate over all 39 numbers, capped at 0.9.
    """
    pooled = []
    for i in range(1, 7):
        pooled.extend(window[f'Number{i}'].tolist())

    occurrences = Counter(pooled)
    draws = len(window)
    rate = {n: occurrences[n] / draws for n in range(1, 40)}

    ranked = sorted(rate, key=rate.get, reverse=True)
    confidence = float(min(0.9, np.mean(list(rate.values()))))
    return unique6(ranked[:6]), confidence

# Expert 4: F33_Ghost_Factorization_expert
def F33_Ghost_Factorization_expert(window):
    """
    Tally the prime factors of consecutive absolute differences.

    Each difference between neighbouring pooled values (0 is replaced by 1)
    is factored with sympy.factorint; primes below 40 are counted once per
    difference and the six most common become the prediction.  Confidence is
    the number of distinct primes found over six, capped at 0.9.
    """
    from sympy import factorint

    seq = []
    for i in range(1, 7):
        seq.extend(window[f'Number{i}'].tolist())

    tally = Counter()
    for left, right in zip(seq, seq[1:]):
        gap = abs(left - right) or 1
        for prime in factorint(gap):
            if prime < 40:
                tally[prime] += 1

    frequent = [prime for prime, _ in tally.most_common(6)]
    return unique6(frequent), float(min(0.9, len(frequent) / 6))

# Expert 5: F34_Lattice_Basis_expert
def F34_Lattice_Basis_expert(window):
    """
    Orthogonalise a 6x6 embedding of the last 36 pooled values.

    The values are shifted with (v % 39) + 1, reshaped to 6x6 and passed
    through a Gram-Schmidt-style sweep (a simplification, not a full LLL
    reduction).  The first row is returned as the prediction; the sweep never
    modifies row 0, so this equals the first embedded row.

    Returns:
        (pred, conf): unique6 of the first row and std/mean of that row
        capped at 0.9, or the fallback at confidence 0.4 when fewer than 36
        values are available.
    """
    seq = sum((window[f'Number{i}'].tolist() for i in range(1, 7)), [])

    def lll_basis(M):
        # Subtract from each row its projection onto every earlier row.
        for i in range(6):
            for j in range(i):
                norm_sq = np.dot(M[j], M[j])
                if norm_sq == 0:
                    # an earlier row collapsed to zero: nothing to project on
                    continue
                mu = np.dot(M[i], M[j]) / norm_sq
                M[i] = M[i] - mu * M[j]
        return M

    if len(seq) < 36:
        return fallback(window), 0.4  # too short for a 6x6 embedding

    # Work in float: writing fractional projections into an integer matrix
    # silently truncated them.
    mat = (np.array(seq[-36:]).reshape(6, 6) % 39 + 1).astype(float)
    reduced = lll_basis(mat)
    pred = reduced[0]
    conf = float(min(0.9, np.std(pred) / np.mean(pred + 1e-6)))
    return unique6(pred), conf

# Expert 6: G35_Meta_Reinforcement_expert
def G35_Meta_Reinforcement_expert(window):
    """
    Sample six numbers from a briefly trained 39-way categorical policy.

    A single linear layer maps the one-hot encoding of the last pooled value
    to 39 logits.  For five steps, six actions are sampled and the negative
    mean log-probability of those samples is minimised (no reward signal is
    involved).  The actions of the final step, shifted to 1..39, are the
    prediction.  Confidence is fixed at 0.6.
    """
    import torch.nn as nn, torch.optim as optim, torch

    seq = []
    for i in range(1, 7):
        seq.extend(window[f'Number{i}'].tolist())

    policy = nn.Linear(39, 39)
    optimiser = optim.Adam(policy.parameters(), lr=1e-2)

    one_hot = torch.zeros(39)
    one_hot[seq[-1] - 1] = 1

    for _ in range(5):
        dist = torch.distributions.Categorical(logits=policy(one_hot))
        action = dist.sample((6,))
        loss = -dist.log_prob(action).mean()
        optimiser.zero_grad()
        loss.backward()
        optimiser.step()

    numbers = [int(a) + 1 for a in action]
    return unique6(numbers), float(0.6)

# Expert 7: G36_Evolutionary_Ensemble_expert
def G36_Evolutionary_Ensemble_expert(window):
    """
    Average the predictions of six randomly chosen registered experts.

    Ten expert names are drawn (with replacement) from expert_registry and
    the first six are used.  Each is looked up in globals() as
    "<name>_expert", falling back to (fallback(window), 0).  Every prediction
    is normalised by unique6 before the position-wise mean is taken.
    Confidence is fixed at 0.5.
    """
    def _default_expert(w):
        return (fallback(w), 0)

    names = list(expert_registry.keys())
    population = [s_choice(names) for _ in range(10)]

    namespace = globals()
    rows = []
    for expert_name in population[:6]:
        expert_fn = namespace.get(f"{expert_name}_expert", _default_expert)
        raw_pred, _ = expert_fn(window)
        rows.append(unique6(raw_pred))  # fixed length 6, safe to stack

    averaged = np.mean(np.array(rows), axis=0)
    return unique6(averaged), 0.5

# Expert 8: G37_Autoencoder_Anomaly_expert
def G37_Autoencoder_Anomaly_expert(window):
    """
    Rank the six column positions by reconstruction error of a 6-3-6 autoencoder.

    The encoder and decoder are freshly initialised and never trained.  The
    per-column mean squared error has six entries, so the ranked indices are a
    permutation of 1..6.
    NOTE(review): with the sorting unique6 defined in this notebook the
    prediction is therefore always [1, 2, 3, 4, 5, 6] - confirm intent.
    Confidence is the mean error capped at 0.9.
    """
    import torch.nn as nn, torch

    cols = [f'Number{i}' for i in range(1, 7)]
    draws = torch.tensor(window[cols].values, dtype=torch.float32)

    encoder = nn.Linear(6, 3)
    decoder = nn.Linear(3, 6)
    # no optimisation step is performed on the two layers

    reconstruction = decoder(encoder(draws))
    column_errors = torch.mean((reconstruction - draws) ** 2, dim=0).detach().numpy()
    ranked = np.argsort(column_errors)[-6:] + 1
    return unique6(ranked), float(min(0.9, np.mean(column_errors)))

# Expert 9: G38_Hypernet_Bias_expert
def G38_Hypernet_Bias_expert(window):
    """
    Softmax over the six column means.

    The ranked indices are a permutation of 1..6 because only six weights
    exist.
    NOTE(review): with the sorting unique6 defined in this notebook the
    prediction is therefore always [1, 2, 3, 4, 5, 6] - confirm intent.
    Confidence is the largest softmax weight capped at 0.9.
    """
    cols = [f'Number{i}' for i in range(1, 7)]
    column_means = np.mean(window[cols].values, axis=0)

    exponentials = np.exp(column_means)
    softmax = exponentials / np.sum(exponentials)

    ranked = np.argsort(softmax)[-6:] + 1
    return unique6(ranked), float(min(0.9, np.max(softmax)))

# Expert 10: E33_MonteCarlo_Simulation_expert
def E33_MonteCarlo_Simulation_expert(window):
    """
    Repeatedly shuffle the pooled numbers and tally the first six of each run.

    One hundred full-length samples without replacement are drawn; the six
    values most often found among the leading six positions are predicted.
    Confidence is fixed at 0.6.
    """
    seq = []
    for i in range(1, 7):
        seq.extend(window[f'Number{i}'].tolist())

    leaders = []
    for _ in range(100):
        shuffled = s_sample(seq, len(seq))
        leaders += shuffled[:6]

    frequent = [number for number, _ in Counter(leaders).most_common(6)]
    return unique6(frequent), 0.6


    


In [None]:
def E40_NIST_80022_Battery_expert(window):
    """
    Combine three simple randomness statistics of the pooled sequence.

    Statistics: mean distance from 20 scaled by 19, the summed change of
    difference signs over the length, and the lag-1 correlation.  Their
    argsort (+1) yields three numbers in 1..3; the rest is padded with secure
    random numbers and normalised by unique6.  The score
    1 - mean(|1 - stat|) is clipped to [0.3, 0.95].
    """
    import numpy as np

    seq = []
    for i in range(1, 7):
        seq.extend(window[f'Number{i}'].tolist())

    freq = np.mean([abs(n - 20) for n in seq]) / 19
    turning = np.sum(np.diff(np.sign(np.diff(seq))))
    runs = turning / len(seq)
    serial_corr = np.corrcoef(seq[:-1], seq[1:])[0, 1]

    # lower pseudo p-value = likelier pattern, hence the inversion
    pseudo_p = [1 - freq, 1 - runs, 1 - abs(serial_corr)]
    score = 1 - np.mean([abs(p) for p in pseudo_p])

    order = np.argsort([freq, runs, serial_corr])[-6:] + 1
    pred = [int(rank) for rank in order]
    while len(pred) < 6:
        pred.append(s_randint(1, 39))
    return unique6(pred), float(np.clip(score, 0.3, 0.95))


def G50_Neural_Discriminator_expert(window):
    """
    Train a small classifier to tell "smooth" from "jumpy" draw transitions.

    A transition is labelled 1 when the mean absolute change to the next draw
    is below 5, else 0.  A 6-12-2 network is fitted for 15 Adam steps with
    cross-entropy.  If the net assigns class 1 to the last draw, the
    truncated mean of the last three draws is predicted; otherwise six secure
    random numbers.  Confidence is min(0.94, max logit / 2 + 0.5).
    """
    import torch, torch.nn as nn

    cols = [f'Number{i}' for i in range(1, 7)]
    draws = window[cols].values.astype(float)

    labels = np.zeros(len(draws) - 1)
    for k, (previous, current) in enumerate(zip(draws[:-1], draws[1:])):
        labels[k] = 1 if np.mean(np.abs(current - previous)) < 5 else 0

    X = torch.tensor(draws[:-1], dtype=torch.float32)
    y = torch.tensor(labels, dtype=torch.float32).view(-1, 1)

    model = nn.Sequential(nn.Linear(6, 12), nn.ReLU(), nn.Linear(12, 2))
    loss_fn = nn.CrossEntropyLoss()
    opt = torch.optim.Adam(model.parameters(), lr=0.01)
    for _ in range(15):  # short run, deliberately overfitting
        opt.zero_grad()
        loss = loss_fn(model(X), y.squeeze().long())
        loss.backward()
        opt.step()

    latest = torch.tensor(draws[-1:], dtype=torch.float32)
    logits = model(latest).detach().numpy().flatten()
    if int(np.argmax(logits)) == 1:
        recent_mean = np.mean(draws[-3:], axis=0)
        pred = [int(recent_mean[i]) for i in range(6)]
    else:
        pred = [s_randint(1, 39) for _ in range(6)]
    return unique6(pred), float(np.min([0.94, np.max(logits) / 2 + 0.5]))


def X10_GraphEntropy_SubGraph_expert(window):
    """
    Build a directed transition graph and predict its highest-degree nodes.

    Each consecutive pair of pooled values adds an edge.  The six nodes with
    the largest degree are predicted (padded with secure random numbers when
    the graph has fewer nodes).  Confidence is the Shannon entropy of the
    degree distribution over six, capped at 0.9.
    """
    import networkx as nx, numpy as np

    seq = []
    for i in range(1, 7):
        seq.extend(window[f'Number{i}'].tolist())

    G = nx.DiGraph()
    for src, dst in zip(seq, seq[1:]):
        G.add_edge(src, dst)

    nodes = list(G.nodes())
    degrees = np.array([G.degree(node) for node in nodes])
    share = degrees / (degrees.sum() or 1)
    entropy = -np.sum(share * np.log2(share + 1e-9))

    busiest = np.argsort(-degrees)[:6]
    pred = [nodes[k] for k in busiest]
    while len(pred) < 6:
        pred.append(s_randint(1, 39))
    return unique6(pred), float(min(0.9, entropy / 6))


def advanced_meta_aggregate(window, actual=None):
    """
    Blend all registered experts with diversity-boosted softmax weights.

    Steps: run every expert through run_all_experts_parallel; measure
    diversity as the mean symmetric-difference size over all ordered expert
    pairs divided by 6; scale the confidences by (1 + 0.1 * diversity) and
    softmax them; take the weighted position-wise sum of the unique6
    predictions.  `actual` is accepted but not used.

    Returns:
        (prediction, info) where info holds diversity, confs, weights and
        step_preds.
    """
    results = run_all_experts_parallel(window, expert_registry)
    preds = {name: outcome[0] for name, outcome in results.items()}
    confs = {name: outcome[1] for name, outcome in results.items()}

    pair_sizes = []
    for first in preds:
        for second in preds:
            if first != second:
                pair_sizes.append(len(set(preds[first]) ^ set(preds[second])))
    diversity = np.mean(pair_sizes) / 6

    scores = np.array(list(confs.values()), dtype=np.float32)
    scores *= (1 + 0.1 * diversity)  # in place: keeps the float32 dtype
    weights = np.exp(scores - np.max(scores))
    weights /= weights.sum()

    blended = np.zeros(6)
    for weight, name in zip(weights, list(preds)):
        blended += weight * np.array(unique6(preds[name]))

    info = dict(diversity=diversity, confs=confs, weights=weights, step_preds=preds)
    return unique6(blended), info


In [None]:
def Z1_Hyperchaos_Lyapunov_expert(window):
    """
    Logistic-map step on the last draw, tuned by a Lyapunov-style estimate.

    The last 60 values (ten draws) are read in draw order.  The mean log
    ratio of consecutive differences selects the map parameter (3.99 when
    above 0.95, else 3.66).  The last six values, scaled by 1/40, are pushed
    through r * x * (1 - x) and mapped back with int(x * 39) + 1.

    Returns:
        (pred, conf): unique6 of the mapped values and
        min(0.97, |estimate| / 1.2).
    """
    cols = [f'Number{i}' for i in range(1, 7)]
    # Row-major flattening keeps each draw's six numbers together, so
    # arr[-6:] really is the latest draw.  Concatenating the columns made it
    # the last six values of the Number6 column.
    arr = window[cols].values.astype(float).ravel()[-60:]

    # the tiny offset keeps zero differences out of the division below
    diffs = np.diff(arr) + 1e-9
    lyap = np.mean(np.log(np.abs(diffs[1:] / diffs[:-1])))

    r = 3.99 if lyap > 0.95 else 3.66
    last = arr[-6:] / 40
    chaos_pred = r * last * (1 - last)
    pred = [int(x * 39) + 1 for x in chaos_pred]
    while len(pred) < 6:
        pred.append(s_randint(1, 39))
    conf = float(min(0.97, abs(lyap) / 1.2))
    return unique6(pred), conf


def Z2_GrammaticalEvo_ML_expert(window):
    """
    Regress hash-derived features with a tiny network.

    Every 6-value chunk of the pooled sequence is hashed with SHA-256 and
    reduced to 1..39.  The last 20 such features (zeros when fewer exist)
    train a 1-4-1 network for six Adam steps to predict the absolute change
    to the next feature.  The network evaluated at inputs 1..6 gives the raw
    prediction.

    Returns:
        (pred, conf): unique6 of the outputs and 1 - final loss clipped to
        [0.1, 0.93].
    """
    import torch, torch.nn as nn
    from hashlib import sha256

    nums = sum((window[f'Number{i}'].tolist() for i in range(1, 7)), [])
    feats = []
    for i in range(len(nums) - 5):
        chunk = nums[i:i + 6]
        hval = int(sha256(str(chunk).encode()).hexdigest(), 16)
        feats.append([hval % 39 + 1])
    feats = torch.tensor(feats[-20:], dtype=torch.float32) if len(feats) >= 20 else torch.zeros((20, 1))
    y = torch.abs(feats[:-1] - feats[1:])

    model = nn.Sequential(nn.Linear(1, 4), nn.ReLU(), nn.Linear(4, 1))
    opt = torch.optim.Adam(model.parameters(), lr=0.05)
    for _ in range(6):
        opt.zero_grad()
        o = model(feats[:-1])
        loss = ((o - y) ** 2).mean()
        loss.backward()
        opt.step()

    pred = [int(torch.mean(model(torch.tensor([[i + 1]], dtype=torch.float32))).item()) for i in range(6)]
    # The loss is a squared error on values up to 38, so 1 - loss is usually
    # far below zero.  Floor it at 0.1, like the other experts, so that a
    # negative confidence cannot reach the weighted aggregators.
    conf = float(max(0.1, min(0.93, 1 - loss.item())))
    return unique6(pred), conf


def Z3_XORMixnet_Reverse_expert(window):
    """
    Derive an arithmetic progression from a doubly accumulated XOR.

    The pooled sequence is replaced by its running XOR; a second running XOR
    over that result yields a final value v.  The prediction is
    (v + 7 * i) % 39 + 1 for i = 0..5.  Confidence is
    0.80 + 0.1 * std(running XOR) / max(sequence), capped at 0.96.
    """
    pooled = []
    for i in range(1, 7):
        pooled.extend(window[f'Number{i}'].tolist())
    arr = np.array(pooled)

    # first pass: running XOR, written in place into a copy
    running = arr.copy()
    for pos in range(1, len(arr)):
        running[pos] = running[pos] ^ running[pos - 1]

    # second pass: only the final accumulated value is needed
    tail = running[0]
    for value in running[1:]:
        tail = tail ^ value

    pred = [int((tail + step * 7) % 39 + 1) for step in range(6)]
    raw_conf = 0.80 + 0.1 * (np.std(running) / np.max(arr))
    return unique6(pred), float(min(0.96, raw_conf))


def Z4_GenieAI_MetaEnsemble_expert(window, registry=None, diversity_boost=0.15):
    """
    Confidence-weighted blend of up to eight registered experts.

    Args:
        window: draw history with columns Number1..Number6.
        registry: mapping name -> config with 'min_window' and 'func' (the
            name of a global function); defaults to expert_registry.
        diversity_boost: factor applied to the disagreement bonus.

    Returns:
        (pred, conf): unique6 of the weighted position-wise sum, and the mean
        confidence scaled by (1 + 0.2 * disagreement), clipped to
        [0.5, 0.99].  With no eligible expert the plain fallback is returned
        at confidence 0.5.
    """
    import numpy as np
    if registry is None:
        registry = expert_registry

    def _fallback_expert(w):
        # fallback() returns only a prediction list; wrap it so the
        # (pred, conf) unpacking below also works for unresolved experts.
        return fallback(w), 0.3

    valid_experts = [name for name, cfg in registry.items() if len(window) >= cfg['min_window']]
    preds, confs = [], []
    for name in valid_experts[:8]:
        fn = globals().get(registry[name]['func'], _fallback_expert)
        p, c = fn(window)
        preds.append(unique6(p))
        confs.append(c)

    if not preds:
        # nothing eligible: avoid dividing by an empty weight sum
        return fallback(window), 0.5

    # Disagreement: mean symmetric-difference size over expert pairs, over 6
    pair_sizes = [
        len(set(preds[i]) ^ set(preds[j]))
        for i in range(len(preds)) for j in range(i + 1, len(preds))
    ]
    disagreement = float(np.mean(pair_sizes)) / 6.0 if pair_sizes else 1.0

    weights = np.array(confs, dtype=float) * (1 + diversity_boost * disagreement)
    total = weights.sum()
    if total > 0:
        weights = weights / total
    else:
        # all confidences zero (or cancelling): use equal weights
        weights = np.full(len(preds), 1.0 / len(preds))

    agg = np.zeros(6)
    for weight, pred in zip(weights, preds):
        agg += weight * np.array(pred)
    return unique6(agg), float(np.clip(np.mean(confs) * (1 + 0.2 * disagreement), 0.5, 0.99))


def Z5_ProbabilisticNeural_Circuit_expert(window):
    """
    Pass the last eight draws through a small, untrained gated network.

    The draws are normalised with (v - 20) / 20, fed one draw per row through
    a 6-16 layer, then a 16-6 layer gated element-wise by a sigmoid of a
    second 16-6 projection.  The row mean is mapped back with * 20 + 20.

    Returns:
        (pred, conf): unique6 of the clipped outputs and
        min(0.97, 1 - std(pred) / 33).
    """
    import torch
    from torch import nn

    cols = [f'Number{i}' for i in range(1, 7)]
    # Row-major flattening so that view(-1, 6) yields one draw per row; the
    # column-wise concatenation made each row six values of one column.
    flat = window[cols].values.astype(float).ravel()[-48:]
    X = torch.tensor((flat - 20) / 20, dtype=torch.float32).view(-1, 6)

    class NeuralCircuit(nn.Module):
        def __init__(self):
            super().__init__()
            self.in1 = nn.Linear(6, 16)
            # Projects to 6 so it matches the 6-wide gate.  The former
            # 16-wide output could not be multiplied with the (n, 6) gate
            # and raised a shape error on every call.
            self.in2 = nn.Linear(16, 6)
            self.attn = nn.Linear(16, 6)

        def forward(self, x):
            hidden = torch.relu(self.in1(x))
            gate = torch.sigmoid(self.attn(hidden))
            gated = torch.relu(self.in2(hidden)) * gate
            return gated.mean(dim=0) * 20 + 20

    model = NeuralCircuit()
    with torch.no_grad():
        yhat = model(X)
    pred = [int(np.clip(val.item(), 1, 39)) for val in yhat]
    conf = float(min(0.97, 1 - np.std(pred) / 33))
    return unique6(pred), conf



def Z6_Adversarial_PhaseWatcher_expert(window):
    """
    Watch the peak FFT magnitude of a sliding 24-value window for jumps.

    The position of the largest recent change in peak magnitude (among the
    last six changes) determines how many trailing pooled values are used as
    the raw prediction.

    Returns:
        (pred, conf): unique6 of the selected tail and
        0.75 + 0.2 * std(changes) / max(1, mean(peaks)); the base 0.75 is
        returned when the sequence is too short to produce any change.
    """
    seq = sum((window[f'Number{i}'].tolist() for i in range(1, 7)), [])
    arr = np.array(seq)

    swsz = 24
    phases = []
    for i in range(len(arr) - swsz):
        win = arr[i:i + swsz]
        fft = np.abs(np.fft.fft(win - np.mean(win)))
        phases.append(np.max(fft))

    diffs = np.diff(phases)
    sudden = np.argmax(np.abs(diffs[-6:])) if len(diffs) >= 6 else 0
    shift_base = arr[-6 + sudden:][-6:] if len(arr) >= 12 else arr[-6:]
    pred = [int(x) for x in shift_base]

    if len(diffs) == 0:
        # With fewer than two sliding windows, std/mean over empty data is
        # NaN; report the base confidence instead of propagating NaN.
        return unique6(pred), 0.75
    return unique6(pred), float(0.75 + 0.2 * np.std(diffs) / max(1, np.mean(phases)))



def Z7_SparseDict_Explain_expert(window):
    """
    Reconstruct the latest draw from a learned sparse dictionary.

    A 6-atom DictionaryLearning model is fitted on the last ten draws (one
    draw per row); the sparse code of the latest draw is multiplied back with
    the learned atoms to form the prediction.

    Returns:
        (pred, conf): unique6 of the clipped reconstruction and
        0.9 - clip(mean abs error / 20, 0, 0.7); on any fitting error the
        fallback prediction with confidence 0.45.
    """
    from sklearn.decomposition import DictionaryLearning

    cols = [f'Number{i}' for i in range(1, 7)]
    # One draw per row.  Reshaping the column-wise concatenation mixed
    # values of a single column into each row.
    arr = window[cols].values[-10:]

    model = DictionaryLearning(n_components=6, alpha=1.0, max_iter=30, random_state=42)
    try:
        codes = model.fit_transform(arr)
        # Reconstruct as codes @ components_.  inverse_transform is missing
        # on older scikit-learn releases, where the resulting AttributeError
        # was swallowed below and the expert always fell back.
        rec = codes[-1:] @ model.components_
        pred = [int(np.clip(x, 1, 39)) for x in rec.flatten()]
        err = np.mean(np.abs(arr[-1] - rec))
        conf = float(0.9 - np.clip(err / 20, 0, 0.7))
    except Exception:
        # best effort: any fitting problem degrades to the frequency fallback
        pred, conf = fallback(window), 0.45
    return unique6(pred), conf


def Z8_ExplorationPolicy_AI_RNG_expert(window):
    """
    Predict the per-position mean of recent draws.

    Of the last ten draws, all but the most recent are averaged position by
    position (float32).  The "policy" is this plain mean.

    Returns:
        (pred, conf): unique6 of the clipped means and
        0.65 + 0.3 * (1 - var(means) / 39) clipped to [0.1, 0.95]; the
        fallback at confidence 0.4 when fewer than two draws exist.
    """
    import torch

    cols = [f'Number{i}' for i in range(1, 7)]
    # One draw per row.  Reshaping the column-wise concatenation mixed
    # values of a single column into each row.
    draws = window[cols].values.astype(float)[-10:]
    if len(draws) < 2:
        # averaging zero rows would give NaN and crash the int conversion
        return fallback(window), 0.4

    X = torch.tensor(draws[:-1], dtype=torch.float32)
    out = X.mean(dim=0).numpy()

    reward = 1 - np.var(out) / 39
    pred = [int(np.clip(x, 1, 39)) for x in out]
    # The variance of position means can far exceed 39, which drove the raw
    # confidence below zero; keep it in a usable range.
    conf = float(np.clip(0.65 + reward * 0.3, 0.1, 0.95))
    return unique6(pred), conf





In [None]:
def Z9_SideChannel_Leakage_expert(window):
    """
    Turn wall-clock jitter of six FFT runs into a prediction.

    For six offsets between -0.25 and 0.25 the pooled sequence is shifted and
    transformed while the elapsed time is recorded.  The standard deviation
    of those timings picks a start position; the prediction steps from it in
    threes, wrapped into 1..39.  Confidence is min(0.97, 11 * std).
    """
    import time, numpy as np

    pooled = []
    for i in range(1, 7):
        pooled.extend(window[f'Number{i}'].tolist())

    elapsed = []
    for jitter in np.linspace(-0.25, 0.25, 6):
        t0 = time.time()
        shifted = np.array(pooled) + jitter
        np.fft.fft(shifted)
        elapsed.append(time.time() - t0)

    spread = np.std(elapsed)
    start = int((spread * 1000) % 39) + 1
    pred = []
    for step in range(6):
        pred.append((start + step * 3) % 39 + 1)
    return unique6(pred), float(min(0.97, spread * 11))


def Z10_QuantumEigen_Drift_expert(window):
    """Predict six numbers from eigenvalues of a value-transition count matrix.

    The six number columns are concatenated and the last 78 values (13 draws'
    worth of numbers) are kept. Each consecutive pair ``(a, b)`` increments
    cell ``[a-1, b-1]`` of a 39x39 matrix. The six largest values returned by
    ``np.linalg.eigvalsh`` are folded into 1..39.

    Returns:
        ``(unique6(pred), conf)`` with ``conf = min(0.95, ptp(eigs)/std(values))``,
        or ``(fallback(window), 0.4)`` when fewer than 18 values are available.
    """
    columns = [window[f'Number{i}'].tolist() for i in range(1, 7)]
    flat = [value for column in columns for value in column]
    recent = np.array(flat[-78:])
    if len(recent) < 18:
        return fallback(window), 0.4

    transitions = np.zeros((39, 39))
    for src, dst in zip(recent[:-1], recent[1:]):
        transitions[src - 1, dst - 1] += 1

    # NOTE(review): the count matrix is not symmetrized before eigvalsh —
    # confirm that is intended.
    eig = np.linalg.eigvalsh(transitions)
    pred = [int(np.abs(eig[-rank]) % 39) + 1 for rank in range(1, 7)]
    conf = float(min(0.95, np.ptp(eig) / np.std(recent)))
    return unique6(pred), conf


def advanced_self_attention_gate(window, registry, last_meta_conf=None):
    """Fuse all expert predictions with a confidence-softmax weighting.

    Args:
        window: Draw history passed through to every expert.
        registry: Expert registry forwarded to ``run_all_experts_parallel``.
        last_meta_conf: Optional metadata dict returned by a previous call.
            When given and the current consensus is lower than its
            ``'consensus'`` entry, all confidences are scaled by 1.20 before
            the softmax (which sharpens the weighting).

    Returns:
        ``(unique6(weighted_sum), meta)`` where ``meta`` holds ``consensus``
        (mean pairwise overlap / 6), ``entropy`` (base-2 entropy of the
        normalized confidences) and ``weights`` (expert name -> softmax weight).

    Raises:
        ValueError: if no expert produced a result (nothing to fuse).
    """
    import numpy as np

    expert_results = run_all_experts_parallel(window, registry)
    if not expert_results:
        raise ValueError("advanced_self_attention_gate: no expert results to fuse")

    names = list(expert_results)
    preds = {name: expert_results[name][0] for name in names}
    confs = {name: expert_results[name][1] for name in names}

    # Consensus: mean pairwise overlap between expert predictions.
    pred_sets = {name: set(preds[name]) for name in names}
    overlaps = [len(pred_sets[a] & pred_sets[b]) / 6
                for a in names for b in names if a != b]
    # A lone expert trivially agrees with itself; avoids a NaN from mean([]).
    consensus = np.mean(overlaps) if overlaps else 1.0

    # Entropy of the confidence distribution; the total is computed once.
    total_conf = sum(confs.values()) or 1
    entropy = -np.sum([(c / total_conf) * np.log2((c / total_conf) + 1e-9)
                       for c in confs.values()])

    # Drift detection: sharpen the weighting when consensus dropped.
    weights = np.array(list(confs.values()))
    if last_meta_conf and consensus < last_meta_conf['consensus']:
        weights = weights * 1.20

    # Numerically stable softmax
    w = np.exp(weights - np.max(weights))
    w /= w.sum()

    out = np.zeros(6)
    for name, weight in zip(names, w):
        out += weight * np.array(unique6(preds[name]))
    return unique6(out), dict(consensus=consensus, entropy=entropy, weights=dict(zip(names, w)))


In [None]:
# --- Swarm Meta-Orchestrator (Insert above usual driver/orchestrator) ---
import numpy as np
from random import SystemRandom
_secure = SystemRandom()


def meta_swarm_evolution(window, registry, population=10, generations=4):
    """Evolve subsets of experts and return the best fused prediction seen.

    Each population member is a list of expert names. Every generation each
    member's experts are run on ``window``, their predictions are fused with a
    softmax over confidences, and the member is scored by mean confidence plus
    a small bonus for the number of distinct values predicted. The top half
    survives; the rest are rebuilt by crossover and occasional mutation.

    All randomness comes from the module-level ``_secure`` (``SystemRandom``)
    generator. The previous code referenced the ``random`` module, which the
    visible imports do not provide.

    Args:
        window: Draw history passed to every expert.
        registry: Mapping of expert name -> config with ``'func'`` and
            ``'min_window'`` entries.
        population: Number of members per generation. Must be at least 4 so
            that the surviving half contains two parents to sample.
        generations: Number of evolution rounds.

    Returns:
        ``(best_pred, info)`` where ``info`` has ``gen_score``, ``diversity``
        (standard deviation over all recorded scores) and ``route``.
    """
    expert_names = list(registry)
    # (1) Initial population: each member is a random subset of expert names
    pop = [_secure.sample(expert_names, k=min(8, len(expert_names)))
           for _ in range(population)]
    weights_hist = []
    best_pred = None
    best_score = -np.inf
    for gen in range(generations):
        scores = []
        preds = []
        # (2) Evaluate each population member
        for genes in pop:
            local_preds, local_confs = [], []
            for name in genes:
                fn = globals().get(registry[name]['func'], fallback)
                pred, conf = fn(window)
                local_preds.append(unique6(pred))
                local_confs.append(conf)
            # Fusion: softmax over confidences
            c = np.array(local_confs, dtype=np.float32)
            w = np.exp(c - np.max(c)); w /= w.sum() or 1
            agg = sum(wi * np.array(p) for wi, p in zip(w, local_preds)) / (w.sum() or 1)
            final = unique6(agg)
            distinct_values = len(set.union(*[set(p) for p in local_preds]))
            score = float(np.mean(local_confs)) + 0.1 * distinct_values / 6
            preds.append(final)
            scores.append(score)
            if score > best_score:
                best_score = score
                best_pred = final
        # (3) Selection: keep top half
        idx = np.argsort(scores)[::-1][:population // 2]
        selected = [pop[i] for i in idx]
        # (4) Crossover/mutation: spawn new generation
        next_gen = []
        for _ in range(population - len(selected)):
            p1, p2 = _secure.sample(selected, 2)
            cross = list(set(_secure.choices(p1, k=4) + _secure.choices(p2, k=4)))
            if _secure.random() < 0.45:
                # Experts whose min_window is below the window length appear
                # twice, so they are twice as likely to be drawn.
                all_names = list(registry) + [name for name in registry
                                              if registry[name]['min_window'] < len(window)]
                cross[_secure.randrange(len(cross))] = _secure.choice(all_names)
            next_gen.append(cross[:min(len(cross), 8)])
        pop = selected + next_gen
        weights_hist.append(scores)
    return best_pred, dict(
        gen_score=best_score,
        diversity=np.std(weights_hist),
        route="GA+RL hybrid"
    )


In [None]:
def Z13_NeuralMI_Estimator_expert(window):
    """Fit a small MLP mapping each row of six values to the following row.

    The six number columns are concatenated, the last ``N`` rows of six
    (``N <= 32``) are taken, and a 6-32-32-6 network is trained for 10 Adam
    steps on (row[t] -> row[t+1]) pairs. The network output for the final
    training input, clipped to 1..39, is the prediction.

    Returns:
        ``(unique6(pred), conf)`` with ``conf = clip(0.99 - mse/50, 0.3, 0.99)``
        where ``mse`` is the mean squared error of the last forward pass, or
        ``(fallback(window), 0.4)`` when fewer than 6 rows are available.
    """
    import torch, torch.nn as nn

    seq = sum((window[f'Number{i}'].tolist() for i in range(1, 7)), [])
    N = min(len(seq) // 6, 32)
    if N < 6:
        return fallback(window), 0.4
    data = np.array(seq)[-N * 6:].reshape(N, 6)
    X = torch.tensor(data[:-1], dtype=torch.float32)
    y = torch.tensor(data[1:], dtype=torch.float32)

    class SmallNN(nn.Module):
        def __init__(self):
            super().__init__()
            # Layers must be created inside __init__; at class-body level
            # `self` does not exist and defining the class raised NameError.
            self.fc1 = nn.Linear(6, 32)
            self.fc2 = nn.Linear(32, 32)
            self.out = nn.Linear(32, 6)

        def forward(self, z):
            z = torch.relu(self.fc1(z))
            z = torch.relu(self.fc2(z))
            return self.out(z)

    net = SmallNN()
    loss_fn = nn.MSELoss()
    opt = torch.optim.Adam(net.parameters(), lr=0.02)
    for i in range(10):
        out = net(X)
        loss = loss_fn(out, y)
        opt.zero_grad(); loss.backward(); opt.step()
    # `out` is the forward pass taken before the final optimizer step
    mi = float(torch.mean((out - y) ** 2).item())
    pred = [int(np.clip(v, 1, 39)) for v in out[-1].detach().numpy()]
    # Lower fitting error -> higher confidence
    conf = float(np.clip(0.99 - mi / 50, 0.3, 0.99))
    return unique6(pred), conf


from concurrent.futures import ProcessPoolExecutor, as_completed

def run_all_experts_max_parallel(window, registry):
    """Run every eligible expert in a pool of up to 16 worker processes.

    An expert is eligible when ``len(window) >= cfg['min_window']``. Its
    function is looked up by name in ``globals()`` (defaulting to ``fallback``)
    and receives a deep copy of ``window``.

    Returns:
        dict mapping expert name -> ``(pred, conf)``. An expert whose call
        raises is recorded as ``(fallback(window), 0.3)``.
    """
    import copy

    results = {}
    # NOTE: ProcessPoolExecutor suits heavy compute; where process pools are
    # restricted (e.g. some Kaggle setups) a ThreadPoolExecutor may be needed.
    with ProcessPoolExecutor(max_workers=16) as pool:
        pending = {
            pool.submit(globals().get(cfg['func'], fallback), copy.deepcopy(window)): name
            for name, cfg in registry.items()
            if len(window) >= cfg['min_window']
        }
        for done in as_completed(pending):
            expert_name = pending[done]
            try:
                pred, conf = done.result(timeout=60)
            except Exception:
                pred, conf = fallback(window), 0.3
            results[expert_name] = (pred, conf)
    return results




In [None]:
def Z20_DeterministicMomentum_Ensemble(window, registry, prev_weights=None, momentum=0.9):
    """Blend all eligible experts using momentum-updated ensemble weights.

    Args:
        window: Draw history passed to each expert.
        registry: Mapping of expert name -> config (``'func'``, ``'min_window'``).
        prev_weights: Optional ``(weights, velocity)`` pair from a previous
            call; when ``None`` the weights start uniform and velocity at zero.
        momentum: Decay factor applied to the previous velocity.

    Returns:
        ``(pred, (weights, velocity), conf)`` — the fused six numbers, the
        updated state to feed into the next call, and a confidence clipped to
        [0.5, 0.99].
    """
    # Decide eligibility for the whole registry before running any expert
    eligible_cfgs = [cfg for cfg in registry.values() if len(window) >= cfg['min_window']]

    pred_rows, conf_list = [], []
    for cfg in eligible_cfgs:
        expert_fn = globals().get(cfg['func'], fallback)
        expert_pred, expert_conf = expert_fn(window)
        pred_rows.append(unique6(expert_pred))
        conf_list.append(expert_conf)
    preds = np.array(pred_rows)
    confs = np.array(conf_list)

    if prev_weights is not None:
        weights, velocity = prev_weights
    else:
        n_experts = len(preds)
        weights = np.ones(n_experts) / n_experts
        velocity = np.zeros(n_experts)

    # "Gradient": confidence plus the spread of each expert's own six numbers
    grad = confs + np.std(preds, axis=1)
    velocity = momentum * velocity + grad
    weights = np.clip(weights + velocity, 0, 1)
    weights = weights / (weights.sum() or 1)

    # Weighted average ensemble prediction
    weighted_rows = [w * p for w, p in zip(weights, preds)]
    final = np.sum(weighted_rows, axis=0) / (weights.sum() or 1)
    pred = unique6(final)
    conf = float(np.clip(np.dot(weights, confs), 0.5, 0.99))
    return pred, (weights, velocity), conf


def Z21_AlgebraicCubeAttack_expert(window, registry):
    """Project the pooled expert predictions through fixed multipliers mod 39.

    Every eligible expert (``len(window) >= min_window``) is run; the distinct
    values across all their predictions are multiplied by each of six fixed
    constants, reduced modulo 39 and shifted to 1..39. The median of each of
    the six resulting columns is the prediction.

    Returns:
        ``(unique6(pred), conf)`` with ``conf = min(0.98, std(values)/39 + 0.18)``.
    """
    eligible_cfgs = [cfg for cfg in registry.values() if len(window) >= cfg['min_window']]

    pooled = []
    for cfg in eligible_cfgs:
        expert_fn = globals().get(cfg['func'], fallback)
        expert_pred, _unused_conf = expert_fn(window)
        pooled.extend(unique6(expert_pred))
    cubes = np.array(list(set(pooled)))

    # Deterministic extractor: one column per multiplier
    basis = np.array([17, 23, 29, 31, 37, 39])
    projected = np.mod(cubes.reshape(-1, 1) * basis, 39) + 1
    pred = [int(np.median(column)) for column in projected.T]
    conf = float(min(0.98, cubes.std() / 39 + 0.18))
    return unique6(pred), conf


def Z30_SymbolicRegression_expert(window):
    """Fit a symbolic expression to the flattened window and extrapolate 6 steps.

    The six number columns are concatenated into one sequence ``y`` indexed by
    position ``X = 0..len-1``. A ``PySRRegressor`` is fitted and evaluated at
    the next six positions; the rounded, clipped outputs are the prediction.

    Returns:
        ``(unique6(pred), conf)`` with ``conf = min(0.98, 1/(1+std(yhat)))``.
        When PySR is not installed the result is ``(fallback(window), 0.45)``
        directly; when fitting or prediction raises, ``fallback(window)`` is
        passed through ``unique6`` with confidence 0.45.
    """
    import numpy as np
    try:
        import pysr  # PySR is a fast symbolic regression library for Python
    except ImportError:
        return fallback(window), 0.45

    # Prepare data: flatten window
    seq = sum((window[f'Number{i}'].tolist() for i in range(1, 7)), [])
    X = np.arange(len(seq)).reshape(-1, 1)
    y = np.array(seq)

    # Symbolic regression (limited runs for speed)
    model = pysr.PySRRegressor(niterations=15, binary_operators=["+", "-", "*", "/"], unary_operators=["sin", "cos", "exp"])
    try:
        model.fit(X, y)
        # Predict next 6 numbers. The previous code called the object returned
        # by get_best() as if it were a function; that failed, so this expert
        # always fell back. predict() evaluates the selected equation.
        x_pred = np.arange(len(seq), len(seq) + 6).reshape(-1, 1)
        yhat = model.predict(x_pred)
        pred = [int(np.clip(round(v), 1, 39)) for v in yhat]
        conf = float(min(0.98, 1 / (1 + np.std(yhat))))
    except Exception:
        # Best-effort expert: any PySR failure degrades to the shared fallback
        pred, conf = fallback(window), 0.45
    return unique6(pred), conf



def Z31_Disentangled_decomposer_expert(window):
    """Reconstruct the last row of six through a tiny deterministic VAE-style net.

    The six number columns are concatenated and the last ``N`` rows of six
    (``N <= 36``) are used to train a 6-4-2-6 encoder/decoder for 7 Adam steps
    on reconstruction MSE only. The noise term is fixed at zero, so the latent
    code equals the predicted mean. The decoded latent mean of the final row,
    clipped to 1..39, is the prediction.

    Returns:
        ``(unique6(pred), conf)`` with ``conf = min(0.98, 0.7 + 0.3/(1+loss))``,
        or ``(fallback(window), 0.4)`` when fewer than 6 rows are available.
    """
    import torch, torch.nn as nn, numpy as np

    seq = sum((window[f'Number{i}'].tolist() for i in range(1, 7)), [])
    N = min(36, len(seq) // 6)
    if N < 6: return fallback(window), 0.4
    data = np.array(seq)[-N * 6:].reshape(N, 6)
    X = torch.tensor(data, dtype=torch.float32)

    class VAE(nn.Module):
        def __init__(self):
            super().__init__()
            # Layers must be created inside __init__; at class-body level
            # `self` does not exist and defining the class raised NameError.
            self.enc = nn.Linear(6, 4)
            self.mu = nn.Linear(4, 2)
            self.logvar = nn.Linear(4, 2)
            self.dec = nn.Linear(2, 6)

        def encode(self, x):
            h = torch.relu(self.enc(x))
            return self.mu(h), self.logvar(h)

        def reparam(self, mu, logvar):
            std = torch.exp(0.5 * logvar)
            # Zero noise keeps the model deterministic (z == mu)
            eps = torch.zeros_like(std)
            return mu + std * eps

        def decode(self, z): return self.dec(z)

        def forward(self, x):
            mu, logvar = self.encode(x)
            z = self.reparam(mu, logvar)
            return self.decode(z)

    vae = VAE()
    opt = torch.optim.Adam(vae.parameters(), lr=0.04)
    for _ in range(7):
        opt.zero_grad()
        recon = vae(X)
        loss = ((recon - X) ** 2).mean()
        loss.backward(); opt.step()
    z, _ = vae.encode(X[-1])
    out = vae.decode(z)
    pred = [int(np.clip(v.item(), 1, 39)) for v in out]
    conf = float(min(0.98, 0.7 + 0.3 / (1 + loss.item())))
    return unique6(pred), conf


def Z32_Physics_Inspired_NN_expert(window):
    """Regress rows of six against a normalized time axis and extrapolate.

    The last ``N`` rows of six (``N <= 36``) are paired with ``N`` evenly
    spaced inputs in [0, 1]. A 1-16-32-6 network (sine then ReLU activations)
    is trained for 8 Adam steps on MSE, then evaluated at six inputs in
    [1, 1.05]. The first six values of the flattened output, clipped to 1..39,
    are the prediction. (The full output holds 36 values; presumably
    ``unique6`` reduces the list to six — verify against its definition.)

    Returns:
        ``(unique6(pred), conf)`` with ``conf = min(0.97, 0.65 + 0.3/(1+loss))``,
        or ``(fallback(window), 0.4)`` when fewer than 7 rows are available.
    """
    import torch, torch.nn as nn, numpy as np

    seq = sum((window[f'Number{i}'].tolist() for i in range(1, 7)), [])
    N = min(36, len(seq) // 6)
    if N < 7: return fallback(window), 0.4
    X = np.linspace(0, 1, N).reshape(-1, 1).astype(np.float32)
    y = np.array(seq)[-N * 6:].reshape(N, 6).astype(np.float32)
    X_t = torch.from_numpy(X)
    y_t = torch.from_numpy(y)

    class PINN(nn.Module):
        def __init__(self):
            super().__init__()
            # Layers must be created inside __init__; at class-body level
            # `self` does not exist and defining the class raised NameError.
            self.linear1 = nn.Linear(1, 16)
            self.linear2 = nn.Linear(16, 32)
            self.linear3 = nn.Linear(32, 6)

        def forward(self, x):
            x1 = torch.sin(self.linear1(x))
            x2 = torch.relu(self.linear2(x1))
            return self.linear3(x2)

    net = PINN()
    optimizer = torch.optim.Adam(net.parameters(), lr=0.03)
    for _ in range(8):
        optimizer.zero_grad()
        y_pred = net(X_t)
        loss = ((y_pred - y_t) ** 2).mean()
        loss.backward(); optimizer.step()
    next_X = torch.from_numpy(np.linspace(1, 1.05, 6).reshape(-1, 1).astype(np.float32))
    out = net(next_X).detach().numpy().flatten()
    pred = [int(np.clip(v, 1, 39)) for v in out]
    conf = float(min(0.97, 0.65 + 0.3 / (1 + loss.item())))
    return unique6(pred), conf


In [None]:
def Z40_VonNeumannExtractor_expert(window):
    """Apply a Von Neumann-style debiasing pass to the window's bit stream.

    Every number is rendered as a zero-padded 6-bit binary string; the
    concatenated stream is read in non-overlapping pairs and the first bit of
    each unequal pair is kept. Kept bits are grouped six at a time and mapped
    to 1..39.

    Returns:
        ``(unique6(values), conf)`` with ``conf = min(0.99, kept_bits/total_bits)``,
        or ``(fallback(window), 0.3)`` when no bit survives. If fewer than six
        bits survive, six values from ``s_randint(1, 39)`` are used instead.
    """
    nums = sum((window[f'Number{i}'].tolist() for i in range(1, 7)), [])
    bitstream = "".join(format(n, "06b") for n in nums)

    kept_bits = []
    for start in range(0, len(bitstream) - 1, 2):
        first, second = bitstream[start], bitstream[start + 1]
        if first != second:
            kept_bits.append(first)
    if not kept_bits:
        return fallback(window), 0.3

    values = []
    for start in range(0, len(kept_bits) - 5, 6):
        chunk = "".join(kept_bits[start:start + 6])
        values.append(int(chunk, 2) % 39 + 1)
    if not values:
        values = [s_randint(1, 39) for _ in range(6)]

    pred = unique6(values)
    conf = float(min(0.99, len(kept_bits) / len(bitstream)))
    return pred, conf


def Z41_MultiBatteryStatTestExpert(window):
    """Combine three simple statistics of the flattened window into one score.

    The score averages (a) distance of the mean from 20, scaled by 19,
    (b) relative distance of the number of value changes from half the
    sequence length, and (c) absolute lag-1 correlation. Six consecutive
    values derived from ``38 * score`` form the prediction.

    Returns:
        ``(unique6(pred), conf)`` with ``conf = clip(1 - score, 0.35, 0.97)``.
    """
    import numpy as np

    seq = sum((window[f'Number{i}'].tolist() for i in range(1, 7)), [])
    n_values = len(seq)

    # Individual statistics
    mean_value = np.mean(seq)
    change_count = np.sum(np.diff(seq) != 0)
    if n_values > 1:
        lag1_corr = np.corrcoef(seq[:-1], seq[1:])[0, 1]
    else:
        lag1_corr = 0

    # Joint anomaly score: plain average of the three normalized terms
    mean_term = abs(mean_value - 20) / 19
    runs_term = abs(change_count - n_values / 2) / (n_values / 2)
    score = (mean_term + runs_term + abs(lag1_corr)) / 3

    pred = [1 + int(38 * score + offset) % 39 for offset in range(6)]
    return unique6(pred), float(np.clip(1 - score, 0.35, 0.97))


def Z42_MT19937_FingerprintExpert(window):
    """Shift the last six values by a bias measured between bit 0 and bit 5.

    The bias is the absolute difference between the mean of the lowest bit and
    the mean of bit 5 across all flattened values. The last six values are
    shifted up by ``int(10 * bias)`` and clipped to 1..39.

    Returns:
        ``(unique6(pred), conf)`` with ``conf = min(0.99, 1/(1+bias))``.
    """
    nums = sum((window[f'Number{i}'].tolist() for i in range(1, 7)), [])
    values = np.array(nums)

    low_bit_rate = np.bitwise_and(values, 1).mean()
    high_bit_rate = np.bitwise_and(np.right_shift(values, 5), 1).mean()
    bias = abs(low_bit_rate - high_bit_rate)

    shifted_tail = np.clip(values[-6:] + int(10 * bias), 1, 39)
    pred = [int(v) for v in shifted_tail]
    confidence = float(min(0.99, 1 / (1 + bias)))
    return unique6(pred), confidence


In [None]:
import torch

# Compute device chosen once at import time: CUDA when available, otherwise CPU.
DEVICE = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")


def to_device(tensor):
    """Move ``tensor`` to ``DEVICE``; return any non-tensor argument unchanged."""
    if not isinstance(tensor, torch.Tensor):
        return tensor
    return tensor.to(DEVICE)


In [None]:
from concurrent.futures import ThreadPoolExecutor, as_completed

def run_all_experts_parallel(window, registry):
    """Run every eligible expert on ``window`` in a thread pool.

    An expert is eligible when ``len(window) >= cfg['min_window']``. Its
    function is looked up by name in ``globals()`` and defaults to ``fallback``.

    Args:
        window: Draw history handed to each expert (shared, not copied).
        registry: Mapping of expert name -> config with ``'func'`` and
            ``'min_window'`` entries.

    Returns:
        dict mapping expert name -> ``(pred, conf)``. An expert whose call
        raises is recorded as ``(fallback(window), 0.3)``. Returns ``{}`` when
        no expert is eligible, including an empty registry, which previously
        raised ValueError because the pool was created with ``max_workers=0``.
    """
    eligible = [(name, cfg) for name, cfg in registry.items()
                if len(window) >= cfg['min_window']]
    if not eligible:
        return {}

    results = {}
    with ThreadPoolExecutor(max_workers=min(32, len(eligible))) as executor:
        futures = {}
        for name, cfg in eligible:
            fn = globals().get(cfg['func'], fallback)
            futures[executor.submit(fn, window)] = name
        for future in as_completed(futures):
            name = futures[future]
            try:
                # as_completed only yields finished futures, so no timeout is
                # needed here (the old timeout=22 could never trigger).
                pred, conf = future.result()
            except Exception:
                # Best-effort: a failing expert must not abort the whole run
                pred, conf = fallback(window), 0.3
            results[name] = (pred, conf)
    return results


In [None]:
# Quick cleanup - run this before each new orchestrator run
import os, glob


def _clear_orchestrator_outputs():
    """Delete orchestrator JSON/PKL/CSV outputs under /kaggle/working.

    Only files whose path contains one of the known output keywords are removed.
    """
    patterns = ["/kaggle/working/*.json", "/kaggle/working/*.pkl", "/kaggle/working/*.csv"]
    keywords = ["dharma_", "apex_", "breakthrough", "comprehensive", "validation",
                "insights", "analytics", "ultimate", "state"]
    for pattern in patterns:
        for path in glob.glob(pattern):
            if not any(word in path for word in keywords):
                continue
            if os.path.exists(path):
                os.remove(path)


_clear_orchestrator_outputs()
print("🧹 All DHARMA_X orchestrator data cleared!")


In [None]:
# 0. ABSOLUTE WARNING / LOG SUPPRESSION  ────────────────────────────────────────
import os, sys, warnings, logging
from contextlib import contextmanager
from io import StringIO
import datetime as dt

# Environment switches intended to quiet third-party libraries. They only
# affect code that reads them after this point.
# NOTE(review): PYTHONWARNINGS is read at interpreter start-up, so setting it
# here presumably only matters for child processes — confirm.
os.environ.update({
    "PYTHONWARNINGS": "ignore",
    "TF_CPP_MIN_LOG_LEVEL": "3",
    "LIGHTGBM_VERBOSITY": "-1",
    "LIGHTGBM_LOG_LEVEL": "OFF",
})
# Silence Python warnings in this process (both calls install an "ignore" filter)
warnings.filterwarnings("ignore")
warnings.simplefilter("ignore")
# Root logger: let only CRITICAL records through
logging.getLogger().setLevel(logging.CRITICAL)

# Per-library loggers: raise the level, stop propagation to the root logger
# and disable them outright. `_lg` and `l` remain defined as module globals
# after the loop.
for _lg in ["lightgbm", "statsmodels", "sklearn", "matplotlib", "seaborn"]:
    l = logging.getLogger(_lg)
    l.setLevel(logging.CRITICAL)
    l.propagate = False
    l.disabled = True

def _flush_python_streams():
    """Flush ``sys.stdout`` / ``sys.stderr`` if they are usable."""
    for stream in (sys.stdout, sys.stderr):
        try:
            stream.flush()
        except (AttributeError, ValueError, OSError):
            # Stream is None, closed, or not flushable: nothing to flush,
            # and silencing must not fail because of it.
            pass


@contextmanager
def absolute_silence():
    """Suppress C / Fortran stdout+stderr (e.g., sklearn, lightgbm, etc.).

    File descriptors 1 and 2 are pointed at ``os.devnull`` for the duration of
    the ``with`` block and restored afterwards, even if the block raises.
    Python-level buffers are flushed before the redirect (so earlier output is
    not swallowed) and again before the restore (so text buffered inside the
    block does not leak out afterwards).
    """
    _flush_python_streams()
    saved_stdout = os.dup(1)
    try:
        saved_stderr = os.dup(2)
        try:
            devnull = os.open(os.devnull, os.O_WRONLY)
            try:
                os.dup2(devnull, 1)
                os.dup2(devnull, 2)
                yield
            finally:
                _flush_python_streams()
                os.dup2(saved_stdout, 1)
                os.dup2(saved_stderr, 2)
                os.close(devnull)
        finally:
            os.close(saved_stderr)
    finally:
        os.close(saved_stdout)

# 1. CORE IMPORTS  ──────────────────────────────────────────────────────────────
import math, json, pickle, secrets, traceback, numpy as np, pandas as pd
from datetime import datetime
from random import SystemRandom
from collections import Counter, defaultdict, deque
from scipy.stats import entropy as scipy_entropy
from sklearn.linear_model import LinearRegression, Ridge
from sklearn.ensemble import RandomForestRegressor
from scipy import stats
import glob


# ── LOAD 872 ELITE EXPERT COMBINATIONS ─────────────────────────────────────
import pandas as pd
import ast

def load_elite_combinations(
    csv_path='/kaggle/input/dharma-synergy/dharma_expert_synergy_analysis.csv',
    min_success_rate=20.0,
):
    """Load the expert combinations whose success rate meets a threshold.

    Args:
        csv_path: CSV with at least the columns ``success_rate_percent`` and
            ``expert_combination`` (a Python-literal list stored as text).
        min_success_rate: Minimum ``success_rate_percent`` (inclusive) for a
            row to be kept.

    Returns:
        List of combinations in file order, each parsed with
        ``ast.literal_eval`` (safe literal parsing, no code execution).

    Raises:
        Whatever ``pandas.read_csv`` raises for an unreadable file;
        ``KeyError`` for missing columns; ``ValueError``/``SyntaxError`` for
        a malformed combination literal.
    """
    df = pd.read_csv(csv_path)
    qualifies = df['success_rate_percent'].astype(float) >= min_success_rate
    elite_combos = [ast.literal_eval(text)
                    for text in df.loc[qualifies, 'expert_combination']]

    print(f"🎯 Loaded {len(elite_combos)} elite combinations (≥{min_success_rate:g}% success rate)")
    return elite_combos

# Load at startup: this executes load_elite_combinations() as soon as the
# cell/module runs, so the synergy CSV must be readable at that point.
ELITE_COMBOS = load_elite_combinations()
# Split between 'elite' and 'explore' (values sum to 1.0).
# NOTE(review): presumably the share of iterations given to each strategy —
# the consumer is not visible here; confirm.
RESOURCE_ALLOCATION = {'elite': 0.85, 'explore': 0.15}

# === BEGIN: SEGMENT SUMMARY COLLECTOR (SCREEN ONLY) ===
from collections import Counter
from typing import List, Optional

# === BEGIN: SEGMENT SUMMARY COLLECTOR (Top‑6 vs Target directly under frequency) ===
from typing import List, Optional

class SegmentSummaryCollector:
    """
    Screen-only accumulator of prediction statistics for one segment.

    Tracks, across all recorded iterations:
      - how often each number 1..39 was predicted
      - how many iterations scored each exact hit count 0..6
      - for each target ("actual") number, the hit-count histogram of the
        iterations whose prediction contained it
    and prints them, including a comparison of the six most frequently
    predicted numbers against the target.
    """
    __slots__ = (
        "_num_counts", "_hit_bucket", "_total_predictions",
        "_actual_rows", "_per_actual", "_min_k"
    )

    def __init__(self, min_k_include: int = 0) -> None:
        """Create an empty collector; iterations with fewer than ``min_k_include`` hits are ignored."""
        # Slot 0 is unused so that a number indexes its own counter directly
        self._num_counts = [0 for _ in range(40)]
        # One bucket per exact hit count 0..6
        self._hit_bucket = [0 for _ in range(7)]
        self._total_predictions = 0
        self._actual_rows: List[int] = []
        self._per_actual: dict[int, List[int]] = {}
        self._min_k = int(min_k_include)

    def set_actuals(self, actual_numbers: List[int]) -> None:
        """Set the target numbers (first six values within 1..39) and reset their histograms."""
        in_range = []
        for raw in actual_numbers:
            if 1 <= int(raw) <= 39:
                in_range.append(int(raw))
        self._actual_rows = in_range[:6]

        self._per_actual = {}
        for number in self._actual_rows:
            self._per_actual[number] = [0] * 7

    def update(self, prediction_6: List[int], hit_count: int, target_numbers: Optional[List[int]] = None) -> None:
        """Record one iteration's prediction and its exact hit count.

        ``target_numbers`` is only used when no target has been set yet.
        Iterations with a hit count outside 0..6 or below the configured
        minimum are skipped entirely.
        """
        if not self._actual_rows and target_numbers is not None:
            self.set_actuals(target_numbers)

        hits = int(hit_count)
        if hits < 0 or hits > 6 or hits < self._min_k:
            return

        # Per-number frequency (values outside 1..39 are ignored)
        for raw in prediction_6:
            number = int(raw)
            if number < 1 or number > 39:
                continue
            self._num_counts[number] += 1

        # Segment-wide hit histogram
        self._hit_bucket[hits] += 1

        # Histogram per target number contained in this prediction
        if self._actual_rows:
            predicted = set(map(int, prediction_6))
            for actual in self._actual_rows:
                if actual in predicted:
                    self._per_actual[actual][hits] += 1

        self._total_predictions += 1

    def _compute_frequency_pairs(self) -> List[tuple]:
        """Return ``(number, count)`` for 1..39, highest count first, ties by smaller number."""
        tallies = [(number, self._num_counts[number]) for number in range(1, 40)]
        return sorted(tallies, key=lambda pair: (-pair[1], pair[0]))

    def _print_number_frequency(self, pairs: List[tuple]) -> None:
        """Print the frequency table in the order given by ``pairs``."""
        lines = [
            "\nPredicted number frequency (1..39) – descending by count:",
            "Number | Count",
            "------ | -----",
        ]
        lines.extend(f"{n:>6} | {c:>5}" for n, c in pairs)
        print("\n".join(lines))

    def _print_top6_vs_target(self, pairs: List[tuple]) -> None:
        """
        Print how many of the first six numbers in ``pairs`` are target numbers.
        Prints nothing when no target is set or fewer than six pairs exist.
        """
        top6 = [entry[0] for entry in pairs[:6]]
        if len(top6) < 6 or not self._actual_rows:
            return
        overlap = sorted(set(top6).intersection(self._actual_rows))
        print("\nTop‑6 by frequency vs Target:")
        print(f"  Top‑6: {sorted(top6)}")
        print(f"  Target: {sorted(self._actual_rows)}")
        print(f"  Match score: {len(overlap)}/6")
        print(f"  Overlap: {overlap if overlap else 'None'}")

    def _print_actual_positions(self, pairs: list[tuple[int, int]]) -> None:
        """
        Print, for each target number in its stored order, its 1-based rank and
        its count within ``pairs`` (a list of ``(number, count)`` already sorted
        by descending count).
        """
        if not self._actual_rows:
            return

        rank_of = {n: position for position, (n, _count) in enumerate(pairs, start=1)}
        count_of = {n: count for n, count in pairs}

        print("\nPositions of target numbers in frequency list (1 = highest):")
        print("Number | Rank | Count")
        print("------ | ---- | -----")
        for actual in self._actual_rows:
            rank = rank_of.get(actual)
            rank_cell = f"{'N/A' if rank is None else rank:>4}"
            print(f"{actual:>6} | {rank_cell} | {count_of.get(actual, 0):>5}")


    def _print_segment_distribution(self) -> None:
        """Print the single-row histogram of exact hit counts with its total."""
        labels = [f"{k}/6" for k in range(7)]
        counts = [self._hit_bucket[k] for k in range(7)]

        header_cells = [f"{label:>4}" for label in labels]
        header_cells.append("Total")
        value_cells = [f"{value:>4}" for value in counts]
        value_cells.append(f"{sum(counts)}")

        print("\nExact hit distribution (k/6) for this segment:")
        print(" | ".join(header_cells))
        print("-+-".join(["----"] * (len(labels) + 1)))
        print(" | ".join(value_cells))

    def _print_per_actual_matrix(self) -> None:
        """Print one histogram row per target number; nothing when no target is set."""
        if not self._actual_rows:
            return
        header_cols = [f"{k}/6" for k in range(7)]
        header_cols.append("Total")

        print("\nActual Numbers – exact hit distribution by contained predictions:")
        print(f"{'Actual Numbers':>14} | " + " | ".join(f"{h:>4}" for h in header_cols))
        print("-" * 14 + "-+-" + "-+-".join(["----"] * len(header_cols)))
        for actual in self._actual_rows:
            row = self._per_actual.get(actual, [0] * 7)
            cells = [f"{value:>4}" for value in row]
            cells.append(f"{sum(row)}")
            print(f"{str(actual):>14} | " + " | ".join(cells))

    def print_summary(self, segment_label: Optional[str] = None, period_label: Optional[str] = None) -> None:
        """
        Print the full report, in this order:
          1) number frequency table (descending)
          2) top-6 vs target comparison
          3) rank of each target number in the frequency table
          4) segment exact-hit distribution
          5) per-target hit matrix
        followed by the number of recorded iterations.
        """
        header_parts = ["SEGMENT SUMMARY"]
        if period_label is not None:
            header_parts.append(f"Period: {period_label}")
        if segment_label is not None:
            header_parts.append(f"Segment: {segment_label}")
        rule = "=" * 80

        print("\n" + rule)
        print(" | ".join(header_parts))
        print(rule)

        pairs = self._compute_frequency_pairs()
        self._print_number_frequency(pairs)
        self._print_top6_vs_target(pairs)
        self._print_actual_positions(pairs)
        self._print_segment_distribution()
        self._print_per_actual_matrix()

        print("\nIterations (predictions) in this segment:", self._total_predictions)
        print(rule)
# === END: SEGMENT SUMMARY COLLECTOR ===




# 2. SECURE RNG HELPERS  ────────────────────────────────────────────────────────
_rand = SystemRandom()


def s_int(a, b):
    """Return a random integer in the closed range [a, b] via ``secrets.randbelow``."""
    return secrets.randbelow(b - a + 1) + a


def s_unif(a, b):
    """Return a random float drawn by ``SystemRandom.uniform(a, b)``."""
    return _rand.uniform(a, b)


def s_pick(seq):
    """Return one randomly chosen element of the indexable, non-empty ``seq``."""
    return seq[secrets.randbelow(len(seq))]


def s_smpl(seq, k):
    """Return ``k`` distinct-position elements of ``seq`` via ``SystemRandom.sample``."""
    return _rand.sample(seq, k)

def extract_breakthrough_intelligence(unified_analytics_file):
    """Extract >=4/6 breakthrough intelligence for orchestrator optimization.

    Reads a JSON file whose top-level ``'periods'`` mapping holds, per period,
    an optional ``'match_counts'`` mapping from hit level (as a string) to a
    list of record dicts. All records at hit levels 4, 5 and 6 are collected
    across every period and passed to ``analyze_breakthrough_patterns``.

    Args:
        unified_analytics_file: Path to the analytics JSON file.

    Returns:
        The analysis dict from ``analyze_breakthrough_patterns``, or ``None``
        when no record at hit level 4-6 exists.

    Raises:
        KeyError: if ``'periods'`` is missing or a record lacks one of the
            expected fields.
    """
    import json

    # Fields copied verbatim from each record, in output order
    record_fields = (
        'beta', 'entropy', 'cycle_position', 'phase', 'iteration',
        'expert_weights', 'active_experts', 'weight_concentration',
        'optimal_convergence', 'performance_weighted_score',
        'prediction', 'target',
    )

    # JSON is UTF-8 by specification; do not depend on the locale default
    with open(unified_analytics_file, 'r', encoding='utf-8') as f:
        data = json.load(f)

    breakthrough_records = []

    # Extract all >=4/6 records across ALL periods
    for period_name, period_data in data['periods'].items():
        match_counts = period_data.get('match_counts', {})
        for hit_level in ('4', '5', '6'):
            if hit_level not in match_counts:
                continue
            for record in match_counts[hit_level]:
                entry = {'period': period_name, 'hit_level': int(hit_level)}
                entry.update((field, record[field]) for field in record_fields)
                breakthrough_records.append(entry)

    if not breakthrough_records:
        print("❌ No ≥4/6 breakthrough records found")
        return None

    print(f"🎯 Found {len(breakthrough_records)} breakthrough records (≥4/6)")
    return analyze_breakthrough_patterns(breakthrough_records)

def analyze_breakthrough_patterns(breakthrough_records):
    """Summarize breakthrough records into parameter ranges and frequency tables.

    Args:
        breakthrough_records: Non-empty list of record dicts with at least the
            keys ``beta``, ``entropy``, ``optimal_convergence``,
            ``active_experts``, ``cycle_position``, ``phase`` and
            ``weight_concentration``.

    Returns:
        dict with the keys ``parameter_sweet_spots`` (min/max/mean/std
        statistics), ``top_expert_combinations`` and ``cycle_hotspots`` (ten
        most common, as ``(value, count)`` pairs), ``phase_distribution``
        (phase -> count) and ``weight_concentration`` (min/max/mean and the
        80th percentile as ``optimal_threshold``).
    """
    import numpy as np
    from collections import Counter

    def column(key):
        return [rec[key] for rec in breakthrough_records]

    analysis = {}

    # 1. Parameter sweet spots
    beta_vals = column('beta')
    entropy_vals = column('entropy')
    convergence_vals = column('optimal_convergence')

    sweet_spots = {}
    sweet_spots['beta_min'] = min(beta_vals)
    sweet_spots['beta_max'] = max(beta_vals)
    sweet_spots['beta_mean'] = np.mean(beta_vals)
    sweet_spots['beta_std'] = np.std(beta_vals)
    sweet_spots['entropy_min'] = min(entropy_vals)
    sweet_spots['entropy_max'] = max(entropy_vals)
    sweet_spots['entropy_mean'] = np.mean(entropy_vals)
    sweet_spots['entropy_std'] = np.std(entropy_vals)
    sweet_spots['convergence_min'] = min(convergence_vals)
    sweet_spots['convergence_max'] = max(convergence_vals)
    sweet_spots['convergence_mean'] = np.mean(convergence_vals)
    analysis['parameter_sweet_spots'] = sweet_spots

    # 2. Expert combination frequency (order-insensitive: names are sorted)
    combo_counts = Counter(
        tuple(sorted(rec['active_experts'])) for rec in breakthrough_records
    )
    analysis['top_expert_combinations'] = combo_counts.most_common(10)

    # 3. Cycle position hotspots
    position_counts = Counter(column('cycle_position'))
    analysis['cycle_hotspots'] = position_counts.most_common(10)

    # 4. Phase distribution
    analysis['phase_distribution'] = dict(Counter(column('phase')))

    # 5. Weight concentration statistics
    concentrations = column('weight_concentration')
    analysis['weight_concentration'] = {
        'min': min(concentrations),
        'max': max(concentrations),
        'mean': np.mean(concentrations),
        'optimal_threshold': np.percentile(concentrations, 80),
    }

    return analysis

def generate_orchestrator_optimization_recommendations(analysis):
    """Generate specific orchestrator modifications based on breakthrough analysis.

    Args:
        analysis: dict with ``'parameter_sweet_spots'`` (providing
            ``beta_mean``, ``beta_std``, ``entropy_mean``, ``entropy_std``) and
            ``'top_expert_combinations'`` (a list of ``(combo, count)`` pairs).

    Returns:
        List of three recommendation dicts, in order: a beta parameter lock, an
        entropy parameter lock (each with the mean as value and half the
        standard deviation as tolerance), and an expert-forcing entry carrying
        up to the first seven combinations.
    """
    recommendations = []

    params = analysis['parameter_sweet_spots']

    # Beta optimization
    beta_target = params['beta_mean']
    beta_tolerance = params['beta_std'] * 0.5
    recommendations.append({
        'type': 'parameter_lock',
        'parameter': 'BETA_TARGET',
        'value': beta_target,
        'tolerance': beta_tolerance,
        'reason': f'≥4/6 success concentrated at β={beta_target:.4f}±{beta_tolerance:.4f}'
    })

    # Entropy optimization
    entropy_target = params['entropy_mean']
    entropy_tolerance = params['entropy_std'] * 0.5
    recommendations.append({
        'type': 'parameter_lock',
        'parameter': 'ENTROPY_TARGET',
        'value': entropy_target,
        'tolerance': entropy_tolerance,
        'reason': f'≥4/6 success concentrated at H={entropy_target:.4f}±{entropy_tolerance:.4f}'
    })

    # Expert combination forcing
    top_combos = analysis['top_expert_combinations'][:7]
    covered_successes = sum(count for _, count in top_combos)
    recommendations.append({
        'type': 'expert_forcing',
        'parameter': 'BREAKTHROUGH_COMBINATIONS',
        'value': [list(combo) for combo, _ in top_combos],
        # Report the number of combinations actually listed; the message used
        # to say "Top 3" while summing up to seven.
        'reason': f'Top {len(top_combos)} combinations account for {covered_successes} breakthrough successes'
    })

    return recommendations


# 3. BREAKTHROUGH HYPER-PARAMETERS (★ DISCOVERY-BASED ★)  ──────────────────────
# ═════════ BREAKTHROUGH INTELLIGENCE CONSTANTS (GLOBAL) ═════════
# NOTE(review): BETA_TARGET (0.5075) lies outside BETA_RANGE ([0.5400, 0.5552])
# defined below — confirm which of the two is authoritative.
BETA_TARGET = 0.5075      # Perfect 6/6 beta (was 0.5058)
# Superseded value, kept for reference: ENTROPY_TARGET = 5.0003 (perfect 6/6 entropy, was 5.1385)
BETA_TOLERANCE = 0.0020   # Tighter tolerance (was 0.0141)  
ENTROPY_TOLERANCE = 0.0060 # Was 0.0055 — NOTE(review): 0.0060 is wider than the old value, not tighter
MIN_HIT_LEVEL = 4         # Minimum hit level treated as a breakthrough (>=4/6)
ENTROPY_4_5_TARGET = 5.1442   # Universal for 4/6 and 5/6 (80% of attempts)
ENTROPY_6_TARGET = 5.1444     # Specialized for 6/6 attempts (20% of attempts)
# The generic entropy target is an alias of the 4/6-5/6 target
ENTROPY_TARGET = ENTROPY_4_5_TARGET
BETA_RANGE = [0.5400, 0.5552]       # Universal range present in ALL periods
ENTROPY_RANGE = [5.1301, 5.1442]    # Universal range present in ALL periods

# Strategy mode. BETA_LOCK_SHARE below only distinguishes 'balanced' and
# 'conservative'; every other value, including the current 'universal',
# takes the final (0.95, "aggressive") branch.
# UNIVERSAL STRATEGY MODE (based on 321,130 record analysis)
STRATEGY_MODE = 'universal'  # New mode based on cross-period intelligence

# Balanced allocation based on trend analysis (the three shares sum to 1.0)
FORCE_UNIVERSAL_SHARE = 0.60   # 60% universal patterns (multi-period validated)
FORCE_EXPLORATION_SHARE = 0.25 # 25% exploration (preserve 4/6-5/6 breadth)
FORCE_6OF6_SHARE = 0.15        # 15% breakthrough attempts (6/6 specialized)

# Share of iterations locked to 6/6 beta corridor (balanced=0.80, conservative=0.50, anything else=0.95)
# With STRATEGY_MODE = 'universal' this evaluates to 0.95.
BETA_LOCK_SHARE = 0.80 if STRATEGY_MODE == 'balanced' else (0.50 if STRATEGY_MODE == 'conservative' else 0.95)

# Hottest cycle positions for ≥6/6 (from intelligence)
# UNIVERSAL CYCLE POSITIONS (29/29 period coverage)
UNIVERSAL_CYCLE_POSITIONS = [10, 18, 25, 39, 60, 67, 72, 41, 32, 55, 74]  # Perfect coverage
SECONDARY_CYCLE_POSITIONS = [1, 21, 24]                   # 28/29 period coverage

# Keep legacy 6/6 positions for breakthrough attempts
# (these are the first five entries of UNIVERSAL_CYCLE_POSITIONS)
HOTTEST_6OF6_POSITIONS  = [10,18,25,39,60]

# Top 6/6 combinations (from intelligence)
# UNIVERSAL EXPERT COMBINATIONS (23/29 period validated)
UNIVERSAL_EXPERT_COMBOS = [
    ['B1_RandomForest_v2','E21_CRT_Reconstructor','G19_Catastrophic_Forgetting'],
    ['A1_ARIMA','E16_Chaos_Theory','E30_MarkovChain_Residuals'],
    ['A1_ARIMA_v2','E16_Chaos_Theory','E30_MarkovChain_Residuals'],
    ['E16_Chaos_Theory','E1_FrequencyAnalysis','E30_MarkovChain_Residuals','E9_Gap_Analysis'],
    ['E16_Chaos_Theory','E1_FrequencyAnalysis_v2','E30_MarkovChain_Residuals'],
    ['D1_EnsembleStacking','E16_Chaos_Theory','E1_FrequencyAnalysis']
]

# Keep 6/6 combos for breakthrough attempts (legacy)
BREAKTHROUGH_6OF6_COMBOS = [
    ['D1_EnsembleStacking', 'E16_Chaos_Theory', 'E1_FrequencyAnalysis'],  # 6/6 specific
    ['B1_RandomForest_v2', 'E21_CRT_Reconstructor', 'G19_Catastrophic_Forgetting'],
    ['E16_Chaos_Theory', 'E1_FrequencyAnalysis', 'E30_MarkovChain_Residuals']
]

# Universal cross-hit-level experts (boost weights)
# NOTE(review): presumably a multiplier applied to the weights of the experts
# listed in UNIVERSAL_EXPERTS — the consumer is not visible here; confirm.
UNIVERSAL_EXPERT_BOOST = 2.5
UNIVERSAL_EXPERTS = [
    'A1_ARIMA','A1_ARIMA_v2','A2_MovingAverage','A2_MovingAverage_v2',
    'B1_RandomForest','B1_RandomForest_v2','D1_EnsembleStacking',
    'E16_Chaos_Theory','E1_FrequencyAnalysis','E1_FrequencyAnalysis_v2'
]

# Signature enforcement (β:50 | H:50 dominates 6/6)
# enforce_breakthrough_signature compares beta*100 and entropy*10 against
# these buckets, i.e. bucket 50 corresponds to beta 0.50 and entropy 5.0.
TARGET_SIGNATURE = {'beta_bucket': 50, 'entropy_bucket': 50}

# ── Helpers ─────────────────────────────────────────────────────
def enforce_breakthrough_signature(beta, entropy):
    """Pull beta and entropy back to the TARGET_SIGNATURE buckets when they stray.

    beta is compared on a x100 scale and entropy on a x10 scale against their
    bucket values. A value more than 2.5 bucket units away is replaced by the
    bucket centre (rescaled to the value's own units) plus a jitter drawn with
    s_unif(-0.025, 0.025); a value already in range is returned unchanged.

    Returns:
        tuple: the (beta, entropy) pair after any replacement.
    """
    beta_bucket = TARGET_SIGNATURE['beta_bucket']
    if abs(beta * 100 - beta_bucket) > 2.5:
        beta = beta_bucket / 100.0 + s_unif(-0.025, 0.025)

    entropy_bucket = TARGET_SIGNATURE['entropy_bucket']
    if abs(entropy * 10 - entropy_bucket) > 2.5:
        entropy = entropy_bucket / 10.0 + s_unif(-0.025, 0.025)

    return beta, entropy

def adaptive_beta_with_6of6_lock(iteration, max_it):
    """Return the beta to use at ``iteration`` out of ``max_it``.

    For the first BETA_LOCK_SHARE fraction of the run beta is BETA_TARGET plus
    a jitter in [-BETA_TOLERANCE, BETA_TOLERANCE]; afterwards it is exactly
    BETA_TARGET.
    """
    lock_boundary = max_it * BETA_LOCK_SHARE
    if iteration < lock_boundary:
        jitter = s_unif(-BETA_TOLERANCE, BETA_TOLERANCE)
        return BETA_TARGET + jitter
    return BETA_TARGET

def force_universal_cycle_positions(iteration, default_cycle_pos):
    """Occasionally override the cycle position with a curated one.

    The checks run from the most specific multiple to the least specific one.
    Previously ``iteration % 4`` was tested first, which made the ``% 8`` and
    ``% 12`` branches unreachable (every multiple of 8 or 12 is a multiple
    of 4).

    * multiple of 12 -> random pick from HOTTEST_6OF6_POSITIONS
    * other multiple of 8 -> random pick from SECONDARY_CYCLE_POSITIONS
    * other multiple of 4 -> random pick from UNIVERSAL_CYCLE_POSITIONS
    * anything else -> ``default_cycle_pos`` unchanged

    NOTE(review): this module defines force_universal_cycle_positions a second
    time further down; the later definition replaces this one when the module
    is executed, so this version only takes effect if that one is removed.
    """
    if iteration % 12 == 0:
        return s_pick(HOTTEST_6OF6_POSITIONS)
    if iteration % 8 == 0:
        return s_pick(SECONDARY_CYCLE_POSITIONS)
    if iteration % 4 == 0:
        return s_pick(UNIVERSAL_CYCLE_POSITIONS)
    return default_cycle_pos

def build_6of6_breakthrough_set(preds, scores, st):
    """Aggregate one breakthrough expert combination into a six-number set.

    With probability 0.7 the fixed D1 + E16 + E1 combination is used, otherwise
    a random entry of BREAKTHROUGH_6OF6_COMBOS. Experts of the combination that
    have a score get weight ``score * 3.0``; the weights are normalised and the
    predictions of the experts present in ``preds`` are summed with them.

    Args:
        preds: expert name -> predicted numbers.
        scores: expert name -> score.
        st: state dict; not used by this function.

    Returns:
        ``(numbers, weights)``, or ``(None, None)`` when no expert of the
        chosen combination has a score.
    """
    combo = (
        ['D1_EnsembleStacking', 'E16_Chaos_Theory', 'E1_FrequencyAnalysis']
        if s_unif(0, 1) < 0.7
        else s_pick(BREAKTHROUGH_6OF6_COMBOS)
    )

    subw = {name: scores[name] * 3.0 for name in combo if name in scores}
    if not subw:
        return None, None
    normalize(subw)

    blended = np.zeros(6)
    for name, weight in subw.items():
        if name not in preds:
            continue
        blended += weight * np.array(preds[name])
    return unique6(blended), subw

def build_universal_set(preds, scores, st):
    """Aggregate one random UNIVERSAL_EXPERT_COMBOS entry into a six-number set.

    Only experts that have both a score and a prediction take part. Previously
    an expert with a score but no prediction raised KeyError on ``preds[e]``,
    whereas build_6of6_breakthrough_set guards against that case.

    Args:
        preds: expert name -> predicted numbers.
        scores: expert name -> score.
        st: state dict; not used by this function.

    Returns:
        ``(numbers, weights)``, or ``(None, None)`` when no expert of the
        chosen combination is usable.
    """
    combo = s_pick(UNIVERSAL_EXPERT_COMBOS)
    subw = {e: scores[e] * 2.5 for e in combo if e in scores and e in preds}
    if not subw:
        return None, None
    normalize(subw)

    agg = np.zeros(6)
    for expert, weight in subw.items():
        agg += weight * np.array(preds[expert])
    return unique6(agg), subw

def force_universal_cycle_positions(it, default):
    """Occasionally override the cycle position with a curated one.

    * ``it`` a multiple of 8 -> random pick from SECONDARY_CYCLE_POSITIONS
    * ``it`` any other multiple of 4 -> random pick from
      UNIVERSAL_CYCLE_POSITIONS
    * otherwise -> ``default`` unchanged

    The multiple-of-8 test must come first: every multiple of 8 is also a
    multiple of 4, so with the previous order the secondary branch could
    never run and SECONDARY_CYCLE_POSITIONS was never used.
    """
    if it % 8 == 0:
        return s_pick(SECONDARY_CYCLE_POSITIONS)
    if it % 4 == 0:
        return s_pick(UNIVERSAL_CYCLE_POSITIONS)
    return default

def adaptive_entropy_targeting(it, max_it):
    """Pick the entropy target for iteration ``it`` of ``max_it``.

    ENTROPY_6_TARGET is returned once ``it`` exceeds 90% of ``max_it``;
    before that ENTROPY_4_5_TARGET is returned.
    """
    if it > max_it * 0.9:
        return ENTROPY_6_TARGET
    return ENTROPY_4_5_TARGET

def beta_dual_anchor(it):
    """Return a jittered beta around one of two anchors.

    Every fifth iteration (``it % 5 == 0``) the anchor is 0.5084 with a jitter
    of +/-0.0010; on all other iterations the anchor is BETA_TARGET with a
    jitter of +/-BETA_TOLERANCE.
    """
    on_second_anchor = (it % 5) == 0
    if not on_second_anchor:
        return BETA_TARGET + s_unif(-BETA_TOLERANCE, BETA_TOLERANCE)
    return 0.5084 + s_unif(-0.0010, 0.0010)



# ── SUCCESS SIGNATURE WEIGHTS ─────────────────────────────────────────────
# Relative weight of each named success signature.
SIGNATURE_WEIGHTS = dict(
    signature_1=0.40,  # most frequent breakthrough pattern
    signature_2=0.35,  # highest performance signature
    signature_3=0.25,  # specialized breakthrough type
)

def matches_pattern(signature, prediction):
    """Report whether ``prediction`` matches ``signature``.

    Placeholder implementation: both arguments are ignored and every
    prediction is treated as a match, so any signature boost always applies.
    Replace with real matching once the signature analysis is available.
    """
    return True


# EXPERT HIERARCHY MULTIPLIERS (based on the 5/6 analysis).
# run_experts multiplies an expert's confidence by its tier value; experts
# that appear in no tier get a multiplier of 1.0 there.
ORACLE_EXPERTS = {
    'E30_MarkovChain_Residuals': 6.00,  # "The Oracle" - 29 appearances
    'E16_Chaos_Theory': 5.00,           # "Nonlinear Master" - 16 appearances
    'E9_Gap_Analysis': 4.50,            # "Pattern Filler" - 13 appearances
    'Z3_XORMixnet_Reverse': 4.00,       # "Encryption Breaker" - 13 appearances
}

BREAKTHROUGH_EXPERTS = {
    'E1_FrequencyAnalysis': 2.80,         # 20 appearances
    'Z6_Adversarial_PhaseWatcher': 2.70,  # 19 appearances
    'B1_RandomForest': 2.65,              # 19 appearances
    'F2_Quantum_ML': 2.60,                # 19 appearances
    'X4_GrangerCausality': 2.55,          # 19 appearances
    'E31_Fractal_Dimension': 2.50,        # 18 appearances
}

SPECIALIST_EXPERTS = {
    'E1_FrequencyAnalysis_v2': 2.25,  # 17 appearances
    'E27_Fractal_Pattern': 2.20,      # 16 appearances
    'A1_ARIMA': 2.10,                 # 15 appearances
    'E25_Resampling_Chaos': 2.05,     # 15 appearances
    'A1_ARIMA_v2': 2.00,              # 13 appearances
}

SUPPRESSED_EXPERTS = {
    'E40_NIST_80022_Battery': 0.25,
    'Z2_GrammaticalEvo_ML': 0.30,  # often had 0.000 weights
}

# ADVANCED PARAMETERS
CONVERGENCE_BOOST = 3.50      # boost for entropy convergence
PATTERN_REINFORCEMENT = 2.75  # boost for successful patterns
ENTROPY_LOCK_STRENGTH = 0.98  # how strongly to enforce the entropy target
BETA_CYCLE_LENGTH = 75        # longer cycles for convergence
ENTROPY_FORCING_RATIO = 0.95  # share of predictions forced to target entropy

# 4. VALIDATION PERIODS CONFIGURATION ───────────────────────────────────────────
# ═══════════════════════════════════════════════════════════════════════════════
# MONTHLY PREDICTION STRATEGY - 3 MONTHS TO PREDICT 1 MONTH
# ═══════════════════════════════════════════════════════════════════════════════

def generate_segmented_periods(df):
    """Build per-month validation segments from the draw history.

    For every calendar month with at least two draws, the month's last draw is
    the prediction target. Segment ``k`` (k = 1 .. draws-1) trains on the ``k``
    draws of that month immediately preceding the target.

    Side effect: ``df['Date']`` is converted to datetime in place.

    Args:
        df: DataFrame with a ``Date`` column parseable by ``pd.to_datetime``.

    Returns:
        list[dict]: one config per segment with the keys ``name``, ``month``,
        ``segment``, ``training_dates``, ``predict_date`` and ``train_end``
        (dates as ``YYYY-MM-DD`` strings).
    """
    df['Date'] = pd.to_datetime(df['Date'])
    ordered = df.sort_values('Date')
    month_key = ordered['Date'].dt.strftime('%Y-%m')

    periods = []
    for month_str, month_rows in ordered.groupby(month_key):
        draw_dates = month_rows['Date'].dt.strftime('%Y-%m-%d').tolist()
        if len(draw_dates) < 2:
            # A single draw leaves nothing to train on.
            continue

        # Everything before the month's final draw is potential training data.
        *history, predict_date = draw_dates

        for seg_idx in range(1, len(history) + 1):
            # The seg_idx most recent draws before the target.
            window = history[-seg_idx:]
            periods.append({
                "name": f"{month_str}_Seg{seg_idx}",
                "month": month_str,
                "segment": seg_idx,
                "training_dates": window,
                "predict_date": predict_date,
                "train_end": window[-1],
            })

    return periods

# Starts empty; intended to be populated once the data is loaded.
# UnifiedAnalyticsCollector.__init__ records len(VALIDATION_PERIODS) at
# construction time, so populate this before creating the collector.
VALIDATION_PERIODS = []


# 5. EXPERT REGISTRY WITH BREAKTHROUGH OPTIMIZATION  ───────────────────────────
# Maps expert name -> dict(min_window=..., func=...).
#   min_window: minimum number of training rows; run_experts skips the expert
#               when len(df) is smaller.
#   func:       NAME of the expert function; run_experts resolves it with
#               globals() and falls back to _stub when it is not defined.
expert_registry = {
    # ORACLE TIER - The proven winners
    'E30_MarkovChain_Residuals': dict(min_window=20, func='E30_MarkovChain_Residuals_expert'),
    'E16_Chaos_Theory': dict(min_window=20, func='E16_Chaos_Theory_expert'),
    'E9_Gap_Analysis': dict(min_window=10, func='E9_Gap_Analysis_expert'),
    'Z3_XORMixnet_Reverse': dict(min_window=40, func='Z3_XORMixnet_Reverse_expert'),
    
    # BREAKTHROUGH TIER - High frequency winners
    'E1_FrequencyAnalysis': dict(min_window=10, func='E1_FrequencyAnalysis_expert'),
    'Z6_Adversarial_PhaseWatcher': dict(min_window=30, func='Z6_Adversarial_PhaseWatcher_expert'),
    'B1_RandomForest': dict(min_window=15, func='B1_RandomForest_expert'),
    'F2_Quantum_ML': dict(min_window=10, func='F2_Quantum_ML_expert'),
    'X4_GrangerCausality': dict(min_window=15, func='X4_GrangerCausality_expert'),
    'E31_Fractal_Dimension': dict(min_window=25, func='E31_Fractal_Dimension_expert'),
    
    # SPECIALIST TIER - Consistent contributors
    'E1_FrequencyAnalysis_v2': dict(min_window=6, func='E1_FrequencyAnalysis_expert_v2'),
    'E27_Fractal_Pattern': dict(min_window=10, func='E27_Fractal_Pattern_expert'),
    'A1_ARIMA': dict(min_window=10, func='A1_ARIMA_expert'),
    'E25_Resampling_Chaos': dict(min_window=15, func='E25_Resampling_Chaos_expert'),
    'A1_ARIMA_v2': dict(min_window=6, func='A1_ARIMA_expert_v2'),
    
    # ENHANCED SPECIALISTS
    'G19_Catastrophic_Forgetting': dict(min_window=12, func='G19_Catastrophic_Forgetting_expert'),
    'Z32_Physics_Inspired_NN': dict(min_window=36, func='Z32_Physics_Inspired_NN_expert'),
    'B1_RandomForest_v2': dict(min_window=6, func='B1_RandomForest_expert_v2'),
    
    # CORE CONTRIBUTORS
    'X1_TakensEmbedding': dict(min_window=20, func='X1_TakensEmbedding_expert'),
    'X2_EntropyBoundary': dict(min_window=15, func='X2_EntropyBoundary_expert'),
    'X5_AdvTraining': dict(min_window=20, func='X5_AdversarialTraining_expert'),
    'F16_Multi_Armed_Bandits': dict(min_window=8, func='F16_Multi_Armed_Bandits_expert'),
    'D1_EnsembleStacking': dict(min_window=20, func='D1_EnsembleStacking_expert'),
    'E20_Fourier_Residue': dict(min_window=12, func='E20_Fourier_Residue_expert'),
    'E22_Differential_Entropy': dict(min_window=12, func='E22_Differential_Entropy_expert'),
    'E32_Poisson_Process': dict(min_window=15, func='E32_Poisson_Process_expert'),
    'F33_Ghost_Factorization': dict(min_window=30, func='F33_Ghost_Factorization_expert'),
    'G35_Meta_Reinforcement': dict(min_window=20, func='G35_Meta_Reinforcement_expert'),
    'G36_Evolutionary_Ensemble': dict(min_window=20, func='G36_Evolutionary_Ensemble_expert'),
    'Z1_Hyperchaos_Lyapunov': dict(min_window=40, func='Z1_Hyperchaos_Lyapunov_expert'),
    'Z5_ProbabilisticNeural_Circuit': dict(min_window=24, func='Z5_ProbabilisticNeural_Circuit_expert'),
    'Z10_QuantumEigen_Drift': dict(min_window=24, func='Z10_QuantumEigen_Drift_expert'),
    'Z30_SymbolicRegression': dict(min_window=36, func='Z30_SymbolicRegression_expert'),
    'Z31_Disentangled_decomposer': dict(min_window=36, func='Z31_Disentangled_decomposer_expert'),
    'Z40_VonNeumannExtractor': dict(min_window=24, func='Z40_VonNeumannExtractor_expert'),
    'Z42_MT19937_Fingerprint': dict(min_window=24, func='Z42_MT19937_FingerprintExpert'),
    
    # SUPPORTING TIER
    'A2_MovingAverage': dict(min_window=5, func='A2_MovingAverage_expert'),
    'A2_MovingAverage_v2': dict(min_window=6, func='A2_MovingAverage_expert_v2'),
    'F28_KSTest': dict(min_window=12, func='F28_KSTest_expert'),
    'E21_CRT_Reconstructor': dict(min_window=15, func='E21_CRT_Reconstructor_expert'),
    'G50_Neural_Discriminator': dict(min_window=20, func='G50_Neural_Discriminator_expert'),
    'X10_GraphEntropy_SubGraph': dict(min_window=20, func='X10_GraphEntropy_SubGraph_expert'),
    
    # CONTROLLED SUPPRESSED TIER
    'E40_NIST_80022_Battery': dict(min_window=20, func='E40_NIST_80022_Battery_expert'),
    'Z2_GrammaticalEvo_ML': dict(min_window=40, func='Z2_GrammaticalEvo_ML_expert'),
}

# ── ADJUST MIN_WINDOW TO FIT SMALL TRAINING SETS ─────────────────────────────
# Clamp every expert's min_window to at most 1 so that segments with a single
# training draw still run every expert. Every value registered above is >= 5,
# so this overrides all of them to 1; the per-expert numbers are kept only as
# a record of the originally intended windows.
for cfg in expert_registry.values():
    cfg['min_window'] = min(cfg['min_window'], 1)  # SUPPORT 1-DRAW SEGMENTS


# 6. COMPREHENSIVE ANALYTICS COLLECTOR ──────────────────────────────────────────
class UnifiedAnalyticsCollector:
    """Collect per-period prediction records and write them to one JSON file.

    All data lives in ``self.unified_data``::

        {"metadata": {...},
         "periods": {period_name: {"metadata": {...},
                                   "match_counts": {"4": [...], ...},
                                   "statistics": {...}}}}

    Intended call order, judging by the method names: ``initialize_period``,
    repeated ``collect_record``, ``finalize_period``, then
    ``save_unified_file`` once at the end.
    """

    def __init__(self):
        self.unified_data = {
            "metadata": {
                "collection_start": datetime.now().isoformat(),
                # Snapshot taken at construction time; later changes to
                # VALIDATION_PERIODS are not reflected here.
                "total_periods": len(VALIDATION_PERIODS),
                "threshold": 3,
                "system_version": "DHARMA_X_PHASE_6_COMPLETE"
            },
            "periods": {}
        }

    def initialize_period(self, period_name, train_end, predict_date, actual_target):
        """Create (or reset) the storage structure for ``period_name``.

        Only the "4", "5" and "6" hit buckets are created; records with other
        hit counts are dropped by ``collect_record``.
        """
        self.unified_data["periods"][period_name] = {
            "metadata": {
                "train_end": train_end,
                "predict_date": predict_date,
                "actual_target": actual_target,
                "collection_start": datetime.now().isoformat()
            },
            "match_counts": {
                "4": [], "5": [], "6": []
            },
            "statistics": {
                "total_predictions": 0,
                "collected_records": 0,
                "hit_distribution": Counter()
            }
        }

    def collect_record(self, period_name, hit_count, iteration, prediction, target,
                      template, entropy, beta, phase, cycle_position,
                      expert_weights, expert_performances):
        """Store one analysed prediction in the bucket for its hit count.

        Records with fewer than 3 hits are ignored. Records whose hit count
        has no bucket in the period's ``match_counts`` are ignored as well;
        previously a 3-hit record passed the threshold and then raised
        KeyError because no "3" bucket exists.

        Args:
            period_name: key of a period created with ``initialize_period``.
            hit_count: number of matching numbers.
            iteration: iteration index of the prediction.
            prediction: predicted numbers.
            target: actual numbers.
            template: expert name -> weight used for this prediction.
            entropy: entropy value of the prediction's weights.
            beta: beta value of the prediction.
            phase: phase label, stored as given.
            cycle_position: cycle position, stored as given.
            expert_weights: accepted for interface compatibility; not used
                (the stored weights come from ``template``).
            expert_performances: expert name -> performance value.

        Raises:
            KeyError: if ``period_name`` was never initialised.
        """
        if hit_count < 3:
            return

        period = self.unified_data["periods"][period_name]
        bucket = period["match_counts"].get(str(hit_count))
        if bucket is None:
            # No bucket enabled for this hit level: nothing to record.
            return

        predicted, actual = set(prediction), set(target)

        complete_record = {
            'validation_period': period_name,
            'iteration': iteration,
            'hit_count': hit_count,
            'prediction': prediction,
            'target': target,
            'matches': list(predicted & actual),
            'misses': list(actual - predicted),
            'extras': list(predicted - actual),
            'template': template,
            'entropy': entropy,
            'beta': beta,
            'phase': phase,
            'cycle_position': cycle_position,
            'timestamp': datetime.now().isoformat(),

            # Beta-entropy analysis
            'beta_entropy_ratio': beta / entropy if entropy > 0 else 0,
            'beta_deviation': abs(beta - BETA_TARGET),
            'entropy_deviation': abs(entropy - ENTROPY_TARGET),
            'optimal_convergence': self._calculate_convergence_score(beta, entropy),

            # Expert weight analysis
            'active_experts': list(template.keys()),
            'expert_weights': dict(template),
            'top_3_experts': self._get_top_experts(template, 3),
            'weight_entropy': self._calculate_weight_entropy(template),
            'weight_concentration': max(template.values()) if template else 0,

            # Pattern analysis
            'number_gaps': self._analyze_number_gaps(prediction),
            'number_clusters': self._analyze_number_clusters(prediction),
            'number_distribution': self._analyze_number_distribution(prediction),
            'sum_total': sum(prediction),
            # 117 is a hard-coded reference sum; presumably the expected
            # total of a draw - TODO confirm.
            'sum_deviation': abs(sum(prediction) - 117),

            # Performance context
            'expert_performances': dict(expert_performances),
            'performance_weighted_score': self._calculate_performance_score(template, expert_performances),

            # Collection metadata
            "collection_timestamp": datetime.now().isoformat()
        }

        bucket.append(complete_record)

        stats = period["statistics"]
        stats["collected_records"] += 1
        stats["hit_distribution"][hit_count] += 1

    # ===== Analysis helpers =====

    def _calculate_convergence_score(self, beta, entropy):
        """Return the mean of the beta and entropy closeness scores (0..1).

        Each score is 1 at the target and falls linearly to 0 (beta: slope 10,
        entropy: slope 2), clamped at 0.
        """
        beta_score = max(0, 1 - abs(beta - BETA_TARGET) * 10)
        entropy_score = max(0, 1 - abs(entropy - ENTROPY_TARGET) * 2)
        return (beta_score + entropy_score) / 2

    def _get_top_experts(self, template, n):
        """Return the ``n`` highest-weighted ``(expert, weight)`` pairs."""
        return sorted(template.items(), key=lambda x: x[1], reverse=True)[:n]

    def _calculate_weight_entropy(self, template):
        """Return the Shannon entropy (bits) of the weight distribution.

        Zero weights are skipped (``0 * log2(0)`` is NaN in NumPy), and a
        template whose weights sum to zero or less yields 0.0 instead of NaN.
        """
        if not template:
            return 0
        weights = np.array(list(template.values()), dtype=float)
        total = weights.sum()
        if total <= 0:
            return 0.0
        probs = weights / total
        probs = probs[probs > 0]
        return float(-(probs * np.log2(probs)).sum())

    def _analyze_number_gaps(self, prediction):
        """Return the differences between neighbouring numbers once sorted."""
        sorted_nums = sorted(prediction)
        return [b - a for a, b in zip(sorted_nums, sorted_nums[1:])]

    def _analyze_number_clusters(self, prediction):
        """Group sorted numbers into runs whose neighbours differ by <= 3.

        Returns a dict with ``cluster_count`` and ``cluster_sizes``; an empty
        prediction yields zero clusters instead of raising IndexError.
        """
        sorted_nums = sorted(prediction)
        if not sorted_nums:
            return {'cluster_count': 0, 'cluster_sizes': []}

        clusters = [[sorted_nums[0]]]
        for prev, num in zip(sorted_nums, sorted_nums[1:]):
            if num - prev <= 3:
                clusters[-1].append(num)
            else:
                clusters.append([num])

        return {'cluster_count': len(clusters), 'cluster_sizes': [len(c) for c in clusters]}

    def _analyze_number_distribution(self, prediction):
        """Count numbers per range 1-10, 11-20, 21-30 and 31-39.

        Numbers outside 1..39 are not counted.
        """
        ranges = {'1-10': 0, '11-20': 0, '21-30': 0, '31-39': 0}
        for num in prediction:
            if 1 <= num <= 10:
                ranges['1-10'] += 1
            elif 11 <= num <= 20:
                ranges['11-20'] += 1
            elif 21 <= num <= 30:
                ranges['21-30'] += 1
            elif 31 <= num <= 39:
                ranges['31-39'] += 1
        return ranges

    def _calculate_performance_score(self, template, performances):
        """Return the weight-averaged performance of the template's experts.

        Experts without a performance entry are left out; 0 is returned when
        nothing can be averaged.
        """
        if not template or not performances:
            return 0

        total_score = 0
        total_weight = 0
        for expert, weight in template.items():
            if expert in performances:
                total_score += weight * performances[expert]
                total_weight += weight

        return total_score / total_weight if total_weight > 0 else 0

    # ===== Period finalisation and persistence =====

    def finalize_period(self, period_name, total_predictions):
        """Record totals for a period.

        ``collection_rate`` is the percentage of predictions that were
        collected; it is 0.0 when ``total_predictions`` is 0 (previously that
        raised ZeroDivisionError).
        """
        stats = self.unified_data["periods"][period_name]["statistics"]
        stats["total_predictions"] = total_predictions
        stats["collection_end"] = datetime.now().isoformat()
        if total_predictions:
            stats["collection_rate"] = stats["collected_records"] / total_predictions * 100
        else:
            stats["collection_rate"] = 0.0

    def save_unified_file(self, filename="dharma_unified_analytics_complete.json"):
        """Write all collected data to ``filename`` as JSON and return the name.

        Values that JSON cannot encode are stringified via ``default=str``.
        """
        self.unified_data["metadata"]["collection_end"] = datetime.now().isoformat()

        with open(filename, 'w') as f:
            json.dump(self.unified_data, f, indent=2, default=str)

        return filename


# 7. ADVANCED UTILS: ENTROPY-LOCKED, PATTERN-AWARE  ───────────────────────────
# unique6: coerces any expert output into six unique ints in 1-39.
# Written to avoid ambiguous truth-value checks on NumPy arrays.

def unique6(vec):
    """Coerce arbitrary input into a sorted list of six distinct ints in 1..39.

    Numeric items (Python or NumPy ints/floats) are rounded and clamped to
    1..39; NaN, infinities and non-numeric items are skipped. The first six
    distinct values, in input order, are kept. If fewer than six remain, the
    rest are drawn with ``s_int(1, 39)`` until six distinct numbers exist.

    Args:
        vec: list, tuple, ndarray, other iterable, scalar or None.

    Returns:
        list[int]: six distinct values, sorted ascending.
    """
    def _random_six():
        return [s_int(1, 39) for _ in range(6)]

    if vec is None:
        vec = _random_six()
    elif isinstance(vec, np.ndarray):
        # A 0-d array has no length; treat it as unusable input, as before.
        vec = vec.tolist() if vec.ndim else _random_six()

    # A bare scalar becomes a one-element list; a falsy one (e.g. 0) is
    # replaced by random numbers.
    if not hasattr(vec, '__iter__'):
        vec = [vec] if vec else _random_six()

    picked, seen = [], set()
    for item in vec:
        if not isinstance(item, (int, float, np.integer, np.floating)):
            continue
        try:
            if np.isnan(item):
                continue
            number = max(1, min(39, int(round(float(item)))))
        except (TypeError, ValueError, OverflowError):
            # e.g. infinities cannot be rounded to an int.
            continue
        if number in seen:
            continue
        seen.add(number)
        picked.append(number)
        if len(picked) == 6:
            return sorted(picked)

    # Top up with random numbers that are not used yet.
    while len(picked) < 6:
        candidate = s_int(1, 39)
        if candidate not in seen:
            seen.add(candidate)
            picked.append(candidate)

    return sorted(picked)

# normalize: rescales a weight dict in place so the weights sum to ~1.
def normalize(d):
    """Rescale the weights of ``d`` in place so that they sum to about 1.

    Every weight is divided by the total (a total of 0 is treated as 1) and
    floored at 1e-8, so the result can sum to slightly more than 1. If the
    values cannot be summed or divided, every key receives the equal share
    ``1 / len(d)`` instead.

    Args:
        d: mapping of name -> weight; modified in place.

    Returns:
        The same object ``d`` (returned unchanged when empty or None).
    """
    if not d:
        return d

    try:
        total = sum(d.values()) or 1
        for key in d:
            d[key] = max(1e-8, d[key] / total)
    except (TypeError, ValueError, ArithmeticError):
        # Non-numeric or otherwise unusable weights: fall back to uniform.
        share = 1.0 / len(d)
        for key in d:
            d[key] = share

    return d

# entropy: Shannon entropy (bits) of a weight dict.
def entropy(wdict):
    """Return the Shannon entropy, in bits, of a weight dictionary.

    The weights are normalised by their sum (a sum of 0 is treated as 1) and
    entries at or below 1e-9 are ignored. Empty input, unusable values and a
    NaN result all yield 0.0.

    Args:
        wdict: mapping of name -> weight; not modified.

    Returns:
        float: entropy in bits, never NaN.
    """
    if not wdict:
        return 0.0

    try:
        w = np.array(list(wdict.values()))
        if w.size == 0:
            return 0.0

        w = w / (w.sum() or 1)
        with np.errstate(divide="ignore", invalid="ignore"):
            # Drop zero and negative shares; log2 is undefined for them.
            positive = w[w > 1e-9]
            if len(positive) == 0:
                return 0.0
            ent = float(-(positive * np.log2(positive)).sum())
        return 0.0 if np.isnan(ent) else ent
    except (TypeError, ValueError, ArithmeticError):
        # Non-numeric weights: report "no information" rather than failing.
        return 0.0

def beta_convergence_reward(beta):
    """Score how close ``beta`` is to BETA_TARGET.

    Inside the tolerance band the reward is ``1.0 + CONVERGENCE_BOOST``.
    Outside it the reward is ``1.0 - 2 * distance``, floored at 0.5.
    """
    gap = abs(beta - BETA_TARGET)
    return (
        1.0 + CONVERGENCE_BOOST
        if gap < BETA_TOLERANCE
        else max(0.5, 1.0 - (gap * 2))
    )

# 8. STATE MANAGEMENT  ──────────────────────────────────────────────────────────
def _blank_state():
    return dict(
        weights=defaultdict(float),
        performance=defaultdict(float),
        entropy_history=[],
        beta_history=[],
        success_patterns=[],
        expert_synergy=defaultdict(float),
        convergence_rate=0.0,
        iteration=0,
        breakthrough_count=0,
        pattern_memory=deque(maxlen=100),
        
        # Enhanced analytics
        successful_beta_range=[],
        optimal_beta_mean=0.0,
        optimal_beta_std=0.0,
        success_beta_histogram=[],
        total_beta_range=[],
        success_rate_by_beta=defaultdict(float),
        
        successful_entropy_range=[],
        optimal_entropy_mean=0.0,
        optimal_entropy_std=0.0,
        success_entropy_histogram=[],
        
        cycle_position_performance=defaultdict(int),
        hottest_positions=[],
        position_success_rate=defaultdict(float),
        position_clusters=[],
        
        expert_optimal_weights=defaultdict(dict),
        correlation_analysis=defaultdict(float),
        success_beta_entropy_ratio=[]
    )

def load_state(validation_period):
    """Load the persisted state for a validation period, or start a fresh one.

    The state is read from ``apex_state_<validation_period>.pkl`` in the
    current directory. A missing or unreadable file, or a file that does not
    contain a dict, yields a blank state. Keys missing from an older state
    file are filled in from a blank state. Every registered expert then gets
    an initial weight (tier multiplier / 10, or a uniform share when it is in
    no tier) and a performance entry defaulting to 0.5, and the weights are
    normalised.

    Returns:
        dict: the state dictionary.
    """
    state_file = f"apex_state_{validation_period}.pkl"

    st = None
    if os.path.exists(state_file):
        try:
            with warnings.catch_warnings():
                warnings.simplefilter("ignore")
                # NOTE(security): unpickling runs arbitrary code from the
                # file; only load state files this program wrote itself.
                with open(state_file, "rb") as fh:
                    st = pickle.load(fh)
        except Exception:
            # Best effort: a corrupt or incompatible file means "start over".
            st = None

    if not isinstance(st, dict):
        st = _blank_state()
    else:
        # Fill in keys that an older state file may lack.
        for key, default in _blank_state().items():
            st.setdefault(key, default)

    # First tier that lists an expert decides its starting weight.
    tiers = (ORACLE_EXPERTS, BREAKTHROUGH_EXPERTS, SPECIALIST_EXPERTS, SUPPRESSED_EXPERTS)
    for ex in expert_registry:
        if ex not in st["weights"]:
            tier = next((t for t in tiers if ex in t), None)
            if tier is not None:
                st["weights"][ex] = tier[ex] / 10.0
            else:
                st["weights"][ex] = 1.0 / len(expert_registry)

        st["performance"][ex] = st["performance"].get(ex, 0.5)

    normalize(st["weights"])
    return st

def save_state(st, validation_period):
    """Pickle ``st`` to ``apex_state_<validation_period>.pkl``.

    The file is opened in a ``with`` block so it is flushed and closed even
    when pickling fails. Failures are reported with a printed warning and are
    not raised.
    """
    state_file = f"apex_state_{validation_period}.pkl"
    try:
        with open(state_file, "wb") as fh:
            pickle.dump(st, fh)
    except Exception as e:
        print(f"Warning: Could not save state - {e}")

# 9. EXPERT EXECUTION  ──────────────────────────────────────────────────────────
def run_experts(df, beta, st):
    """Run every eligible expert on ``df`` and score its prediction.

    An expert is skipped when ``df`` has fewer rows than its ``min_window``.
    Its function is looked up by name in the module globals (``_stub`` when
    missing) and called inside ``absolute_silence()``. If the call or the
    result handling raises, the expert gets six random numbers and a
    confidence of 0.4.

    Score = confidence * tier multiplier * (1 + past performance) * beta
    reward, floored at 1e-6; the scores are normalised before returning.

    Args:
        df: training data passed to each expert function.
        beta: current beta value, used for the beta reward.
        st: state dict; ``st["performance"]`` is read per expert.

    Returns:
        tuple: ``(preds, scores)``, both keyed by expert name.
    """
    preds = {}
    confs = {}

    for name, spec in expert_registry.items():
        if len(df) < spec["min_window"]:
            continue

        expert_fn = globals().get(spec["func"], _stub)
        try:
            with absolute_silence():
                guess, confidence = expert_fn(df)
            preds[name], confs[name] = unique6(guess), confidence
        except Exception:
            preds[name], confs[name] = unique6([s_int(1, 39) for _ in range(6)]), 0.4

    # The first tier that lists an expert supplies its multiplier.
    tiers = (ORACLE_EXPERTS, BREAKTHROUGH_EXPERTS, SPECIALIST_EXPERTS, SUPPRESSED_EXPERTS)

    scores = {}
    for name in preds:
        tier_mult = next((tier[name] for tier in tiers if name in tier), 1.0)
        perf_mult = 1.0 + st["performance"][name]
        beta_bonus = beta_convergence_reward(beta)
        scores[name] = max(1e-6, confs[name] * tier_mult * perf_mult * beta_bonus)

    normalize(scores)
    return preds, scores

# 10. ENSEMBLE BUILDING  ───────────────────────────────────────────────────────
def build_set(preds, scores, st, forced_entropy=False):
    """Pick a team of experts and blend their predictions into six numbers.

    With ``forced_entropy`` and at least four scored experts, a random team
    of up to 6-10 experts is sampled and weighted equally. Otherwise all
    available oracle experts are taken first and the team is filled up to
    eight members by score-weighted random draws (breakthrough experts are
    preferred while the team has fewer than four members).

    The team weights are normalised and, when their entropy is more than
    ENTROPY_TOLERANCE away from ENTROPY_TARGET, flattened (power 0.8) or
    sharpened (power 1.2) once and normalised again.

    Args:
        preds: expert name -> predicted numbers.
        scores: expert name -> score.
        st: state dict; not used by this function.
        forced_entropy: use the equal-weight random team described above.

    Returns:
        tuple: ``(numbers, weights)``; six random numbers and ``{}`` when
        ``scores`` is empty.
    """
    pool = list(scores)
    if not pool:
        return unique6([s_int(1, 39) for _ in range(6)]), {}

    if forced_entropy and len(pool) >= 4:
        # Equal weights over a random team maximise the weight entropy.
        team_size = min(len(pool), s_int(6, 10))
        team = s_smpl(pool, team_size)
        sub_w = dict.fromkeys(team, 1.0)
    else:
        # Oracle experts are always on the team when they produced a score.
        team = [name for name in ORACLE_EXPERTS if name in pool]
        leftover = [name for name in pool if name not in team]

        team_cap = 8
        while len(team) < team_cap and leftover:
            if len(team) < 4:
                preferred = [name for name in leftover if name in BREAKTHROUGH_EXPERTS]
                options = preferred if preferred else leftover
            else:
                options = leftover

            if not options:
                break

            # Roulette-wheel draw proportional to score.
            option_weights = [scores[name] for name in options]
            weight_sum = sum(option_weights)
            if weight_sum == 0:
                pick = s_pick(options)
            else:
                threshold = s_unif(0, weight_sum)
                running = 0
                pick = options[-1]  # used if rounding leaves no match
                for name, weight in zip(options, option_weights):
                    running += weight
                    if threshold <= running:
                        pick = name
                        break

            if pick not in leftover:
                break  # safety net against an endless loop
            team.append(pick)
            leftover.remove(pick)

        sub_w = {name: scores[name] for name in team}

    normalize(sub_w)

    # One corrective step towards the entropy target.
    current_entropy = entropy(sub_w)
    if abs(current_entropy - ENTROPY_TARGET) > ENTROPY_TOLERANCE:
        # Exponent < 1 flattens the weights (more entropy), > 1 sharpens them.
        exponent = 0.8 if current_entropy < ENTROPY_TARGET else 1.2
        for name in sub_w:
            sub_w[name] = sub_w[name] ** exponent
        normalize(sub_w)

    # Weighted sum of the team's predictions.
    blended = np.zeros(6)
    for name, weight in sub_w.items():
        if name not in preds:
            continue
        blended += weight * np.array(preds[name])

    return unique6(blended), sub_w




# 11. TEMPORAL VALIDATION ORCHESTRATOR  ────────────────────────────────────────
def validate_period_unified(df, validation_config, unified_collector, n_sets=100, max_it=10000, thr=3):
    """Run the iterative orchestration for one validation period and print its hit statistics.

    Args:
        df: Draw history with a ``Date`` column and ``Number1``..``Number6`` columns.
            NOTE: the ``Date`` column is converted to datetime in place on the
            caller's frame.
        validation_config: Dict with ``name``, ``train_end`` and ``predict_date``;
            optional ``training_dates`` (explicit training draws) and ``segment``
            (used for display only).
        unified_collector: Object whose ``collect_record(...)`` is called for every
            set scoring at least ``MIN_HIT_LEVEL`` hits.
        n_sets: Number of prediction sets built per iteration.
        max_it: Number of orchestration iterations.
        thr: Minimum hit count for a set to be counted as "passed" and kept in the
            top-predictions list.

    Returns:
        int: Total number of prediction sets evaluated. ``0`` is returned when the
        period cannot be validated (no data up to ``train_end``, empty training
        window, or no draw recorded on ``predict_date``).
    """
    period_name = validation_config["name"]
    train_end = validation_config["train_end"]
    predict_date = validation_config["predict_date"]
    has_training_dates = 'training_dates' in validation_config

    print(f"\n{'='*80}")
    print(f"🚀 DHARMA_X SEGMENTED VALIDATION - {period_name.upper()}")
    if 'segment' in validation_config:
        print(f"📊 SEGMENT {validation_config['segment']} - TRAINING SIZE: {len(validation_config.get('training_dates', []))} DRAWS")
    print(f"{'='*80}")

    # ── Training window ──────────────────────────────────────────────────────
    df['Date'] = pd.to_datetime(df['Date'])
    train_end_dt = pd.to_datetime(train_end)

    # Everything drawn on or before train_end, oldest first.
    available_data = df[df['Date'] <= train_end_dt].copy()
    available_data = available_data.sort_values('Date', ascending=True)

    if len(available_data) < 1:
        print(f"❌ No data available before {train_end}")
        return 0  # 0 total predictions signals failure to the caller

    if has_training_dates:
        # Segmented mode: train on exactly the dates listed for this segment.
        training_dates = pd.to_datetime(validation_config['training_dates'])
        train_df = df[df['Date'].isin(training_dates)].copy()
        train_df = train_df.sort_values('Date', ascending=True)
    else:
        # Fallback: the three most recent available draws.
        train_df = available_data.tail(3)

    print(f"🔍 Available data range: {available_data['Date'].min()} to {available_data['Date'].max()}")
    print(f"🔍 Selected window: {train_df['Date'].min()} to {train_df['Date'].max()}")

    if train_df.empty:
        print(f"❌ Training window is empty for {period_name}")
        return 0

    dynamic_start = train_df['Date'].min().strftime('%Y-%m-%d')
    dynamic_end = train_df['Date'].max().strftime('%Y-%m-%d')

    print(f"📅 Dynamic Training: {dynamic_start} → {dynamic_end}")
    print(f"📊 Training draws: {len(train_df)} (target: 3)")
    if has_training_dates:
        print(f"📅 Segmented Training: {validation_config['training_dates']}")
        print(f"📊 Training Window Size: {len(validation_config['training_dates'])} draws")
    else:
        print(f"📅 Training: {train_df['Date'].min()} → {train_end}")
    print(f"🎯 Prediction Target: {predict_date}")
    print(f"📊 Training samples: {len(train_df)}")

    # ── Target draw ──────────────────────────────────────────────────────────
    predict_date_dt = pd.to_datetime(predict_date)
    actual_results = df[df['Date'] == predict_date_dt]

    if actual_results.empty:
        print(f"❌ No actual result found for {predict_date}")
        # Same failure value as the other early exits; the caller expects an int.
        return 0

    actual = actual_results.iloc[0]
    actual_numbers = [actual[f'Number{i}'] for i in range(1, 7)]
    actual_set = set(actual_numbers)  # hoisted: reused for every hit computation

    print(f"✅ Target result: {actual_numbers}")
    seg_summary = SegmentSummaryCollector(min_k_include=0)  # Accept all 0..6
    seg_summary.set_actuals(actual_numbers)

    # ── State ────────────────────────────────────────────────────────────────
    st = load_state(period_name)
    # Boost universal experts once per period, then renormalize.
    for _exp in UNIVERSAL_EXPERTS:
        if _exp in st["weights"]:
            st["weights"][_exp] *= UNIVERSAL_EXPERT_BOOST
    normalize(st["weights"])

    best_predictions = []
    total_hit_counts = Counter()

    # Phase is chosen by weight; with these weights it always resolves to BALANCED.
    phase_weights = {'BALANCED': 1.00, 'EXPLOIT': 0.00, 'EXPLORE': 0.00}
    phase = max(phase_weights, key=lambda p: phase_weights[p])

    # ── Orchestration loop ───────────────────────────────────────────────────
    for it in range(1, max_it + 1):
        st["iteration"] = it

        default_cycle_pos = (it - 1) % BETA_CYCLE_LENGTH
        cycle_pos = force_universal_cycle_positions(it, default_cycle_pos)

        beta = beta_dual_anchor(it)
        current_entropy_target = adaptive_entropy_targeting(it, max_it)
        # Every fifth iteration (outside aggressive mode) re-centre beta on a fixed corridor.
        if STRATEGY_MODE != 'aggressive' and (it % 5 == 1):
            beta = 0.5212 + s_unif(-0.0219, 0.0219)  # 5/6 envelope

        # Blend beta toward BETA_TARGET; fully converged after 70% of the iterations.
        convergence_factor = min(1.0, it / (max_it * 0.7))
        beta = beta * (1 - convergence_factor) + BETA_TARGET * convergence_factor

        preds, scores = run_experts(train_df, beta, st)
        current_entropy = entropy(scores)
        if abs(current_entropy - current_entropy_target) > ENTROPY_TOLERANCE:
            beta, current_entropy = enforce_breakthrough_signature(beta, current_entropy)

        st["entropy_history"].append(current_entropy)
        st["beta_history"].append(beta)

        passed, hit_dist = 0, Counter()
        breakthrough_this_iter = 0

        # Tier boundaries (set indices) for this iteration.
        universal_cutoff = int(n_sets*FORCE_UNIVERSAL_SHARE)
        exploration_cutoff = int(n_sets*(FORCE_UNIVERSAL_SHARE+FORCE_EXPLORATION_SHARE))
        breakthrough_cutoff = int(n_sets*(FORCE_UNIVERSAL_SHARE+FORCE_EXPLORATION_SHARE+FORCE_6OF6_SHARE))
        elite_sets = int(n_sets * RESOURCE_ALLOCATION['elite'])
        explore_sets = n_sets - elite_sets

        for i in range(n_sets):
            if i < universal_cutoff:
                # Universal-pattern sets, falling back to a regular set.
                u_pred, u_subw = build_universal_set(preds, scores, st)
                if u_pred:
                    pred, subw = u_pred, u_subw
                else:
                    pred, subw = build_set(preds, scores, st, forced_entropy=False)

            elif i < exploration_cutoff:
                # Exploration sets with forced entropy.
                pred, subw = build_set(preds, scores, st, forced_entropy=True)

            elif i < breakthrough_cutoff:
                # 6/6 breakthrough sets, falling back to a forced-entropy set.
                b_pred, b_subw = build_6of6_breakthrough_set(preds, scores, st)
                if b_pred:
                    pred, subw = b_pred, b_subw
                else:
                    pred, subw = build_set(preds, scores, st, forced_entropy=True)

            elif i < elite_sets:
                # Elite combos: score-weighted blend of a predefined expert combination.
                combo = ELITE_COMBOS[i % len(ELITE_COMBOS)]
                subw = {expert: scores[expert] for expert in combo if expert in scores}
                if subw:
                    normalize(subw)
                    agg = np.zeros(6)
                    for expert, weight in subw.items():
                        if expert in preds:
                            agg += weight * np.array(preds[expert])
                    pred = unique6(agg)
                else:
                    pred, subw = build_set(preds, scores, st, forced_entropy=False)

            else:
                # Remaining sets: force entropy for the leading share of them.
                forced = ((i - elite_sets) < explore_sets * ENTROPY_FORCING_RATIO)
                pred, subw = build_set(preds, scores, st, forced_entropy=forced)

            hit = len(set(pred) & actual_set)
            hit_dist[hit] += 1
            total_hit_counts[hit] += 1
            # Recorded exactly once per set (previously high-hit sets were added twice).
            seg_summary.update(pred, hit)

            if hit >= MIN_HIT_LEVEL:
                unified_collector.collect_record(
                    period_name=period_name,
                    hit_count=hit,
                    iteration=it,
                    prediction=pred,
                    target=actual_numbers,
                    template=subw,
                    entropy=current_entropy,
                    beta=beta,
                    phase=phase,
                    cycle_position=cycle_pos,
                    expert_weights=st['weights'],
                    expert_performances=st['performance']
                )

            # Breakthrough detection
            if hit >= 5:
                breakthrough_this_iter += 1
                st["breakthrough_count"] += 1
                st["success_patterns"].append((pred, subw, current_entropy, beta))

                # Reward every expert in the winning template, capped at 2.0.
                for exp in subw:
                    st["performance"][exp] = min(2.0, st["performance"][exp] + 0.1)

            if hit >= thr:
                passed += 1
                best_predictions.append((it, hit, pred, current_entropy, beta))

        # Progress reporting
        if it % 50 == 0 or breakthrough_this_iter > 0:
            hits_str = ", ".join(f"{h}/6:{c}" for h, c in sorted(hit_dist.items()) if c > 0)
            target_mode = "6/6" if current_entropy_target == ENTROPY_6_TARGET else "4/5"
            print(f"🔄 Iter {it:4d} | β={beta:.4f} | ent={current_entropy:.4f} | "
                  f"mode={target_mode} | pos={cycle_pos} | Pass:{passed:2d}/{n_sets} | Hits: {hits_str}")

            if breakthrough_this_iter > 0:
                print(f"   🎯 BREAKTHROUGHS: {breakthrough_this_iter} | Total: {st['breakthrough_count']}")

        # Dynamic expert weight adjustment
        if breakthrough_this_iter > 0:
            # NOTE(review): `pred` is whatever the LAST set of this iteration produced,
            # not necessarily a breakthrough set, and every weight is scaled by the
            # same factor before normalize() — confirm this is the intended behavior.
            for sig, w in SIGNATURE_WEIGHTS.items():
                if matches_pattern(sig, pred):
                    for exp in st["weights"]:
                        st["weights"][exp] *= (1 + w)

        normalize(st["weights"])

    # ── Persist and report ───────────────────────────────────────────────────
    save_state(st, period_name)

    seg_summary.print_summary(
        segment_label=str(validation_config.get('segment', '1')),
        period_label=period_name
    )

    print(f"\n{'='*60}")
    print(f"📊 {period_name.upper()} VALIDATION COMPLETE")
    print(f"{'='*60}")

    total_predictions = sum(total_hit_counts.values())
    success_4plus = sum(count for hit, count in total_hit_counts.items() if hit >= 4)
    success_5plus = sum(count for hit, count in total_hit_counts.items() if hit >= 5)

    def pct_of_total(count):
        # Guarded so n_sets == 0 or max_it == 0 cannot raise ZeroDivisionError.
        return count / total_predictions * 100 if total_predictions > 0 else 0

    print(f"Total predictions: {total_predictions}")
    print(f"≥4/6 successes: {success_4plus} ({pct_of_total(success_4plus):.2f}%)")
    print(f"≥5/6 successes: {success_5plus} ({pct_of_total(success_5plus):.2f}%)")

    print("\nHit distribution:")
    for hit in range(7):
        count = total_hit_counts[hit]
        print(f" {hit}/6: {count:5d} ({pct_of_total(count):5.2f}%)")

    # Show the ten best sets: most hits first, earliest iteration breaking ties.
    if best_predictions:
        print(f"\nTop predictions (≥{thr}/6):")
        ranked = sorted(best_predictions, key=lambda x: (-x[1], x[0]))[:10]
        for best_it, best_hit, best_pred, best_ent, best_beta in ranked:
            print(f" Iter {best_it:4d} | {best_hit}/6 | {best_pred} | E:{best_ent:.4f} B:{best_beta:.4f}")

    return total_predictions

# 12. MULTI-PERIOD VALIDATION CONTROLLER  ──────────────────────────────────────
def run_comprehensive_validation(df_path, n_sets=100, max_it_per_period=10000):
    """Run validation across all generated periods, then display and save the results.

    The Excel file at ``df_path`` is loaded, its columns are standardized to
    ``Date`` / ``Number1``..``Number6``, the module-level ``VALIDATION_PERIODS``
    is regenerated from the data, and ``validate_period_unified`` is executed for
    each period. A failure in one period is reported and skipped.

    Args:
        df_path: Path to the Excel file with the draw history.
        n_sets: Prediction sets per iteration, forwarded to ``validate_period_unified``.
        max_it_per_period: Iterations per period, forwarded as ``max_it``.

    Returns:
        tuple: ``(all_results, unified_file)``. ``all_results`` maps period name to
        its hit distribution, total predictions and collected-record count;
        ``unified_file`` is the value returned by ``save_unified_file()`` or ``None``
        when no period completed. ``(None, None)`` is returned if an unexpected
        exception aborts the run.
    """
    global VALIDATION_PERIODS

    def display_segmented_results(all_results):
        """Display segmented prediction results with counts and percentages on-screen"""
        # Group hit distributions by month, then by segment.
        month_segment_data = {}
        for period_name, results in all_results.items():
            if '_Seg' in period_name:
                # rsplit keeps names containing '_Seg' more than once unpackable.
                month, segment = period_name.rsplit('_Seg', 1)
                segment = f"Seg{segment}"
                month_segment_data.setdefault(month, {})[segment] = results.get('hit_distribution', {})

        print(f"\n{'='*80}")
        print("📊 DHARMA_X SEGMENTED PREDICTION RESULTS")
        print(f"{'='*80}")

        for month in sorted(month_segment_data.keys()):
            segments = month_segment_data[month]
            segment_names = sorted(segments.keys(), key=lambda x: int(x.replace('Seg', '')))

            print(f"\n{month}")
            print("─" * 60)

            header = "      " + "".join(f"{seg:>15}" for seg in segment_names)
            print(header)

            # One row per hit level, one column per segment.
            for hit_level in range(7):
                hit_label = f"{hit_level}/6"
                row_data = [f"{hit_label:>4}  "]

                for segment in segment_names:
                    hit_data = segments.get(segment, {})
                    count = hit_data.get(hit_level, 0)
                    total = sum(hit_data.values()) if hit_data else 1
                    percentage = (count / total * 100) if total > 0 else 0

                    if count > 0:
                        formatted_value = f"{count} ({percentage:.2f}%)"
                    else:
                        formatted_value = "-"

                    row_data.append(f"{formatted_value:>15}")

                print("".join(row_data))

    def display_summary_statistics(all_results):
        """Display summary statistics for each segment on-screen"""
        summary_data = []

        for period_name, results in all_results.items():
            if '_Seg' not in period_name:
                continue
            month, segment = period_name.rsplit('_Seg', 1)

            hit_dist = results.get('hit_distribution', {})
            total_predictions = sum(hit_dist.values())

            def at_least(floor):
                return sum(count for hit, count in hit_dist.items() if hit >= floor)

            def as_pct(count):
                return (count / total_predictions * 100) if total_predictions > 0 else 0

            success_3plus = at_least(3)
            success_4plus = at_least(4)
            success_5plus = at_least(5)
            success_6 = hit_dist.get(6, 0)

            summary_data.append({
                'Month': month,
                'Segment': f"Seg{segment}",
                'Total': f"{total_predictions:,}",
                '≥3/6': f"{success_3plus} ({as_pct(success_3plus):.2f}%)",
                '≥4/6': f"{success_4plus} ({as_pct(success_4plus):.2f}%)",
                '≥5/6': f"{success_5plus} ({as_pct(success_5plus):.2f}%)",
                '6/6': f"{success_6} ({as_pct(success_6):.2f}%)"
            })

        if summary_data:
            print(f"\n{'='*80}")
            print("📈 SEGMENT PERFORMANCE SUMMARY")
            print(f"{'='*80}")

            header = f"{'Month':<10} {'Segment':<8} {'Total':<12} {'≥3/6':<18} {'≥4/6':<18} {'≥5/6':<18} {'6/6':<18}"
            print(header)
            print("─" * len(header))

            for data in summary_data:
                row = f"{data['Month']:<10} {data['Segment']:<8} {data['Total']:<12} {data['≥3/6']:<18} {data['≥4/6']:<18} {data['≥5/6']:<18} {data['6/6']:<18}"
                print(row)
        else:
            print("❌ No summary statistics to display")

    def display_comprehensive_analysis(all_results):
        """Display both detailed and summary results"""
        display_segmented_results(all_results)
        display_summary_statistics(all_results)

        print(f"\n{'='*80}")
        print("✅ SEGMENTED ANALYSIS COMPLETE")
        print(f"{'='*80}")

    unified_collector = UnifiedAnalyticsCollector()
    # Bound up-front so the final return is valid even when no period completes
    # (previously this raised UnboundLocalError and was reported as a critical error).
    unified_file = None
    try:
        # Load data
        print("📁 Loading data file...")
        df = pd.read_excel(df_path)

        # Standardize column names BEFORE anything consumes the frame, so the
        # period generator sees the same 'Date' / 'NumberN' columns as validation.
        if 'Date' not in df.columns:
            date_cols = [col for col in df.columns if 'date' in col.lower()]
            if date_cols:
                df.rename(columns={date_cols[0]: 'Date'}, inplace=True)

        for i in range(1, 7):
            old_cols = [col for col in df.columns if f'n{i}' in col.lower() or f'number{i}' in col.lower()]
            if old_cols:
                df.rename(columns={old_cols[0]: f'Number{i}'}, inplace=True)

        # GENERATE SEGMENTED PERIODS FROM DATA
        VALIDATION_PERIODS = generate_segmented_periods(df)
        print(f"🎯 Generated {len(VALIDATION_PERIODS)} segmented validation periods")

        # Segments per month, in first-seen order.
        segment_summary = Counter(period['month'] for period in VALIDATION_PERIODS)

        print("📊 SEGMENTED PERIOD BREAKDOWN:")
        for month, seg_count in segment_summary.items():
            print(f"  {month}: {seg_count} segments")

        print(f"✅ Data loaded: {len(df)} records")
        print(f"📅 Date range: {df['Date'].min()} to {df['Date'].max()}")

        # ── DEBUG: data availability for each period ─────────────────────────
        print("\n🔍 DEBUGGING DATA AVAILABILITY:")
        for period in VALIDATION_PERIODS:
            train_end_dt = pd.to_datetime(period["train_end"])
            available = df[df['Date'] <= train_end_dt]
            print(f"{period['name']}: {len(available)} draws available ending {period['train_end']}")
            if len(available) > 0:
                last_3 = available.tail(3)
                print(f"  → Last 3: {last_3['Date'].min().strftime('%Y-%m-%d')} to {last_3['Date'].max().strftime('%Y-%m-%d')}")
            else:
                print(f"  → ❌ NO DATA AVAILABLE")
            print()

        # ── EXPERT LOADING DIAGNOSTIC ────────────────────────────────────────
        print("\n🔍 Expert Function Diagnostic:")
        missing = []
        found = 0
        for ex, cfg in expert_registry.items():
            fn = globals().get(cfg['func'])
            # An expert counts as missing when unresolved or bound to the fallback stub.
            if fn is None or fn is _stub:
                missing.append(cfg['func'])
            else:
                found += 1

        print(f"✅ Experts found: {found}/{len(expert_registry)}")
        if missing:
            print(f"❌ Missing experts: {len(missing)}")
            for m in missing[:5]:  # Show first 5 missing
                print(f"   - {m}")
            if len(missing) > 5:
                print(f"   ... and {len(missing)-5} more")
        else:
            print("🎯 All expert functions loaded successfully!")
        print()

        # ── Run validation for each period ───────────────────────────────────
        all_results = {}
        all_analytics = {}

        for period_config in VALIDATION_PERIODS:
            try:
                period_name = period_config["name"]
                train_end = period_config["train_end"]
                predict_date = period_config["predict_date"]

                # Get actual target
                predict_date_dt = pd.to_datetime(predict_date)
                actual_results = df[df['Date'] == predict_date_dt]
                if actual_results.empty:
                    # Explicit message instead of an opaque IndexError from .iloc[0].
                    print(f"❌ Error in {period_name}: no actual result found for {predict_date}")
                    continue
                actual_numbers = [actual_results.iloc[0][f'Number{i}'] for i in range(1, 7)]

                # Initialize period in unified collector
                unified_collector.initialize_period(period_name, train_end, predict_date, actual_numbers)
                print(f"\n🔄 Starting {period_name}...")

                total_predictions = validate_period_unified(
                    df=df,
                    validation_config=period_config,
                    unified_collector=unified_collector,
                    n_sets=n_sets,
                    max_it=max_it_per_period,
                    thr=3
                )

                # Finalize period statistics
                unified_collector.finalize_period(period_name, total_predictions)
                print(f"✅ {period_name} completed successfully")

                # Store results for cross-period analysis
                period_stats = unified_collector.unified_data["periods"][period_name]["statistics"]
                all_results[period_name] = {
                    'hit_distribution': dict(period_stats["hit_distribution"]),
                    'total_predictions': total_predictions,
                    'collected_records': period_stats["collected_records"]
                }

            except Exception as e:
                # Best effort: report the failing period and move on to the next one.
                print(f"❌ Error in {period_config.get('name', 'unknown_period')}: {e}")
                continue

        # ── Cross-period analysis ────────────────────────────────────────────
        print(f"\n{'='*80}")
        print("📊 COMPREHENSIVE MULTI-PERIOD ANALYSIS")
        print(f"{'='*80}")

        if all_results:
            cross_analysis = generate_cross_period_analysis(all_results, all_analytics)
            print(f"\n{'='*80}")
            print("📊 DISPLAYING SEGMENTED RESULTS")
            print(f"{'='*80}")

            display_comprehensive_analysis(all_results)

            # Save comprehensive results
            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
            results_file = f"dharma_comprehensive_validation_{timestamp}.json"

            with open(results_file, 'w') as f:
                json.dump({
                    'validation_summary': {
                        'periods_completed': len(all_results),
                        'total_periods': len(VALIDATION_PERIODS),
                        'timestamp': datetime.now().isoformat()
                    },
                    'period_results': all_results,
                    'cross_period_analysis': cross_analysis
                }, f, indent=2, default=str)

            print(f"📄 Comprehensive results saved: {results_file}")
            # Save unified analytics file
            unified_file = unified_collector.save_unified_file()
            print(f"📊 Unified analytics saved: {unified_file}")

            # Display summary: number of (period, hit level) pairs with at least one set.
            print("\nValidation Summary:")
            total_4plus = sum(sum(1 for hit, count in result['hit_distribution'].items()
                                  if hit >= 4 and count > 0)
                              for result in all_results.values())
            total_5plus = sum(sum(1 for hit, count in result['hit_distribution'].items()
                                  if hit >= 5 and count > 0)
                              for result in all_results.values())

            print(f"Periods completed: {len(all_results)}/{len(VALIDATION_PERIODS)}")
            print(f"Cross-period ≥4/6 patterns: {total_4plus}")
            print(f"Cross-period ≥5/6 patterns: {total_5plus}")

            consistency = cross_analysis.get('consistency_score', 0)
            if consistency > 0.7:
                print("🎯 HIGH CONSISTENCY DETECTED across validation periods!")
            elif consistency > 0.5:
                print("📊 MODERATE CONSISTENCY across validation periods")
            else:
                print("⚠️  LOW CONSISTENCY - System may need refinement")

            print(f"\n{'='*80}")
            print("🧠 EXTRACTING BREAKTHROUGH INTELLIGENCE (≥4/6)")
            print(f"{'='*80}")

            try:
                breakthrough_analysis = extract_breakthrough_intelligence(unified_file)

                if breakthrough_analysis:
                    recommendations = generate_orchestrator_optimization_recommendations(breakthrough_analysis)

                    # Display breakthrough intelligence
                    print("\n🎯 BREAKTHROUGH PARAMETER ANALYSIS:")
                    params = breakthrough_analysis['parameter_sweet_spots']
                    print(f"   β Range: {params['beta_min']:.4f} → {params['beta_max']:.4f} (μ={params['beta_mean']:.4f})")
                    print(f"   H Range: {params['entropy_min']:.4f} → {params['entropy_max']:.4f} (μ={params['entropy_mean']:.4f})")

                    print("\n🔥 TOP BREAKTHROUGH EXPERT COMBINATIONS:")
                    for i, (combo, count) in enumerate(breakthrough_analysis['top_expert_combinations'][:5], 1):
                        expert_list = list(combo)[:3]  # Show first 3 experts
                        print(f"   {i}. {expert_list}... → {count} successes")

                    print("\n⚡ CYCLE POSITION HOTSPOTS:")
                    for pos, count in breakthrough_analysis['cycle_hotspots'][:5]:
                        print(f"   Position {pos}: {count} breakthroughs")

                    print("\n🚀 ORCHESTRATOR OPTIMIZATION RECOMMENDATIONS:")
                    for i, rec in enumerate(recommendations, 1):
                        print(f"   {i}. {rec['type'].upper()}: {rec['parameter']}")
                        print(f"      Target: {rec['value']}")
                        print(f"      Reason: {rec['reason']}")

                    print(f"\n{'='*80}")
                    print("✅ BREAKTHROUGH INTELLIGENCE ANALYSIS COMPLETE")
                    print(f"{'='*80}")

            except Exception as e:
                # The intelligence report is optional; results are already saved.
                print(f"❌ Breakthrough analysis failed: {e}")

        else:
            print("❌ No validation periods completed successfully")

        return all_results, unified_file

    except Exception as e:
        print(f"❌ Critical error in validation: {e}")
        traceback.print_exc()
        return None, None

def generate_cross_period_analysis(all_results, all_analytics):
    """Summarize ≥4/6 and ≥5/6 success rates per period and their cross-period consistency.

    Args:
        all_results: Mapping of period name to a dict holding a
            ``'hit_distribution'`` mapping (hit count -> number of sets).
        all_analytics: Accepted for interface compatibility; not read here.

    Returns:
        dict: ``{}`` when ``all_results`` is empty, otherwise the per-period
        rates, a consistency score (1 minus the standard deviation of the
        ≥4/6 rates), the average rates, the period count and a confidence label.
    """
    if not all_results:
        return {}

    def _sets_with_at_least(distribution, floor):
        # Number of sets whose hit count reaches `floor`.
        return sum(n for level, n in distribution.items() if level >= floor)

    # Per-period success rates; a period without any sets scores 0 on both.
    period_success_rates = {}
    for period_key, period_result in all_results.items():
        distribution = period_result['hit_distribution']
        n_total = sum(distribution.values())
        if n_total > 0:
            rate_4 = _sets_with_at_least(distribution, 4) / n_total
            rate_5 = _sets_with_at_least(distribution, 5) / n_total
        else:
            rate_4 = 0
            rate_5 = 0
        period_success_rates[period_key] = {
            '4plus_rate': rate_4,
            '5plus_rate': rate_5,
            'total_predictions': n_total,
        }

    rates_4plus = [entry['4plus_rate'] for entry in period_success_rates.values()]
    rates_5plus = [entry['5plus_rate'] for entry in period_success_rates.values()]

    # The lists cannot be empty here (all_results is non-empty), so the
    # aggregates are taken directly.
    consistency_score = 1.0 - np.std(rates_4plus)
    mean_4plus = np.mean(rates_4plus)
    mean_5plus = np.mean(rates_5plus)

    # Confidence label: both consistency and the average ≥4/6 rate must clear the bar.
    if consistency_score > 0.7 and mean_4plus > 0.02:
        recommendation = "HIGH_CONFIDENCE"
    elif consistency_score > 0.5 and mean_4plus > 0.01:
        recommendation = "MODERATE_CONFIDENCE"
    else:
        recommendation = "LOW_CONFIDENCE"

    return {
        'period_success_rates': period_success_rates,
        'consistency_score': consistency_score,
        'average_4plus_rate': mean_4plus,
        'average_5plus_rate': mean_5plus,
        'total_periods_analyzed': len(all_results),
        'recommendation': recommendation,
    }

# 13. EXPERT FUNCTION STUBS ──────────────────────────────────────────────────── 
def _stub(df):
    """Fallback expert: ignore ``df`` and return six random picks with confidence 0.5.

    The picks are drawn with ``s_int(1, 39)`` and passed through ``unique6``
    (both defined elsewhere in this file).
    """
    random_picks = [s_int(1, 39) for _ in range(6)]
    return unique6(random_picks), 0.5



# Create simplified implementations for experts whose registered function is missing
def make_expert_func(name):
    """Build a frequency-based fallback expert bound to the expert called ``name``.

    The returned callable takes the draw history ``df`` (columns
    'Number1'..'Number6' are read) and returns ``(prediction, confidence)``.
    """
    def expert_func(df):
        try:
            if len(df) < 5:
                return unique6([s_int(1, 39) for _ in range(6)]), 0.3

            # Simple frequency-based approach with variation
            recent = df.tail(min(50, len(df)))
            all_nums = []
            for _, row in recent.iterrows():
                all_nums.extend([row[f'Number{j}'] for j in range(1, 7)])

            freq = Counter(all_nums)
            candidates = [num for num, _ in freq.most_common(20)]

            # Rotate the candidate list by an amount derived from the expert
            # name so different experts do not all return the same numbers.
            # NOTE: str hashes are salted per interpreter process, so this
            # offset differs between runs.
            name_hash = hash(name) % 1000
            rotation = name_hash % min(len(candidates), 5)
            candidates = candidates[rotation:] + candidates[:rotation]

            prediction = []
            for num in candidates:
                if num not in prediction and len(prediction) < 6:
                    prediction.append(num)

            # Top up with random numbers if fewer than six candidates exist
            while len(prediction) < 6:
                candidate = s_int(1, 39)
                if candidate not in prediction:
                    prediction.append(candidate)

            confidence = 0.4 + (name_hash % 30) / 100  # Confidence between 0.4-0.7
            return unique6(prediction), confidence
        except Exception:
            # Best-effort expert: any data problem degrades to a random guess
            return unique6([s_int(1, 39) for _ in range(6)]), 0.3
    return expert_func


for expert_name, _expert_cfg in expert_registry.items():
    # The registry maps expert name -> config; the callable is looked up and
    # bound under cfg['func'], so that is the name whose presence must be
    # checked (checking the registry key could overwrite a real expert).
    _func_name = _expert_cfg['func']
    if not callable(globals().get(_func_name)):
        globals()[_func_name] = make_expert_func(expert_name)

# 14. MAIN EXECUTION  ───────────────────────────────────────────────────────────
if __name__ == "__main__":
    try:
        print("🚀 DHARMA_X COMPREHENSIVE VALIDATION SYSTEM")
        print("=" * 80)

        # Report, for every registered expert, whether a non-stub callable is bound
        module_scope = globals()
        for cfg in expert_registry.values():
            fname = cfg['func']
            bound = module_scope.get(fname)
            if bound and bound != _stub:
                status = 'FOUND'
            else:
                status = 'MISSING/STUB'
            print(f"{fname}: {status}")

        # Input workbook; change this path to point at your own data file
        data_file_path = "/kaggle/input/draw-41to44/draw_Updated.xlsx"  # Update this path

        # Run comprehensive validation:
        #   n_sets            -> predictions per iteration
        #   max_it_per_period -> max iterations per validation period
        results, analytics = run_comprehensive_validation(
            df_path=data_file_path,
            n_sets=200,
            max_it_per_period=10000,
        )

        if not results:
            print("\n❌ VALIDATION FAILED - Check data file and configuration")
        else:
            print("\n🎉 COMPREHENSIVE VALIDATION COMPLETED SUCCESSFULLY!")
            print(f"📊 Analyzed {len(results)} validation periods")
            print("📄 Check output files for detailed analysis")

    except Exception as e:
        # Top-level boundary: report the failure instead of aborting the cell
        print(f"\n❌ Critical system error: {e}")
        traceback.print_exc()

In [None]:
from IPython.display import FileLink

# Show a clickable link to the analytics JSON in the cell output.
# NOTE(review): `display` is not imported here; presumably the notebook
# runtime injects it - confirm if this is ever run as a plain script.
analytics_link = FileLink('dharma_unified_analytics_complete.json')
display(analytics_link)

In [None]:
# dharma_all_periods_4plus_comprehensive_extractor_OPTIMIZED.py
# OPTIMIZED VERSION - Sequential file processing with memory management
# COMPLETE VERSION - Extract ≥4/6 breakthroughs from ALL periods with full micro-to-macro details
# MODIFIED: Now processes files sequentially with batch processing for 23GB+ files

import json
import pandas as pd
import numpy as np
import gc  # Garbage collector for memory management
from datetime import datetime
import os
from typing import List, Dict, Any, Optional

def safe_first_numeric(series, default=0):
    """Return the first value of ``series`` that coerces to a number, as an int.

    Args:
        series: A pandas Series (or any list-like / scalar accepted by
            ``pd.to_numeric``); ``None`` is allowed.
        default: Value returned when ``series`` is None or holds no numeric value.

    Returns:
        ``int`` of the first non-NaN value after ``pd.to_numeric(errors='coerce')``,
        otherwise ``default``.
    """
    if series is None:
        return default
    # Wrap in a Series so list/ndarray/scalar inputs behave like a Series too.
    numeric = pd.Series(pd.to_numeric(series, errors='coerce')).dropna()
    if numeric.empty:
        return default
    # Positional access: label-based .loc returns a Series (and int() fails)
    # when the index contains duplicate labels.
    return int(numeric.iloc[0])

class AllPeriods4plusBreakthroughExtractor:
    """
    OPTIMIZED: Complete comprehensive micro-to-macro extraction for ALL PERIODS ≥4/6 breakthroughs
    Now processes multiple input files SEQUENTIALLY with memory management

    Each analytics JSON file is read as a mapping with a top-level ``periods``
    entry of the form ``{period_name: {'match_counts': {level_str: [record, ...]}}}``.
    Records at hit levels ``min_hit``..6 are indexed per period at construction
    time; they can then be flattened to one row per record (CSV export) or
    summarised (insights JSON).

    NOTE: every file is still parsed in full by ``json.load``; "sequential"
    means only one file is parsed at a time.
    """

    # Expert names counted in the 'oracle' / 'breakthrough' category columns
    _ORACLE_EXPERTS = (
        'E30_MarkovChain_Residuals', 'E16_Chaos_Theory', 'E9_Gap_Analysis', 'Z3_XORMixnet_Reverse',
    )
    _BREAKTHROUGH_EXPERTS = (
        'E1_FrequencyAnalysis', 'Z6_Adversarial_PhaseWatcher', 'B1_RandomForest', 'F2_Quantum_ML',
    )

    def __init__(self, analytics_json_paths, min_hit=4, batch_size=1000):
        """Index every ≥min_hit record found in the given analytics files.

        Args:
            analytics_json_paths: A path, or a list of paths, to analytics JSON files.
            min_hit: Lowest hit level to collect; levels ``min_hit``..6 are scanned.
            batch_size: Records handled per batch during extraction (values
                below 1 are treated as 1).

        The files are read immediately by the constructor.
        """
        if isinstance(analytics_json_paths, str):
            analytics_json_paths = [analytics_json_paths]

        print("🚀 OPTIMIZED DHARMA analytics loader - Sequential file processing")
        print(f"📂 Processing {len(analytics_json_paths)} files sequentially")
        print(f"🔄 Batch size: {batch_size} records per batch")

        self.analytics_json_paths = analytics_json_paths
        # A step of 0 would make range() raise and a negative step would
        # silently skip every record, so clamp to at least one record per batch.
        self.batch_size = max(1, int(batch_size))
        self.min_hit = int(min_hit)
        self.levels = [str(h) for h in range(self.min_hit, 7)]

        # Initialize tracking variables
        self.periods_with_hits = {}
        self.total_files_processed = 0
        self.total_records_found = 0

        # Get file sizes for progress tracking
        self.file_sizes = []
        total_size_gb = 0
        for path in analytics_json_paths:
            try:
                size_gb = os.path.getsize(path) / (1024**3)
            except (OSError, TypeError, ValueError) as e:
                print(f"   ⚠️  Cannot get size for {path}: {e}")
                self.file_sizes.append(0)
            else:
                self.file_sizes.append(size_gb)
                total_size_gb += size_gb
                print(f"   📁 {os.path.basename(path)}: {size_gb:.2f} GB")

        print(f"📊 Total data size: {total_size_gb:.2f} GB")
        print(f"🔍 Starting sequential analysis to find ≥{self.min_hit}/6 periods...")

        # Process files sequentially to find periods with hits
        self._analyze_files_sequentially()

        print(f"✅ Sequential analysis complete!")
        print(f"✅ Found {self.total_records_found} ≥{self.min_hit}/6 breakthrough records across {len(self.periods_with_hits)} periods")
        for period, level_map in self.periods_with_hits.items():
            by_lvl = {lvl: len(level_map.get(lvl, [])) for lvl in self.levels}
            print(f"   📊 {period}: {by_lvl}")

    def _analyze_files_sequentially(self):
        """Analyze files one by one to find periods with ≥min_hit records.

        Fills ``self.periods_with_hits`` (period -> level -> list of records),
        merging records of a period that appears in several files, and updates
        ``total_records_found`` / ``total_files_processed``. A file that fails
        to load or parse is reported and skipped.
        """
        for file_idx, path in enumerate(self.analytics_json_paths):
            print(f"\n🔍 Analyzing file {file_idx + 1}/{len(self.analytics_json_paths)}: {os.path.basename(path)}")
            print(f"📊 File size: {self.file_sizes[file_idx]:.2f} GB")

            try:
                # Load single file
                print("   📥 Loading JSON data...")
                with open(path, 'r', encoding='utf-8') as f:
                    file_data = json.load(f)

                # Process periods in this file
                file_periods = file_data.get('periods', {})
                print(f"   🔍 Found {len(file_periods)} periods in file")

                for processed_periods, (period_name, period_data) in enumerate(file_periods.items(), start=1):
                    if processed_periods % 100 == 0:
                        print(f"      📈 Processed {processed_periods}/{len(file_periods)} periods")

                    # `or` also maps JSON nulls to empty containers
                    matches = period_data.get('match_counts') or {}
                    level_records = {lvl: matches.get(lvl) or [] for lvl in self.levels}
                    period_records = sum(len(found) for found in level_records.values())
                    if not period_records:
                        continue

                    # New periods get an entry for every level; repeated periods
                    # (same name in a later file) have their lists extended.
                    existing_levels = self.periods_with_hits.setdefault(period_name, {})
                    for lvl, new_records in level_records.items():
                        existing_levels.setdefault(lvl, []).extend(new_records)

                    # Update record count
                    self.total_records_found += period_records

                # Clear file data from memory
                del file_data, file_periods
                gc.collect()  # Force garbage collection

                self.total_files_processed += 1
                print(f"   ✅ File {file_idx + 1} processed successfully")

            except Exception as e:
                # Best-effort: one unreadable file must not stop the others
                print(f"   ❌ Error processing file {path}: {e}")

    def extract_comprehensive_details(self):
        """Extract ALL micro-to-macro details using BATCH PROCESSING for memory efficiency.

        Returns:
            A list with one flat ``dict`` per indexed record (see
            ``_extract_single_record_details``), or ``[]`` when nothing was indexed.
        """
        if not self.periods_with_hits:
            print(f"❌ No ≥{self.min_hit}/6 records found in any period")
            return []

        print(f"\n🔬 Beginning OPTIMIZED comprehensive extraction (≥{self.min_hit}/6)...")
        print(f"🔄 Using batch processing with batch size: {self.batch_size}")

        comprehensive_data = []
        overall_record_id = 1
        total_periods = len(self.periods_with_hits)

        for processed_periods, (period_name, level_map) in enumerate(self.periods_with_hits.items(), start=1):
            # (record, level, 1-based position within that level) for this period
            period_records = [
                (record, lvl, idx)
                for lvl in self.levels
                for idx, record in enumerate(level_map.get(lvl, []), start=1)
            ]
            total_in_period = len(period_records)

            print(f"\n📊 Processing period {processed_periods}/{total_periods}: {period_name}")
            print(f"   🎯 Found {total_in_period} records (≥{self.min_hit}/6)")

            # Process records in batches
            batch_starts = range(0, total_in_period, self.batch_size)
            for batch_count, start in enumerate(batch_starts, start=1):
                batch = period_records[start:start + self.batch_size]
                print(f"      🔄 Processing batch {batch_count} ({len(batch)} records)")

                for record, lvl, period_idx in batch:
                    comprehensive_data.append(
                        self._extract_single_record_details(
                            record, period_name, lvl, period_idx, overall_record_id
                        )
                    )
                    overall_record_id += 1

                # Force garbage collection every few batches
                if batch_count % 5 == 0:
                    gc.collect()
                    print(f"         💾 Memory cleanup after batch {batch_count}")

            # Clear period records
            del period_records
            gc.collect()

            print(f"   ✅ Period {period_name} completed ({total_in_period} records processed)")

        print(f"\n🎉 OPTIMIZED extraction complete! Processed {len(comprehensive_data)} records")
        return comprehensive_data

    def _extract_single_record_details(self, record, period_name, lvl, period_idx, overall_record_id):
        """Flatten one breakthrough record into a single-level dict (one CSV row).

        Args:
            record: The record dict read from the analytics JSON.
            period_name: Name of the period the record belongs to.
            lvl: Hit level of the record as a string (e.g. ``'4'``).
            period_idx: 1-based position of the record within its level.
            overall_record_id: Running id across all periods.

        Returns:
            A dict of scalar / string fields. Key insertion order is kept
            stable because it determines the CSV column order.
        """
        # Function-scope import: only needed for the stable fingerprint below.
        import hashlib

        details = {}

        # LEVEL 1: BASIC BREAKTHROUGH IDENTIFICATION + PERIOD TRACKING
        details.update({
            'overall_record_id': overall_record_id,
            'period_name': period_name,
            'period_record_number': period_idx,
            'hit_level': int(lvl),
            'validation_period': record.get('validation_period', period_name),
            'iteration': record.get('iteration'),
            'hit_count': record.get('hit_count', int(lvl)),
            'timestamp': record.get('timestamp'),
            'collection_timestamp': record.get('collection_timestamp'),
        })

        # LEVEL 2: PREDICTION ANALYSIS (MICRO DETAILS)
        # `or []` also covers JSON nulls, which would otherwise break len()/sum()
        prediction = record.get('prediction') or []
        target = record.get('target') or []
        matches = record.get('matches', [])
        misses = record.get('misses', [])
        extras = record.get('extras', [])

        # Raw predictions
        details.update({
            'prediction_raw': str(prediction),
            'target_raw': str(target),
            'matches_raw': str(matches),
            'misses_raw': str(misses),
            'extras_raw': str(extras),
        })

        # Individual numbers: pred_num_1..6 followed by target_num_1..6
        for prefix, numbers in (('pred_num', prediction), ('target_num', target)):
            for pos in range(6):
                details[f'{prefix}_{pos + 1}'] = numbers[pos] if len(numbers) > pos else None

        # Prediction statistics
        details.update({
            'prediction_sum': sum(prediction) if prediction else 0,
            'prediction_avg': float(np.mean(prediction)) if prediction else 0.0,
            'prediction_std': float(np.std(prediction)) if prediction else 0.0,
            'prediction_min': min(prediction) if prediction else 0,
            'prediction_max': max(prediction) if prediction else 0,
            'prediction_range': (max(prediction) - min(prediction)) if prediction else 0,
            'prediction_sorted': str(sorted(prediction)),
        })

        # LEVEL 3: PARAMETER ANALYSIS (BETA-ENTROPY CONVERGENCE)
        beta_raw = record.get('beta', 0)
        entropy_raw = record.get('entropy', 0)
        # Numeric views used for derived fields; a missing/null value counts as 0
        beta = beta_raw or 0
        entropy = entropy_raw or 0
        cycle_position = record.get('cycle_position', 0)

        details.update({
            'beta': beta_raw,
            'entropy': entropy_raw,
            'beta_entropy_ratio': record.get('beta_entropy_ratio', (beta / entropy if entropy else 0)),
            'beta_deviation': record.get('beta_deviation', 0),
            'entropy_deviation': record.get('entropy_deviation', 0),
            'optimal_convergence': record.get('optimal_convergence', 0),

            # Parameter analysis
            'beta_squared': (beta ** 2) if beta else 0,
            'entropy_squared': (entropy ** 2) if entropy else 0,
            'beta_entropy_product': (beta * entropy) if beta and entropy else 0,
            # Sum/difference stay meaningful when only one parameter is 0
            'beta_entropy_sum': beta + entropy,
            'beta_entropy_diff': abs(beta - entropy),

            # Phase and cycle context
            'phase': record.get('phase'),
            'cycle_position': cycle_position,
            'cycle_position_normalized': ((cycle_position or 0) / 75.0),
        })

        # LEVEL 4: EXPERT SYSTEM ANALYSIS (MACRO INTELLIGENCE)
        expert_weights = record.get('expert_weights', {})
        active_experts = record.get('active_experts') or []
        top_3_experts = record.get('top_3_experts', [])

        details.update({
            'active_experts_count': len(active_experts),
            'active_experts_list': str(active_experts),
            'expert_weights_raw': str(expert_weights),
            'top_3_experts_raw': str(top_3_experts),
            'weight_entropy': record.get('weight_entropy', 0),
            'weight_concentration': record.get('weight_concentration', 0),
        })

        # Individual expert weights (top experts); columns always exist
        for i in range(3):
            details[f'top_expert_{i+1}_name'] = None
            details[f'top_expert_{i+1}_weight'] = None

        if top_3_experts:
            for i, expert_data in enumerate(top_3_experts[:3]):
                # Only (name, weight, ...) sequences are understood
                if isinstance(expert_data, (list, tuple)) and len(expert_data) >= 2:
                    details[f'top_expert_{i+1}_name'] = expert_data[0]
                    details[f'top_expert_{i+1}_weight'] = expert_data[1]

        # Extract specific expert categories
        oracle_experts_present = [expert for expert in active_experts if expert in self._ORACLE_EXPERTS]
        breakthrough_experts_present = [expert for expert in active_experts if expert in self._BREAKTHROUGH_EXPERTS]

        details.update({
            'oracle_experts_present': str(oracle_experts_present),
            'oracle_experts_count': len(oracle_experts_present),
            'breakthrough_experts_present': str(breakthrough_experts_present),
            'breakthrough_experts_count': len(breakthrough_experts_present),
        })

        # LEVEL 5: PATTERN SIGNATURE ANALYSIS
        number_gaps = record.get('number_gaps', [])
        number_clusters = record.get('number_clusters', {})
        number_distribution = record.get('number_distribution', {})

        details.update({
            'number_gaps_raw': str(number_gaps),
            'number_clusters_raw': str(number_clusters),
            'number_distribution_raw': str(number_distribution),
            'sum_total': record.get('sum_total', sum(prediction) if prediction else 0),
            'sum_deviation': record.get('sum_deviation', 0),

            # Gap analysis
            'gaps_count': len(number_gaps) if number_gaps else 0,
            'gaps_avg': float(np.mean(number_gaps)) if number_gaps else 0.0,
            'gaps_max': max(number_gaps) if number_gaps else 0,
            'gaps_min': min(number_gaps) if number_gaps else 0,

            # Cluster analysis
            'cluster_count': number_clusters.get('cluster_count', 0) if number_clusters else 0,
            'cluster_sizes': str(number_clusters.get('cluster_sizes', [])) if number_clusters else '[]',

            # Distribution analysis
            'dist_1_10': number_distribution.get('1-10', 0) if number_distribution else 0,
            'dist_11_20': number_distribution.get('11-20', 0) if number_distribution else 0,
            'dist_21_30': number_distribution.get('21-30', 0) if number_distribution else 0,
            'dist_31_39': number_distribution.get('31-39', 0) if number_distribution else 0,
        })

        # LEVEL 6: PERFORMANCE CONTEXT
        expert_performances = record.get('expert_performances', {})

        details.update({
            'expert_performances_raw': str(expert_performances),
            'performance_weighted_score': record.get('performance_weighted_score', 0),
            'template_raw': str(record.get('template', {})),
        })

        # Performance statistics (all zero when no usable value is present)
        perf_values = (
            [v for v in expert_performances.values() if v is not None]
            if expert_performances else []
        )
        if perf_values:
            details.update({
                'expert_perf_avg': float(np.mean(perf_values)),
                'expert_perf_std': float(np.std(perf_values)),
                'expert_perf_min': float(min(perf_values)),
                'expert_perf_max': float(max(perf_values)),
            })
        else:
            details.update({
                'expert_perf_avg': 0.0,
                'expert_perf_std': 0.0,
                'expert_perf_min': 0.0,
                'expert_perf_max': 0.0,
            })

        # LEVEL 7: BREAKTHROUGH SIGNATURE FINGERPRINTING
        details.update({
            'signature_beta_bucket': (int(beta * 100) // 5 * 5) if beta else 0,
            'signature_entropy_bucket': (int(entropy * 10) // 5 * 5) if entropy else 0,
            'signature_expert_combo': str(tuple(sorted(active_experts[:5]))),
            'signature_gap_pattern': str(tuple(sorted(number_gaps[:3]))) if number_gaps else '()',
            'signature_sum_bucket': (details['sum_total'] // 10 * 10) if details['sum_total'] else 0,
        })

        # Create breakthrough fingerprint.
        # The built-in hash() of a str is salted per interpreter process, so it
        # would give different fingerprints on every run; a SHA-256 prefix keeps
        # the value a decimal-integer string while making it reproducible.
        fingerprint_data = f"{details['signature_beta_bucket']}_{details['signature_entropy_bucket']}_{details['signature_sum_bucket']}"
        fingerprint_digest = hashlib.sha256(fingerprint_data.encode('utf-8')).digest()
        details['breakthrough_fingerprint'] = str(int.from_bytes(fingerprint_digest[:8], 'big'))

        # LEVEL 8: CONTEXTUAL METADATA
        # Completeness is measured over the fields collected so far
        non_null_count = sum(1 for v in details.values() if v is not None and v != '')
        total_fields = len(details)
        record_completeness_score = (non_null_count / total_fields) if total_fields > 0 else 0

        details.update({
            'extraction_timestamp': datetime.now().isoformat(),
            'record_completeness_score': float(record_completeness_score),
            'data_quality_flag': 'HIGH' if record_completeness_score > 0.8 else 'MEDIUM',
            'total_fields': total_fields,
            'non_null_fields': non_null_count
        })

        return details

    def export_comprehensive_analysis(self, output_path="ALL_PERIODS_4plus_comprehensive_analysis.csv"):
        """Export comprehensive analysis for all periods (≥min_hit/6) to CSV.

        Args:
            output_path: Destination CSV path.

        Returns:
            ``output_path`` after writing, or ``None`` when no record was extracted.
        """
        print(f"\n🚀 Starting OPTIMIZED comprehensive analysis export...")

        data = self.extract_comprehensive_details()
        if not data:
            print(f"❌ No ≥{self.min_hit}/6 data extracted")
            return None

        print(f"📊 Creating DataFrame from {len(data)} records...")
        df = pd.DataFrame(data)

        print(f"💾 Saving to CSV: {output_path}")
        df.to_csv(output_path, index=False)

        # Clear large data structures
        del data
        gc.collect()

        print(f"\n{'='*80}")
        print(f"📊 OPTIMIZED ALL-PERIODS ≥{self.min_hit}/6 ANALYSIS COMPLETE")
        print(f"{'='*80}")
        print(f"✅ Extracted {len(df)} ≥{self.min_hit}/6 breakthrough records")
        print(f"📄 Comprehensive analysis saved: {output_path}")
        print(f"📋 Total data fields per record: {len(df.columns)}")

        # Period-wise summary
        if len(df) > 0:
            print(f"\n🔍 PERIOD-WISE BREAKTHROUGH SUMMARY:")
            # One pass over the groups instead of re-filtering df per period
            for period, period_records in df.groupby('period_name'):
                count = len(period_records)
                avg_beta = period_records['beta'].mean() if 'beta' in df.columns else 0
                avg_entropy = period_records['entropy'].mean() if 'entropy' in df.columns else 0
                iterations = period_records['iteration'].tolist() if 'iteration' in df.columns else []
                print(f"   {period}: {count} records (≥{self.min_hit}/6) | Iterations: {iterations} | Avg β={avg_beta:.4f} | Avg ε={avg_entropy:.4f}")

        return output_path

    def generate_breakthrough_insights(self):
        """Generate comprehensive insights about ≥min_hit breakthroughs across all periods.

        Re-runs the extraction, aggregates the rows and writes the result to
        ``ALL_PERIODS_4plus_insights.json`` in the current directory.

        Returns:
            The insights dict, or ``None`` when no record was extracted.
        """
        print(f"🧠 Generating breakthrough insights...")
        data = self.extract_comprehensive_details()
        if not data:
            return None

        df = pd.DataFrame(data)

        # Permanent scalar extraction (no indexer objects involved)
        first_non_null_total_fields = safe_first_numeric(df.get('total_fields'), default=0)

        by_level = df['hit_level'].value_counts().to_dict() if 'hit_level' in df.columns else {}
        # Every column selected for the combinations table must exist
        combo_columns = ['period_name', 'beta', 'entropy', 'iteration', 'hit_level']
        insights = {
            "min_hit_threshold": int(self.min_hit),
            "total_4plus_breakthroughs": int(len(data)),
            "by_level_counts": by_level,
            "periods_with_breakthroughs": int(len(df['period_name'].unique())),
            "breakthrough_by_period": df['period_name'].value_counts().to_dict(),
            "breakthrough_iterations": df.groupby('period_name')['iteration'].apply(list).to_dict(),

            "parameter_analysis": {
                "overall_avg_beta": float(df['beta'].mean()) if 'beta' in df.columns else 0,
                "overall_avg_entropy": float(df['entropy'].mean()) if 'entropy' in df.columns else 0,
                "beta_range": [float(df['beta'].min()), float(df['beta'].max())] if 'beta' in df.columns else [0, 0],
                "entropy_range": [float(df['entropy'].min()), float(df['entropy'].max())] if 'entropy' in df.columns else [0, 0],
                "optimal_beta_entropy_combinations": df[combo_columns].to_dict('records')
                    if set(combo_columns).issubset(df.columns) else []
            },

            "expert_analysis": {
                "most_frequent_oracle_experts": df['oracle_experts_present'].value_counts().head().to_dict() if 'oracle_experts_present' in df.columns else {},
                "most_frequent_breakthrough_experts": df['breakthrough_experts_present'].value_counts().head().to_dict() if 'breakthrough_experts_present' in df.columns else {},
                "avg_active_experts_per_breakthrough": float(df['active_experts_count'].mean()) if 'active_experts_count' in df.columns else 0
            },

            "pattern_signatures": {
                "unique_fingerprints": len(df['breakthrough_fingerprint'].unique()) if 'breakthrough_fingerprint' in df.columns else 0,
                "most_common_fingerprints": df['breakthrough_fingerprint'].value_counts().head().to_dict() if 'breakthrough_fingerprint' in df.columns else {},
                "signature_consistency": (len(df['breakthrough_fingerprint'].unique()) / len(df)) if len(df) > 0 and 'breakthrough_fingerprint' in df.columns else 0
            },

            "temporal_analysis": {
                "breakthrough_timestamps": df['timestamp'].tolist() if 'timestamp' in df.columns else [],
                "phase_distribution": df['phase'].value_counts().to_dict() if 'phase' in df.columns else {},
                "cycle_position_distribution": df['cycle_position'].value_counts().to_dict() if 'cycle_position' in df.columns else {}
            },

            "quality_metrics": {
                "avg_data_completeness": float(df['record_completeness_score'].mean()) if 'record_completeness_score' in df.columns else 0,
                "high_quality_records": int(len(df[df['data_quality_flag'] == 'HIGH'])) if 'data_quality_flag' in df.columns else 0,
                "total_fields_per_record": first_non_null_total_fields
            }
        }

        # Clear DataFrame from memory
        del df, data
        gc.collect()

        # Save insights to JSON (default=str stringifies any non-JSON value)
        with open("ALL_PERIODS_4plus_insights.json", 'w', encoding='utf-8') as f:
            json.dump(insights, f, indent=2, default=str)

        print(f"💡 Insights generated and saved to ALL_PERIODS_4plus_insights.json")
        return insights

def run_all_periods_extraction(analytics_json_paths, min_hit=4, batch_size=1000):
    """
    OPTIMIZED: Main function to extract comprehensive ≥min_hit/6 breakthrough data from ALL periods.

    Builds an AllPeriods4plusBreakthroughExtractor, writes the CSV export and
    the insights JSON, then prints a short summary.

    Returns:
        ``(csv_path, insights)`` as produced by the extractor, or
        ``(None, None)`` if any step raises.
    """
    try:
        print(f"🚀 STARTING OPTIMIZED DHARMA EXTRACTION")
        print(f"⚡ Sequential file processing with batch size: {batch_size}")

        extractor = AllPeriods4plusBreakthroughExtractor(
            analytics_json_paths,
            min_hit=min_hit,
            batch_size=batch_size,
        )

        # CSV export first, then the aggregated insights
        csv_path = extractor.export_comprehensive_analysis("ALL_PERIODS_4plus_comprehensive_analysis.csv")
        insights = extractor.generate_breakthrough_insights()
        outcome = (csv_path, insights)

        # Nothing was exported: skip the summary entirely
        if not csv_path:
            return outcome

        print(f"\n🎯 OPTIMIZED ALL-PERIODS ≥{min_hit}/6 EXTRACTION COMPLETE!")
        print(f"📊 Comprehensive CSV: {csv_path}")
        print(f"💡 Insights JSON: ALL_PERIODS_4plus_insights.json")
        print(f"🔬 Ready for cross-period analysis of all ≥{min_hit}/6 breakthroughs!")

        if not insights:
            return outcome

        parameter_stats = insights.get('parameter_analysis', {})
        signature_stats = insights.get('pattern_signatures', {})
        print(f"\n📈 KEY CROSS-PERIOD INSIGHTS:")
        print(f"   Total ≥{min_hit}/6 breakthroughs: {insights.get('total_4plus_breakthroughs','N/A')}")
        print(f"   Periods with breakthroughs: {insights.get('periods_with_breakthroughs','N/A')}")
        print(f"   Optimal Beta range: {parameter_stats.get('beta_range','N/A')}")
        print(f"   Optimal Entropy range: {parameter_stats.get('entropy_range','N/A')}")
        if 'signature_consistency' in signature_stats:
            print(f"   Pattern consistency: {float(signature_stats['signature_consistency']):.2%}")

        return outcome

    except Exception as exc:
        # Top-level boundary: report the failure and signal it with (None, None)
        print(f"❌ Error during extraction: {exc}")
        import traceback
        traceback.print_exc()
        return None, None

# USAGE EXAMPLE - OPTIMIZED VERSION
if __name__ == "__main__":
    # Analytics JSON inputs: replace with your actual file paths.
    # A single path or a list of paths is accepted.
    analytics_files = ["dharma_unified_analytics_complete.json"]

    # OPTIMIZED: sequential processing with a configurable batch size;
    # lower batch_size values mean less memory usage (500 is a reduced
    # value intended for very large files).
    csv_path, insights = run_all_periods_extraction(analytics_files, min_hit=4, batch_size=500)

    if csv_path:
        success_report = (
            "\n🚀 OPTIMIZED EXTRACTION SUCCESSFUL!",
            "📋 Files created:",
            f"   - {csv_path}",
            "   - ALL_PERIODS_4plus_insights.json",
            "\n💡 Next steps:",
            "   1. Load CSV into your analysis tool",
            "   2. Filter by period_name for specific period analysis",
            "   3. Use insights JSON for strategic optimization",
            "\n⚡ Performance improvements:",
            "   - Sequential file processing (no 23GB memory load)",
            "   - Batch processing for memory management",
            "   - Automatic garbage collection",
            "   - Progress tracking and memory cleanup",
        )
        for report_line in success_report:
            print(report_line)

In [None]:
# dharma_cross_period_trend_analyzer_OPTIMIZED.py
# OPTIMIZED VERSION - Chunked CSV processing with memory management
# Eliminates combinatorial explosions and repeated parsing bottlenecks

import pandas as pd
import numpy as np
from collections import defaultdict, Counter
import ast
import gc
from typing import Dict, List, Set, Tuple, Any
from statistics import mean, median, mode, stdev
from scipy import stats
import warnings
warnings.filterwarnings('ignore')

def safe_parse_list(x):
    """Parse a stringified Python sequence into a list without raising.

    Args:
        x: A CSV cell value. Typically the ``repr`` of a list (for example
            ``"['a', 'b']"``), a missing value, or an already-parsed sequence.

    Returns:
        A new ``list`` with the parsed items. Missing values, unparsable
        text, and literals that are not a list/tuple/set all yield ``[]`` so
        callers can safely apply ``len()`` and ``sorted()`` to the result.
    """
    sequence_types = (list, tuple, set, frozenset)

    # Already a sequence: pd.isna() would return an array for these, which
    # cannot be used in a boolean test.
    if isinstance(x, sequence_types):
        return list(x)

    if pd.isna(x):
        return []

    try:
        parsed = ast.literal_eval(str(x))
    except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
        # The exception types ast.literal_eval is documented to raise on
        # malformed input.
        return []

    # Scalars/dicts/strings are valid literals but not expert lists.
    if isinstance(parsed, sequence_types):
        return list(parsed)
    return []

def parse_tuple_signature(s):
    """Turn a stringified list/tuple signature into a tuple.

    Returns ``None`` when the value is missing, cannot be evaluated as a
    Python literal, or evaluates to something other than a list or tuple.
    """
    if pd.isna(s):
        return None

    try:
        literal = ast.literal_eval(str(s))
    except Exception:
        return None

    return tuple(literal) if isinstance(literal, (list, tuple)) else None

class OptimizedCrossPeriodAnalyzer:
    """
    OPTIMIZED: Cross-period trend analyzer with chunked processing
    Eliminates expensive combinations and repeated parsing
    """
    
    def __init__(self, csv_file_path, chunk_size=50000):
        print("🚀 OPTIMIZED DHARMA_X - Chunked Cross-Period Analysis")
        print("="*80)
        print("RWYA: Universal patterns defeat single-segment anomalies")
        print("="*80)
        
        self.csv_file_path = csv_file_path
        self.chunk_size = chunk_size
        
        # Pre-analyze file
        print(f"\n📂 Analyzing file structure...")
        self.total_rows = sum(1 for _ in open(csv_file_path)) - 1  # Subtract header
        self.num_chunks = (self.total_rows + chunk_size - 1) // chunk_size
        
        print(f"✅ File contains {self.total_rows:,} rows")
        print(f"🔄 Will process in {self.num_chunks} chunks of {chunk_size:,} rows each")
        
        # Initialize accumulators
        self.periods = set()
        self.beta_period_success = defaultdict(set)
        self.entropy_period_success = defaultdict(set)
        self.expert_combo_periods = defaultdict(set)
        self.synergy_patterns = defaultdict(lambda: defaultdict(int))
        self.synergy_period_coverage = defaultdict(set)
        self.cycle_period_success = defaultdict(set)
        
        # Precomputed mappings
        self.beta_bin_ranges = {}
        self.entropy_bin_ranges = {}
        
        # Enhanced parameter tracking for orchestrator manipulation
        self.detailed_parameter_tracking = {
            'beta_values': defaultdict(lambda: defaultdict(list)),
            'entropy_values': defaultdict(lambda: defaultdict(list)),
            'beta_entropy_combinations': defaultdict(lambda: defaultdict(list)),
            'parameter_expert_combinations': defaultdict(lambda: defaultdict(set)),
            'hit_level_parameters': {4: defaultdict(list), 5: defaultdict(list), 6: defaultdict(list)},
            'parameter_performance': defaultdict(lambda: defaultdict(list))
        }
        
        # Cross-period consistency tracking
        self.cross_period_trends = {
            'universal_parameters': defaultdict(set),
            'parameter_combinations': defaultdict(set),
            'hit_level_segregation': {4: defaultdict(set), 5: defaultdict(set), 6: defaultdict(set)},
            'orchestrator_signals': defaultdict(lambda: defaultdict(dict))
        }
        
    def process_chunks_sequentially(self):
        """Process CSV file in chunks to avoid memory overload"""
        print(f"\n🔄 Processing {self.num_chunks} chunks sequentially...")
        
        chunk_iterator = pd.read_csv(
            self.csv_file_path, 
            chunksize=self.chunk_size, 
            low_memory=False
        )
        
        chunk_num = 0
        total_processed = 0
        
        for chunk_df in chunk_iterator:
            chunk_num += 1
            chunk_size_actual = len(chunk_df)
            total_processed += chunk_size_actual
            
            print(f"   📊 Processing chunk {chunk_num}/{self.num_chunks} ({chunk_size_actual:,} rows)")
            
            # Preprocess chunk
            self._preprocess_chunk(chunk_df)
            
            # Process each phase on this chunk
            self._process_phase1_chunk(chunk_df)
            self._process_phase2_chunk(chunk_df)
            self._process_phase3_chunk(chunk_df)
            self._process_phase4_chunk(chunk_df)
            # NEW: Enhanced parameter tracking
            self._process_enhanced_parameter_tracking(chunk_df)
            
            # Clear chunk from memory
            del chunk_df
            gc.collect()
            
            if chunk_num % 5 == 0:
                print(f"      💾 Memory cleanup after chunk {chunk_num}")
        
        print(f"✅ Processed {total_processed:,} total rows in chunks")
        
    def _preprocess_chunk(self, chunk_df):
        """Preprocess chunk with optimized parsing"""
        
        # Collect periods
        self.periods.update(chunk_df['period_name'].unique())
        
        # Precompute bins for this chunk
        chunk_df['beta_bin'] = pd.cut(chunk_df['beta'], bins=20, labels=False)
        chunk_df['entropy_bin'] = pd.cut(chunk_df['entropy'], bins=20, labels=False)
        
        # Pre-parse expert lists once per chunk (expensive operation)
        if 'active_experts_list' in chunk_df.columns:
            chunk_df['active_experts_parsed'] = chunk_df['active_experts_list'].apply(safe_parse_list)
        
        # Store bin ranges for later reference
        for bin_id in chunk_df['beta_bin'].dropna().unique():
            if bin_id not in self.beta_bin_ranges:
                beta_values = chunk_df[chunk_df['beta_bin'] == bin_id]['beta']
                self.beta_bin_ranges[bin_id] = (beta_values.min(), beta_values.max())
        
        for bin_id in chunk_df['entropy_bin'].dropna().unique():
            if bin_id not in self.entropy_bin_ranges:
                entropy_values = chunk_df[chunk_df['entropy_bin'] == bin_id]['entropy']
                self.entropy_bin_ranges[bin_id] = (entropy_values.min(), entropy_values.max())
    
    def _process_phase1_chunk(self, chunk_df):
        """Phase 1: Parameter consistency - vectorized processing"""
        
        # Group by period and collect successful bins
        for period, period_data in chunk_df.groupby('period_name'):
            # Beta bins present in this period
            successful_betas = period_data['beta_bin'].dropna().unique()
            for beta_bin in successful_betas:
                self.beta_period_success[beta_bin].add(period)
            
            # Entropy bins present in this period
            successful_entropies = period_data['entropy_bin'].dropna().unique()
            for entropy_bin in successful_entropies:
                self.entropy_period_success[entropy_bin].add(period)
    
    def _process_phase2_chunk(self, chunk_df):
        """Phase 2: Expert combinations - optimized without expensive combinations()"""
        
        # Use signature-based approach instead of combinations
        if 'signature_expert_combo' in chunk_df.columns:
            # Use pre-computed signature combinations (much faster)
            sig_data = chunk_df[['period_name', 'signature_expert_combo']].dropna()
            
            for _, row in sig_data.iterrows():
                period = row['period_name']
                combo_sig = parse_tuple_signature(row['signature_expert_combo'])
                
                if combo_sig:
                    self.expert_combo_periods[combo_sig].add(period)
        
        else:
            # Fallback: use top-N experts approach (avoid combinations explosion)
            if 'active_experts_parsed' in chunk_df.columns:
                expert_data = chunk_df[['period_name', 'active_experts_parsed']].dropna()
                
                for _, row in expert_data.iterrows():
                    period = row['period_name']
                    experts = row['active_experts_parsed']
                    
                    if len(experts) >= 3:
                        # Use top 3-5 experts as signature (avoid combinations)
                        for size in [3, 4, 5]:
                            if len(experts) >= size:
                                top_experts = tuple(sorted(experts)[:size])
                                self.expert_combo_periods[top_experts].add(period)
    
    def _process_phase3_chunk(self, chunk_df):
        """Phase 3: Parameter-Expert synergy - optimized processing"""
        
        # Filter valid rows first
        valid_mask = (
            ~chunk_df['beta_bin'].isna() & 
            ~chunk_df['entropy_bin'].isna()
        )
        
        if 'active_experts_parsed' in chunk_df.columns:
            valid_mask = valid_mask & (chunk_df['active_experts_parsed'].apply(len) >= 3)
        
        valid_data = chunk_df[valid_mask]
        
        for _, row in valid_data.iterrows():
            period = row['period_name']
            beta_bin = row['beta_bin'] 
            entropy_bin = row['entropy_bin']
            
            # Create synergy signature using top 3 experts
            if 'active_experts_parsed' in row and len(row['active_experts_parsed']) >= 3:
                top_3_experts = tuple(sorted(row['active_experts_parsed'])[:3])
                synergy_key = (beta_bin, entropy_bin, top_3_experts)
                
                self.synergy_patterns[synergy_key][period] += 1
                self.synergy_period_coverage[synergy_key].add(period)
    
    def _process_phase4_chunk(self, chunk_df):
        """Phase 4: Cycle position analysis - vectorized"""
        
        cycle_data = chunk_df[['period_name', 'cycle_position']].dropna()
        
        for period, period_data in cycle_data.groupby('period_name'):
            successful_cycles = period_data['cycle_position'].unique()
            for cycle_pos in successful_cycles:
                self.cycle_period_success[cycle_pos].add(period)

    def _process_enhanced_parameter_tracking(self, chunk_df):
        """Enhanced parameter tracking for orchestrator manipulation"""
        
        # Filter for valid hits (≥4/6)
        valid_hits = chunk_df[chunk_df['hit_level'] >= 4].copy()
        
        if valid_hits.empty:
            return
        
        for _, row in valid_hits.iterrows():
            period = row['period_name']
            hit_level = int(row['hit_level'])
            beta = row['beta']
            entropy = row['entropy']
            
            # Track detailed parameter values by period and hit level
            if not pd.isna(beta):
                self.detailed_parameter_tracking['beta_values'][period][hit_level].append(beta)
                self.detailed_parameter_tracking['parameter_performance'][f'beta_{beta:.4f}'][period].append(hit_level)
                
            if not pd.isna(entropy):
                self.detailed_parameter_tracking['entropy_values'][period][hit_level].append(entropy)
                self.detailed_parameter_tracking['parameter_performance'][f'entropy_{entropy:.4f}'][period].append(hit_level)
            
            # Track beta-entropy combinations
            if not pd.isna(beta) and not pd.isna(entropy):
                combo_key = f"β{beta:.4f}_H{entropy:.4f}"
                self.detailed_parameter_tracking['beta_entropy_combinations'][period][hit_level].append(combo_key)
                self.detailed_parameter_tracking['parameter_performance'][combo_key][period].append(hit_level)
            
            # Track parameter-expert combinations
            if 'active_experts_parsed' in row and len(row['active_experts_parsed']) >= 3:
                top_experts = tuple(sorted(row['active_experts_parsed'])[:3])
                param_expert_key = f"β{beta:.4f}_H{entropy:.4f}_{'+'.join(top_experts)}"
                self.detailed_parameter_tracking['parameter_expert_combinations'][period][hit_level].add(param_expert_key)
            
            # Hit-level segregated tracking
            self.detailed_parameter_tracking['hit_level_parameters'][hit_level][period].append({
                'beta': beta,
                'entropy': entropy,
                'beta_bin': row.get('beta_bin'),
                'entropy_bin': row.get('entropy_bin'),
                'cycle_position': row.get('cycle_position'),
                'active_experts': row.get('active_experts_parsed', [])
            })

    def _analyze_cross_period_consistency(self):
        """Analyze parameter consistency across all periods for orchestrator manipulation"""
        
        periods_list = sorted(list(self.periods))
        n_periods = len(periods_list)
        
        print(f"\n🔬 Analyzing cross-period parameter consistency across {n_periods} periods...")
        
        # Analyze universal parameters (present in ALL periods)
        universal_parameters = {}
        
        # Beta value consistency
        all_period_betas = set()
        for period in periods_list:
            if period in self.detailed_parameter_tracking['beta_values']:
                period_betas = []
                for hit_level, beta_list in self.detailed_parameter_tracking['beta_values'][period].items():
                    period_betas.extend(beta_list)
                if period_betas:
                    # Round to 4 decimal places for consistency
                    period_betas_rounded = [round(b, 4) for b in period_betas]
                    all_period_betas.update(period_betas_rounded)
        
        # Find betas present in ALL periods
        universal_betas = []
        for beta_val in all_period_betas:
            present_in_periods = 0
            total_hits = 0
            hit_levels = []
            
            for period in periods_list:
                period_found = False
                if period in self.detailed_parameter_tracking['beta_values']:
                    for hit_level, beta_list in self.detailed_parameter_tracking['beta_values'][period].items():
                        if any(abs(b - beta_val) < 0.0001 for b in beta_list):
                            present_in_periods += 1
                            total_hits += len([b for b in beta_list if abs(b - beta_val) < 0.0001])
                            hit_levels.extend([hit_level] * len([b for b in beta_list if abs(b - beta_val) < 0.0001]))
                            period_found = True
                            break
            
            if present_in_periods >= n_periods * 0.8:  # Present in 80%+ periods
                universal_betas.append({
                    'value': beta_val,
                    'period_coverage': f"{present_in_periods}/{n_periods}",
                    'total_occurrences': total_hits,
                    'avg_hit_level': mean(hit_levels) if hit_levels else 0,
                    'hit_levels': list(set(hit_levels))
                })
        
        # Same analysis for entropy
        all_period_entropies = set()
        for period in periods_list:
            if period in self.detailed_parameter_tracking['entropy_values']:
                period_entropies = []
                for hit_level, entropy_list in self.detailed_parameter_tracking['entropy_values'][period].items():
                    period_entropies.extend(entropy_list)
                if period_entropies:
                    period_entropies_rounded = [round(e, 4) for e in period_entropies]
                    all_period_entropies.update(period_entropies_rounded)
        
        universal_entropies = []
        for entropy_val in all_period_entropies:
            present_in_periods = 0
            total_hits = 0
            hit_levels = []
            
            for period in periods_list:
                if period in self.detailed_parameter_tracking['entropy_values']:
                    for hit_level, entropy_list in self.detailed_parameter_tracking['entropy_values'][period].items():
                        if any(abs(e - entropy_val) < 0.0001 for e in entropy_list):
                            present_in_periods += 1
                            total_hits += len([e for e in entropy_list if abs(e - entropy_val) < 0.0001])
                            hit_levels.extend([hit_level] * len([e for e in entropy_list if abs(e - entropy_val) < 0.0001]))
                            break
            
            if present_in_periods >= n_periods * 0.8:
                universal_entropies.append({
                    'value': entropy_val,
                    'period_coverage': f"{present_in_periods}/{n_periods}",
                    'total_occurrences': total_hits,
                    'avg_hit_level': mean(hit_levels) if hit_levels else 0,
                    'hit_levels': list(set(hit_levels))
                })
        
        return {
            'universal_betas': sorted(universal_betas, key=lambda x: x['avg_hit_level'], reverse=True),
            'universal_entropies': sorted(universal_entropies, key=lambda x: x['avg_hit_level'], reverse=True),
            'n_periods': n_periods
        }

    def _analyze_hit_level_segregation(self):
        """Analyze parameters segregated by hit levels (4/6, 5/6, 6/6)"""
        
        hit_level_analysis = {}
        periods_list = sorted(list(self.periods))
        
        for hit_level in [4, 5, 6]:
            print(f"\n📊 Analyzing {hit_level}/6 specific parameters...")
            
            # Collect all parameters for this hit level across all periods
            level_betas = []
            level_entropies = []
            level_combinations = []
            period_coverage = defaultdict(int)
            
            for period in periods_list:
                if period in self.detailed_parameter_tracking['hit_level_parameters'][hit_level]:
                    period_data = self.detailed_parameter_tracking['hit_level_parameters'][hit_level][period]
                    
                    for record in period_data:
                        if not pd.isna(record['beta']):
                            level_betas.append(record['beta'])
                            period_coverage[f"beta_{record['beta']:.4f}"] += 1
                        
                        if not pd.isna(record['entropy']):
                            level_entropies.append(record['entropy'])
                            period_coverage[f"entropy_{record['entropy']:.4f}"] += 1
                        
                        if not pd.isna(record['beta']) and not pd.isna(record['entropy']):
                            combo = f"β{record['beta']:.4f}_H{record['entropy']:.4f}"
                            level_combinations.append(combo)
                            period_coverage[combo] += 1
            
            # Statistical analysis
            analysis = {
                'hit_level': hit_level,
                'total_records': len(level_betas),
                'beta_stats': {
                    'mean': mean(level_betas) if level_betas else 0,
                    'median': median(level_betas) if level_betas else 0,
                    'std': stdev(level_betas) if len(level_betas) > 1 else 0,
                    'min': min(level_betas) if level_betas else 0,
                    'max': max(level_betas) if level_betas else 0,
                    'most_common': max(set(level_betas), key=level_betas.count) if level_betas else 0
                },
                'entropy_stats': {
                    'mean': mean(level_entropies) if level_entropies else 0,
                    'median': median(level_entropies) if level_entropies else 0,
                    'std': stdev(level_entropies) if len(level_entropies) > 1 else 0,
                    'min': min(level_entropies) if level_entropies else 0,
                    'max': max(level_entropies) if level_entropies else 0,
                    'most_common': max(set(level_entropies), key=level_entropies.count) if level_entropies else 0
                },
                'top_combinations': sorted(
                    [(combo, count) for combo, count in Counter(level_combinations).most_common(5)],
                    key=lambda x: x[1], reverse=True
                ),
                'period_consistency': {
                    param: count for param, count in period_coverage.items() 
                    if count >= len(periods_list) * 0.5  # Present in 50%+ periods
                }
            }
            
            hit_level_analysis[hit_level] = analysis
        
        return hit_level_analysis
    
    def generate_analysis_report(self):
        """Generate comprehensive analysis report from accumulated data"""
        
        periods = sorted(list(self.periods))
        n_periods = len(periods)
        
        print(f"📅 Periods: {periods}")
        
        # ================================================================
        # PHASE 1: PARAMETER RANGE CONSISTENCY ACROSS PERIODS
        # ================================================================
        print("\n" + "="*60)
        print("📊 PHASE 1: CROSS-PERIOD PARAMETER CONSISTENCY")
        print("="*60)
        
        # Universal beta ranges
        universal_beta_bins = []
        beta_thresh = int(np.ceil(0.70 * n_periods))
        
        print(f"\n🎯 Universal beta bins (≥{beta_thresh}/{n_periods} periods):")
        for beta_bin, periods_set in self.beta_period_success.items():
            if len(periods_set) >= beta_thresh:
                beta_min, beta_max = self.beta_bin_ranges.get(beta_bin, (0, 0))
                print(f"  Beta range [{beta_min:.4f}, {beta_max:.4f}] → {len(periods_set)}/{n_periods} periods")
                
                universal_beta_bins.append({
                    'bin': beta_bin,
                    'periods': len(periods_set),
                    'beta_min': beta_min,
                    'beta_max': beta_max
                })
        
        # Universal entropy ranges  
        universal_entropy_bins = []
        entropy_thresh = int(np.ceil(0.70 * n_periods))
        
        print(f"\n🎯 Universal entropy bins (≥{entropy_thresh}/{n_periods} periods):")
        for entropy_bin, periods_set in self.entropy_period_success.items():
            if len(periods_set) >= entropy_thresh:
                entropy_min, entropy_max = self.entropy_bin_ranges.get(entropy_bin, (0, 0))
                print(f"  Entropy range [{entropy_min:.4f}, {entropy_max:.4f}] → {len(periods_set)}/{n_periods} periods")
                
                universal_entropy_bins.append({
                    'bin': entropy_bin,
                    'periods': len(periods_set),
                    'entropy_min': entropy_min,
                    'entropy_max': entropy_max
                })
        
        # ================================================================
        # PHASE 2: EXPERT COMBINATION CROSS-PERIOD CONSISTENCY
        # ================================================================
        print("\n" + "="*60)
        print("🔥 PHASE 2: EXPERT COMBINATION CROSS-PERIOD ANALYSIS (accelerated)")
        print("="*60)
        
        universal_expert_combos = []
        combo_thresh = int(np.ceil(0.50 * n_periods))
        
        print(f"\n🏆 CROSS-PERIOD EXPERT COMBINATION ANALYSIS:")
        print(f"\n   UNIVERSAL EXPERT COMBINATIONS (present in ≥{combo_thresh}/{n_periods} periods):")
        
        shown = 0
        for combo, periods_set in sorted(self.expert_combo_periods.items(), 
                                       key=lambda x: len(x[1]), reverse=True):
            if len(periods_set) >= combo_thresh:
                combo_str = ", ".join(list(combo)[:4]) + ("..." if len(combo) > 4 else "")
                print(f"     [{combo_str}] → {len(periods_set)}/{n_periods} periods")
                
                universal_expert_combos.append((combo, len(periods_set)))
                shown += 1
                if shown >= 10:
                    break
        
        # ================================================================
        # PHASE 3: PARAMETER + EXPERT COMBINATION SYNERGY ANALYSIS  
        # ================================================================
        print("\n" + "="*60)
        print("⚡ PHASE 3: PARAMETER–EXPERT SYNERGY (multi-period)")
        print("="*60)
        
        synergy_thresh = int(np.ceil(0.40 * n_periods))
        universal_synergies = []
        
        print("\n💡 UNIVERSAL SYNERGIES (β-bin + H-bin + signature_expert_combo):")
        
        for synergy_key, period_counts in self.synergy_patterns.items():
            periods_with_pattern = len(self.synergy_period_coverage[synergy_key])
            
            if periods_with_pattern >= synergy_thresh:
                beta_bin, entropy_bin, expert_combo = synergy_key
                
                # Get actual parameter ranges
                beta_min, beta_max = self.beta_bin_ranges.get(beta_bin, (0, 0))
                entropy_min, entropy_max = self.entropy_bin_ranges.get(entropy_bin, (0, 0))
                
                total_records = sum(period_counts.values())
                
                combo_str = ", ".join(list(expert_combo)[:4]) + ("..." if len(expert_combo) > 4 else "")
                print(f"   β[{beta_min:.4f},{beta_max:.4f}] + H[{entropy_min:.4f},{entropy_max:.4f}] + [{combo_str}] → {periods_with_pattern}/{n_periods} periods, {total_records} rows")
                
                universal_synergies.append({
                    'beta_range': f"[{beta_min:.4f},{beta_max:.4f}]",
                    'entropy_range': f"[{entropy_min:.4f},{entropy_max:.4f}]",
                    'expert_combo': expert_combo,
                    'periods': periods_with_pattern,
                    'total_records': total_records
                })
                
                if len(universal_synergies) >= 8:
                    break
        
        # ================================================================
        # PHASE 4: TEMPORAL CYCLE UNIVERSALS
        # ================================================================
        print("\n" + "="*60)
        print("⏰ PHASE 4: UNIVERSAL CYCLE POSITIONS")
        print("="*60)
        
        cycle_thresh = int(np.ceil(0.60 * n_periods))
        universal_cycles = []
        
        print(f"\n🔄 Cycle positions present in ≥{cycle_thresh}/{n_periods} periods:")
        
        shown = 0
        for cycle_pos, periods_set in sorted(self.cycle_period_success.items(),
                                           key=lambda x: len(x[1]), reverse=True):
            if len(periods_set) >= cycle_thresh:
                print(f"   Position {int(cycle_pos):2d} → {len(periods_set)}/{n_periods} periods")
                
                universal_cycles.append({
                    'cycle_position': int(cycle_pos),
                    'periods': len(periods_set)
                })
                
                shown += 1
                if shown >= 10:
                    break

        # ================================================================
        # PHASE 5A: UNIVERSAL PARAMETER TRENDS FOR ORCHESTRATOR
        # ================================================================
        print("\n" + "="*60)
        print("🎯 PHASE 5A: UNIVERSAL PARAMETER TRENDS FOR ORCHESTRATOR")
        print("="*60)
        
        universal_analysis = self._analyze_cross_period_consistency()
        
        print(f"\n🔥 UNIVERSAL BETA VALUES (consistent across {universal_analysis['n_periods']} periods):")
        for i, beta_info in enumerate(universal_analysis['universal_betas'][:5], 1):
            print(f"   {i}. β = {beta_info['value']:.4f}")
            print(f"      Coverage: {beta_info['period_coverage']} periods")
            print(f"      Performance: {beta_info['avg_hit_level']:.2f}/6 average")
            print(f"      Hit levels achieved: {beta_info['hit_levels']}")
            print(f"      Total occurrences: {beta_info['total_occurrences']}")
            print()
        
        print(f"\n🔥 UNIVERSAL ENTROPY VALUES (consistent across {universal_analysis['n_periods']} periods):")
        for i, entropy_info in enumerate(universal_analysis['universal_entropies'][:5], 1):
            print(f"   {i}. H = {entropy_info['value']:.4f}")
            print(f"      Coverage: {entropy_info['period_coverage']} periods")
            print(f"      Performance: {entropy_info['avg_hit_level']:.2f}/6 average")
            print(f"      Hit levels achieved: {entropy_info['hit_levels']}")
            print(f"      Total occurrences: {entropy_info['total_occurrences']}")
            print()

        # ================================================================
        # PHASE 5B: HIT-LEVEL SEGREGATED PARAMETER ANALYSIS
        # ================================================================
        print("\n" + "="*60)
        print("🏆 PHASE 5B: HIT-LEVEL SEGREGATED PARAMETER ANALYSIS")
        print("="*60)
        
        hit_level_analysis = self._analyze_hit_level_segregation()
        
        for hit_level in [4, 5, 6]:
            if hit_level in hit_level_analysis:
                analysis = hit_level_analysis[hit_level]
                print(f"\n🎯 {hit_level}/6 SPECIFIC PARAMETER ANALYSIS:")
                print(f"   Total {hit_level}/6 records: {analysis['total_records']:,}")
                
                print(f"\n   📊 OPTIMAL BETA FOR {hit_level}/6:")
                beta_stats = analysis['beta_stats']
                print(f"      Most successful: β = {beta_stats['most_common']:.4f}")
                print(f"      Range: [{beta_stats['min']:.4f}, {beta_stats['max']:.4f}]")
                print(f"      Average: {beta_stats['mean']:.4f} ± {beta_stats['std']:.4f}")
                print(f"      Median: {beta_stats['median']:.4f}")
                
                print(f"\n   📊 OPTIMAL ENTROPY FOR {hit_level}/6:")
                entropy_stats = analysis['entropy_stats']
                print(f"      Most successful: H = {entropy_stats['most_common']:.4f}")
                print(f"      Range: [{entropy_stats['min']:.4f}, {entropy_stats['max']:.4f}]")
                print(f"      Average: {entropy_stats['mean']:.4f} ± {entropy_stats['std']:.4f}")
                print(f"      Median: {entropy_stats['median']:.4f}")
                
                print(f"\n   🔥 TOP PARAMETER COMBINATIONS FOR {hit_level}/6:")
                for j, (combo, count) in enumerate(analysis['top_combinations'][:3], 1):
                    print(f"      {j}. {combo} → {count} occurrences")
                
                print(f"\n   🌍 CROSS-PERIOD CONSISTENT PARAMETERS FOR {hit_level}/6:")
                for param, period_count in sorted(analysis['period_consistency'].items(), 
                                                key=lambda x: x[1], reverse=True)[:5]:
                    print(f"      {param} → Present in {period_count} periods")
                print()

        # ================================================================
        # PHASE 5C: ORCHESTRATOR MANIPULATION RECOMMENDATIONS
        # ================================================================
        print("\n" + "="*60)
        print("🚀 PHASE 5C: ORCHESTRATOR MANIPULATION RECOMMENDATIONS")
        print("="*60)
        
        print(f"\n🎯 IMMEDIATE ORCHESTRATOR SETTINGS (Universal Parameters):")
        
        if universal_analysis['universal_betas']:
            top_beta = universal_analysis['universal_betas'][0]
            print(f"\n   PRIMARY BETA LOCK:")
            print(f"      orchestrator.set_beta({top_beta['value']:.4f})  # Verified across {top_beta['period_coverage']} periods")
            print(f"      # Performance: {top_beta['avg_hit_level']:.2f}/6 average, {top_beta['total_occurrences']} total hits")
        
        if universal_analysis['universal_entropies']:
            top_entropy = universal_analysis['universal_entropies'][0]
            print(f"\n   PRIMARY ENTROPY LOCK:")
            print(f"      orchestrator.set_entropy({top_entropy['value']:.4f})  # Verified across {top_entropy['period_coverage']} periods")
            print(f"      # Performance: {top_entropy['avg_hit_level']:.2f}/6 average, {top_entropy['total_occurrences']} total hits")
        
        print(f"\n🎯 HIT-LEVEL SPECIFIC ORCHESTRATOR SETTINGS:")
        
        for hit_level in [4, 5, 6]:
            if hit_level in hit_level_analysis:
                analysis = hit_level_analysis[hit_level]
                print(f"\n   FOR TARGETING {hit_level}/6 SPECIFICALLY:")
                print(f"      orchestrator.set_beta({analysis['beta_stats']['most_common']:.4f})  # Most successful for {hit_level}/6")
                print(f"      orchestrator.set_entropy({analysis['entropy_stats']['most_common']:.4f})  # Most successful for {hit_level}/6")
                
                if analysis['top_combinations']:
                    top_combo = analysis['top_combinations'][0][0]
                    parts = top_combo.split('_H')
                    if len(parts) == 2:
                        beta_part = parts[0].replace('β', '')
                        entropy_part = parts[1]
                        print(f"      # Combined: β={beta_part}, H={entropy_part} → {analysis['top_combinations'][0][1]} occurrences")
        
        print(f"\n🎯 BALANCED ORCHESTRATOR STRATEGY:")
        print(f"      • Use universal parameters for consistent ≥4/6 performance")
        print(f"      • Switch to hit-level specific parameters for targeted improvements")
        print(f"      • Monitor cross-period consistency before making permanent changes")
        print(f"      • Combine with expert rotation for maximum effectiveness")

        # ================================================================
        # PHASE 5D: BALANCED UNIVERSAL RECOMMENDATIONS
        # ================================================================
        print("\n" + "="*60)
        print("🏆 PHASE 5D: BALANCED UNIVERSAL RECOMMENDATIONS")
        print("="*60)
        
        # Parameter locks
        if universal_beta_bins:
            top_beta = max(universal_beta_bins, key=lambda x: x['periods'])
            print(f"\n🎯 Parameter locks (multi-period): Beta [{top_beta['beta_min']:.4f}, {top_beta['beta_max']:.4f}]")
        
        if universal_entropy_bins:
            top_entropy = max(universal_entropy_bins, key=lambda x: x['periods'])
            print(f"🎯 Parameter locks (multi-period): Entropy [{top_entropy['entropy_min']:.4f}, {top_entropy['entropy_max']:.4f}]")
        
        # Expert strategy
        print("\n🎯 Expert strategy:")
        if universal_expert_combos:
            combo, coverage = universal_expert_combos[0]
            combo_str = ", ".join(list(combo)[:4]) + ("..." if len(combo) > 4 else "")
            print(f"   Prioritize signature combos like [{combo_str}] seen in {coverage}/{n_periods} periods.")
        
        # Cycle strategy
        print("\n🎯 Cycle strategy:")
        if universal_cycles:
            best_cycle = universal_cycles[0]
            print(f"   Rotate universal positions led by {best_cycle['cycle_position']} (coverage {best_cycle['periods']}/{n_periods} periods).")
        
        print("\n🎯 Balance strategy:")
        print("   • 60% universal parameters, 25% exploration, 15% breakthrough attempts.")
        
        print("\n" + "="*60)
        print("✅ OPTIMIZED DHARMA_X CROSS-PERIOD TREND ANALYSIS COMPLETE")
        print("="*60)

def dharma_cross_period_trend_analyzer_optimized(csv_file_path, chunk_size=50000):
    """
    OPTIMIZED: Cross-period trend analyzer with chunked processing.

    Builds an ``OptimizedCrossPeriodAnalyzer`` for ``csv_file_path``, asks it
    to process the file chunk by chunk, and then to print its analysis
    report. Exceptions are not propagated: the error message is printed, the
    traceback is emitted via ``traceback.print_exc()``, and ``False`` is
    returned, so callers only need to check the boolean result.

    Args:
        csv_file_path: Path of the CSV file; passed unchanged to the analyzer.
        chunk_size: Chunk size passed unchanged to the analyzer
            (presumably rows per chunk -- confirm against the analyzer class).

    Returns:
        True when processing and reporting finished without raising,
        False when any ``Exception`` was raised along the way.
    """
    # Function-scope imports keep the cleanup and error paths self-contained,
    # so they cannot fail on a missing module-level import.
    import gc
    import traceback

    analyzer = None
    try:
        print("🚀 STARTING OPTIMIZED CROSS-PERIOD ANALYSIS")
        print("⚡ Chunked processing + vectorized operations")

        analyzer = OptimizedCrossPeriodAnalyzer(csv_file_path, chunk_size)

        # Process file in chunks
        analyzer.process_chunks_sequentially()

        # Generate comprehensive report
        analyzer.generate_analysis_report()

        return True

    except Exception as e:
        # Top-level boundary: report the failure and signal it via the result.
        print(f"❌ Error during optimized analysis: {e}")
        traceback.print_exc()
        return False

    finally:
        # Clean up on success AND failure: drop the analyzer reference and
        # force a collection so partially accumulated data is reclaimed too.
        analyzer = None
        gc.collect()

# Preserve original function name for compatibility
def dharma_cross_period_trend_analyzer(csv_file_path):
    """
    Compatibility entry point that keeps the original one-argument signature.

    Forwards ``csv_file_path`` to
    ``dharma_cross_period_trend_analyzer_optimized`` with the chunk size
    fixed at 50000 and returns whatever that call returns.
    """
    legacy_chunk_size = 50000
    outcome = dharma_cross_period_trend_analyzer_optimized(
        csv_file_path,
        chunk_size=legacy_chunk_size,
    )
    return outcome

# USAGE EXAMPLE - OPTIMIZED VERSION
if __name__ == "__main__":
    # Your CSV file
    csv_file = "ALL_PERIODS_4plus_comprehensive_analysis.csv"
    
    print("🚀 INITIALIZING OPTIMIZED DHARMA_X CROSS-PERIOD TREND ANALYZER")
    print("RWYA - Universal patterns defeat single-segment anomalies")
    print("True intelligence comes from consistency across all conditions")
    
    # Use optimized version with chunked processing
    success = dharma_cross_period_trend_analyzer_optimized(
        csv_file, 
        chunk_size=30000  # Adjust based on available memory
    )
    
    if success:
        print("\n🚀 OPTIMIZED ANALYSIS SUCCESSFUL!")
        print("⚡ Performance improvements:")
        print("   - Chunked CSV processing (no memory overload)")
        print("   - Eliminated expensive combinations() operations")
        print("   - Pre-computed parsing and vectorized operations")
        print("   - Progressive result accumulation")
        print("   - Same comprehensive 6-phase analysis as original!")
        print("   - ENHANCED: Universal parameter trends for orchestrator manipulation")
        print("   - ENHANCED: Hit-level segregated analysis (4/6, 5/6, 6/6)")
        print("   - ENHANCED: Direct orchestrator.set_beta() and orchestrator.set_entropy() recommendations")