# In [14]:
import pandas as pd
import numpy as np
from typing import Dict, Any
import scipy.stats as stats
from scipy import signal
from statsmodels.tsa.stattools import acf, pacf

# Define a dummy ComprehensiveHypothesisSpace class for testing purposes
class ComprehensiveHypothesisSpace:
    """Minimal stand-in hypothesis space used for testing.

    Attributes:
        window_sizes: window lengths to try.
        mathematical_constants: mapping of constant name to target value.
        frequency_bands: ``(low, high)`` frequency pairs.
    """

    def __init__(self):
        self.frequency_bands = [(0, 0.1), (0.1, 0.5)]
        self.mathematical_constants = dict(phi=1.618, pi=3.14159, e=2.718)
        self.window_sizes = list(range(10, 31, 10))

class ExhaustivePatternDetector:
    """
    Test ALL possible patterns without bias.
    """

    def __init__(self, hypothesis_space: ComprehensiveHypothesisSpace):
        self.hypothesis_space = hypothesis_space
        self.all_results = []

    def detect_all_patterns(self, data: pd.DataFrame) -> Dict[str, Any]:
        """
        Exhaustively test all patterns in the hypothesis space.
        """
        print("Starting detect_all_patterns...")
        if data.empty or len(data) < 100:
            print("Data is empty or too short. Returning empty results.")
            return {}

        results = {
            'density_patterns': {},
            'ratio_patterns': {},
            'spectral_patterns': {},
            'scaling_patterns': {},
            'autocorrelation_patterns': {},
            'entropy_patterns': {}
        }

        # 1. Test density convergence at ALL windows
        print("Testing density convergence...")
        for window in self.hypothesis_space.window_sizes:
            if len(data) < window:
                continue

            # Add print statement to inspect window value
            # print(f"Testing density convergence for window: {window}") # Keep this print for now

            density_result = self._test_density_convergence(data, window)
            if density_result: # Only add if not empty
                results['density_patterns'][window] = density_result

        # 2. Test ratio patterns for ALL targets
        print("Testing ratio patterns...")
        ratio_results = self._test_all_ratios(data)
        if ratio_results: # Only add if not empty
            results['ratio_patterns'] = ratio_results

        # 3. Spectral analysis across all frequency bands
        print("Performing spectral analysis...")
        spectral_results = self._comprehensive_spectral_analysis(data)
        if spectral_results: # Only add if not empty
            results['spectral_patterns'] = spectral_results

        # 4. Scaling analysis (Hurst, DFA, multifractal)
        print("Performing scaling analysis...")
        scaling_results = self._scaling_analysis(data)
        if scaling_results: # Only add if not empty
            results['scaling_patterns'] = scaling_results

        # 5. Autocorrelation structure
        print("Performing autocorrelation analysis...")
        autocorr_results = self._autocorrelation_analysis(data)
        if autocorr_results: # Only add if not empty
            results['autocorrelation_patterns'] = autocorr_results

        # 6. Information theoretic measures
        print("Performing entropy analysis...")
        entropy_results = self._entropy_analysis(data)
        if entropy_results: # Only add if not empty
            results['entropy_patterns'] = entropy_results

        print(f"Finished detect_all_patterns. Results: {results}")
        return results

    def _test_density_convergence(self, data: pd.DataFrame, window: int) -> Dict:
        """Test if binary patterns converge to specific densities"""
        print(f"  _test_density_convergence called with window: {window}")
        prices = data['Close'].values

        # Ensure prices is a numpy array
        if isinstance(prices, pd.Series):
            prices = prices.values
        if isinstance(prices, list):
            prices = np.array(prices)

        if len(prices) < 2:
            print(f"    _test_density_convergence: prices too short ({len(prices)}). Returning empty.")
            return {}

        binary = (np.diff(prices) > 0).astype(int)

        if len(binary) < window:
            print(f"    _test_density_convergence: binary data too short ({len(binary)}) for window ({window}). Returning empty.")
            return {}

        densities = []
        # Ensure step size is at least 1 using an explicit check
        step = window // 4
        if step == 0:
            step = 1

        # print(f"    _test_density_convergence: range params: 0, {len(binary) - window}, {step}") # Keep this print for now
        for i in range(0, len(binary) - window, step):
            segment = binary[i:i+window]
            densities.append(np.mean(segment))

        if not densities:
            print(f"    _test_density_convergence: no densities calculated. Returning empty.")
            return {}

        # Test against ALL mathematical constants
        results = {
            'observed_mean': float(np.mean(densities)),
            'observed_std': float(np.std(densities)),
            'n_observations': len(densities)
        }

        # Test convergence to each constant
        for const_name, const_value in self.hypothesis_space.mathematical_constants.items():
            if 0 <= const_value <= 1:  # Density must be in [0,1]
                # Added check for std deviation to avoid division by zero
                std_dev = np.std(densities)
                denominator = std_dev / np.sqrt(len(densities)) + 1e-10
                z_score = (np.mean(densities) - const_value) / denominator
                p_value = 2 * (1 - stats.norm.cdf(abs(z_score)))

                results[f'convergence_to_{const_name}'] = {
                    'z_score': float(z_score),
                    'p_value': float(p_value),
                    'distance': float(abs(np.mean(densities) - const_value))
                }
        print(f"  _test_density_convergence finished. Results: {len(results)} entries.")
        return results

    def _test_all_ratios(self, data: pd.DataFrame) -> Dict:
        """Test for all possible ratio patterns"""
        print("  _test_all_ratios called.")
        results = {}

        # Volume ratios
        if 'Volume' in data.columns:
            volumes = data['Volume'].values
            vol_ratios = []
            for i in range(len(volumes) - 1):
                if volumes[i+1] > 0:
                    ratio = volumes[i] / volumes[i+1]
                    if 0.01 < ratio < 100:  # Filter extreme outliers
                        vol_ratios.append(ratio)

            if vol_ratios:
                # Convert list to numpy array before analysis
                vol_ratios = np.array(vol_ratios)
                analysis_result = self._analyze_ratio_distribution(vol_ratios)
                if analysis_result: results['volume_ratios'] = analysis_result

        # Price ratios (high/low, close/open, etc.)
        price_ratios = {
            'high_low': data['High'] / data['Low'],
            'close_open': data['Close'] / data['Open'],
            'close_prev_close': data['Close'] / data['Close'].shift(1)
        }

        for ratio_name, ratio_values in price_ratios.items():
            ratio_values = ratio_values.dropna()
            if len(ratio_values) > 100:
                analysis_result = self._analyze_ratio_distribution(ratio_values.values)
                if analysis_result: results[ratio_name] = analysis_result
        print(f"  _test_all_ratios finished. Results: {len(results)} entries.")
        return results

    def _analyze_ratio_distribution(self, ratios: np.ndarray) -> Dict:
        """Analyze distribution of ratios against all targets"""
        print(f"    _analyze_ratio_distribution called with {len(ratios)} ratios.")
        from sklearn.neighbors import KernelDensity

        # Ensure ratios is a numpy array
        if isinstance(ratios, list):
            ratios = np.array(ratios)

        # Ensure it's 1D
        if len(ratios.shape) > 1:
            ratios = ratios.flatten()

        # Filter out NaN and infinite values
        ratios = ratios[np.isfinite(ratios)]

        if len(ratios) == 0:
            print("      _analyze_ratio_distribution: no finite ratios. Returning empty.")
            return {}

        # Find natural peaks in distribution
        try:
            kde = KernelDensity(bandwidth=0.05, kernel='gaussian')
            ratios_reshaped = ratios.reshape(-1, 1)
            kde.fit(ratios_reshaped)

            x_range = np.linspace(0.5, 3, 500).reshape(-1, 1)
            density = np.exp(kde.score_samples(x_range))
            peaks, properties = signal.find_peaks(density, height=np.max(density)*0.2)

            peak_values = x_range[peaks].flatten()
        except Exception as e:
            print(f"      _analyze_ratio_distribution: Error during KDE or peak finding: {e}. Returning empty.")
            return {}


        results = {
            'distribution_stats': {
                'mean': float(np.mean(ratios)),
                'std': float(np.std(ratios)),
                'median': float(np.median(ratios)),
                'mode': float(peak_values[np.argmax(density[peaks])]) if len(peaks) > 0 else np.nan
            },
            'discovered_peaks': peak_values.tolist(),
            'peak_heights': density[peaks].tolist()
        }

        # Test proximity to each target ratio
        for const_name, const_value in self.hypothesis_space.mathematical_constants.items():
            if 0.5 <= const_value <= 3.0:  # Reasonable ratio range
                # Find closest peak
                if len(peak_values) > 0:
                    distances = np.abs(peak_values - const_value)
                    min_distance = np.min(distances)

                    # Statistical test: is the distribution centered around this constant?
                    # Added check for sufficient observations for t-test
                    if len(ratios) > 1:
                        t_stat, p_value = stats.ttest_1samp(ratios, const_value)
                    else:
                        t_stat, p_value = np.nan, np.nan # Not enough data for t-test


                    results[f'proximity_to_{const_name}'] = {
                        'min_peak_distance': float(min_distance),
                        't_statistic': float(t_stat) if not np.isnan(t_stat) else None,
                        'p_value': float(p_value) if not np.isnan(p_value) else None,
                        'mean_distance': float(abs(np.mean(ratios) - const_value))
                    }
                else:
                     results[f'proximity_to_{const_name}'] = {
                        'min_peak_distance': np.nan,
                        't_statistic': np.nan,
                        'p_value': np.nan,
                        'mean_distance': float(abs(np.mean(ratios) - const_value)) if len(ratios) > 0 else np.nan
                    }
        print(f"    _analyze_ratio_distribution finished. Results: {len(results)} entries.")
        return results

    def _comprehensive_spectral_analysis(self, data: pd.DataFrame) -> Dict:
        """Perform comprehensive frequency domain analysis"""
        print("  _comprehensive_spectral_analysis called.")
        prices = data['Close'].values

        # Ensure prices is a numpy array
        if isinstance(prices, list):
            prices = np.array(prices)

        if len(prices) < 256:
            print(f"    _comprehensive_spectral_analysis: prices too short ({len(prices)}). Returning empty.")
            return {}

        # Compute FFT
        try:
            fft_values = np.fft.fft(prices)
            freqs = np.fft.fftfreq(len(prices))
            power_spectrum = np.abs(fft_values) ** 2
        except Exception as e:
            print(f"    _comprehensive_spectral_analysis: Error during FFT calculation: {e}. Returning empty.")
            return {}


        results = {}

        # Analyze each frequency band
        print("    _comprehensive_spectral_analysis: Analyzing frequency bands.")
        for band_name, (low_freq, high_freq) in enumerate(self.hypothesis_space.frequency_bands):
            band_mask = (np.abs(freqs) >= low_freq) & (np.abs(freqs) <= high_freq)
            if np.any(band_mask):
                band_power = np.sum(power_spectrum[band_mask])
                total_power = np.sum(power_spectrum)

                # Added check for total_power to avoid division by zero
                if total_power > 0:
                    results[f'band_{low_freq}_{high_freq}'] = {
                        'power_ratio': float(band_power / total_power),
                        'peak_frequency': float(freqs[band_mask][np.argmax(power_spectrum[band_mask])]) if np.sum(band_mask) > 0 else np.nan
                    }
                else:
                     results[f'band_{low_freq}_{high_freq}'] = {
                        'power_ratio': 0.0,
                        'peak_frequency': np.nan
                    }


        # Find dominant frequencies
        print("    _comprehensive_spectral_analysis: Finding dominant frequencies.")
        try:
            peak_indices = signal.find_peaks(power_spectrum[:len(power_spectrum)//2],
                                            height=np.max(power_spectrum)*0.1)[0]
        except Exception as e:
            print(f"    _comprehensive_spectral_analysis: Error during peak finding: {e}.")
            peak_indices = []


        if len(peak_indices) > 0:
            dominant_freqs = freqs[peak_indices]
            # Added check for dominant_freqs > 0 before division
            dominant_periods = 1 / (dominant_freqs + 1e-10)

            results['dominant_frequencies'] = {
                'frequencies': dominant_freqs[:10].tolist(),  # Top 10
                'periods': dominant_periods[:10].tolist(),
                'power_values': power_spectrum[peak_indices][:10].tolist()
            }

            # Check if any periods match our constants
            for const_name, const_value in self.hypothesis_space.mathematical_constants.items():
                period_matches = np.abs(dominant_periods - const_value)
                if len(period_matches) > 0:
                    min_match = np.min(period_matches)
                    results[f'period_match_{const_name}'] = float(min_match)
        print(f"  _comprehensive_spectral_analysis finished. Results: {len(results)} entries.")
        return results

    def _scaling_analysis(self, data: pd.DataFrame) -> Dict:
        """Analyze scaling properties (fractality, self-similarity)"""
        print("  _scaling_analysis called.")
        prices = data['Close'].values

        # Ensure prices is a numpy array
        if isinstance(prices, list):
            prices = np.array(prices)

        if len(prices) < 100:
            print(f"    _scaling_analysis: prices too short ({len(prices)}). Returning empty.")
            return {}

        results = {}

        # 1. Hurst Exponent (multiple methods)
        print("    _scaling_analysis: Calculating Hurst exponents.")
        try:
            # R/S Analysis
            hurst_rs = self._calculate_hurst_rs(prices)
            results['hurst_rs'] = float(hurst_rs)
        except Exception as e:
            print(f"    _scaling_analysis: Error calculating Hurst (R/S): {e}.")
            pass

        try:
            # DFA (Detrended Fluctuation Analysis)
            hurst_dfa = self._calculate_hurst_dfa(prices)
            results['hurst_dfa'] = float(hurst_dfa)
        except Exception as e:
            print(f"    _scaling_analysis: Error calculating Hurst (DFA): {e}.")
            pass


        # 2. Multifractal spectrum
        print("    _scaling_analysis: Calculating multifractal spectrum.")
        scaling_exponents = []
        for q in [-5, -2, -1, 0, 1, 2, 5]:
            try:
                exponent = self._calculate_scaling_exponent(prices, q)
                # Added check for valid exponent
                if not np.isnan(exponent) and not np.isinf(exponent):
                    scaling_exponents.append((q, float(exponent)))
            except Exception as e:
                print(f"    _scaling_analysis: Error calculating scaling exponent for q={q}: {e}.")
                pass

        if scaling_exponents:
            results['multifractal_spectrum'] = scaling_exponents

            # Width of multifractal spectrum (indicates complexity)
            exponents_only = [e[1] for e in scaling_exponents]
            # Added check for non-empty exponents_only list
            if exponents_only:
                 results['multifractal_width'] = float(max(exponents_only) - min(exponents_only))
            else:
                 results['multifractal_width'] = np.nan


        # 3. Test for specific scaling ratios
        print("    _scaling_analysis: Testing for specific scaling ratios.")
        scales = [2, 3, 5, 8, 13, 21, 34, 55, 89, 144]
        scale_ratios = []

        for scale in scales:
            if scale < len(prices):
                # Variance at different scales
                var_original = np.var(prices)
                # Added check for sufficient data for scaled variance calculation
                if len(prices) - scale >= scale:
                    var_scaled = np.var([np.mean(prices[i:i+scale])
                                        for i in range(0, len(prices)-scale, scale)])
                    if var_scaled > 0:
                        scale_ratios.append(var_original / var_scaled)

        if scale_ratios:
            results['scale_variance_ratios'] = [float(r) for r in scale_ratios]

            # Check if ratios match any constants
            for const_name, const_value in self.hypothesis_space.mathematical_constants.items():
                distances = [abs(ratio - const_value) for ratio in scale_ratios]
                if distances:
                    results[f'scale_ratio_match_{const_name}'] = float(min(distances))
        print(f"  _scaling_analysis finished. Results: {len(results)} entries.")
        return results

    def _calculate_hurst_rs(self, prices: np.ndarray) -> float:
        """Calculate Hurst exponent using R/S analysis"""
        # Ensure prices is a numpy array
        if isinstance(prices, list):
            prices = np.array(prices)

        lags = range(2, min(100, len(prices)//2))
        # Added check for sufficient data for lags
        if len(lags) < 2: return 0.5 # Not enough data for R/S analysis

        tau = [np.sqrt(np.std(np.subtract(prices[lag:], prices[:-lag]))) for lag in lags]

        if len(tau) > 10:
            # Convert lags to list then array for polyfit
            lags_array = np.array(list(lags))
            tau_array = np.array(tau)
            # Added check for non-zero tau_array before log
            if np.min(tau_array) > 0:
                poly = np.polyfit(np.log(lags_array), np.log(tau_array), 1)
                return poly[0] * 2.0
        return 0.5

    def _calculate_hurst_dfa(self, prices: np.ndarray) -> float:
        """Calculate Hurst exponent using DFA"""
        # Ensure prices is a numpy array
        if isinstance(prices, list):
            prices = np.array(prices)

        scales = np.logspace(1, min(3, np.log10(len(prices)//4)), 20).astype(int)
        fluct = []

        for scale in scales:
            if scale < len(prices):
                # Divide into segments
                segments = len(prices) // scale
                fluctuations = []

                for i in range(segments):
                    segment = prices[i*scale:(i+1)*scale]
                    # Detrend using linear fit
                    x = np.arange(len(segment))
                    # Added check for sufficient data in segment for polyfit
                    if len(segment) > 1:
                        poly = np.polyfit(x, segment, 1)
                        fit = np.polyval(poly, x)
                        fluctuations.append(np.sqrt(np.mean((segment - fit)**2)))

                if fluctuations:
                    fluct.append(np.mean(fluctuations))

        if len(fluct) > 5:
            # Added check for non-zero scales and fluct before log
            if np.min(scales[:len(fluct)]) > 0 and np.min(fluct) > 0:
                poly = np.polyfit(np.log(scales[:len(fluct)]), np.log(fluct), 1)
                return poly[0]
        return 0.5

    def _calculate_scaling_exponent(self, prices: np.ndarray, q: float) -> float:
        """Calculate generalized scaling exponent for multifractal analysis"""
        # Ensure prices is a numpy array
        if isinstance(prices, list):
            prices = np.array(prices)

        scales = np.logspace(1, min(3, np.log10(len(prices)//4)), 10).astype(int)
        fluctuations = []

        for scale in scales:
            if scale < len(prices):
                segments = len(prices) // scale
                seg_flucts = []

                for i in range(segments):
                    segment = prices[i*scale:(i+1)*scale]
                    # Added check for sufficient data in segment for std
                    if len(segment) > 1:
                        seg_flucts.append(np.std(segment))

                if seg_flucts and q != 0:
                     # Added check for positive seg_flucts before power and mean
                    positive_seg_flucts = [f for f in seg_flucts if f > 0]
                    if positive_seg_flucts:
                        fluctuations.append(np.mean([f**q for f in positive_seg_flucts])**(1/q))
                elif seg_flucts:
                     # Added check for positive seg_flucts before log and mean
                    positive_seg_flucts = [f for f in seg_flucts if f > 0]
                    if positive_seg_flucts:
                        fluctuations.append(np.exp(np.mean([np.log(f) for f in positive_seg_flucts])))


        if len(fluctuations) > 3:
            # Added check for non-zero scales and fluctuations before log
            if np.min(scales[:len(fluctuations)]) > 0 and np.min(fluctuations) > 0:
                poly = np.polyfit(np.log(scales[:len(fluctuations)]), np.log(fluctuations), 1)
                return poly[0]
        return 0

    def _autocorrelation_analysis(self, data: pd.DataFrame) -> Dict:
        """Analyze autocorrelation structure"""
        print("  _autocorrelation_analysis called.")
        returns = data['returns'].dropna().values

        if len(returns) < 100:
            print(f"    _autocorrelation_analysis: returns too short ({len(returns)}). Returning empty.")
            return {}

        results = {}

        try:
            # Calculate ACF and PACF
            max_lag = min(100, len(returns)//4)
            # Added check for sufficient data for acf/pacf
            if max_lag > 0 and len(returns) > max_lag:
                acf_values = acf(returns, nlags=max_lag)
                pacf_values = pacf(returns, nlags=max_lag)
            else:
                print(f"    _autocorrelation_analysis: not enough data for acf/pacf calculation (returns={len(returns)}, max_lag={max_lag}). Returning empty.")
                return {}


            # Find significant lags
            # Added check for non-zero len(returns) before division
            confidence_interval = 1.96 / (np.sqrt(len(returns)) + 1e-10)
            significant_acf_lags = np.where(np.abs(acf_values) > confidence_interval)[0]
            significant_pacf_lags = np.where(np.abs(pacf_values) > confidence_interval)[0]

            results['significant_lags'] = {
                'acf': significant_acf_lags.tolist(),
                'pacf': significant_pacf_lags.tolist()
            }

            # Check if any significant lags match our constants
            for const_name, const_value in self.hypothesis_space.mathematical_constants.items():
                if 1 <= const_value <= max_lag:
                    lag_int = int(const_value)
                    # Added bounds checking for acf_values and pacf_values access
                    if lag_int < len(acf_values):
                         results[f'lag_{const_name}_acf'] = float(acf_values[lag_int])
                    if lag_int < len(pacf_values):
                         results[f'lag_{const_name}_pacf'] = float(pacf_values[lag_int])


            # Ljung-Box test for autocorrelation
            print("    _autocorrelation_analysis: Performing Ljung-Box test.")
            try:
                from statsmodels.stats.diagnostic import acorr_ljungbox
                # Added check for sufficient data for Ljung-Box test
                lb_lags = min(40, len(returns)//5)
                if lb_lags > 0 and len(returns) > lb_lags:
                    lb_stats = acorr_ljungbox(returns, lags=lb_lags, return_df=True)

                    results['ljung_box'] = {
                        'min_p_value': float(lb_stats['lb_pvalue'].min()),
                        'autocorrelation_present': lb_stats['lb_pvalue'].min() < 0.05
                    }
                else:
                    print(f"    _autocorrelation_analysis: not enough data for Ljung-Box test (returns={len(returns)}, lags={lb_lags}). Skipping.")

            except Exception as e:
                print(f"    _autocorrelation_analysis: Error during Ljung-Box test: {e}. Skipping.")
                pass

        except Exception as e:
            print(f"  Autocorrelation analysis error: {e}")

        print(f"  _autocorrelation_analysis finished. Results: {len(results)} entries.")
        return results

    def _entropy_analysis(self, data: pd.DataFrame) -> Dict:
        """Calculate various entropy measures"""
        print("  _entropy_analysis called.")
        prices = data['Close'].values
        returns = data['returns'].dropna().values

        # Ensure arrays
        if isinstance(prices, list):
            prices = np.array(prices)
        if isinstance(returns, list):
            returns = np.array(returns)

        if len(returns) < 100:
            print(f"    _entropy_analysis: returns too short ({len(returns)}). Returning empty.")
            return {}

        results = {}

        # 1. Shannon entropy of returns
        print("    _entropy_analysis: Calculating Shannon entropy.")
        # Added check for sufficient data for histogram
        if len(returns) > 1:
            hist, bins = np.histogram(returns, bins=50, density=True)
            hist = hist[hist > 0]  # Remove zeros
            # Added check for non-empty hist before log
            if len(hist) > 0:
                 shannon_entropy = -np.sum(hist * np.log2(hist)) / len(hist)
                 results['shannon_entropy'] = float(shannon_entropy)
            else:
                 results['shannon_entropy'] = 0.0 # Or np.nan, depending on desired behavior
        else:
            print("    _entropy_analysis: not enough data for Shannon entropy histogram. Skipping.")


        # 2. Approximate entropy
        print("    _entropy_analysis: Calculating Approximate entropy.")
        # Added check for sufficient data for approximate entropy
        if len(prices) > 2:
            approx_entropy = self._approximate_entropy(prices, 2, 0.2 * np.std(prices))
            # Added check for valid entropy value
            if not np.isnan(approx_entropy) and not np.isinf(approx_entropy):
                 results['approximate_entropy'] = float(approx_entropy)
            else:
                 results['approximate_entropy'] = np.nan
        else:
            print("    _entropy_analysis: not enough data for Approximate entropy. Skipping.")


        # 3. Sample entropy
        print("    _entropy_analysis: Calculating Sample entropy.")
        # Added check for sufficient data for sample entropy
        if len(prices) > 2:
            sample_entropy = self._sample_entropy(prices, 2, 0.2 * np.std(prices))
            # Added check for valid entropy value
            if not np.isnan(sample_entropy) and not np.isinf(sample_entropy):
                 results['sample_entropy'] = float(sample_entropy)
            else:
                 results['sample_entropy'] = np.nan
        else:
            print("    _entropy_analysis: not enough data for Sample entropy. Skipping.")

        # 4. Permutation entropy
        print("    _entropy_analysis: Calculating Permutation entropy.")
        # Define m and delay before using them in the calculation
        m = 3
        delay = 1
        # Added check for sufficient data for permutation entropy
        min_data_perm_entropy = delay * (m - 1) + 1 # Corrected formula: delay * (m - 1) + 1
        if len(prices) >= min_data_perm_entropy:
            perm_entropy = self._permutation_entropy(prices, m, delay)
            # Added check for valid entropy value
            if not np.isnan(perm_entropy) and not np.isinf(perm_entropy):
                results['permutation_entropy'] = float(perm_entropy)
            else:
                results['permutation_entropy'] = np.nan
        else:
            print(f"    _entropy_analysis: not enough data for Permutation entropy ({len(prices)} < {min_data_perm_entropy}). Skipping.")
            results['permutation_entropy'] = np.nan # Or some other indicator


        # Check if any entropy values match constants
        print("    _entropy_analysis: Checking entropy matches with constants.")
        # Filter out None/NaN values before calculating distances
        valid_entropy_values = [ent for ent in [results.get('shannon_entropy'), results.get('approximate_entropy'), results.get('sample_entropy'), results.get('permutation_entropy')] if ent is not None and not np.isnan(ent)]

        if valid_entropy_values:
            for const_name, const_value in self.hypothesis_space.mathematical_constants.items():
                if 0 < const_value < 10:  # Reasonable entropy range
                    distances = [abs(ent - const_value) for ent in valid_entropy_values]
                    results[f'entropy_match_{const_name}'] = float(min(distances))
        else:
            print("    _entropy_analysis: no valid entropy values calculated to check against constants.")


        print(f"  _entropy_analysis finished. Results: {len(results)} entries.")
        return results

    def _approximate_entropy(self, U: np.ndarray, m: int, r: float) -> float:
        """Calculate approximate entropy"""
        # Ensure U is a numpy array
        if isinstance(U, list):
            U = np.array(U)

        # Added check for sufficient data
        if len(U) < m + 1:
            return np.nan # Not enough data

        def _maxdist(xi, xj, m):
            return max([abs(float(xi[k]) - float(xj[k])) for k in range(m)])

        def _phi(m):
            patterns = np.array([U[i:i+m] for i in range(len(U) - m + 1)])
            C = np.zeros(len(patterns))

            for i in range(len(patterns)):
                matching = np.sum([1 for j in range(len(patterns))
                                  if _maxdist(patterns[i], patterns[j], m) <= r])
                C[i] = matching / len(patterns)

            # Added check for non-zero C before log
            C_positive = C[C > 0]
            if len(C_positive) > 0:
                 return np.sum(np.log(C_positive)) / len(C)
            else:
                 return np.nan # Cannot calculate log if all C are zero


        try:
            phi_m = _phi(m)
            phi_m1 = _phi(m + 1)
            # Added checks for valid phi_m and phi_m1
            if not np.isnan(phi_m) and not np.isnan(phi_m1) and phi_m > 0:
                return -np.log(phi_m1 / phi_m)
            else:
                return np.nan
        except Exception as e:
            print(f"      _approximate_entropy: Error during calculation: {e}.")
            return np.nan

    def _sample_entropy(self, U: np.ndarray, m: int, r: float) -> float:
        """Calculate sample entropy"""
        # Ensure U is a numpy array
        if isinstance(U, list):
            U = np.array(U)

        # Added check for sufficient data
        if len(U) < m + 2: # Sample entropy requires m+2 data points
             return np.nan # Not enough data

        def _maxdist(xi, xj, m):
            return max([abs(float(xi[k]) - float(xj[k])) for k in range(m)])

        def _phi(m):
            patterns = np.array([U[i:i+m] for i in range(len(U) - m + 1)])
            matching = 0

            for i in range(len(patterns)):
                for j in range(i+1, len(patterns)):
                    if _maxdist(patterns[i], patterns[j], m) <= r:
                        matching += 1

            # Added check for sufficient patterns before division
            num_pairs = len(patterns) * (len(patterns) - 1) / 2
            return matching / num_pairs if num_pairs > 0 else 0

        try:
            phi_m = _phi(m)
            phi_m1 = _phi(m + 1)
            # Added checks for valid phi_m and phi_m1 before log
            if phi_m > 0 and phi_m1 > 0:
                return -np.log(phi_m1 / phi_m)
            else:
                return np.nan # Cannot calculate log if phi_m or phi_m1 is zero or negative
        except Exception as e:
            print(f"      _sample_entropy: Error during calculation: {e}.")
            return np.nan

    def _permutation_entropy(self, U: np.ndarray, m: int, delay: int) -> float:
        """Calculate permutation entropy"""
        from itertools import permutations

        # Ensure U is a numpy array
        if isinstance(U, list):
            U = np.array(U)
        n = len(U)
        # Added check for sufficient data
        min_data_points = delay * (m - 1) + 1
        if n < min_data_points:
            return np.nan # Not enough data

        permutations_list = list(permutations(range(m)))
        c = np.zeros(len(permutations_list))

        for i in range(n - delay * (m - 1)):
            # Added check for valid slice indices
            start = i
            end = i + delay * m
            step = delay
            if end <= n and step > 0:
                sorted_indices = np.argsort(U[start:end:step])
                for j, perm in enumerate(permutations_list):
                    if tuple(sorted_indices) == perm:
                        c[j] += 1
                        break
            else:
                # This case should ideally not be reached if n >= min_data_points and delay > 0
                print(f"      _permutation_entropy: Invalid slice indices or step ({start}:{end}:{step}). Skipping segment.")


        c = c[c > 0]
        if len(c) == 0:
            return 0
        c = c / c.sum()
        # Added check for non-zero c before log
        if np.min(c) > 0:
             return -np.sum(c * np.log2(c))
        else:
             # This case should not be reached if c = c[c > 0] and len(c) > 0
             print("      _permutation_entropy: encountered zero values after filtering for log calculation.")
             return np.nan # Or handle as appropriate

# Smoke test: run the detector end to end on synthetic data and print the results.
print("\n--- Running test case ---")

# Seeded generator so every run produces the same synthetic bars and the
# printed results can be compared between runs.
rng = np.random.default_rng(42)
n_rows = 200
open_prices = rng.random(n_rows)
close_prices = rng.random(n_rows)

# Build High/Low around Open/Close so each bar satisfies
# Low <= min(Open, Close) <= max(Open, Close) <= High and stays non-negative.
dummy_data = pd.DataFrame({
    'Close': close_prices,
    'Open': open_prices,
    'High': np.maximum(open_prices, close_prices) * (1 + 0.1 * rng.random(n_rows)),
    'Low': np.minimum(open_prices, close_prices) * (1 - 0.1 * rng.random(n_rows)),
    'Volume': rng.integers(100, 1000, n_rows),
})
# Add a 'returns' column (needed for autocorrelation/entropy).
# pct_change leaves NaN in the first row.
dummy_data['returns'] = dummy_data['Close'].pct_change()

# Create dummy hypothesis space
dummy_hypothesis_space = ComprehensiveHypothesisSpace()

# Instantiate and run the detector
dummy_detector = ExhaustivePatternDetector(dummy_hypothesis_space)
test_results = dummy_detector.detect_all_patterns(dummy_data)

print("Test case finished. Results:")
print(test_results)
print("--- Test case finished ---")


--- Running test case ---
Starting detect_all_patterns...
Testing density convergence...
  _test_density_convergence called with window: 10
  _test_density_convergence finished. Results: 3 entries.
  _test_density_convergence called with window: 20
  _test_density_convergence finished. Results: 3 entries.
  _test_density_convergence called with window: 30
  _test_density_convergence finished. Results: 3 entries.
Testing ratio patterns...
  _test_all_ratios called.
    _analyze_ratio_distribution called with 199 ratios.
    _analyze_ratio_distribution finished. Results: 5 entries.
    _analyze_ratio_distribution called with 200 ratios.
    _analyze_ratio_distribution finished. Results: 5 entries.
    _analyze_ratio_distribution called with 200 ratios.
    _analyze_ratio_distribution finished. Results: 5 entries.
    _analyze_ratio_distribution called with 199 ratios.
    _analyze_ratio_distribution finished. Results: 5 entries.
  _test_all_ratios finished. Results: 4 entries.
Performin