# Autocorrelation Analysis of Muqatta'at

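This notebook uses autocorrelation analysis to detect sequential patterns and repetition structures in the letter sequences of the surahs, compares surahs that open with Muqatta'at (المقطعات) letters against those that do not, and uses the results to test the checksum hypothesis.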

## Analysis Objectives
- Calculate autocorrelation function for letter sequences
- Identify repeating structures at different lags
- Test whether the Muqatta'at letters predict periodic patterns within the surah text
- Analyze cross-correlation between Muqatta'at and surah body


In [None]:
# Import required libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import signal
from scipy.stats import pearsonr
from scipy.signal import correlate
import warnings
# Silence every warning category for the rest of the session.
# NOTE(review): this also hides NumPy RuntimeWarnings (e.g. division by zero
# when a signal has zero variance), so NaN results can appear without notice.
warnings.filterwarnings('ignore')

# Set up plotting
# NOTE(review): the 'seaborn-v0_8' style name presumably requires a recent
# matplotlib release — confirm against the pinned environment.
plt.style.use('seaborn-v0_8')
sns.set_palette("husl")
# Default figure size (inches) and base font size for every figure below.
plt.rcParams['figure.figsize'] = (12, 8)
plt.rcParams['font.size'] = 10

# Import our data utilities
from data_utils import load_quran_data


In [None]:
# Load and process the Quran data
# `processor` is the object returned by the project helper `load_quran_data`;
# the later cells call its get_surah_text / get_muqattaat_letters methods and
# read its `muqattaat_mapping` attribute.
processor = load_quran_data("../datasets/quran-simple-clean.csv")
# `clean_df` is indexed with a 'surah' column below, one row per verse
# (per the printed message — TODO confirm against data_utils).
clean_df = processor.clean_dataset()

# Quick sanity report: verse/surah counts and the size of the two groups
# that the rest of the notebook compares.
print(f"Dataset loaded: {len(clean_df)} verses from {clean_df['surah'].nunique()} surahs")
print(f"Surahs with Muqatta'at: {len(processor.get_surahs_with_muqattaat())}")
print(f"Surahs without Muqatta'at: {len(processor.get_surahs_without_muqattaat())}")


In [None]:
# Helper functions for autocorrelation analysis
def text_to_numeric(text, method='unicode'):
    """Encode a string as a NumPy array of integers, one per retained character.

    Args:
        text: String to encode.
        method: ``'unicode'`` keeps every non-whitespace character and encodes
            it as its Unicode code point. ``'alphabet'`` keeps only the 28
            Arabic letters listed below and encodes each as its 1-based
            position in that list; all other characters are dropped.

    Returns:
        NumPy array with one entry per retained character.

    Raises:
        ValueError: If ``method`` is neither ``'unicode'`` nor ``'alphabet'``.
    """
    # Reject unknown encodings up front so both branches below are valid.
    if method not in ('unicode', 'alphabet'):
        raise ValueError("Method must be 'unicode' or 'alphabet'")

    if method == 'unicode':
        # str.strip() is empty (falsy) for whitespace, which filters it out.
        codes = [ord(symbol) for symbol in text if symbol.strip()]
    else:
        arabic_letters = 'ابتثجحخدذرزسشصضطظعغفقكلمنهوي'
        codes = [
            arabic_letters.find(symbol) + 1
            for symbol in text
            if symbol in arabic_letters
        ]

    return np.array(codes)

def calculate_autocorrelation(signal_data, max_lags=None):
    """Return the normalized autocorrelation of a 1-D signal for lags ``0..max_lags-1``.

    The signal is mean-centred, and the sum of products at each lag is divided
    by the lag-0 sum, so the first returned value is 1.0. Only the requested
    lags are evaluated (one dot product per lag) instead of building the full
    correlation of the signal with itself, which is quadratic in the signal
    length and dominated the runtime for long texts.

    Args:
        signal_data: One-dimensional numeric sequence (list or array).
        max_lags: Number of lags to return, starting at lag 0. Defaults to a
            quarter of the signal length. Values are clamped to the range
            ``[0, len(signal_data)]``.

    Returns:
        Float array of autocorrelation values, index ``k`` holding lag ``k``.
        A constant signal has no variance, so its autocorrelation is
        undefined; by convention this returns 1.0 at lag 0 and 0.0 at every
        other lag rather than an array of NaN.

    Raises:
        ValueError: If ``signal_data`` is empty or not one-dimensional.
    """
    values = np.asarray(signal_data, dtype=float)
    if values.ndim != 1:
        raise ValueError("signal_data must be one-dimensional")
    sample_count = values.size
    if sample_count == 0:
        raise ValueError("signal_data must contain at least one sample")

    if max_lags is None:
        max_lags = sample_count // 4
    lag_count = max(0, min(int(max_lags), sample_count))

    # Compare min and max instead of testing the variance for zero: the mean
    # of a constant float array may differ from the constant by rounding.
    if values.min() == values.max():
        flat = np.zeros(lag_count)
        if lag_count:
            flat[0] = 1.0
        return flat

    centered = values - values.mean()
    # Lag-0 sum of squares; dividing by it makes the scale (std) irrelevant.
    energy = np.dot(centered, centered)
    lagged_sums = np.array([
        np.dot(centered[:sample_count - lag], centered[lag:])
        for lag in range(lag_count)
    ], dtype=float)
    return lagged_sums / energy

def find_autocorrelation_peaks(autocorr, min_height=0.1, min_distance=5):
    """Locate local maxima of an autocorrelation curve via ``scipy.signal.find_peaks``.

    Args:
        autocorr: Sequence of autocorrelation values.
        min_height: Passed to ``find_peaks`` as ``height``.
        min_distance: Passed to ``find_peaks`` as ``distance``.

    Returns:
        Tuple ``(peaks, properties)`` exactly as produced by ``find_peaks``.
    """
    peak_indices, peak_info = signal.find_peaks(
        autocorr,
        height=min_height,
        distance=min_distance,
    )
    return peak_indices, peak_info

def calculate_cross_correlation(signal1, signal2, max_lags=None):
    """Return the normalized cross-correlation of two 1-D signals around lag 0.

    Both signals are z-scored, correlated with ``scipy.signal.correlate`` in
    ``'full'`` mode, and divided by ``len(signal1)``. For equal-length signals
    the zero-lag value is therefore the Pearson correlation coefficient.

    The returned window always has length ``2 * max_lags``. Index
    ``max_lags + k`` holds lag ``k`` for ``k`` in ``[-max_lags, max_lags)``,
    where lag ``k`` pairs ``signal1[i]`` with ``signal2[i - k]``. Callers can
    therefore read the zero-lag value at ``len(result) // 2``.

    Args:
        signal1: One-dimensional numeric sequence.
        signal2: One-dimensional numeric sequence; may differ in length from
            ``signal1``.
        max_lags: Half-width of the returned window. Defaults to a quarter of
            the shorter signal's length; negative values are treated as 0.

    Returns:
        Float array of length ``2 * max_lags``. Lags at which the signals do
        not overlap are 0.0. If either signal is constant, the correlation is
        undefined and an all-zero window is returned rather than NaN.

    Raises:
        ValueError: If either signal is empty or not one-dimensional.
    """
    first = np.asarray(signal1, dtype=float)
    second = np.asarray(signal2, dtype=float)
    if first.ndim != 1 or second.ndim != 1:
        raise ValueError("signal1 and signal2 must be one-dimensional")
    if first.size == 0 or second.size == 0:
        raise ValueError("signal1 and signal2 must each contain at least one sample")

    if max_lags is None:
        max_lags = min(first.size, second.size) // 4
    half_width = max(0, int(max_lags))

    window = np.zeros(2 * half_width)
    # A constant signal has zero standard deviation, so z-scoring would divide
    # by zero; report "no correlation" instead.
    is_constant = first.min() == first.max() or second.min() == second.max()
    if half_width == 0 or is_constant:
        return window

    first_norm = (first - first.mean()) / first.std()
    second_norm = (second - second.mean()) / second.std()
    full = correlate(first_norm, second_norm, mode='full') / first.size

    # In 'full' mode the output covers lags -(len(signal2) - 1) .. len(signal1) - 1,
    # so lag 0 sits at index len(signal2) - 1, not at the midpoint of the array
    # (the two only coincide when the signals have equal length).
    zero_lag_index = second.size - 1
    start = zero_lag_index - half_width
    stop = zero_lag_index + half_width

    # Copy the part of the requested window that exists; anything outside the
    # computed range corresponds to non-overlapping signals and stays 0.
    lo = max(start, 0)
    hi = min(stop, full.size)
    window[lo - start:hi - start] = full[lo:hi]
    return window

def calculate_periodicity_strength(autocorr):
    """Return the autocorrelation value at the first detected peak after lag 0.

    Peaks are searched in ``autocorr[1:]`` with ``find_autocorrelation_peaks``
    using ``min_height=0.05``.

    Args:
        autocorr: Autocorrelation values, index ``k`` holding lag ``k``.

    Returns:
        ``autocorr`` at the first detected peak, or 0 when no peak is found.
    """
    peak_positions, _ = find_autocorrelation_peaks(autocorr[1:], min_height=0.05)

    if len(peak_positions) == 0:
        return 0

    # Positions are relative to autocorr[1:], so shift by one to index autocorr.
    first_peak_lag = peak_positions[0] + 1
    return autocorr[first_peak_lag]

# Confirmation message so the cell output shows that the helper definitions ran.
print("Autocorrelation analysis helper functions defined.")


## 1. Autocorrelation Analysis of Individual Surahs


In [None]:
# Autocorrelation metrics for a few representative surahs from each group
sample_surahs_with = [2, 19, 36]  # Examples with Muqatta'at
sample_surahs_without = [4, 6, 8]  # Examples without Muqatta'at

autocorr_results = []

for surah_num in [*sample_surahs_with, *sample_surahs_without]:
    surah_text = processor.get_surah_text(surah_num, include_muqattaat=False)
    numeric_sequence = text_to_numeric(surah_text, method='unicode')

    # Skip sequences of 20 characters or fewer: not enough data.
    if len(numeric_sequence) <= 20:
        continue

    autocorr = calculate_autocorrelation(numeric_sequence, max_lags=50)
    peaks, peak_properties = find_autocorrelation_peaks(autocorr)

    # Strength of the first peak after lag 0 (0 when there is none).
    periodicity_strength = calculate_periodicity_strength(autocorr)

    # Mean step between consecutive values over the first 10 lags, i.e. how
    # quickly the autocorrelation falls off.
    decay_rate = np.mean(np.diff(autocorr[:10]))

    # The lag-based metrics below need at least one lag beyond lag 0.
    has_nonzero_lags = len(autocorr) > 1

    result_row = {
        'surah': surah_num,
        'has_muqattaat': surah_num in processor.muqattaat_mapping,
        'text_length': len(surah_text),
        'numeric_length': len(numeric_sequence),
        'periodicity_strength': periodicity_strength,
        'decay_rate': decay_rate,
        'num_peaks': len(peaks),
        'max_autocorr': np.max(autocorr[1:]) if has_nonzero_lags else 0,
        'autocorr_at_lag_1': autocorr[1] if has_nonzero_lags else 0,
    }
    autocorr_results.append(result_row)

autocorr_df = pd.DataFrame(autocorr_results)
summary_columns = ['surah', 'has_muqattaat', 'periodicity_strength', 'decay_rate', 'num_peaks']

print(f"Autocorrelation analysis completed for {len(autocorr_df)} surahs")
print("\nAutocorrelation Results Summary:")
print(autocorr_df[summary_columns].to_string(index=False))


## 2. Cross-Correlation Analysis with Muqatta'at


In [None]:
# Analyze cross-correlation between Muqatta'at and surah content
cross_corr_results = []

for surah_num in processor.get_surahs_with_muqattaat():
    muqattaat_letters = processor.get_muqattaat_letters(surah_num)
    surah_text = processor.get_surah_text(surah_num, include_muqattaat=False)

    # Convert to numeric sequences (whitespace is dropped by text_to_numeric)
    muqattaat_numeric = text_to_numeric(muqattaat_letters, method='unicode')
    surah_numeric = text_to_numeric(surah_text, method='unicode')

    # Need at least one opening letter and more than 10 body characters.
    if len(muqattaat_numeric) == 0 or len(surah_numeric) <= 10:
        continue

    if muqattaat_numeric.min() == muqattaat_numeric.max():
        # A single-letter (or otherwise constant) opening has zero variance,
        # so its normalized correlation is undefined. Record NaN explicitly
        # instead of a NaN-derived maximum and a meaningless peak lag.
        max_cross_corr = np.nan
        max_cross_corr_lag = np.nan
        correlation_at_zero = np.nan
    else:
        cross_corr = calculate_cross_correlation(muqattaat_numeric, surah_numeric, max_lags=20)

        # The helper returns a window of 2 * max_lags values whose midpoint
        # is treated as lag 0.
        zero_lag_index = len(cross_corr) // 2
        abs_cross_corr = np.abs(cross_corr)
        peak_index = int(np.argmax(abs_cross_corr))

        max_cross_corr = abs_cross_corr[peak_index]
        max_cross_corr_lag = peak_index - zero_lag_index
        correlation_at_zero = cross_corr[zero_lag_index]

    # Share of surah characters that are one of the opening letters. The
    # letter set excludes whitespace so that, should the Muqatta'at string
    # contain spaces, spaces in the surah text are not counted as matches.
    # The denominator is still the full text length, whitespace included.
    letter_set = {char for char in muqattaat_letters if char.strip()}
    matching_chars = sum(1 for char in surah_text if char in letter_set)

    cross_corr_results.append({
        'surah': surah_num,
        'muqattaat_letters': muqattaat_letters,
        'muqattaat_length': len(muqattaat_numeric),
        'surah_length': len(surah_numeric),
        'max_cross_corr': max_cross_corr,
        'max_cross_corr_lag': max_cross_corr_lag,
        'correlation_at_zero': correlation_at_zero,
        'muqattaat_frequency_in_surah': matching_chars / len(surah_text)
    })

cross_corr_df = pd.DataFrame(cross_corr_results)
print(f"Cross-correlation analysis completed for {len(cross_corr_df)} surahs with Muqatta'at")
print("\nCross-Correlation Results Summary:")
print(cross_corr_df[['surah', 'muqattaat_letters', 'max_cross_corr', 'max_cross_corr_lag', 'correlation_at_zero']].to_string(index=False))


## 3. Comprehensive Autocorrelation Analysis


In [None]:
# Run the autocorrelation metrics over every surah in the cleaned dataset
all_autocorr_results = []

for surah_num in sorted(clean_df['surah'].unique()):
    surah_text = processor.get_surah_text(surah_num, include_muqattaat=False)
    numeric_sequence = text_to_numeric(surah_text, method='unicode')

    # Skip sequences of 30 characters or fewer: too short for 30 lags.
    if len(numeric_sequence) <= 30:
        continue

    try:
        autocorr = calculate_autocorrelation(numeric_sequence, max_lags=30)
        peaks, peak_properties = find_autocorrelation_peaks(autocorr)
        lag_count = len(autocorr)

        # Each lag-based metric falls back to 0 when that lag is not available.
        surah_metrics = {
            'surah': surah_num,
            'has_muqattaat': surah_num in processor.muqattaat_mapping,
            'text_length': len(surah_text),
            'numeric_length': len(numeric_sequence),
            'periodicity_strength': calculate_periodicity_strength(autocorr),
            # Mean step between consecutive values over the first 5 lags.
            'decay_rate': np.mean(np.diff(autocorr[:5])),
            'num_peaks': len(peaks),
            'max_autocorr': np.max(autocorr[1:]) if lag_count > 1 else 0,
            'autocorr_lag_1': autocorr[1] if lag_count > 1 else 0,
            'autocorr_lag_2': autocorr[2] if lag_count > 2 else 0,
            'autocorr_lag_5': autocorr[5] if lag_count > 5 else 0,
        }
        all_autocorr_results.append(surah_metrics)
    except Exception as e:
        # Best effort: report the failing surah and move on to the next one.
        print(f"Error processing surah {surah_num}: {e}")

all_autocorr_df = pd.DataFrame(all_autocorr_results)
print(f"Comprehensive autocorrelation analysis completed for {len(all_autocorr_df)} surahs")

# Split the results into the two groups compared in the remaining cells
muqattaat_flag = all_autocorr_df['has_muqattaat']
with_muqattaat_autocorr = all_autocorr_df[muqattaat_flag.eq(True)]
without_muqattaat_autocorr = all_autocorr_df[muqattaat_flag.eq(False)]

mean_strength_with = with_muqattaat_autocorr['periodicity_strength'].mean()
mean_strength_without = without_muqattaat_autocorr['periodicity_strength'].mean()

print(f"\nAutocorrelation Analysis Comparison:")
print(f"Surahs with Muqatta'at: {len(with_muqattaat_autocorr)}")
print(f"Surahs without Muqatta'at: {len(without_muqattaat_autocorr)}")
print(f"\nAverage periodicity strength (with Muqatta'at): {mean_strength_with:.4f}")
print(f"Average periodicity strength (without Muqatta'at): {mean_strength_without:.4f}")


## 4. Visualizations


In [None]:
# 2x3 overview figure: three group histograms on the top row, two example
# autocorrelation curves and the cross-correlation scatter on the bottom row.
fig, axes = plt.subplots(2, 3, figsize=(18, 12))

# Plots 1-3: distribution of each metric, with vs. without Muqatta'at.
# Each entry: (column, number of bins, panel title, x-axis label).
histogram_panels = [
    ('periodicity_strength', 15, 'Periodicity Strength Distribution', 'Periodicity Strength'),
    ('decay_rate', 15, 'Autocorrelation Decay Rate Distribution', 'Decay Rate'),
    ('num_peaks', 10, 'Number of Autocorrelation Peaks', 'Number of Peaks'),
]
for panel, (column, bin_count, panel_title, x_label) in zip(axes[0], histogram_panels):
    panel.hist(
        [with_muqattaat_autocorr[column], without_muqattaat_autocorr[column]],
        bins=bin_count,
        alpha=0.7,
        label=["With Muqatta'at", "Without Muqatta'at"],
    )
    panel.set_title(panel_title)
    panel.set_xlabel(x_label)
    panel.set_ylabel('Frequency')
    panel.legend()

# Plots 4-5: autocorrelation curves (30 lags) for one surah from each group.
surah_2_text = processor.get_surah_text(2, include_muqattaat=False)
surah_2_numeric = text_to_numeric(surah_2_text, method='unicode')
surah_2_autocorr = calculate_autocorrelation(surah_2_numeric, max_lags=30)

surah_4_text = processor.get_surah_text(4, include_muqattaat=False)
surah_4_numeric = text_to_numeric(surah_4_text, method='unicode')
surah_4_autocorr = calculate_autocorrelation(surah_4_numeric, max_lags=30)

# Each entry: (axes, curve, line format, legend label, panel title).
curve_panels = [
    (axes[1, 0], surah_2_autocorr, 'b-', 'Surah 2 (الم)',
     'Autocorrelation Function - Surah 2 (الم)'),
    (axes[1, 1], surah_4_autocorr, 'r-', "Surah 4 (No Muqatta'at)",
     "Autocorrelation Function - Surah 4 (No Muqatta'at)"),
]
for panel, curve, line_format, curve_label, panel_title in curve_panels:
    panel.plot(range(len(curve)), curve, line_format, linewidth=2, label=curve_label)
    panel.set_title(panel_title)
    panel.set_xlabel('Lag')
    panel.set_ylabel('Autocorrelation')
    panel.grid(True, alpha=0.3)

# Plot 6: peak cross-correlation per surah, or a placeholder when the
# cross-correlation table is empty.
cross_panel = axes[1, 2]
if len(cross_corr_df) > 0:
    cross_panel.scatter(
        cross_corr_df['surah'],
        cross_corr_df['max_cross_corr'],
        alpha=0.7,
        s=60,
        color='green',
    )
    cross_panel.set_title("Maximum Cross-Correlation Between Muqatta'at and Surah Content")
    cross_panel.set_xlabel('Surah Number')
    cross_panel.set_ylabel('Maximum Cross-Correlation')
    cross_panel.grid(True, alpha=0.3)
else:
    cross_panel.text(
        0.5, 0.5, 'No cross-correlation data available',
        ha='center', va='center', transform=cross_panel.transAxes,
    )
    cross_panel.set_title('Cross-Correlation Analysis')

plt.tight_layout()
plt.show()


## 5. Statistical Analysis and Summary


In [None]:
# Two-sample t-tests comparing the two surah groups on each autocorrelation
# metric, followed by a cross-correlation summary and a plain-text verdict.
from scipy import stats

print("AUTOCORRELATION ANALYSIS SUMMARY")
print("=" * 50)

# --- Periodicity strength ---
periodicity_with = with_muqattaat_autocorr['periodicity_strength']
periodicity_without = without_muqattaat_autocorr['periodicity_strength']
p_value_period = stats.ttest_ind(periodicity_with, periodicity_without).pvalue

print(f"\nPeriodicity Strength Analysis:")
print(f"- Average periodicity (with Muqatta'at): {periodicity_with.mean():.4f}")
print(f"- Average periodicity (without Muqatta'at): {periodicity_without.mean():.4f}")
print(f"- T-test p-value: {p_value_period:.4f}")
print(f"- Significant difference: {'Yes' if p_value_period < 0.05 else 'No'}")

# --- Decay rate ---
decay_with = with_muqattaat_autocorr['decay_rate']
decay_without = without_muqattaat_autocorr['decay_rate']
p_value_decay = stats.ttest_ind(decay_with, decay_without).pvalue

print(f"\nDecay Rate Analysis:")
print(f"- Average decay rate (with Muqatta'at): {decay_with.mean():.4f}")
print(f"- Average decay rate (without Muqatta'at): {decay_without.mean():.4f}")
print(f"- T-test p-value: {p_value_decay:.4f}")
print(f"- Significant difference: {'Yes' if p_value_decay < 0.05 else 'No'}")

# --- Number of peaks ---
peaks_with = with_muqattaat_autocorr['num_peaks']
peaks_without = without_muqattaat_autocorr['num_peaks']
p_value_peaks = stats.ttest_ind(peaks_with, peaks_without).pvalue

print(f"\nNumber of Peaks Analysis:")
print(f"- Average peaks (with Muqatta'at): {peaks_with.mean():.4f}")
print(f"- Average peaks (without Muqatta'at): {peaks_without.mean():.4f}")
print(f"- T-test p-value: {p_value_peaks:.4f}")
print(f"- Significant difference: {'Yes' if p_value_peaks < 0.05 else 'No'}")

# --- Cross-correlation (only when the table has rows) ---
has_cross_corr = len(cross_corr_df) > 0
if has_cross_corr:
    strong_rows = cross_corr_df[cross_corr_df['max_cross_corr'] > 0.3]
    print(f"\nCross-Correlation Analysis:")
    print(f"- Average maximum cross-correlation: {cross_corr_df['max_cross_corr'].mean():.4f}")
    print(f"- Average correlation at zero lag: {cross_corr_df['correlation_at_zero'].mean():.4f}")
    print(f"- Surahs with strong cross-correlation (>0.3): {len(strong_rows)}")
    print(f"- Average Muqatta'at frequency in surahs: {cross_corr_df['muqattaat_frequency_in_surah'].mean():.4f}")

# --- Key insights: one verdict line per test at the 0.05 level ---
print(f"\nKey Insights:")
print(
    "✓ Significant difference in periodicity strength between surahs with and without Muqatta'at"
    if p_value_period < 0.05
    else "✗ No significant difference in periodicity strength between the two groups"
)
print(
    "✓ Significant difference in autocorrelation decay rates between the two groups"
    if p_value_decay < 0.05
    else "✗ No significant difference in autocorrelation decay rates between the two groups"
)
print(
    "✓ Significant difference in number of autocorrelation peaks between the two groups"
    if p_value_peaks < 0.05
    else "✗ No significant difference in number of autocorrelation peaks between the two groups"
)
# The cross-correlation verdict uses the mean peak value with a 0.2 cut-off.
print(
    "✓ Strong cross-correlation between Muqatta'at and surah content detected"
    if has_cross_corr and cross_corr_df['max_cross_corr'].mean() > 0.2
    else "✗ Weak cross-correlation between Muqatta'at and surah content"
)

# --- Closing notes ---
closing_lines = (
    "\nFinal Summary:",
    "- All four analysis notebooks have been completed",
    "- Basic statistics, information theory, frequency domain, and autocorrelation analyses",
    "- Comprehensive visualizations and statistical tests performed",
    "- Results provide insights into the Muqatta'at checksum hypothesis",
    "\nNext Steps:",
    "- Review all results and draw conclusions about the checksum hypothesis",
    "- Consider additional advanced analysis techniques if needed",
    "- Prepare final report with findings and recommendations",
)
for line in closing_lines:
    print(line)
