# Frequency Domain Analysis of Muqatta'at

This notebook applies the Fast Fourier Transform (FFT) to numerically encoded surah text in order to detect spectral patterns associated with the Muqatta'at (المقطعات) letters and to test the checksum hypothesis.

## Analysis Objectives
- Convert letter sequences to numeric representations
- Apply Fast Fourier Transform to detect spectral patterns
- Identify dominant frequencies correlated with Muqatta'at
- Compare spectral signatures between surahs with/without Muqatta'at


In [None]:
# Import required libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.fft import fft, fftfreq, ifft
from scipy import signal
from scipy.stats import pearsonr
import warnings
warnings.filterwarnings('ignore')

# Set up plotting
plt.style.use('seaborn-v0_8')
sns.set_palette("husl")
plt.rcParams['figure.figsize'] = (12, 8)
plt.rcParams['font.size'] = 10

# Import our data utilities
from data_utils import load_quran_data


In [None]:
# Load the Quran dataset through the project helper and derive the verse table.
# NOTE(review): clean_dataset() is assumed to return a pandas DataFrame with a
# 'surah' column (it is indexed that way below) - confirm in data_utils.
processor = load_quran_data("../datasets/quran-simple-clean.csv")
clean_df = processor.clean_dataset()

# Report the dataset size, then how the surahs split by presence of Muqatta'at.
verse_count = len(clean_df)
surah_count = clean_df['surah'].nunique()
print(f"Dataset loaded: {verse_count} verses from {surah_count} surahs")

surahs_with_letters = processor.get_surahs_with_muqattaat()
print(f"Surahs with Muqatta'at: {len(surahs_with_letters)}")

surahs_without_letters = processor.get_surahs_without_muqattaat()
print(f"Surahs without Muqatta'at: {len(surahs_without_letters)}")


In [None]:
# Helper functions for frequency domain analysis
def text_to_numeric(text, method='unicode'):
    """
    Encode a text string as a numeric sequence usable as FFT input.

    Args:
        text: Arabic text string
        method: 'unicode' maps every non-whitespace character to its Unicode
            code point; 'alphabet' maps each letter found in the built-in
            28-letter table to its 1-based position and drops all other
            characters

    Returns:
        NumPy array holding one number per retained character

    Raises:
        ValueError: if method is neither 'unicode' nor 'alphabet'
    """
    if method not in ('unicode', 'alphabet'):
        raise ValueError("Method must be 'unicode' or 'alphabet'")

    if method == 'unicode':
        # str.strip returns '' (falsy) for whitespace, so filter() drops it
        visible_chars = filter(str.strip, text)
        return np.array(list(map(ord, visible_chars)))

    # Map Arabic letters to numbers (basic mapping)
    arabic_letters = 'ابتثجحخدذرزسشصضطظعغفقكلمنهوي'
    # find() yields -1 for characters outside the table; those are skipped
    positions = (arabic_letters.find(ch) for ch in text)
    return np.array([pos + 1 for pos in positions if pos >= 0])

def calculate_power_spectrum(signal_data):
    """Return the positive-frequency half of a Hann-windowed power spectrum.

    Args:
        signal_data: 1-D numeric sequence

    Returns:
        Tuple (frequencies, power) covering the first len(signal_data)//2 FFT
        bins. Frequencies come from fftfreq with its default sample spacing;
        power is the squared magnitude of the FFT of the windowed signal.
    """
    n_samples = len(signal_data)
    half = n_samples // 2

    # Taper the ends with a Hann window to reduce spectral leakage
    taper = signal.windows.hann(n_samples)
    spectrum = fft(signal_data * taper)

    power = np.abs(spectrum) ** 2
    frequency_axis = fftfreq(n_samples)

    # The first half of the FFT output holds the non-negative frequencies
    return frequency_axis[:half], power[:half]

def find_dominant_frequencies(freqs, psd, num_peaks=5):
    """Find the strongest local maxima of a power spectrum.

    A bin counts as a peak when it is a local maximum whose power is at least
    the mean power of the spectrum; peaks must be separated by at least
    len(psd)//20 bins (never less than one bin).

    Args:
        freqs: frequency axis, same length as psd
        psd: power value for each frequency bin
        num_peaks: maximum number of peaks to return

    Returns:
        Tuple (peak_frequencies, peak_powers) ordered from strongest to
        weakest peak. Both arrays may hold fewer than num_peaks entries and
        are empty when the spectrum is empty or has no qualifying peak.
    """
    freqs = np.asarray(freqs)
    psd = np.asarray(psd)
    if psd.size == 0:
        return freqs[:0], psd[:0]

    # find_peaks raises ValueError for distance < 1, which len(psd)//20
    # produces for any spectrum shorter than 20 bins, so clamp to 1.
    min_separation = max(1, len(psd) // 20)
    peaks, _ = signal.find_peaks(psd, height=np.mean(psd), distance=min_separation)

    # Rank the detected peaks by power, strongest first
    strongest_first = np.argsort(psd[peaks])[::-1]

    top_peaks = peaks[strongest_first[:num_peaks]]
    return freqs[top_peaks], psd[top_peaks]

def calculate_spectral_entropy(psd):
    """Return the Shannon entropy (in bits) of a power spectrum.

    The spectrum is scaled to sum to one and treated as a probability
    distribution over frequency bins.
    """
    probabilities = psd / np.sum(psd)

    # The small offset keeps log2 finite for bins with zero power
    log_terms = np.log2(probabilities + 1e-10)
    return -np.sum(probabilities * log_terms)

# Confirmation message printed once the helper definitions in this cell have run
print("Frequency domain analysis helper functions defined.")


## 1. FFT Analysis of Individual Surahs


In [None]:
# Run the FFT pipeline on a few hand-picked surahs from each group
sample_surahs_with = [2, 19, 36]  # Examples with Muqatta'at
sample_surahs_without = [4, 6, 8]  # Examples without Muqatta'at

fft_results = []

for surah_num in sample_surahs_with + sample_surahs_without:
    # Fetch the surah text and encode it as Unicode code points
    surah_text = processor.get_surah_text(surah_num, include_muqattaat=False)
    numeric_sequence = text_to_numeric(surah_text, method='unicode')

    # Skip sequences too short to analyse
    if len(numeric_sequence) <= 10:
        continue

    # Spectrum, strongest peaks, and entropy for this surah
    freqs, psd = calculate_power_spectrum(numeric_sequence)
    dom_freqs, dom_psd = find_dominant_frequencies(freqs, psd)
    spectral_entropy = calculate_spectral_entropy(psd)

    record = dict(
        surah=surah_num,
        has_muqattaat=surah_num in processor.muqattaat_mapping,
        text_length=len(surah_text),
        numeric_length=len(numeric_sequence),
        spectral_entropy=spectral_entropy,
        dominant_frequencies=dom_freqs,
        dominant_powers=dom_psd,
        # Frequency of the single strongest bin in the spectrum
        max_frequency=freqs[np.argmax(psd)],
        total_power=np.sum(psd),
    )
    fft_results.append(record)

fft_df = pd.DataFrame(fft_results)
summary_columns = ['surah', 'has_muqattaat', 'spectral_entropy', 'max_frequency', 'total_power']

print(f"FFT analysis completed for {len(fft_df)} surahs")
print("\nFFT Results Summary:")
print(fft_df[summary_columns].to_string(index=False))


## 2. Comprehensive FFT Analysis


In [None]:
# Run the FFT pipeline over every surah in the dataset
all_fft_results = []

for surah_num in sorted(clean_df['surah'].unique()):
    surah_text = processor.get_surah_text(surah_num, include_muqattaat=False)
    numeric_sequence = text_to_numeric(surah_text, method='unicode')

    # Sequences this short do not give a meaningful FFT
    if len(numeric_sequence) <= 20:
        continue

    try:
        freqs, psd = calculate_power_spectrum(numeric_sequence)
        dom_freqs, dom_psd = find_dominant_frequencies(freqs, psd, num_peaks=3)

        # Summary statistics of the spectrum
        spectral_entropy = calculate_spectral_entropy(psd)
        max_power = np.max(psd)
        mean_power = np.mean(psd)
        power_std = np.std(psd)

        # Missing peaks are recorded as 0 so every row has three entries
        n_found = len(dom_freqs)
        top_three = [dom_freqs[rank] if n_found > rank else 0 for rank in range(3)]

        row = {
            'surah': surah_num,
            'has_muqattaat': surah_num in processor.muqattaat_mapping,
            'text_length': len(surah_text),
            'numeric_length': len(numeric_sequence),
            'spectral_entropy': spectral_entropy,
            'max_power': max_power,
            'mean_power': mean_power,
            'power_std': power_std,
            'dominant_freq_1': top_three[0],
            'dominant_freq_2': top_three[1],
            'dominant_freq_3': top_three[2],
            'total_power': np.sum(psd),
        }
        all_fft_results.append(row)
    except Exception as e:
        # Report the failure and move on to the next surah
        print(f"Error processing surah {surah_num}: {e}")

all_fft_df = pd.DataFrame(all_fft_results)
print(f"Comprehensive FFT analysis completed for {len(all_fft_df)} surahs")

# Split the results into the two groups being compared
has_letters = all_fft_df['has_muqattaat'].astype(bool)
with_muqattaat_fft = all_fft_df[has_letters]
without_muqattaat_fft = all_fft_df[~has_letters]

mean_entropy_with = with_muqattaat_fft['spectral_entropy'].mean()
mean_entropy_without = without_muqattaat_fft['spectral_entropy'].mean()

print("\nSpectral Analysis Comparison:")
print(f"Surahs with Muqatta'at: {len(with_muqattaat_fft)}")
print(f"Surahs without Muqatta'at: {len(without_muqattaat_fft)}")
print(f"\nAverage spectral entropy (with Muqatta'at): {mean_entropy_with:.4f}")
print(f"Average spectral entropy (without Muqatta'at): {mean_entropy_without:.4f}")


## 3. Visualizations


In [None]:
# Create comprehensive frequency domain visualizations: a 2x3 grid comparing
# the spectral metrics of surahs with and without Muqatta'at.
fig, axes = plt.subplots(2, 3, figsize=(18, 12))

# Plot 1: Spectral entropy comparison
axes[0, 0].hist([with_muqattaat_fft['spectral_entropy'], without_muqattaat_fft['spectral_entropy']], 
                bins=15, alpha=0.7, label=['With Muqatta\'at', 'Without Muqatta\'at'])
axes[0, 0].set_title('Spectral Entropy Distribution')
axes[0, 0].set_xlabel('Spectral Entropy')
axes[0, 0].set_ylabel('Frequency')
axes[0, 0].legend()

# Plot 2: Maximum power comparison
axes[0, 1].hist([with_muqattaat_fft['max_power'], without_muqattaat_fft['max_power']], 
                bins=15, alpha=0.7, label=['With Muqatta\'at', 'Without Muqatta\'at'])
axes[0, 1].set_title('Maximum Power Distribution')
axes[0, 1].set_xlabel('Maximum Power')
axes[0, 1].set_ylabel('Frequency')
axes[0, 1].legend()

# Plot 3: Dominant frequency 1 comparison
axes[0, 2].scatter(with_muqattaat_fft['surah'], with_muqattaat_fft['dominant_freq_1'], 
                    alpha=0.7, s=60, label='With Muqatta\'at', color='blue')
axes[0, 2].scatter(without_muqattaat_fft['surah'], without_muqattaat_fft['dominant_freq_1'], 
                  alpha=0.7, s=60, label='Without Muqatta\'at', color='red')
axes[0, 2].set_title('Dominant Frequency 1 by Surah')
axes[0, 2].set_xlabel('Surah Number')
axes[0, 2].set_ylabel('Dominant Frequency 1')
axes[0, 2].legend()

# Plot 4: Power spectrum for a specific surah with Muqatta'at
surah_2_text = processor.get_surah_text(2, include_muqattaat=False)
surah_2_numeric = text_to_numeric(surah_2_text, method='unicode')
freqs_2, psd_2 = calculate_power_spectrum(surah_2_numeric)

axes[1, 0].plot(freqs_2, psd_2, alpha=0.8, linewidth=1)
axes[1, 0].set_title('Power Spectrum - Surah 2 (الم)')
axes[1, 0].set_xlabel('Frequency')
axes[1, 0].set_ylabel('Power Spectral Density')
# The positive half-spectrum runs from 0 up to 0.5 cycles per sample, so this
# pins the axis to the full available range.
axes[1, 0].set_xlim(0, 0.5)

# Plot 5: Power spectrum for a specific surah without Muqatta'at
surah_4_text = processor.get_surah_text(4, include_muqattaat=False)
surah_4_numeric = text_to_numeric(surah_4_text, method='unicode')
freqs_4, psd_4 = calculate_power_spectrum(surah_4_numeric)

axes[1, 1].plot(freqs_4, psd_4, alpha=0.8, linewidth=1, color='red')
axes[1, 1].set_title('Power Spectrum - Surah 4 (No Muqatta\'at)')
axes[1, 1].set_xlabel('Frequency')
axes[1, 1].set_ylabel('Power Spectral Density')
# Same full positive-frequency range as plot 4 so the two panels are comparable
axes[1, 1].set_xlim(0, 0.5)

# Plot 6: Spectral entropy vs text length, coloured by group.
# The boolean flag is cast to 0/1 so it maps cleanly onto the colormap.
entropy_scatter = axes[1, 2].scatter(all_fft_df['text_length'], all_fft_df['spectral_entropy'], 
                                     c=all_fft_df['has_muqattaat'].astype(int), alpha=0.7, s=60, cmap='viridis')
axes[1, 2].set_title('Spectral Entropy vs Text Length')
axes[1, 2].set_xlabel('Text Length (characters)')
axes[1, 2].set_ylabel('Spectral Entropy')
# Axes objects have no set_colorbar(); a colorbar is created on the figure
# from the scatter's mappable and attached to the target axes.
fig.colorbar(entropy_scatter, ax=axes[1, 2], label="Has Muqatta'at (1 = yes, 0 = no)")

plt.tight_layout()
plt.show()


## 4. Statistical Analysis and Summary


In [None]:
# Statistical analysis of frequency domain results
from scipy import stats


def _report_group_difference(heading, metric_label, with_values, without_values):
    """Run a two-sample t-test, print the group means and verdict, return (t, p)."""
    t_stat, p_value = stats.ttest_ind(with_values, without_values)
    verdict = 'Yes' if p_value < 0.05 else 'No'
    print(f"\n{heading}:")
    print(f"- Average {metric_label} (with Muqatta'at): {with_values.mean():.4f}")
    print(f"- Average {metric_label} (without Muqatta'at): {without_values.mean():.4f}")
    print(f"- T-test p-value: {p_value:.4f}")
    print(f"- Significant difference: {verdict}")
    return t_stat, p_value


print("FREQUENCY DOMAIN ANALYSIS SUMMARY")
print("=" * 50)

# Spectral entropy: with vs without Muqatta'at
entropy_with = with_muqattaat_fft['spectral_entropy']
entropy_without = without_muqattaat_fft['spectral_entropy']
t_stat_entropy, p_value_entropy = _report_group_difference(
    "Spectral Entropy Analysis", "spectral entropy", entropy_with, entropy_without
)

# Maximum power: with vs without Muqatta'at
max_power_with = with_muqattaat_fft['max_power']
max_power_without = without_muqattaat_fft['max_power']
t_stat_power, p_value_power = _report_group_difference(
    "Maximum Power Analysis", "max power", max_power_with, max_power_without
)

# Strongest dominant frequency: with vs without Muqatta'at
dom_freq_1_with = with_muqattaat_fft['dominant_freq_1']
dom_freq_1_without = without_muqattaat_fft['dominant_freq_1']
t_stat_freq, p_value_freq = _report_group_difference(
    "Dominant Frequency Analysis", "dominant freq", dom_freq_1_with, dom_freq_1_without
)

# Pearson correlation between text length and spectral entropy over all surahs
correlation_entropy_length, p_corr_entropy = pearsonr(
    all_fft_df['text_length'], all_fft_df['spectral_entropy']
)
print("\nCorrelation Analysis:")
print(f"- Spectral entropy vs text length correlation: {correlation_entropy_length:.4f}")
print(f"- Correlation p-value: {p_corr_entropy:.4f}")

# Key insights: one line per test, using the 0.05 significance threshold
print("\nKey Insights:")
print(
    "✓ Significant difference in spectral entropy between surahs with and without Muqatta'at"
    if p_value_entropy < 0.05
    else "✗ No significant difference in spectral entropy between the two groups"
)
print(
    "✓ Significant difference in maximum power between the two groups"
    if p_value_power < 0.05
    else "✗ No significant difference in maximum power between the two groups"
)
print(
    "✓ Significant difference in dominant frequencies between the two groups"
    if p_value_freq < 0.05
    else "✗ No significant difference in dominant frequencies between the two groups"
)

# The correlation is labelled by whether its magnitude exceeds 0.3
correlation_is_strong = abs(correlation_entropy_length) > 0.3
if correlation_is_strong:
    print(f"✓ Strong correlation ({correlation_entropy_length:.3f}) between spectral entropy and text length")
else:
    print(f"✗ Weak correlation ({correlation_entropy_length:.3f}) between spectral entropy and text length")

print("\nNext Steps:")
for next_step in (
    "- Proceed to Autocorrelation analysis to detect temporal patterns",
    "- Consider advanced spectral analysis techniques",
    "- Investigate cross-spectral analysis between Muqatta'at and surah content",
):
    print(next_step)
