# Quran Quantitative Analysis
## Statistical Analysis and Time Series Investigation

This notebook implements comprehensive analysis of the Quran text from quantitative and time series perspectives.

### Project Objectives:
- Multi-level time series analysis (character, word, surah, ayah windows)
- Investigation of Muqatta'at letters (المقطعات) as potential CRC checks
- Statistical analysis of text patterns and distributions
- Encoding scheme comparison for time series analysis


## 1. Setup and Data Loading


In [15]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from collections import Counter, defaultdict
import re
import os
from pathlib import Path
import warnings
warnings.filterwarnings('ignore')

# Arabic text processing
import arabic_reshaper
from bidi.algorithm import get_display
import matplotlib.font_manager as fm

# Set up plotting
plt.style.use('default')
sns.set_palette("husl")

print("Libraries imported successfully")


Libraries imported successfully


In [17]:
def load_quran_data():
    # Set up paths and working directory
    project_root = Path.cwd()
    data_path = project_root / "datasets" / "quran-simple-clean.csv"

    print(f"Project root: {project_root}")
    print(f"Data path: {data_path}")
    print(f"Data file exists: {data_path.exists()}")

    # Load the dataset
    if data_path.exists():
        df = pd.read_csv(data_path)
        print(f"Dataset loaded successfully")
        print(f"Shape: {df.shape}")
        print(f"Columns: {list(df.columns)}")
    else:
        print("Data file not found. Please check the path.")
        df = None

    return df

In [19]:
original_df = load_quran_data()

Project root: /media/me/Active/mywork/coding/mycode/code_experiments/1_r_quran_analysis
Data path: /media/me/Active/mywork/coding/mycode/code_experiments/1_r_quran_analysis/datasets/quran-simple-clean.csv
Data file exists: True
Dataset loaded successfully
Shape: (6236, 3)
Columns: ['surah', 'aya', 'text']


Dsiplay dataset

In [20]:
original_df

Unnamed: 0,surah,aya,text
0,1,1,بسم الله الرحمن الرحيم
1,1,2,الحمد لله رب العالمين
2,1,3,الرحمن الرحيم
3,1,4,مالك يوم الدين
4,1,5,إياك نعبد وإياك نستعين
...,...,...,...
6231,114,2,ملك الناس
6232,114,3,إله الناس
6233,114,4,من شر الوسواس الخناس
6234,114,5,الذي يوسوس في صدور الناس


## 2. Data Exploration and Basic Statistics


In [21]:
if original_df is not None:
    print("=== DATASET OVERVIEW ===")
    print(f"Total verses: {len(original_df)}")
    print(f"Total surahs: {original_df['surah'].nunique()}")
    print(f"Surah range: {original_df['surah'].min()} to {original_df['surah'].max()}")
    print(f"Ayah range: {original_df['aya'].min()} to {original_df['aya'].max()}")
    
    print("\n=== SAMPLE DATA ===")
    print(original_df.head())
    
    print("\n=== DATA TYPES ===")
    print(original_df.dtypes)
    
    print("\n=== MISSING VALUES ===")
    print(original_df.isnull().sum())


=== DATASET OVERVIEW ===
Total verses: 6236
Total surahs: 114
Surah range: 1 to 114
Ayah range: 1 to 286

=== SAMPLE DATA ===
   surah  aya                    text
0      1    1  بسم الله الرحمن الرحيم
1      1    2   الحمد لله رب العالمين
2      1    3           الرحمن الرحيم
3      1    4          مالك يوم الدين
4      1    5  إياك نعبد وإياك نستعين

=== DATA TYPES ===
surah     int64
aya       int64
text     object
dtype: object

=== MISSING VALUES ===
surah    0
aya      0
text     0
dtype: int64


## 3. Text Preprocessing


First, we need to remove Basmala, display the first aya of each sura

In [25]:
# Display the first aya of each sura
if original_df is not None:
    first_ayas = original_df[original_df['aya'] == 1][['surah', 'aya', 'text']]
    print("First aya of each sura:")
    display(first_ayas.reset_index(drop=True))

First aya of each sura:


Unnamed: 0,surah,aya,text
0,1,1,بسم الله الرحمن الرحيم
1,2,1,بسم الله الرحمن الرحيم الم
2,3,1,بسم الله الرحمن الرحيم الم
3,4,1,بسم الله الرحمن الرحيم يا أيها الناس اتقوا ربك...
4,5,1,بسم الله الرحمن الرحيم يا أيها الذين آمنوا أوف...
...,...,...,...
109,110,1,بسم الله الرحمن الرحيم إذا جاء نصر الله والفتح
110,111,1,بسم الله الرحمن الرحيم تبت يدا أبي لهب وتب
111,112,1,بسم الله الرحمن الرحيم قل هو الله أحد
112,113,1,بسم الله الرحمن الرحيم قل أعوذ برب الفلق


The following code removes basmala from the first verse of each surah, knowing that many surahs have basmala in the first verse and also some of them has the second verse after basmala in the first verse so we need to account for that


In [26]:
basmala = 'بسم الله الرحمن الرحيم'

def remove_basmala_from_first_aya(row):
    if row['aya'] == 1 and (basmala in row['text']):
        # If basmala is the only text (after stripping spaces and punctuation), do NOT remove it
        stripped_text = row['text'].strip(' ،:;،.،')
        if stripped_text == basmala:
            return row['text']
        # Remove only the first occurrence of basmala, and handle extra spaces or punctuation
        new_text = row['text']
        idx = new_text.find(basmala)
        if idx != -1:
            # Remove basmala and any following spaces or punctuation (like "، ", etc.)
            new_text = new_text[idx+len(basmala):].lstrip(' ،:;،.،')
            # If the text is empty after basmala removal, return empty string
            if new_text.strip() == '':
                return ''
            return new_text
    return row['text']

# Apply to the dataframe inplace (if not already done in preprocessing function)
df_no_basmala = original_df.copy()
df_no_basmala['text'] = df_no_basmala.apply(remove_basmala_from_first_aya, axis=1)

Display first aya again after moving Basmala

In [27]:
# Display the first aya of each sura
if df is not None:
    first_ayas = df[df['aya'] == 1][['surah', 'aya', 'text']]
    print("First aya of each sura:")
    display(first_ayas.reset_index(drop=True))

First aya of each sura:


Unnamed: 0,surah,aya,text
0,1,1,بسم الله الرحمن الرحيم
1,2,1,الم
2,3,1,الم
3,4,1,يا أيها الناس اتقوا ربكم الذي خلقكم من نفس واح...
4,5,1,يا أيها الذين آمنوا أوفوا بالعقود ۚ أحلت لكم ب...
...,...,...,...
109,110,1,إذا جاء نصر الله والفتح
110,111,1,تبت يدا أبي لهب وتب
111,112,1,قل هو الله أحد
112,113,1,قل أعوذ برب الفلق


We can choose to keep or remove the Basamla from surat Al Fatihah, it is debated between scientists whether it is a verse or not

In [None]:
# Display the first aya of surat Al Fatihah
if df_no_basmala is not None:
    fatihah_aya = df_no_basmala[df_no_basmala['surah'] == 1][['surah', 'aya', 'text']]
    print("First aya of surat Al Fatihah:")
    display(fatihah_aya.reset_index(drop=True))



KeyError: (0,)

if df is not None:
    print ("not null")

In [11]:
def clean_quran_text(df):
    """Clean and preprocess Quran text for analysis"""
    df_clean = df.copy()
    
    # Remove Basmala from first verse of each surah
    basmala = 'بسم الله الرحمن الرحيم'
    
    def remove_basmala(row):
        if row['aya'] == 1 and row['text'].startswith(basmala):
            return row['text'][len(basmala):].strip()
        return row['text']
    
    df_clean['text'] = df_clean.apply(remove_basmala, axis=1)
    
    # Remove diacritics and non-Arabic characters (keeping only Arabic letters and spaces)
    df_clean['text_clean'] = df_clean['text'].str.replace(r'[^ء-ي\s]', '', regex=True)
    df_clean['text_clean'] = df_clean['text_clean'].str.replace(r'\s+', ' ', regex=True).str.strip()
    
    # Keep original text with diacritics for comparison
    df_clean['text_with_diacritics'] = df_clean['text']
    
    # Remove empty verses after cleaning
    df_clean = df_clean[df_clean['text_clean'] != ''].copy()
    
    return df_clean

if df is not None:
    df_processed = clean_quran_text(df)
    print(f"Original verses: {len(df)}")
    print(f"After cleaning: {len(df_processed)}")
    print(f"Removed: {len(df) - len(df_processed)} empty verses")
    
    print("\n=== SAMPLE CLEANED TEXT ===")
    for i in range(3):
        print(f"Surah {df_processed.iloc[i]['sura']}, Ayah {df_processed.iloc[i]['aya']}: {df_processed.iloc[i]['text_clean'][:50]}...")


Original verses: 6236
After cleaning: 6235
Removed: 1 empty verses

=== SAMPLE CLEANED TEXT ===


KeyError: 'sura'

## 4. Basic Statistical Analysis


In [None]:
def calculate_basic_stats(df):
    """Calculate basic statistical measures for the text"""
    stats = {}
    
    # Character-level statistics
    all_text = ' '.join(df['text_clean'])
    stats['total_characters'] = len(all_text)
    stats['unique_characters'] = len(set(all_text.replace(' ', '')))
    stats['total_words'] = len(all_text.split())
    stats['unique_words'] = len(set(all_text.split()))
    
    # Per verse statistics
    df['char_count'] = df['text_clean'].str.len()
    df['word_count'] = df['text_clean'].str.split().str.len()
    
    stats['avg_chars_per_verse'] = df['char_count'].mean()
    stats['avg_words_per_verse'] = df['word_count'].mean()
    stats['std_chars_per_verse'] = df['char_count'].std()
    stats['std_words_per_verse'] = df['word_count'].std()
    
    # Per surah statistics
    surah_stats = df.groupby('surah').agg({
        'char_count': ['sum', 'mean', 'std'],
        'word_count': ['sum', 'mean', 'std'],
        'aya': 'count'
    }).round(2)
    
    stats['surah_stats'] = surah_stats
    
    return stats, df

if 'df_processed' in locals():
    stats, df_with_stats = calculate_basic_stats(df_processed)
    
    print("=== BASIC STATISTICS ===")
    for key, value in stats.items():
        if key != 'surah_stats':
            print(f"{key}: {value}")
    
    print("\n=== PER VERSE STATISTICS ===")
    print(f"Character count - Mean: {stats['avg_chars_per_verse']:.2f}, Std: {stats['std_chars_per_verse']:.2f}")
    print(f"Word count - Mean: {stats['avg_words_per_verse']:.2f}, Std: {stats['std_words_per_verse']:.2f}")


In [None]:
# Visualize basic distributions
if 'df_with_stats' in locals():
    fig, axes = plt.subplots(2, 2, figsize=(15, 10))
    
    # Character count distribution
    axes[0, 0].hist(df_with_stats['char_count'], bins=50, alpha=0.7, color='skyblue')
    axes[0, 0].set_title('Distribution of Characters per Verse')
    axes[0, 0].set_xlabel('Character Count')
    axes[0, 0].set_ylabel('Frequency')
    axes[0, 0].grid(True, alpha=0.3)
    
    # Word count distribution
    axes[0, 1].hist(df_with_stats['word_count'], bins=50, alpha=0.7, color='lightgreen')
    axes[0, 1].set_title('Distribution of Words per Verse')
    axes[0, 1].set_xlabel('Word Count')
    axes[0, 1].set_ylabel('Frequency')
    axes[0, 1].grid(True, alpha=0.3)
    
    # Characters vs Words scatter
    axes[1, 0].scatter(df_with_stats['char_count'], df_with_stats['word_count'], alpha=0.5, s=1)
    axes[1, 0].set_title('Characters vs Words per Verse')
    axes[1, 0].set_xlabel('Character Count')
    axes[1, 0].set_ylabel('Word Count')
    axes[1, 0].grid(True, alpha=0.3)
    
    # Surah length distribution
    surah_lengths = df_with_stats.groupby('surah')['aya'].count()
    axes[1, 1].bar(range(1, len(surah_lengths)+1), surah_lengths.values, alpha=0.7, color='orange')
    axes[1, 1].set_title('Number of Verses per Surah')
    axes[1, 1].set_xlabel('Surah Number')
    axes[1, 1].set_ylabel('Number of Verses')
    axes[1, 1].grid(True, alpha=0.3)
    
    plt.tight_layout()
    plt.show()
    
    # Print some interesting statistics
    print("\n=== INTERESTING STATISTICS ===")
    print(f"Longest verse: {df_with_stats.loc[df_with_stats['char_count'].idxmax(), 'char_count']} characters")
    print(f"Shortest verse: {df_with_stats.loc[df_with_stats['char_count'].idxmin(), 'char_count']} characters")
    print(f"Longest surah: Surah {surah_lengths.idxmax()} with {surah_lengths.max()} verses")
    print(f"Shortest surah: Surah {surah_lengths.idxmin()} with {surah_lengths.min()} verses")


## 5. Character-Level Analysis


In [None]:
def analyze_characters(df):
    """Analyze character frequency and patterns"""
    # Combine all text
    all_text = ' '.join(df['text_clean'])
    
    # Character frequency
    char_freq = Counter(all_text.replace(' ', ''))
    
    # Create character analysis dataframe
    char_df = pd.DataFrame(char_freq.most_common(), columns=['character', 'frequency'])
    char_df['percentage'] = (char_df['frequency'] / char_df['frequency'].sum()) * 100
    
    return char_df, all_text

if 'df_with_stats' in locals():
    char_df, all_text = analyze_characters(df_with_stats)
    
    print("=== CHARACTER FREQUENCY ANALYSIS ===")
    print(f"Total unique characters: {len(char_df)}")
    print(f"Total characters (excluding spaces): {char_df['frequency'].sum()}")
    
    print("\n=== TOP 20 MOST FREQUENT CHARACTERS ===")
    print(char_df.head(20).to_string(index=False))
    
    # Visualize character frequency
    plt.figure(figsize=(15, 8))
    top_chars = char_df.head(20)
    
    # Create display names for Arabic characters
    top_chars['display_name'] = top_chars['character'].apply(
        lambda x: get_display(arabic_reshaper.reshape(x)) if x != ' ' else 'Space'
    )
    
    plt.barh(range(len(top_chars)), top_chars['frequency'])
    plt.yticks(range(len(top_chars)), top_chars['display_name'])
    plt.xlabel('Frequency')
    plt.title('Top 20 Most Frequent Characters in Quran')
    plt.gca().invert_yaxis()
    plt.grid(axis='x', alpha=0.3)
    plt.tight_layout()
    plt.show()


## 6. Muqatta'at Letters Analysis (CRC Hypothesis)


In [None]:
def analyze_muqattaat(df):
    """Analyze Muqatta'at letters and their patterns"""
    
    # Define Muqatta'at letters (المقطعات)
    muqattaat_letters = {
        'alif_lam_mim': ['ا', 'ل', 'م'],
        'alif_lam_mim_sad': ['ا', 'ل', 'م', 'ص'],
        'alif_lam_ra': ['ا', 'ل', 'ر'],
        'kaf_ha_ya_ain_sad': ['ك', 'ه', 'ي', 'ع', 'ص'],
        'ta_ha': ['ط', 'ه'],
        'ta_sin_mim': ['ط', 'س', 'م'],
        'ta_sin': ['ط', 'س'],
        'ya_sin': ['ي', 'س'],
        'sad': ['ص'],
        'ha_mim': ['ح', 'م'],
        'ain_sin_qaf': ['ع', 'س', 'ق'],
        'qaf': ['ق'],
        'nun': ['ن']
    }
    
    # Get all unique Muqatta'at letters
    all_muqattaat = set()
    for letters in muqattaat_letters.values():
        all_muqattaat.update(letters)
    
    # Analyze frequency of Muqatta'at letters
    all_text = ' '.join(df['text_clean'])
    char_freq = Counter(all_text.replace(' ', ''))
    
    muqattaat_freq = {char: char_freq.get(char, 0) for char in all_muqattaat}
    
    # Calculate statistics
    total_chars = sum(char_freq.values())
    muqattaat_total = sum(muqattaat_freq.values())
    muqattaat_percentage = (muqattaat_total / total_chars) * 100
    
    return {
        'muqattaat_letters': all_muqattaat,
        'muqattaat_freq': muqattaat_freq,
        'total_muqattaat': muqattaat_total,
        'muqattaat_percentage': muqattaat_percentage,
        'muqattaat_groups': muqattaat_letters
    }

if 'df_with_stats' in locals():
    muqattaat_analysis = analyze_muqattaat(df_with_stats)
    
    print("=== MUQATTA'AT LETTERS ANALYSIS ===")
    print(f"Total Muqatta'at letters: {len(muqattaat_analysis['muqattaat_letters'])}")
    print(f"Total frequency of Muqatta'at letters: {muqattaat_analysis['total_muqattaat']}")
    print(f"Percentage of text: {muqattaat_analysis['muqattaat_percentage']:.2f}%")
    
    print("\n=== MUQATTA'AT LETTER FREQUENCIES ===")
    muqattaat_df = pd.DataFrame(
        list(muqattaat_analysis['muqattaat_freq'].items()),
        columns=['letter', 'frequency']
    ).sort_values('frequency', ascending=False)
    
    # Add display names
    muqattaat_df['display_name'] = muqattaat_df['letter'].apply(
        lambda x: get_display(arabic_reshaper.reshape(x))
    )
    
    print(muqattaat_df.to_string(index=False))
    
    # Visualize Muqatta'at frequency
    plt.figure(figsize=(12, 6))
    plt.bar(muqattaat_df['display_name'], muqattaat_df['frequency'])
    plt.title('Frequency of Muqatta\\'at Letters')
    plt.xlabel('Letter')
    plt.ylabel('Frequency')
    plt.xticks(rotation=45)
    plt.grid(axis='y', alpha=0.3)
    plt.tight_layout()
    plt.show()


## 7. Time Series Preparation


In [None]:
def prepare_time_series_data(df, encoding_scheme='unicode'):
    """Prepare data for time series analysis with different encoding schemes"""
    
    time_series_data = []
    
    for idx, row in df.iterrows():
        text = row['text_clean']
        surah = row['surah']
        aya = row['aya']
        
        if encoding_scheme == 'unicode':
            # Unicode values
            encoded = [ord(char) for char in text if char != ' ']
        elif encoding_scheme == 'sequential':
            # Sequential ordering (1, 2, 3, ...)
            unique_chars = list(set(text.replace(' ', '')))
            char_to_num = {char: i+1 for i, char in enumerate(unique_chars)}
            encoded = [char_to_num.get(char, 0) for char in text if char != ' ']
        elif encoding_scheme == 'muqattaat_focus':
            # Focus on Muqatta'at letters only
            muqattaat_letters = {'ا', 'ل', 'م', 'ص', 'ر', 'ك', 'ه', 'ي', 'ع', 'ط', 'س', 'ح', 'ق', 'ن'}
            encoded = [ord(char) if char in muqattaat_letters else 0 for char in text if char != ' ']
        
        time_series_data.append({
            'surah': surah,
            'aya': aya,
            'encoded_sequence': encoded,
            'sequence_length': len(encoded),
            'text': text
        })
    
    return pd.DataFrame(time_series_data)

if 'df_with_stats' in locals():
    # Test different encoding schemes
    print("=== TIME SERIES DATA PREPARATION ===")
    
    # Unicode encoding
    ts_unicode = prepare_time_series_data(df_with_stats, 'unicode')
    print(f"Unicode encoding - Sample sequence: {ts_unicode.iloc[0]['encoded_sequence'][:10]}...")
    
    # Muqatta'at focus encoding
    ts_muqattaat = prepare_time_series_data(df_with_stats, 'muqattaat_focus')
    print(f"Muqatta'at focus - Sample sequence: {ts_muqattaat.iloc[0]['encoded_sequence'][:10]}...")
    
    # Calculate some basic time series statistics
    print(f"\n=== TIME SERIES STATISTICS ===")
    print(f"Average sequence length: {ts_unicode['sequence_length'].mean():.2f}")
    print(f"Sequence length std: {ts_unicode['sequence_length'].std():.2f}")
    print(f"Total sequences: {len(ts_unicode)}")
    
    # Show distribution of sequence lengths
    plt.figure(figsize=(12, 5))
    
    plt.subplot(1, 2, 1)
    plt.hist(ts_unicode['sequence_length'], bins=50, alpha=0.7, color='skyblue')
    plt.title('Distribution of Sequence Lengths (Unicode)')
    plt.xlabel('Sequence Length')
    plt.ylabel('Frequency')
    plt.grid(True, alpha=0.3)
    
    plt.subplot(1, 2, 2)
    plt.hist(ts_muqattaat['sequence_length'], bins=50, alpha=0.7, color='lightcoral')
    plt.title('Distribution of Sequence Lengths (Muqatta\\'at Focus)')
    plt.xlabel('Sequence Length')
    plt.ylabel('Frequency')
    plt.grid(True, alpha=0.3)
    
    plt.tight_layout()
    plt.show()


## 8. Rolling Window Analysis


In [None]:
def rolling_window_analysis(df, window_size=10):
    """Analyze patterns in rolling windows of verses"""
    
    # Calculate rolling statistics
    df['rolling_char_mean'] = df['char_count'].rolling(window=window_size, min_periods=1).mean()
    df['rolling_word_mean'] = df['word_count'].rolling(window=window_size, min_periods=1).mean()
    df['rolling_char_std'] = df['char_count'].rolling(window=window_size, min_periods=1).std()
    df['rolling_word_std'] = df['word_count'].rolling(window=window_size, min_periods=1).std()
    
    return df

if 'df_with_stats' in locals():
    # Apply rolling window analysis
    df_rolling = rolling_window_analysis(df_with_stats, window_size=10)
    
    print("=== ROLLING WINDOW ANALYSIS (Window Size: 10) ===")
    
    # Visualize rolling patterns
    fig, axes = plt.subplots(2, 2, figsize=(15, 10))
    
    # Rolling character mean
    axes[0, 0].plot(df_rolling.index, df_rolling['rolling_char_mean'], alpha=0.7, color='blue')
    axes[0, 0].set_title('Rolling Mean of Character Count (Window=10)')
    axes[0, 0].set_xlabel('Verse Index')
    axes[0, 0].set_ylabel('Mean Character Count')
    axes[0, 0].grid(True, alpha=0.3)
    
    # Rolling word mean
    axes[0, 1].plot(df_rolling.index, df_rolling['rolling_word_mean'], alpha=0.7, color='green')
    axes[0, 1].set_title('Rolling Mean of Word Count (Window=10)')
    axes[0, 1].set_xlabel('Verse Index')
    axes[0, 1].set_ylabel('Mean Word Count')
    axes[0, 1].grid(True, alpha=0.3)
    
    # Rolling character std
    axes[1, 0].plot(df_rolling.index, df_rolling['rolling_char_std'], alpha=0.7, color='red')
    axes[1, 0].set_title('Rolling Std of Character Count (Window=10)')
    axes[1, 0].set_xlabel('Verse Index')
    axes[1, 0].set_ylabel('Std Character Count')
    axes[1, 0].grid(True, alpha=0.3)
    
    # Rolling word std
    axes[1, 1].plot(df_rolling.index, df_rolling['rolling_word_std'], alpha=0.7, color='orange')
    axes[1, 1].set_title('Rolling Std of Word Count (Window=10)')
    axes[1, 1].set_xlabel('Verse Index')
    axes[1, 1].set_ylabel('Std Word Count')
    axes[1, 1].grid(True, alpha=0.3)
    
    plt.tight_layout()
    plt.show()
    
    # Print some statistics about the rolling analysis
    print(f"\nRolling Character Mean - Min: {df_rolling['rolling_char_mean'].min():.2f}, Max: {df_rolling['rolling_char_mean'].max():.2f}")
    print(f"Rolling Word Mean - Min: {df_rolling['rolling_word_mean'].min():.2f}, Max: {df_rolling['rolling_word_mean'].max():.2f}")


## 9. Summary and Next Steps


In [None]:
print("=== ANALYSIS SUMMARY ===")
print("\n1. Data Quality:")
if 'df_processed' in locals():
    print(f"   - Successfully processed {len(df_processed)} verses")
    print(f"   - Removed {len(df) - len(df_processed)} empty verses after cleaning")

print("\n2. Statistical Findings:")
if 'stats' in locals():
    print(f"   - Total characters: {stats['total_characters']:,}")
    print(f"   - Unique characters: {stats['unique_characters']}")
    print(f"   - Total words: {stats['total_words']:,}")
    print(f"   - Unique words: {stats['unique_words']:,}")
    print(f"   - Average characters per verse: {stats['avg_chars_per_verse']:.2f}")
    print(f"   - Average words per verse: {stats['avg_words_per_verse']:.2f}")

print("\n3. Muqatta'at Analysis:")
if 'muqattaat_analysis' in locals():
    print(f"   - Muqatta'at letters represent {muqattaat_analysis['muqattaat_percentage']:.2f}% of all text")
    print(f"   - Total Muqatta'at letter frequency: {muqattaat_analysis['total_muqattaat']:,}")

print("\n4. Time Series Preparation:")
if 'ts_unicode' in locals():
    print(f"   - Prepared {len(ts_unicode)} sequences for time series analysis")
    print(f"   - Average sequence length: {ts_unicode['sequence_length'].mean():.2f}")

print("\n=== RECOMMENDED NEXT STEPS ===")
print("1. Implement autocorrelation analysis on the time series data")
print("2. Apply spectral analysis (FFT) to detect periodic patterns")
print("3. Test the CRC hypothesis by analyzing Muqatta'at letter distributions")
print("4. Implement entropy analysis for information content")
print("5. Create surah-level pattern analysis")
print("6. Compare different encoding schemes systematically")
print("7. Implement rolling window analysis with different window sizes")
print("8. Add diacritics analysis for comparison")
