In [2]:
pip install pandas tabulate --quiet

You should consider upgrading via the '/Library/Developer/CommandLineTools/usr/bin/python3 -m pip install --upgrade pip' command.[0m
Note: you may need to restart the kernel to use updated packages.


In [3]:

import pandas as pd
import re
from tabulate import tabulate

##############################################################################
# Configuration Flags
##############################################################################
STRIP_DIACRITICS = True
DEBUG = False

##############################################################################
# Regex Patterns
##############################################################################

PATTERN_ISM = [r"\bاسم\b"]

ALLAH_PATTERNS_STRICT = [r"\bالله\b"]
ALLAH_PATTERNS_EXPANDED = [
    r"\bالله\b",      # standalone
    r"\bاللهم\b",     # vocative
    r"\bبالله\b",
    r"\bوالله\b",
    r"\bفلله\b",
    r"\bتالله\b"
]

# For this example, let's assume "expanded" is what's needed to reach ~2698.
# If you only want a single mode, choose it here.
ALLAH_MODE = "expanded"

# Basic forms for Rahman & Rahim (if text is mostly unvocalized).
PATTERN_RAHMAN = [r"\bالرحمن\b"]
PATTERN_RAHIM  = [r"\bالرحيم\b"]

# If your text is heavily diacriticized, consider advanced diacritic-friendly regex.

##############################################################################
# Known Bullet-Point References
##############################################################################
CHAPTERS = 114
VERSES = 6346
CROSS_SUM_VERSES = sum(map(int, str(VERSES)))  # 6+3+4+6=19


In [4]:
# 1) Load CSV data (adjust filename & column names to your dataset).
df = pd.read_csv("quran_data.csv")
print(df.head())  # quick look at the first few rows


   surah_no surah_name_en surah_name_ar surah_name_roman  ayah_no_surah  \
0         1    The Opener       الفاتحة       Al-Fatihah              1   
1         1    The Opener       الفاتحة       Al-Fatihah              2   
2         1    The Opener       الفاتحة       Al-Fatihah              3   
3         1    The Opener       الفاتحة       Al-Fatihah              4   
4         1    The Opener       الفاتحة       Al-Fatihah              5   

   ayah_no_quran                                   ayah_ar  \
0              1    بِسْمِ ٱللَّهِ ٱلرَّحْمَٰنِ ٱلرَّحِيمِ   
1              2     ٱلْحَمْدُ لِلَّهِ رَبِّ ٱلْعَٰلَمِينَ   
2              3                   ٱلرَّحْمَٰنِ ٱلرَّحِيمِ   
3              4                   مَٰلِكِ يَوْمِ ٱلدِّينِ   
4              5  إِيَّاكَ نَعْبُدُ وَإِيَّاكَ نَسْتَعِينُ   

                                             ayah_en  ruko_no  juz_no  \
0  In the Name of Allah—the Most Compassionate, M...        1       1   
1        All praise is for All

In [5]:

# 2) Combine all ayahs into one big string
raw_text = " ".join(df["ayah_ar"].tolist())
prepped_text = unify_arabic_characters(raw_text)

### Wikipedia

In this section we verify claims summarized from the book, Nineteen (https://www.google.com/books/edition/Nineteen/8bJIYgEACAAJ?hl=en), in the Wikipedia article on Code 19:
https://en.wikipedia.org/wiki/Quran_code#Example

In [6]:
#!/usr/bin/env python
# coding: utf-8

"""
Quranic Numeric Patterns Analysis

This script verifies the numeric patterns related to:
1. Bismillah letter count (19 letters)
2. Ism (اسم) count = 19
3. Allah (الله) count = 2698
4. Rahman (الرحمن) count = 57
5. Rahim (الرحيم) count = 114
6. Other 19-based patterns: chapters, verses, cross-sums, etc.

We use 'unify_arabic_characters' to handle diacritics & Alef variants 
so the code can match e.g. "ٱللَّهِ" => "الله" consistently.
"""

import re
from tabulate import tabulate

##############################################################################
# Example: unify_arabic_characters (handles Alef variants and diacritics)
##############################################################################
def unify_arabic_characters(text, strip_diacritics=True):
    """
    Convert Alef variants (ٱ, أ, إ, آ) to plain Alef (ا),
    remove diacritics (harakat, shadda, etc.), and normalize spacing.
    """
    # Remove diacritics if configured
    if strip_diacritics:
        diacritics_regex = re.compile(r"[\u064B-\u0652\u0670\u06D6-\u06ED]")
        text = diacritics_regex.sub("", text)

    # Replace Alef variants with a plain "ا"
    ALEF_VARIANTS = [r"ٱ", r"أ", r"إ", r"آ"]
    for var in ALEF_VARIANTS:
        text = re.sub(var, "ا", text)

    # Normalize spacing
    text = re.sub(r"\s+", " ", text).strip()
    return text

##############################################################################
# Core Functions from your snippet (with unify_arabic_characters in verify_bismillah_letters)
##############################################################################

def count_occurrences_regex(text, patterns):
    """
    Sum the occurrences of each pattern in 'patterns'.
    """
    total = 0
    for pat in patterns:
        matches = re.findall(pat, text)
        if DEBUG:  # Provided by your environment
            print(f"[DEBUG] Pattern '{pat}' => {len(matches)} matches.")
        total += len(matches)
    return total

def count_allah(text):
    """
    Count 'Allah' based on ALLAH_MODE.
    - "strict" => just standalone الله
    - "expanded" => includes بالله, اللهم, etc.
    - "both" => returns (strict_count, expanded_count)
    """
    strict_count = count_occurrences_regex(text, ALLAH_PATTERNS_STRICT)
    expanded_count = count_occurrences_regex(text, ALLAH_PATTERNS_EXPANDED)

    if ALLAH_MODE == "strict":
        return strict_count
    elif ALLAH_MODE == "expanded":
        return expanded_count
    else:
        return (strict_count, expanded_count)

def verify_bismillah_letters():
    """
    Check Bismillah: "بسم الله الرحمن الرحيم" => 19 letters (unvocalized).
    Uses unify_arabic_characters for consistent normalization.
    """
    bismillah = "بسم الله الرحمن الرحيم"
    processed = unify_arabic_characters(bismillah, strip_diacritics=STRIP_DIACRITICS)
    return sum(1 for c in processed if c.isalpha())

##############################################################################
# Main Analysis (Example)
##############################################################################

def main():
    """
    Example main function showing how you'd finalize your analysis.
    Assumes 'prepped_text' is your full Quran text (all ayahs joined and normalized).
    """

    # 1) Verify Bismillah letters
    bismillah_letter_count = verify_bismillah_letters()

    # 2) Count core patterns
    ism_count    = count_occurrences_regex(prepped_text, PATTERN_ISM)
    allah_count  = count_allah(prepped_text)
    rahman_count = count_occurrences_regex(prepped_text, PATTERN_RAHMAN)
    rahim_count  = count_occurrences_regex(prepped_text, PATTERN_RAHIM)

    # If ALLAH_MODE="both", pick the expanded count for the bullet-point claim (2698).
    if isinstance(allah_count, tuple):
        allah_count_for_table = allah_count[1]
    else:
        allah_count_for_table = allah_count

    # 3) Prepare claims table
    claims = [
        ["Bismillah Letters", 19, bismillah_letter_count],
        ["Ism (اسم)", 19, ism_count],
        ["Allah (الله)", 2698, allah_count_for_table],
        ["Rahman (الرحمن)", 57, rahman_count],
        ["Rahim (الرحيم)", 114, rahim_count],
        ["Total Chapters", 114, CHAPTERS],
        ["Total Verses", 6346, VERSES],
        ["Verses Cross Sum", 19, CROSS_SUM_VERSES],
    ]

    # 4) Display results
    table_data = []
    for label, expected, actual in claims:
        match_symbol = "✓" if expected == actual else "✗"
        table_data.append([label, expected, actual, match_symbol])

    print(tabulate(table_data, headers=["Pattern", "Expected", "Actual", "Match"], tablefmt="grid"))

# If running as a script, call main():
if __name__ == "__main__":
    main()


+-------------------+------------+----------+---------+
| Pattern           |   Expected |   Actual | Match   |
| Bismillah Letters |         19 |       19 | ✓       |
+-------------------+------------+----------+---------+
| Ism (اسم)         |         19 |       15 | ✗       |
+-------------------+------------+----------+---------+
| Allah (الله)      |       2698 |     2553 | ✗       |
+-------------------+------------+----------+---------+
| Rahman (الرحمن)   |         57 |       47 | ✗       |
+-------------------+------------+----------+---------+
| Rahim (الرحيم)    |        114 |       36 | ✗       |
+-------------------+------------+----------+---------+
| Total Chapters    |        114 |      114 | ✓       |
+-------------------+------------+----------+---------+
| Total Verses      |       6346 |     6346 | ✓       |
+-------------------+------------+----------+---------+
| Verses Cross Sum  |         19 |       19 | ✓       |
+-------------------+------------+----------+---

In [23]:
# Tokenize into words
quran_words = prepped_text.split()  # Split into a list of words

print(f"Total words in Quran: {len(quran_words)}")
print(quran_words[:10])

Total words in Quran: 77438
['بسم', 'الله', 'الرحمن', 'الرحيم', 'الحمد', 'لله', 'رب', 'العلمين', 'الرحمن', 'الرحيم']


In [40]:
WORDS_OF_INTEREST = [
    # Religious/thematic words
    "الله",    # 'Allah'
    "اسم",     # 'Ism' (Name)
    "الرحمن",  # 'Rahman' (Gracious)
    "الرحيم",  # 'Rahim' (Merciful)
    "رسول",    # 'Rasul' (Messenger)
    "إيمان",   # 'Iman' (Faith)
    "جنة",     # 'Jannah' (Paradise)
    "نار",     # 'Nar' (Hell)
    "صلاة",    # 'Salat' (Prayer)
    "زكاة",    # 'Zakat' (Charity)
    "محمد",    # 'Muhammad'
    "كافر",    # 'Kafir' (Disbeliever)
    "يوم",     # 'Yawm' (Day)
    "قرآن",    # 'Quran'
    "كتاب",    # 'Kitab' (Book)
    "آيات",    # 'Ayat' (Verses)
    "نبي",     # 'Nabi' (Prophet)
    "إله",     # 'Ilah' (God)
    "دين",     # 'Deen' (Religion)
    "مؤمن",    # 'Mu'min' (Believer)
    "ملائكة",  # 'Malaika' (Angels)
    "عبادة",   # 'Ibadah' (Worship)
]

SIGNIFICANT_NUMBERS = [
    # Culturally significant numbers
    40, 99, 114, 666,
    # Primes under 150
    2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53,
    59, 61, 67, 71, 73, 79, 83, 89, 97, 101, 103, 107, 109,
    113, 127, 131, 137, 139, 149
]


In [49]:
#!/usr/bin/env python
# -*- coding: utf-8 -*-

import re
import pandas as pd
from collections import Counter

def generate_word_patterns(word):
    """
    Generate regex patterns for a word, considering prefixes.
    Example: For 'الله', generates:
      - \bالله\b
      - \bبالله\b
      - \bوالله\b
      - \bفلله\b
      - \bتالله\b
    """
    prefixes = ["", "ب", "و", "ف", "ت"]  # Common Arabic prefixes
    patterns = []

    # Add base pattern (e.g., "الله")
    patterns.append(rf"\b{word}\b")

    # Add prefixed patterns (e.g., "بالله", "والله", etc.)
    for prefix in prefixes:
        if prefix:
            patterns.append(rf"\b{prefix}{word}\b")
    
    return patterns

# ----------------------
# 2) Unify Arabic text function
# ----------------------
def unify_arabic_characters(text):
    """
    Remove diacritics, unify Alef variants (e.g., ٱ, أ, إ, آ) → ا,
    and normalize extra spaces.
    """
    diacritics_regex = re.compile(r"[\u064B-\u0652\u0670\u06D6-\u06ED]")
    text = diacritics_regex.sub("", text)
    ALEF_VARIANTS = [r"ٱ", r"أ", r"إ", r"آ"]
    for var in ALEF_VARIANTS:
        text = re.sub(var, "ا", text)
    text = re.sub(r"\s+", " ", text).strip()
    return text

# ----------------------
# 3) Load Quran words
# ----------------------
def load_quran_words_from_csv(csv_file="quran_data.csv", ayah_column="ayah_ar"):
    """
    Example function: loads Quranic text from CSV, returns a list of words.
    Adjust column names / file names as needed.
    """
    df = pd.read_csv(csv_file)
    all_text = " ".join(df[ayah_column].tolist())
    all_text = unify_arabic_characters(all_text)
    quran_words = all_text.split()
    return quran_words

# ----------------------
# 4) Search for numeric patterns
# ----------------------
def check_numeric_patterns_with_regex(quran_words, words_of_interest, significant_numbers):
    """
    Count occurrences of each word and check for matches with significant numbers.
    Dynamically generates regex patterns for all words of interest.
    """
    # Generate regex patterns for each word
    word_patterns = {
        word: generate_word_patterns(word) for word in words_of_interest
    }

    # Flatten the Quran text into a single string
    quran_text = " ".join(quran_words)

    # Count occurrences for each word
    results = []
    for word, patterns in word_patterns.items():
        total_count = 0
        for pattern in patterns:
            total_count += len(re.findall(pattern, quran_text))

        # Check divisibility against significant numbers
        for num in significant_numbers:
            if total_count > 0 and (total_count % num == 0):
                factor = total_count // num
                results.append((word, total_count, num, factor))
    
    return results

def organize_numeric_patterns(hits):
    """
    Organize numeric patterns into a dictionary structure mapping significant numbers
    to their occurrences and factors.
    
    Returns:
    {
        19: {
            'occurrences': [
                ('الله', 2698, 142),  # (word, count, factor)
                ('الرحمن', 57, 3),
                ...
            ]
        },
        7: {
            'occurrences': [
                ('يوم', 217, 31),
                ...
            ]
        },
        ...
    }
    """
    patterns_by_number = {}
    
    for word, count, num, factor in hits:
        if num not in patterns_by_number:
            patterns_by_number[num] = {
                'occurrences': []
            }
        
        # Add this occurrence to the list for this number
        patterns_by_number[num]['occurrences'].append((word, count, factor))
    
    return patterns_by_number

def display_numeric_patterns(patterns_by_number):
    """
    Display the organized numeric patterns in a clear, formatted way.
    """
    print("\nNumeric Patterns Analysis:")
    print("=" * 50)
    
    # Sort by number for consistent output
    for num in sorted(patterns_by_number.keys()):
        occurrences = patterns_by_number[num]['occurrences']
        
        print(f"\nNumber {num}:")
        print("-" * 20)
        
        # Sort occurrences by count for better readability
        sorted_occurrences = sorted(occurrences, key=lambda x: x[1], reverse=True)
        
        for word, count, factor in sorted_occurrences:
            print(f"• {word}: {count} ({num} × {factor})")


def print_pattern_summary(patterns_by_number):
    """
    Print a concise summary of which numbers have patterns and how many.
    """
    print("Divisibility Summary:")
    print("=" * 50)
    
    # Get counts per number
    number_counts = {num: len(data['occurrences']) for num, data in patterns_by_number.items()}
    
    # Sort by frequency (most patterns first)
    sorted_numbers = sorted(number_counts.items(), key=lambda x: x[1], reverse=True)
    
    for num, count in sorted_numbers:
        print(f"• Number {num:3d}: {count:3d}")
        
    # Print totals
    total_patterns = sum(number_counts.values())
    print(f"\nTotal: {total_patterns} patterns across {len(number_counts)} numbers")

# ----------------------
# 5) Main Execution
# ----------------------
if __name__ == "__main__":
    # Check numeric patterns with regex
    hits = check_numeric_patterns_with_regex(quran_words, WORDS_OF_INTEREST, SIGNIFICANT_NUMBERS)

    if hits:
        # Organize patterns by significant number
        patterns_by_number = organize_numeric_patterns(hits)
        
        print_pattern_summary(patterns_by_number)

        # Display organized patterns
        display_numeric_patterns(patterns_by_number)
        
        # Optional: Print summary statistics
        total_patterns = sum(len(data['occurrences']) for data in patterns_by_number.values())
        print(f"\nSummary:")
        print(f"Found {total_patterns} numeric patterns across {len(patterns_by_number)} different numbers")
    else:
        print("No numeric coincidences found with the given lists.")



Divisibility Summary:
• Number   2:  11
• Number   3:   4
• Number   5:   3
• Number   7:   2
• Number  19:   2
• Number  13:   1

Total: 23 patterns across 6 numbers

Numeric Patterns Analysis:

Number 2:
--------------------
• الله: 2548 (2 × 1274)
• يوم: 266 (2 × 133)
• رسول: 54 (2 × 27)
• الرحمن: 50 (2 × 25)
• الرحيم: 36 (2 × 18)
• جنة: 20 (2 × 10)
• نار: 20 (2 × 10)
• دين: 16 (2 × 8)
• مؤمن: 12 (2 × 6)
• محمد: 4 (2 × 2)
• كتاب: 4 (2 × 2)

Number 3:
--------------------
• رسول: 54 (3 × 18)
• الرحيم: 36 (3 × 12)
• مؤمن: 12 (3 × 4)
• كافر: 3 (3 × 1)

Number 5:
--------------------
• الرحمن: 50 (5 × 10)
• جنة: 20 (5 × 4)
• نار: 20 (5 × 4)

Number 7:
--------------------
• الله: 2548 (7 × 364)
• يوم: 266 (7 × 38)

Number 13:
--------------------
• الله: 2548 (13 × 196)

Number 19:
--------------------
• يوم: 266 (19 × 14)
• اسم: 19 (19 × 1)

Summary:
Found 23 numeric patterns across 6 different numbers


In [44]:
len(hits)

23