In [1]:


import re
import os

# Function to count the number of sentences
def count_sentences(text):
    """Return the number of sentences found in *text*.

    The text is cut at every '.', '!', '?', ';' or ':' character and each
    fragment that still contains something other than whitespace counts as
    one sentence. The result is never lower than 1, even for empty input.
    """
    fragments = re.split(r'[.!?;:]', text)
    # Blank fragments appear after trailing punctuation or between
    # consecutive marks (e.g. "..."); they are not sentences.
    non_blank = sum(1 for fragment in fragments if fragment.strip())
    return non_blank if non_blank else 1

# Function to count the number of words
def count_words(text):
    """Return how many words *text* contains.

    A word is a maximal run of ``\\w`` characters (letters, digits and
    underscore) delimited by word boundaries, so punctuation such as an
    apostrophe splits a token into separate words.
    """
    word_pattern = re.compile(r'\b\w+\b')
    # Tally matches lazily instead of materialising the list of words.
    return sum(1 for _ in word_pattern.finditer(text))

# Function to count the number of characters
def count_characters(text):
    """Return the number of alphanumeric characters in *text*.

    Whitespace, punctuation and underscores are ignored. Letters and digits
    from any script are counted (not only ASCII), so accented or non-Latin
    text read from a UTF-8 file is not under-counted relative to the word
    count, which is based on the Unicode-aware ``\\w`` class.
    """
    # str.isalnum() is Unicode-aware; summing booleans counts the True ones.
    return sum(ch.isalnum() for ch in text)

# Function to calculate the Automated Readability Index (ARI)
def calculate_ari(num_sentences, num_words, num_characters):
    """Return the Automated Readability Index rounded to an integer.

    ARI = 4.71 * (characters / words) + 0.5 * (words / sentences) - 21.43

    Args:
        num_sentences: Number of sentences in the text.
        num_words: Number of words in the text.
        num_characters: Number of counted characters in the text.

    Returns:
        The index rounded with the built-in ``round`` (ties go to the
        nearest even integer). Returns 0 when there are no words or no
        sentences, because both ratios are undefined for empty text.
    """
    # Guard the two divisions below: an empty file yields zero words and
    # would otherwise raise ZeroDivisionError.
    if num_words == 0 or num_sentences == 0:
        return 0

    chars_per_word = num_characters / num_words
    words_per_sentence = num_words / num_sentences
    return round(4.71 * chars_per_word + 0.5 * words_per_sentence - 21.43)

# Function to process a text file and compute ARI
def process_text_file(file_path):
    """Read a UTF-8 text file, compute its statistics and print a report.

    The report lists the file's base name, its sentence, word and character
    counts, and the resulting readability index, followed by blank lines
    that separate it from the next report. Nothing is returned.
    """
    with open(file_path, encoding='utf-8') as handle:
        contents = handle.read()

    sentence_total = count_sentences(contents)
    word_total = count_words(contents)
    character_total = count_characters(contents)
    readability = calculate_ari(sentence_total, word_total, character_total)

    report_lines = (
        f"Text File: {os.path.basename(file_path)}",
        f"Number of sentences: {sentence_total}",
        f"Number of words: {word_total}",
        f"Number of characters: {character_total}",
        f"Readability index: {readability}",
    )
    for line in report_lines:
        print(line)
    # print adds its own newline, so this emits two blank lines as a separator.
    print("\n")

# File paths for the three text files
file_paths = [
    # NOTE(review): absolute paths under /content/drive — presumably a
    # Google Colab Drive mount; confirm before running elsewhere.
    # The first name starts with a lowercase 'f' while the others use 'File';
    # the literals must match the on-disk names exactly on case-sensitive
    # file systems.
    '/content/drive/MyDrive/AI Assignments/file 1.txt',
    '/content/drive/MyDrive/AI Assignments/File 2.txt',
    '/content/drive/MyDrive/AI Assignments/File 3.txt'
]

# Print one report per file, in list order. process_text_file opens each
# path directly and does no error handling of its own, so any failure to
# open or decode a file propagates and ends the loop.
for file_path in file_paths:
    process_text_file(file_path)


Text File: file 1.txt
Number of sentences: 7
Number of words: 22
Number of characters: 92
Readability index: 0


Text File: File 2.txt
Number of sentences: 8
Number of words: 46
Number of characters: 197
Readability index: 2


Text File: File 3.txt
Number of sentences: 8
Number of words: 141
Number of characters: 572
Readability index: 6


