DyslexiaLens

Text Ingestion

In [70]:
#text = input(Text Input: )
text = """The defendant shall constitute a response to the aforementioned allegations 
    pursuant to the statutory requirements, whereas failure to comply may result 
    in default judgment."""


Preprocessing

1. Text Cleaning

In [71]:
import ftfy
import re

In [72]:
def clean_text(text):

    #to clear the encoding issues
    text = ftfy.fix_text(text)

    #to remove the non printable letters
    text = ''.join(c for c in text if c.isprintable())
    
    #to remove the extra spaces
    text = re.sub(r'[\r\n]+', '\n', text)
    text = re.sub(r'\s{2,}', ' ', text)

    return text.strip()

In [73]:
cleanText = clean_text(text)

2. Segmentation

In [74]:
import nltk 
nltk.download('punkt')

[nltk_data] Downloading package punkt to
[nltk_data]     /home/prathoseraaj-v/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


True

In [75]:
def segmentation_text(text):
    paragraph = [p for p in text.split('\n') if p.strip()]
    sentance = []

    for para in paragraph:
        sentance.extend(nltk.sent_tokenize(para))

    tokens = [nltk.word_tokenize(sent) for sent in sentance]
    return {
        'paragraph': paragraph,
        'sentences': sentance,
        'tokens': tokens,
    }

In [76]:
preprocessed_text = segmentation_text(text)

Readability Assesment

In [77]:
full_text = " ".join(preprocessed_text["paragraph"])
sentences = preprocessed_text["sentences"]

print(full_text)
print(sentences)

The defendant shall constitute a response to the aforementioned allegations      pursuant to the statutory requirements, whereas failure to comply may result      in default judgment.
['The defendant shall constitute a response to the aforementioned allegations', '    pursuant to the statutory requirements, whereas failure to comply may result', '    in default judgment.']


In [78]:
import spacy

In [79]:
nlp = spacy.load("en_core_web_sm")

1. Readability Score

In [80]:
import textstat

In [81]:
def readability_score(text):
    return{
        "flesch_reading_ease" : textstat.flesch_reading_ease(text),
        "flesch_kincaid_grade" : textstat.flesch_kincaid_grade(text),
        "gunning_fog": textstat.gunning_fog(text),
        "smog_index": textstat.smog_index(text),
        "coleman_liau_index" : textstat.coleman_liau_index(text),
        "automated_readability_index" : textstat.automated_readability_index(text),
        "dale_chall_readability_score" : textstat.dale_chall_readability_score(text),
        "difficult_words_count" : textstat.difficult_words(text),
        "difficult_words_list" : textstat.difficult_words_list(text),
        }

In [82]:
readability_score(text)

{'flesch_reading_ease': 13.275000000000034,
 'flesch_kincaid_grade': 17.37,
 'gunning_fog': 21.26666666666667,
 'smog_index': 18.243605946275583,
 'coleman_liau_index': 18.733333333333338,
 'automated_readability_index': 20.0075,
 'dale_chall_readability_score': 14.037733333333335,
 'difficult_words_count': 14,
 'difficult_words_list': ['response',
  'pursuant',
  'allegations',
  'requirements',
  'failure',
  'default',
  'aforementioned',
  'judgment',
  'result',
  'comply',
  'defendant',
  'whereas',
  'constitute',
  'statutory']}

2. Detect long sentance

In [83]:
def detect_long_sentance(sentances, threshold=25):
    return [sent for sent in sentances if len(sent.split()) > threshold]

In [84]:
detect_long_sentance(sentances=sentences)

[]

3. Detect passive voice

In [85]:
import spacy

In [86]:
def detect_passive_voice(sentences):
    passive_sentences = []

    for sent in sentences:
        doc = nlp(sent)
        for token in doc:
            if token.dep_ == "nsubjpass":
                passive_sentences.append(sent)
                break

    return passive_sentences

In [87]:
detect_long_sentance(sentances=sentences)

[]

4. Detect ambigious structures

In [88]:
def detect_ambiguous_structures(sentences):
    ambiguous_keywords = ["might", "could", "possibly", "maybe", "potentially", "approximately", "suggests", "appears"]
    return [sent for sent in sentences if any(word in sent.lower() for word in ambiguous_keywords)]

In [89]:
detect_ambiguous_structures(sentences=sentences)

[]

5. Return the readability assesment 

In [90]:
def assesment_data(preprocessed_text):

    paragraph = "".join(preprocessed_text['paragraph'])
    sentences = preprocessed_text["sentences"]

    return{
        "readability_score" : readability_score(paragraph),
        "long_sentences" : detect_long_sentance(sentences),
        "passive_voice" : detect_passive_voice(sentences),
        "detect_ambiguous_structures" : detect_ambiguous_structures(sentences),
    }

In [91]:
results = assesment_data(preprocessed_text)

Text Simplification

In [92]:
# Install: pip install transformers torch nltk textstat

import torch
from transformers import pipeline
import nltk
import textstat

# Download the punkt tokenizer for sentence splitting (only once)
nltk.download('punkt')

# Define your input text
text = """Pursuant to the overarching objectives articulated within the Strategic Urban Development Framework, municipal authorities have commenced implementation of multifaceted infrastructural initiatives intended to augment both intermodal connectivity and socioeconomic inclusivity. The preliminary phase encompasses the recalibration of extant transportation modalities, the optimization of resource allocation protocols, and the systemic integration of public-private partnerships to facilitate sustainable urban growth trajectories."""

print("🤖 Loading AI simplification model...")

try:
    # Choose a model that fits your disk space and needs ("sshleifer/distilbart-cnn-12-6" is small and good for summarization)
    simplifier = pipeline(
        "text2text-generation",
        model="sshleifer/distilbart-cnn-12-6",
        device=0 if torch.cuda.is_available() else -1
    )
    print("✅ Model loaded successfully!")
except Exception as e:
    print(f"❌ Error loading model: {e}")
    simplifier = None  # Don't use fallback

def huggingface_simplify(text):
    """Simplify text using Hugging Face AI model ONLY"""
    if simplifier is None:
        print("❌ No model available for simplification.")
        return [text]  # Return original if model fails
    sentences = nltk.sent_tokenize(text)
    simplified_sentences = []
    for sentence in sentences:
        try:
            if len(sentence.split()) > 10:
                result = simplifier(
                    sentence,
                    max_length=min(len(sentence.split()) + 10, 100),
                    min_length=max(len(sentence.split()) - 5, 10),
                    do_sample=True,
                    temperature=0.3,
                    top_p=0.9
                )
                simplified = result[0].get('summary_text', sentence)
            else:
                simplified = sentence
            simplified_sentences.append(simplified)
        except Exception as e:
            print(f"⚠️ Error processing sentence, using original: {e}")
            simplified_sentences.append(sentence)
    return simplified_sentences

print("\n=== AI-POWERED DYSLEXIALENS SIMPLIFICATION ===")
print(f"Original text grade level: {textstat.flesch_kincaid_grade(text):.1f}")
print(f"Difficult words: {textstat.difficult_words(text)}")

print("\n🤖 Processing with AI model...")
simplified_sentences = huggingface_simplify(text)

print("\n=== SIMPLIFIED TEXT ===")
for i, sentence in enumerate(simplified_sentences, 1):
    print(f"{i}. {sentence}")

if simplified_sentences:
    final_text = ' '.join(simplified_sentences)
    final_grade = textstat.flesch_kincaid_grade(final_text)
    improvement = textstat.flesch_kincaid_grade(text) - final_grade
    print(f"\n=== RESULTS ===")
    print(f"Original grade level: {textstat.flesch_kincaid_grade(text):.1f}")
    print(f"Simplified grade level: {final_grade:.1f}")
    print(f"Improvement: {improvement:.1f} grades easier")
    print(f"Difficult words reduced: {textstat.difficult_words(text)} → {textstat.difficult_words(final_text)}")

[nltk_data] Downloading package punkt to
[nltk_data]     /home/prathoseraaj-v/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


🤖 Loading AI simplification model...


Device set to use cuda:0


✅ Model loaded successfully!

=== AI-POWERED DYSLEXIALENS SIMPLIFICATION ===
Original text grade level: 29.3
Difficult words: 40

🤖 Processing with AI model...

=== SIMPLIFIED TEXT ===
1. Pursuant to the overarching objectives articulated within the Strategic Urban Development Framework, municipal authorities have commenced implementation of multifaceted infrastructural initiatives intended to augment both intermodal connectivity and socioeconomic inclusivity.
2. The preliminary phase encompasses the recalibration of extant transportation modalities, the optimization of resource allocation protocols, and the systemic integration of public-private partnerships to facilitate sustainable urban growth trajectories.

=== RESULTS ===
Original grade level: 29.3
Simplified grade level: 29.3
Improvement: 0.0 grades easier
Difficult words reduced: 40 → 40
