<a href="https://colab.research.google.com/github/mrkim21/mrkim21.github.io/blob/main/appfolder/appcodes/240129_textanalysis01.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Text Analysis Tool

[applink](https://mrkim21.github.io/appfolder/textanalysis01.html)

+ TTR (Type-Token Ratio)
+ Word count
+ Lexical Diversity Measure (MTLD)
+ FRE (Readability)
+ FKG level (Readability)

In [None]:
!pip install gradio math

In [None]:
import gradio as gr
import re
import math

# Metric helpers used by the Gradio app below: TTR, MTLD,
# Flesch Reading Ease, and Flesch-Kincaid Grade Level.

# TTR

def calculate_ttr(text):
    """Return the type-token ratio (TTR) of *text*, rounded up to 2 decimals.

    Words are maximal runs of ASCII letters, compared case-insensitively.
    TTR is the number of distinct words divided by the total number of
    words. Text without any words yields 0.0.
    """
    words = re.findall(r'\b[a-zA-Z]+\b', text.lower())
    if not words:
        return 0.0
    ttr = len(set(words)) / len(words)
    # Binary floats can overshoot an exact hundredth (0.07 * 100 evaluates to
    # 7.000000000000001), which would make ceil() jump to the next hundredth.
    # Rounding that noise away first keeps "round up" from over-reporting.
    return math.ceil(round(ttr * 100, 10)) / 100

# MTLD

def calculate_mtld(text, ttr_threshold=0.72):
    """Return the Measure of Textual Lexical Diversity (MTLD) of *text*.

    The word sequence is scanned once forwards and once backwards. In each
    pass a running type-token ratio (TTR) is tracked over the current
    segment; whenever it drops below *ttr_threshold* one full factor is
    counted and a new segment begins. The unfinished final segment adds a
    partial factor, ``(1 - TTR) / (1 - ttr_threshold)``. A pass scores
    ``number of words / factors`` and the two passes are averaged.

    Args:
        text: Text to analyse. Words are runs of ASCII letters, lower-cased.
        ttr_threshold: TTR value below which a segment counts as a factor.

    Returns:
        The averaged score rounded up to 2 decimal places, or 0 when no
        factor can be measured (no words, or no repeated word at all).
    """
    def mtld_calculation(word_list, threshold):
        factor_count = 0.0
        segment_types = set()
        segment_tokens = 0
        current_ttr = 1.0
        for word in word_list:
            segment_tokens += 1
            segment_types.add(word)
            # Types and tokens are both counted over the current segment
            # only, so the ratio is a real TTR and never exceeds 1.
            current_ttr = len(segment_types) / segment_tokens
            if current_ttr < threshold:
                factor_count += 1
                segment_types = set()
                segment_tokens = 0
                current_ttr = 1.0
        # Partial factor: how far the leftover segment's TTR has travelled
        # from 1 towards the threshold. Skipped when threshold >= 1, where
        # the denominator would be zero or negative.
        if segment_tokens > 0 and threshold < 1:
            factor_count += (1 - current_ttr) / (1 - threshold)
        return len(word_list) / factor_count if factor_count != 0 else 0

    words = re.findall(r'\b[a-zA-Z]+\b', text.lower())
    mtld_forward = mtld_calculation(words, ttr_threshold)
    mtld_backward = mtld_calculation(words[::-1], ttr_threshold)
    mtld = (mtld_forward + mtld_backward) / 2
    # round() strips binary float noise so ceil() does not bump an exact
    # hundredth up to the next one.
    return math.ceil(round(mtld * 100, 10)) / 100

# Flesch Reading Ease

def count_syllables(word):
    """Estimate the number of syllables in *word* with a vowel-group heuristic.

    Each run of consecutive vowels (``a e i o u y``) counts as one syllable,
    a trailing "e" is treated as silent, and any non-empty word counts as at
    least one syllable. Matching is case-insensitive, so capitalised words
    ("Open", "EASY") are counted like their lower-case forms.

    Returns 0 for an empty string.
    """
    if not word:
        return 0
    vowels = "aeiouy"
    letters = word.lower()
    # A vowel opens a new group when it is the first letter or follows a
    # non-vowel; the leading space stands in for "no previous letter".
    syllable_count = sum(
        1
        for previous, current in zip(" " + letters, letters)
        if current in vowels and previous not in vowels
    )
    if letters.endswith("e"):
        syllable_count -= 1
    return max(syllable_count, 1)

def calculate_flesch_reading_ease(text):
    """Return the Flesch Reading Ease score of *text*, rounded up to 2 decimals.

    Score = 206.835 - 1.015 * (words / sentences) - 84.6 * (syllables / words).
    Words are runs of ASCII letters, sentences are the non-blank fragments
    between runs of ``.``, ``!`` and ``?``, and syllables come from
    ``count_syllables``. Returns 0 when the text has no words or no sentences.
    """
    words = re.findall(r'\b[a-zA-Z]+\b', text)
    # Ignore blank fragments: text ending in ". " or ".\n" (or starting with
    # punctuation) would otherwise gain a phantom sentence and skew the
    # words-per-sentence term.
    total_sentences = sum(
        1 for fragment in re.split(r'[.!?]+', text) if fragment.strip()
    )
    total_words = len(words)
    if total_sentences == 0 or total_words == 0:  # Prevent division by zero
        return 0
    total_syllables = sum(count_syllables(word) for word in words)
    flesch_score = 206.835 - 1.015 * (total_words / total_sentences) - 84.6 * (total_syllables / total_words)
    # round() strips binary float noise so ceil() does not bump an exact
    # hundredth up to the next one.
    return math.ceil(round(flesch_score * 100, 10)) / 100

def calculate_flesch_kincaid_grade_level(text):
    """Return the Flesch-Kincaid Grade Level of *text*, rounded up to 2 decimals.

    Grade = 0.39 * (words / sentences) + 11.8 * (syllables / words) - 15.59.
    Words are runs of ASCII letters, sentences are the non-blank fragments
    between runs of ``.``, ``!`` and ``?``, and syllables come from
    ``count_syllables``. Returns 0 when the text has no words or no sentences.
    """
    words = re.findall(r'\b[a-zA-Z]+\b', text)
    # Ignore blank fragments: text ending in ". " or ".\n" (or starting with
    # punctuation) would otherwise gain a phantom sentence and skew the
    # words-per-sentence term.
    total_sentences = sum(
        1 for fragment in re.split(r'[.!?]+', text) if fragment.strip()
    )
    total_words = len(words)
    if total_sentences == 0 or total_words == 0:  # Prevent division by zero
        return 0
    total_syllables = sum(count_syllables(word) for word in words)
    fk_grade_level = 0.39 * (total_words / total_sentences) + 11.8 * (total_syllables / total_words) - 15.59
    # round() strips binary float noise so ceil() does not bump an exact
    # hundredth up to the next one.
    return math.ceil(round(fk_grade_level * 100, 10)) / 100

def analyze_text(text):
    """Compute every metric shown in the app for *text*.

    Returns a 5-tuple: word count, TTR, MTLD, Flesch Reading Ease and
    Flesch-Kincaid Grade Level, with the four scores rounded to 2 decimals.
    """
    tokens = re.findall(r'\b[a-zA-Z]+\b', text)
    metrics = (
        calculate_ttr,
        calculate_mtld,
        calculate_flesch_reading_ease,
        calculate_flesch_kincaid_grade_level,
    )
    scores = [metric(text) for metric in metrics]
    return (len(tokens), *(round(score, 2) for score in scores))

# Gradio front end: one multi-line input box and one output text box per value
# returned by analyze_text, listed in the same order as the returned tuple.
interface = gr.Interface(
    fn=analyze_text,
    inputs=gr.Textbox(lines=10, label="Input Text"),
    outputs=[
        gr.Textbox(label=caption)
        for caption in (
            "Word Count",
            "Type-Token Ratio (TTR)",
            "Measure of Textual Lexical Diversity (MTLD)",
            "Flesch Reading Ease (Readability measure)",
            "Flesch-Kincaid Grade Level (Readability index)",
        )
    ],
    title="Text Analysis Tool",
    description="Enter text to analyze its word count, Type-Token Ratio (TTR), Measure of Textual Lexical Diversity (MTLD), Flesch Reading Ease, and Flesch-Kincaid Grade Level. Note: The Flesch-Kincaid Grade Level indicates the U.S. school grade level needed to understand the text.",
)

# Launch the app.
interface.launch()