# Tigrigna Spelling Checker

This notebook implements a basic spelling checker for the Tigrigna language. It provides functions to check spelling and suggest corrections for misspelled words.

## 1. Loading the Tigrigna Dictionary

In [1]:
def load_dictionary(file_path='tigrigna_dictionary.txt'):
    """
    Load the Tigrigna word list from a UTF-8 text file.

    Each line is treated as one entry: surrounding whitespace is stripped
    and blank lines are skipped. A leading UTF-8 byte-order mark is
    discarded so it cannot end up glued to the first word of the file.

    Args:
        file_path (str): Path to the dictionary file

    Returns:
        set: The words read from the file. An empty set is returned (and a
            message is printed) when the file is missing or cannot be read.
    """
    try:
        # 'utf-8-sig' decodes plain UTF-8 unchanged but drops a leading BOM;
        # str.strip() would not remove it because U+FEFF is not whitespace.
        with open(file_path, 'r', encoding='utf-8-sig') as file:
            stripped_lines = (line.strip() for line in file)
            return {entry for entry in stripped_lines if entry}
    except FileNotFoundError:
        print(f"Error: Dictionary file '{file_path}' not found.")
        return set()
    except Exception as e:
        # Best-effort loader: report any other failure and fall back to an
        # empty dictionary rather than aborting the notebook.
        print(f"Error loading dictionary: {e}")
        return set()

# Load the Tigrigna dictionary from the default path
# ('tigrigna_dictionary.txt') into a module-level set. The UI callbacks
# defined later in this notebook read and update this same set.
# load_dictionary() returns an empty set on failure, so a count of 0 here
# means the file was missing, unreadable, or empty.
tigrigna_dictionary = load_dictionary()
print(f"Loaded {len(tigrigna_dictionary)} Tigrigna words into the dictionary.")

Loaded 294 Tigrigna words into the dictionary.


## 2. Text Processing Functions

In [2]:
import re

# Ethiopic punctuation marks: U+1360 (section mark) through U+1368
# (paragraph separator), including the wordspace U+1361 and the full stop
# U+1362. They live inside the main Ethiopic block, so they have to be
# carved out of the word-character class; otherwise a sentence-final word
# such as "ኣለኹም።" is tokenized with its full stop attached and never
# matches the dictionary.
_ETHIOPIC_PUNCTUATION = '\u1360\u1361\u1362\u1363\u1364\u1365\u1366\u1367\u1368'
_ASCII_PUNCTUATION = ',.!?;:"()[]{}'
_PUNCTUATION_CHARS = frozenset(_ASCII_PUNCTUATION + _ETHIOPIC_PUNCTUATION)

# First alternative: a run of Ethiopic letters, combining marks and numerals
# (Ethiopic, Ethiopic Supplement, Ethiopic Extended) minus the punctuation
# range. Second alternative: exactly one punctuation character.
_TOKEN_PATTERN = re.compile(
    r'[\u1200-\u135F\u1369-\u137F\u1380-\u139F\u2D80-\u2DDF]+'
    r'|[\u1360-\u1368,.!?;:"()\[\]{}]'
)


def tokenize_text(text):
    """
    Tokenizes Tigrigna text into words and punctuation marks.

    Words are maximal runs of Ethiopic-script characters. Every ASCII or
    Ethiopic punctuation character becomes its own single-character token.
    Whitespace and any other characters (Latin letters, ASCII digits, ...)
    are dropped.

    Args:
        text (str): The input Tigrigna text

    Returns:
        list: Tokens in order of appearance; each token is a verbatim
            substring of ``text``
    """
    return _TOKEN_PATTERN.findall(text)


def is_punctuation(word):
    """
    Checks if a token is punctuation.

    Args:
        word (str): The token to check

    Returns:
        bool: True if every character of the token is an ASCII or Ethiopic
            punctuation mark (vacuously True for an empty string),
            False otherwise
    """
    return all(char in _PUNCTUATION_CHARS for char in word)

## 3. Spell Checking Functions

In [3]:
def check_spelling(word, dictionary):
    """
    Report whether a word appears in the dictionary.

    The lookup is an exact membership test; no normalization is applied
    to the word before it is compared.

    Args:
        word (str): The word to look up
        dictionary (set): The Tigrigna dictionary

    Returns:
        bool: True when the word is a dictionary entry, False otherwise
    """
    found = word in dictionary
    return found

## 4. Suggestion Generation using Levenshtein Distance

In [4]:
def levenshtein_distance(s1, s2):
    """
    Compute the edit distance between two strings.

    The distance is the minimum number of single-character insertions,
    deletions and substitutions needed to turn one string into the other.
    Only two rows of the dynamic-programming table are kept at a time.

    Args:
        s1 (str): First string
        s2 (str): Second string

    Returns:
        int: The Levenshtein distance
    """
    # Put the longer string on the outer loop so each row spans the shorter one.
    if len(s1) < len(s2):
        longer, shorter = s2, s1
    else:
        longer, shorter = s1, s2

    # Against an empty string, every character of the other is an insertion.
    if len(shorter) == 0:
        return len(longer)

    prev_costs = list(range(len(shorter) + 1))
    for row_number, ch_long in enumerate(longer, start=1):
        costs = [row_number]
        for col, ch_short in enumerate(shorter):
            substitute = prev_costs[col] + (0 if ch_long == ch_short else 1)
            insert = prev_costs[col + 1] + 1
            delete = costs[col] + 1
            costs.append(min(insert, delete, substitute))
        prev_costs = costs

    return prev_costs[-1]

def get_suggestions(word, dictionary, max_distance=2, max_suggestions=5):
    """
    Generate suggestions for a misspelled word.

    Every dictionary entry within ``max_distance`` edits of ``word`` is a
    candidate. Candidates are ordered by distance and, for equal distances,
    by the word itself, so the result does not depend on the iteration
    order of the dictionary set (which can differ from run to run).

    Args:
        word (str): The misspelled word
        dictionary (set): The Tigrigna dictionary
        max_distance (int): Maximum Levenshtein distance for suggestions
        max_suggestions (int): Maximum number of suggestions to return

    Returns:
        list: Suggested corrections, closest first
    """
    word_length = len(word)
    candidates = []

    for dict_word in dictionary:
        # The edit distance is never smaller than the length difference, so
        # entries that are too long or too short can be rejected without
        # running the full distance computation.
        if abs(len(dict_word) - word_length) > max_distance:
            continue

        distance = levenshtein_distance(word, dict_word)
        if distance <= max_distance:
            candidates.append((distance, dict_word))

    # Tuples sort by distance first, then by word: a stable, repeatable order.
    candidates.sort()
    return [candidate for _, candidate in candidates[:max_suggestions]]

## 5. Main Spell Checking Function

In [5]:
def spell_check_text(text, dictionary):
    """
    Spell-check every word of a text.

    The text is tokenized; punctuation and whitespace tokens are ignored.
    Each remaining token is looked up in the dictionary, and suggestions
    are generated only for tokens that are not found.

    Args:
        text (str): The input Tigrigna text
        dictionary (set): The Tigrigna dictionary

    Returns:
        list: One dictionary per checked word, in text order, with the keys
            'word', 'is_correct' and 'suggestions'
    """
    report = []

    for token in tokenize_text(text):
        # Only real words are checked.
        if is_punctuation(token) or token.isspace():
            continue

        verdict = check_spelling(token, dictionary)
        if verdict:
            hints = []
        else:
            hints = get_suggestions(token, dictionary)

        report.append({
            'word': token,
            'is_correct': verdict,
            'suggestions': hints,
        })

    return report

## 6. User Interface

In [6]:
from IPython.display import display, HTML
import ipywidgets as widgets

def format_results(results):
    """
    Format spell check results as HTML for display.

    Words and suggestions are HTML-escaped before being inserted into the
    markup, so entries containing characters such as '<' or '&' (for
    example from a hand-edited dictionary file) are shown literally instead
    of being interpreted as markup.

    Args:
        results (list): List of dictionaries containing spell check results;
            each needs the keys 'word', 'is_correct' and 'suggestions'

    Returns:
        str: HTML-formatted results
    """
    # Imported locally so this function does not depend on a module-level
    # import elsewhere in the notebook.
    from html import escape

    # Collect fragments and join once at the end.
    parts = [
        '<div style="font-family: Arial, sans-serif; line-height: 1.6;">',
        '<h3>Spell Check Results:</h3>',
    ]

    if not results:
        parts.append('<p>No words to check.</p>')
    else:
        parts.append('<ul style="list-style-type: none; padding-left: 0;">')

        for item in results:
            word = escape(str(item['word']))
            suggestions = item['suggestions']

            if item['is_correct']:
                parts.append(
                    f'<li style="margin-bottom: 10px;"><span style="color: green;">{word}</span> - Correct</li>'
                )
                continue

            parts.append(
                f'<li style="margin-bottom: 10px;"><span style="color: red; text-decoration: underline;">{word}</span> - Misspelled'
            )

            if suggestions:
                parts.append('<ul style="margin-top: 5px; margin-bottom: 5px;">')
                parts.append('<li>Suggestions:</li>')
                parts.extend(
                    f'<li style="margin-left: 20px;">{escape(str(suggestion))}</li>'
                    for suggestion in suggestions
                )
                parts.append('</ul>')
            else:
                parts.append('<br>No suggestions available.')

            parts.append('</li>')

        parts.append('</ul>')

    parts.append('</div>')
    return ''.join(parts)

def highlight_text(text, results):
    """
    Highlight misspelled words in the original text.

    The highlights are spliced into the original text, so spacing and any
    characters the tokenizer ignores are kept exactly as typed. All text,
    including the tooltip, is HTML-escaped before being placed in the markup.

    Args:
        text (str): The original text
        results (list): Spell check results (dictionaries with the keys
            'word', 'is_correct' and 'suggestions')

    Returns:
        str: HTML paragraph with each misspelled word wrapped in a red,
            underlined span whose tooltip lists the suggestions
    """
    # Imported locally so this function does not depend on a module-level
    # import elsewhere in the notebook.
    from html import escape

    # Map each misspelled word to its suggestions
    misspelled = {item['word']: item['suggestions'] for item in results if not item['is_correct']}

    if not misspelled:
        return f'<p>{escape(text)}</p>'

    pieces = []
    cursor = 0  # index in `text` up to which output has been produced

    # Assumes tokenize_text returns tokens in order of appearance, each a
    # verbatim substring of `text`; the tokens are located one after another.
    for token in tokenize_text(text):
        start = text.find(token, cursor)
        if start < 0:
            # Token not found verbatim; leave the surrounding text to be
            # emitted unchanged by a later slice.
            continue

        # Everything between the previous token and this one (spaces and
        # characters the tokenizer skipped) is copied through as-is.
        pieces.append(escape(text[cursor:start]))

        if token in misspelled:
            # Add tooltip with suggestions
            suggestions = ", ".join(misspelled[token])
            tooltip = f'Suggestions: {suggestions}' if suggestions else 'No suggestions'
            pieces.append(
                f'<span style="color: red; text-decoration: underline;" title="{escape(tooltip)}">{escape(token)}</span>'
            )
        else:
            pieces.append(escape(token))

        cursor = start + len(token)

    # Trailing text after the last token
    pieces.append(escape(text[cursor:]))

    return '<p>' + ''.join(pieces) + '</p>'

In [7]:
def create_spell_check_ui():
    """
    Build the interactive spell-checking widget.

    The widget stacks a heading, a short instruction, a text area, a
    "Check Spelling" button and an output area. Clicking the button checks
    the entered text against the module-level ``tigrigna_dictionary`` and
    renders the highlighted text followed by the per-word results.

    Returns:
        widgets.VBox: The assembled UI, ready to be passed to ``display``
    """
    text_box = widgets.Textarea(
        placeholder='Enter Tigrigna text here...',
        description='Input:',
        layout=widgets.Layout(width='100%', height='150px')
    )

    run_button = widgets.Button(
        description='Check Spelling',
        button_style='primary',
        icon='check'
    )

    result_area = widgets.Output()

    def handle_check(_button):
        # Everything displayed inside this context goes to the output widget;
        # the previous run's output is cleared first.
        with result_area:
            result_area.clear_output()
            entered = text_box.value

            if entered.strip():
                findings = spell_check_text(entered, tigrigna_dictionary)

                # Original text with the misspelled words highlighted
                display(HTML('<h3>Original Text with Highlights:</h3>'))
                display(HTML(highlight_text(entered, findings)))

                # Word-by-word details
                display(HTML(format_results(findings)))
            else:
                display(HTML('<p style="color: orange;">Please enter some text to check.</p>'))

    run_button.on_click(handle_check)

    # Stack the pieces vertically
    title = widgets.HTML('<h2>Tigrigna Spelling Checker</h2>')
    instructions = widgets.HTML('<p>Enter Tigrigna text below and click "Check Spelling" to see results.</p>')
    return widgets.VBox([title, instructions, text_box, run_button, result_area])

# Create and display the UI.
# The widget is kept in a module-level name and rendered in this cell's
# output area; its button callback checks text against the module-level
# tigrigna_dictionary.
spell_check_ui = create_spell_check_ui()
display(spell_check_ui)

VBox(children=(HTML(value='<h2>Tigrigna Spelling Checker</h2>'), HTML(value='<p>Enter Tigrigna text below and …

## 7. Example Usage

In [8]:
# Example usage with sample text: runs the checker directly, without the
# interactive widget.
sample_text = "ሰላም ኣለኹም። ኣነ ኣብ ቤትትምህሪት ኣለኹ። ኣቦይ ሎሚ ናብ ከተማ ከይዱ። ሓብተይ ኣብ ገዛ ኣላ።"
print("Sample text:", sample_text)

# Introduce a spelling error for demonstration (same sentence, one word changed)
text_with_error = "ሰላም ኣለኹም። ኣነ ኣብ ቤትትምህሪት ኣለኹ። ኣቦይ ሎሙ ናብ ከተማ ከይዱ። ሓብተይ ኣብ ገዛ ኣላ።"  # ሎሙ instead of ሎሚ
print("\nText with error:", text_with_error)

# Check spelling of the text with error; the result is a list with one
# dictionary per word ('word', 'is_correct', 'suggestions')
results = spell_check_text(text_with_error, tigrigna_dictionary)

# Format the results as HTML and render them in the cell output
display(HTML(format_results(results)))

Sample text: ሰላም ኣለኹም። ኣነ ኣብ ቤትትምህሪት ኣለኹ። ኣቦይ ሎሚ ናብ ከተማ ከይዱ። ሓብተይ ኣብ ገዛ ኣላ።

Text with error: ሰላም ኣለኹም። ኣነ ኣብ ቤትትምህሪት ኣለኹ። ኣቦይ ሎሙ ናብ ከተማ ከይዱ። ሓብተይ ኣብ ገዛ ኣላ።


## 8. Adding Words to the Dictionary

In [9]:
def add_word_to_dictionary(word, dictionary, file_path='tigrigna_dictionary.txt'):
    """
    Add a new word to the Tigrigna dictionary.

    The word is stripped of surrounding whitespace and stored in that form,
    matching how entries are read back from the file. The file is updated
    first and the in-memory set only afterwards, so a failed write leaves
    both unchanged. A word that is already in the set is not written to the
    file again.

    Args:
        word (str): Word to add
        dictionary (set): The Tigrigna dictionary set
        file_path (str): Path to the dictionary file

    Returns:
        bool: True if the word is in the dictionary afterwards (newly added
            or already present), False if the word is blank or the file
            could not be written
    """
    cleaned = word.strip()
    if not cleaned:
        return False

    # Nothing to do; avoids appending duplicate lines to the file.
    if cleaned in dictionary:
        return True

    try:
        # A leading newline keeps the new entry on its own line even when
        # the file does not end with a line break.
        with open(file_path, 'a', encoding='utf-8') as file:
            file.write(f"\n{cleaned}")
    except Exception as e:
        print(f"Error adding word to dictionary: {e}")
        return False

    # Update the in-memory set only after the word has been persisted.
    dictionary.add(cleaned)
    return True

# Create a UI for adding words to the dictionary
def create_dictionary_ui():
    """
    Create a UI for adding words to the dictionary.

    The widget consists of a text field, an "Add to Dictionary" button and
    an output area for status messages. Clicking the button adds the
    entered word to the module-level ``tigrigna_dictionary`` and to the
    default dictionary file via ``add_word_to_dictionary``.

    Returns:
        widgets.VBox: The assembled UI, ready to be passed to ``display``
    """
    # Imported locally so this function does not depend on a module-level
    # import elsewhere in the notebook.
    from html import escape

    word_input = widgets.Text(
        placeholder='Enter a Tigrigna word',
        description='New Word:',
        layout=widgets.Layout(width='300px')
    )

    add_button = widgets.Button(
        description='Add to Dictionary',
        button_style='success',
        icon='plus'
    )

    output = widgets.Output()

    def on_add_button_click(b):
        with output:
            output.clear_output()
            word = word_input.value.strip()

            if not word:
                display(HTML('<p style="color: orange;">Please enter a word to add.</p>'))
                return

            # The word is free-form user input that gets echoed back inside
            # HTML, so escape it to keep characters like '<' and '&' literal.
            safe_word = escape(word)

            if word in tigrigna_dictionary:
                display(HTML(f'<p style="color: blue;">"{safe_word}" is already in the dictionary.</p>'))
                return

            success = add_word_to_dictionary(word, tigrigna_dictionary)

            if success:
                display(HTML(f'<p style="color: green;">"{safe_word}" has been added to the dictionary.</p>'))
                word_input.value = ''  # Clear the input
            else:
                display(HTML(f'<p style="color: red;">Failed to add "{safe_word}" to the dictionary.</p>'))

    add_button.on_click(on_add_button_click)

    # Create UI layout
    ui = widgets.VBox([
        widgets.HTML('<h3>Add Words to Dictionary</h3>'),
        widgets.HBox([word_input, add_button]),
        output
    ])

    return ui

# Create and display the UI.
# The widget is kept in a module-level name and rendered in this cell's
# output area; words added through it go into the module-level
# tigrigna_dictionary.
dictionary_ui = create_dictionary_ui()
display(dictionary_ui)

VBox(children=(HTML(value='<h3>Add Words to Dictionary</h3>'), HBox(children=(Text(value='', description='New …

## 9. Conclusion

This notebook provides a basic spelling checker for the Tigrigna language with the following features:

1. Dictionary-based word verification
2. Misspelled word identification
3. Spelling correction suggestions using Levenshtein distance
4. Interactive user interface for spell checking
5. Capability to add new words to the dictionary

The dictionary provided is a starting point and can be expanded for better coverage of the Tigrigna language. The spell checker can be improved by adding more sophisticated algorithms for suggestion generation and by expanding the dictionary.