<a href="https://colab.research.google.com/github/nann72/L-m-Lab5-main/blob/main/lab%205%20.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [4]:
import re
from collections import Counter
import numpy as np

# 1. Корпус текстів пісень (тут вставлено невелику підмножину для прикладу)


In [5]:
# Small sample of song-lyric lines used as the training corpus; each list
# element is one line of text. get_words() joins and tokenizes these lines to
# build the vocabulary and the word-frequency table.
lyrics_corpus = [
    "I'm gonna take my horse to the old town road",
    "Baby you're a firework, come on show 'em what you're worth",
    "Cause baby you're a firework, come on let your colors burst",
    "Hello from the other side, I must have called a thousand times",
    "I see a little silhouetto of a man, Scaramouche, Scaramouche",
    "Thunderbolt and lightning, very very frightening me",
    "We will we will rock you",
    "Don't stop believin', hold on to that feelin'"
]

# Об'єднуємо всі тексти та токенізуємо слова
def get_words(corpus):
    """Tokenize a corpus of text lines into lower-case words.

    Args:
        corpus: iterable of strings (one string per line of text).

    Returns:
        A list of tokens in order of appearance. The lines are joined with
        single spaces, lower-cased, and every maximal run of ``\\w``
        characters becomes one token, so punctuation such as apostrophes
        splits a word ("I'm" -> "i", "m").
    """
    lowered = " ".join(corpus).lower()
    return [match.group() for match in re.finditer(r'\w+', lowered)]

# Tokenize the corpus and count how often each word occurs.
word_list = get_words(lyrics_corpus)
word_counts = Counter(word_list)

# Relative frequency of every word in the corpus (count / number of tokens).
total_words = len(word_list)
word_probs = {
    token: occurrences / total_words
    for token, occurrences in word_counts.items()
}

# The vocabulary is the set of distinct tokens seen in the corpus.
vocab = set(word_counts)


# 2. Функції редагування слова


In [6]:
def delete_letter(word):
    """Return all strings formed by removing exactly one character from ``word``.

    The results are ordered by the position of the removed character and are
    not de-duplicated (a doubled letter produces the same string twice).
    An empty ``word`` yields an empty list.
    """
    shortened = []
    for pos in range(len(word)):
        shortened.append(word[:pos] + word[pos + 1:])
    return shortened

def insert_letter(word):
    """Return all strings formed by inserting one lower-case ASCII letter.

    Every letter a-z is inserted at every position of ``word``, including
    before the first and after the last character. Results are ordered by
    insertion position, then alphabetically by the inserted letter, and are
    not de-duplicated.
    """
    letters = 'abcdefghijklmnopqrstuvwxyz'
    lengthened = []
    for pos in range(len(word) + 1):
        head, tail = word[:pos], word[pos:]
        lengthened.extend(head + letter + tail for letter in letters)
    return lengthened

def replace_letter(word):
    """Return all strings formed by substituting one character of ``word``.

    Each position is replaced in turn by every lower-case ASCII letter that
    differs from the character currently there, so ``word`` itself is never
    part of the result. Results are ordered by position, then alphabetically
    by the replacement letter.
    """
    letters = 'abcdefghijklmnopqrstuvwxyz'
    substituted = []
    for pos, current in enumerate(word):
        head, tail = word[:pos], word[pos + 1:]
        for letter in letters:
            if letter == current:
                continue  # replacing a letter with itself is not an edit
            substituted.append(head + letter + tail)
    return substituted

def switch_letter(word):
    """Return all strings formed by swapping two adjacent characters of ``word``.

    Results are ordered by the position of the left character of the swapped
    pair. Swapping two identical neighbours reproduces ``word`` unchanged, and
    that entry is kept. Words shorter than two characters yield an empty list.
    """
    swapped = []
    for pos in range(len(word) - 1):
        left, right = word[pos], word[pos + 1]
        swapped.append(word[:pos] + right + left + word[pos + 2:])
    return swapped

def edit_one(word, allow_switches=True):
    """Return the set of strings one edit away from ``word``.

    Combines the results of delete_letter, insert_letter and replace_letter,
    plus switch_letter when ``allow_switches`` is true.

    Args:
        word: the string to edit.
        allow_switches: whether adjacent-character swaps count as an edit.

    Returns:
        A set of candidate strings (duplicates from the helpers collapse).
    """
    operations = [delete_letter, insert_letter, replace_letter]
    if allow_switches:
        operations.append(switch_letter)

    edits = set()
    for operation in operations:
        edits |= set(operation(word))
    return edits

def edit_two(word, allow_switches=True):
    """Return the set of strings reachable from ``word`` by two edits.

    Applies edit_one to ``word`` and then again to every intermediate result.

    Args:
        word: the string to edit.
        allow_switches: forwarded to both rounds of edit_one, so that
            adjacent-character swaps can be disabled consistently. Defaults
            to True, which matches calling edit_one with its own default.

    Returns:
        A set of candidate strings.
    """
    edits = set()
    for first_edit in edit_one(word, allow_switches=allow_switches):
        edits.update(edit_one(first_edit, allow_switches=allow_switches))
    return edits


# 3. Пошук кандидатів та автокорекція


In [7]:
def get_candidates(word, vocab, probs, n=1):
    """Return the ``n`` most probable spelling candidates for ``word``.

    Candidate selection, in order of preference:
      1. ``word`` itself, if it is in ``vocab``;
      2. vocabulary words one edit away;
      3. vocabulary words two edits away;
      4. ``word`` itself as a last resort.

    Args:
        word: the (possibly misspelled) word.
        vocab: set of known words (must be a set: it is intersected with the
            generated edits).
        probs: mapping word -> probability; missing words score 0.
        n: maximum number of suggestions to return.

    Returns:
        A list of ``(candidate, probability)`` tuples sorted by descending
        probability; ties are broken alphabetically so the result does not
        depend on set iteration order.
    """
    if word in vocab:
        candidates = {word}
    else:
        candidates = edit_one(word) & vocab
        if not candidates:
            # Two-edit search is far more expensive, so only run it when the
            # one-edit search found nothing.
            candidates = edit_two(word) & vocab
        if not candidates:
            candidates = {word}

    scored = [(candidate, probs.get(candidate, 0)) for candidate in candidates]
    # Highest probability first; alphabetical order makes ties deterministic.
    scored.sort(key=lambda pair: (-pair[1], pair[0]))
    return scored[:n]

def autocorrect(word, vocab, probs):
    """Return the single best correction for ``word``.

    Asks get_candidates for its top suggestion and returns that suggestion's
    word; if no suggestion comes back, ``word`` is returned unchanged.
    """
    best = get_candidates(word, vocab, probs, n=1)
    if not best:
        return word
    top_word = best[0][0]
    return top_word


# 4. Алгоритм мінімальної відстані редагування


In [8]:
def min_edit_distance(source, target, ins_cost=1, del_cost=1, rep_cost=2):
    """Compute the minimum total cost of editing ``source`` into ``target``.

    Uses dynamic programming over a (len(source)+1) x (len(target)+1) table
    where ``D[i, j]`` is the cheapest way to turn the first ``i`` characters
    of ``source`` into the first ``j`` characters of ``target``.

    Args:
        source: the starting string.
        target: the string to reach.
        ins_cost: cost of inserting one character.
        del_cost: cost of deleting one character.
        rep_cost: cost of replacing one character with a different one
            (matching characters cost 0).

    Returns:
        The minimum edit cost as a NumPy scalar: an integer when all three
        costs are integers, otherwise a float.
    """
    m, n = len(source), len(target)

    # An integer table would silently truncate fractional costs (0.5 -> 0),
    # so fall back to floats unless every cost is integral.
    costs = (ins_cost, del_cost, rep_cost)
    all_integral = all(isinstance(c, (int, np.integer)) for c in costs)
    D = np.zeros((m + 1, n + 1), dtype=int if all_integral else float)

    # First column: delete every source character; first row: insert every
    # target character.
    for i in range(m + 1):
        D[i, 0] = i * del_cost
    for j in range(n + 1):
        D[0, j] = j * ins_cost

    for i in range(1, m + 1):
        for j in range(1, n + 1):
            cost = 0 if source[i - 1] == target[j - 1] else rep_cost
            D[i, j] = min(
                D[i - 1, j] + del_cost,      # delete source[i-1]
                D[i, j - 1] + ins_cost,      # insert target[j-1]
                D[i - 1, j - 1] + cost,      # keep or replace
            )
    return D[m, n]

# 5. Тестування системи


In [9]:
# Demo: run the autocorrector and the edit-distance function on a few samples
# and print the results.
if __name__ == "__main__":
    # Misspelled inputs to be corrected against the corpus vocabulary.
    test_words = ['fierwork', 'believin', 'othre', 'gonnaa', 'thundrbolt']
    print("=== Результати автокорекції ===")
    for w in test_words:
        corrected = autocorrect(w, vocab, word_probs)
        print(f"{w} -> {corrected}")

    print("\n=== Мінімальна відстань редагування ===")
    # (source, target) pairs; distances use the default costs
    # (insert=1, delete=1, replace=2).
    pairs = [('firework', 'fierwork'), ('believin', 'beliving'), ('thunderbolt', 'thundrbolt')]
    for w1, w2 in pairs:
        dist = min_edit_distance(w1, w2)
        print(f"{w1} -> {w2} : {dist}")

=== Результати автокорекції ===
fierwork -> firework
believin -> believin
othre -> other
gonnaa -> gonna
thundrbolt -> thunderbolt

=== Мінімальна відстань редагування ===
firework -> fierwork : 2
believin -> beliving : 2
thunderbolt -> thundrbolt : 1
