# Utils - Sara Bardají

This notebook contains the functions built by Sara Bardají.

## Feature: Number of characters
**Example 1:**
q = 'how'
num_of_characters(q) = 3

**Example 2:**
q = 'how are you doing'
num_of_characters(q) = 17

In [None]:
def num_of_characters(q):
    """
    Count the characters of a string, whitespace included.

    Parameters:
        q (str): Text whose length is measured.

    Returns:
        int: Length of ``q`` as reported by ``len``.
    """
    char_count = len(q)
    return char_count


In [None]:
# Example 1: a single three-letter word, so the call below evaluates to 3.
q = 'how'
num_of_characters(q)

In [None]:
# Example 2: spaces are counted too (14 letters + 3 spaces), so the result is 17.
q = 'how are you doing'
num_of_characters(q)

## Feature: Word count difference
**Example 1:**
q1 = 'how are you today'
q2 = 'how are you doing today'
difference_word_count(q1, q2) = 1

**Example 2:**
q1 = 'does my shoe fit'
q2 = 'is my shoe on'
difference_word_count(q1, q2) = 0

In [None]:
def difference_word_count(q1, q2):
    """
    Return how many more words one string has than the other.

    Words are whatever ``str.split()`` yields, i.e. runs of non-whitespace
    characters.

    Parameters:
        q1 (str): First text.
        q2 (str): Second text.

    Returns:
        int: Non-negative gap between the two word counts.
    """
    return abs(len(q1.split()) - len(q2.split()))


In [None]:
# Example 1: 4 words vs. 5 words, so the absolute difference is 1.
q1 = 'how are you today'
q2 = 'how are you doing today'
difference_word_count(q1, q2)

In [None]:
# Example 2: both strings have 4 words, so the difference is 0.
q1 = 'does my shoe fit'
q2 = 'is my shoe on'
difference_word_count(q1, q2)

## Feature: Total Unique Word Count and Total Unique Word Count to Total Words Ratio
**Example 1:**
q1 = 'how are you today'
q2 = 'how are you doing today'
num_of_unique_words(q1, q2) = 5
total_unique_words_ratio(q1, q2) = 0.556

**Example 2:**
q1 = 'does my shoe fit'
q2 = 'is my shoe on'
num_of_unique_words(q1, q2) = 6
total_unique_words_ratio(q1, q2) = 0.750

In [None]:
def num_of_unique_words(q1, q2):
    """
    Count the distinct words that appear across both strings.

    The two texts are joined with a single space and split on whitespace;
    a word occurring in both (or repeated in one) is counted once.
    Comparison is exact, so case and punctuation matter.

    Parameters:
        q1 (str): First text.
        q2 (str): Second text.

    Returns:
        int: Size of the set of whitespace-separated tokens.
    """
    combined = " ".join((q1, q2))
    return len(set(combined.split()))


def num_of_words(q1, q2):
    """
    Count every word occurrence across both strings.

    The two texts are joined with a single space and split on whitespace;
    repeated words are counted each time they occur.

    Parameters:
        q1 (str): First text.
        q2 (str): Second text.

    Returns:
        int: Total number of whitespace-separated tokens.
    """
    combined = " ".join((q1, q2))
    return len(combined.split())


def total_unique_words_ratio(q1, q2):
    """
    Computes the ratio of unique words to the total number of words in the combined input strings.

    The strings are joined with a single space and split on whitespace, the
    same tokenisation used for the unique-word and total-word counts in this
    notebook. The text is tokenised once here so both counts come from the
    same word list.

    Parameters:
        q1 (str): The first input string.
        q2 (str): The second input string.

    Returns:
        float: The ratio of unique words to the total number of words in the
        combined input strings, in the range (0, 1]. Returns 0.0 when neither
        string contains any word (both empty or whitespace-only) instead of
        raising ZeroDivisionError.
    """
    words = (q1 + " " + q2).split()
    if not words:
        # No tokens at all: the ratio is undefined, so report 0.0 rather than
        # letting a single empty pair abort a whole feature-extraction run.
        return 0.0
    return len(set(words)) / len(words)

In [None]:
# Example 1: 5 distinct words out of 9 in total, so the ratio prints as 0.556.
q1 = 'how are you today'
q2 = 'how are you doing today'

print(f'Number of unique words: {num_of_unique_words(q1, q2)}')
print(f'Number of words: {num_of_words(q1, q2)}')
print(f'Number of unique words ratio: {total_unique_words_ratio(q1, q2):.3f}')

In [None]:
# Example 2: 6 distinct words out of 8 in total, so the ratio prints as 0.750.
q1 = 'does my shoe fit'
q2 = 'is my shoe on'

print(f'Number of unique words: {num_of_unique_words(q1, q2)}')
print(f'Number of words: {num_of_words(q1, q2)}')
print(f'Number of unique words ratio: {total_unique_words_ratio(q1, q2):.3f}')

## Functionality: Spelling Correction

In [None]:
class BKTree:
    """
    BK-tree (Burkhard-Keller tree) for finding words within a given distance.

    Every node is a ``(word, children)`` tuple, where ``children`` is a dict
    mapping a distance ``d`` to the child node that roots all words lying at
    distance ``d`` from ``word``.

    The search prunes subtrees by distance, which is only exact when
    ``distfn`` is a true metric. ``distfn`` must return non-negative integers
    because the search walks an integer ``range`` of candidate distances.
    Words must be hashable (e.g. ``str``).

    Attributes:
        distfn: Callable ``distfn(a, b) -> int`` used for all comparisons.
        tree: Root node, or ``None`` when the tree was built from no words.
        visited_nodes: Words examined by the most recent ``query`` call, in
            visiting order.
    """

    def __init__(self, distfn, words):
        """
        Build the tree by inserting ``words`` one by one.

        Parameters:
            distfn (callable): Integer-valued distance between two words.
            words (iterable): Words to index; the first one becomes the root.
                An empty iterable yields an empty tree whose queries return
                an empty list.
        """
        self.distfn = distfn
        self.tree = None
        self.visited_nodes = []
        self._visited = set()

        for word in words:
            if self.tree is None:
                self.tree = (word, {})
            else:
                self._add_word(self.tree, word)

    def _add_word(self, parent, word):
        """
        Insert ``word`` into the subtree rooted at ``parent``.

        Descends iteratively (no recursion-depth limit): at each node, follow
        the child whose key equals the distance to ``word``; attach ``word``
        as a new leaf at the first node that has no child at that distance.
        """
        node = parent
        while True:
            pword, children = node
            d = self.distfn(word, pword)
            if d not in children:
                children[d] = (word, {})
                return
            node = children[d]

    def _search_descendants(self, parent, max_distance, distance, query_word):
        """
        Collect all words in the subtree of ``parent`` within ``max_distance``.

        Parameters:
            parent (tuple): ``(word, children)`` node to start from.
            max_distance (int): Largest distance accepted as a match.
            distance (callable): Distance function to apply.
            query_word: Word being looked up.

        Returns:
            list: ``(distance, word)`` tuples for every match found.
        """
        node_word, children_dict = parent
        dist_to_node = distance(query_word, node_word)
        self.visited_nodes.append(node_word)
        self._visited.add(node_word)
        results = []

        if dist_to_node <= max_distance:
            results.append((dist_to_node, node_word))

        # Only children keyed within [d - max_distance, d + max_distance] can
        # hold matches (triangle inequality); all others are pruned.
        lowest = max(0, dist_to_node - max_distance)
        highest = dist_to_node + max_distance
        for dist in range(lowest, highest + 1):
            child = children_dict.get(dist)
            # A word inserted more than once sits as a distance-0 child of
            # its first occurrence; the visited check keeps it from being
            # reported twice. Set membership keeps the check O(1).
            if child is not None and child[0] not in self._visited:
                results.extend(
                    self._search_descendants(child, max_distance, distance, query_word)
                )
        return results

    def query(self, query_word, max_distance):
        """
        Return all indexed words within ``max_distance`` of ``query_word``.

        Parameters:
            query_word: Word to look up.
            max_distance (int): Largest distance accepted as a match.

        Returns:
            list: ``(distance, word)`` tuples sorted ascending by distance
            and then by word; empty when nothing matches or the tree is empty.
        """
        self.visited_nodes = []
        self._visited = set()
        if self.tree is None:
            return []
        results = self._search_descendants(self.tree, max_distance, self.distfn, query_word)
        return sorted(results)

In [None]:
import editdistance

def spellchecker(q, V, max_distance=2):
    """
    Performs spell correction on the input string using a given vocabulary and a BKTree.

    Each whitespace-separated word of ``q`` that is already in the vocabulary
    is kept. Any other word is replaced by the closest vocabulary word within
    ``max_distance`` edits (ties are broken alphabetically, because BKTree
    returns candidates sorted by distance and then by word). Words with no
    candidate in range are left unchanged.

    Parameters:
        q (str): The input string to be corrected.
        V (set): A set containing the vocabulary of known words. Any iterable
            of words is accepted; it is converted to a set once so that
            membership checks are O(1).
        max_distance (int): Maximum edit distance allowed for a correction.
            Defaults to 2.

    Returns:
        str: The corrected input string, with words joined by single spaces.
    """
    vocabulary = V if isinstance(V, (set, frozenset)) else set(V)
    if not vocabulary:
        # Nothing to correct against: return the words as they are.
        return ' '.join(q.split())

    # NOTE: the tree is rebuilt on every call, which costs one distance
    # computation per tree level for each vocabulary word. For bulk
    # correction over many strings, consider building it once outside.
    bk_tree = BKTree(editdistance.eval, vocabulary)

    resolved = {}  # misspelled word -> chosen replacement, reused within q
    correction = []
    for word in q.split():
        if word in vocabulary:
            correction.append(word)
            continue
        if word not in resolved:
            candidates = bk_tree.query(word, max_distance)
            resolved[word] = candidates[0][1] if candidates else word
        correction.append(resolved[word])
    return ' '.join(correction)

In [None]:
# Demo vocabulary. NOTE(review): this is a list, while spellchecker's docstring
# describes V as a set.
V = ['serendipity', 'cacophony', 'onomatopoeia', 'persnickety', 'mellifluous', 'ineffable', 'ephemeral', 'synchronicity', 'languid', 'ubiquitous', 'panacea', 'hapless', 'euphoria', 'inscrutable', 'inevitable', 'capricious', 'antithesis', 'discombobulated', 'superfluous', 'equanimity', 'furtive', 'magnanimous', 'ambivalent', 'laconic', 'gregarious', 'loquacious', 'opaque', 'resilient', 'vexatious', 'enigmatic', 'esoteric', 'gratuitous', 'melancholy', 'obfuscate', 'pernicious', 'querulous', 'recondite', 'scrupulous', 'torpid', 'voracious', 'joke', 'jose']
# Mix of correctly spelled words and misspellings. 'jole' is one edit away from
# both 'joke' and 'jose'; candidates are sorted by (distance, word), so 'joke'
# is the one picked.
q = 'serendipty equanimity furtve melancholy pernicius jole'

spellchecker(q, V)