In [1]:
import string

In [2]:
class Word_Processing:
    """
    Stateless helpers for splitting text into words, loading a word list,
    searching questions for a word, and counting word frequencies.

    Every method is a static method, so it can be called either on the class
    (Word_Processing.split_text(...)) or on an instance.
    """

    @staticmethod
    def split_text(text):
        """
        Split a string into upper-case words.
        Every character in string.punctuation is treated as a word separator,
        so "It's" yields the two words "IT" and "S".
        @param text String: The text to process
        @return list: The individual words, all upper case
        """
        # Map each punctuation character to a space so split() breaks on it
        translator = str.maketrans(string.punctuation, ' ' * len(string.punctuation))
        return [word.upper() for word in text.translate(translator).split()]

    @staticmethod
    def load_dictionary(path="words.txt", encoding="utf-8"):
        """
        Read a word-list file (one word per line) into a dictionary.
        See https://github.com/dwyl/english-words for the original words.txt file.
        Leading/trailing whitespace is removed from each line; blank lines are skipped.
        @param path String: The file to read, defaults to "words.txt" in the current directory
        @param encoding String: Text encoding used to decode the file
        @return the dictionary. Key is the word (all upper case), value is the length of the word
        """
        lines_dict = {}
        with open(path, 'r', encoding=encoding) as file:
            for line in file:
                word = line.strip()
                # A blank line is not a word; do not store an empty key
                if word:
                    lines_dict[word.upper()] = len(word)
        return lines_dict

    @staticmethod
    def find_word(questions, word):
        """
        Look for a word across all the questions.
        The match is a case-insensitive substring test against question["input"],
        so "cat" also matches "catalog".
        @param questions list of dictionaries: The questions to be searched, each must have an "input" key
        @param word string: the word to look for
        @return list of dictionaries: all questions that contain the word, in their original order
        """
        # Upper-case the search term once instead of once per question
        needle = word.upper()
        return [question for question in questions if needle in question["input"].upper()]

    @staticmethod
    def compute_word_frequency(text, force_to_upper_case = True, min_length = 1):
        """
        Count how often each word occurs in the text.
        Words are separated by whitespace or commas; other punctuation stays attached to the word.
        @param text String: The text to process
        @param force_to_upper_case bool: convert all words to upper case so 'the' is the same as 'The', etc.
        @param min_length int: Ignore words shorter than min_length
        @return (Dictionary, count): ({key is word, value is frequency of that word}, count is the number of total words in text)
        """
        # split() with no argument already splits on newlines and trims each token
        words = text.replace(',', ' ').split()
        word_frequency = {}
        for word in words:
            # Length is checked on the word as written, before any case change
            if len(word) < min_length:
                continue
            if force_to_upper_case:
                word = word.upper()
            word_frequency[word] = word_frequency.get(word, 0) + 1
        # count covers every word, including those shorter than min_length
        return (word_frequency, len(words))

    @staticmethod
    def compute_longest_words(text, force_to_upper_case = True, min_length = 0):
        """
        Find the longest words in the text and count how often each occurs.
        Words are separated by whitespace or commas; other punctuation stays attached to the word.
        @param text String: The text to process
        @param force_to_upper_case: convert all words to upper case so 'the' is the same as 'The', etc.
        @param min_length int: Use this as the minimum length for words to find rather than computing a maximum length and using that. If 0, compute the max_length
        @return (Dictionary, count): ({key is a word at least as long as the threshold, value is frequency of that word}, count is the number of total words in text)
        """
        # Tokenise once; the same word list feeds both the threshold and the tally
        words = text.replace(',', ' ').split()
        if min_length == 0:
            # No threshold given: use the length of the longest word (0 for empty text)
            max_length = max(map(len, words), default=0)
        else:
            max_length = min_length

        word_frequency = {}
        for word in words:
            if len(word) < max_length:
                continue
            if force_to_upper_case:
                word = word.upper()
            word_frequency[word] = word_frequency.get(word, 0) + 1
        # count covers every word, not only the ones that met the threshold
        return (word_frequency, len(words))
