In [1]:
def tokenize_word(word):
    """
    Given a word using the spanish alphabet, returns a list with the letters of the word.
    It includes the letters "CH", "LL", "Ñ" and "RR".

    Input: word (string)
    Output: token_list (list of strings)
    """
    # Define the alphabet
    alphabet = [
        'A', 'B', 'C', 'CH', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'LL', 'M',
        'N', 'Ñ', 'O', 'P', 'Q', 'R', 'RR', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z'
    ]

    # Write the word in capital letters
    word = word.upper()

    # Verify that all the letters of the word are in the alphabet
    if not all(map(lambda char : char in alphabet, word)):
        raise Exception("Some letter of the word is not in the alphabet.")

    # Initialize the token list
    token_list = []
    # Go over the word adding tokens to the list
    for letter in word:
        # If the token is "CH", "LL" or "RR", we have to remove the last letter from the list and add the complete token
        if token_list and any([token_list[-1]=="C" and letter=="H", token_list[-1]=="L" and letter=="L", token_list[-1]=="R" and letter=="R"]):
            token_list.append(token_list.pop() + letter)
        # Otherwhise, just add the letter
        else:
            token_list.append(letter)
    return token_list

In [2]:
tokenize_word("Hola")

['H', 'O', 'L', 'A']

In [3]:
tokenize_word("Manchego")

['M', 'A', 'N', 'CH', 'E', 'G', 'O']

In [4]:
tokenize_word("Lluvia")

['LL', 'U', 'V', 'I', 'A']

In [5]:
tokenize_word("carro")

['C', 'A', 'RR', 'O']

In [6]:
tokenize_word("(2+1)")

Exception: Some letter of the word is not in the alphabet.