# ACSL 2020-2021 Contest #2 - Lex String

## Junior Division

### PROBLEM :
Given an inputted string, create a new string as follows:

1. Change uppercase letters to lowercase and eliminate all non-letters.
2. Find all first occurrences of each distinct letter and arrange them in sorted order.
3. Append these sorted letters to the end of a new string and remove those distinct letters from the original string.
4. Repeat steps 2 and 3 until the original string is empty.
5. Before outputting the new string, remove adjacent duplicate letters (e.g. "bboooo" becomes "bo").

For example, the string “A good sorting algorithm.” would arrange as follows:
1st pass: New string: adghilmnorst Rest of string: oogagorit  
2nd pass: New string: adghilmnorstagiort Rest of string: ogo  
3rd pass: New string: adghilmnorstagiortgo Rest of string: o  
4th pass: New string: adghilmnorstagiortgoo Rest of string:  
Output: adghilmnorstagiortgo  


In [4]:
def lowerletters_v0(text):
    """ convert an input text string to lower case letters and eliminiate all non-letters
    """
    res = ""
    for c in text:
        if c.isalpha():
            res += c.lower()
            
    return res

In [24]:
def lowerletters_v1(text):
    """ convert an input text string to lower case letters and eliminiate all non-letters
    """
    return ''.join([c.lower() for c in text if c.isalpha()])

In [25]:
input = "This is a test with 100 Characters!"
output = lowerletters_v0(input)
print(output)

thisisatestwithcharacters


In [26]:
output = lowerletters_v1(input)
print(output)

thisisatestwithcharacters


In [6]:
import re
def lowerletters(text):
    """ convert an input text string to lower case letters and eliminiate all non-letters
    """
    return re.sub('[^a-zA-Z]', '', text).lower()

In [7]:
input = "This is a test with 100 Characters!"
output = lowerletters(input)
print(output)

thisisatestwithcharacters


In [28]:
def remove_letters(text, distinctlist):
    if distinctlist:
        mylist = distinctlist.copy()
        res = ""
        for c in text:
            if c in mylist:
                mylist.remove(c)
            else:
                res +=c
        return res
    else:
        return text

In [29]:
input = "This is a test with 100 Characters!"
text = lowerletters(input)
print(text)
distinct = sorted(set(text))
print(distinct)
output = remove_letters(text, distinct)
print(output)

thisisatestwithcharacters
['a', 'c', 'e', 'h', 'i', 'r', 's', 't', 'w']
iststithhaacters


In [30]:
def remove_duplicate(text):
    """
    A function to remove adjacent duplicate letters
    """    
    res = ''
    prev = ''
    for c in text:
        if c != prev:
            res +=c
        prev = c
    return res
            

In [31]:
input = "bboocc"
output = remove_duplicate(input)
print(output)

boc


In [32]:
def lexstring_sub(text, debug=False):
    """
    A Lex String Substitution function for ACSL 2021 Contest #2 Programming Problem    
    """

    # Step 2 - Find all first occurrences of each distinct letter and arrange them in sorted order.
    distinct_letters = set(text)
    res = sorted(distinct_letters)
    
    # Step 3 - Append these sorted letters to the end of a new string and remove those distinct letters from the original string.    
    remaining_letters = remove_letters(text, distinct_letters)
    if debug:
        print(f"text: {text}, distinct_letters:{''.join(res)}, remaining_letters: {remaining_letters}")

    # Recursive:
    if remaining_letters:
        return res + lexstring_sub(remaining_letters)
    else:
        return res

In [33]:
def lexstring_cipher(text, debug=False):
    """
    A Lex String Cipher function for ACSL 2021 Contest #2 Programming Problem    
    """
    # Step 1 - Change uppercase letters to lowercase and eliminate all non-letters.
    text = lowerletters(text)
    if debug:
        print("after step 1:", text)
        
    # Steps 2 & 3    
    res = lexstring_sub(text, debug=debug)
    if debug:
        print("after Steps 2 & 3:",''.join(res))    
    
    # Final Step - remove adjacent duplicates
    res = remove_duplicate(res)
    if debug:
        print("after Final Step:", res)    
        
    return res


In [34]:
input = "A good sorting algorithm."
output = lexstring_cipher(input)
print(output)

adghilmnorstagiortgo


In [35]:
input = "A good sorting algorithm."
output = lexstring_cipher(input, debug=True)
print(output)

after step 1: agoodsortingalgorithm
text: agoodsortingalgorithm, distinct_letters:adghilmnorst, remaining_letters: oogagorit
after Steps 2 & 3: adghilmnorstagiortgoo
after Final Step: adghilmnorstagiortgo
adghilmnorstagiortgo


In [16]:
input = "Tennessee is the volunteer state."
output = lexstring_cipher(input, debug=True)
print(output)

after step 1: tennesseeisthevolunteerstate
text: tennesseeisthevolunteerstate, distinct_letters:aehilnorstuv, remaining_letters: neseestenteestte
after Steps 2 & 3: aehilnorstuvenstenstesteteee
after Final Step: aehilnorstuvenstenstestete
aehilnorstuvenstenstestete


In [17]:
input = "Einstein was a genius."
output = lexstring_cipher(input, debug=True)
print(output)

after step 1: einsteinwasagenius
text: einsteinwasagenius, distinct_letters:aeginstuw, remaining_letters: einsaenis
after Steps 2 & 3: aeginstuwaeinseins
after Final Step: aeginstuwaeinseins
aeginstuwaeinseins


In [36]:
inputs =["A good sorting algorithm.",
         "Tennessee is the volunteer state.",
         "Einstein was a genius.",
         "Tom Sawyer & the Mississippi River ", 
         "She sells seashells by the seashore."]
answers = ["adghilmnorstagiortgo", 
           "aehilnorstuvenstenstestete",
           "aeginstuwaeinseins",
           "aehimoprstvwyeimprsteirsisis",
           "abehlorstyaehlsehlsehlseseses"]
   
for index, input in enumerate(inputs):
         output = lexstring_cipher(input)
         if output != answers[index]:
             print("Error: wrong answer on input {index+1}")
         else:
             print(f"{index+1}. {output}")
         

1. adghilmnorstagiortgo
2. aehilnorstuvenstenstestete
3. aeginstuwaeinseins
4. aehimoprstvwyeimprsteirsisis
5. abehlorstyaehlsehlsehlseseses
