In [10]:
# utils
def parse_line(line, option='default'):
    """Parse one brace-delimited, comma-separated line of integers.

    For example ``parse_line("{4, 101, 4}")`` returns ``[4, 101, 4]``.

    Args:
        line: Text such as ``"{1, 2, 3}"``. Surrounding braces and
            whitespace (including CRLF line endings) are ignored.
        option: ``'set'`` drops duplicate values (the order of the result
            is then unspecified); any other value keeps the numbers in
            the order they were written.

    Returns:
        A list of ints. A blank line or ``"{}"`` yields ``[]``.

    Raises:
        ValueError: If a non-empty token is not a valid integer literal.
    """
    # Strip braces and whitespace together so a trailing "\r" or space
    # cannot leave the closing brace glued to the last number.
    body = line.strip("{} \t\r\n")
    # Empty tokens (blank line, "{}", trailing comma) are skipped rather
    # than handed to int(), which would raise on ''.
    numbers = [int(token) for token in body.split(",") if token.strip()]
    if option == 'set':
        return list(set(numbers))
    return numbers

def converse(string, dictionary):
    """Return the total of ``dictionary[letter]`` over the letters of ``string``.

    Args:
        string: Iterable of keys (typically the letters of a word).
        dictionary: Mapping from each letter to its numeric value.

    Returns:
        The sum of the looked-up values; ``0`` for an empty ``string``.

    Raises:
        KeyError: If a letter is missing from ``dictionary``.
    """
    return sum(dictionary[symbol] for symbol in string)

def reverseDict(original_dict):
    """Return a new dict mapping each value of ``original_dict`` to ``int(key)``.

    Keys are converted with ``int``, so they must be integer-like (for
    example numeric strings). If several keys share a value, the entry
    visited last wins.
    """
    return {letter: int(number) for number, letter in original_dict.items()}

In [106]:
import copy
import re
from collections import Counter


# Lowercase Latin letters 'a'..'z', in order; letters are taken from this
# list by index.
alphabet = list("abcdefghijklmnopqrstuvwxyz")

# Tracks how many times each number occurs in a level
class LevelTracker:
    """Occurrence counts for the numbers of a single level."""

    def __init__(self, depth, content):
        """Store the level; counts are computed later by ``_track``.

        Args:
            depth: Index of the level this tracker describes.
            content: The level's numbers, duplicates included.
        """
        self.depth = depth
        self.content = content
        # value -> number of occurrences in ``content``. Defined up front so
        # reading it before _track() gives an empty dict, not AttributeError.
        self.tracker = {}

    def _track(self):
        """Rebuild ``self.tracker`` from ``self.content`` in a single pass."""
        # Converted to a plain dict so a missing value still raises KeyError
        # on lookup instead of silently reading as 0.
        self.tracker = dict(Counter(self.content))

# Data structure that stores the numerical values of each level from the text file
class NumericalLevels:
    """Numerical values of every level read from a text file.

    After ``_populate`` both level lists start with the synthetic level
    ``[0]`` at index 0, followed by the levels parsed from the file in
    file order.
    """

    def __init__(self, filename, generators):
        """Remember the input file; nothing is read until ``_populate``.

        Args:
            filename: Path of the text file, one level per line.
            generators: Stored on the instance; not used by this class.
        """
        self.filename = filename
        self.generators = generators

        # Levels with duplicates kept / removed. Empty until _populate().
        self.original_levels = []
        self.clumped_levels = []
        # One LevelTracker per entry of original_levels; built by _tracking().
        self.trackers = []

    # read in the data and store them in two formats.
    def _populate(self):
        """Read the file and fill ``original_levels`` and ``clumped_levels``.

        ``original_levels`` keeps duplicates, ``clumped_levels`` holds the
        distinct values; both are sorted per level.
        """
        with open(self.filename, "r") as file:
            # Whitespace-only lines carry no level and are skipped.
            parsed_levels = [
                sorted(parse_line(line, 'default'))
                for line in file
                if line.strip()
            ]

        self.original_levels = [[0]] + parsed_levels
        self.clumped_levels = [[0]] + [sorted(set(level)) for level in parsed_levels]

    # create an array of occurrences for each element in every level
    def _tracking(self):
        """Build one ``LevelTracker`` per level of ``original_levels``."""
        # Start from scratch so a repeated call does not duplicate trackers
        # and shift the level indices.
        self.trackers = []
        for depth, level in enumerate(self.original_levels):
            level_tracker = LevelTracker(depth=depth, content=level)
            level_tracker._track()
            self.trackers.append(level_tracker)
        
# Basis structure that stores a numerical value and all of its valid word formations
class NumericalWord:
    """A numerical value together with the word formations recorded for it."""

    def __init__(self, numerical_value):
        self.numerical_value = numerical_value
        # The value 0 starts with the empty word; every other value starts
        # with no words at all.
        self.words = [''] if self.numerical_value == 0 else []

    def __str__(self):
        return f"Numerical value: {self.numerical_value}\nAssociated words: {self.words}\n"

    def _addWord(self, string):
        """Record ``string`` as one more word for this value."""
        self.words += [string]

    # if there is a match from the next level, add a letter to the end of all word formations
    def _modify(self, letter):
        """Return a new list with ``letter`` appended to every stored word.

        The stored words themselves are left unchanged.
        """
        extended = []
        for existing in self.words:
            extended.append(existing + letter)
        return extended

    def _size(self):
        """Return how many words are stored."""
        word_count = len(self.words)
        return word_count

class WordGenerator:
    """Build, for every number of every level, the words that spell it.

    Each generator is assigned a letter. Words are built level by level: a
    number in level ``d`` inherits the words of every number in level
    ``d - 1`` whose difference to it is a generator, each extended by that
    generator's letter, provided the extended word passes ``_satisfy``.
    """

    def __init__(self, regex, startwith, generators):
        """Store the configuration; nothing is computed until ``execute``.

        Args:
            regex: Pattern every word must fully match; ``''`` disables it.
            startwith: Prefix every word must start with; ``''`` disables it.
            generators: Allowed differences between numbers of consecutive
                levels.
        """
        # basic attrs of WordGenerator
        self.regex = regex
        self.generators = generators
        self.startwith = startwith

        # str(generator) -> letter, and letter -> int(generator)
        self.dictionary = {}
        self.reverse_dictionary = {}

        # one list of NumericalWord per level, aligned with the clumped levels
        self.words_list = []

    def __str__(self):
        return f"Generators: {self.generators}\nConstraint: {self.regex}"

    # prototype for the set of rules that might be implemented in a single class in the future
    # for now, we're only concerned with the general form of the language
    def _satisfy(self, string):
        """Return True when ``string`` meets every configured constraint.

        An empty ``startwith`` or ``regex`` means "no constraint" for that
        rule; the two rules are checked independently.
        """
        if self.startwith != '' and not string.startswith(self.startwith):
            return False
        if self.regex != '' and re.fullmatch(self.regex, string) is None:
            return False
        return True

    def _build_dictionary(self, first_level):
        """Assign a letter to every generator and build the reverse mapping.

        Generators present in ``first_level`` receive letters from the start
        of the alphabet, in generator order. Every other generator receives
        ``alphabet[len(generators) + mismatch - 2]``, where ``mismatch``
        starts as the size of the symmetric difference between the generators
        and ``first_level`` and grows by one after each such assignment.
        """
        counter = 0
        mismatch = len(
            set(self.generators).symmetric_difference(set(first_level))
        )

        for generator in self.generators:
            if generator in first_level:
                letter = alphabet[counter]
                counter += 1
            else:
                letter = alphabet[len(self.generators) + mismatch - 2]
                mismatch += 1
            self.dictionary[str(generator)] = letter

        self.reverse_dictionary = reverseDict(self.dictionary)

    def execute(self, filename):
        """Read the levels from ``filename`` and fill ``self.words_list``.

        Also sets ``self.numerical_levels``, ``self.dictionary`` and
        ``self.reverse_dictionary``. A number that cannot be reached from the
        previous level through any generator is reported on stdout and given
        the empty word.
        """
        self.numerical_levels = NumericalLevels(
            filename=filename,
            generators=self.generators,
        )

        self.numerical_levels._populate()
        self.numerical_levels._tracking()

        levels = self.numerical_levels.clumped_levels

        # Reset derived state so a repeated call does not append a second
        # run onto the results of the first one.
        self.dictionary = {}
        self.words_list = []

        # first letters should be assigned to numbers in the first level
        # (an input without any level leaves nothing to match against)
        self._build_dictionary(levels[1] if len(levels) > 1 else [])

        # first level
        self.words_list.append([NumericalWord(0)])

        # main loop
        for depth in range(1, len(levels)):
            previous_values = levels[depth - 1]
            previous_words = self.words_list[depth - 1]
            # current level's placeholder
            new_words = []

            for current_value in levels[depth]:
                reachable = False
                tmp_word = NumericalWord(current_value)

                # previous_words[i] holds the words of previous_values[i]
                for previous_value, previous_word in zip(previous_values, previous_words):
                    diff = current_value - previous_value
                    if diff not in self.generators:
                        continue
                    reachable = True
                    for wrd in previous_word._modify(self.dictionary[str(diff)]):
                        if self._satisfy(wrd):
                            tmp_word._addWord(wrd)

                if not reachable:
                    print("extra word: {}!\n".format(tmp_word.numerical_value))
                    tmp_word._addWord('')

                new_words.append(tmp_word)

            self.words_list.append(new_words)

    def prettify(self, wordsList):
        """Print every ``NumericalWord`` of every level."""
        for level in wordsList:
            for word in level:
                print(word)

    # print out any number that has the same/different number of words as the number of times it occurs
    # if mode == 1 -> same number of words
    # if mode == 0 -> different number of words
    def nitpick(self, wordsList, mode=1):
        """Print the numbers whose word count matches or differs from their occurrence count.

        Args:
            wordsList: Levels of ``NumericalWord`` objects, indexed like
                ``self.numerical_levels.trackers``.
            mode: ``1`` prints numbers with as many words as occurrences;
                any other value prints those where the two differ.
        """
        want_equal = mode == 1

        for idx, level in enumerate(wordsList):
            for value in level:
                occurrences = self.numerical_levels.trackers[idx].tracker[value.numerical_value]
                if (value._size() == occurrences) == want_equal:
                    print(value)

    # see the cascading effect of excluding a specific word of a numerical value from a level
    def exclude(self, wordsList, word, mode='quiet'):
        """Return a deep copy of ``wordsList`` without the words starting with ``word``.

        Only levels from index ``len(word)`` onwards are filtered;
        ``wordsList`` itself is left untouched.

        Args:
            wordsList: Levels of objects exposing a ``words`` list.
            word: Prefix to exclude.
            mode: ``'verbose'`` also prints the resulting list.
        """
        modified_words_list = copy.deepcopy(wordsList)

        # main loop
        for level in modified_words_list[len(word):]:
            for numerical_word in level:
                numerical_word.words = [
                    candidate
                    for candidate in numerical_word.words
                    if not candidate.startswith(word)
                ]

        if mode == 'verbose':
            self.prettify(modified_words_list)
        return modified_words_list

    # check if every number in the new excluded list has the same number of words as the number of times it occurs
    def check_new_list(self, new_list):
        """Return True when every number has exactly as many words as occurrences."""
        for idx, level in enumerate(new_list):
            for value in level:
                if value._size() != self.numerical_levels.trackers[idx].tracker[value.numerical_value]:
                    return False
        return True

In [108]:
# Configure the generator (no required prefix) and build the words for
# every level stored in "layers.txt".
gen = WordGenerator(
    regex=r'(a(b|c|d)*)|((b|c|d)*)',
    startwith='',
    generators=[4, 101, 202, 171],
)
gen.execute(filename="layers.txt")
# gen.prettify(gen.words_list)

In [116]:
# mode 0: print the numbers whose word count differs from their occurrence count
gen.nitpick(wordsList=gen.words_list, mode=0)

Numerical value: 373
Associated words: ['bcb', 'cbb']

Numerical value: 474
Associated words: ['bcd', 'cbd', 'bdc']

Numerical value: 377
Associated words: ['abcb', 'acbb']

Numerical value: 478
Associated words: ['abcd', 'acbd', 'abdc']

Numerical value: 544
Associated words: ['bcbc', 'cbbc', 'bccb', 'cbcb', 'ccbb']

Numerical value: 575
Associated words: ['bcbd', 'cbbd', 'bdbc', 'bcdb', 'cbdb', 'bdcb']

Numerical value: 645
Associated words: ['bccd', 'cbcd', 'ccbd', 'bcdc', 'cbdc', 'bdcc']

Numerical value: 548
Associated words: ['abcbc', 'acbbc', 'abccb', 'acbcb', 'accbb']

Numerical value: 579
Associated words: ['abcbd', 'acbbd', 'abdbc', 'abcdb', 'acbdb', 'abdcb']

Numerical value: 645
Associated words: ['bcbcb', 'cbbcb', 'bccbb', 'cbcbb', 'ccbbb']

Numerical value: 649
Associated words: ['abccd', 'acbcd', 'accbd', 'abcdc', 'acbdc', 'abdcc']

Numerical value: 676
Associated words: ['bcbdb', 'cbbdb', 'bdbcb', 'bcdbb', 'cbdbb', 'bdcbb']

Numerical value: 715
Associated words: ['bcbc

In [110]:
# Drop every word starting with 'cbb' (from level 3 onwards) into a new list.
newList = gen.exclude(gen.words_list, word='cbb')

In [111]:
# True only if every number now has exactly as many words as occurrences
gen.check_new_list(new_list=newList)

False

In [112]:
# mode 0: print the numbers that still have a mismatching word count
gen.nitpick(wordsList=newList, mode=0)

Numerical value: 474
Associated words: ['bcd', 'cbd', 'bdc']

Numerical value: 377
Associated words: ['abcb', 'acbb']

Numerical value: 478
Associated words: ['abcd', 'acbd', 'abdc']

Numerical value: 544
Associated words: ['bcbc', 'bccb', 'cbcb', 'ccbb']

Numerical value: 575
Associated words: ['bcbd', 'bdbc', 'bcdb', 'cbdb', 'bdcb']

Numerical value: 645
Associated words: ['bccd', 'cbcd', 'ccbd', 'bcdc', 'cbdc', 'bdcc']

Numerical value: 548
Associated words: ['abcbc', 'acbbc', 'abccb', 'acbcb', 'accbb']

Numerical value: 579
Associated words: ['abcbd', 'acbbd', 'abdbc', 'abcdb', 'acbdb', 'abdcb']

Numerical value: 645
Associated words: ['bcbcb', 'bccbb', 'cbcbb', 'ccbbb']

Numerical value: 649
Associated words: ['abccd', 'acbcd', 'accbd', 'abcdc', 'acbdc', 'abdcc']

Numerical value: 676
Associated words: ['bcbdb', 'bdbcb', 'bcdbb', 'cbdbb', 'bdcbb']

Numerical value: 715
Associated words: ['bcbcc', 'bccbc', 'cbcbc', 'ccbbc', 'bcccb', 'cbccb', 'ccbcb', 'cccbb']

Numerical value: 746


In [113]:
# Additionally drop every word starting with 'cbd' from the already reduced list.
test_list = gen.exclude(newList, word='cbd')

In [114]:
# mode 0: print the numbers that still have a mismatching word count
gen.nitpick(wordsList=test_list, mode=0)

Numerical value: 377
Associated words: ['abcb', 'acbb']

Numerical value: 478
Associated words: ['abcd', 'acbd', 'abdc']

Numerical value: 544
Associated words: ['bcbc', 'bccb', 'cbcb', 'ccbb']

Numerical value: 645
Associated words: ['bccd', 'cbcd', 'ccbd', 'bcdc', 'bdcc']

Numerical value: 548
Associated words: ['abcbc', 'acbbc', 'abccb', 'acbcb', 'accbb']

Numerical value: 579
Associated words: ['abcbd', 'acbbd', 'abdbc', 'abcdb', 'acbdb', 'abdcb']

Numerical value: 645
Associated words: ['bcbcb', 'bccbb', 'cbcbb', 'ccbbb']

Numerical value: 649
Associated words: ['abccd', 'acbcd', 'accbd', 'abcdc', 'acbdc', 'abdcc']

Numerical value: 676
Associated words: ['bcbdb', 'bdbcb', 'bcdbb', 'bdcbb']

Numerical value: 715
Associated words: ['bcbcc', 'bccbc', 'cbcbc', 'ccbbc', 'bcccb', 'cbccb', 'ccbcb', 'cccbb']

Numerical value: 746
Associated words: ['bcbcd', 'bccbd', 'cbcbd', 'ccbbd', 'bcbdc', 'bdbcc', 'bcdbc', 'bdcbc', 'bccdb', 'cbcdb', 'ccbdb', 'bcdcb', 'bdccb']

Numerical value: 777
As