## Proof of Concept
- Goal: *only* generate a regular expression that matches one particular string.

In [1]:
# Turn on IPython's greedy tab-completion for this session (authoring convenience).
%config IPCompleter.greedy=True

In [2]:
import warnings

# Silence every warning for the rest of the session to keep cell output clean.
# NOTE(review): this blanket filter also hides deprecation and runtime warnings;
# consider narrowing it to a specific category or module.
warnings.filterwarnings('ignore')

In [3]:
import re
import numpy as np

# Record the library versions this notebook was executed with.
print('re:', re.__version__)
print('numpy:', np.__version__)

re: 2.2.1
numpy: 1.16.3


In [4]:
import sys

# Put the parent directory on the import path so the `package` imports below resolve
# (presumably the notebook lives one level below the project root — confirm).
sys.path.append('..')

from package.ga import BinaryGeneFactory, AbstractFitness, SimpleHillClimber
from package.transformers import IntegerToBinaryString, StringToMapping, KeyArrayToRegex

In [5]:
# Target string the generated regex has to match: lowercase words, runs of
# spaces, digits and colons.
expected_match = 'backstreets back   11:05:20   alright'
# Individuals are created with this many genes, i.e. one gene per target character.
length_of_expected_match = len(expected_match)

In [6]:
# Range handed to the gene factory. `3 + 1` suggests an exclusive upper bound so
# that the values 0..3 are produced — TODO confirm against BinaryGeneFactory.
binary_start = 0
binary_end = 3 + 1

# The third argument is presumably the bit width of each gene (genes print as two
# binary digits, e.g. '10') — confirm against BinaryGeneFactory.
gene_factory = BinaryGeneFactory(binary_start, binary_end, 2)
# Each two-bit key stands for one single-character regex token.
string_mapper = StringToMapping({
    '00': r'\s',     # whitespace
    '01': r'\d',     # digit
    '10': r'[a-z]',  # lowercase letter
    '11': r'[:]'     # literal colon
})
to_regex = KeyArrayToRegex(string_mapper)

In [7]:
class Fitness(AbstractFitness):
    """Score an individual by how many target characters its genes match.

    ``to_regex`` turns the individual into a sequence of regex patterns; the
    pattern at position ``i`` is tested against character ``i`` of
    ``expected_match``. The fitness is the number of matching positions divided
    by the length of ``expected_match``.
    """

    # Class-level defaults so the attributes exist even before __init__ has run.
    to_regex = None
    expected_match = ''

    def __init__(self, to_regex, expected_match):
        """Store the gene-to-regex transformer and the target string.

        Args:
            to_regex: object exposing ``transform_to_array(individual)``, whose
                result is iterated as regex patterns, compared position by
                position with the target.
            expected_match: the string the generated regex should match.
        """
        # NOTE(review): the original ended with a bare `super()` expression, which
        # only builds a proxy object and discards it without calling anything.
        # It has been dropped. If AbstractFitness defines an initializer that
        # must run, call `super().__init__(...)` with the arguments it expects.
        self.to_regex = to_regex
        self.expected_match = expected_match

    def evaluate(self, individual, display_logging = False):
        """Return the fraction of target characters matched by ``individual``.

        Args:
            individual: gene sequence accepted by ``to_regex.transform_to_array``.
            display_logging: not used by this implementation.

        Returns:
            Number of matching positions divided by ``len(expected_match)``, or
            ``0.0`` when the target string is empty.
        """
        expected_length = len(self.expected_match)
        if expected_length == 0:
            # Nothing to match against; avoid dividing by zero.
            return 0.0

        patterns = self.to_regex.transform_to_array(individual)
        # zip() stops at the shorter sequence, so an individual that is longer
        # than the target no longer raises IndexError; surplus genes are ignored
        # and missing genes simply score nothing.
        matches = [
            re.match(pattern, character) is not None
            for pattern, character in zip(patterns, self.expected_match)
        ]
        return np.sum(matches) / expected_length

fitness_evaluator = Fitness(to_regex, expected_match)

In [8]:
# Starting individual: one gene per character of the target string.
individual = gene_factory.create_many(length_of_expected_match)

print('binary:', '|'.join(individual))
# transform_and_compress already yields the finished pattern as a single string
# (it is concatenated with string literals when the final result is printed),
# so wrapping it in ''.join(...) was a redundant per-character re-join.
print('regex: ', to_regex.transform_and_compress(individual))

binary: 10|10|00|10|01|11|00|01|01|11|01|10|10|11|11|11|01|10|11|10|00|01|10|00|10|00|00|01|00|10|01|11|00|00|11|00|00
regex:  [a-z]+\s[a-z]\d[:]\s\d+[:]\d[a-z]+[:]+\d[a-z][:][a-z]\s\d[a-z]\s[a-z]\s+\d\s[a-z]\d[:]\s+[:]\s+


In [9]:
def gene_mutator(gene, display_logging, mutation_rate=.08):
    """Randomly replace a gene with a freshly generated one.

    Args:
        gene: the current gene; returned unchanged when no mutation happens.
        display_logging: not used by this mutator.
        mutation_rate: threshold compared against one uniform draw from
            ``np.random.rand()``; the gene is replaced when the draw is
            strictly below it. Defaults to the previously hard-coded ``.08``.

    Returns:
        ``gene`` itself, or a new gene from ``gene_factory.create()``.
    """
    # Exactly one random draw per call, as before the threshold became a parameter.
    if np.random.rand() < mutation_rate:
        return gene_factory.create()

    return gene

# Hill climber built from the fitness evaluator and a list of mutator callables
# (presumably each mutator is applied gene by gene — confirm in SimpleHillClimber).
hill_climber = SimpleHillClimber(fitness_evaluator, [ gene_mutator ])

In [10]:
number_of_iterations = 5000
result = hill_climber.run(individual, number_of_iterations)

# Unpack the run result by position; the names mirror what each slot is used for.
final_individual, final_fitness, final_iteration = result[0], result[1], result[2]

# Build the two decorated fields up front so the print call stays flat.
regex_literal = '/' + to_regex.transform_and_compress(final_individual) + '/gimu'
quoted_target = '"' + expected_match + '"'

print('compressed:', regex_literal, '~', quoted_target, '~', final_fitness, '~', final_iteration)

compressed: /[a-z]+\s[a-z]+\s+\d+[:]\d+[:]\d+\s+[a-z]+/gimu ~ "backstreets back   11:05:20   alright" ~ 1.0 ~ 790
