## Proof of Concept
- generating a regex expression
- individual has a dynamic length, can grow / shrink, encouraging growth in fitness method
- individual finds a distinct regex to produce the desired value.

In [1]:
%config IPCompleter.greedy=True

In [2]:
import warnings

warnings.filterwarnings('ignore')

In [3]:
import re
import numpy as np

print('re:', re.__version__)
print('numpy:', np.__version__)

re: 2.2.1
numpy: 1.16.3


In [4]:
import sys

sys.path.append('..')

from package.ga import BinaryGeneFactory, AbstractFitness, SimpleHillClimber
from package.transformers import IntegerToBinaryString, StringToMapping, KeyArrayToRegex

In [5]:
%%html
<h4>1. text, expected text</h4>

In [6]:
## 1. text -> '{expected string}' within,
expected_number = 0.55
text = 'ab mn gd: 0.33\n' \
     + 'cd mn gd: 0.44\n' \
     + 'de mn gd: 0.55\n' \
     + 'fg mn gd: 0.66\n' \

static_ending = r'\W([\d.]*\d[.\d]*)\b'

print(text)

ab mn gd: 0.33
cd mn gd: 0.44
de mn gd: 0.55
fg mn gd: 0.66



In [7]:
%%html
<h4>2. setup</h4>

In [8]:
consts = 'abcdefghijklmnopqrstuvwxyz'
regexes = [
    r'\s',
    r'\d',
    r'[a-z]',
    r'[:]',
    r'[!?.]',
    r'\W'
]

complete_set = [ c for c in consts ] + regexes

binary_start = 0
binary_end = len(complete_set) -1 # hard end, values < binary_end
 
integer_to_binary_transformer = IntegerToBinaryString(5)
gene_factory = BinaryGeneFactory(binary_start, binary_end, 5)

binary_to_regex = {}
for i in range(binary_end):
    key = integer_to_binary_transformer.transform(i)
    binary_to_regex[key] = complete_set[i]

string_mapper = StringToMapping(binary_to_regex)
to_regex = KeyArrayToRegex(string_mapper)

In [9]:
from package.ga.setups import DynamicWithStaticEnding

fitness_evaluator = DynamicWithStaticEnding.Fitness(to_regex, static_ending, expected_number, text)
mutator = DynamicWithStaticEnding.Mutator(gene_factory)
hill_climber = SimpleHillClimber(fitness_evaluator, [ mutator.gene_mutator ], [ mutator.individual_height_mutator ])

In [10]:
%%html
<h4>3. create individual</h4>

In [11]:
individual = gene_factory.create_many(12)

print('binary:', '|'.join(individual))
print('regex: ', '/'+ ''.join(to_regex.transform_and_compress(individual)) + static_ending + '/gimu')

binary: 01110|10111|11010|01110|10011|01001|01011|01001|11110|10101|11101|11001
regex:  /ox\sotjlj[!?.]v[:]z\W([\d.]*\d[.\d]*)\b/gimu


In [12]:
%%html
<h4>4. run</h4>

In [13]:
number_of_iterations = 10000
result = hill_climber.run(individual, number_of_iterations, False)

final_individual = result[0]
final_fitness = result[1]
final_iteration = result[2]

print(
    'compressed:',
    '/' + to_regex.transform_and_compress(final_individual) + static_ending + '/gimu',
    '~',
    final_fitness,
    '~',
    final_iteration
)

print(
    'original:',
    '/' + to_regex.transform_and_compress(individual) + '/gimu',
)

compressed: /[a-z]e\s[a-z]+\s[a-z]d[:]\W([\d.]*\d[.\d]*)\b/gimu ~ 1.0 ~ 1069
original: /ox\sotjlj[!?.]v[:]z/gimu


In [14]:
match = re.compile(
    to_regex.transform_and_compress(final_individual) + static_ending,
    re.IGNORECASE
).search(text)

if match is not None:
    print(match.group(0), '=>', match.group(1))

de mn gd: 0.55 => 0.55
