In [None]:
# GENERATE docs/chars.txt TABLE OF SYMBOL => ORDINAL MAPPINGS
from benchmark.util import UniUtil

# Build docs/chars.txt: one fixed-width "<symbol> => <ordinal>" cell for every
# ordinal from 32 up to (but not including) 2**16, packed into rows whose
# len() never exceeds MAX_ROW_WIDTH.
CELL_WIDTH = 4 + 4 + 7  # symbol column + " => " + ordinal column
MAX_ROW_WIDTH = 120

lines = [""]
for i in range(32, 2**16):
    # The symbol is padded *after* UTF-8 encoding, so the padding (and the row
    # width check below) counts encoded bytes rather than displayed characters.
    symbol = UniUtil.chr(i).encode("utf-8").rjust(4)
    content = (symbol + " => " + str(i).ljust(7)).ljust(CELL_WIDTH)
    if len(lines[-1]) + len(content) <= MAX_ROW_WIDTH:
        lines[-1] += content
    else:
        # Row is full: start a new one. Row separators are added when joining.
        lines.append(content)

# `with` guarantees the handle is closed even if the write raises, and a single
# join avoids the repeated string copying of reduce(lambda p, c: p + c, ...).
# As before, no newline is written after the final row.
with open("docs/chars.txt", "w") as chars_file:
    chars_file.write("\n".join(lines))

In [1]:
# FAdo BACKTRACKING DEMO
from benchmark.convert import Converter
from timeit import timeit, time

evil = "(a + a)*"
re = Converter().math(evil)
startT = time.time()
i = 0

print "   iter        derivative  backtrack"
while startT + 60 > time.time():
    word = "a"*i + "b"
    print "'a'*{0:<2} + 'b'".format(i).ljust(10),
    print "{:10.4f}".format(timeit(stmt=lambda: re.evalWordP_PD(word), number=1)),
    print "{:10.4f}".format(timeit(stmt=lambda: re.evalWordP_Backtrack(word), number=1))
    i += 1

   iter        derivative  backtrack
'a'*0  + 'b'     0.0001     0.0000
'a'*1  + 'b'     0.0001     0.0000
'a'*2  + 'b'     0.0001     0.0000
'a'*3  + 'b'     0.0001     0.0000
'a'*4  + 'b'     0.0001     0.0001
'a'*5  + 'b'     0.0002     0.0001
'a'*6  + 'b'     0.0002     0.0002
'a'*7  + 'b'     0.0003     0.0006
'a'*8  + 'b'     0.0003     0.0019
'a'*9  + 'b'     0.0002     0.0030
'a'*10 + 'b'     0.0003     0.0068
'a'*11 + 'b'     0.0002     0.0114
'a'*12 + 'b'     0.0002     0.0258
'a'*13 + 'b'     0.0004     0.0388
'a'*14 + 'b'     0.0003     0.0597
'a'*15 + 'b'     0.0003     0.1187
'a'*16 + 'b'     0.0003     0.2495
'a'*17 + 'b'     0.0003     0.4883
'a'*18 + 'b'     0.0004     0.9556
'a'*19 + 'b'     0.0003     1.9467
'a'*20 + 'b'     0.0004     3.8564
'a'*21 + 'b'     0.0003     7.8410
'a'*22 + 'b'     0.0004    15.7955
'a'*23 + 'b'     0.0004    32.1275


In [1]:
import random
from FAdo.cfg import *
import FAdo.reex as reex

from benchmark.reex_ext import *
from benchmark.util import UniUtil

def _random_char_class():
    """
    Build a random character class (``chars``) to stand in for an atom.

    The class holds 2 to 10 entries, smaller counts being more likely. Each
    entry is either a single random symbol (70%) or a ``(start, end)`` pair
    whose end lies 3 to 1000 ordinals after its start (30%). One class in five
    is created with ``neg=True``.

    :returns chars:
    """
    # The mode must lie inside [low, high]. The previous mode=0 sat below
    # `low`, which let the draw fall under 2.0 and yielded one-entry classes.
    count = int(random.triangular(low=2.0, high=10.9, mode=2.0))
    negated = random.random() < 0.2

    symbols = []
    for _ in range(count):
        if random.random() < 0.7:
            symbols.append(UniUtil.randChr())
        else:
            span = random.randint(3, 1000)
            # Leave room for the span below 2**16 - 1 (presumably `max` is an
            # upper bound on the ordinal of the returned symbol -- TODO confirm).
            first = UniUtil.randChr(max=2**16 - 1 - span)
            symbols.append((first, UniUtil.chr(UniUtil.ord(first) + span)))
    return chars(symbols, neg=negated)


def _random_atom():
    """
    Pick the replacement for one generated atom: 10% ``dotany``, 20% character
    class, 70% plain ``uatom``.
    """
    roll = random.random()
    if roll < 0.1:  # 10% wild dot
        return dotany()
    if roll < 0.3:  # 20% character classes
        return _random_char_class()
    # 70% simple atoms
    return uatom(UniUtil.randChr(max="z"))


def _to_extended(node):
    """
    Recursively rebuild a FAdo expression tree out of the extension classes
    (``uconcat``, ``udisj``, ``ustar``, ``uoption``, ``uepsilon``), replacing
    every atom with a freshly randomised one.

    :param node: a node of the parsed FAdo expression
    :raises TypeError: if the node's exact type is none of the handled ones
    """
    # Exact type comparison (not isinstance) is kept on purpose so subclasses
    # of the handled node types are still rejected as before.
    kind = type(node)
    if kind is reex.concat:
        return uconcat(_to_extended(node.arg1), _to_extended(node.arg2))
    if kind is reex.disj:
        return udisj(_to_extended(node.arg1), _to_extended(node.arg2))
    if kind is reex.star:
        return ustar(_to_extended(node.arg))
    if kind is reex.option:
        return uoption(_to_extended(node.arg))
    if kind is reex.epsilon:
        return uepsilon()
    if kind is reex.atom:
        return _random_atom()
    raise TypeError("Unknown type " + str(type(node)))


def random_sample(length=10, amount=100):
    """
    Generate random regular expressions built from the extension classes.

    Each expression is produced by the ``g_rpn_snf_option`` grammar, parsed
    with ``reex.ParserRPN`` and then converted node by node; its atoms are
    replaced by random wildcards, character classes or single symbols.

    :param int length: the tree length of the wanted expression (passed to the
        generator as ``size``)
    :param int amount: the number of regular expressions to generate
    :returns list<uregexp>:
    :raises TypeError: if a parsed expression contains an unhandled node type
    """
    grammar = reStringRGenerator(["X"], size=length, cfgr=reGrammar['g_rpn_snf_option'])
    return [
        _to_extended(reex.str2regexp(grammar.generate(), parser=reex.ParserRPN))
        for _ in range(amount)
    ]

regexps = dict()
for length in range(1, 500):
    print "\r", " "*4, "\r" + str(length),
    for re in random_sample(length=length, amount=10):
        re = str(re)
        ref = regexps.get(len(re), set())
        ref.add(re)
        regexps[len(re)] = ref

499                                                                                                                                                                                                                                                                                                                                                                                                                 


In [2]:
print sum(map(lambda x: len(x), regexps.values())), "total expressions!\n"

for l in range(min(regexps.keys()), max(regexps.keys()) + 1):
    if regexps.has_key(l):
        print l, len(regexps[l])

4986 total expressions!

1 2
2 5
4 1
5 6
6 4
7 3
8 5
9 3
10 1
11 2
12 6
13 1
14 3
15 4
16 5
17 2
18 5
19 1
20 2
21 4
22 3
23 1
24 2
25 3
26 2
27 5
28 2
29 5
30 3
31 2
32 2
33 1
34 4
35 5
36 3
37 2
38 3
39 1
40 4
41 3
42 1
43 1
45 3
46 1
47 6
48 2
49 3
50 1
51 1
52 3
53 2
54 2
55 5
56 5
57 3
58 4
59 2
60 3
61 3
63 5
64 2
65 1
66 2
67 2
68 3
69 1
70 5
71 3
72 2
73 1
74 1
75 3
76 5
78 1
79 4
80 1
81 6
82 2
83 2
84 3
85 2
86 1
87 3
88 3
89 3
90 3
91 5
92 1
93 3
94 1
95 3
96 1
97 7
98 5
99 3
100 2
102 2
103 6
104 4
105 1
106 3
107 6
108 1
109 5
110 2
111 3
112 4
113 2
114 2
115 2
116 2
117 1
118 3
119 1
120 1
121 2
122 5
123 5
124 3
125 4
126 1
127 2
128 3
129 3
130 2
131 4
133 5
134 1
135 3
136 2
137 1
138 2
139 2
140 1
141 4
142 4
143 1
144 2
145 1
146 2
147 6
148 2
150 5
151 3
152 3
153 2
154 1
155 3
156 2
157 1
158 2
159 3
161 1
162 4
163 6
164 4
165 5
166 2
167 6
168 3
170 6
171 6
172 4
173 3
174 3
175 4
176 4
177 5
178 1
179 1
180 3
181 1
182 6
183 4
184 2
185 4
186 2
187 4
188 3
189 