# All available regex constructions

In [1]:
from regexmodel import RegexModel

### Digits

In [2]:
# The character class [0-9] yields one decimal digit per draw.
print(
    "Single digits:",
    [RegexModel("[0-9]").draw() for _ in range(4)],
)

Single digits: ['8', '2', '4', '2']


In [3]:
# \d is the shorthand for a single digit. The pattern is written as a raw
# string because "\d" in a normal literal is an invalid escape sequence
# (DeprecationWarning, and a SyntaxWarning from Python 3.12 on).
print(
    "Single digits with \\d:",
    [RegexModel(r"\d").draw() for _ in range(4)],
)

Single digits with \d: ['6', '6', '2', '5']


In [4]:
# A narrower range inside the class restricts the draws to 0, 1, 2 or 3.
print(
    "Sub range for digits (0-3): ",
    [RegexModel("[0-3]").draw() for _ in range(4)],
)

Sub range for digits (0-3):  ['1', '3', '1', '3']


### Letters

In [5]:
# [a-z] draws a single lower-case letter.
print(
    "lower case",
    [RegexModel("[a-z]").draw() for _ in range(4)],
)

lower case ['c', 't', 'z', 'e']


In [6]:
# [A-Z] draws a single upper-case letter.
print(
    "upper case",
    [RegexModel("[A-Z]").draw() for _ in range(4)],
)

upper case ['J', 'W', 'O', 'J']


In [7]:
# Two ranges in one class: each draw is a lower- or upper-case letter.
print(
    "Lower and upper case:",
    [RegexModel("[a-zA-Z]").draw() for _ in range(4)],
)

Lower and upper case: ['T', 'd', 'Q', 'K']


In [8]:
# Letter ranges can be narrowed as well: only d, e or f are drawn here.
print(
    "Sub ranges (d-f):",
    [RegexModel("[d-f]").draw() for _ in range(4)],
)

Sub ranges (d-f): ['e', 'f', 'd', 'f']


### Single characters

In [9]:
# A bare literal character is the only possible draw.
print(
    "Always a: ",
    [RegexModel("a").draw() for _ in range(4)],
)

Always a:  ['a', 'a', 'a', 'a']


In [10]:
# A character class with a single member behaves like the literal itself.
print(
    "Also always a:",
    [RegexModel("[a]").draw() for _ in range(4)],
)

Also always a: ['a', 'a', 'a', 'a']


In [11]:
# Listing characters inside a class draws one of the listed characters.
print(
    "a or b or c:",
    [RegexModel("[abc]").draw() for _ in range(4)],
)

a or b or c: ['a', 'a', 'b', 'a']


### Multiple choice

In [12]:
# Ranges can be combined inside one class: a digit or a letter from a to e.
print(
    "Within a charcter class (digit or a-e):",
    [RegexModel("[0-9a-e]").draw() for _ in range(4)],
)

Within a charcter class (digit or a-e): ['8', '0', '4', '4']


In [13]:
# Alternation with | inside a group chooses between whole sub-patterns.
print(
    "Outside a character class (aaa or bbb):",
    [RegexModel("(aaa|bbb)").draw() for _ in range(4)],
)

Outside a character class (aaa or bbb): ['aaa', 'aaa', 'aaa', 'bbb']


In [14]:
# A group may hold more than two alternatives.
print(
    "More than two choices (a or b or c):",
    [RegexModel("(a|b|c)").draw() for _ in range(4)],
)

More than two choices (a or b or c): ['a', 'c', 'a', 'a']


In [15]:
# "a" is listed four times and "b" once; per the label, repeating an
# alternative shifts the draw probabilities towards it.
print(
    "Repeats change the probabilities:",
    [RegexModel("(a|a|a|a|b)").draw() for _ in range(4)],
)

Repeats change the probabilities: ['a', 'a', 'a', 'b']


### Repeating character (classes)

In [16]:
# {3} repeats the preceding element exactly three times. The pattern is a raw
# string because "\d" in a normal literal is an invalid escape sequence
# (DeprecationWarning, and a SyntaxWarning from Python 3.12 on).
print(
    "Repeat digit x3:",
    [RegexModel(r"\d{3}").draw() for _ in range(4)],
)

Repeat digit x3: ['451', '968', '921', '684']


In [17]:
# {2,6} repeats the preceding element between two and six times. The pattern
# is a raw string because "\d" in a normal literal is an invalid escape
# sequence (DeprecationWarning, and a SyntaxWarning from Python 3.12 on).
print(
    "Repeat digit between 2x and 6x:",
    [RegexModel(r"\d{2,6}").draw() for _ in range(4)],
)

Repeat digit between 2x and 6x: ['7715', '1900', '409441', '28522']


## API

In [18]:
# Tour of the RegexModel API. Only the value of the last expression in the
# cell is displayed; the results of `model.regex` and `model.draw()` are
# evaluated but not shown.
model = RegexModel.fit(10*["a"]+10*["b"])  # Create a model from data
model.regex  # Get regex
model.draw()  # Draw a value
# Raw string: "\d" in a normal literal is an invalid escape sequence
# (DeprecationWarning, and a SyntaxWarning from Python 3.12 on).
model = RegexModel(r"\d(a||ab)[A-Z]{3,6}")  # Create a model from a regex
model_data = model.serialize()  # Serialize the model so that it can be stored in a JSON (or any other) file
model = RegexModel.deserialize(model_data)  # Create the model from the serialization
# NOTE: the values below do not start with a digit, so they are not expected
# to match the regex above; the statistics then describe a poor fit.
model.fit_statistics(3*["a"]+3*["b"])  # Get statistical information on goodness of fit and more

{'failed': 6,
 'success': 0,
 'n_tot_char': 6,
 'n_char_success': 0,
 'n_parameters': 10,
 'avg_log_like_per_char': -6.907755278982137,
 'avg_log_like_pc_success': 0.0}

## Visualization

In [19]:
# The visualization helper lives in a separate submodule and is imported only here.
# NOTE(review): presumably this needs the optional pyvis dependency - confirm.
from regexmodel.visualization import regex_model_to_pyvis

# Convert the `model` created in the API cell above into a network object.
net = regex_model_to_pyvis(model)
# Write the graph to regex.html; notebook=True is passed so it can be shown inline.
net.show("regex.html", notebook=True)

regex.html
