In [2]:
from FAdo.fa import *
from FAdo.reex import *
from FAdo.fio import *
from FAdo.rndadfa import *

import lark

# -*- coding: utf-8 -*-

In [51]:
class CCount():
    """Counting (bounded repetition) node of a regular expression tree.

    The node repeats ``arg`` a number of times constrained by ``min`` and
    ``max``:

    * ``max is None``  -- exactly ``min`` repetitions;
    * ``max == "inf"`` -- at least ``min`` repetitions, no upper bound;
    * otherwise        -- between ``min`` and ``max`` repetitions.
    """

    def __init__(self, arg, min, max=None, sigma=None):
        """Build a counting node.

        :param arg: the repeated sub-expression.
        :param min: lower bound; an int or a numeric string (the parser
            hands over token text), stored as ``int``.
        :param max: upper bound; ``None`` for an exact count, ``-1`` or
            ``"inf"`` for unbounded, otherwise an int or numeric string.
        :param sigma: alphabet forwarded to the nodes built by this class.
        :raises ValueError: if a bound is not numeric.
        """
        self.arg = arg
        # Bounds are normalised to int so that derivative() can do arithmetic.
        self.min = int(min)
        if max is None:
            self.max = None
        elif max == "inf":
            self.max = "inf"
        else:
            upper = int(max)
            self.max = "inf" if upper == -1 else upper
        self.sigma = sigma

    def __repr__(self):
        """Representation of the regular expression's syntactical tree."""
        # Test against None explicitly: an upper bound of 0 is falsy but real.
        if self.max is None:
            bounds = self.min
        else:
            bounds = '{},{}'.format(self.min, self.max)
        return 'CCount({},[{}])'.format(self.arg, bounds)

    def linearForm(self):  # https://www.dcc.fc.up.pt/~nam/resources/publica/51480046.pdf
        """Placeholder: currently only prints the repeated sub-expression."""
        print(self.arg)

    def derivative(self, sigma):
        """Derivative of the counted expression with respect to a symbol.

        For ``r^[m,n]`` the result is ``d(r) . r^[max(m-1,0), n-1]``; this
        also holds when ``r`` accepts the empty word, because the extra terms
        are already contained in ``d(r) . r^[.., n-1]``.

        :param sigma: the symbol to derive by (passed on unchanged to
            ``self.arg.derivative``); note it is *not* the alphabet, which
            lives in ``self.sigma``.
        :returns: an expression node for the derivative.
        """
        upper = self.min if self.max is None else self.max
        unbounded = upper == "inf"

        # Zero repetitions allowed at most: only the empty word is matched,
        # so no symbol can be consumed.
        if not unbounded and upper <= 0:
            return CEmptySet(self.sigma)

        d = self.arg.derivative(sigma)

        # Exactly one copy could be consumed: nothing is left to repeat.
        if not unbounded and upper == 1:
            return d

        lower = self.min - 1 if self.min > 0 else 0
        if self.max is None:
            rest = CCount(self.arg, lower, None, self.sigma)
        elif unbounded:
            rest = CCount(self.arg, lower, -1, self.sigma)
        else:
            rest = CCount(self.arg, lower, upper - 1, self.sigma)

        # Avoid building a useless "epsilon . rest" concatenation.
        if d == CEpsilon():
            return rest
        return CConcat(d, rest, self.sigma)

In [57]:
class BuildRegexpCC(lark.Transformer):
    """ Semantics of the FAdo grammars' regexps
        Priorities of operators: pow > disj > conj > shuffle > concat > not > star >= option

        Each callback receives the list of already-transformed children of
        the grammar rule with the same name and returns the expression node
        for it. Operator callbacks also record on the node, as ``_ewp``,
        whether it accepts the empty word.
    """

    def __init__(self, context=None):
        """Create the transformer.

        :param context: optional dict; its ``"sigma"`` entry, when present,
            is used as the alphabet passed to every node that is built.
        """
        super(BuildRegexpCC, self).__init__()
        if context is None:
            context = dict()
        self.context = context
        self.sigma = self.context.get("sigma")

    @staticmethod
    def rege(s):
        """Top rule: unwrap the single child."""
        return s[0]

    def epsilon(self, _):
        """Empty-word node."""
        return CEpsilon(self.sigma)

    def emptyset(self, _):
        """Empty-language node."""
        return CEmptySet(self.sigma)

    def sigmap(self, _):
        """``CSigmaP`` node over the current alphabet."""
        return CSigmaP(self.sigma)

    def sigmas(self, _):
        """``CSigmaS`` node over the current alphabet."""
        return CSigmaS(self.sigma)

    @staticmethod
    def base(s):
        """Base rule: unwrap the single child."""
        return s[0]

    def _counted(self, arg, lower, upper=None):
        """Build a ``CCount`` node and record its empty-word property.

        ``sigma`` is always passed by keyword-safe position (fourth argument)
        so it can never be mistaken for the upper bound.
        """
        r = CCount(arg, lower, upper, self.sigma)
        # Zero repetitions allowed, or the operand itself accepts the empty word.
        r._ewp = int(lower) == 0 or arg._ewp
        return r

    def pow_min(self, s, inf=False):
        """Counting with a single bound.

        :param s: ``(operand, bound_subtree)``.
        :param inf: when true the bound is a minimum with no upper limit;
            otherwise no upper bound is passed to ``CCount`` at all.
        """
        (arg, n_r) = s
        n = n_r.children[0].value
        return self._counted(arg, n, -1 if inf else None)

    def pow_minmax(self, s):
        """Counting with explicit lower and upper bounds."""
        (arg, n_mi, n_ma) = s
        n_min = n_mi.children[0].value
        n_max = n_ma.children[0].value
        return self._counted(arg, n_min, n_max)

    def pow_inf(self, s):
        """Counting with a lower bound and no upper bound."""
        return self.pow_min(s, True)

    def symbol(self, s):
        """Single alphabet symbol."""
        (s,) = s
        r = CAtom(s[:], self.sigma)
        r._ewp = False
        return r

    def disj(self, s):
        """Disjunction: accepts the empty word if either operand does."""
        (arg1, arg2) = s
        r = CDisj(arg1, arg2, self.sigma)
        r._ewp = arg1._ewp or arg2._ewp
        return r

    def concat(self, s):
        """Concatenation: accepts the empty word only if both operands do."""
        (arg1, arg2) = s
        r = CConcat(arg1, arg2, self.sigma)
        r._ewp = arg1._ewp and arg2._ewp
        return r

    def shuffle(self, s):
        """Shuffle: accepts the empty word only if both operands do."""
        (arg1, arg2) = s
        r = CShuffle(arg1, arg2, self.sigma)
        r._ewp = arg1._ewp and arg2._ewp
        return r

    def u_shuffle(self, s):
        """Unary shuffle: inherits the empty-word property of its operand."""
        r = CShuffleU(s[0], self.sigma)
        r._ewp = s[0]._ewp
        return r

    def conj(self, s):
        """Conjunction: accepts the empty word only if both operands do."""
        (arg1, arg2) = s
        r = CConj(arg1, arg2, self.sigma)
        r._ewp = arg1._ewp and arg2._ewp
        return r

    def option(self, s):
        """Option: always accepts the empty word."""
        r = COption(s[0], self.sigma)
        r._ewp = True
        return r

    def notn(self, s):
        """Complement: accepts the empty word exactly when the operand does not."""
        arg = s[0]
        r = Compl(arg, self.sigma)
        r._ewp = not arg._ewp
        return r

    def star(self, s):
        """Kleene star: always accepts the empty word."""
        r = CStar(s[0], self.sigma)
        r._ewp = True
        return r
    
# Load the grammar from "regexp_test.lark" (resolved by lark at open time),
# using "rege" as the start rule and the LALR parser.
regGrammar = lark.Lark.open("regexp_test.lark", start="rege", parser="lalr")

# Counting syntax available in the grammar:
# b^[1]     -> 'b' (a single bound with no upper limit given)
# b^[1,2]   -> 1 <= number of 'b' <= 2
# b^[1,...] -> 1 <= number of 'b' <= inf

# Parse a sample expression and turn the parse tree into expression nodes;
# no alphabet is supplied (sigma is None).
tree = regGrammar.parse("a(b^[1,...])")
reg = BuildRegexpCC(context={"sigma": None}).transform(tree)

# Bare expression: displays the result when run as a notebook cell.
reg

CConcat(CAtom(a),CCount(b,[1,inf]))