In [1]:
import numpy as np
import pandas as pd
import re 

In [2]:
# Sample SQL query string; nothing in the visible part of this file reads it.
example = "SELECT user FROM USERS WHERE userid >= 1000"


## Class

In [None]:
class Rule:
    """One rule made of an optional operator, a prefix and a subject.

    The constructor takes no arguments; every field starts out empty and
    is populated afterwards by assigning to the attributes.
    """

    def __init__(self):
        # Text fields begin as empty strings so they can be grown with
        # ``+=``; ``op`` stays None until an operator is assigned.
        self.prefix, self.subject, self.op = "", "", None

    def __repr__(self):
        # A falsy operator (None or '') is shown as an empty string.
        shown_op = self.op or ''
        return "<Rule: {} {}({})>".format(shown_op, self.prefix, self.subject)

In [3]:
class RuleGroup:
    """A list of Rule objects plus a link to the group it was created from.

    parent -- the RuleGroup this one is attached to (the FSM passes None
              for its initial group)
    level  -- nesting level number supplied by the caller
    op     -- operator attached to the group, or None
    """

    def __init__(self, parent, level, op):
        # Attribute creation order is kept stable because __repr__ prints
        # the instance dictionary as-is.
        self.op, self.parent, self.level = op, parent, level
        # Every group starts with exactly one empty rule; rule_count is
        # maintained separately by the code that appends further rules.
        self.rule_count = 1
        self.rules = [Rule()]

    def __repr__(self):
        # vars(self) is the instance __dict__, so all attributes (including
        # the parent group) appear in the output.
        return "<RuleGroup: {}>".format(vars(self))

In [4]:
class Rule_Parse_FSM:
    """Character-at-a-time state machine driven by a transition table.

    Relies on two module-level names defined outside this class:
    ``S_NEW_GROUP`` (the initial state) and ``FSM_MAP`` (an iterable of
    transition dicts with the keys 'src', 'dst', 'condition_re_compiled'
    and 'callback').
    """

    def __init__(self, input_str):
        root_level = 0
        self.input_str = input_str
        self.current_state = S_NEW_GROUP
        self.group_current_level = root_level
        # Parsing starts in a parent-less group at the root level.
        self.current_group = RuleGroup(None, root_level, None)
        self.current_char = ''

    def run(self):
        """Feed every character of the input through the machine, in order."""
        for ch in self.input_str:
            handled = self.process_next(ch)
            if handled:
                continue
            # No transition of the current state accepted this character.
            print("skip '{}' in {}".format(ch, self.current_state))

    def process_next(self, achar):
        """Try the current state's transitions against ``achar``.

        Returns True as soon as one transition fires, False when none of
        them accepts the character.
        """
        self.current_char = achar
        # Snapshot the state: a transition that fires changes current_state.
        state_at_entry = self.current_state
        return any(
            candidate['src'] == state_at_entry
            and self.iterate_re_evaluators(achar, candidate)
            for candidate in FSM_MAP
        )

    def iterate_re_evaluators(self, achar, transition):
        """Take ``transition`` if its compiled pattern matches ``achar``.

        Returns True when the transition was taken, False otherwise.
        """
        pattern = transition['condition_re_compiled']
        if not pattern.match(achar):
            return False
        self.update_state(transition['dst'], transition['callback'])
        return True

    def update_state(self, new_state, callback):
        """Print a trace line, switch to ``new_state``, then run ``callback``."""
        # Trace format: <char> -> <old state> : <new state>
        trace = "{} -> {} : {}".format(
            self.current_char, self.current_state, new_state)
        print(trace)
        self.current_state = new_state
        # The callback receives the machine itself and already sees new_state.
        callback(self)

In [5]:
def transition_skip(fsm_obj):
    """Transition callback that deliberately leaves ``fsm_obj`` untouched."""


def transition_new_group(fsm_obj):
    """Descend one nesting level and make a new empty group current.

    The group that was current before the call becomes the parent of the
    newly created RuleGroup; the new group's operator starts as None.
    """
    child_level = fsm_obj.group_current_level + 1
    fsm_obj.group_current_level = child_level
    fsm_obj.current_group = RuleGroup(fsm_obj.current_group, child_level, None)


def transition_append_pre(fsm_obj):
    """Append the current character to the prefix of the active rule.

    The active rule is the one at index ``rule_count - 1`` in the current
    group's rule list.
    """
    group = fsm_obj.current_group
    active_rule = group.rules[group.rule_count - 1]
    active_rule.prefix += fsm_obj.current_char


def transition_add_op(fsm_obj):
    """Store the current character as the operator of the active rule.

    The active rule is the one at index ``rule_count - 1`` in the current
    group's rule list; any operator it already had is overwritten.
    """
    group = fsm_obj.current_group
    active_rule = group.rules[group.rule_count - 1]
    active_rule.op = fsm_obj.current_char


def transition_end_group(fsm_obj):
    """Open a fresh RuleGroup one level deeper, parented to the current one.

    NOTE(review): this does exactly what transition_new_group does (the
    level goes up, never down) -- confirm that is intended for a
    group-closing transition.
    """
    deeper = fsm_obj.group_current_level + 1
    previous_group = fsm_obj.current_group
    fsm_obj.group_current_level = deeper
    fsm_obj.current_group = RuleGroup(previous_group, deeper, None)


def transition_end_rule(fsm_obj):
    """Transition callback for the end of a rule; currently does nothing."""


def transition_add_op_new_rule(fsm_obj):
    """Start a new rule in the current group and give it an operator.

    Bumps the group's rule counter, appends an empty Rule, then stores the
    current character as the operator of the rule at index
    ``rule_count - 1``.
    """
    group = fsm_obj.current_group
    group.rule_count += 1
    group.rules.append(Rule())
    # Index through rule_count (not -1) to mirror how the other callbacks
    # locate the active rule.
    group.rules[group.rule_count - 1].op = fsm_obj.current_char


def transition_append_subj(fsm_obj):
    """Append the current character to the subject of the active rule.

    The active rule is the one at index ``rule_count - 1`` in the current
    group's rule list.
    """
    group = fsm_obj.current_group
    active_rule = group.rules[group.rule_count - 1]
    active_rule.subject += fsm_obj.current_char


def transition_add_op_new_group(fsm_obj):
    """Store the current character as the operator of the current group.

    Only the group's ``op`` attribute is written; no group is created here.
    """
    group = fsm_obj.current_group
    group.op = fsm_obj.current_char