In [None]:
from lark import Lark
from lark import Transformer

In [4]:
## ver20230610
## common definition
# Shared Lark grammar fragment. It is concatenated with a start-rule grammar
# (journal_str / tekiyou_def_str) before being handed to Lark, so it only
# contains the rules and terminals those grammars have in common.
# NOTE: DATETIME, DATE, PREFIXED_TIME and TIME_ZONE come from a relative
# grammar import (.datetime_iso_8601) that is not part of this notebook.
lark_def_str = r"""    
    date_or_datetime: DATETIME | DATE
    prefixed_time: PREFIXED_TIME
    time_zone: TIME_ZONE
    
    account: STRING
    sub_account: STRING | ref_memo
    item: STRING | ref_memo
    
    ref_memo: "<" STRING ">"
    price: number | ref_memo
    price_unit: STRING_AND_MARK_WITHOUT_DIGIT
   
    quantity: number | op_quantity | ref_memo
    op_quantity: OP_EQUAL_QUANTITY | OP_BALANCE_QUANTITY | OP_DIFF_QUANTITY
    quantity_unit: STRING_AND_MARK_WITHOUT_DIGIT
    
    amount: number | op_amount | ref_memo
    op_amount: OP_AUTO_AMOUNT | OP_BALANCE_AMOUNT | OP_DIFF_AMOUNT | OP_EQUAL_AMOUNT
    amount_unit: STRING_AND_MARK_WITHOUT_DIGIT
    
    memo_number: key ":" number memo_unit?
    memo_string: key "::" STRING 
    key: STRING
    memo_unit: STRING_AND_MARK_WITHOUT_DIGIT
    
    DEBIT_SIGN.10: "Dr" | "借方"
    CREDIT_SIGN.10: "Cr" | "貸方"
    
    param_mark: PARTNER_MARK | PERSON_IN_CHARGE_MARK | MEMO_MARK | REMARKS_MARK
    param: string | memo_number | memo_string
    param_pair: param_mark param
    
    _ENTRY_START_MARK.10: "//"
    _SUB_ACCOUNT_MARK.5: "/"
    _ITEM_MARK.5: "#"
    _PRICE_MARK.5: "@"
    _QUANTITY_MARK.5: "*"
    PARTNER_MARK.5: "$"
    PERSON_IN_CHARGE_MARK.5: ">"
    MEMO_MARK.5: "&"
    REMARKS_MARK.10: "##"
    _ENTRY_END_MARK.10: "//"
    
    OP_AUTO_AMOUNT.9: "?" | "?A"
    OP_BALANCE_AMOUNT.10: "?B"
    OP_DIFF_AMOUNT.10: "?D"
    OP_EQUAL_AMOUNT.10: "?E"
    
    OP_EQUAL_QUANTITY.9:"E"
    OP_BALANCE_QUANTITY.9:"B"
    OP_DIFF_QUANTITY.9:"D"

    DIGIT: "0".."9" // \u0030-\u0039
    INT: DIGIT+
    number: NUMBER
    NUMBER: ["+"|"-"] INT ((","|"_") INT)* ["." INT]
    
    text: TEXT
    TEXT: /[\u0011-\uFFFF]+/
    TEXT_WITHOUT_SLASH: /[\u0011-\u002E\u0030-\uFFFF]+/
    
    string:STRING
    
    PARENTHESIS: "(" | ")" | "（" | "）" | "[" | "]" | "［" | "］"
    SLASH: "/" | "／"
    MIDDLE_DOT: "･" | "・"
    UNDER_SCORE: "_" | "＿"     
    KUTEN_KIGOU:/[\u3001-\u3030]+/
    HIRAGANA: /[\u3041-\u3096\u3099-\u309F]+/
    KATAKANA: /[\u30A1-\u30FF]+/
    ZENKAKU_DIGIT: /[\uFF10-\uFF19]+/
    // Supplementary-plane kanji are matched as code points (U+20000-U+2FFFF).
    // A UTF-16 surrogate-pair pattern never matches in Python 3 strings.
    KANJI: /[〻\u3400-\u9FFF\uF900-\uFAFF\U00020000-\U0002FFFF]/
    STRING: (UNDER_SCORE|LETTER|DIGIT|HIRAGANA|KATAKANA|KANJI|ZENKAKU_DIGIT|MIDDLE_DOT|KUTEN_KIGOU)+
    STRING_AND_MARK: (UNDER_SCORE|LETTER|DIGIT|HIRAGANA|KATAKANA|KANJI|ZENKAKU_DIGIT|MIDDLE_DOT|KUTEN_KIGOU|SLASH|PARENTHESIS)+
    STRING_AND_MARK_WITHOUT_DIGIT: (UNDER_SCORE|LETTER|HIRAGANA|KATAKANA|KANJI|MIDDLE_DOT|KUTEN_KIGOU|SLASH|PARENTHESIS)+
    
    WS2: /[ \t\f\r\n　]/+ // HANKAKU space,Horizontal TAB,Form Feed,Carriage Return,Line Feed,ZENKAKU space 

    LCASE_LETTER: "a".."z" // \u0061-\u007A
    UCASE_LETTER: "A".."Z" // \u0041-\u005A
    LETTER: UCASE_LETTER | LCASE_LETTER
    
    %import .datetime_iso_8601 (DATETIME,DATE,PREFIXED_TIME,TIME_ZONE)
    %ignore WS2
    
    """

In [6]:
## ver20230610
## definition of journal_entry
## definition of journal(plain text document that include journal entries and texts)
##
# Start rules for the journal grammar. The rules referenced here (account,
# amount, param_pair, the *_MARK terminals, ...) are defined in lark_def_str,
# so the two strings are concatenated before being passed to Lark.
journal_str = r"""
    journal: (text* journal_entry)* text*
    journal_entry: entry_header (debit | credit)* entry_footer
    entry_header: _ENTRY_START_MARK  date_or_datetime param_pair*
    debit:  DEBIT_SIGN   account (_SUB_ACCOUNT_MARK sub_account)? (_ITEM_MARK item)? (_PRICE_MARK price)? (_QUANTITY_MARK quantity quantity_unit)? (amount? amount_unit?) param_pair*
    credit: CREDIT_SIGN  account (_SUB_ACCOUNT_MARK sub_account)? (_ITEM_MARK item)? (_PRICE_MARK price)? (_QUANTITY_MARK quantity quantity_unit)? (amount? amount_unit?) param_pair*
    entry_footer: _ENTRY_END_MARK
    """

# LALR parser whose start rule is a single journal_entry (no surrounding text).
journal_entry_parser_lalr = Lark(journal_str+lark_def_str,start ="journal_entry",parser='lalr', debug=True)

In [10]:
# Parsing a single journal entry (journal_entry)
# Sample 1: header with several parameters, then one debit and two credit lines.
journal_entry1 = r"""//2022-12-14 $得意先A >担当者あ $得意先B &気温:23.1[kg] &天気::晴れ
##test
##こめんとです
Dr 通信費/携帯代 @2 *E(ems) 100円 $NTT
Cr 現金/小口#あい 5ドル &気温:23.1度 &天気::晴れ
Cr 現金/小口２ ?Dドル >担当者V
//"""

# Sample 2: defined here but not parsed in this cell.
journal_entry2 = r"""//2022-12-14 ##test
Dr　通信費/携帯代　#0901234567 @600 ?B円 
 Cr　現金 5000 ##test
 Cr　預金 1000//
"""

tree_entry = journal_entry_parser_lalr.parse(journal_entry1)

print(tree_entry.pretty())

journal_entry
  entry_header
    date_or_datetime	2022-12-14
    param_pair
      param_mark	$
      param
        string	得意先A
    param_pair
      param_mark	>
      param
        string	担当者あ
    param_pair
      param_mark	$
      param
        string	得意先B
    param_pair
      param_mark	&
      param
        memo_number
          key	気温
          number	23.1
          memo_unit	[kg]
    param_pair
      param_mark	&
      param
        memo_string
          key	天気
          晴れ
    param_pair
      param_mark	##
      param
        string	test
    param_pair
      param_mark	##
      param
        string	こめんとです
  debit
    Dr
    account	通信費
    sub_account	携帯代
    price
      number	2
    quantity
      op_quantity	E
    quantity_unit	(ems)
    amount
      number	100
    amount_unit	円
    param_pair
      param_mark	$
      param
        string	NTT
  credit
    Cr
    account	現金
    sub_account	小口
    item	あい
    amount
      number	5
    amount_unit	ドル
    param_pair
      param_m

In [12]:
## Parsing a journal (free text that contains journal entries)
## e.g. journal entries written in the body of an e-mail
# Same grammar as journal_entry_parser_lalr, but starting from the "journal" rule.
journal_parser_lalr = Lark(journal_str+lark_def_str,start ="journal",parser='lalr')

In [13]:
# Sample document mixing free text with journal entries. It includes entries
# that use <...> memo references and entries that consist of header
# parameters only (no debit/credit lines).
journal3= r"""
以下に仕訳の例を示します。
*仕訳の例１ dds 123


//2022-12-14 ##test
Dr　通信費/<担当者>　#<特売品> @600 ?B円 
 Cr　現金 5000 ##test
 Cr　預金 1000//

次に、仕訳の例２を示します。
仕訳の例２

//
2022-12-14
##こめんと２です。
Dr　通信費/携帯代　#0901234567 @<rate> *<販売上限>個 <所持金額>円 
 Cr　現金 5000 ##test
 Cr　預金 1000 &気温:25.0度
 Cr　預金 1000 &天気::晴れ
//

このようにテキストの中に仕訳を記述し、抽出できます。

//2022-12-15 >ken &rate:100 &天気::晴れ//
//2022-12-16 >gen &rate:110 &天気::雨//

このようにパラメータだけの指定もできます。
パラメータの値（例えば為替レートなど）を事前に取り込んでおくことにより、その時点でのパラメータの値を反映した仕訳を作成することが可能です。

"""

# Parse the whole document; tree_journal is reused by the transformer cells below.
tree_journal = journal_parser_lalr.parse(journal3)
#tree
print(tree_journal.pretty())

journal
  text	以下に仕訳の例を示します。
  text	*仕訳の例１ dds 123
  journal_entry
    entry_header
      date_or_datetime	2022-12-14
      param_pair
        param_mark	##
        param
          string	test
    debit
      Dr
      account	通信費
      sub_account
        ref_memo	担当者
      item
        ref_memo	特売品
      price
        number	600
      amount
        op_amount	?B
      amount_unit	円
    credit
      Cr
      account	現金
      amount
        number	5000
      param_pair
        param_mark	##
        param
          string	test
    credit
      Cr
      account	預金
      amount
        number	1000
    entry_footer
  text	次に、仕訳の例２を示します。
  text	仕訳の例２
  journal_entry
    entry_header
      date_or_datetime	2022-12-14
      param_pair
        param_mark	##
        param
          string	こめんと２です。
    debit
      Dr
      account	通信費
      sub_account	携帯代
      item	0901234567
      price
        ref_memo	rate
      quantity
        ref_memo	販売上限
      quantity_unit	個
      amount
        ref_me

In [15]:
##20230602
from lark import Token

class CalculateTree(Transformer):
    """Transformer for the grammar rules shared by the journal and tekiyou grammars.

    Every method named after a grammar rule receives that rule's already
    transformed children and returns a plain Python value (dict, tuple, str,
    int or float). Subclasses add the methods for their own start rules.
    """

    def __init__(self):
        # Run the base-class initialiser; the previous body was a bare ``pass``
        # that silently skipped it.
        super().__init__()

    def get_updated_dict_with_param_pair(self, original_dict, child):
        """Return a copy of ``original_dict`` with one param_pair merged in.

        Args:
            original_dict: dict being accumulated for a header/debit/credit.
            child: ``(param_mark, param)`` tuple as produced by ``param_pair``.

        Returns:
            A new dict. ``child`` is returned unchanged when ``child`` is not a
            tuple or ``original_dict`` is not a dict.

        Notes:
            * MEMO_MARK params are appended to the "memo_number"/"memo_string"
              lists, but only when the param is a dict and the list key already
              exists in ``original_dict``; otherwise the param is ignored.
            * PARTNER/PERSON_IN_CHARGE/REMARKS marks store a single value, so a
              repeated mark overwrites the earlier one.
        """
        if not isinstance(child, tuple):
            return child

        if not isinstance(original_dict, dict):
            return child

        # dict.copy() is shallow: the memo lists would still be shared with
        # original_dict. They are replaced (not appended to) below so that the
        # caller's dict is never mutated.
        target_dict = original_dict.copy()

        single_value_keys = {
            "PARTNER_MARK": "partner",
            "PERSON_IN_CHARGE_MARK": "person_in_charge",
            "REMARKS_MARK": "remarks",
        }

        param_mark, param = child
        if param_mark == "MEMO_MARK" and isinstance(param, dict):
            for memo_key in ("memo_number", "memo_string"):
                if memo_key in param and memo_key in target_dict:
                    target_dict[memo_key] = [*target_dict[memo_key], param[memo_key]]
        elif param_mark in single_value_keys:
            target_dict[single_value_keys[param_mark]] = param
        return target_dict

    def date_or_datetime(self, children):
        """date_or_datetime: DATETIME | DATE -> {"date_or_datetime": (token type, text)}."""
        token = children[0]
        return {"date_or_datetime": (token.type, token.value)}

    def prefixed_time(self, children):
        """prefixed_time: PREFIXED_TIME -> {"prefixed_time": text}."""
        return {"prefixed_time": children[0].value}

    def time_zone(self, children):
        """time_zone: TIME_ZONE -> {"time_zone": text}."""
        return {"time_zone": children[0].value}

    def account(self, children):
        """account: STRING -> {"account": text}."""
        return {"account": children[0].value}

    def sub_account(self, children):
        """sub_account: STRING | ref_memo -> {"sub_account": text or ref_memo dict}."""
        first = children[0]
        if isinstance(first, dict):
            # ref_memo (already transformed into a dict)
            return {"sub_account": first}
        # STRING token
        return {"sub_account": first.value}

    def item(self, children):
        """item: STRING | ref_memo -> {"item": text or ref_memo dict}."""
        first = children[0]
        if isinstance(first, dict):
            # ref_memo (already transformed into a dict)
            return {"item": first}
        # STRING token
        return {"item": first.value}

    def price(self, children):
        """price: number | ref_memo -> {"price": number or ref_memo dict}."""
        return {"price": children[0]}

    def price_unit(self, children):
        """price_unit: STRING_AND_MARK_WITHOUT_DIGIT -> {"price_unit": text}."""
        return {"price_unit": children[0].value}

    def op_quantity(self, children):
        """op_quantity: OP_EQUAL_QUANTITY | OP_BALANCE_QUANTITY | OP_DIFF_QUANTITY.

        Returns {"op_quantity": <terminal name>}, e.g. "OP_EQUAL_QUANTITY".
        """
        return {"op_quantity": children[0].type}

    def quantity(self, children):
        """quantity: number | op_quantity | ref_memo.

        Returns a dict that always carries "op_quantity":
        the op_quantity dict itself, or OP_REF_QUANTITY / OP_VALUE_QUANTITY
        together with the referenced memo / numeric value under "quantity".
        """
        first = children[0]
        if isinstance(first, dict):
            if "op_quantity" in first:
                # op_quantity, e.g. {'op_quantity': 'OP_EQUAL_QUANTITY'}
                return first
            # ref_memo
            return {"op_quantity": "OP_REF_QUANTITY", "quantity": first}
        # number
        return {"op_quantity": "OP_VALUE_QUANTITY", "quantity": first}

    def quantity_unit(self, children):
        """quantity_unit: STRING_AND_MARK_WITHOUT_DIGIT -> {"quantity_unit": text}."""
        return {"quantity_unit": children[0].value}

    def op_amount(self, children):
        """op_amount: OP_AUTO_AMOUNT | OP_BALANCE_AMOUNT | OP_DIFF_AMOUNT | OP_EQUAL_AMOUNT.

        Returns {"op_amount": <terminal name>}.
        """
        return {"op_amount": children[0].type}

    def amount(self, children):
        """amount: number | op_amount | ref_memo.

        Returns a dict that always carries "op_amount":
        the op_amount dict itself, or OP_REF_AMOUNT / OP_VALUE_AMOUNT together
        with the referenced memo / numeric value under "amount".
        """
        first = children[0]
        if isinstance(first, dict):
            if "op_amount" in first:
                # op_amount, e.g. {'op_amount': 'OP_EQUAL_AMOUNT'}
                return first
            # ref_memo
            return {"op_amount": "OP_REF_AMOUNT", "amount": first}
        # number
        return {"op_amount": "OP_VALUE_AMOUNT", "amount": first}

    def amount_unit(self, children):
        """amount_unit: STRING_AND_MARK_WITHOUT_DIGIT -> {"amount_unit": text}."""
        return {"amount_unit": children[0].value}

    def param_pair(self, children):
        """param_pair: param_mark param -> (mark terminal name, param value).

        Example: ("PARTNER_MARK", "ABC商会").
        """
        return (children[0], children[1])

    def param_mark(self, children):
        """param_mark: PARTNER_MARK | PERSON_IN_CHARGE_MARK | MEMO_MARK | REMARKS_MARK.

        Returns the terminal name of the mark token.
        """
        # Named mark_type (not "type") to avoid shadowing the builtin.
        mark_type = children[0].type
        return mark_type

    def param(self, children):
        """param: string | memo_number | memo_string -> the single transformed child."""
        return children[0]

    def ref_memo(self, children):
        """ref_memo: "<" STRING ">" -> {"ref_memo": referenced name}."""
        return {"ref_memo": children[0].value}

    def memo_number(self, children):
        """memo_number: key ":" number memo_unit? -> {"memo_number": (key, number, unit)}.

        ``unit`` is None when memo_unit is absent.
        """
        unit_str = children[2] if len(children) >= 3 else None
        return {"memo_number": (children[0], children[1], unit_str)}

    def memo_string(self, children):
        """memo_string: key "::" STRING -> {"memo_string": (key, text)}."""
        return {"memo_string": (children[0], children[1].value)}

    def memo_unit(self, children):
        """memo_unit: STRING_AND_MARK_WITHOUT_DIGIT -> text."""
        return children[0].value

    def key(self, children):
        """key: STRING -> text."""
        return children[0].value

    def number(self, children):
        """number: NUMBER -> int, or float when the literal contains '.'.

        The ',' and '_' digit separators are removed before conversion.
        """
        digits = children[0].value.replace(",", "").replace("_", "")
        if "." in digits:
            return float(digits)
        return int(digits)

    def string(self, children):
        """string: STRING -> text."""
        return children[0].value

    def text(self, children):
        """text: TEXT -> text."""
        return children[0].value


In [16]:
##20230602
from lark import Transformer
from lark import Token

class CaliculateJournal(CalculateTree):
    """Transformer for the journal grammar (journal / journal_entry start rules).

    Builds nested dicts on top of the values produced by CalculateTree.
    """

    def __init__(self):
        super().__init__()

    def _collect_posting(self, children):
        """Merge the children of one debit/credit line into a single dict.

        Tuples are param_pairs; dicts are account/item/price/quantity/amount
        parts. Anything else (e.g. the Dr/Cr sign token) is skipped. When no
        amount part supplied "op_amount", it defaults to "OP_EQUAL_AMOUNT".
        """
        posting = {"memo_number":[],"memo_string":[]}
        for node in children:
            node_type = type(node)
            if node_type is tuple:
                posting = self.get_updated_dict_with_param_pair(posting,node)
            elif node_type is dict:
                posting.update(node)
        posting.setdefault("op_amount","OP_EQUAL_AMOUNT")
        return posting

    def journal(self,children):
        """journal: (text* journal_entry)* text* -> {"journal": [text or entry, ...]}."""
        return {"journal":list(children)}

    def journal_entry(self,children):
        """journal_entry: entry_header (debit | credit)* entry_footer.

        Header and footer dicts are merged at the top level; every other dict
        child is appended to "entries" in source order.
        """
        merged = {"entries":[]}
        for node in children:
            if type(node) is not dict:
                continue
            if "entry_header" in node or "entry_footer" in node:
                merged.update(node)
            else:
                merged["entries"].append(node)
        return {"journal_entry":merged}

    def entry_header(self,children):
        """entry_header: _ENTRY_START_MARK  date_or_datetime param_pair*."""
        header = {"memo_number":[],"memo_string":[]}
        for node in children:
            node_type = type(node)
            if node_type is tuple:
                # param_pair: PARTNER_MARK | PERSON_IN_CHARGE_MARK | MEMO_MARK | REMARKS_MARK
                header = self.get_updated_dict_with_param_pair(header,node)
            elif node_type is dict:
                # date_or_datetime, e.g. {"date_or_datetime":('DATE','2022-12-14')}
                header.update(node)
        return {"entry_header":header}

    def debit(self,children):
        """debit: DEBIT_SIGN account ... param_pair* -> {"debit": {...}}."""
        return {"debit":self._collect_posting(children)}

    def credit(self,children):
        """credit: CREDIT_SIGN account ... param_pair* -> {"credit": {...}}."""
        return {"credit":self._collect_posting(children)}

    def entry_footer(self,children):
        """entry_footer: _ENTRY_END_MARK -> {"entry_footer": merged dict children}."""
        footer = {}
        for node in children:
            if type(node) is dict:
                footer.update(node)
        return {"entry_footer":footer}


In [17]:
# Convert the parsed journal tree into a dict
tf = CaliculateJournal()
%time journal = tf.transform(tree_journal)
journal

Wall time: 997 µs


{'journal': ['以下に仕訳の例を示します。',
  '*仕訳の例１ dds 123',
  {'journal_entry': {'entries': [{'debit': {'memo_number': [],
       'memo_string': [],
       'account': '通信費',
       'sub_account': {'ref_memo': '担当者'},
       'item': {'ref_memo': '特売品'},
       'price': 600,
       'op_amount': 'OP_BALANCE_AMOUNT',
       'amount_unit': '円'}},
     {'credit': {'memo_number': [],
       'memo_string': [],
       'account': '現金',
       'op_amount': 'OP_VALUE_AMOUNT',
       'amount': 5000,
       'remarks': 'test'}},
     {'credit': {'memo_number': [],
       'memo_string': [],
       'account': '預金',
       'op_amount': 'OP_VALUE_AMOUNT',
       'amount': 1000}}],
    'entry_header': {'memo_number': [],
     'memo_string': [],
     'date_or_datetime': ('DATE', '2022-12-14'),
     'remarks': 'test'},
    'entry_footer': {}}},
  '次に、仕訳の例２を示します。',
  '仕訳の例２',
  {'journal_entry': {'entries': [{'debit': {'memo_number': [],
       'memo_string': [],
       'account': '通信費',
       'sub_account': '携帯代',
 

In [18]:
#20230604
def get_journal_sorted(journal):
    """Return the journal with free text removed and entries ordered by date.

    Args:
        journal: dict of the form {'journal': [text or journal_entry dict, ...]}.

    Returns:
        {'journal': [journal_entry dict, ...]} sorted by the date/datetime
        string stored in each entry header. The input list is not modified.
    """
    def header_date(entry):
        # Second element of ('DATE' | 'DATETIME', text) is the date string.
        return entry['journal_entry']['entry_header']['date_or_datetime'][1]

    entries_only = []
    for element in journal['journal']:
        # Plain text lines are not dicts; keep journal entries only.
        if type(element) is dict:
            entries_only.append(element)

    entries_only.sort(key=header_date)
    return {'journal': entries_only}

journal_sorted = get_journal_sorted(journal)
journal_sorted

{'journal': [{'journal_entry': {'entries': [{'debit': {'memo_number': [],
       'memo_string': [],
       'account': '通信費',
       'sub_account': {'ref_memo': '担当者'},
       'item': {'ref_memo': '特売品'},
       'price': 600,
       'op_amount': 'OP_BALANCE_AMOUNT',
       'amount_unit': '円'}},
     {'credit': {'memo_number': [],
       'memo_string': [],
       'account': '現金',
       'op_amount': 'OP_VALUE_AMOUNT',
       'amount': 5000,
       'remarks': 'test'}},
     {'credit': {'memo_number': [],
       'memo_string': [],
       'account': '預金',
       'op_amount': 'OP_VALUE_AMOUNT',
       'amount': 1000}}],
    'entry_header': {'memo_number': [],
     'memo_string': [],
     'date_or_datetime': ('DATE', '2022-12-14'),
     'remarks': 'test'},
    'entry_footer': {}}},
  {'journal_entry': {'entries': [{'debit': {'memo_number': [],
       'memo_string': [],
       'account': '通信費',
       'sub_account': '携帯代',
       'item': '0901234567',
       'price': {'ref_memo': 'rate'},
    

In [30]:
# Parser for entries written in the description ("tekiyou") field of accounting software
# Parses the part of the description field that starts at "//"
# The accounting software already holds the date and the debit/credit accounts,
# so only the remaining parts are parsed here

## ver20230610
# Start rules for the tekiyou grammar; the shared rules come from lark_def_str.
tekiyou_def_str = r"""
    tekiyou: (TEXT_WITHOUT_SLASH "/")* TEXT_WITHOUT_SLASH? tekiyou_entry? TEXT?
    tekiyou_entry: tekiyou_entry_header (debit_info | credit_info)+ tekiyou_entry_footer
    tekiyou_entry_header: _ENTRY_START_MARK (prefixed_time? time_zone?) param_pair*
    debit_info:  DEBIT_SIGN  (_ITEM_MARK item)? (_PRICE_MARK price)? (_QUANTITY_MARK quantity quantity_unit)? (amount? amount_unit?) param_pair*
    credit_info: CREDIT_SIGN (_ITEM_MARK item)? (_PRICE_MARK price)? (_QUANTITY_MARK quantity quantity_unit)? (amount? amount_unit?) param_pair*
    tekiyou_entry_footer: _ENTRY_END_MARK
    """

# One parser per start rule: a bare entry, and a full description field.
tekiyou_entry_parser = Lark(tekiyou_def_str+lark_def_str,start ="tekiyou_entry",parser='lalr')
tekiyou_parser_lalr = Lark(tekiyou_def_str+lark_def_str,start ="tekiyou",parser='lalr')


In [27]:
#v20230610
class CaliculateTekiyou(CalculateTree):
    """Transformer for the tekiyou grammar (tekiyou / tekiyou_entry start rules)."""

    def __init__(self):
        super().__init__()

    def _collect_info(self, children):
        """Merge the children of one debit_info/credit_info line into a dict.

        Tuples are param_pairs; dicts are item/price/quantity/amount parts.
        Other children (e.g. the Dr/Cr sign token) are skipped.
        """
        info = {"memo_number":[],"memo_string":[]}
        for node in children:
            node_type = type(node)
            if node_type is tuple:
                info = self.get_updated_dict_with_param_pair(info,node)
            elif node_type is dict:
                info.update(node)
        return info

    def tekiyou(self,children):
        """tekiyou: (TEXT_WITHOUT_SLASH "/")* TEXT_WITHOUT_SLASH? tekiyou_entry? TEXT?

        Dict children (the tekiyou_entry) are merged into the result; every
        other child is a text token whose value is collected under "comment".
        """
        result = {"tekiyou_entry":None,"comment":[]}
        for node in children:
            if type(node) is dict:
                result.update(node)
            else:
                result["comment"].append(node.value)
        return {"tekiyou":result}

    def tekiyou_entry(self,children):
        """tekiyou_entry: tekiyou_entry_header (debit_info | credit_info)+ tekiyou_entry_footer

        Each part is stored under its own key; parts that are absent stay None.
        A repeated debit_info/credit_info replaces the earlier one.
        """
        entry = dict.fromkeys(
            ("tekiyou_entry_header","debit_info","credit_info","tekiyou_entry_footer")
        )
        for node in children:
            if type(node) is dict:
                entry.update(node)
        return {"tekiyou_entry":entry}

    def tekiyou_entry_header(self,children):
        """tekiyou_entry_header: _ENTRY_START_MARK (prefixed_time? time_zone?) param_pair*"""
        header = {"memo_number":[],"memo_string":[]}
        for node in children:
            node_type = type(node)
            if node_type is tuple:
                # param_pair: PARTNER_MARK | PERSON_IN_CHARGE_MARK | MEMO_MARK | REMARKS_MARK
                header = self.get_updated_dict_with_param_pair(header,node)
            elif node_type is dict:
                # e.g. {"prefixed_time":'12:00:00'} or {"time_zone":'Z'}
                header.update(node)
        return {"tekiyou_entry_header":header}

    def debit_info(self,children):
        """debit_info: DEBIT_SIGN (item)? (price)? (quantity quantity_unit)? (amount? amount_unit?) param_pair*"""
        return {"debit_info":self._collect_info(children)}

    def credit_info(self,children):
        """credit_info: CREDIT_SIGN (item)? (price)? (quantity quantity_unit)? (amount? amount_unit?) param_pair*"""
        return {"credit_info":self._collect_info(children)}

    def tekiyou_entry_footer(self,children):
        """tekiyou_entry_footer: _ENTRY_END_MARK -> {"tekiyou_entry_footer": merged dict children}."""
        footer = {}
        for node in children:
            if type(node) is dict:
                footer.update(node)
        return {"tekiyou_entry_footer":footer}


In [32]:
#tekiyou_entry = "//T12:31:14Z $得意先あ >ken　&温度:23 ##test1 Dr　#0901234567 *10個 100,000.12円 &メッセージ::こんにちは　&気温:24 ##test2//"
#%time tree = tekiyou_entry_parser.parse(tekiyou_entry)

tekiyou = "a/aa//T12:31:14Z $得意先あ >ken　&温度:23 ##test1 Dr　#0901234567 *10個 100,000.12円 &メッセージ::こんにちは　&気温:24 ##test2//その他摘要２"
%time tree_tekiyou = tekiyou_parser_lalr.parse(tekiyou)
#Wall time: 2.99 ms

#tree
print(tree_tekiyou.pretty())

Wall time: 997 µs
tekiyou
  a
  aa
  tekiyou_entry
    tekiyou_entry_header
      prefixed_time	T12:31:14
      time_zone	Z
      param_pair
        param_mark	$
        param
          string	得意先あ
      param_pair
        param_mark	>
        param
          string	ken
      param_pair
        param_mark	&
        param
          memo_number
            key	温度
            number	23
      param_pair
        param_mark	##
        param
          string	test1
    debit_info
      Dr
      item	0901234567
      quantity
        number	10
      quantity_unit	個
      amount
        number	100,000.12
      amount_unit	円
      param_pair
        param_mark	&
        param
          memo_string
            key	メッセージ
            こんにちは
      param_pair
        param_mark	&
        param
          memo_number
            key	気温
            number	24
      param_pair
        param_mark	##
        param
          string	test2
    tekiyou_entry_footer
  その他摘要２



In [234]:
tree_tekiyou

Tree(Token('RULE', 'tekiyou'), [Token('TEXT_WITHOUT_SLASH', 'a'), Token('TEXT_WITHOUT_SLASH', 'aa'), Tree(Token('RULE', 'tekiyou_entry'), [Tree(Token('RULE', 'tekiyou_entry_header'), [Tree(Token('RULE', 'prefixed_time'), [Token('PREFIXED_TIME', 'T12:31:14')]), Tree(Token('RULE', 'time_zone'), [Token('TIME_ZONE', 'Z')]), Tree(Token('RULE', 'param_pair'), [Tree(Token('RULE', 'param_mark'), [Token('PARTNER_MARK', '$')]), Tree(Token('RULE', 'param'), [Tree(Token('RULE', 'string'), [Token('STRING', '得意先あ')])])]), Tree(Token('RULE', 'param_pair'), [Tree(Token('RULE', 'param_mark'), [Token('PERSON_IN_CHARGE_MARK', '>')]), Tree(Token('RULE', 'param'), [Tree(Token('RULE', 'string'), [Token('STRING', 'ken')])])]), Tree(Token('RULE', 'param_pair'), [Tree(Token('RULE', 'param_mark'), [Token('MEMO_MARK', '&')]), Tree(Token('RULE', 'param'), [Tree(Token('RULE', 'memo_number'), [Tree(Token('RULE', 'key'), [Token('STRING', '温度')]), Tree(Token('RULE', 'number'), [Token('NUMBER', '23')])])])]), Tree(Tok

In [33]:
cty = CaliculateTekiyou()
%time res = cty.transform(tree_tekiyou)
res

Wall time: 988 µs


{'tekiyou': {'tekiyou_entry': {'tekiyou_entry_header': {'memo_number': [('温度',
      23,
      None)],
    'memo_string': [],
    'prefixed_time': 'T12:31:14',
    'time_zone': 'Z',
    'partner': '得意先あ',
    'person_in_charge': 'ken',
    'remarks': 'test1'},
   'debit_info': {'memo_number': [('気温', 24, None)],
    'memo_string': [('メッセージ', 'こんにちは')],
    'item': '0901234567',
    'op_quantity': 'OP_VALUE_QUANTITY',
    'quantity': 10,
    'quantity_unit': '個',
    'op_amount': 'OP_VALUE_AMOUNT',
    'amount': 100000.12,
    'amount_unit': '円',
    'remarks': 'test2'},
   'credit_info': None,
   'tekiyou_entry_footer': {}},
  'comment': ['a', 'aa', 'その他摘要２']}}

In [112]:
res["tekiyou"]["tekiyou_entry"]["tekiyou_entry_header"]

{'memo_number': [('温度', 23, None)],
 'memo_string': [],
 'prefixed_time': 'T12:31:14',
 'time_zone': 'Z',
 'partner': '得意先あ',
 'person_in_charge': 'ken',
 'remarks': 'test1'}

In [22]:
# Stand-alone check of the NUMBER terminal: optional sign, digit groups
# separated by "," or "_", and an optional fractional part.
# INT is taken from Lark's common grammar; LETTER, DIGIT and DECIMAL are
# imported as well but not referenced by the rules below.
number_parser = Lark(r"""
    number: NUMBER
    NUMBER: ["+"|"-"] INT ((","|"_") INT)* ["." INT]
    %import common (LETTER, DIGIT, INT, DECIMAL)
    """,start ="number")
entry = "123_512.34"
tree = number_parser.parse(entry)
#tree
print(tree.pretty())

number	123_512.34



In [35]:
# Sample document mixing free text with journal entries, parsed below with a
# parser that applies the transformer during parsing.
example_journal_text= r"""
以下に仕訳の例を示します。
*仕訳の例１ abc 123


//2022-12-14 ##test
Dr　通信費/<担当者>　#<特売品> @600 ?B円 
 Cr　現金 5000 ##test
 Cr　預金 1000//

次に、仕訳の例２を示します。
仕訳の例２

//
2022-12-14
##こめんと２です。
Dr　通信費/携帯代　#0901234567 @<rate> *<販売上限>個 <所持金額>円 
 Cr　現金 5000 ##test
 Cr　預金 1000 &気温:25.0度
 Cr　預金 1000 &天気::晴れ
//

このようにテキストの中に仕訳を記述し、抽出できます。

//2022-12-15 >ken &rate:100 &天気::晴れ//
//2022-12-16 >gen &rate:110 &天気::雨//


このようにパラメータだけの指定もできます
"""

# Passing transformer= makes parse() return the transformed dict directly
# instead of a Tree (see the printed result).
journal_parser_tr_lalr = Lark(journal_str+lark_def_str,start ="journal",parser='lalr',transformer=CaliculateJournal())
res =journal_parser_tr_lalr.parse(example_journal_text)
print(res)

{'journal': ['以下に仕訳の例を示します。', '*仕訳の例１ abc 123', {'journal_entry': {'entries': [{'debit': {'memo_number': [], 'memo_string': [], 'account': '通信費', 'sub_account': {'ref_memo': '担当者'}, 'item': {'ref_memo': '特売品'}, 'price': 600, 'op_amount': 'OP_BALANCE_AMOUNT', 'amount_unit': '円'}}, {'credit': {'memo_number': [], 'memo_string': [], 'account': '現金', 'op_amount': 'OP_VALUE_AMOUNT', 'amount': 5000, 'remarks': 'test'}}, {'credit': {'memo_number': [], 'memo_string': [], 'account': '預金', 'op_amount': 'OP_VALUE_AMOUNT', 'amount': 1000}}], 'entry_header': {'memo_number': [], 'memo_string': [], 'date_or_datetime': ('DATE', '2022-12-14'), 'remarks': 'test'}, 'entry_footer': {}}}, '次に、仕訳の例２を示します。', '仕訳の例２', {'journal_entry': {'entries': [{'debit': {'memo_number': [], 'memo_string': [], 'account': '通信費', 'sub_account': '携帯代', 'item': '0901234567', 'price': {'ref_memo': 'rate'}, 'op_quantity': 'OP_REF_QUANTITY', 'quantity': {'ref_memo': '販売上限'}, 'quantity_unit': '個', 'op_amount': 'OP_REF_AMOUNT', 'amo

In [36]:
journal_sorted = get_journal_sorted(res)
journal_sorted

{'journal': [{'journal_entry': {'entries': [{'debit': {'memo_number': [],
       'memo_string': [],
       'account': '通信費',
       'sub_account': {'ref_memo': '担当者'},
       'item': {'ref_memo': '特売品'},
       'price': 600,
       'op_amount': 'OP_BALANCE_AMOUNT',
       'amount_unit': '円'}},
     {'credit': {'memo_number': [],
       'memo_string': [],
       'account': '現金',
       'op_amount': 'OP_VALUE_AMOUNT',
       'amount': 5000,
       'remarks': 'test'}},
     {'credit': {'memo_number': [],
       'memo_string': [],
       'account': '預金',
       'op_amount': 'OP_VALUE_AMOUNT',
       'amount': 1000}}],
    'entry_header': {'memo_number': [],
     'memo_string': [],
     'date_or_datetime': ('DATE', '2022-12-14'),
     'remarks': 'test'},
    'entry_footer': {}}},
  {'journal_entry': {'entries': [{'debit': {'memo_number': [],
       'memo_string': [],
       'account': '通信費',
       'sub_account': '携帯代',
       'item': '0901234567',
       'price': {'ref_memo': 'rate'},
    

In [37]:
def get_dict_param(dic,*keys):
    """Look up a value in nested dicts by following ``keys`` in order.

    Args:
        dic: the outermost dict.
        *keys: one key per nesting level.

    Returns:
        The value found at the end of the key path, or None when ``dic`` (or
        any intermediate value) is not a dict, when no keys are given, or when
        a key is missing.
    """
    if not keys:
        return None

    current = dic
    for key in keys:
        # Every level that is indexed must be a real dict containing the key.
        if type(current) is not dict or key not in current:
            return None
        current = current[key]
    return current

In [40]:
# First journal entry of the date-sorted journal
entry0 = journal_sorted["journal"][0]

In [39]:
entry0

{'journal_entry': {'entries': [{'debit': {'memo_number': [],
     'memo_string': [],
     'account': '通信費',
     'sub_account': {'ref_memo': '担当者'},
     'item': {'ref_memo': '特売品'},
     'price': 600,
     'op_amount': 'OP_BALANCE_AMOUNT',
     'amount_unit': '円'}},
   {'credit': {'memo_number': [],
     'memo_string': [],
     'account': '現金',
     'op_amount': 'OP_VALUE_AMOUNT',
     'amount': 5000,
     'remarks': 'test'}},
   {'credit': {'memo_number': [],
     'memo_string': [],
     'account': '預金',
     'op_amount': 'OP_VALUE_AMOUNT',
     'amount': 1000}}],
  'entry_header': {'memo_number': [],
   'memo_string': [],
   'date_or_datetime': ('DATE', '2022-12-14'),
   'remarks': 'test'},
  'entry_footer': {}}}

In [229]:
# Sub-account of the debit line of the first journal entry.
# The inner call fetches the "entries" list; its first element is the debit line here.
p = get_dict_param(get_dict_param(entry0,"journal_entry","entries")[0],"debit","sub_account")
print(p)

{'ref_memo': '担当者'}
