In [None]:
from pathlib import Path
import os

# Location of the 'data' folder, resolved against the current working directory.
data_directory = Path(os.getcwd()).joinpath('data')

print(data_directory)

In [None]:
from lark import Lark

# The grammar lives two directory levels above the current working directory,
# under ledes_parser/grammars/spec_98B.
grammar_file = Path(os.getcwd()).parent.parent.joinpath(
    'ledes_parser', 'grammars', 'spec_98B', 'line_item.lark'
)
print(grammar_file)

# Build the parser straight from the grammar file.
parser = Lark.open(grammar_file)

# Sample line item used throughout the notebook. The newline right after the
# opening quotes and the one before the closing quotes are part of the text
# handed to the parser.
line_item = """
19990225|96542|00711|0528|1684.45|19990101|19990131|For services rendered|1|F|2.00|-70|630|19990115|L510||A102|22547|Research Attorney's fees, Set off claim|24-6437381|350|Arnsley, Robert|PARTNR|423-987[]
"""

# Parse the sample and show the resulting tree.
ast = parser.parse(line_item)
print(ast.pretty())


In [None]:
from decimal import Decimal
from lark import Token
from lark.visitors import Transformer
from datetime import datetime

class LineItemTransformer(Transformer):
    """Convert a parsed line-item tree into a flat dict of typed field values.

    Lower-case methods are rule callbacks: each receives the list of already
    transformed children of its rule and returns a one-entry dict
    ``{field_name: value}``. Upper-case methods are token callbacks: each
    receives the matched ``Token`` and returns its value converted to
    ``str``, ``Decimal`` or ``datetime``. ``fee`` then merges the one-entry
    dicts of all its children into a single dict.

    Token callbacks only run when ``visit_tokens`` is true, which is the
    default here.
    """

    def __init__(self, visit_tokens: bool = True) -> None:
        """Store the date format and initialise the base transformer.

        Args:
            visit_tokens: Forwarded unchanged to ``Transformer.__init__``.
        """
        # Dates are written as eight digits: year, month, day (e.g. 19990225).
        self._date_format = '%Y%m%d'
        super().__init__(visit_tokens)

    # ------------------------------------------------------------------
    # Shared helpers. The leading underscore keeps them clearly separate
    # from the rule/token callbacks that are looked up by name.
    # ------------------------------------------------------------------

    @staticmethod
    def _field(name: str, children) -> dict:
        """Wrap the single child of a rule as ``{name: child}``.

        Raises:
            ValueError: If ``children`` does not hold exactly one item.
        """
        (value,) = children
        return {name: value}

    @staticmethod
    def _optional_field(name: str, children) -> dict:
        """Like ``_field``, but an empty ``children`` yields ``{name: ''}``.

        Raises:
            ValueError: If ``children`` holds more than one item.
        """
        (value,) = (children or ('',))
        return {name: value}

    def _date(self, t: Token) -> datetime:
        """Parse a token's text with ``self._date_format``."""
        return datetime.strptime(t.value, self._date_format)

    @staticmethod
    def _decimal(t: Token) -> Decimal:
        """Convert a token's text to ``Decimal``."""
        return Decimal(t.value)

    @staticmethod
    def _text(t: Token) -> str:
        """Return a token's text as a plain ``str``."""
        return str(t.value)

    # ------------------------------------------------------------------
    # Line-item level
    # ------------------------------------------------------------------

    def fee(self, children):
        """Merge the one-entry dicts of all children into a single dict.

        If two children carry the same key, the later one wins.
        """
        # TODO: Make a class per line item type, to enforce individual validation.
        # Each class gets a set of specifications. This method would return the constructor with **children as the kwargs.
        # In the constructor, all specifications that can run at the line item level, do.
        # Raises an error if invalid - else, passes validation up the chain to the invoice level.
        # Ultimately validation is a chain of responsibility pattern going from ledes file -> fields for each line item -> line item -> invoice (maybe)
        return {k: v for d in children for k, v in d.items()}

    def F(self, children):
        """Token callback for ``F``; the argument is ignored.

        Always reports the adjustment type as the literal ``'F'``
        (presumably "fee", matching the ``fee`` rule - confirm against the
        grammar).
        """
        return {'exp_fee_inv_adj_type': 'F'}

    # ------------------------------------------------------------------
    # Date fields
    # ------------------------------------------------------------------

    def invoice_date(self, children):
        return self._field('invoice_date', children)

    def INVOICE_DATE(self, t: Token):
        return self._date(t)

    def billing_start_date(self, children):
        return self._field('billing_start_date', children)

    def BILLING_START_DATE(self, t: Token):
        return self._date(t)

    def billing_end_date(self, children):
        return self._field('billing_end_date', children)

    def BILLING_END_DATE(self, t: Token):
        return self._date(t)

    def line_item_date(self, children):
        return self._field('line_item_date', children)

    def LINE_ITEM_DATE(self, t: Token):
        return self._date(t)

    # ------------------------------------------------------------------
    # Required text fields
    # ------------------------------------------------------------------

    def client_id(self, children):
        return self._field('client_id', children)

    def CLIENT_ID(self, t: Token):
        return self._text(t)

    def law_firm_matter_id(self, children):
        return self._field('law_firm_matter_id', children)

    def LAW_FIRM_MATTER_ID(self, t: Token):
        return self._text(t)

    def invoice_total(self, children):
        return self._field('invoice_total', children)

    def INVOICE_TOTAL(self, t: Token):
        return self._decimal(t)

    def client_matter_id(self, children):
        return self._field('client_matter_id', children)

    def CLIENT_MATTER_ID(self, t: Token):
        return self._text(t)

    def invoice_description(self, children):
        return self._field('invoice_description', children)

    def INVOICE_DESCRIPTION(self, t: Token):
        return self._text(t)

    def line_item_number(self, children):
        return self._field('line_item_number', children)

    def LINE_ITEM_NUMBER(self, t: Token):
        return self._text(t)

    def invoice_number(self, children):
        return self._field('invoice_number', children)

    def INVOICE_NUMBER(self, t: Token):
        return self._text(t)

    def timekeeper_classification(self, children):
        return self._field('timekeeper_classification', children)

    def TIMEKEEPER_CLASSIFICATION(self, t: Token):
        return self._text(t)

    def timekeeper_name(self, children):
        return self._field('timekeeper_name', children)

    def TIMEKEEPER_NAME(self, t: Token):
        return self._text(t)

    # ------------------------------------------------------------------
    # Optional text fields: an empty rule becomes ''
    # ------------------------------------------------------------------

    def line_item_task_code(self, children):
        return self._optional_field('line_item_task_code', children)

    def LINE_ITEM_TASK_CODE(self, t: Token):
        return self._text(t)

    def line_item_activity_code(self, children):
        return self._optional_field('line_item_activity_code', children)

    def LINE_ITEM_ACTIVITY_CODE(self, t: Token):
        return self._text(t)

    def timekeeper_id(self, children):
        return self._optional_field('timekeeper_id', children)

    def TIMEKEEPER_ID(self, t: Token):
        return self._text(t)

    def line_item_description(self, children):
        return self._optional_field('line_item_description', children)

    def LINE_ITEM_DESCRIPTION(self, t: Token):
        return self._text(t)

    def law_firm_id(self, children):
        return self._optional_field('law_firm_id', children)

    def LAW_FIRM_ID(self, t: Token):
        return self._text(t)

    def line_item_expense_code(self, children):
        return self._optional_field('line_item_expense_code', children)

    def LINE_ITEM_EXPENSE_CODE(self, t: Token):
        return self._text(t)

    # ------------------------------------------------------------------
    # Numeric fields
    # ------------------------------------------------------------------

    def line_item_unit_cost(self, children):
        return self._field('line_item_unit_cost', children)

    def LINE_ITEM_UNIT_COST(self, t: Token):
        return self._decimal(t)

    def line_item_number_of_units(self, children):
        return self._field('line_item_number_of_units', children)

    def LINE_ITEM_NUMBER_OF_UNITS(self, t: Token):
        return self._decimal(t)

    def line_item_adjustment_amount(self, children):
        return self._field('line_item_adjustment_amount', children)

    def LINE_ITEM_ADJUSTMENT_AMOUNT(self, t: Token):
        return self._decimal(t)

    def line_item_total(self, children):
        return self._field('line_item_total', children)

    def LINE_ITEM_TOTAL(self, t: Token):
        return self._decimal(t)

import pprint

# Parse the sample line item again and run the transformer over the tree.
line_item_transformer = LineItemTransformer()
line_item_tree = parser.parse(line_item)
result = line_item_transformer.transform(line_item_tree)

# Show the value the transformer returned for the whole tree.
pprint.pprint(result)
