In [112]:
from openpyxl import load_workbook

class Account(object):

    MAX_ACCOUNT_NR_BALANCE = 99999
    MAX_ACCOUNT_NR_OTHERS = 9999

    class TYPE:
        BALANCE = 1  # Bilanz
        FUNCTIONAL = 2  # Funktionale Gliederung
        INCOME = 3  # Erfolgsrechnung
        INVEST = 5  # Investitionsrechnung
    
    def __init__(self, file_path, type, function):
        # Init
        self.file_path = file_path
        
        # Validity check
        if self.is_valid_function(function):
            self.function = function
            self.function_0 = function[0]
            self.function_filled = self.get_function_w_leading_zeros(funciton)
        
        if self.is_valid_type(type):
            self.type = type    
            self.max_account_number = self.get_max_account_number()

    @staticmethod
    def clean_cell(cell):
        if cell is None or type(cell) in (int, float):
            return cell
        else:
            cell = str(cell).strip()
            if cell.isnumeric():
                return int(cell)
            else:
                return cell      

    def clean_string(s):
        # Strip whitespace from the start and end, and replace multiple spaces with a single space
        cleaned = ' '.join(s.split())
        
        # Remove non-printable characters and unwanted characters (e.g., quotes)
        cleaned = ''.join(c for c in cleaned if c.isprintable() and c not in ['"', "'", " "])        
        return cleaned

    # helpers init
    def is_valid_function(self, function):
        # Check if the type is one of the values in TYPE class
        if not function or type(function) != int:            
            raise ValueError(f"Invalid function: {type}. Must be 1 to 9999")
        if 0 < function <= self.MAX_ACCOUNT_NR_OTHERS:
            return True
        return False
    
    def is_valid_type(self, type):
        # Check if the type is one of the values in TYPE class
        if type in vars(self.TYPE).values():
            return True
        else:
            raise ValueError((
                f"Invalid type: {type}. Must be one of: "
                f"{[v for v in vars(self.TYPE).values()]}"))          
    
    # helpers account number
    def is_valid_account_4plus2(self, account_str):
        if not account_str:
            raise ValueError(f"account_4plus2 emtpy")
        
        try:
            nr = float(account_str)         
        except:
            raise ValueError(f"account_4plus2 {account_str} not a number")
        
        if nr > self.max_account_number or int(nr) != nr or int(nr) == 0:
            raise ValueError(f"sachkonto {sachkonto} not a valid number")
            
        return True    
    
    def has_ff_in_account_4plus2(self, account_4plus2):
        if  account_4plus2 and 'ff' in str(account_4plus2):
            return True        
        return False
        
    def clean_account_4plus2(self, account_4plus2):
        if not account_4plus2:
            return None        
        if account_4plus2 and 'ff' in str(account_4plus2):
            return str(account_4plus2).replace('ff', '')
        return account_4plus2
        
    def get_max_account_number(self):
        if self.type == self.TYPE.BALANCE:
            return self.MAX_ACCOUNT_NR_BALANCE
        else:
            return self.MAX_ACCOUNT_NR_OTHERS   

    def get_account_number(self, sachkonto, account_str):
        # sachkonto must be 
        sachkonto_0 = sachkonto[0]
        function = self.get_function(function_str)
        return f"{sachkonto_0}{self.function_filled}{account_str}"
        
    # helpers sachkonto
    def is_valid_sachkonto(self, account_str):
        if not account_str:
            raise ValueError(f"sachkonto emtpy")
        
        try:
            nr = float(sachkonto)         
        except:
            raise ValueError(f"sachkonto {sachkonto} not a number")
        
        if int(nr) != nr or int(nr) == 0:
            raise ValueError(f"sachkonto {account_str} not a valid number")
            
        return True    

    # helpers function
    def get_function_w_leading_zeros(self, function):
        # Padding with leading zeros to ensure X digits
        fill = len(str(self.max_account_number)) - 1
        return [account.zfill(3) for account in str(function)]        

    # helpers worksheet
    def get_worksheet_rows(self):
        # Load the workbook in read-only mode
        try:
            wb = load_workbook(filename=self.file_path, read_only=True)
            ws = wb.active  # Assuming you're working with the first sheet
        except:
            raise ValueError((f"Invalid filename: {self.file_path}"))
        
        # Loop through the rows starting from the first row
        rows = [row for row in ws.iter_rows(values_only=True)]
        wb.close()  # Close the workbook after processing

        return rows
    
    # Methods
       
    
    def load(self):
        # Init
        accounts = []
        headers = []
        category = None

        # Load the workbook in read-only mode
        rows = self.get_worksheet_rows()
        
        # Get accounts
        for row_nr, row in enumerate(rows, start=1):
            # Get headers (first non-empty row that contains 'Sachkonto')
            if not headers:
                if row[0] == 'Sachkonto':  # You may want to adjust this condition for flexibility
                    headers = list(row)  # Extract the headers from this row
                continue
        
            # Once headers are found, process the subsequent rows as data
            if headers:
                # Create a dictionary of headers to values for each row
                data = dict(zip(headers, [self.clean_cell(x) for x in row]))

                # Get category
                if data['Sachkonto']:                    
                    category = clean_string(data['Sachkonto'])

                    # Check validity:
                    try:
                        self.is_valid_sachkonto(category)
                    except ValueError as e:
                        print(f"row {row_nr}: Sachkonto '{category}' not valid. Message: {e}")
                        return []
                else:
                    # Check validity: Konto 4+2
                    account = clean_string(data['Konto 4+2'])
                    try:
                        self.is_valid_account_4plus2(account)
                    except ValueError as e:
                        print(f"row {row_nr}: Konto 4+2 '{account}' not valid. Message: {e}")
                        return []          
                        
                    # assign ff
                    data['ff'] = self.has_ff_in_account_4plus2(account)
                    data['number'] = get_account_number(category, account)
                
                accounts.append(data)  # Add to accounts list for further processing
        
        return accounts

In [113]:
function = 

file_path_b = "./accounting/fixtures/kontenpläne/Kontenplan_SO_Bürgergemeinden_Bilanz.xlsx"
b = Account(file_path_b, type=Account.TYPE.BALANCE)

# file_path_i = "./accounting/fixtures/kontenpläne/Kontenplan_SO_Bürgergemeinden_Erfolgsrechnung.xlsx"
# i = Account(file_path_i, type=Account.TYPE.INCOME)

accounts = b.load()
if accounts:
    print(f"*successfully loaded {len(accounts)} accounts")
else:
    print(f"*completed with errors")

TypeError: __init__() missing 1 required positional argument: 'function'

In [108]:
accounts[15]

{'Sachkonto': None,
 'Konto 4+2': '10020.02ff',
 'Bezeichnung': 'Bankkontokorrent B',
 'Hinweise': '',
 'HRM1': None,
 'Hinweise HRM1': None}

In [83]:
file_path_b

'./accounting/fixtures/kontenpläne/Kontenplan_SO_Bürgergemeinden_Bilanz.xlsx'