In [1]:
#Variables

TEXT_PATTERNS = {
        'khanbank_3': [
            r'Printed date:',
            r'Депозит дансны дэлгэрэнгүй хуулга',
            r'Харилцагчийн нэр:',
            r'Дансны дугаар:',
            r'IBAN no:',
            r'Гүйлгээний Харьцсан',
            r'№ Цаг Салбар Эхний үлдэгдэл Дебит гүйлгээ Кредит гүйлгээ Эцсийн үлдэгдэл Гүйлгээний утга',
        ],
        'khanbank_regular': [
            r'Printed Date:',
            r'Депозит дансны хуулга',
            r'Хэрэглэгч:',
            r'Дансны дугаар:',
            r'Интервал:',
            r'Гүйлгээний огноо Салбар Эхний үлдэгдэл Дебит гүйлгээ Кредит гүйлгээ Эцсийн үлдэгдэл Гүйлгээний утга Харьцсан данс',
        ],
        'khanbank_compact': [
            r'Харилцагчийн нэр:',
            r'Хамтран эзэмшигч:',
            r'Дансны төрөл:',
            r'Дансны дугаар:',
            r'/КИОСК/',
            r'Website: www.khanbank.com',
            r'№ Харьцсан данс Цаг Хуулгын агуулга Орлого Зарлага Үлдэгдэл',
        ],
        'tdb_old': [
            r'Депозит дансны хуулга',
            r'Хэвлэсэн огноо:',
            r'Дансны дугаар:',
            r'Хамрах хугацаа:',
            r'Эцсийн үлдэгдэл:',
            r'Боломжит үлдэгдэл:',
            r'Огноо Теллер Орлого Зарлага Ханш Харьцсан Үлдэгдэл Гүйлгээний утга',
        ],
        'tdb_3': [
            r'Хэвлэсэн огноо:',
            r'Депозит дансны хуулга',
            r'Харилцагч:',
            r'Дансны дугаар:',
            r'IBAN дугаар:',
            r'Хамрах хугацаа:',
            r'Огноо Теллер Орлого Зарлага Ханш Үлдэгдэл Гүйлгээний утга',
        ],
        'tdb_1': [
            r'Хэвлэсэн огноо:',
            r'Депозит дансны хуулга',
            r'Харилцагч:',
            r'Дансны дугаар:',
            r'Хамрах хугацаа:',
            r'Огноо Теллер Орлого Зарлага Ханш Харьцсан данс Үлдэгдэл Гүйлгээний утга',
        ]
    }

    

In [4]:
import re
from pathlib import Path
from typing import Dict, Any, Optional, Type, List
from datetime import datetime, timezone
from decimal import Decimal, InvalidOperation
import pdfplumber

class KhanBank3StatementParser():
    """
    Parser for the KhanBank 3 statement format.
    """
    
    def __init__(
        self,
        sources_dir: str = "sources",
        results_dir: str = "results",
        name_pattern: str = r'Харилцагчийн нэр:\s*([^\n]+)',
        account_pattern: str = r'Дансны дугаар:\s*(\d+)\s*',
        account_type_pattern: str = r'Дансны дугаар:.*?\((\w+)\)',
        table_start_pattern: str = r'Цаг Салбар',
        table_end_pattern: str = r'Нийт дүн:',
        statement_parser: str = 'khanbank_3',
        pattern: str = (
            r'^(\d+)\s+'
            r'(\d{4}/\d{2}/\d{2})\s+'
            r'(\d{2}:\d{2})\s+'
            r'(\d+)\s+'
            r'(-?\d{1,3}(?:,\d{3})*\.\d{2})\s+'
            r'(-?\d{1,3}(?:,\d{3})*\.\d{2})\s+'
            r'(-?\d{1,3}(?:,\d{3})*\.\d{2})\s+'
            r'(.*?)(?:\s+(\d+))?\s*$'
        )
    ):
        self.name_pattern = name_pattern
        self.account_pattern = account_pattern
        self.account_type_pattern = account_type_pattern
        self.table_start_pattern = table_start_pattern
        self.table_end_pattern = table_end_pattern
        self.pattern = pattern
    
    def extract_customer_details(self, text: str) -> Dict[str, str]:
        """Extract customer details from the text content."""
        details = {
            'first_name': '',
            'last_name': '',
            'account_number': '',
            'account_type': '',
            'statement_parser': self.statement_parser
        }
        
        try:
            # Extract customer name
            name_match = re.search(self.name_pattern, text)
            if name_match:
                full_name = name_match.group(1).strip()
                # Split the name into first and last name
                name_parts = full_name.split()
                if len(name_parts) >= 2:
                    details['last_name'] = name_parts[0]
                    details['first_name'] = name_parts[1]
            
            # Extract account number
            account_match = re.search(self.account_pattern, text)
            if account_match:
                details['account_number'] = account_match.group(1)
            
            # Extract account type
            account_type_match = re.search(self.account_type_pattern, text)
            if account_type_match:
                details['account_type'] = account_type_match.group(1).strip()
                
        except Exception as e:
            print(f"Error extracting customer details: {str(e)}")
            
        return details
    
    def find_table_boundaries(self, text: str) -> tuple[Optional[int], Optional[int]]:
        """Find the start and end positions of the statement table in the text."""
        start_pos = None
        end_pos = None
        
        # Find table start
        start_match = re.search(self.table_start_pattern, text)
        if start_match:
            start_pos = start_match.start()
            
        # Find table end
        end_match = re.search(self.table_end_pattern, text)
        if end_match:
            end_pos = end_match.end()
            
        return start_pos, end_pos
    
    def _filter_header_content(self, text: str) -> str:
        """Filter out header content when table boundaries aren't found."""
        lines = text.split('\n')
        filtered_lines = []
        
        # Patterns that indicate header/metadata content (not transactions)
        header_patterns = [
            r'Харилцагчийн нэр:',           # Customer name
            r'Дансны дугаар:',              # Account number
            r'Дансны төрөл:',               # Account type
            r'IBAN',                        # IBAN number
            r'Printed',                     # Printed date/info
            r'Депозит дансны.*хуулга',      # Statement title
            r'Хуудас \d+',                  # Page number
            r'Website:',                    # Website info
            r'Банк энэхүү хуулгыг',         # Bank disclaimer
            r'Гүйлгээний Салбар Журнал',    # Transaction header
            r'№.*Харьцсан данс.*Цаг',       # Column headers
            r'^\s*Цаг\s+Салбар',            # Table header start pattern
            r'Нийт дүн:',                   # Total amount footer
            r'Эхний үлдэгдэл',              # Starting balance
            r'Эцсийн үлдэгдэл',             # Ending balance
            r'^\s*$',                       # Empty lines
        ]
        
        for line in lines:

            line = line.strip()
            
            # Skip empty lines
            if not line:
                continue
                
            
            # Check if line matches any header pattern
            is_header = False
            checker = line

            # print("this is checker") 
            # print(checker)

            # # Define regex pattern for the line structure
            # pattern = r'^(\d+)\s+'  # Sequential number / 0
            # pattern += r'(\d{4}/\d{2}/\d{2})\s+'  # Date (YYYY/MM/DD) / 1
            # pattern += r'(\d{2}:\d{2})\s+'  # Time (HH:MM) / 2
            # pattern += r'(\d+)\s+'  # Branch number / 3
            # pattern += r'(-?\d{1,3}(?:,\d{3})*\.\d{2})\s+'  # First balance / 4
            # pattern += r'(-?\d{1,3}(?:,\d{3})*\.\d{2})\s+'  # Amount (debit/credit) / 5
            # pattern += r'(-?\d{1,3}(?:,\d{3})*\.\d{2})\s*'  # Last balance / 6
            # pattern += r'(.*?)(?:\s+(\d+))?\s*$'  # Optional description and optional related account / 7,8

            match = re.match(self.pattern, checker.strip())
            haveDescription = True
            if not match:
                print(f"Line did not match expected pattern in (checker): {checker}")
                haveDescription = False

            if haveDescription:
                # Extract all groups
                try:
                    groups = list(match.groups())
                    groups.pop(7)
                    checker = " ".join(groups)

                except Exception as e:
                    print("checker error", e)


            for pattern in header_patterns:
                if re.search(pattern, checker, re.IGNORECASE):
                    # print("concept of line -------------->", pattern, checker, re.IGNORECASE)
                    is_header = True
                    break
            
            # Only include lines that look like actual transaction data
            # Transaction lines typically start with a number (sequence) followed by date
            if not is_header and re.match(r'^\d+\s+\d{4}/\d{2}/\d{2}', line):
                filtered_lines.append(line)
            elif not is_header and not re.match(r'^\d+\s', line):
                # This might be a description continuation line
                # Only include if we already have some transaction lines
                if filtered_lines:
                    filtered_lines.append(line)
        
        result = '\n'.join(filtered_lines)
        print(f"Header filtering: {len(lines)} lines -> {len(filtered_lines)} lines")
        return result
    
    def parse_statement_line(self, line: str) -> Optional[Dict[str, Any]]:
        """Parse a single line from the statement table."""
        # Skip empty lines, header lines, or disclaimer lines
        if (not line.strip() or 
            'Банк энэхүү хуулгыг гаргаж өгснөөр' in line or
            'Гүйлгээний Салбар Журнал' in line or
            'Харьцсан данс Цаг' in line):
            return None
            
        try:

            # # Define regex pattern for the line structure
            # pattern = r'^(\d+)\s+'  # Sequential number / 0
            # pattern += r'(\d{4}/\d{2}/\d{2})\s+'  # Date (YYYY/MM/DD) / 1
            # pattern += r'(\d{2}:\d{2})\s+'  # Time (HH:MM) / 2
            # pattern += r'(\d+)\s+'  # Branch number / 3
            # pattern += r'(-?\d{1,3}(?:,\d{3})*\.\d{2})\s+'  # First balance / 4
            # pattern += r'(-?\d{1,3}(?:,\d{3})*\.\d{2})\s+'  # Amount (debit/credit) / 5
            # pattern += r'(-?\d{1,3}(?:,\d{3})*\.\d{2})\s*'  # Last balance / 6
            # pattern += r'(.*?)(?:\s+(\d+))?\s*$'  # Optional description and optional related account / 7,8
            
            match = re.match(self.pattern, line.strip())
            if not match:
                print(f"Line did not match expected pattern: {line}")
                return None

            # Extract all groups
            groups = match.groups()

            print(groups)

            # Parse sequential number
            seq_number = int(groups[0])
            
            # print("Transaction id debug---------------->", seq_number)
            # Parse date
            transaction_date = datetime.strptime(groups[1], '%Y/%m/%d').date()

            # Parse time
            transaction_time = datetime.strptime(groups[2], '%H:%M').time()
            
            # Parse branch number
            branch_number = int(groups[3])

            # Parse first balance
            first_balance = Decimal(groups[4].replace(',', ''))

            # Parse amount (remove commas and handle negative)
            amount_str = groups[5].replace(',', '')
            try:
                amount = Decimal(amount_str)
            except (ValueError, InvalidOperation):
                print(f"Invalid amount format: {amount_str}")
                return None

            # Parse balance (remove commas)
            balance_str = groups[6].replace(',', '')
            try:
                balance = Decimal(balance_str)
            except (ValueError, InvalidOperation):
                print(f"Invalid balance format: {balance_str}")
                return None
            
            # Get description (trimmed)
            description = groups[7].strip()
            if len(description) > 255:
                description = description[:255]
            
            # Determine transaction type and amounts
            if amount < 0:
                transaction_type = 'expense'
                expense = abs(amount)
                income = None
            else:
                transaction_type = 'income'
                income = amount
                expense = None
            
            # Parse related account if present
            related_account = int(groups[8]) if groups[8] else None

            return {
                'transaction_id': seq_number,
                'type': transaction_type,
                'date': datetime.combine(transaction_date, transaction_time).replace(tzinfo=timezone.utc),
                'income': income,
                'expense': expense,
                'balance_end': balance,
                'description': description,
                'related_account': related_account,
            }
            
        except Exception as e:
            print(f"Error parsing statement line: {str(e)}")
            print(f"Problematic line: {line}")
            return None

    def extract_statement_details(self, text: str) -> List[Dict[str, Any]]:
        """
        Extract statement details from the text content.
        
        Args:
            text (str): Text content containing the statement table
            
        Returns:
            List[Dict[str, Any]]: List of parsed transactions
        """
        transactions = []
        
        try:
            # Find table boundaries
            start_pos, end_pos = self.find_table_boundaries(text)
            
            # If boundaries are not found (which can happen when processing specific page ranges),
            # try to process the entire text but filter out header content more aggressively
            if start_pos is None or end_pos is None:
                print("Could not find table boundaries in the text - attempting to process entire content with header filtering")
                table_text = self._filter_header_content(text.strip())
            else:
                # Extract table content, excluding the start and end identifiers
                start_line_end = text.find('\n', start_pos)
                if start_line_end == -1:
                    start_line_end = start_pos
                else:
                    start_line_end += 1
                    
                end_line_start = text.rfind('\n', 0, end_pos)
                if end_line_start == -1:
                    end_line_start = end_pos
                    
                table_text = text[start_line_end:end_line_start].strip()
            
            if not table_text:
                print("No table content found in the text")
                return transactions
                
            lines = table_text.split('\n')
            
            print(f"Total lines in table: {len(lines)}")
            processed_count = 0
            skipped_count = 0

            i = 0
            description_lines = []

            while i < len(lines):
                line = lines[i].strip()

                # Skip empty lines and header lines (enhanced filtering)
                if (not line or 
                    'Гүйлгээний Салбар Журнал' in line or
                    '№ Харьцсан данс Цаг' in line or
                    'Банк энэхүү хуулгыг гаргаж өгснөөр' in line or
                    'Хуудас' in line or
                    'огноо данс' in line or
                    'Харилцагчийн нэр:' in line or
                    'Дансны дугаар:' in line or
                    'Дансны төрөл:' in line or
                    'IBAN' in line or
                    'Printed' in line.lower() or
                    'Website:' in line or
                    'Депозит дансны' in line or
                    'Эхний үлдэгдэл' in line or
                    'Эцсийн үлдэгдэл' in line or
                    'Нийт дүн:' in line):
                    print("maybe here?----------------->")
                    skipped_count += 1
                    i += 1
                    continue


                # Check if this line matches transaction format (starts with number and date)
                if re.match(r'^\d+\s+\d{4}/\d{2}/\d{2}', line):
                    # Parse the current transaction line
                    transaction = self.parse_statement_line(line)
                    if transaction and processed_count == 0:
                        # Log the first transaction for debugging
                        print(f"First transaction parsed: {transaction.get('description', 'N/A')[:50]}...")
                    if not transaction:
                        print(f"Failed to parse: {line}")
                        skipped_count += 1
                        i += 1
                        continue
                    
                    # Look ahead for description lines that belong to this transaction
                    related_account = None
                    j = i + 1
                    
                    # Look ahead until we find the next transaction line or end of lines
                    if description_lines:
                        while j < len(lines):
                            next_line = lines[j].strip()
                            
                            # Skip empty lines and header lines
                            if (not next_line or 
                                'Гүйлгээний Салбар Журнал' in next_line or
                                '№ Харьцсан данс Цаг' in next_line or
                                'Банк энэхүү хуулгыг гаргаж өгснөөр' in next_line or
                                'Хуудас' in next_line or
                                'огноо данс' in next_line):
                                j += 1
                                continue
                            
                            # If we find another transaction line, stop looking ahead
                            if re.match(r'^\d+\s+\d{4}/\d{2}/\d{2}', next_line):
                                break
                            
                            # This is a description line for the current transaction
                            description_lines.append(next_line)
                            j += 1
                    
                    # Process the description lines for this transaction
                    if description_lines:
                        description_parts = []
                        
                        # Start with the original description from the transaction line
                        original_description = transaction.get('description', '')

                        if re.match(r'^\d{8,}$', original_description):
                            related_account = int(original_description)
                            transaction['related_account'] = related_account
                        else:
                            description_parts.append(original_description)
                        
                        # Process each description line
                        for desc_line in description_lines:
                            desc_line = desc_line.strip()
                            # Check if this line is a related account number (8+ digits, numeric only)
                            if re.match(r'^\d{8,}$', desc_line):
                                related_account = int(desc_line)
                            else:
                                description_parts.append(desc_line)
                        
                        # Join the description parts
                        final_description = ' '.join(description_parts).strip()
                        transaction['description'] = final_description
                        if related_account:
                            transaction['related_account'] = related_account
                        description_lines.clear()
                    
                    # Add the transaction to our list
                    transactions.append(transaction)
                    processed_count += 1
                    
                    # Move to the next line after processing all description lines
                    i = j
                else:
                    # Keep description lines for the next transaction
                    description_lines.append(line)
                    # print(f"Added to description lines: {line}")

                    # This line doesn't match transaction format, skip it
                    skipped_count += 1
                    i += 1

            print(f"Processed: {processed_count}, Skipped: {skipped_count}")

        except Exception as e:
            print(f"Error extracting statement details: {str(e)}")
            
        return transactions

    def parse_pdf(self, pdf_path: Path, register_number: str = None, request=None, start_page: int = None, end_page: int = None, chunk_size: int = 10) -> str:
        """Parse a single PDF file and extract its text content.
        
        Args:
            pdf_path (Path): Path to the PDF file
            register_number (str): Customer registration number
            request: Optional request object containing user information
            start_page (int, optional): Starting page number (1-indexed). If None, starts from page 1.
            end_page (int, optional): Ending page number (1-indexed). If None, processes all pages.
        """

        result = {
            'id': "",
            'name': "", # full name
            'account': "",
            'created_transactions': 0
        }
        
        try:
            with pdfplumber.open(pdf_path) as pdf:
                # Get first page for customer details
                first_page = pdf.pages[0]
                first_page_text = first_page.extract_text()
                
                # Extract and save customer details
                # customer_details = self.extract_customer_details(first_page_text)
                # customer, account = self.save_customer_details(customer_details, register_number, request)
                
                # if account:
                # print(f"Successfully saved customer details for account {account.account_number} {customer.last_name} {customer.first_name}")
                
                # Determine page range for processing
                total_pages = len(pdf.pages)
                start_idx = (start_page - 1) if start_page is not None else 0
                end_idx = end_page if end_page is not None else total_pages
                
                # Validate page range
                start_idx = max(0, start_idx) 
                    # Ensure start is not negative
                end_idx = min(total_pages, end_idx)  # Ensure end doesn't exceed total pages
                
                if start_idx >= end_idx:
                    print(f"Invalid page range: start_page={start_page}, end_page={end_page}, total_pages={total_pages}")
                    return result
                
                print(f"Processing pages {start_idx + 1} to {end_idx} of {total_pages} total pages in chunks of {chunk_size}")
                
                # Process pages in chunks to prevent memory issues
                total_transactions = 0
                chunk_start = start_idx
                
                while chunk_start < end_idx:
                    chunk_end = min(chunk_start + chunk_size, end_idx)
                    print(f"Processing chunk: pages {chunk_start + 1} to {chunk_end}")
                    
                    # Extract text from current chunk 
                    chunk_text = []
                    for page_idx in range(chunk_start, chunk_end):
                        page = pdf.pages[page_idx]
                        text = page.extract_text()
                        if text:
                            chunk_text.append(text)
                            print(f"Extracted text from page {page_idx + 1}: {len(text)} characters")
                        else:
                            print(f"No text extracted from page {page_idx + 1}")
                    
                    if chunk_text:
                        full_chunk_text = "\n\n".join(chunk_text)
                        print(f"Chunk {chunk_start + 1}-{chunk_end}: extracted {len(full_chunk_text)} characters")
                        
                        # Extract transactions from this chunk
                        chunk_transactions = self.extract_statement_details(full_chunk_text)
                        print(f"Chunk {chunk_start + 1}-{chunk_end}: found {len(chunk_transactions)} transactions")
                        
                        # Save transactions from this chunk
                        if chunk_transactions:
                            # if self.save_transactions(chunk_transactions, account):
                            total_transactions += len(chunk_transactions)
                            print(f"Chunk {chunk_start + 1}-{chunk_end}: successfully saved {len(chunk_transactions)} transactions")
                            # else:
                            #     print(f"Chunk {chunk_start + 1}-{chunk_end}: failed to save transactions")
                        
                        # Clear chunk text from memory
                        del chunk_text
                        del full_chunk_text
                        del chunk_transactions
                    
                    chunk_start = chunk_end
                
                print(f"Successfully processed {total_transactions} total transactions from pages {start_idx + 1}-{end_idx}")
                # else:
                #     print(f"Failed to save customer details for {pdf_path.name}")
                
                # result['id'] = str(customer.id)
                # result['name'] = customer.last_name + " " + customer.first_name
                # result['account'] = account.account_number
                result['created_transactions'] = total_transactions

        except Exception as e:
            print(f"Error parsing {pdf_path}: {str(e)}")
        
        return result

In [5]:
parser_container = KhanBank3StatementParser()
result = parser_container.parse_pdf("Statement_MNT_5751276069.pdf", )
#checker

CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, def

Processing pages 1 to 69 of 69 total pages in chunks of 10
Processing chunk: pages 1 to 10
Extracted text from page 1: 6933 characters
Extracted text from page 2: 7917 characters
Extracted text from page 3: 7968 characters
Extracted text from page 4: 8027 characters
Extracted text from page 5: 8177 characters
Extracted text from page 6: 8060 characters
Extracted text from page 7: 7979 characters
Extracted text from page 8: 8065 characters
Extracted text from page 9: 8252 characters
Extracted text from page 10: 8313 characters
Chunk 1-10: extracted 79709 characters
Could not find table boundaries in the text - attempting to process entire content with header filtering
Line did not match expected pattern in (checker): Printed date: 2025-08-17 11:40:51
Line did not match expected pattern in (checker): Депозит дансны дэлгэрэнгүй хуулга
Line did not match expected pattern in (checker): Харилцагчийн нэр: ТҮРГЭН СҮЛЖЭЭ Интервал: 2025/01/01 - 2025/03/31
Line did not match expected pattern in (

CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, def


('5318', '2025/03/31', '10:22', '5029', '19,438,747.05', '421,310.00', '19,860,057.05', '9n husel 5460328', '5175003233')
('5319', '2025/03/31', '10:36', '5032', '19,860,057.05', '534,750.00', '20,394,807.05', '2860810 мөнгөн арвижих', '5065354017')
('5320', '2025/03/31', '10:43', '5034', '20,394,807.05', '120,500.00', '20,515,307.05', '100352 shine 5303923', '5030427349')
('5321', '2025/03/31', '10:52', '5749', '20,515,307.05', '200,000.00', '20,715,307.05', 'baylag 6495672', '5026648796')
('5322', '2025/03/31', '11:16', '5729', '20,715,307.05', '174,150.00', '20,889,457.05', '2714396, Абажий', '5308274401')
('5323', '2025/03/31', '11:33', '5071', '20,889,457.05', '685,550.00', '21,575,007.05', '102123 мөнх дуурсах РД2769115', '5071073515')
('5324', '2025/03/31', '11:34', '5029', '21,575,007.05', '212,400.00', '21,787,407.05', 'eh undarga 6561349/ turgen suljee', '5029304477')
('5325', '2025/03/31', '11:44', '5024', '21,787,407.05', '217,870.00', '22,005,277.05', 'Гэгээ д 1000 504211

In [7]:
print(result)

{'id': '', 'name': '', 'account': '', 'created_transactions': 5413}


In [None]:
# Index writer

# f = open("./output_file.txt", "a", encoding="utf-8")

# f.write(line + "\n")

# f.close()

In [None]:
# Lost index checker


counter = 0

with open(r"D:\Projects\innoscore-backend\output_file.txt", encoding="utf-8", errors="replace") as f:
    striped = f.read().splitlines()

    print(len(striped))

    failed = 0

    for col in striped:
        number = col.split(" ")[0]

        try:
            number = int(number)
            counter = counter + 1

            # print(counter, number)

            if counter != number:
                print("skipped:", counter, number)

        except Exception:
            # print("int conversion error:", number)
            failed += 1
            pass

    print(failed)

In [None]:
# Performance checker

import time
        
        
start_time = time.perf_counter()
return_value = []
end_time = time.perf_counter()
return_value.data['performance'] = round(end_time - start_time, 2)