In [1]:
import xml.etree.ElementTree as ET

def parse_first_1000_lines(file_path):
    # Counter for the number of lines processed
    line_count = 0

    # Open the XML file in binary mode to ensure compatibility with iterparse
    with open(file_path, 'rb') as xml_file:
        # Create an iterparse object for the XML file
        context = ET.iterparse(xml_file, events=("start", "end"))
        
        # Turn it into an iterator
        context = iter(context)

        # Get the root element
        event, root = next(context)

        for event, elem in context:
            if event == "start":
                line_count += 1
            
            if line_count >= 25:
                # Stop after processing 1000 lines
                break
            
            if event == "end":
                # Process the element (elem) here as needed
                # For example, you could print the element's tag and attributes
                print(f"Tag: {elem.tag}, Attributes: {elem.attrib}, Text: {elem.text.strip() if elem.text else 'None'}")
                # It's important to clear the elements to free memory
                elem.clear()

                # Also, clear the root to prevent build-up of processed elements
                root.clear()

# Replace 'your_large_file.xml' with the path to your XML file
parse_first_1000_lines('v20240401_v2_xml.xml')

Tag: LaatstVerwerkteMutatieVolgnummer, Attributes: {}, Text: 16587138
Tag: Pand_opnamedatum, Attributes: {}, Text: 20240330
Tag: Pand_opnametype, Attributes: {}, Text: Detailopname
Tag: Pand_status, Attributes: {}, Text: Vergunningsaanvraag
Tag: Pand_berekeningstype, Attributes: {}, Text: NTA 8800:2023 (detailopname woningbouw)
Tag: Pand_energieklasse, Attributes: {}, Text: A+++
Tag: Pand_is_op_basis_van_referentie_gebouw, Attributes: {}, Text: 0
Tag: Pand_gebouwklasse, Attributes: {}, Text: W
Tag: Meting_geldig_tot, Attributes: {}, Text: 20340330
Tag: Pand_registratiedatum, Attributes: {}, Text: 20240330
Tag: Pand_gebouwtype, Attributes: {}, Text: Vrijstaande woning
Tag: Pand_projectnaam, Attributes: {}, Text: Vrijstaande woning Brinks Enterstraat Rijssen
Tag: Pand_projectobject, Attributes: {}, Text: woning Brinks Enterstraat  Rijssen
Tag: Pand_gebruiksoppervlakte_thermische_zone, Attributes: {}, Text: 210.93
Tag: Pand_energiebehoefte, Attributes: {}, Text: 71.38
Tag: Pand_eis_energi

In [3]:
def copy_first_100_lines_binary_mode(source_file_path, destination_file_path):
    try:
        with open(source_file_path, 'rb') as source_file:  # Open in binary read mode
            with open(destination_file_path, 'wb') as destination_file:  # Open in binary write mode
                line_count = 0
                while line_count < 10:
                    line = source_file.readline()
                    # Break the loop if we reach the end of the file
                    if not line:
                        break
                    destination_file.write(line)
                    line_count += 1
        print("Successfully copied the first 100 lines in binary mode.")
    except Exception as e:
        print(f"An error occurred: {e}")

# Specify the path to your source XML file
source_file_path = 'v20240401_v2_xml.xml'  # Replace 'source.xml' with your actual source file path
# Specify the path where you want to save the first 100 lines
destination_file_path = 'first_100_lines.xml'  # Replace 'first_100_lines.xml' with your desired destination file path

# Call the function with the paths
copy_first_100_lines_binary_mode(source_file_path, destination_file_path)


Successfully copied the first 100 lines in binary mode.


In [5]:
def copy_first_chunk_of_xml(source_file_path, destination_file_path, chunk_size=1024, max_chunks=100):
    try:
        with open(source_file_path, 'rb') as source_file:
            with open(destination_file_path, 'wb') as destination_file:
                chunk_count = 0
                while chunk_count < max_chunks:
                    chunk = source_file.read(chunk_size)
                    if not chunk:
                        break
                    destination_file.write(chunk)
                    chunk_count += 1
        print("Successfully copied the first part of the file in chunks.")
    except Exception as e:
        print(f"An error occurred: {e}")

# Specify the path to your source XML file
source_file_path = 'v20240401_v2_xml.xml'  # Replace 'source.xml' with your actual source file path
# Specify the path where you want to save the first part of the file
destination_file_path = 'first_part_of_xml.xml'  # Adjust the file name as needed

# Call the function with the paths
copy_first_chunk_of_xml(source_file_path, destination_file_path)


Successfully copied the first part of the file in chunks.
