In [60]:
import re

def extract_text(file_path):
    """Extract the abstract-like section from a UTF-8 text file.

    The section begins right after whichever start marker ends first: a line
    consisting of "Abstract" (case-insensitive) or four digits followed by
    ")". It ends at the nearest end marker found at or after that start:
    "Introduction" followed by a newline, the word "PACS", or "Key words:"
    (all case-insensitive).

    Args:
        file_path: Path of the text file to read.

    Returns:
        The extracted text with very short lines removed, or None when the
        file cannot be read or no usable start/end marker exists (a message
        is printed in those cases).
    """
    # Only the file read can realistically fail; keep the try body minimal.
    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            content = f.read()
    except FileNotFoundError:
        print(f"Error: File not found at {file_path}")
        return None
    except Exception as e:
        # Best-effort contract: report the problem and return None.
        print(f"An error occurred: {e}")
        return None

    # Start marker: whichever of "Abstract" / year-in-parentheses ends first.
    start_matches = (
        re.search(r"\nAbstract\n", content, re.IGNORECASE),
        re.search(r"\d{4}\)", content),
    )
    start_positions = [m.end() for m in start_matches if m is not None]
    if not start_positions:
        print("No valid start point ('Abstract' or year) found.")
        return None
    start_index = min(start_positions)

    # End marker: consider ALL three markers, and only occurrences at or
    # after the start, so a stray marker earlier in the file cannot abort
    # the extraction and no marker is silently ignored.
    end_patterns = (r"Introduction\n", r"\bPACS\b", r"Key words:")
    end_positions = []
    for pattern in end_patterns:
        end_match = re.compile(pattern, re.IGNORECASE).search(content, start_index)
        if end_match is not None:
            end_positions.append(end_match.start())

    if not end_positions:
        print("No valid end point ('Introduction', 'PACS', or 'Key words') found.")
        return None
    end_index = min(end_positions)

    # Can only trigger when an end marker starts exactly at the start index,
    # i.e. there is nothing to extract.
    if start_index >= end_index:
        print("Start marker appears after the end marker.")
        return None

    extracted_text = content[start_index:end_index].strip()

    # Drop blank/one-character lines and lines made of only one or two word
    # characters (typically stray symbols left over from text extraction).
    filtered_lines = [
        line
        for line in extracted_text.split("\n")
        if len(line.strip()) > 1 and not re.match(r"^\s*\w{1,2}\s*$", line)
    ]

    return "\n".join(filtered_lines)

# Demo: run the extractor on a single sample paper and print what it found.
file_path = "/content/Universal quantum computation on the power of quantum non-demolition measurements.txt"  # Point this at your own file
extracted_content = extract_text(file_path)

# Nothing is printed when extraction failed (None) or produced an empty string.
if extracted_content:
    print("\n--- Extracted Text ---\n", extracted_content, "\n----------------------\n", sep="\n")



--- Extracted Text ---

In this letter we investigate the linear and nonlinear models of optical quantum computation and
discuss their scalability and eﬃciency. We show how there are signiﬁcantly diﬀerent scaling prop-
erties in single photon computation when weak cross-Kerr nonlinearities are allowed to supplement
the usual linear optical set. In particular we show how quantum non-demolition measurements are
an eﬃcient resource for universal quantum computation.

----------------------



In [59]:
import os
import re

def extract_text(file_path):
    """Extract the abstract-like section from a UTF-8 text file.

    The section begins right after whichever start marker ends first: a line
    consisting of "Abstract" (case-insensitive) or four digits followed by
    ")". It ends at the nearest end marker found at or after that start:
    a line consisting of "Introduction", the word "PACS", or "Key words:"
    (all case-insensitive).

    Args:
        file_path: Path of the text file to read; also used in the printed
            diagnostics.

    Returns:
        The extracted text with very short lines removed, or None when the
        file cannot be read or no usable start/end marker exists (a message
        is printed in those cases).
    """
    # Only the file read can realistically fail; keep the try body minimal.
    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            content = f.read()
    except FileNotFoundError:
        print(f"Error: File not found at {file_path}")
        return None
    except Exception as e:
        # Best-effort contract: report the problem and return None.
        print(f"An error occurred with {file_path}: {e}")
        return None

    # Start marker: whichever of "Abstract" / year-in-parentheses ends first.
    abstract_match = re.search(r"\nAbstract\n", content, re.IGNORECASE)
    year_match = re.search(r"\d{4}\)", content)
    start_candidates = [m.end() for m in (abstract_match, year_match) if m is not None]
    if not start_candidates:
        print(f"No valid start point ('Abstract' or year) found in {file_path}.")
        return None
    start_index = min(start_candidates)

    # End marker: look at ALL three markers, and only at occurrences at or
    # after the start, so no marker is silently ignored and a stray marker
    # earlier in the file cannot abort the extraction.
    end_candidates = []
    for pattern in (r"\nIntroduction\n", r"\bPACS\b", r"Key words:"):
        found = re.compile(pattern, re.IGNORECASE).search(content, start_index)
        if found is not None:
            end_candidates.append(found.start())

    if not end_candidates:
        print(f"No valid end point ('Introduction', 'PACS', or 'Key words') found in {file_path}.")
        return None
    end_index = min(end_candidates)

    # Can only trigger when an end marker starts exactly at the start index,
    # i.e. there is nothing to extract.
    if start_index >= end_index:
        print(f"Start marker appears after the end marker in {file_path}.")
        return None

    extracted_text = content[start_index:end_index].strip()

    # Drop blank/one-character lines and lines made of only one or two word
    # characters (typically stray symbols left over from text extraction).
    filtered_lines = [
        line
        for line in extracted_text.split("\n")
        if len(line.strip()) > 1 and not re.match(r"^\s*\w{1,2}\s*$", line)
    ]

    return "\n".join(filtered_lines)

def process_folder(folder_path):
    """Run extract_text on every .txt file in a folder and print the results.

    Files are handled in sorted name order so the output is reproducible.
    A header is printed for each file; the extracted text and a separator
    follow only when extract_text returns a non-empty result.

    Args:
        folder_path: Path of the folder to scan (not recursive).

    Returns:
        None. All results and diagnostics are printed.
    """
    if not os.path.exists(folder_path):
        print(f"Error: Folder '{folder_path}' does not exist.")
        return
    # An existing regular file would otherwise make os.listdir raise.
    if not os.path.isdir(folder_path):
        print(f"Error: '{folder_path}' is not a folder.")
        return

    # os.listdir returns names in arbitrary order; sort for stable output.
    txt_files = sorted(f for f in os.listdir(folder_path) if f.endswith('.txt'))

    if not txt_files:
        print(f"No .txt files found in '{folder_path}'.")
        return

    for txt_file in txt_files:
        file_path = os.path.join(folder_path, txt_file)
        print(f"\n--- Processing {txt_file} ---\n")
        extracted_content = extract_text(file_path)
        if extracted_content:
            print(extracted_content)
            print("\n----------------------\n")

# Demo: process every .txt file in a sample folder.
folder_path = "/content/test"  # Point this at your own folder
process_folder(folder_path=folder_path)



--- Processing Thoughts on Noise and Quantum Computation.txt ---

We will try to explore, primarily from the complexity-theoretic
point of view, limitations of error-correction and fault-tolerant quan-
tum computation.
We consider stochastic models of quantum computation on n qubits
subject to noise operators that are obtained as products of tiny noise
operators acting on a small number of qubits. We conjecture that
for realistic random noise operators of this kind there will be sub-
stantial dependencies between the noise on individual qubits and, in
addition, we propose that the dependence structure of the noise act-
ing on individual qubits will necessarily depend (systematically) on
∗Research supported in part by an NSF grant, by an ISF Bikura grant and by a
BSF grant. Part of this work was carried out when the author visited the Mittag-Leﬂer
Institute in Djursholm, Sweden. I am very thankful to Dorit Aharonov, Robert Alicki,
Michael Ben-Or, Greg Kuperberg, and Boris Tsirelson for