In [1]:
import os
import re
import zipfile
import tempfile
import logging

In [2]:
def setup_logging(zip_path):
    """
    Configure logging to use a log file based on the input ZIP file name.

    The log file is named ``<zip base name without extension>_log.txt`` and is
    a relative path, so it is written to the current working directory.

    Any handlers already attached to the root logger are removed and closed
    before configuring, so every call redirects logging to the log file that
    belongs to the ZIP currently being processed.

    Args:
        zip_path: Path to the input ZIP archive. Only its base name is used.

    Returns:
        The name of the log file that logging now writes to.
    """
    zip_name = os.path.splitext(os.path.basename(zip_path))[0]
    log_file = f"{zip_name}_log.txt"

    # logging.basicConfig() silently does nothing when the root logger already
    # has handlers. In a long-lived notebook kernel that means a second call
    # (for another ZIP) would keep writing to the first log file while this
    # function reports the new name. Detach the old handlers first.
    root_logger = logging.getLogger()
    for handler in list(root_logger.handlers):
        root_logger.removeHandler(handler)
        handler.close()

    logging.basicConfig(
        filename=log_file,
        level=logging.INFO,
        format="%(asctime)s - %(levelname)s - %(message)s"
    )
    return log_file

In [3]:
def convert_longtable_to_text(content):
    """
    Replace every longtable environment in ``content`` with a plain-text
    rendering and return the resulting string.

    Each ``\\begin{longtable} ... \\end{longtable}`` span is rewritten by an
    ordered list of regex substitutions, prefixed with a note saying the table
    was converted, and substituted back into ``content``. Text outside
    longtable environments is returned unchanged.
    """
    # Substitutions are applied strictly in this order; later rules rely on
    # what earlier rules have already stripped.
    rewrite_rules = (
        (r'\\begin{longtable}.*?\n', ''),          # opening line (incl. column spec on it)
        (r'\\end{longtable}', ''),                 # closing tag
        (r'\{.*?\}', ''),                          # every remaining {...} group on a line
        (r'&', ' | '),                             # column separator -> pipe
        (r'\\\\', '\n'),                           # row terminator -> newline
        (r'\\hline|\\midrule', ''),                # horizontal rules
        (r'\\PAR', '\n'),                          # custom paragraph command -> newline
        (r'\\[a-zA-Z]+\*?(?:\{.*?\})?', ''),       # any other LaTeX command
        (r'\{|\}', ''),                            # stray braces left behind
    )

    table_finder = re.compile(r'\\begin{longtable}.*?\\end{longtable}', re.DOTALL)

    for original_table in table_finder.findall(content):
        logging.info("Converting longtable to plain text.")

        flattened = original_table
        for pattern, substitute in rewrite_rules:
            flattened = re.sub(pattern, substitute, flattened)

        # Prefix a marker so readers of the merged file know this is generated text.
        replacement_block = (
            "\n(Converted Table via Python script to support Latexdiff execution)\n"
            + flattened
            + "\n"
        )

        # Swap the LaTeX table for its plain-text version.
        content = content.replace(original_table, replacement_block)

    return content

In [4]:
def merge_tex(file_path, base_dir):
    r"""
    Recursively inline \input / \include directives and return the merged text.

    The file is read, its longtable environments are converted with
    ``convert_longtable_to_text``, and the result is processed line by line.
    A line that starts (after optional leading whitespace) with
    ``\input{name}`` or ``\include{name}`` is replaced by the merged content
    of the referenced file, wrapped in ``% Start of`` / ``% End of`` comment
    markers. All other lines are copied through with a trailing newline.

    Referenced names are resolved against ``base_dir`` (not against the
    directory of the including file). ``name.tex`` is tried first; if that
    does not exist and ``name`` itself is an existing file (for example the
    directive was written as ``\input{chapter.tex}``), that file is used.

    Args:
        file_path: Path of the .tex file to process.
        base_dir: Directory against which included file names are resolved.

    Returns:
        The merged content as a single string. Missing includes are replaced
        by a ``% Warning: ... not found`` comment line and logged as warnings.
    """
    logging.info(f"Processing file: {file_path}")

    # Read everything up front so the handle is released before recursing
    # into included files.
    with open(file_path, "r") as f:
        content = convert_longtable_to_text(f.read())

    merged_content = []
    for line_number, line in enumerate(content.splitlines(), start=1):
        # Allow indentation before the directive; a leading '%' (commented-out
        # directive) still prevents a match.
        match = re.match(r"\s*\\(input|include)\{(.+?)\}", line)
        if not match:
            merged_content.append(line + "\n")
            continue

        target = match.group(2)
        included_file = os.path.normpath(os.path.join(base_dir, target + ".tex"))
        if not os.path.exists(included_file):
            # The directive may already carry its extension; blindly appending
            # ".tex" would then look for "name.tex.tex". Fall back to the
            # literal name, but only if it is a regular file (not a folder).
            literal_file = os.path.normpath(os.path.join(base_dir, target))
            if os.path.isfile(literal_file):
                included_file = literal_file

        if os.path.exists(included_file):
            logging.info(f"Inserting content from: {included_file}")
            merged_content.append(f"% Start of {included_file}\n")
            merged_content.append(merge_tex(included_file, base_dir))
            merged_content.append(f"% End of {included_file}\n")
        else:
            logging.warning(f"File not found: {included_file} (Referenced in {file_path} at line {line_number})")
            merged_content.append(f"% Warning: {included_file} not found\n")

    return "".join(merged_content)

In [5]:
def process_zip(zip_path):
    """
    Extract a ZIP archive of LaTeX sources and write one merged .tex file.

    The archive is unpacked into a temporary directory, the first ``main.tex``
    found by walking that directory is taken as the root document, and its
    ``\\input``/``\\include`` tree is flattened with ``merge_tex``. The result is
    written to ``<zip base name>_merged.tex``; progress is logged to the file
    configured by ``setup_logging``.

    Args:
        zip_path: Path to the ZIP archive to process.

    Returns:
        None. If no ``main.tex`` is found, an error is logged and printed and
        no output file is written.
    """
    # Output name is derived from the archive name, minus its extension.
    archive_stem = os.path.splitext(os.path.basename(zip_path))[0]
    output_file = f"{archive_stem}_merged.tex"
    log_file = setup_logging(zip_path)

    with tempfile.TemporaryDirectory() as temp_dir:
        logging.info(f"Extracting ZIP file: {zip_path}")
        with zipfile.ZipFile(zip_path, 'r') as archive:
            archive.extractall(temp_dir)
            logging.info(f"Extraction complete. Files extracted to: {temp_dir}")

        # The root document is assumed to be called 'main.tex'; take the first
        # directory (in os.walk order) that contains one.
        main_file = next(
            (
                os.path.join(folder, "main.tex")
                for folder, _, names in os.walk(temp_dir)
                if "main.tex" in names
            ),
            None,
        )

        if main_file is None:
            logging.error("No 'main.tex' file found in the ZIP archive.")
            print(f"Error: No 'main.tex' file found in the ZIP archive. See {log_file} for details.")
            return

        logging.info(f"Main file found: {main_file}")

        # Includes are resolved relative to the directory holding main.tex.
        merged_content = merge_tex(main_file, os.path.dirname(main_file))

        with open(output_file, "w") as out:
            out.write(merged_content)

        logging.info(f"Merged content written to: {output_file}")
        print(f"Merged content written to {output_file}. Log file created: {log_file}")

In [6]:
# Driver cell: merge the LaTeX project contained in the archive below.
# process_zip names its outputs after the archive (here: original_merged.tex
# and original_log.txt) using relative paths.
zip_path = "original.zip"  # Replace with the path to your ZIP file
process_zip(zip_path)

Merged content written to original_merged.tex. Log file created: original_log.txt


In [7]:
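# Next step (shell command, not Python): once the modified project has been
# merged the same way into modified_merged.tex, diff the two merged files:
#   latexdiff original_merged.tex modified_merged.tex > colored.tex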