In [1]:
import os
import re
import glob

# Directory containing the job logs (current directory) and the glob pattern
# that selects them: python_job_*.out
folder_path = '.'
file_pattern = os.path.join(folder_path, "python_job_*.out")

# Run parameters, expected as one comma-separated "key: value" sequence in
# exactly this order, e.g. "M: 1024, K: 1024, ..., buffer: 2".
_SUMMARY_PARAM_KEYS = (
    "M", "K", "N", "Mt", "Kt", "Nt", "Pr", "Cycle", "Pc", "channel", "buffer",
)
_SUMMARY_PARAMS_RE = re.compile(
    "(" + r"\s*,\s*".join(rf"{key}:\s*\d+" for key in _SUMMARY_PARAM_KEYS) + ")"
)

# Unsigned decimal with optional fraction and exponent.  A plain "[\d.]+" would
# cut "4.2e-05" down to "4.2" and would swallow a trailing sentence period.
_SUMMARY_NUMBER = r"(?:\d+\.?\d*|\.\d+)(?:[eE][-+]?\d+)?"

# The colon after each phrase is optional, matching how the Excel-export cell
# of this notebook parses the same log lines.
_SUMMARY_MACHINE_TIME_RE = re.compile(
    rf"Timestamps transfer done\s*:?\s*({_SUMMARY_NUMBER})"
)
_SUMMARY_PROGRAM_TIME_RE = re.compile(r"Program completed in\s*:?\s*(\S+)")


def summarize_job_output(file_name, content):
    """Build the one-line summary for a single job log.

    Args:
        file_name: Base name of the log file; copied verbatim to the line start.
        content: Full text of the log file.

    Returns:
        A newline-terminated string of the form
        "<file_name> <params>, machine time: <t>, program time: <t>".
        "<params>" is the matched parameter sequence, or
        "Parameters not found" when the sequence is absent.  Each time is the
        text captured after its marker phrase, or "N/A" when the phrase (with
        a value after it) does not occur in ``content``.
    """
    params_match = _SUMMARY_PARAMS_RE.search(content)
    params_str = params_match.group(1) if params_match else "Parameters not found"

    machine_match = _SUMMARY_MACHINE_TIME_RE.search(content)
    machine_time = machine_match.group(1) if machine_match else "N/A"

    program_match = _SUMMARY_PROGRAM_TIME_RE.search(content)
    program_time = program_match.group(1) if program_match else "N/A"

    return (
        f"{file_name} {params_str}, machine time: {machine_time}, "
        f"program time: {program_time}\n"
    )


# Inside a notebook kernel __name__ is always "__main__", so this cell still
# runs as before; the guard only keeps the file I/O from firing if the code is
# imported as a module.
if __name__ == "__main__":
    with open("output.txt", "w") as outfile:
        # glob.glob yields paths in arbitrary order; sort so output.txt is
        # reproducible from run to run.
        for file_path in sorted(glob.glob(file_pattern)):
            with open(file_path, "r") as infile:
                content = infile.read()
            outfile.write(summarize_job_output(os.path.basename(file_path), content))

    print("Extraction complete. Check output.txt for results.")


Extraction complete. Check output.txt for results.


In [None]:
import os
import re
import glob
import pandas as pd

# Directory containing the job logs (current directory) and the glob pattern
# that selects them: python_job_*.out
folder_path = '.'
file_pattern = os.path.join(folder_path, "python_job_*.out")

# One regex per integer parameter.  The leading \b stops a short key such as
# "M:" from matching inside a longer token like "NUM:" or "MEM:".
patterns = {
    name: rf"\b{name}:\s*(\d+)"
    for name in (
        "M", "K", "N", "Mt", "Kt", "Nt", "Pr", "Cycle", "Pc", "channel", "buffer",
    )
}

# Unsigned decimal with optional fraction and exponent, so every capture is a
# valid float() argument.  A plain "[\d.]+" can capture "." or "1.2.3" (float()
# then raises ValueError) and cuts "4.2e-05" down to "4.2".
_TABLE_NUMBER = r"(?:\d+\.?\d*|\.\d+)(?:[eE][-+]?\d+)?"

# The colon after each marker phrase is optional, as is surrounding whitespace.
_TABLE_MACHINE_TIME_RE = re.compile(
    rf"Timestamps transfer done\s*:?\s*({_TABLE_NUMBER})"
)
# Digit groups separated by ":" or ".", e.g. "00:01:25" or "85.3"; a trailing
# punctuation mark is not captured.
_TABLE_PROGRAM_TIME_RE = re.compile(
    r"Program completed in\s*:?\s*(\d+(?:[:.]\d+)*)"
)


def parse_job_output(file_name, content):
    """Extract one table row from the text of a single job log.

    Args:
        file_name: Base name of the log file; stored under "File Name".
        content: Full text of the log file.

    Returns:
        A dict with "File Name", one int (or None) per key of ``patterns``
        taken from the first "<key>: <digits>" occurrence, "machine_time" as
        a float (or None), and "program_time" as the captured string
        (or None).  None means the corresponding text was not found.
    """
    record = {"File Name": file_name}

    for key, pattern in patterns.items():
        match = re.search(pattern, content)
        record[key] = int(match.group(1)) if match else None

    machine_match = _TABLE_MACHINE_TIME_RE.search(content)
    record["machine_time"] = float(machine_match.group(1)) if machine_match else None

    program_match = _TABLE_PROGRAM_TIME_RE.search(content)
    record["program_time"] = program_match.group(1) if program_match else None

    return record


# Inside a notebook kernel __name__ is always "__main__", so this cell still
# runs as before; the guard only keeps the file I/O from firing if the code is
# imported as a module.
if __name__ == "__main__":
    # One dict per log file.  glob.glob yields paths in arbitrary order, so
    # sort to keep the row order reproducible.
    data = []
    for file_path in sorted(glob.glob(file_pattern)):
        with open(file_path, "r") as infile:
            content = infile.read()
        data.append(parse_job_output(os.path.basename(file_path), content))

    # Build the table once from the collected records and export it.
    df = pd.DataFrame(data)

    output_excel = "Systolic_Feb_result.xlsx"
    df.to_excel(output_excel, index=False)

    print(f"Extraction complete. Check {output_excel} for results.")


Extraction and ranking complete. Check output.xlsx for results.
