In [2]:
import os
import re
import glob

# Define the directory containing the files (current directory in this case)
folder_path = '.'

# Pattern for the output file names: python_job_*.out
file_pattern = os.path.join(folder_path, "python_job_*.out")

# Keys of the parameter line, in the exact order the regex requires them,
# e.g. "M: 1024, K: 1024, ..., channel: 8, buffer: 2".
PARAM_LINE_KEYS = (
    "M", "K", "N", "Mt", "Kt", "Nt", "Pr", "Cycle", "Pc", "channel", "buffer",
)

# One capture group spanning the whole comma-separated "key: integer" sequence.
PARAMS_RE = re.compile(
    "(" + r"\s*,\s*".join(rf"{key}:\s*\d+" for key in PARAM_LINE_KEYS) + ")"
)

# Number following "Timestamps transfer done:". Accepts plain decimals and
# scientific notation ("1.5e-05"); the previous "[\d.]+" cut such a value down
# to "1.5" and also accepted non-numbers such as "...".
MACHINE_TIME_RE = re.compile(
    r"Timestamps transfer done\s*[:]\s*((?:\d+\.?\d*|\.\d+)(?:[eE][-+]?\d+)?)"
)

# First whitespace-delimited token following "Program completed in:".
PROGRAM_TIME_RE = re.compile(r"Program completed in\s*[:]\s*(\S+)")


def summarize_job_log(file_name, content):
    """Build the one-line summary written to output.txt for a single job log.

    Args:
        file_name: Name to put at the start of the line (the log's base name).
        content: Full text of the log file.

    Returns:
        A newline-terminated string of the form
        "<file_name> <params>, machine time: <t>, program time: <t>".
        A missing parameter line is reported as "Parameters not found" and a
        missing time as "N/A"; nothing is raised for incomplete logs.
    """
    params_match = PARAMS_RE.search(content)
    params_str = params_match.group(1) if params_match else "Parameters not found"

    machine_time_match = MACHINE_TIME_RE.search(content)
    machine_time = machine_time_match.group(1) if machine_time_match else "N/A"

    program_time_match = PROGRAM_TIME_RE.search(content)
    program_time = program_time_match.group(1) if program_time_match else "N/A"

    return (
        f"{file_name} {params_str}, machine time: {machine_time}, "
        f"program time: {program_time}\n"
    )


# In a notebook kernel __name__ is "__main__", so this runs exactly as before;
# the guard only keeps summarize_job_log importable without touching files.
if __name__ == "__main__":
    # Open the output file for writing results
    with open("output.txt", "w") as outfile:
        # glob.glob returns matches in arbitrary order; sort them so output.txt
        # is identical from one run to the next.
        for file_path in sorted(glob.glob(file_pattern)):
            file_name = os.path.basename(file_path)

            # errors="replace" keeps a single undecodable byte in one log from
            # aborting the whole run and leaving output.txt half written.
            with open(file_path, "r", errors="replace") as infile:
                content = infile.read()

            outfile.write(summarize_job_log(file_name, content))

    print("Extraction complete. Check output.txt for results.")


Extraction complete. Check output.txt for results.


In [None]:
import os
import re
import glob
import pandas as pd

# Define the directory containing the files (current directory)
folder_path = '.'
file_pattern = os.path.join(folder_path, "python_job_*.out")

# Regex for each parameter on the parameter line; group 1 is the integer value.
# This layout has a single 'P' entry (no 'Pr', 'Cycle' or 'Pc').
# The leading \b stops a key from matching inside a longer label, e.g. "M:" in
# "DRAM: 16" or "K:" in "CLK: 2", which could otherwise make an unrelated
# earlier line look like the parameter line.
param_patterns = {
    "M": r"\bM:\s*(\d+)",
    "K": r"\bK:\s*(\d+)",
    "N": r"\bN:\s*(\d+)",
    "Mt": r"\bMt:\s*(\d+)",
    "Kt": r"\bKt:\s*(\d+)",
    "Nt": r"\bNt:\s*(\d+)",
    "P": r"\bP:\s*(\d+)",
    "channel": r"\bchannel:\s*(\d+)",
    "buffer": r"\bbuffer:\s*(\d+)"
}


def parse_job_log(file_name, raw_lines):
    """Extract one table row from the lines of a job log.

    Args:
        file_name: Value stored under "File Name" (the log's base name).
        raw_lines: Iterable of text lines, such as an open file or a list of
            strings. Lines are stripped and blank lines are ignored.

    Returns:
        A dict with "File Name", one entry per key of ``param_patterns``
        (int, or None when absent), "machine_time" (float or None) and
        "program_time" (str or None), inserted in that order.
    """
    lines = [text for text in (line.strip() for line in raw_lines) if text]
    file_data = {"File Name": file_name}

    # The parameter line is the first line carrying both an "M:" and a "K:" entry.
    param_line = next(
        (
            line for line in lines
            if re.search(param_patterns["M"], line)
            and re.search(param_patterns["K"], line)
        ),
        None,
    )
    for key, pattern in param_patterns.items():
        match = re.search(pattern, param_line) if param_line else None
        file_data[key] = int(match.group(1)) if match else None

    # Machine time is the first non-empty line after the first line that starts
    # with "Timestamps transfer done"; anything float() rejects yields None.
    machine_time = None
    marker_index = next(
        (
            index for index, line in enumerate(lines)
            if line.startswith("Timestamps transfer done")
        ),
        None,
    )
    if marker_index is not None and marker_index + 1 < len(lines):
        try:
            machine_time = float(lines[marker_index + 1])
        except ValueError:
            machine_time = None
    file_data["machine_time"] = machine_time

    # Program time is whatever follows "Program completed in:" on the first
    # line starting with that prefix, kept as text.
    file_data["program_time"] = next(
        (
            line.split("Program completed in:")[-1].strip()
            for line in lines
            if line.startswith("Program completed in:")
        ),
        None,
    )

    return file_data


# In a notebook kernel __name__ is "__main__", so this runs exactly as before;
# the guard only keeps parse_job_log importable without rewriting the workbook.
if __name__ == "__main__":
    # List to hold dictionaries for each file's extracted data
    data = []

    # glob.glob returns matches in arbitrary order; sort them so the row order
    # of the spreadsheet is the same on every run.
    for file_path in sorted(glob.glob(file_pattern)):
        file_name = os.path.basename(file_path)

        # errors="replace" keeps a single undecodable byte in one log from
        # aborting the whole extraction.
        with open(file_path, "r", errors="replace") as infile:
            data.append(parse_job_log(file_name, infile))

    # Create a DataFrame from the extracted data
    df = pd.DataFrame(data)

    # Write the DataFrame to an Excel file
    output_excel = "SUMMA_Feb.xlsx"
    df.to_excel(output_excel, index=False)

    print(f"Extraction complete. Check {output_excel} for results.")


Extraction complete. Check output.xlsx for results.
