In [2]:
pip install pandas numpy pulp pm4py requests

Defaulting to user installation because normal site-packages is not writeable
Note: you may need to restart the kernel to use updated packages.


In [3]:
pip install cbcpy

Defaulting to user installation because normal site-packages is not writeable
Note: you may need to restart the kernel to use updated packages.


ERROR: Could not find a version that satisfies the requirement cbcpy (from versions: none)
ERROR: No matching distribution found for cbcpy


In [4]:
pip install pm4py[full]

Defaulting to user installation because normal site-packages is not writeable
Note: you may need to restart the kernel to use updated packages.




In [5]:
pip install pm4py

Defaulting to user installation because normal site-packages is not writeable
Note: you may need to restart the kernel to use updated packages.


In [6]:
pip install torch

Defaulting to user installation because normal site-packages is not writeable
Note: you may need to restart the kernel to use updated packages.


In [1]:
import os
import requests

# Fetch the helper modules of the Conformance-Checking repository into ./src
# so that later cells can import them.
os.makedirs("src", exist_ok=True)

src_files = ["Classes.py", "icpm_experiments.py", "preprocessing.py", "RunningHorizon.py", "utils.py"]
base_url = "https://raw.githubusercontent.com/maorliby/Conformance-Checking/main/src/"

for file_name in src_files:
    try:
        # Without a timeout a stalled connection would block the cell forever.
        response = requests.get(f"{base_url}{file_name}", timeout=30)
    except requests.RequestException as exc:
        # Report the failure and keep going so one bad file does not abort the rest.
        print(f"Failed to download {file_name}: {exc}")
        continue
    if response.status_code == 200:
        with open(f"src/{file_name}", "wb") as f:
            f.write(response.content)
        print(f"Downloaded {file_name} to src/")
    else:
        print(f"Failed to download {file_name}")

Downloaded Classes.py to src/
Downloaded icpm_experiments.py to src/
Downloaded preprocessing.py to src/
Downloaded RunningHorizon.py to src/
Downloaded utils.py to src/


In [2]:
import importlib.util
import sys
import os
import re
import pandas as pd
import numpy as np
import pulp
import time
import requests
from io import StringIO
from pm4py.objects.petri_net.utils import petri_utils
from pm4py.objects.petri_net.obj import PetriNet, Marking
from pm4py.objects.log.obj import EventLog, Trace, Event
from pm4py.algo.conformance.alignments.petri_net import algorithm as alignments_algo
import logging

# Configure logging so that run times are printed in real time
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(message)s')

# Import functions from icpm_experiments (src/ is added to the module search path first)
sys.path.append("src")
from icpm_experiments import load_and_preprocess_log, generate_model_from_file

# Install any required package that is still missing.
# NOTE(review): the import statements earlier in this cell already need these
# packages, so on a fresh environment the cell fails before reaching this loop.
import subprocess

packages = {
    "pulp": "pulp",
    "pm4py": "pm4py",
    "numpy": "numpy",
    "pandas": "pandas",
    "requests": "requests"
}
for module_name, pip_name in packages.items():
    if importlib.util.find_spec(module_name) is None:
        print(f"Installing {pip_name}...")
        # Run pip through the interpreter that executes this notebook so the
        # package lands in the kernel's environment, not whatever `pip` is on PATH.
        result = subprocess.run([sys.executable, "-m", "pip", "install", pip_name], check=False)
        if result.returncode != 0:
            print(f"Failed to install {pip_name} (pip exit code {result.returncode}).")
    else:
        print(f"{pip_name} already installed.")

pulp already installed.
pm4py already installed.
numpy already installed.
pandas already installed.
requests already installed.


In [7]:
def build_synchronous_product(model_net, model_marking, trace):
    """Build the synchronous product net of a process model and a trace.

    The product contains:
      * one place ``<name>_m`` per model place and one place ``p<i>_l`` per
        trace position (``len(trace) + 1`` of them);
      * a synchronous transition ``<label>_S_<i>`` (label ``"<label>,<label>"``)
        for every model transition whose label equals the event at position i;
      * a model move ``<label>_M`` (label ``"<label>,>>"``) for every labelled
        model transition, and a transition named ``τ`` (label ``"τ,τ"``) for
        every unlabelled one;
      * a log move ``<label>_L_<i>`` (label ``">>,<label>"``) for every event.

    Args:
        model_net: Petri net exposing ``places`` and ``transitions``; each
            transition exposes ``label``, ``in_arcs`` and ``out_arcs``.
        model_marking: initial marking of the model. When it is a mapping
            (place -> token count) the counts are copied; any other iterable
            of places is treated as one token per place.
        trace: sequence of event labels.

    Returns:
        ``(sp_net, sp_marking)`` - the product net and its initial marking.
    """
    sp_net = PetriNet("Synchronous Product")
    place_map = {}

    # Model side: copy every place under a "_m" suffix.
    for p in model_net.places:
        new_p = PetriNet.Place(f"{p.name}_m")
        sp_net.places.add(new_p)
        place_map[p.name] = new_p

    # Log side: a chain of places, one before each event and one after the last.
    log_places = []
    for i in range(len(trace) + 1):
        p_log = PetriNet.Place(f"p{i}_l")
        sp_net.places.add(p_log)
        log_places.append(p_log)

    sp_marking = Marking()
    # Carry over the real token counts instead of assuming one token per
    # place, so the product agrees with get_sync_initial_marking_vector.
    marking_is_mapping = hasattr(model_marking, "keys")
    for p in model_marking:
        sp_marking[place_map[p.name]] = model_marking[p] if marking_is_mapping else 1
    sp_marking[log_places[0]] = 1

    # Synchronous moves: model transition and trace event advance together.
    for t in model_net.transitions:
        label = t.label if t.label is not None else "τ"
        for i, event in enumerate(trace):
            if t.label == event:
                sync_t = PetriNet.Transition(f"{label}_S_{i}", f"{label},{label}")
                sp_net.transitions.add(sync_t)
                for arc in t.in_arcs:
                    petri_utils.add_arc_from_to(place_map[arc.source.name], sync_t, sp_net)
                for arc in t.out_arcs:
                    petri_utils.add_arc_from_to(sync_t, place_map[arc.target.name], sp_net)
                petri_utils.add_arc_from_to(log_places[i], sync_t, sp_net)
                petri_utils.add_arc_from_to(sync_t, log_places[i + 1], sp_net)

    # Model moves: the model advances while the trace stays put.
    for t in model_net.transitions:
        label = t.label if t.label is not None else "τ"
        if label == "τ":
            # NOTE(review): every silent transition receives the same name "τ";
            # confirm that downstream code keyed by transition name copes with that.
            move_m = PetriNet.Transition(f"{label}", f"{label},{label}")
        else:
            move_m = PetriNet.Transition(f"{label}_M", f"{label},>>")
        sp_net.transitions.add(move_m)
        for arc in t.in_arcs:
            petri_utils.add_arc_from_to(place_map[arc.source.name], move_m, sp_net)
        for arc in t.out_arcs:
            petri_utils.add_arc_from_to(move_m, place_map[arc.target.name], sp_net)

    # Log moves: the trace advances while the model stays put.
    for i, event in enumerate(trace):
        label = event if event is not None else "τ"
        move_l = PetriNet.Transition(f"{label}_L_{i}", f">>,{label}")
        sp_net.transitions.add(move_l)
        petri_utils.add_arc_from_to(log_places[i], move_l, sp_net)
        petri_utils.add_arc_from_to(move_l, log_places[i + 1], sp_net)

    return sp_net, sp_marking

In [8]:
def compute_sorted_incidence_matrix(net):
    """Return the incidence matrix of ``net`` with columns grouped by move type.

    Transitions are grouped by the shape of their label: labels ending in
    ``">>"`` (model moves) come first, then labels that contain a comma and no
    ``">>"`` (synchronous moves), then labels starting with ``">>"`` (log
    moves). Transitions whose label fits none of these shapes are left out.

    Args:
        net: object exposing ``places`` (items with ``name``) and
            ``transitions`` (items with ``name``, ``label``, ``in_arcs`` and
            ``out_arcs``).

    Returns:
        ``(matrix, place_names, transition_names)`` where ``matrix`` is an
        integer array of shape (places, kept transitions); every input arc
        subtracts 1 and every output arc adds 1 at the arc's place row.
    """
    place_names = [place.name for place in list(net.places)]
    row_of = dict(zip(place_names, range(len(place_names))))

    groups = {"model": [], "sync": [], "log": []}
    for transition in list(net.transitions):
        label = transition.label
        has_skip = ">>" in label
        if has_skip and label.endswith(">>"):
            groups["model"].append(transition)
        elif has_skip and label.startswith(">>"):
            groups["log"].append(transition)
        elif not has_skip and "," in label:
            groups["sync"].append(transition)

    ordered = [*groups["model"], *groups["sync"], *groups["log"]]
    incidence = np.zeros((len(place_names), len(ordered)), dtype=int)

    for col, transition in enumerate(ordered):
        for arc in transition.in_arcs:
            incidence[row_of[arc.source.name], col] -= 1
        for arc in transition.out_arcs:
            incidence[row_of[arc.target.name], col] += 1

    return incidence, place_names, [transition.name for transition in ordered]

In [9]:
def _sync_marking_vector(sync_net, marking, log_place_name):
    """Encode a model marking plus one marked log place as a vector over sync_net.places."""
    place_names = [p.name for p in sync_net.places]
    # Name -> first position, built once instead of rescanning the list for
    # every lookup (setdefault keeps the first index, like list.index did).
    position = {}
    for idx, name in enumerate(place_names):
        position.setdefault(name, idx)

    vector = np.zeros(len(place_names), dtype=int)
    for place, tokens in marking.items():
        idx = position.get(f"{place.name}_m")
        if idx is not None:
            vector[idx] = tokens
    idx = position.get(log_place_name)
    if idx is not None:
        vector[idx] = 1
    return vector


def get_sync_initial_marking_vector(sync_net, model_marking, trace):
    """Return the initial marking of the synchronous product as a vector.

    Args:
        sync_net: net whose ``places`` (items with ``name``) define the vector
            positions, in iteration order.
        model_marking: mapping place -> token count; each place is looked up
            in ``sync_net`` under the name ``<place.name>_m``. Places that are
            not found are ignored.
        trace: unused; kept so the signature mirrors get_sync_final_marking.

    Returns:
        Integer numpy vector with the model tokens and a single token on the
        first log place ``p0_l`` (when that place exists).
    """
    return _sync_marking_vector(sync_net, model_marking, "p0_l")


def get_sync_final_marking(sync_net, model_final_marking, trace):
    """Return the final marking of the synchronous product as a vector.

    Args:
        sync_net: net whose ``places`` (items with ``name``) define the vector
            positions, in iteration order.
        model_final_marking: mapping place -> token count; each place is
            looked up in ``sync_net`` under the name ``<place.name>_m``.
        trace: the trace; only its length is used, to name the last log place.

    Returns:
        Integer numpy vector with the model tokens and a single token on the
        last log place ``p<len(trace)>_l`` (when that place exists).
    """
    return _sync_marking_vector(sync_net, model_final_marking, f"p{len(trace)}_l")

In [10]:
def build_incremental_lp(C, m0_vector, x, cost_map, mf, row_names, col_names, integer=True):
    """Build the step-indexed alignment (I)LP over a synchronous product.

    For each of the ``x`` steps there is one variable per transition
    (``y[(i, t)]``) and for each of the ``x + 1`` markings one variable per
    place (``m[(i, p)]``). The objective is the total cost of the fired
    transitions.

    Args:
        C: incidence matrix indexed as ``C[p, t]`` (place row, transition column).
        m0_vector: initial marking, indexable by place index.
        x: number of steps (upper bound on the number of fired transitions).
        cost_map: mapping from transition name (entries of ``col_names``) to cost.
        mf: final marking, indexable by place index.
        row_names: place names, in the row order of ``C``.
        col_names: transition names, in the column order of ``C``.
        integer: if True the ``y`` variables are Binary, otherwise Continuous.
            The marking variables are Integer in both cases.

    Returns:
        ``(prob, y, m)`` - the pulp problem and the two variable dictionaries.
    """
    prob = pulp.LpProblem("Incremental_Alignment", pulp.LpMinimize)
    num_places, num_trans = len(row_names), len(col_names)
    cat = "Binary" if integer else "Continuous"

    # Decision variables, all with a lower bound of 0
    y = {(i, t): pulp.LpVariable(f"y_{i}_{t}", lowBound=0, cat=cat) for i in range(x) for t in range(num_trans)}
    m = {(i, p): pulp.LpVariable(f"m_{i}_{row_names[p]}", lowBound=0, cat="Integer") for i in range(x + 1) for p in range(num_places)}

    # Log the non-zero costs for inspection. The dict is built first: inside an
    # f-string "{{...}}" is an escaped brace and would be printed literally.
    nonzero_costs = {k: v for k, v in cost_map.items() if v != 0}
    logging.info(f"Cost map: {nonzero_costs}")

    # Initial marking constraint
    for p in range(num_places):
        prob += m[(0, p)] == m0_vector[p], f"initial_marking_p{p}"

    # Objective function
    prob += pulp.lpSum([cost_map[col_names[t]] * y[(i, t)] for i in range(x) for t in range(num_trans)])

    # Per-step constraints
    for i in range(x):
        # At most one transition fires in each step
        prob += pulp.lpSum([y[(i, t)] for t in range(num_trans)]) <= 1, f"one_move_step_{i}"
        # Enabledness: a transition may fire only if its input places hold enough tokens
        for t in range(num_trans):
            for p in range(num_places):
                if C[p, t] < 0:
                    prob += m[(i, p)] >= abs(C[p, t]) * y[(i, t)], f"enabled_check_p{p}_t{t}_step{i}"
        # Marking update (state equation for one step)
        for p in range(num_places):
            delta = pulp.lpSum([C[p, t] * y[(i, t)] for t in range(num_trans)])
            prob += m[(i + 1, p)] == m[(i, p)] + delta, f"marking_update_p{p}_step_{i}"

    # Final marking constraint
    for p in range(len(mf)):
        prob += m[(x, p)] == mf[p], f"final_marking_p{p}"

    # Additional constraint: bound the total number of fired transitions
    prob += pulp.lpSum([y[(i, t)] for i in range(x) for t in range(num_trans)]) <= x, "total_transitions_bound"

    return prob, y, m

In [None]:
import logging
import sys
import os
import re
import pandas as pd
import numpy as np
import pulp
import time
from io import StringIO
from pm4py.objects.petri_net.utils import petri_utils
from pm4py.objects.petri_net.obj import PetriNet, Marking
from pm4py.objects.log.obj import EventLog, Trace, Event
from pm4py.algo.conformance.alignments.petri_net import algorithm as alignments_algo
import threading

# Request UTF-8 console I/O to avoid UnicodeEncodeError.
# NOTE(review): Python reads PYTHONIOENCODING at interpreter start-up, so setting
# it here only affects child processes launched afterwards - confirm the intent.
os.environ['PYTHONIOENCODING'] = 'utf-8'

# Configure logging without timestamp
logging.basicConfig(level=logging.INFO, format='%(message)s')
logger = logging.getLogger()
# basicConfig does nothing when the root logger already has handlers (e.g. when
# an earlier cell configured logging), so set the level explicitly as well.
logger.setLevel(logging.INFO)
# Remove existing handlers to avoid duplicate output. They are closed too, so
# re-running this cell does not leak an open handle on log_output.txt.
for stale_handler in list(logger.handlers):
    logger.removeHandler(stale_handler)
    stale_handler.close()
# Console handler
console_handler = logging.StreamHandler(sys.stdout)
console_handler.setFormatter(logging.Formatter('%(message)s'))
logger.addHandler(console_handler)
# File handler
file_handler = logging.FileHandler('log_output.txt', mode='a', encoding='utf-8')
file_handler.setFormatter(logging.Formatter('%(message)s'))
logger.addHandler(file_handler)

# Import functions from icpm_experiments
sys.path.append("src")
from icpm_experiments import load_and_preprocess_log, generate_model_from_file

# Function to determine the rerun suffix
def get_rerun_suffix(model_dir, base_filename):
    """Return the "(n)" suffix to use for the next rerun of ``base_filename``.

    Looks in ``model_dir`` for files named exactly ``<base_filename>(<k>).csv``
    and returns ``"(max k + 1)"``, or ``"(2)"`` when no such file exists.

    Only exact matches of the base name count: files of another dataset that
    merely share the prefix (for example ``<base>_noise(3).csv``) are ignored,
    so one dataset's reruns no longer inflate another dataset's numbering.

    Args:
        model_dir: directory to scan (must exist).
        base_filename: file name without the "(n)" suffix and ".csv" extension.
    """
    rerun_pattern = re.compile(re.escape(base_filename) + r'\((\d+)\)\.csv')
    matches = (rerun_pattern.fullmatch(name) for name in os.listdir(model_dir))
    rerun_nums = [int(match.group(1)) for match in matches if match]
    return f'({max(rerun_nums) + 1})' if rerun_nums else '(2)'

# Process models known to this notebook, mapped to the number that follows
# "_m" in the file names of their l1..l4 datasets (in that order).
_MODEL_DATASET_SIZES = {
    "pr-1-11-1244-A59.txt": (17, 29, 41, 55),
    "pr-3-11-1151-A48.txt": (12, 23, 37, 50),
    "pr-3-11-1908-A32.txt": (18, 27, 34, 41),
    "pr-8-11-1912-A57.txt": (15, 26, 39, 52),
}


def _dataset_files_for(model_file):
    """List a model's dataset CSV names: a plain and a "_noise" file per level."""
    stem = model_file.replace('.txt', '')
    names = []
    for level, size in enumerate(_MODEL_DATASET_SIZES[model_file], 1):
        base = f"{stem}_m{size}_l{level}"
        names.extend([f"{base}.csv", f"{base}_noise.csv"])
    return names


def _count_traces(csv_path):
    """Return the number of distinct 'case:concept:name' groups in a dataset CSV."""
    df = pd.read_csv(csv_path, encoding='utf-8-sig')
    return len(df.groupby('case:concept:name'))


def _last_trace_in(results_df, source_name):
    """Return the largest end index in the 'Trace Range' column (0 if there is none)."""
    if 'Trace Range' not in results_df.columns:
        return 0
    end_traces = []
    for trace_range in results_df['Trace Range']:
        if isinstance(trace_range, str) and '-' in trace_range:
            try:
                end_traces.append(int(trace_range.split('-')[-1]))
            except (ValueError, TypeError):
                logging.warning(f"Invalid Trace Range value in {source_name}: '{trace_range}'. Skipping.")
    return max(end_traces) if end_traces else 0


def _read_last_processed_trace(temp_results_file, give_up_note):
    """Read a temp-results CSV (utf-8-sig, then latin1); return the last trace or None if unreadable."""
    try:
        return _last_trace_in(pd.read_csv(temp_results_file, encoding='utf-8-sig'), temp_results_file)
    except Exception as e:
        logging.warning(f"Error reading {temp_results_file}: {e}. Trying with 'latin1' encoding.")
    try:
        return _last_trace_in(pd.read_csv(temp_results_file, encoding='latin1'), temp_results_file)
    except Exception as e2:
        logging.warning(f"Failed with 'latin1' encoding: {e2}. {give_up_note}")
        return None


def _prompt_int(prompt, is_valid, invalid_message):
    """Ask until the user enters an integer accepted by ``is_valid``; return it."""
    while True:
        sys.stdout.flush()
        raw = input(prompt).strip()
        try:
            value = int(raw)
        except ValueError:
            print("Please enter a valid number.")
            sys.stdout.flush()
            continue
        if is_valid(value):
            return value
        print(invalid_message)
        sys.stdout.flush()


def _prompt_yes_no():
    """Ask until the user answers 'y' or 'n'; return the answer in lower case."""
    while True:
        sys.stdout.flush()
        answer = input("Enter y or n: ").strip().lower()
        if answer in ('y', 'n'):
            return answer
        print("Please enter 'y' or 'n'.")
        sys.stdout.flush()


def _prompt_start_trace(dataset, total_traces, last_trace=None):
    """Ask for the 1-based trace to start from; with ``last_trace`` given, 0 means "continue after it"."""
    if last_trace is None:
        print(f"\nDataset {dataset} contains {total_traces} traces.")
        print(f"Please enter the starting trace number (1 to {total_traces}):")
        sys.stdout.flush()
        return _prompt_int(
            "Starting trace number: ",
            lambda n: 1 <= n <= total_traces,
            f"Invalid choice. Please enter a number between 1 and {total_traces}.",
        )

    resume_at = last_trace + 1
    print(f"\nDataset {dataset} is in progress. Last processed trace: {last_trace}.")
    print(f"Dataset contains {total_traces} traces.")
    print(f"Please enter the starting trace number (1 to {total_traces}, or 0 to continue from {resume_at}):")
    sys.stdout.flush()
    chosen = _prompt_int(
        "Starting trace number: ",
        lambda n: n == 0 or 1 <= n <= total_traces,
        f"Invalid choice. Please enter a number between 1 and {total_traces}, or 0 to continue from {resume_at}.",
    )
    return resume_at if chosen == 0 else chosen


def select_model_and_datasets(data_dir="./pr"):
    """Interactively choose a process model and the dataset(s) to process.

    Prompts on stdin/stdout. Creates ``data_dir`` and a results directory
    named after the model (in the current working directory) if missing.

    Args:
        data_dir: directory holding the model ``.txt`` files and dataset CSVs.

    Returns:
        One of three shapes (kept as-is for existing callers):
          * ``(None, [], None)`` - nothing to do: model file missing, no
            dataset files present, or the user declined to pick a dataset
            when none is incomplete;
          * ``(model, incomplete_datasets, model_dir)`` - the user chose to
            continue with the incomplete datasets;
          * ``(model, [dataset], model_dir, temp_results_file,
            integer_lp_file, results_file, start_from_scratch,
            start_trace_idx)`` - the user picked one specific dataset.
    """
    models = list(_MODEL_DATASET_SIZES)

    # Create data directory if it doesn't exist
    os.makedirs(data_dir, exist_ok=True)

    print("\n=== Step 1: Select a Model ===")
    print("Available process models:")
    for i, model in enumerate(models, 1):
        print(f"{i}. {model}")
    print("Please select a model by entering its number (e.g., '2' for pr-3-11-1151-A48.txt).")
    model_choice = _prompt_int(
        f"Model number (1-{len(models)}): ",
        lambda n: 1 <= n <= len(models),
        f"Invalid choice. Please enter a number between 1 and {len(models)}.",
    )
    selected_model = models[model_choice - 1]

    model_dir = selected_model.replace('.txt', '')
    os.makedirs(model_dir, exist_ok=True)

    # Check if model file exists
    model_path = os.path.join(data_dir, selected_model)
    if not os.path.exists(model_path):
        logging.error(f"Model file {selected_model} not found in {data_dir}. Please ensure it exists.")
        print(f"Error: Model file {selected_model} not found in {data_dir}. Please ensure it exists.")
        sys.stdout.flush()
        return None, [], None

    # Classify every dataset that is present on disk as completed or incomplete.
    completed_datasets = []
    incomplete_datasets = []
    dataset_status = {}
    valid_datasets = [ds for ds in _dataset_files_for(selected_model) if os.path.exists(os.path.join(data_dir, ds))]

    for dataset in valid_datasets:
        try:
            total_traces = _count_traces(os.path.join(data_dir, dataset))
            temp_results_file = os.path.join(model_dir, f"temp_results_{dataset.replace('.csv', '')}.csv")
            last_trace = 0
            if os.path.exists(temp_results_file):
                last_trace = _read_last_processed_trace(temp_results_file, "Assuming no progress.") or 0

            if last_trace >= total_traces and last_trace > 0:
                completed_datasets.append(dataset)
                dataset_status[dataset] = "(Completed)"
            else:
                incomplete_datasets.append(dataset)
                dataset_status[dataset] = f"(In progress, last trace: {last_trace})" if last_trace > 0 else ""
        except Exception as e:
            logging.warning(f"Error processing dataset {dataset}: {e}. Assuming incomplete.")
            incomplete_datasets.append(dataset)
            dataset_status[dataset] = ""

    print(f"\n=== Step 2: Review Datasets for Model {selected_model} ===")
    print("Available datasets:")
    for i, dataset in enumerate(valid_datasets, 1):
        print(f"{i}. {dataset} {dataset_status.get(dataset, '')}")
    sys.stdout.flush()

    if not incomplete_datasets and not completed_datasets:
        print("No datasets available for this model in the local directory. Exiting.")
        sys.stdout.flush()
        return None, [], None

    print("\n=== Step 3: Choose Processing Option ===")
    if incomplete_datasets:
        print(f"Continue from the last trace of the first incomplete dataset ({incomplete_datasets[0]})? (y/n)")
        print(f"  - This will append results to existing files (e.g., temp_results_{incomplete_datasets[0].replace('.csv', '')}.csv).")
    else:
        print("No incomplete datasets. Would you like to select a specific dataset to process? (y/n)")
    answer = _prompt_yes_no()

    if incomplete_datasets and answer == 'y':
        return selected_model, incomplete_datasets, model_dir
    if not incomplete_datasets and answer == 'n':
        # The user was asked whether to pick a dataset and declined; honour
        # that instead of showing the dataset prompt anyway.
        print("No dataset selected. Exiting.")
        sys.stdout.flush()
        return None, [], None

    print("\n=== Step 4: Select a Dataset ===")
    print("Please select a dataset by entering its number.")
    for i, dataset in enumerate(valid_datasets, 1):
        print(f"{i}. {dataset} {dataset_status.get(dataset, '')}")
    sys.stdout.flush()
    dataset_choice = _prompt_int(
        f"Dataset number (1-{len(valid_datasets)}): ",
        lambda n: 1 <= n <= len(valid_datasets),
        f"Invalid choice. Please enter a number between 1 and {len(valid_datasets)}.",
    )
    selected_dataset = valid_datasets[dataset_choice - 1]
    dataset_stem = selected_dataset.replace('.csv', '')

    temp_results_file = os.path.join(model_dir, f"temp_results_{dataset_stem}.csv")
    integer_lp_file = os.path.join(model_dir, f"integer_lp_traces_{dataset_stem}.csv")
    results_file = os.path.join(model_dir, f"results_final_{model_dir}.csv")
    start_from_scratch = False
    start_trace_idx = 1

    # Get the number of traces in the selected dataset
    try:
        logging.info(f"Loading dataset {selected_dataset} to determine total traces...")
        total_traces = _count_traces(os.path.join(data_dir, selected_dataset))
        logging.info(f"Dataset {selected_dataset} contains {total_traces} traces.")
        sys.stdout.flush()
    except Exception as e:
        logging.error(f"Failed to load dataset {selected_dataset}: {e}. Assuming max 10000 traces.")
        total_traces = 10000
        sys.stdout.flush()

    if selected_dataset in completed_datasets:
        # A finished dataset is rerun from scratch into new "(n)" files.
        print(f"\n=== Warning: Dataset {selected_dataset} Already Completed ===")
        print(f"This dataset has already processed {total_traces} traces. Results will be saved in new files:")
        print(f"  - temp_results_{dataset_stem}(n).csv")
        print(f"  - integer_lp_traces_{dataset_stem}(n).csv")
        print(f"  - results_final_{model_dir}(n).csv")
        sys.stdout.flush()
        rerun_suffix = get_rerun_suffix(model_dir, f"temp_results_{dataset_stem}")
        temp_results_file = os.path.join(model_dir, f"temp_results_{dataset_stem}{rerun_suffix}.csv")
        integer_lp_file = os.path.join(model_dir, f"integer_lp_traces_{dataset_stem}{rerun_suffix}.csv")
        results_file = os.path.join(model_dir, f"results_final_{model_dir}{rerun_suffix}.csv")
        start_from_scratch = True
    elif os.path.exists(temp_results_file):
        # Only the file read is guarded; the prompt itself runs outside any
        # try/except so that input errors are not mistaken for decode errors.
        last_trace = _read_last_processed_trace(temp_results_file, "Starting from trace 1.")
        start_trace_idx = _prompt_start_trace(selected_dataset, total_traces, last_trace)
    else:
        start_trace_idx = _prompt_start_trace(selected_dataset, total_traces)

    return selected_model, [selected_dataset], model_dir, temp_results_file, integer_lp_file, results_file, start_from_scratch, start_trace_idx

# Function to process a single dataset
def process_single_dataset(model_file, trace_file, model_dir, temp_results_file=None, integer_lp_file=None, results_file=None, start_from_scratch=False, start_trace_idx=1, data_dir="./pr"):
    """Align every trace of one dataset against one model, checkpointing in batches.

    For each trace (rows grouped by 'case:concept:name', activities taken from
    the 'Activity' column) an A* alignment is computed, then a continuous LP is
    solved. When the continuous LP times out, does not reach "Optimal", or
    yields a cost different from A*, an integer LP is solved as a fallback.
    Batch summaries are appended to ``temp_results_file`` and the ids of traces
    that needed the integer LP to ``integer_lp_file``.

    Args:
        model_file: Model file name, resolved inside ``data_dir``.
        trace_file: Trace CSV file name, resolved inside ``data_dir``.
        model_dir: Directory for the output files and the temporary model copy.
        temp_results_file: Batch checkpoint CSV; derived from ``trace_file`` when None.
        integer_lp_file: Integer-LP trace-id CSV; derived from ``trace_file`` when None.
        results_file: Final results CSV path; derived from ``model_dir`` when None.
            It is only forwarded to ``save_results`` here.
        start_from_scratch: When True, existing checkpoints are not consulted.
        start_trace_idx: 1-based index of the first trace to process. When it is 1
            and a checkpoint exists, processing resumes after the last saved range.
        data_dir: Directory that holds the model and trace files.

    Returns:
        dict with 'status' ('completed', 'failed' or 'interrupted') and 'trace_file'.
    """
    # Session-wide accumulators: one entry per trace completed by this call.
    astar_times, astar_costs, lp_times, lp_costs, lp_success, continuous_lp_success = [], [], [], [], 0, 0
    integer_lp_traces = []

    if temp_results_file is None:
        temp_results_file = os.path.join(model_dir, f"temp_results_{trace_file.replace('.csv', '')}.csv")
    if integer_lp_file is None:
        integer_lp_file = os.path.join(model_dir, f"integer_lp_traces_{trace_file.replace('.csv', '')}.csv")
    if results_file is None:
        results_file = os.path.join(model_dir, f"results_final_{model_dir}.csv")

    logging.info(f"\n=== Processing model: {model_file}, trace file: {trace_file} ===")
    logging.info(f"Results will be saved to: {temp_results_file}, {results_file}, {integer_lp_file}")
    sys.stdout.flush()

    # The noise level is encoded in the file name as "_l<digits>"; it selects the
    # per-trace solver time limit and the checkpoint batch size.
    noise_level_match = re.search(r'_l(\d+)', trace_file)
    noise_level = int(noise_level_match.group(1)) if noise_level_match else 1
    time_limit = {1: 420, 2: 500, 3: 700, 4: 700}.get(noise_level, 700)
    batch_size = {1: 200, 2: 20, 3: 5, 4: 5}.get(noise_level, 5)
    logging.info(f"Noise level: l{noise_level}, Time limit per trace: {time_limit}s, Batch size: {batch_size}")
    sys.stdout.flush()

    # Load files
    model_path = os.path.join(data_dir, model_file)
    trace_path = os.path.join(data_dir, trace_file)

    try:
        if not os.path.exists(model_path):
            logging.error(f"Model file {model_file} not found in {data_dir}.")
            sys.stdout.flush()
            return {'status': 'failed', 'trace_file': trace_file}
        logging.info("Loading model file...")
        with open(model_path, 'r', encoding='utf-8') as f:
            model_content = f.read()
        sys.stdout.flush()

        if not os.path.exists(trace_path):
            logging.error(f"Trace file {trace_file} not found in {data_dir}.")
            sys.stdout.flush()
            return {'status': 'failed', 'trace_file': trace_file}
        logging.info("Loading trace file...")
        with open(trace_path, 'r', encoding='utf-8') as f:
            trace_content = f.read()
        sys.stdout.flush()
    except Exception as e:
        logging.error(f"Failed to load files: {e}")
        sys.stdout.flush()
        return {'status': 'failed', 'trace_file': trace_file}

    # Read trace file
    try:
        logging.info("Reading trace file into DataFrame...")
        df = pd.read_csv(StringIO(trace_content), encoding='utf-8')
        sys.stdout.flush()
    except Exception as e:
        logging.error(f"Failed to read trace file into DataFrame: {e}")
        sys.stdout.flush()
        return {'status': 'failed', 'trace_file': trace_file}

    logging.info("Creating temporary model file...")
    temp_model_path = os.path.join(model_dir, f"temp_{model_file}")
    try:
        # Load model
        try:
            with open(temp_model_path, 'w', encoding='utf-8') as f:
                f.write(model_content)
            logging.info("Generating model from file...")
            model, _init_marking, _final_marking = generate_model_from_file(temp_model_path)
            pm4py_net = model.pm4py_net
            pm4py_initial_marking = model.pm4py_initial_marking
            pm4py_final_marking = model.pm4py_final_marking
            sys.stdout.flush()
        except Exception as e:
            logging.error(f"Failed to generate model: {e}")
            return {'status': 'failed', 'trace_file': trace_file}

        # Process traces
        logging.info("Grouping traces by case:concept:name...")
        try:
            traces = df.groupby('case:concept:name')['Activity'].apply(list).to_dict()
        except Exception as e:
            logging.error(f"Failed to group traces: {e}")
            return {'status': 'failed', 'trace_file': trace_file}

        num_traces = len(traces)
        avg_trace_length = np.mean([len(t) for t in traces.values()]) if traces else 0

        logging.info(f"Found {num_traces} traces with average length {avg_trace_length:.2f}")
        sys.stdout.flush()

        # Resume from the checkpoint file unless the caller asked for a fresh run.
        if not start_from_scratch and os.path.exists(temp_results_file):
            end_trace = _last_processed_trace(temp_results_file, trace_file)
            if end_trace is not None:
                if end_trace >= num_traces:
                    logging.info(f"All {num_traces} traces already processed for {trace_file}")
                    return {'status': 'completed', 'trace_file': trace_file}
                # An explicit start index chosen by the caller wins over the checkpoint.
                if start_trace_idx == 1:
                    start_trace_idx = end_trace + 1
                    logging.info(f"Found processed traces up to {end_trace}, starting from trace {start_trace_idx}")
                    sys.stdout.flush()

        # Validate start_trace_idx
        if start_trace_idx > num_traces:
            logging.error(f"Starting trace index {start_trace_idx} exceeds total traces {num_traces}. Starting from trace 1.")
            start_trace_idx = 1
            sys.stdout.flush()

        # Batch accumulators: traces completed since the last checkpoint.
        temp_astar_times, temp_astar_costs, temp_lp_times, temp_lp_costs, temp_lp_success, temp_continuous_lp_success = [], [], [], [], 0, 0
        temp_integer_lp_traces = []
        batch_start_idx = start_trace_idx
        last_completed_idx = start_trace_idx - 1

        def flush_batch(range_end):
            # Reads the enclosing batch variables at call time, so it always
            # writes the batch that is pending at that moment.
            save_results(trace_file, range_end, num_traces, avg_trace_length, astar_times, astar_costs, lp_times, lp_costs, lp_success, continuous_lp_success, temp_astar_times, temp_astar_costs, temp_lp_times, temp_lp_costs, temp_lp_success, temp_continuous_lp_success, results_file, temp_results_file, batch_start_idx)
            save_integer_lp_traces(temp_integer_lp_traces, trace_file, integer_lp_file, batch_start_idx, range_end)

        for trace_idx, (trace_id, trace) in enumerate(traces.items(), 1):
            if trace_idx < start_trace_idx:
                continue

            logging.info(f"\nProcessing trace {trace_idx}/{num_traces} (ID: {trace_id})")
            sys.stdout.flush()

            try:
                outcome = _align_single_trace(trace, trace_idx, num_traces, pm4py_net, pm4py_initial_marking, pm4py_final_marking, noise_level, time_limit)

                # Commit this trace exactly once to the batch and to the session
                # totals. Nothing is recorded for a trace that raised, so one
                # failure cannot duplicate or discard the results of other traces.
                temp_astar_times.append(outcome['astar_time'])
                temp_astar_costs.append(outcome['astar_cost'])
                temp_lp_times.extend(outcome['lp_times'])
                astar_times.append(outcome['astar_time'])
                astar_costs.append(outcome['astar_cost'])
                lp_times.extend(outcome['lp_times'])
                if outcome['lp_cost'] is not None:
                    temp_lp_costs.append(outcome['lp_cost'])
                    lp_costs.append(outcome['lp_cost'])
                temp_lp_success += outcome['lp_success']
                lp_success += outcome['lp_success']
                temp_continuous_lp_success += outcome['continuous_lp_success']
                continuous_lp_success += outcome['continuous_lp_success']
                if outcome['used_integer_lp']:
                    # Stored as text so the ids can be comma-joined when saved.
                    temp_integer_lp_traces.append(str(trace_id))
                    integer_lp_traces.append(str(trace_id))
                last_completed_idx = trace_idx

                # Save temporary results
                if len(temp_astar_times) >= batch_size or trace_idx == num_traces:
                    flush_batch(trace_idx)
                    temp_astar_times, temp_astar_costs, temp_lp_times, temp_lp_costs, temp_lp_success, temp_continuous_lp_success = [], [], [], [], 0, 0
                    temp_integer_lp_traces = []
                    batch_start_idx = trace_idx + 1

            except KeyboardInterrupt:
                logging.info(f"Interrupted at trace {trace_idx}/{num_traces}")
                # Checkpoint only fully completed traces; ending the range at the
                # last completed index makes a resume re-run the interrupted trace.
                if temp_astar_times:
                    flush_batch(last_completed_idx)
                return {'status': 'interrupted', 'trace_file': trace_file}
            except Exception as e:
                # Skip the failing trace but keep the rest of the pending batch.
                logging.error(f"Error processing trace {trace_idx}/{num_traces}: {e}")
                sys.stdout.flush()
                continue

        # A batch is still pending only when the final trace(s) failed after
        # earlier traces of the same batch had completed.
        if temp_astar_times:
            flush_batch(num_traces)
        # Session summary row listing every trace that needed the integer LP.
        if astar_times:
            save_integer_lp_traces(integer_lp_traces, trace_file, integer_lp_file, 1, num_traces)
        # NOTE(review): results_file is never written by this function and
        # save_final_results is not called here — confirm whether the final
        # summary is meant to be produced at this point.

        return {'status': 'completed', 'trace_file': trace_file}
    finally:
        # Single cleanup point for the temporary model copy on every exit path.
        if os.path.exists(temp_model_path):
            os.remove(temp_model_path)
        sys.stdout.flush()


def _last_processed_trace(temp_results_file, trace_file):
    """Return the highest end index found in 'Trace Range' rows for trace_file, or None."""
    for attempt, encoding in enumerate(('utf-8-sig', 'latin1')):
        try:
            temp_results_df = pd.read_csv(temp_results_file, encoding=encoding)
            processed_ranges = temp_results_df[temp_results_df['Dataset'] == trace_file]['Trace Range']
        except Exception as e:
            if attempt == 0:
                logging.warning(f"Error reading {temp_results_file}: {e}. Trying with 'latin1' encoding.")
            else:
                logging.error(f"Failed to read {temp_results_file} with 'latin1': {e}")
            sys.stdout.flush()
            continue

        end_indices = []
        for trace_range in processed_ranges:
            # Ranges are stored as "<start>-<end>"; only the end index matters here.
            if isinstance(trace_range, str) and '-' in trace_range:
                try:
                    end_indices.append(int(trace_range.split('-')[-1]))
                except (ValueError, TypeError):
                    logging.warning(f"Invalid Trace Range value in {temp_results_file}: '{trace_range}'. Skipping.")
        return max(end_indices) if end_indices else None
    return None


def _solve_with_time_limit(prob, time_limit, label, trace_idx, num_traces):
    """Solve prob with CBC in a worker thread; return (elapsed_seconds, status_name, timed_out)."""
    solver = pulp.PULP_CBC_CMD(timeLimit=time_limit, msg=False, options=['-maxSeconds', str(time_limit)])
    started = time.time()
    worker = threading.Thread(target=prob.solve, args=(solver,))
    worker.start()
    notified = False

    while worker.is_alive():
        elapsed = time.time() - started
        if elapsed > time_limit * 0.75 and not notified:
            logging.info(f"{label} - Elapsed: {elapsed:.2f}s")
            notified = True
            sys.stdout.flush()
        if elapsed > time_limit + 10:
            # The worker cannot actually be killed; we stop waiting for it and
            # report a timeout instead of reading a status it may still change.
            logging.warning(f"{label} for trace {trace_idx}/{num_traces} exceeded time limit by 10s, forcing termination")
            worker.join(1)
            sys.stdout.flush()
            return elapsed, None, True
        time.sleep(0.1)

    return time.time() - started, pulp.LpStatus[prob.status], False


def _align_single_trace(trace, trace_idx, num_traces, pm4py_net, pm4py_initial_marking, pm4py_final_marking, noise_level, time_limit):
    """Run A*, the continuous LP and, if needed, the integer LP for one trace.

    Returns a dict with 'astar_time', 'astar_cost', 'lp_times' (list of recorded
    LP durations), 'lp_cost' (None when no LP reached "Optimal" with a recorded
    cost), 'lp_success', 'continuous_lp_success' (0 or 1) and 'used_integer_lp'.
    """
    outcome = {'astar_time': None, 'astar_cost': None, 'lp_times': [], 'lp_cost': None,
               'lp_success': 0, 'continuous_lp_success': 0, 'used_integer_lp': False}

    # A* Alignment
    trace_obj = Trace([{"concept:name": act} for act in trace])
    event_log = EventLog([trace_obj])
    start_astar = time.time()
    alignment_result = alignments_algo.apply(event_log, pm4py_net, pm4py_initial_marking, pm4py_final_marking)[0]
    astar_time = time.time() - start_astar
    # NOTE(review): dividing by 10000 presumably undoes the cost scaling used by
    # the alignment algorithm — confirm against its cost function.
    astar_cost = alignment_result['cost'] / 10000
    outcome['astar_time'] = astar_time
    outcome['astar_cost'] = astar_cost
    logging.info(f"A* - Time: {astar_time:.2f}s, Cost: {astar_cost:.2f}")
    sys.stdout.flush()

    # Build synchronous product
    sync_net, sync_marking = build_synchronous_product(pm4py_net, pm4py_initial_marking, trace)
    logging.info(f"Synchronous net: {len(sync_net.places)} places, {len(sync_net.transitions)} transitions")
    sys.stdout.flush()

    C_np, row_names, col_names = compute_sorted_incidence_matrix(sync_net)
    logging.info(f"Incidence matrix shape: {C_np.shape}")
    sys.stdout.flush()

    m0 = get_sync_initial_marking_vector(sync_net, pm4py_initial_marking, trace)
    mf = get_sync_final_marking(sync_net, pm4py_final_marking, trace)
    # A transition costs 0 when both comma-separated halves of its label agree, else 1.
    cost_map = {t.name: 0 if t.label.split(",")[0] == t.label.split(",")[1] else 1 for t in sync_net.transitions}

    # Dynamic upper bound
    margin = {1: 1 if astar_cost == 0 else 3, 2: 1 if astar_cost == 0 else 3, 3: 3 if astar_cost == 0 else 5, 4: 5 if astar_cost == 0 else 5}.get(noise_level, 2 if astar_cost == 0 else 5)
    x = len(trace) + int(astar_cost) + margin
    logging.info(f"Setting upper bound x = {x} (trace length: {len(trace)}, margin: {margin})")
    sys.stdout.flush()

    # Continuous LP
    logging.info("Running Continuous LP...")
    sys.stdout.flush()
    prob, _y_vars, _m_vars = build_incremental_lp(C_np, m0, x, cost_map, mf, row_names, col_names, integer=False)
    lp_time, lp_status, timed_out = _solve_with_time_limit(prob, time_limit, "Continuous LP", trace_idx, num_traces)

    run_integer_lp = True
    if timed_out:
        # Record the timed-out attempt once.
        outcome['lp_times'].append(lp_time)
    elif lp_status == "Optimal":
        lp_cost = pulp.value(prob.objective)
        logging.info(f"Continuous LP - Time: {lp_time:.2f}s, Cost: {lp_cost:.2f}, Status: {lp_status}")
        if abs(lp_cost - astar_cost) < 1e-6:
            outcome['lp_times'].append(lp_time)
            outcome['lp_cost'] = lp_cost
            outcome['lp_success'] = 1
            outcome['continuous_lp_success'] = 1
            run_integer_lp = False
        else:
            logging.info("Continuous LP cost differs from A*, running Integer LP...")
    else:
        logging.info(f"Continuous LP - Time: {lp_time:.2f}s, Status: {lp_status} (Failed to converge)")
        outcome['lp_times'].append(lp_time)
    sys.stdout.flush()

    # Integer LP
    if run_integer_lp:
        logging.info("Running Integer LP...")
        sys.stdout.flush()
        prob, _y_vars, _m_vars = build_incremental_lp(C_np, m0, x, cost_map, mf, row_names, col_names, integer=True)
        lp_time, lp_status, timed_out = _solve_with_time_limit(prob, time_limit, "Integer LP", trace_idx, num_traces)
        outcome['lp_times'].append(lp_time)
        outcome['used_integer_lp'] = True
        if not timed_out:
            if lp_status == "Optimal":
                lp_cost = pulp.value(prob.objective)
                outcome['lp_cost'] = lp_cost
                outcome['lp_success'] = 1
                logging.info(f"Integer LP - Time: {lp_time:.2f}s, Cost: {lp_cost:.2f}, Status: {lp_status}")
            else:
                logging.info(f"Integer LP - Time: {lp_time:.2f}s, Status: {lp_status} (Failed to converge)")
        sys.stdout.flush()

    return outcome

# Function to save temporary results
def save_results(trace_file, trace_idx, num_traces, avg_trace_length, astar_times, astar_costs, lp_times, lp_costs, lp_success, continuous_lp_success, temp_astar_times, temp_astar_costs, temp_lp_times, temp_lp_costs, temp_lp_success, temp_continuous_lp_success, results_file, temp_results_file, batch_start_idx):
    """Append one batch-summary row to the temporary results CSV.

    Only the ``temp_*`` batch values, ``trace_file``, ``trace_idx``,
    ``avg_trace_length``, ``batch_start_idx`` and ``temp_results_file`` are
    used; the session-wide arguments and ``results_file`` are accepted but not
    read. The row is appended to ``temp_results_file`` when it already exists
    (read as utf-8-sig, then latin1); if neither read works the file is
    replaced by a new one holding just this row.
    """
    batch_count = len(temp_astar_times)
    # None entries are ignored when averaging LP times and costs.
    recorded_lp_times = [value for value in temp_lp_times if value is not None]
    recorded_lp_costs = [value for value in temp_lp_costs if value is not None]

    if batch_count > 0:
        lp_success_pct = (temp_lp_success / batch_count) * 100
        continuous_success_pct = (temp_continuous_lp_success / batch_count) * 100
    else:
        lp_success_pct = 0
        continuous_success_pct = 0

    temp_result = {
        "Dataset": trace_file,
        "Trace Range": f"{batch_start_idx}-{trace_idx}",
        "Number of Traces": batch_count,
        "Average Trace Length": avg_trace_length,
        "Average A* Time (s)": np.mean(temp_astar_times) if temp_astar_times else 0,
        "Average A* Cost": np.mean(temp_astar_costs) if temp_astar_costs else 0,
        "LP Success Rate (%)": lp_success_pct,
        "Continuous LP Success Rate (%)": continuous_success_pct,
        "Average LP Time (s)": np.mean(recorded_lp_times) if recorded_lp_times else 0,
        # Stays None (an empty CSV cell) when no LP cost was recorded in the batch.
        "Average LP Cost": np.mean(recorded_lp_costs) if recorded_lp_costs else None
    }
    new_row = pd.DataFrame([temp_result])

    if not os.path.exists(temp_results_file):
        temp_results_df = new_row
    else:
        try:
            existing_rows = pd.read_csv(temp_results_file, encoding='utf-8-sig')
            temp_results_df = pd.concat([existing_rows, new_row], ignore_index=True)
        except Exception as e:
            logging.error(f"Failed to update {temp_results_file}: {e}. Trying with 'latin1' encoding.")
            try:
                existing_rows = pd.read_csv(temp_results_file, encoding='latin1')
                temp_results_df = pd.concat([existing_rows, new_row], ignore_index=True)
            except Exception as e2:
                logging.error(f"Failed with 'latin1' encoding: {e2}. Creating new file.")
                temp_results_df = new_row
    temp_results_df.to_csv(temp_results_file, index=False, encoding='utf-8-sig')

    logging.info(f"\n✅ === Temporary results for traces {batch_start_idx}-{trace_idx} saved to {temp_results_file} === ✅")
    logging.info(f"Temporary result: {temp_result}")
    sys.stdout.flush()

# Function to save integer LP traces
def save_integer_lp_traces(integer_lp_traces, trace_file, integer_lp_file, batch_start_idx, trace_idx):
    """Append one row listing the traces that required the integer LP.

    Args:
        integer_lp_traces: Trace ids of any type; each is converted with ``str``.
            Nothing is written when this is empty.
        trace_file: Dataset name stored in the "Dataset" column.
        integer_lp_file: CSV file to append to (created when missing).
        batch_start_idx: First trace index of the range being reported.
        trace_idx: Last trace index of the range being reported.

    The existing file is read as utf-8-sig, then latin1; if neither read works
    the file is replaced by a new one holding just this row.
    """
    if not integer_lp_traces:
        return
    integer_result = {
        "Dataset": trace_file,
        "Trace Range": f"{batch_start_idx}-{trace_idx}",
        # Case ids parsed from a numeric CSV column are not strings, and
        # str.join raises TypeError on them, so convert each id first.
        "Integer LP Trace IDs": ",".join(str(trace_id) for trace_id in integer_lp_traces)
    }
    if os.path.exists(integer_lp_file):
        try:
            integer_df = pd.read_csv(integer_lp_file, encoding='utf-8-sig')
            integer_df = pd.concat([integer_df, pd.DataFrame([integer_result])], ignore_index=True)
        except Exception as e:
            logging.error(f"Failed to update {integer_lp_file}: {e}. Trying with 'latin1' encoding.")
            try:
                integer_df = pd.read_csv(integer_lp_file, encoding='latin1')
                integer_df = pd.concat([integer_df, pd.DataFrame([integer_result])], ignore_index=True)
            except Exception as e2:
                logging.error(f"Failed with 'latin1' encoding: {e2}. Creating new file.")
                integer_df = pd.DataFrame([integer_result])
    else:
        integer_df = pd.DataFrame([integer_result])
    integer_df.to_csv(integer_lp_file, index=False, encoding='utf-8-sig')
    logging.info(f"Saved integer LP traces to {integer_lp_file}: {integer_result}")
    sys.stdout.flush()

# Function to save final results
def save_final_results(trace_file, num_traces, avg_trace_length, astar_times, astar_costs, lp_times, lp_costs, lp_success, continuous_lp_success, results_file):
    """Log a summary for one dataset and store it as that dataset's row in the results CSV.

    Averages ignore None entries in ``lp_times`` / ``lp_costs``; success rates
    are percentages of ``num_traces``. Any row already stored for
    ``trace_file`` in ``results_file`` is replaced. The existing file is read
    as utf-8-sig, then latin1; if neither read works the file is replaced by a
    new one holding just this row.
    """
    recorded_lp_times = [value for value in lp_times if value is not None]
    recorded_lp_costs = [value for value in lp_costs if value is not None]

    avg_astar_time = np.mean(astar_times) if astar_times else 0
    avg_astar_cost = np.mean(astar_costs) if astar_costs else 0
    avg_lp_time = np.mean(recorded_lp_times) if recorded_lp_times else 0
    # None marks "no LP cost available" and is shown as N/A in the log.
    avg_lp_cost = np.mean(recorded_lp_costs) if recorded_lp_costs else None

    if num_traces > 0:
        lp_success_rate = (lp_success / num_traces) * 100
        continuous_lp_success_rate = (continuous_lp_success / num_traces) * 100
    else:
        lp_success_rate = 0
        continuous_lp_success_rate = 0

    summary_lines = (
        f"\nSummary for {trace_file}:",
        f"Number of Traces: {num_traces}",
        f"Average Trace Length: {avg_trace_length:.2f}",
        f"Average A* Time: {avg_astar_time:.2f}s",
        f"Average A* Cost: {avg_astar_cost:.2f}",
        f"LP Success Rate: {lp_success_rate:.2f}%",
        f"Continuous LP Success Rate: {continuous_lp_success_rate:.2f}%",
        f"Average LP Time: {avg_lp_time:.2f}s",
        f"Average LP Cost: {'N/A' if avg_lp_cost is None else f'{avg_lp_cost:.2f}'}",
    )
    for line in summary_lines:
        logging.info(line)
    sys.stdout.flush()

    result = {
        "Dataset": trace_file,
        "Number of Traces": num_traces,
        "Average Trace Length": avg_trace_length,
        "Average A* Time (s)": avg_astar_time,
        "Average A* Cost": avg_astar_cost,
        "LP Success Rate (%)": lp_success_rate,
        "Continuous LP Success Rate (%)": continuous_lp_success_rate,
        "Average LP Time (s)": avg_lp_time,
        "Average LP Cost": avg_lp_cost
    }
    new_row = pd.DataFrame([result])

    if not os.path.exists(results_file):
        results_df = new_row
    else:
        try:
            stored = pd.read_csv(results_file, encoding='utf-8-sig')
            # Drop the previous row for this dataset before adding the new one.
            other_datasets = stored[stored['Dataset'] != trace_file]
            results_df = pd.concat([other_datasets, new_row], ignore_index=True)
        except Exception as e:
            logging.error(f"Failed to update {results_file}: {e}. Trying with 'latin1' encoding.")
            try:
                stored = pd.read_csv(results_file, encoding='latin1')
                other_datasets = stored[stored['Dataset'] != trace_file]
                results_df = pd.concat([other_datasets, new_row], ignore_index=True)
            except Exception as e2:
                logging.error(f"Failed with 'latin1' encoding: {e2}. Creating new file.")
                results_df = new_row
    results_df.to_csv(results_file, index=False, encoding='utf-8-sig')

    logging.info(f"Final result for dataset: {result}")
    logging.info(f"\n✅ === Dataset {trace_file} Completed! Results Saved to {results_file} === ✅")
    sys.stdout.flush()

In [12]:
import logging
import pandas as pd
import os

# Configure logging
logging.basicConfig(level=logging.INFO, format='%(message)s')
logger = logging.getLogger()
# Attach the file handler only when none is present, so re-running this cell
# does not add a second one.
if not any(isinstance(handler, logging.FileHandler) for handler in logger.handlers):
    file_handler = logging.FileHandler('log_output.txt', mode='a', encoding='utf-8')
    file_handler.setFormatter(logging.Formatter('%(message)s'))
    logger.addHandler(file_handler)


def _dataset_already_completed(results_path, trace_file, data_dir="./pr"):
    """Return True when results_path holds a row for trace_file covering all of its traces.

    The results file is read as utf-8-sig, then latin1. Any failure to read it,
    or to read the dataset itself, is logged and treated as "not completed" so
    that the dataset gets processed.
    """
    if not os.path.exists(results_path):
        return False
    for attempt, encoding in enumerate(('utf-8-sig', 'latin1')):
        try:
            results_df = pd.read_csv(results_path, encoding=encoding)
            dataset_results = results_df[results_df['Dataset'] == trace_file]
        except Exception as e:
            if attempt == 0:
                logging.warning(f"Error reading {results_path}: {e}. Trying with 'latin1' encoding.")
            else:
                logging.error(f"Failed with 'latin1' encoding: {e}. Processing dataset.")
            continue

        if dataset_results.empty:
            return False
        try:
            dataset_df = pd.read_csv(os.path.join(data_dir, trace_file), encoding='utf-8-sig')
            total_traces = len(dataset_df.groupby('case:concept:name'))
            if dataset_results['Number of Traces'].iloc[-1] >= total_traces:
                logging.info(f"✅ Dataset {trace_file} already completed with {total_traces} traces. Skipping.")
                return True
        except Exception as e:
            logging.warning(f"Error reading dataset {trace_file} to determine total traces: {e}. Processing dataset.")
        return False
    return False


# Main execution
if __name__ == "__main__":
    while True:
        selection = select_model_and_datasets()
        if selection is None or (len(selection) == 3 and not selection[1]):
            logging.info("✅ No datasets available to process. Exiting.")
            break

        # A 3-tuple carries no explicit output paths, so they are derived per
        # dataset below. The flag is computed once from the selection and kept
        # apart from the per-dataset outcome, so every dataset in the list gets
        # its own files.
        derive_paths = len(selection) == 3
        if derive_paths:
            model_file, trace_files, model_dir = selection
            start_from_scratch = False
            start_trace_idx = 1
        else:
            model_file, trace_files, model_dir, temp_results_file, integer_lp_file, results_file, start_from_scratch, start_trace_idx = selection

        if not trace_files:
            logging.info(f"✅ No valid datasets available for model {model_file}. Skipping to next model selection.")
            continue

        interrupted = False
        for trace_file in trace_files:
            # Check if dataset is completed
            final_results_path = os.path.join(model_dir, f"results_final_{model_dir}.csv")
            if _dataset_already_completed(final_results_path, trace_file):
                continue

            if derive_paths:
                temp_results_file = os.path.join(model_dir, f"temp_results_{trace_file.replace('.csv', '')}.csv")
                integer_lp_file = os.path.join(model_dir, f"integer_lp_traces_{trace_file.replace('.csv', '')}.csv")
                results_file = os.path.join(model_dir, f"results_final_{model_dir}.csv")

            outcome = process_single_dataset(
                model_file, trace_file, model_dir,
                temp_results_file=temp_results_file,
                integer_lp_file=integer_lp_file,
                results_file=results_file,
                start_from_scratch=start_from_scratch,
                start_trace_idx=start_trace_idx
            )
            logging.info(f"✅ Result for {trace_file}: {outcome['status']}")
            if outcome['status'] == 'interrupted':
                logging.info("✅ Program interrupted by user. Restart to continue from last trace.")
                interrupted = True
                break

        # An interruption ends the whole session; otherwise prompt for the next selection.
        if interrupted:
            break


=== Step 1: Select a Model ===
Available process models:
1. pr-1-11-1244-A59.txt
2. pr-3-11-1151-A48.txt
3. pr-3-11-1908-A32.txt
4. pr-8-11-1912-A57.txt
Please select a model by entering its number (e.g., '2' for pr-3-11-1151-A48.txt).

=== Step 2: Review Datasets for Model pr-1-11-1244-A59.txt ===
Available datasets:
1. pr-1-11-1244-A59_m17_l1.csv 
2. pr-1-11-1244-A59_m17_l1_noise.csv 
3. pr-1-11-1244-A59_m29_l2.csv (Completed)
4. pr-1-11-1244-A59_m29_l2_noise.csv (In progress, last trace: 996)
5. pr-1-11-1244-A59_m41_l3.csv 
6. pr-1-11-1244-A59_m41_l3_noise.csv 
7. pr-1-11-1244-A59_m55_l4.csv (In progress, last trace: 183)
8. pr-1-11-1244-A59_m55_l4_noise.csv 

=== Step 3: Choose Processing Option ===
Continue from the last trace of the first incomplete dataset (pr-1-11-1244-A59_m17_l1.csv)? (y/n)
  - This will append results to existing files (e.g., temp_results_pr-1-11-1244-A59_m17_l1.csv).

=== Step 4: Select a Dataset ===
Please select a dataset by entering its number.
1. pr-1-1