In [3]:
import pandas as pd
import pm4py
from pm4py.objects.log.util import dataframe_utils
from pm4py.objects.conversion.log import converter as log_converter
from pm4py.visualization.petri_net import visualizer as pn_visualizer 

import os

In [4]:
def load_training_data(base_dir="../simulated_data"):
    """Load every ``train_preprocessed.csv`` found under ``base_dir``.

    Each immediate sub-folder of ``base_dir`` is expected to hold its training
    data at ``<folder>/autonomous/train_preprocessed.csv``. Folders without
    that file are skipped silently.

    Parameters
    ----------
    base_dir : str, optional
        Root directory containing one sub-folder per simulation run.
        Defaults to ``"../simulated_data"`` (the original hard-coded value).

    Returns
    -------
    list of dict
        One ``{"folder": <sub-folder name>, "data": <DataFrame>}`` entry per
        successfully loaded file, ordered by folder name. Empty if
        ``base_dir`` is not a directory or nothing could be loaded.
    """
    training_dfs = []

    # isdir (rather than exists) also rejects a plain file, which would make
    # os.listdir raise below.
    if not os.path.isdir(base_dir):
        print(f"Directory {base_dir} does not exist")
        return training_dfs

    # os.listdir returns entries in arbitrary order; sort so the result (and
    # everything derived from it) is reproducible across runs and machines.
    for folder in sorted(os.listdir(base_dir)):
        # The file can only exist if both the folder and its "autonomous"
        # sub-directory are directories, so one existence check is enough.
        train_file = os.path.join(
            base_dir, folder, "autonomous", "train_preprocessed.csv"
        )
        if not os.path.exists(train_file):
            continue

        try:
            df = pd.read_csv(train_file)
        except Exception as e:
            # Best-effort load: report the unreadable file and carry on with
            # the remaining folders instead of aborting the whole run.
            print(f"Error loading {train_file}: {str(e)}")
            continue

        # Keep the folder name alongside the frame for later reference.
        training_dfs.append({"folder": folder, "data": df})
        print(f"Loaded training data from {folder}")

    return training_dfs

In [5]:
# Load all available training sets; each entry is a dict with the source
# folder name under "folder" and the loaded DataFrame under "data".
dfs= load_training_data()

In [6]:
def rename_dataframe_columns(training_dfs):
    """Rename columns to the pm4py naming scheme and convert each dataset.

    For every dataset the source columns ``end_timestamp``, ``case_id`` and
    ``activity_name``/``activity`` are renamed to ``time:timestamp``,
    ``case:concept:name`` and ``concept:name``. The timestamp columns are then
    parsed to datetimes and the frame is passed to ``log_converter.apply``.

    Parameters
    ----------
    training_dfs : list of dict
        Entries of the form ``{"folder": <name>, "data": <DataFrame>}``.

    Returns
    -------
    list
        One converted log per dataset, in input order.

    Raises
    ------
    KeyError
        If a dataset has no ``time:timestamp`` column after renaming.

    Notes
    -----
    The original DataFrame objects are left untouched; the renamed, parsed
    copy is stored back under ``dataset['data']`` so callers still see the
    renamed columns through ``training_dfs``.
    """
    # Source column -> target column. Two source names map to 'concept:name';
    # dict order decides which one wins when both are present.
    column_map = {
        'end_timestamp': 'time:timestamp',
        'case_id': 'case:concept:name',
        'activity_name': 'concept:name',
        'activity': 'concept:name',
    }
    # 'start_timestamp' is only converted when the dataset provides it.
    timestamp_columns = ('start_timestamp', 'time:timestamp')

    logs = []
    for dataset in training_dfs:
        source = dataset['data']

        # Rename only columns that exist, and never rename onto a label that
        # is already present or already claimed: that would create duplicate
        # column labels and break every later column lookup.
        taken = set(source.columns)
        existing_columns = {}
        for old, new in column_map.items():
            if old in source.columns and new not in taken:
                existing_columns[old] = new
                taken.add(new)

        # rename() without inplace returns a new frame, so the caller's
        # original DataFrame object is not modified.
        frame = source.rename(columns=existing_columns)

        print(f"Renamed columns in {dataset['folder']}")
        print(f"New columns: {frame.columns.tolist()}")

        if 'time:timestamp' not in frame.columns:
            raise KeyError(
                f"Dataset {dataset['folder']!r} has no 'time:timestamp' column "
                f"(expected an 'end_timestamp' column to rename); "
                f"available columns: {frame.columns.tolist()}"
            )

        # format='mixed' infers the format per element, so rows with and
        # without fractional seconds parse together.
        for column in timestamp_columns:
            if column in frame.columns:
                frame[column] = pd.to_datetime(frame[column], format='mixed')

        dataset['data'] = frame
        logs.append(log_converter.apply(frame))
    return logs
# Rename columns, parse timestamps and convert every loaded dataset; yields one
# converted log per entry of `dfs`. As a side effect the "data" entries in
# `dfs` end up carrying the renamed columns.
logs= rename_dataframe_columns(dfs)

In [7]:
# Discover one Petri net per converted log and hand it to the visualizer.
# Note: the loop variables (log, net, initial_marking, final_marking, gviz)
# remain in the notebook namespace holding the values for the LAST log only.
for log in logs:
    # Inductive-miner discovery with noise_threshold=0.0. Discovery is pointed
    # at the 'start_timestamp' column instead of 'time:timestamp' (which holds
    # the renamed end timestamp) -- NOTE(review): confirm this is intentional.
    net, initial_marking, final_marking = pm4py.discover_petri_net_inductive(
                                            log=log, 
                                            noise_threshold=0.0,
                                            timestamp_key='start_timestamp',)
    # Build the visualization object for the net and its markings, then show it.
    gviz = pn_visualizer.apply(net, initial_marking, final_marking)
    pn_visualizer.view(gviz)