In [58]:
import pandas as pd
import os
import datetime

In [59]:
# Graph Templates
def create_pd3_template(lld_templates: str, gpm_templates: str, lld_edges: str, lld_gpm_edges: str) -> str:
    PD3_Templeate = f"""
classDiagram
    direction LR

    %% LLD Actions\n{lld_templates}
    %% GPM Actions\n{gpm_templates}
    %% LLD Edges\n{lld_edges}
    %% LLD-GPM Edges\n{lld_gpm_edges}
    """
    return PD3_Templeate

def create_GPM_templates(GPM_IDs: list[int], GPM_inputs: list[str], GPM_names: list[str], GPM_outputs: list[str]) -> str:
    GPMs = ""
    for row in range(len(GPM_names)):
        GPMs += f"""
        class GPM_{GPM_IDs[row]}["GPM_{GPM_IDs[row]}: {GPM_names[row]}"] {{
            Input: {GPM_inputs[row]}
            Output: {GPM_outputs[row]}
        }}
        \n"""
    return GPMs

def create_LLD_templates(LLD_IDs: list[int], LLD_inputs: list[str], LLD_names: list[str], LLD_outputs: list[str]) -> str:
    LLDs = ""
    for row in range(len(LLD_names)):
        LLDs += f"""
        class LLD_{LLD_IDs[row]}["LLD_{LLD_IDs[row]}: {LLD_names[row]}"] {{
            Input: {LLD_inputs[row]}
            Output: {LLD_outputs[row]}
        }}
        \n"""
    return LLDs

def create_LLD_edges(LLD_IDs: list[int]) -> str:
    LLD_edges = ""
    for row in range(1, len(LLD_IDs)):
        LLD_edges += f"""LLD_{LLD_IDs[row-1]} --> LLD_{LLD_IDs[row]}\n"""
    return LLD_edges

def create_LLD_GPM_edges(LLD_IDs: list[int], LLD_GPM_edges: list[int]) -> str:
    generalization_edges = ""
    for row in range(len(LLD_GPM_edges)):
        generalization_edges += f"""LLD_{LLD_IDs[row]} <|.. GPM_{LLD_GPM_edges[row]} : Implements\n"""
    return generalization_edges


In [60]:
# PartOf Relationship Template
def create_GPM_partof_diagram(gpm_templates: str, gpm_edges: str) -> str:
    PD3_Templeate = f"""
classDiagram
    direction BT

    %% GPM Actions\n{gpm_templates}
    %% GPM Edges\n{gpm_edges}
    """
    return PD3_Templeate

def create_GPM_edges(GPM_IDs: list[str], gpm_partOfs: list[str]) -> str:
    edges_str = ""
    for row in range(len(gpm_partOfs)):
        edges_str += f"GPM_{GPM_IDs[row]} *-- GPM_{gpm_partOfs[row]}\n"
    return edges_str

In [61]:
# Which LLD actions GPM actions derive from
def create_LLD_templates_with_Log(LLD_logs: list[int], LLD_IDs: list[int], LLD_inputs: list[str], LLD_names: list[str], LLD_outputs: list[str]) -> str:
    LLDs = ""
    for row in range(len(LLD_names)):
        LLDs += f"""
        class LLD_{LLD_IDs[row]}["LLD_{LLD_IDs[row]}: {LLD_names[row]}"] {{
            Log: {LLD_logs[row]}
            Input: {LLD_inputs[row]}
            Output: {LLD_outputs[row]}
        }}
        \n"""
    return LLDs

def create_GPM_reference_template(lld_templates: str, gpm_templates: str, lld_gpm_edges: str) -> str:
    PD3_Templeate = f"""
classDiagram
    direction LR
    %% GPM Actions\n{gpm_templates}
    %% LLD Actions\n{lld_templates}
    %% LLD-GPM Edges\n{lld_gpm_edges}
    """
    return PD3_Templeate

def GPM_LLD_references(df_pd3, df_GPM, LLD_Logs = "Log", LLD_IDs = "Action ID", LLD_Inputs="Input", LLD_Names="Action", LLD_Outputs="Output", GPM_IDs="ClassID", GPM_Inputs="ClassInput", GPM_Names="ClassName", GPM_Outputs="ClassOutput", LLD_GPM_Edges="ClassID"):
    """
    Args: 
        Information on the correspondence of GPM actions to LLD actions.
    Returns:
        A template of each GPM action with its LLD action references.
    """
    LLD_templates = create_LLD_templates_with_Log(df_pd3[LLD_Logs].tolist(), df_pd3[LLD_IDs].tolist(), df_pd3[LLD_Inputs].tolist(), df_pd3[LLD_Names].tolist(), df_pd3[LLD_Outputs].tolist())
    GPM_templates = create_GPM_templates(df_GPM[GPM_IDs].tolist(), df_GPM[GPM_Inputs].tolist(), df_GPM[GPM_Names].tolist(), df_GPM[GPM_Outputs].tolist())
    LLD_GPM_edges = create_LLD_GPM_edges(df_pd3[LLD_IDs].tolist(), df_pd3[LLD_GPM_Edges].tolist())
    gpm_lld_references = create_GPM_reference_template(LLD_templates, GPM_templates, LLD_GPM_edges)
    return gpm_lld_references

In [62]:
# Switch LLD actions into GPM actions
def switch_lld_to_gpm(gpm_templates, gpm_edges) -> str:
    """
    Switches LLD actions into GPM actions
    Args:
        gpm_templates(str): GPM templates
        gpm_edges(str): GPM edges
    """
    GPM_DIAGRAM_TEMPLATE = f"""
classDiagram
    direction LR
    %% GPM Actions\n{gpm_templates}
    %% GPM Edges\n{gpm_edges}
    """
    return GPM_DIAGRAM_TEMPLATE

def create_GPM_diagram_edges(GPM_IDs: list[(int, int)]) -> str:
    GPM_edges = ""
    for (from_gpm, to_gpm) in GPM_IDs:
        GPM_edges += f"""GPM_{from_gpm} --> GPM_{to_gpm}\n"""
    return GPM_edges

def create_GPM_diagram_edges_with_counter(GPM_edges: list[((int, int), (int, int))]) -> str:
    GPM_edges_str = ""
    for ((from_gpm, from_gpm_frequency), (to_gpm, to_gpm_frequency)) in GPM_edges:
        GPM_edges_str += f"""GPM_{from_gpm}_{from_gpm_frequency} --> GPM_{to_gpm}_{to_gpm_frequency}\n"""
    return GPM_edges_str

def create_GPM_templates_for_logs(GPM_IDs: list[int], GPM_inputs: list[str], GPM_names: list[str], GPM_outputs: list[str]) -> str:
    GPMs = ""
    for row in range(len(GPM_names)):
        GPMs += f"""
        class GPM_{GPM_IDs[row]}["GPM_{GPM_IDs[row]}: {GPM_names[row]}"] {{
            Input: {GPM_inputs[row]}
            Output: {GPM_outputs[row]}
        }}
        \n"""
    return GPMs

def create_GPM_templates_with_counter(df_GPM: pd.DataFrame, GPM_edges: list[((int, int), (int, int))], GPM_IDs="ClassID", GPM_Inputs="ClassInput", GPM_Names="ClassName", GPM_Outputs="ClassOutput") -> str:
    GPMs = ""
    gpm_id_freq_set = set()
    for ((from_gpm, from_gpm_frequency), (to_gpm, to_gpm_frequency)) in GPM_edges:
        gpm_id_freq_set.add((from_gpm, from_gpm_frequency))
        gpm_id_freq_set.add((to_gpm, to_gpm_frequency))
    for (gpm_id, gpm_frequency) in gpm_id_freq_set:
        GPMs += f"""
        class GPM_{gpm_id}_{gpm_frequency}["GPM_{gpm_id}: {df_GPM[df_GPM[GPM_IDs]==gpm_id][GPM_Names].values[0]}"] {{
            Input: {df_GPM[df_GPM[GPM_IDs] == gpm_id][GPM_Inputs].values[0]}
            Output: {df_GPM[df_GPM[GPM_IDs] == gpm_id][GPM_Outputs].values[0]}
        }}
        \n"""
    return GPMs

def create_GPM_diagram(df_pd3_original: pd.DataFrame, df_gpm: pd.DataFrame, LLD_IDs = "Action ID", GPM_IDs="ClassID", GPM_Inputs="ClassInput", GPM_Names="ClassName", GPM_Outputs="ClassOutput") -> str:
    """
    Creates a GPM diagram from the given dataframes
    Args:
        df_pd3(pd.DataFrame): PD3 dataframe
        df_gpm(pd.DataFrame): GPM dataframe
    Returns:
        str: GPM diagram
    """
    from collections import Counter
    # FIX 1: Reset index so that iterrows() index (now 0, 1, 2...) matches iloc positions
    # This prevents errors/empty graphs when processing Log groups 2, 3, etc.
    df_pd3 = df_pd3_original.copy().reset_index(drop=True)
    # Get the flow(edges) of LLD actions for each Log
    LLD_edges: list[(int, int)] = [(row[LLD_IDs], df_pd3.iloc[row_index + 1][LLD_IDs]) for row_index, row in df_pd3.iterrows() if row_index < len(df_pd3) - 1]
    
    # Switch LLD actions into GPM actions Action ID -> ClassID
    ## (ID, Frequency)
    GPM_ID_frequency_count = Counter()
    GPM_edges: list[((int, int), (int, int))] = []
    for (from_LLD, to_LLD) in LLD_edges:
        from_GPM = df_pd3[df_pd3[LLD_IDs] == from_LLD][GPM_IDs].values[0]
        from_freq = GPM_ID_frequency_count.get(from_GPM, 0)
        to_GPM = df_pd3[df_pd3[LLD_IDs] == to_LLD][GPM_IDs].values[0]
        # FIX 2: Handle self-loops (e.g., A->A) correctly.
        # If we transition from A to A, we want A_0 -> A_1, not A_0 -> A_0.
        if from_GPM == to_GPM:
            to_freq = from_freq + 1
        else:
            to_freq = GPM_ID_frequency_count.get(to_GPM, 0)
        GPM_edges.append(((from_GPM, from_freq), (to_GPM, to_freq)))
        GPM_ID_frequency_count.update([from_GPM])
    
    # Get the templates of GPM actions
    gpm_templates = create_GPM_templates_with_counter(df_gpm, GPM_edges)

    # Get the templates of GPM edges
    gpm_edges_template = create_GPM_diagram_edges_with_counter(GPM_edges)

    # Combine the templates and edges
    return switch_lld_to_gpm(gpm_templates, gpm_edges_template)

In [67]:
def get_GPM_graph(df) -> dict:
    GPM_graph = {}
    for Class_ID in df["ClassID"].unique():
        parent = df[df["ClassID"] == Class_ID]["PartOf"].unique()
        # print(f"parent: {parent}")
        if len(parent) > 0:
            GPM_graph.setdefault(parent[0], []).append(Class_ID)
    return GPM_graph
GPM_graph = get_GPM_graph(df_GPM)
print(f"GPM_graph: {GPM_graph}")

def get_GPM_descendants(GPM_graph):
    GPM_descendants = {}
    seen = set()
    def get_all_descendants(v, GPM_graph, GPM_descendants, seen):
        if v in seen:
            return
        seen.add(v)
        children = GPM_graph.get(v, [])
        descendants = set()
        descendants.add(v)
        for child in children:
            if child == v:
                continue
            get_all_descendants(child, GPM_graph, GPM_descendants, seen)
            descendants.update(GPM_descendants[child].copy())
        GPM_descendants[v] = descendants
    get_all_descendants(0, GPM_graph, GPM_descendants, seen)
    return GPM_descendants

GPM_descendants = get_GPM_descendants(GPM_graph)
print(f"GPM_descendants: {GPM_descendants}")

GPM_graph: {np.int64(0): [np.int64(1), np.int64(2), np.int64(3), np.int64(4), np.int64(5)], np.int64(2): [np.int64(11), np.int64(12), np.int64(13), np.int64(14), np.int64(15), np.int64(17), np.int64(18), np.int64(25)], np.int64(3): [np.int64(16)], np.int64(4): [np.int64(19), np.int64(21), np.int64(22), np.int64(23), np.int64(24)], np.int64(5): [np.int64(20)]}
GPM_descendants: {np.int64(1): {np.int64(1)}, np.int64(11): {np.int64(11)}, np.int64(12): {np.int64(12)}, np.int64(13): {np.int64(13)}, np.int64(14): {np.int64(14)}, np.int64(15): {np.int64(15)}, np.int64(17): {np.int64(17)}, np.int64(18): {np.int64(18)}, np.int64(25): {np.int64(25)}, np.int64(2): {np.int64(2), np.int64(11), np.int64(12), np.int64(13), np.int64(14), np.int64(15), np.int64(17), np.int64(18), np.int64(25)}, np.int64(16): {np.int64(16)}, np.int64(3): {np.int64(16), np.int64(3)}, np.int64(19): {np.int64(19)}, np.int64(21): {np.int64(21)}, np.int64(22): {np.int64(22)}, np.int64(23): {np.int64(23)}, np.int64(24): {np.in

In [63]:
# Create a visualizer for GPM that is loyal to the origianl LLD flows
START_ID = -1
END_ID = -2
def create_GPM_node_templates(GPM_IDs: list[int], GPM_inputs: list[str], GPM_names: list[str], GPM_outputs: list[str]) -> str:
    GPMs = ""
    for row in range(len(GPM_names)):
        GPMs += f"""
        GPM_{GPM_IDs[row]}["<b>GPM_{GPM_IDs[row]}: {GPM_names[row]}</b><br/>
Input: {GPM_inputs[row]}<br/>
Output: {GPM_outputs[row]}"]
        \n"""
    return GPMs

def get_compatible_GPM_template(start_actions_templates, end_actions_templates, gpm_templates, gpm_part_of_edges, gpm_containers) -> str:
    compatible_GPM_Templeate = f"""
flowchart TB
    %% Start Actions\n{start_actions_templates}
    %% End Actions\n{end_actions_templates}
    %% GPM Actions\n{gpm_templates}
    %% GPM PartOf Edges\n{gpm_part_of_edges}
    %% GPM Containers\n{gpm_containers}
    """
    return compatible_GPM_Templeate

def get_container_flowchart_template(container_id, log_edges_template, df_GPM, GPM_IDs="ClassID", GPM_Names="ClassName") -> str:
    """makes a LR flowchart of the container
    Args:
        GPM actions
        Log edges
    """

    if container_id == 0:
        return f"""
subgraph Container_0 [Root]
    direction LR
    %% Log Edges\n{log_edges_template}
end
    """

    container_template = f"""
subgraph Container_{container_id} [GPM_{container_id}: {df_GPM[df_GPM[GPM_IDs]==container_id][GPM_Names].values[0]}]
    direction LR
    %% Log Edges\n{log_edges_template}
end
    """
    return container_template

def get_log_edges(df_pd3, df_GPM, GPM_graph, GPM_descendants, LLD_Logs = "Log", LLD_IDs = "Action ID", GPM_IDs="ClassID", GPM_Inputs="ClassInput", GPM_Names="ClassName", GPM_Outputs="ClassOutput", GPM_Parents="PartOf") -> list[(int, int)]:
    df_each_log = df_pd3.groupby(LLD_Logs)
    log_edges = set()
    for (log_id, df_log_original) in df_each_log:
        df_log = df_log_original.copy().reset_index(drop=True)
        # Boundary Condition
        log_edges.add((START_ID, df_log.at[0, GPM_IDs]))
        log_edges.add((df_log.at[len(df_log)-1, GPM_IDs], END_ID))
        for index in range(len(df_log) - 1):
            from_action = int(df_log.at[index, GPM_IDs])
            to_action = int(df_log.at[index + 1, GPM_IDs])
            print(f"from_action: {from_action}, to_action: {to_action}\n")
            print(f"parent of from_action{from_action}: {int(df_GPM[df_GPM[GPM_IDs]==from_action][GPM_Parents].values[0])}\n")
            print(f"parent of to_action{to_action}: {int(df_GPM[df_GPM[GPM_IDs]==to_action][GPM_Parents].values[0])}\n")
            if int(df_GPM[df_GPM[GPM_IDs]==from_action][GPM_Parents].values[0]) == int(df_GPM[df_GPM[GPM_IDs]==to_action][GPM_Parents].values[0]):
                log_edges.add((from_action, to_action))
            elif int(df_GPM[df_GPM[GPM_IDs]==from_action][GPM_Parents].values[0]) == to_action:
                log_edges.add((START_ID, from_action))
                log_edges.add((from_action, END_ID))
            elif int(df_GPM[df_GPM[GPM_IDs]==to_action][GPM_Parents].values[0]) == from_action:
                log_edges.add((START_ID, to_action))
                log_edges.add((to_action, END_ID))
            else:
                ## What would you do if the parents are different?
                ancenstor_of_from_action = [from_action]
                ancenstor_of_to_action = [to_action]
                parent_of_from_action = int(df_GPM[df_GPM[GPM_IDs]==from_action][GPM_Parents].values[0])
                parent_of_to_action = int(df_GPM[df_GPM[GPM_IDs]==to_action][GPM_Parents].values[0])
                while parent_of_from_action != 0:
                    ancenstor_of_from_action.append(parent_of_from_action)
                    grand_parent_of_from_action = int(df_GPM[df_GPM[GPM_IDs]==parent_of_from_action][GPM_Parents].values[0])
                    parent_of_from_action = grand_parent_of_from_action
                while parent_of_to_action != 0:
                    ancenstor_of_to_action.append(parent_of_to_action)
                    grand_parent_of_to_action = int(df_GPM[df_GPM[GPM_IDs]==parent_of_to_action][GPM_Parents].values[0])
                    parent_of_to_action = grand_parent_of_to_action
                for ancestor_of_from_action in ancenstor_of_from_action:
                    for ancestor_of_to_action in ancenstor_of_to_action:
                        if ancestor_of_from_action != ancestor_of_to_action and int(df_GPM[df_GPM[GPM_IDs]==ancestor_of_from_action][GPM_Parents].values[0]) == int(df_GPM[df_GPM[GPM_IDs]==ancestor_of_to_action][GPM_Parents].values[0]):
                            log_edges.add((from_action, END_ID))
                            log_edges.add((ancestor_of_from_action, ancestor_of_to_action))
                            log_edges.add((START_ID, to_action))
    return log_edges

def get_part_of_edges_template(GPM_graph, container_ids) -> str:
    part_of_edges = ""
    for from_action, to_actions in GPM_graph.items():
        for to_action in to_actions:
            if from_action in container_ids and to_action in container_ids:
                part_of_edges += f"Container_{from_action} --> Container_{to_action}\n"
    return part_of_edges

def get_log_edges_template(container_id, log_edges) -> str:
    log_edges_template = ""
    for from_action, to_action in log_edges:
        if from_action == START_ID:
            log_edges_template += f"start_{container_id} --> GPM_{to_action}\n"
        elif to_action == END_ID:
            log_edges_template += f"GPM_{from_action} --> end_{container_id}\n"
        else:
            log_edges_template += f"GPM_{from_action} --> GPM_{to_action}\n"
    return log_edges_template

def get_compatible_GPM_graph(df_pd3, df_GPM, GPM_graph, GPM_descendants, LLD_Logs = "Log", LLD_IDs = "Action ID", GPM_IDs="ClassID", GPM_Inputs="ClassInput", GPM_Names="ClassName", GPM_Outputs="ClassOutput", GPM_Parents="PartOf") -> str:
    """
    Args:
        df_pd3 (pd.DataFrame): LLD data
        df_GPM (pd.DataFrame): GPM data
    Returns:
        compatible_GPM_graph_template (pd.DataFrame): GPM graph template that is loyal to the original LLD flows
    """
    # get GPM actions
    gpm_templates = create_GPM_node_templates(df_GPM[GPM_IDs].unique(), df_GPM[GPM_Inputs].unique(), df_GPM[GPM_Names].unique(), df_GPM[GPM_Outputs].unique())

    # get GPM containers
    ## create containers
    container_ids = set()
    for parent in GPM_graph.keys():
        if len(GPM_descendants[parent]) > 0:
            container_ids.add(parent)
    ## get start and end actions for each container
    start_actions_templates = ""
    end_actions_templates = ""
    for container_id in container_ids:
        start_action = f"start_{container_id}[start_{container_id}]"
        end_action = f"end_{container_id}[end_{container_id}]"
        start_actions_templates += start_action + "\n"
        end_actions_templates += end_action + "\n"
    ## 1. Calculate ALL valid edges first (including high-level ones)
    log_all_edges = get_log_edges(df_pd3, df_GPM, GPM_graph, GPM_descendants)
    ## 2. Group edges by their container
    #    Since 'u' and 'v' in 'log_all_edges' are siblings, they share a parent.
    #    We can look up u's parent in df_GPM to know which subgraph owns this edge.
    container_edge_map = {} # { container_id : [list of edges] }
    for (u, v) in log_all_edges:
        # Find parent of u. If u is a top-level node, parent is 0 (Root).
        # Note: You need a helper or df lookup here. 
        # Assuming get_parent(u) returns 0 if u is top-level.
        if u == 0:
            parent_id = 0
        elif u == START_ID:
            if v == 0:
                parent_id = 0
            else:
                parent_id = int(df_GPM[df_GPM[GPM_IDs]==v][GPM_Parents].values[0])
        else:
            parent_id = int(df_GPM[df_GPM[GPM_IDs]==u][GPM_Parents].values[0])
        
        if parent_id not in container_edge_map:
            container_edge_map[parent_id] = []
        container_edge_map[parent_id].append((u, v))
    ## make container template for each container
    container_templates = ""
    # Ensure we iterate over ALL containers that have edges, plus any structural ones
    all_active_containers = container_ids.union(container_edge_map.keys())
    for container_id in all_active_containers:
        log_edges = container_edge_map[container_id]
        log_edges_template = get_log_edges_template(container_id, log_edges)
        container_template = get_container_flowchart_template(container_id, log_edges_template, df_GPM, GPM_IDs, GPM_Names)
        container_templates += container_template + '\n'

    # connect gpm containers
    ## create part-of edges
    part_of_edges = get_part_of_edges_template(GPM_graph, container_ids)

    # get compatible GPM graph template
    compatible_GPM_graph_template = get_compatible_GPM_template(start_actions_templates, end_actions_templates, gpm_templates, part_of_edges, container_templates)
    return compatible_GPM_graph_template


In [None]:
# LLD Transcription with the Frequencies
def get_log_edges_with_frequency(df_pd3, df_GPM, GPM_graph, GPM_descendants, LLD_Logs = "Log", LLD_IDs = "Action ID", GPM_IDs="ClassID", GPM_Inputs="ClassInput", GPM_Names="ClassName", GPM_Outputs="ClassOutput", GPM_Parents="PartOf") -> dict[(int, int), int]:
    from collections import Counter
    df_each_log = df_pd3.groupby(LLD_Logs)
    log_edges = Counter()
    for (log_id, df_log_original) in df_each_log:
        df_log = df_log_original.copy().reset_index(drop=True)
        # Boundary Condition
        log_edges[(START_ID, df_log.at[0, GPM_IDs])] += 1
        log_edges[(df_log.at[len(df_log)-1, GPM_IDs], END_ID)] += 1
        for index in range(len(df_log) - 1):
            from_action = int(df_log.at[index, GPM_IDs])
            to_action = int(df_log.at[index + 1, GPM_IDs])
            print(f"from_action: {from_action}, to_action: {to_action}\n")
            print(f"parent of from_action{from_action}: {int(df_GPM[df_GPM[GPM_IDs]==from_action][GPM_Parents].values[0])}\n")
            print(f"parent of to_action{to_action}: {int(df_GPM[df_GPM[GPM_IDs]==to_action][GPM_Parents].values[0])}\n")
            if int(df_GPM[df_GPM[GPM_IDs]==from_action][GPM_Parents].values[0]) == int(df_GPM[df_GPM[GPM_IDs]==to_action][GPM_Parents].values[0]):
                log_edges[(from_action, to_action)] += 1
            elif int(df_GPM[df_GPM[GPM_IDs]==from_action][GPM_Parents].values[0]) == to_action:
                log_edges[(START_ID, from_action)] += 1
                log_edges[(from_action, END_ID)] += 1
            elif int(df_GPM[df_GPM[GPM_IDs]==to_action][GPM_Parents].values[0]) == from_action:
                log_edges[(START_ID, to_action)] += 1
                log_edges[(to_action, END_ID)] += 1
            else:
                ## What would you do if the parents are different?
                ancenstor_of_from_action = [from_action]
                ancenstor_of_to_action = [to_action]
                parent_of_from_action = int(df_GPM[df_GPM[GPM_IDs]==from_action][GPM_Parents].values[0])
                parent_of_to_action = int(df_GPM[df_GPM[GPM_IDs]==to_action][GPM_Parents].values[0])
                while parent_of_from_action != 0:
                    ancenstor_of_from_action.append(parent_of_from_action)
                    grand_parent_of_from_action = int(df_GPM[df_GPM[GPM_IDs]==parent_of_from_action][GPM_Parents].values[0])
                    parent_of_from_action = grand_parent_of_from_action
                while parent_of_to_action != 0:
                    ancenstor_of_to_action.append(parent_of_to_action)
                    grand_parent_of_to_action = int(df_GPM[df_GPM[GPM_IDs]==parent_of_to_action][GPM_Parents].values[0])
                    parent_of_to_action = grand_parent_of_to_action
                for ancestor_of_from_action in ancenstor_of_from_action:
                    for ancestor_of_to_action in ancenstor_of_to_action:
                        if ancestor_of_from_action != ancestor_of_to_action and int(df_GPM[df_GPM[GPM_IDs]==ancestor_of_from_action][GPM_Parents].values[0]) == int(df_GPM[df_GPM[GPM_IDs]==ancestor_of_to_action][GPM_Parents].values[0]):
                            log_edges[(from_action, END_ID)] += 1
                            log_edges[(ancestor_of_from_action, ancestor_of_to_action)] += 1
                            log_edges[(START_ID, to_action)] += 1
    return log_edges

def get_log_edges_with_frequency_template(container_id, log_edges: list[(int, int, int)]) -> str:
    log_edges_template = ""
    for from_action, to_action, frequency in log_edges:
        if from_action == START_ID:
            log_edges_template += f"start_{container_id} --{frequency}--> GPM_{to_action}\n"
        elif to_action == END_ID:
            log_edges_template += f"GPM_{from_action} --{frequency}--> end_{container_id}\n"
        else:
            log_edges_template += f"GPM_{from_action} --{frequency}--> GPM_{to_action}\n"
    return log_edges_template

def get_compatible_GPM_graph_with_frequency(df_pd3, df_GPM, GPM_graph, GPM_descendants, LLD_Logs = "Log", LLD_IDs = "Action ID", GPM_IDs="ClassID", GPM_Inputs="ClassInput", GPM_Names="ClassName", GPM_Outputs="ClassOutput", GPM_Parents="PartOf") -> str:
    """
    Args:
        df_pd3 (pd.DataFrame): LLD data
        df_GPM (pd.DataFrame): GPM data
    Returns:
        compatible_GPM_graph_template (pd.DataFrame): GPM graph template that is loyal to the original LLD flows
    """
    # get GPM actions
    gpm_templates = create_GPM_node_templates(df_GPM[GPM_IDs].unique(), df_GPM[GPM_Inputs].unique(), df_GPM[GPM_Names].unique(), df_GPM[GPM_Outputs].unique())

    # get GPM containers
    ## create containers
    container_ids = set()
    for parent in GPM_graph.keys():
        if len(GPM_descendants[parent]) > 0:
            container_ids.add(parent)
    ## get start and end actions for each container
    start_actions_templates = ""
    end_actions_templates = ""
    for container_id in container_ids:
        start_action = f"start_{container_id}[start_{container_id}]"
        end_action = f"end_{container_id}[end_{container_id}]"
        start_actions_templates += start_action + "\n"
        end_actions_templates += end_action + "\n"
    ## 1. Calculate ALL valid edges first (including high-level ones)
    log_all_edges_with_frequency = get_log_edges_with_frequency(df_pd3, df_GPM, GPM_graph, GPM_descendants)
    ## 2. Group edges by their container
    #    Since 'u' and 'v' in 'log_all_edges' are siblings, they share a parent.
    #    We can look up u's parent in df_GPM to know which subgraph owns this edge.
    container_edge_map = {} # { container_id : [(from, to, frequency)]}
    for (u, v) in log_all_edges_with_frequency.keys():
        # Find parent of u. If u is a top-level node, parent is 0 (Root).
        # Note: You need a helper or df lookup here. 
        # Assuming get_parent(u) returns 0 if u is top-level.
        if u == 0:
            parent_id = 0
        elif u == START_ID:
            if v == 0:
                parent_id = 0
            else:
                parent_id = int(df_GPM[df_GPM[GPM_IDs]==v][GPM_Parents].values[0])
        else:
            parent_id = int(df_GPM[df_GPM[GPM_IDs]==u][GPM_Parents].values[0])
        
        if parent_id not in container_edge_map:
            container_edge_map[parent_id] = []
        container_edge_map[parent_id].append((u, v, log_all_edges_with_frequency[(u, v)]))
    ## make container template for each container
    container_templates = ""
    # Ensure we iterate over ALL containers that have edges, plus any structural ones
    all_active_containers = container_ids.union(container_edge_map.keys())
    for container_id in all_active_containers:
        log_edges = container_edge_map[container_id]
        log_edges_with_frequency_template = get_log_edges_with_frequency_template(container_id, log_edges)
        container_template = get_container_flowchart_template(container_id, log_edges_with_frequency_template, df_GPM, GPM_IDs, GPM_Names)
        container_templates += container_template + '\n'

    # connect gpm containers
    ## create part-of edges
    part_of_edges = get_part_of_edges_template(GPM_graph, container_ids)

    # get compatible GPM graph template
    compatible_GPM_graph_template = get_compatible_GPM_template(start_actions_templates, end_actions_templates, gpm_templates, part_of_edges, container_templates)
    return compatible_GPM_graph_template

In [64]:
def files_in_dir(directory_path):
    files_and_dirs = os.listdir(directory_path)
    print(files_and_dirs)

def get_current_datetime_components():
    """Returns the current year, month, day, hour, and minute as formatted strings."""
    now = datetime.datetime.now()
    return f"{now.strftime("%Y")}-{now.strftime("%m")}-{now.strftime("%d")}-{now.strftime("%H")}-{now.strftime("%M")}"

def create_mermaid_diagram(df_pd3, df_GPM, LLD_IDs = "Action ID", LLD_Inputs="Input", LLD_Names="Action", LLD_Outputs="Output", GPM_IDs="ClassID", GPM_Inputs="ClassInput", GPM_Names="ClassName", GPM_Outputs="ClassOutput", LLD_GPM_Edges="ClassID"):
    """
    Args:
        df (pandas.DataFrame): DataFrame containing the class hierarchy data.
    Returns:
        str: Mermaid diagram code with Input, Class name and Output, representing the class hierarchy.
    """
    LLD_actions = create_LLD_templates(df_pd3[LLD_IDs].tolist(), df_pd3[LLD_Inputs].tolist(), df_pd3[LLD_Names].tolist(), df_pd3[LLD_Outputs].tolist())
    GPM_actions = create_GPM_templates(df_GPM[GPM_IDs].tolist(), df_GPM[GPM_Inputs].tolist(), df_GPM[GPM_Names].tolist(), df_GPM[GPM_Outputs].tolist())
    LLD_edges = create_LLD_edges(df_pd3[LLD_IDs].tolist())
    LLD_GPM_edges = create_LLD_GPM_edges(df_pd3[LLD_IDs].tolist(), df_pd3[LLD_GPM_Edges].tolist())
    pd3_template = create_pd3_template(LLD_actions, GPM_actions, LLD_edges, LLD_GPM_edges)
    return pd3_template

def create_GPM_containers(df_GPM, GPM_IDs="ClassID", GPM_Inputs="ClassInput", GPM_Names="ClassName", GPM_Outputs="ClassOutput", GPM_Parents="PartOf"):
    """
    Args:
        df (pandas.DataFrame): DataFrame containing the class hierarchy data.
    Returns:
        str: Mermaid diagram code with Input, Class name and Output, representing the class hierarchy.
    """
    GPM_actions = create_GPM_templates(df_GPM[GPM_IDs].tolist(), df_GPM[GPM_Inputs].tolist(), df_GPM[GPM_Names].tolist(), df_GPM[GPM_Outputs].tolist())
    GPM_edges = create_GPM_edges(df_GPM[GPM_IDs].tolist(), df_GPM[GPM_Parents].tolist())
    GPM_containers = create_GPM_partof_diagram(GPM_actions, GPM_edges)
    return GPM_containers

In [65]:
files_in_dir("./target/")

['maWelding', 'nikkeiBP', 'learningFactory', 'LF-gpmreconstruction-2026-01-12-17-09.xlsx', 'LF-lldgrouping-2026-01-12-17-09.xlsx', 'merged_lld_gpm-2026-01-12-18-59.xlsx']


In [66]:
df_GPM = pd.read_excel("./target/LF-gpmreconstruction-2026-01-12-17-09.xlsx")
df_pd3 = pd.read_excel("./target/LF-lldgrouping-2026-01-12-17-09.xlsx")
print(df_GPM.head(1))
df_each_log = df_pd3.groupby("Log", as_index=False)
df_each_log.head(1)

   ClassID                      ClassInput      ClassName         ClassOutput  \
0        1  運転指示/対象マシン(1/2/3)/運転モード(自動・排出)  生産ラインを起動・運転する  ライン稼働状態(CT取得可能な稼働)   

            ClassIntent                            ClassConstructionReason  \
0  現状観察・計測のためにラインを動作させる  LLDの「起動」「自動運転」「排出運転」「ボタンを押す」を動詞類似で統合。Domain Kn...   

   PartOf                                       PartOfReason  
0       0  改善活動の最上位は全体の改善プロセスであり、その中で運転は独立に発生する基盤行為のためトップ...  


Unnamed: 0,Action ID,Log,Input,Action,Output,Intention,Annotation,Rationale,Tools/Knowledge,Engineering Cycle,ClassID,ClassificationReason
0,1,1,no input,サイクルタイム（CT）の設計値を確認する．,サイクルタイムの設計値（目標値）：各マシン共に14s,no intention,no annotation,no rationale,no tools/knowledge,information collection/analysis,11,「CTの設計値を確認する」はサイクルタイムの基準/目標を把握する行為。Domain Know...
23,24,2,no input,表の基準値の行を確認する．,マシン１の基準値：14ｓ\nマシン2の基準値：14ｓ\nマシン3の基準値：14ｓ,no intention,no annotation,no rationale,IoT Data View,information collection/analysis,11,「表の基準値の行を確認」＝IoT Data Viewで基準値(=設計/目標)確認。Keywo...
51,52,3,no input,CTの過去履歴の有無を確認する,ない,正常時と現状の差異を確認したい,no annotation,no rationale,no tools/knowledge,information collection/analysis,18,「CTの過去履歴の有無を確認」＝履歴/参照データ有無の確認。正常時との差分確認意図より、デー...
94,95,4,no input,"マシン1,2,3の自動運転ボタンを押す",no output,no intention,no annotation,no rationale,no tools/knowledge,execution,1,「自動運転ボタンを押す」＝運転実行。


In [69]:
for group in df_each_log.groups.keys():
    graph = create_mermaid_diagram(df_each_log.get_group(group), df_GPM)
    with open(f"./outputs/graph_{group}-{get_current_datetime_components()}.mmd", "w") as f:
        f.write(graph)
    print(f"Log: {group} => The PD3 has been created.")

Log: 1 => The PD3 has been created.
Log: 2 => The PD3 has been created.
Log: 3 => The PD3 has been created.
Log: 4 => The PD3 has been created.


In [68]:
containers = create_GPM_containers(df_GPM)
with open(f"./outputs/GPM_containers-{get_current_datetime_components()}.mmd", "w") as f:
    f.write(containers)

In [70]:
gpm_lld_references_diagram = GPM_LLD_references(df_pd3, df_GPM)
with open(f"./outputs/gpm_references-{get_current_datetime_components()}.mmd", "w") as f:
    f.write(gpm_lld_references_diagram)

In [71]:
for group in df_each_log.groups.keys():
    gpm_diagram_for_group = create_GPM_diagram(df_each_log.get_group(group), df_GPM)
    with open(f"./outputs/GPM_Flow_{group}-{get_current_datetime_components()}.mmd", "w") as f:
        f.write(gpm_diagram_for_group)
    print(f"Log: {group} => The PD3 has been created.")

Log: 1 => The PD3 has been created.
Log: 2 => The PD3 has been created.
Log: 3 => The PD3 has been created.
Log: 4 => The PD3 has been created.


In [72]:
compatible_gpm = get_compatible_GPM_graph(df_pd3, df_GPM, GPM_graph, GPM_descendants)
with open(f"./outputs/compatible_GPM-{get_current_datetime_components()}.mmd", "w") as f:
    f.write(compatible_gpm)
print(f"Compatible GPM graph has been saved to ./outputs/compatible_GPM-{get_current_datetime_components()}.mmd")

from_action: 11, to_action: 1

parent of from_action11: 2

parent of to_action1: 0

from_action: 1, to_action: 12

parent of from_action1: 0

parent of to_action12: 2

from_action: 12, to_action: 13

parent of from_action12: 2

parent of to_action13: 2

from_action: 13, to_action: 13

parent of from_action13: 2

parent of to_action13: 2

from_action: 13, to_action: 13

parent of from_action13: 2

parent of to_action13: 2

from_action: 13, to_action: 19

parent of from_action13: 2

parent of to_action19: 4

from_action: 19, to_action: 20

parent of from_action19: 4

parent of to_action20: 5

from_action: 20, to_action: 14

parent of from_action20: 5

parent of to_action14: 2

from_action: 14, to_action: 15

parent of from_action14: 2

parent of to_action15: 2

from_action: 15, to_action: 22

parent of from_action15: 2

parent of to_action22: 4

from_action: 22, to_action: 20

parent of from_action22: 4

parent of to_action20: 5

from_action: 20, to_action: 15

parent of from_action20: 5

In [75]:
compatible_gpm = get_compatible_GPM_graph_with_frequency(df_pd3, df_GPM, GPM_graph, GPM_descendants)
with open(f"./outputs/compatible_GPM_with_freq-{get_current_datetime_components()}.mmd", "w") as f:
    f.write(compatible_gpm)
print(f"Compatible GPM graph has been saved to ./outputs/compatible_GPM_with_freq-{get_current_datetime_components()}.mmd")

from_action: 11, to_action: 1

parent of from_action11: 2

parent of to_action1: 0

from_action: 1, to_action: 12

parent of from_action1: 0

parent of to_action12: 2

from_action: 12, to_action: 13

parent of from_action12: 2

parent of to_action13: 2

from_action: 13, to_action: 13

parent of from_action13: 2

parent of to_action13: 2

from_action: 13, to_action: 13

parent of from_action13: 2

parent of to_action13: 2

from_action: 13, to_action: 19

parent of from_action13: 2

parent of to_action19: 4

from_action: 19, to_action: 20

parent of from_action19: 4

parent of to_action20: 5

from_action: 20, to_action: 14

parent of from_action20: 5

parent of to_action14: 2

from_action: 14, to_action: 15

parent of from_action14: 2

parent of to_action15: 2

from_action: 15, to_action: 22

parent of from_action15: 2

parent of to_action22: 4

from_action: 22, to_action: 20

parent of from_action22: 4

parent of to_action20: 5

from_action: 20, to_action: 15

parent of from_action20: 5

TypeError: list indices must be integers or slices, not tuple