In [5]:
import re
import networkx as nx
from pyvis.network import Network
from IPython.display import display, HTML

# Sample COBOL code
cobol_code = """
IDENTIFICATION DIVISION.
PROGRAM-ID. SAMPLE.

PROCEDURE DIVISION.
    MAIN-LOGIC.
        PERFORM INITIALIZE.
        PERFORM VALIDATE.
        IF VALID-FLAG = "N"
            PERFORM ERROR-HANDLING
        ELSE
            PERFORM PROCESS-REQUEST
        END-IF.
        CALL 'EXTERNAL-PROGRAM'.
        PERFORM FINALIZE.
        STOP RUN.

    INITIALIZE.
        MOVE SPACES TO WS-VARIABLES.

    VALIDATE.
        IF WS-INPUT IS NUMERIC
            MOVE "Y" TO VALID-FLAG
        ELSE
            MOVE "N" TO VALID-FLAG
        END-IF.

    ERROR-HANDLING.
        DISPLAY "ERROR OCCURRED".

    PROCESS-REQUEST.
        DISPLAY "PROCESSING REQUEST".

    FINALIZE.
        DISPLAY "FINALIZING PROGRAM".

"""

In [10]:
import networkx as nx
from pyvis.network import Network
from IPython.display import display, HTML
def parse_cobol_code(code):
    """
    Parses COBOL code to extract chunks (e.g., paragraphs, sections) and their relationships.
    Returns a dictionary where each paragraph maps to its control flow relationships.
    """
    # Patterns for COBOL constructs
    paragraph_pattern = re.compile(r"^\s*([A-Z0-9-]+)\.\s*$", re.MULTILINE)  # Paragraph names
    perform_pattern = re.compile(r"PERFORM\s+([A-Z0-9-]+)")  # PERFORM calls
    call_pattern = re.compile(r"CALL\s+['\"]([A-Z0-9-]+)['\"]")  # External program calls
    goto_pattern = re.compile(r"GOTO\s+([A-Z0-9-]+)")  # GOTO statements
    if_else_pattern = re.compile(r"IF\s+(.+?)\s+THEN|ELSE", re.DOTALL)  # IF-ELSE constructs

    # Data structure to store relationships
    relationships = {}
    current_paragraph = None

    # Process the COBOL code line by line
    for line in code.splitlines():
        # Detect paragraph names
        paragraph_match = paragraph_pattern.match(line)
        if paragraph_match:
            current_paragraph = paragraph_match.group(1)
            relationships[current_paragraph] = {"PERFORM": [], "CALL": [], "GOTO": [], "IF": []}
        
        # If inside a paragraph, extract relationships
        if current_paragraph:
            # PERFORM statements
            perform_matches = perform_pattern.findall(line)
            relationships[current_paragraph]["PERFORM"].extend(perform_matches)
            
            # CALL statements
            call_matches = call_pattern.findall(line)
            relationships[current_paragraph]["CALL"].extend(call_matches)
            
            # GOTO statements
            goto_matches = goto_pattern.findall(line)
            relationships[current_paragraph]["GOTO"].extend(goto_matches)
            
            # IF-ELSE constructs
            if_match = if_else_pattern.findall(line)
            relationships[current_paragraph]["IF"].extend(if_match)
    
    return relationships
def build_control_flow_graph(parsed_code):
    """
    Builds a directed graph representing the control flow of a COBOL program.
    """
    graph = nx.DiGraph()
    
    for paragraph, details in parsed_code.items():
        # Add the paragraph as a node
        graph.add_node(paragraph)
        
        # Add edges for PERFORM relationships
        for target in details.get("PERFORM", []):
            graph.add_edge(paragraph, target, label="PERFORM")
        
        # Add edges for CALL relationships
        for target in details.get("CALL", []):
            graph.add_edge(paragraph, target, label="CALL")
        
        # Add edges for GOTO relationships
        for target in details.get("GOTO", []):
            graph.add_edge(paragraph, target, label="GOTO")
        
        # Add edges for IF conditions (dummy nodes for branching)
        for condition in details.get("IF", []):
            condition_node = f"{paragraph}_IF_{condition}"
            graph.add_node(condition_node, label=f"Condition: {condition}")
            graph.add_edge(paragraph, condition_node, label="IF")
    
    return graph



# Step 3: Visualize the graph
def visualize_control_flow(graph, output_file="control_flow.html"):
    net = Network(height="750px", width="100%", directed=True)
    
    for node in graph.nodes:
        net.add_node(node, label=node)
    for edge in graph.edges(data=True):
        net.add_edge(edge[0], edge[1], title=edge[2].get("label", ""))
    
    net.write_html(output_file)
    with open(output_file, "r") as f:
        html_content = f.read()
    display(HTML(html_content))


In [11]:
parsed_code = parse_cobol_code(cobol_code)
print(parsed_code)

control_flow_graph = build_control_flow_graph(parsed_code)
visualize_control_flow(control_flow_graph)

{'MAIN-LOGIC': {'PERFORM': ['INITIALIZE', 'VALIDATE', 'ERROR-HANDLING', 'PROCESS-REQUEST'], 'CALL': [], 'GOTO': [], 'IF': ['']}, 'END-IF': {'PERFORM': [], 'CALL': [], 'GOTO': [], 'IF': []}, 'INITIALIZE': {'PERFORM': [], 'CALL': [], 'GOTO': [], 'IF': []}, 'VALIDATE': {'PERFORM': [], 'CALL': [], 'GOTO': [], 'IF': ['']}, 'ERROR-HANDLING': {'PERFORM': [], 'CALL': [], 'GOTO': [], 'IF': []}, 'PROCESS-REQUEST': {'PERFORM': [], 'CALL': [], 'GOTO': [], 'IF': []}, 'FINALIZE': {'PERFORM': [], 'CALL': [], 'GOTO': [], 'IF': []}}


# Trying with cobol file

In [37]:
import re
import networkx as nx
from pyvis.network import Network
from IPython.display import display, HTML

# Parse COBOL file for control flow relationships
def parse_cobol_code(file_path):
    with open(file_path, 'r') as file:
        code = file.read()
    
    paragraph_pattern = re.compile(r'^\s*([0-9A-Z-]+)\.\s*$', re.MULTILINE)
    perform_pattern = re.compile(r'PERFORM\s+([0-9A-Z-]+)')
    call_pattern = re.compile(r'CALL\s+["\']([A-Z0-9-]+)["\']')
    if_pattern = re.compile(r'IF\s+(.+?)\s+THEN|EVALUATE\s+TRUE', re.DOTALL)

    relationships = {}
    current_paragraph = None

    for line in code.splitlines():
        paragraph_match = paragraph_pattern.match(line)
        if paragraph_match:
            current_paragraph = paragraph_match.group(1)
            relationships[current_paragraph] = {"PERFORM": [], "CALL": [], "IF": []}
        
        if current_paragraph:
            perform_matches = perform_pattern.findall(line)
            relationships[current_paragraph]["PERFORM"].extend(perform_matches)
            
            call_matches = call_pattern.findall(line)
            relationships[current_paragraph]["CALL"].extend(call_matches)
            
            if_matches = if_pattern.findall(line)
            relationships[current_paragraph]["IF"].extend(if_matches)
    
    return relationships

# Build a directed graph from parsed COBOL code
def build_control_flow_graph(parsed_code):
    graph = nx.DiGraph()
    for paragraph, details in parsed_code.items():
        graph.add_node(paragraph)
        for target in details.get("PERFORM", []):
            graph.add_edge(paragraph, target, label="PERFORM")
        for target in details.get("CALL", []):
            graph.add_edge(paragraph, target, label="CALL")
        for condition in details.get("IF", []):
            condition_node = f"{paragraph}_IF_{condition.strip()}"
            graph.add_node(condition_node, label=f"Condition: {condition.strip()}")
            graph.add_edge(paragraph, condition_node, label="IF")
    return graph

# Visualize the control flow graph
def visualize_control_flow(graph, output_file="control_flow.html"):
    net = Network(height="750px", width="100%", directed=True)
    for node in graph.nodes:
        net.add_node(node, label=node)
    for edge in graph.edges(data=True):
        net.add_edge(edge[0], edge[1], title=edge[2].get("label", ""))
    net.write_html(output_file)
    with open(output_file, "r") as f:
        html_content = f.read()
    # display(HTML(html_content))

# Parse, build, and visualize control flow for the uploaded COBOL file
file_path = 'COCRDLIC.cbl'
parsed_code = parse_cobol_code(file_path)
control_flow_graph = build_control_flow_graph(parsed_code)
visualize_control_flow(control_flow_graph)


# Try2

In [35]:
import re
import networkx as nx
from pyvis.network import Network
from IPython.display import display, HTML

# Parse COBOL file for control flow relationships
def parse_cobol_code(file_path):
    """
    Parses the COBOL code to extract paragraphs, PERFORM, GOTO, CALL statements, 
    and other control flow constructs.
    """
    with open(file_path, 'r') as file:
        code = file.read()
    
    # Regex patterns for COBOL constructs
    paragraph_pattern = re.compile(r'^\s*([0-9A-Z-]+)\.\s*$', re.MULTILINE)
    perform_pattern = re.compile(r'PERFORM\s+([0-9A-Z-]+)')
    call_pattern = re.compile(r'CALL\s+["\']([A-Z0-9-]+)["\']')
    goto_pattern = re.compile(r'GOTO\s+([0-9A-Z-]+)')
    if_pattern = re.compile(r'IF\s+(.+?)\s+THEN|EVALUATE\s+TRUE', re.DOTALL)

    relationships = {}
    paragraphs = []  # Sequential order of paragraphs
    current_paragraph = None

    for line in code.splitlines():
        # Match paragraph names
        paragraph_match = paragraph_pattern.match(line)
        if paragraph_match:
            current_paragraph = paragraph_match.group(1)
            paragraphs.append(current_paragraph)
            relationships[current_paragraph] = {"PERFORM": [], "CALL": [], "GOTO": [], "IF": []}
        
        if current_paragraph:
            # Match PERFORM statements
            perform_matches = perform_pattern.findall(line)
            relationships[current_paragraph]["PERFORM"].extend(perform_matches)
            
            # Match CALL statements
            call_matches = call_pattern.findall(line)
            relationships[current_paragraph]["CALL"].extend(call_matches)
            
            # Match GOTO statements
            goto_matches = goto_pattern.findall(line)
            relationships[current_paragraph]["GOTO"].extend(goto_matches)
            
            # Match IF/EVALUATE conditions
            if_matches = if_pattern.findall(line)
            relationships[current_paragraph]["IF"].extend(if_matches)

    return relationships, paragraphs

# Build control flow graph from parsed COBOL code
def build_control_flow_graph(parsed_code, paragraphs):
    """
    Constructs a directed graph from COBOL relationships.
    Connects sequential paragraphs and control flow structures.
    """
    graph = nx.DiGraph()
    previous_paragraph = None

    for paragraph in paragraphs:
        graph.add_node(paragraph)
        if previous_paragraph:
            # Link sequential paragraphs
            graph.add_edge(previous_paragraph, paragraph, label="SEQUENTIAL")
        previous_paragraph = paragraph

        # Add edges for control flow constructs
        for target in parsed_code[paragraph]["PERFORM"]:
            graph.add_edge(paragraph, target, label="PERFORM")
        for target in parsed_code[paragraph]["CALL"]:
            graph.add_edge(paragraph, target, label="CALL")
        for target in parsed_code[paragraph]["GOTO"]:
            graph.add_edge(paragraph, target, label="GOTO")
        for condition in parsed_code[paragraph]["IF"]:
            condition_node = f"{paragraph}_IF_{condition.strip()}"
            graph.add_node(condition_node)
            graph.add_edge(paragraph, condition_node, label="IF")
    
    return graph

# Visualize the control flow graph
def visualize_control_flow(graph, output_file="control_flow.html"):
    """
    Visualizes the control flow graph using PyVis and renders it in an interactive HTML.
    """
    net = Network(height="750px", width="100%", directed=True)
    for node in graph.nodes:
        net.add_node(node, label=node)
    for edge in graph.edges(data=True):
        net.add_edge(edge[0], edge[1], title=edge[2].get("label", ""))
    net.write_html(output_file)
    with open(output_file, "r") as f:
        html_content = f.read()
    # display(HTML(html_content))

# Full control flow analysis pipeline
def main(file_path):
    parsed_code, paragraphs = parse_cobol_code(file_path)
    control_flow_graph = build_control_flow_graph(parsed_code, paragraphs)
    visualize_control_flow(control_flow_graph)

# Run the pipeline on the uploaded COBOL file
file_path = 'COCRDLIC.cbl'
main(file_path)



In [15]:
parsed_code, paragraphs = parse_cobol_code(file_path)

In [16]:
parsed_code

{'PROGRAM-ID': {'PERFORM': [], 'CALL': [], 'GOTO': [], 'IF': []},
 'COCRDLIC': {'PERFORM': [], 'CALL': [], 'GOTO': [], 'IF': []},
 'DATE-WRITTEN': {'PERFORM': [], 'CALL': [], 'GOTO': [], 'IF': []},
 'DATE-COMPILED': {'PERFORM': [], 'CALL': [], 'GOTO': [], 'IF': []},
 'LOW-VALUES': {'PERFORM': [], 'CALL': [], 'GOTO': [], 'IF': []},
 'WS-EDIT-SELECT-ERROR-FLAGS': {'PERFORM': [],
  'CALL': [],
  'GOTO': [],
  'IF': []},
 '0000-MAIN': {'PERFORM': ['YYYY-STORE-PFKEY',
   '2000-RECEIVE-MAP',
   '9000-READ-FORWARD',
   '1000-SEND-MAP',
   '9000-READ-FORWARD',
   '1000-SEND-MAP',
   '9000-READ-FORWARD',
   '1000-SEND-MAP',
   '9000-READ-FORWARD',
   '1000-SEND-MAP',
   '9100-READ-BACKWARDS',
   '1000-SEND-MAP',
   '9000-READ-FORWARD',
   '1000-SEND-MAP',
   '1000-SEND-MAP'],
  'CALL': [],
  'GOTO': [],
  'IF': ['']},
 'COMMON-RETURN': {'PERFORM': [], 'CALL': [], 'GOTO': [], 'IF': []},
 '0000-MAIN-EXIT': {'PERFORM': [], 'CALL': [], 'GOTO': [], 'IF': []},
 '1000-SEND-MAP': {'PERFORM': ['1100-SCR

# try2 

In [33]:
import re
import networkx as nx
from pyvis.network import Network
from IPython.display import display, HTML

def parse_cobol_file(file_path):
    """
    Parses a COBOL file to extract control flow constructs.
    Returns relationships and the list of paragraphs in order.
    """
    with open(file_path, 'r') as file:
        code = file.read()

    # Patterns for COBOL constructs
    paragraph_pattern = re.compile(r'^\s*([0-9A-Z-]+)\.\s*$', re.MULTILINE)  # Paragraphs
    perform_pattern = re.compile(r'PERFORM\s+([0-9A-Z-]+)')  # PERFORM calls
    call_pattern = re.compile(r'CALL\s+["\']([A-Z0-9-]+)["\']')  # CALL statements
    goto_pattern = re.compile(r'GOTO\s+([0-9A-Z-]+)')  # GOTO statements
    if_pattern = re.compile(r'(IF|EVALUATE)\s+(.*?)\s+(THEN|WHEN)', re.DOTALL)  # IF/EVALUATE

    relationships = {}
    paragraphs = []  # To maintain order of paragraphs
    current_paragraph = None

    for line in code.splitlines():
        # Detect paragraphs
        paragraph_match = paragraph_pattern.match(line)
        if paragraph_match:
            current_paragraph = paragraph_match.group(1)
            paragraphs.append(current_paragraph)
            relationships[current_paragraph] = {"PERFORM": [], "CALL": [], "GOTO": [], "IF": []}

        # Capture relationships within the current paragraph
        if current_paragraph:
            for target in perform_pattern.findall(line):
                relationships[current_paragraph]["PERFORM"].append(target)
            for target in call_pattern.findall(line):
                relationships[current_paragraph]["CALL"].append(target)
            for target in goto_pattern.findall(line):
                relationships[current_paragraph]["GOTO"].append(target)
            for condition in if_pattern.findall(line):
                relationships[current_paragraph]["IF"].append(condition[1].strip())

    return relationships, paragraphs


# Build control flow graph from parsed COBOL code
def build_control_flow_graph(parsed_code, paragraphs):
    """
    Constructs a directed graph from COBOL relationships.
    Connects sequential paragraphs and control flow structures.
    """
    graph = nx.DiGraph()
    previous_paragraph = None

    for paragraph in paragraphs:
        graph.add_node(paragraph)
        if previous_paragraph:
            # Link sequential paragraphs
            graph.add_edge(previous_paragraph, paragraph, label="SEQUENTIAL")
        previous_paragraph = paragraph

        # Add edges for control flow constructs
        for target in parsed_code[paragraph]["PERFORM"]:
            graph.add_edge(paragraph, target, label="PERFORM")
        for target in parsed_code[paragraph]["CALL"]:
            graph.add_edge(paragraph, target, label="CALL")
        for target in parsed_code[paragraph]["GOTO"]:
            graph.add_edge(paragraph, target, label="GOTO")
        for condition in parsed_code[paragraph]["IF"]:
            condition_node = f"{paragraph}_IF_{condition.strip()}"
            graph.add_node(condition_node)
            graph.add_edge(paragraph, condition_node, label="IF")
    
    return graph

# Visualize the control flow graph
def visualize_control_flow(graph, output_file="control_flow.html"):
    """
    Visualizes the control flow graph using PyVis and renders it in an interactive HTML.
    """
    net = Network(height="750px", width="100%", directed=True)
    for node in graph.nodes:
        net.add_node(node, label=node)
    for edge in graph.edges(data=True):
        net.add_edge(edge[0], edge[1], title=edge[2].get("label", ""))
    net.write_html(output_file)
    with open(output_file, "r") as f:
        html_content = f.read()
    # display(HTML(html_content))

# Full control flow analysis pipeline
def main(file_path, outout_file):
    parsed_code, paragraphs = parse_cobol_code(file_path)
    control_flow_graph = build_control_flow_graph(parsed_code, paragraphs)
    visualize_control_flow(control_flow_graph, outout_file)
    return parsed_code, paragraphs

# Run the pipeline on the uploaded COBOL file
file_path = 'DB1024_2.cbl'
# file_path = 'COCRDLIC.cbl'
main(file_path, file_path.replace('.cbl', '.html'))


({}, [])

# Try3

In [44]:
import re
import networkx as nx
from pyvis.network import Network

# Parse COBOL file for control flow relationships
def parse_cobol_code(file_path):
    """
    Parses COBOL code to extract control flow constructs and their relationships.
    """
    with open(file_path, 'r') as file:
        code = file.read()
    
    # Patterns for COBOL constructs
    paragraph_pattern = re.compile(r'^\s*([0-9A-Z-]+)\.\s*$', re.MULTILINE)
    perform_pattern = re.compile(r'\bPERFORM\s+([0-9A-Z-]+(?:\s+THRU\s+[0-9A-Z-]+)?)', re.IGNORECASE)
    perform_until_pattern = re.compile(r'\bPERFORM\s+(.+?)\s+UNTIL\s+(.+)', re.IGNORECASE)
    perform_varying_pattern = re.compile(r'\bPERFORM\s+VARYING\s+(.+?)\s+FROM\s+(.+?)\s+BY\s+(.+?)\s+UNTIL\s+(.+)', re.IGNORECASE)
    if_pattern = re.compile(r'\bIF\s+(.+?)\s+THEN\s+(.+?)(?:\s+ELSE\s+(.+?))?\s+END-IF', re.IGNORECASE | re.DOTALL)
    evaluate_pattern = re.compile(r'\bEVALUATE\s+(.+?)\s+WHEN\s+(.+?)\s+END-EVALUATE', re.IGNORECASE | re.DOTALL)
    goto_pattern = re.compile(r'\bGO\s*TO\s+([0-9A-Z-]+)', re.IGNORECASE)
    call_pattern = re.compile(r'\bCALL\s+["\']?([A-Z0-9-]+)["\']?', re.IGNORECASE)
    stop_run_pattern = re.compile(r'\bSTOP\s+RUN', re.IGNORECASE)
    exit_program_pattern = re.compile(r'\bEXIT\s+PROGRAM', re.IGNORECASE)
    declaratives_pattern = re.compile(r'^\s*DECLARATIVES\s*$', re.IGNORECASE | re.MULTILINE)
    end_declaratives_pattern = re.compile(r'^\s*END\s+DECLARATIVES\s*$', re.IGNORECASE | re.MULTILINE)

    relationships = {}
    paragraphs = []
    current_paragraph = None
    in_declaratives = False

    for line in code.splitlines():
        # Check for start/end of DECLARATIVES
        if declaratives_pattern.match(line):
            in_declaratives = True
            continue
        elif end_declaratives_pattern.match(line):
            in_declaratives = False
            continue

        # Skip lines within DECLARATIVES
        if in_declaratives:
            continue

        # Match paragraph names
        paragraph_match = paragraph_pattern.match(line)
        if paragraph_match:
            current_paragraph = paragraph_match.group(1)
            paragraphs.append(current_paragraph)
            relationships[current_paragraph] = {
                "PERFORM": [], "PERFORM_UNTIL": [], "PERFORM_VARYING": [],
                "IF": [], "EVALUATE": [], "GOTO": [], "CALL": [],
                "STOP_RUN": False, "EXIT_PROGRAM": False
            }
            continue

        if current_paragraph:
            # Match PERFORM statements
            for match in perform_pattern.findall(line):
                relationships[current_paragraph]["PERFORM"].append(match.strip())
            
            # Match PERFORM UNTIL statements
            for match in perform_until_pattern.findall(line):
                relationships[current_paragraph]["PERFORM_UNTIL"].append(match)
            
            # Match PERFORM VARYING statements
            for match in perform_varying_pattern.findall(line):
                relationships[current_paragraph]["PERFORM_VARYING"].append(match)
            
            # Match IF statements
            for match in if_pattern.findall(line):
                condition, then_part, else_part = match
                relationships[current_paragraph]["IF"].append({
                    "condition": condition.strip(),
                    "then": then_part.strip(),
                    "else": else_part.strip() if else_part else None
                })
            
            # Match EVALUATE statements
            for match in evaluate_pattern.findall(line):
                condition, when_part = match
                relationships[current_paragraph]["EVALUATE"].append({
                    "condition": condition.strip(),
                    "when": when_part.strip()
                })
            
            # Match GOTO statements
            for match in goto_pattern.findall(line):
                relationships[current_paragraph]["GOTO"].append(match.strip())
            
            # Match CALL statements
            for match in call_pattern.findall(line):
                relationships[current_paragraph]["CALL"].append(match.strip())
            
            # Match STOP RUN statements
            if stop_run_pattern.search(line):
                relationships[current_paragraph]["STOP_RUN"] = True
            
            # Match EXIT PROGRAM statements
            if exit_program_pattern.search(line):
                relationships[current_paragraph]["EXIT_PROGRAM"] = True

    return relationships, paragraphs

# Build control flow graph from parsed COBOL code
def build_control_flow_graph(parsed_code, paragraphs):
    """
    Constructs a directed graph from COBOL relationships.
    Connects sequential paragraphs and control flow structures.
    """
    graph = nx.DiGraph()
    previous_paragraph = None

    for paragraph in paragraphs:
        graph.add_node(paragraph, shape='box', label=paragraph)
        if previous_paragraph:
            # Link sequential paragraphs
            graph.add_edge(previous_paragraph, paragraph, label="SEQUENTIAL")
        previous_paragraph = paragraph

        # Add edges for PERFORM statements
        for target in parsed_code[paragraph]["PERFORM"]:
            graph.add_edge(paragraph, target, label="PERFORM")

        # Add edges for PERFORM UNTIL statements
        for condition, target in parsed_code[paragraph]["PERFORM_UNTIL"]:
            condition_node = f"{paragraph}_PERFORM_UNTIL_{condition.strip()}"
            graph.add_node(condition_node, shape="diamond", label=condition.strip())
            graph.add_edge(paragraph, condition_node, label="PERFORM UNTIL")
            graph.add_edge(condition_node, target.strip(), label="Loop")

        # Add edges for PERFORM VARYING statements
        for variable, start, step, condition in parsed_code[paragraph]["PERFORM_VARYING"]:
            condition_node = f"{paragraph}_PERFORM_VARYING_{condition.strip()}"
            graph.add_node(condition_node, shape="diamond", label=f"{variable} VARYING")
            graph.add_edge(paragraph, condition_node, label="PERFORM VARYING")
            graph.add_edge(condition_node, start.strip(), label=f"Start={start.strip()}")
            graph.add_edge(condition_node, condition.strip(), label=f"Condition={condition.strip()}")

        # Add edges for IF statements
        for if_block in parsed_code[paragraph]["IF"]:
            condition = if_block["condition"]
            condition_node = f"{paragraph}_IF_{condition.strip()}"
            graph.add_node(condition_node, shape="diamond", label=f"IF {condition.strip()}")
            graph.add_edge(paragraph, condition_node, label="IF")
            graph.add_edge(condition_node, if_block["then"].strip(), label="THEN")
            if if_block["else"]:
                graph.add_edge(condition_node, if_block["else"].strip(), label="ELSE")

        # Add edges for EVALUATE statements
        for eval_block in parsed_code[paragraph]["EVALUATE"]:
            condition = eval_block["condition"]
            condition_node = f"{paragraph}_EVALUATE_{condition.strip()}"
            graph.add_node(condition_node, shape="diamond", label=f"EVALUATE {condition.strip()}")
            graph.add_edge(paragraph, condition_node, label="EVALUATE")
            graph.add_edge(condition_node, eval_block["when"].strip(), label="WHEN")

        # Add edges for GOTO statements
        for target in parsed_code[paragraph]["GOTO"]:
            graph.add_edge(paragraph, target, label="GOTO")

        # Add edges for CALL statements
        for target in parsed_code[paragraph]["CALL"]:
            graph.add_edge(paragraph, target, label="CALL")

        # Add STOP RUN and EXIT PROGRAM nodes
        if parsed_code[paragraph]["STOP_RUN"]:
            stop_node = f"{paragraph}_STOP_RUN"
            graph.add_node(stop_node, shape="ellipse", label="STOP RUN")
            graph.add_edge(paragraph, stop_node, label="STOP RUN")

        if parsed_code[paragraph]["EXIT_PROGRAM"]:
            exit_node = f"{paragraph}_EXIT_PROGRAM"
            graph.add_node(exit_node, shape="ellipse", label="EXIT PROGRAM")
            graph.add_edge(paragraph, exit_node, label="EXIT PROGRAM")

    return graph

# Visualize the control flow graph
def visualize_control_flow(graph, output_file="control_flow.html"):
    """
    Visualizes the control flow graph using PyVis and renders it in an interactive HTML.
    """
    net = Network(height="750px", width="100%", directed=True)
    for node, data in graph.nodes(data=True):
        net.add_node(node, label=data.get("label", node), shape=data.get("shape", "box"))
    for edge in graph.edges(data=True):
        net.add_edge(edge[0], edge[1], title=edge[2].get("label", ""))
    # net.show(output_file)
    net.write_html(output_html)
    with open(output_html, "r") as f:
        html_content = f.read()
    
# Full control flow analysis pipeline
def main(file_path, output_file):
    parsed_code, paragraphs = parse_cobol_code(file_path)
    control_flow_graph = build_control_flow_graph(parsed_code, paragraphs)
    visualize_control_flow(control_flow_graph, output_file)
    return parsed_code, paragraphs

# Example usage
file_path = "COCRDLIC.cbl"
output_file = "control_flow_cocrdlic.html"
main(file_path, output_file)


({'PROGRAM-ID': {'PERFORM': [],
   'PERFORM_UNTIL': [],
   'PERFORM_VARYING': [],
   'IF': [],
   'EVALUATE': [],
   'GOTO': [],
   'CALL': [],
   'STOP_RUN': False,
   'EXIT_PROGRAM': False},
  'COCRDLIC': {'PERFORM': [],
   'PERFORM_UNTIL': [],
   'PERFORM_VARYING': [],
   'IF': [],
   'EVALUATE': [],
   'GOTO': [],
   'CALL': [],
   'STOP_RUN': False,
   'EXIT_PROGRAM': False},
  'DATE-WRITTEN': {'PERFORM': [],
   'PERFORM_UNTIL': [],
   'PERFORM_VARYING': [],
   'IF': [],
   'EVALUATE': [],
   'GOTO': [],
   'CALL': [],
   'STOP_RUN': False,
   'EXIT_PROGRAM': False},
  'DATE-COMPILED': {'PERFORM': [],
   'PERFORM_UNTIL': [],
   'PERFORM_VARYING': [],
   'IF': [],
   'EVALUATE': [],
   'GOTO': [],
   'CALL': [],
   'STOP_RUN': False,
   'EXIT_PROGRAM': False},
  'LOW-VALUES': {'PERFORM': [],
   'PERFORM_UNTIL': [],
   'PERFORM_VARYING': [],
   'IF': [],
   'EVALUATE': [],
   'GOTO': [],
   'CALL': [],
   'STOP_RUN': False,
   'EXIT_PROGRAM': False},
  'WS-EDIT-SELECT-ERROR-FLAGS': 

In [54]:
import re
import networkx as nx
from pyvis.network import Network

# Enhanced COBOL parsing function
def parse_cobol_code(file_path):
    """
    Parses COBOL code to extract control flow constructs and their relationships.
    Handles fixed-format COBOL standards and advanced constructs.
    """
    with open(file_path, 'r') as file:
        code = file.readlines()

    relationships = {}
    paragraphs = []
    current_paragraph = None

    # Regular expressions for parsing constructs
    paragraph_pattern = re.compile(r'^\s*([0-9A-Z-]+)\.\s*$', re.MULTILINE)
    perform_pattern = re.compile(r'\bPERFORM\s+([0-9A-Z-]+(?:\s+THRU\s+[0-9A-Z-]+)?)', re.IGNORECASE)
    if_pattern = re.compile(r'\bIF\s+(.+?)\s+THEN\s+(.+?)(?:\s+ELSE\s+(.+?))?\s+END-IF', re.IGNORECASE | re.DOTALL)
    exec_sql_pattern = re.compile(r'EXEC\s+SQL\s+(.*?)\s+END-EXEC', re.IGNORECASE | re.DOTALL)
    exec_cics_pattern = re.compile(r'EXEC\s+CICS\s+(.*?)\s+END-EXEC', re.IGNORECASE | re.DOTALL)
    json_pattern = re.compile(r'JSON\s+(PARSE|GENERATE)\s+([A-Z0-9-]+)', re.IGNORECASE)
    db2_pattern = re.compile(r'\bEXEC\s+SQL\s+(.*?SELECT|INSERT|UPDATE|DELETE).*?\s+END-EXEC', re.IGNORECASE | re.DOTALL)
    call_pattern = re.compile(r'\bCALL\s+["\']?([A-Z0-9-_]+)["\']?', re.IGNORECASE)
    copy_include_pattern = re.compile(r'\b(COPY|INCLUDE|\+\+INCLUDE)\s+([A-Z0-9-]+)', re.IGNORECASE)

    # Normalize code by processing fixed-format columns
    source_code = []
    for line in code:
        # Skip sequence numbers and comments
        sequence_number = line[:6].strip()
        indicator = line[6:7]
        content = line[7:72].strip()

        if indicator in ('*', '/'):  # Comment lines
            continue
        if indicator == '-':  # Continuation line
            source_code[-1] += content
        else:
            source_code.append(content)

    # Process normalized code
    for line in source_code:
        # Match paragraph names
        paragraph_match = paragraph_pattern.match(line)
        if paragraph_match:
            current_paragraph = paragraph_match.group(1)
            paragraphs.append(current_paragraph)
            relationships[current_paragraph] = {
                "PERFORM": [], "IF": [], "EXEC_SQL": [],
                "EXEC_CICS": [], "JSON": [], "DB2": [],
                "CALL": [], "COPY_INCLUDE": []
            }
            continue

        if current_paragraph:
            # Match PERFORM statements
            for match in perform_pattern.findall(line):
                relationships[current_paragraph]["PERFORM"].append(match.strip())

            # Match IF statements
            for match in if_pattern.findall(line):
                condition, then_part, else_part = match
                relationships[current_paragraph]["IF"].append({
                    "condition": condition.strip(),
                    "then": then_part.strip(),
                    "else": else_part.strip() if else_part else None
                })

            # Match EXEC SQL statements
            for match in exec_sql_pattern.findall(line):
                relationships[current_paragraph]["EXEC_SQL"].append(match.strip())

            # Match EXEC CICS statements
            for match in exec_cics_pattern.findall(line):
                relationships[current_paragraph]["EXEC_CICS"].append(match.strip())

            # Match JSON statements
            for match in json_pattern.findall(line):
                operation, variable = match
                relationships[current_paragraph]["JSON"].append({
                    "operation": operation.strip(),
                    "variable": variable.strip()
                })

            # Match DB2 statements
            for match in db2_pattern.findall(line):
                relationships[current_paragraph]["DB2"].append(match.strip())

            # Match CALL statements
            for match in call_pattern.findall(line):
                relationships[current_paragraph]["CALL"].append(match.strip())

            # Match COPY and INCLUDE statements
            for match in copy_include_pattern.findall(line):
                directive, member = match
                relationships[current_paragraph]["COPY_INCLUDE"].append({
                    "directive": directive.strip(),
                    "member": member.strip()
                })

    return relationships, paragraphs

# Build control flow graph from parsed COBOL code
def build_control_flow_graph(parsed_code, paragraphs):
    """
    Constructs a directed graph from COBOL relationships.
    Connects sequential paragraphs and control flow structures.
    """
    graph = nx.DiGraph()
    previous_paragraph = None

    for paragraph in paragraphs:
        graph.add_node(paragraph, label=paragraph, shape="box")
        if previous_paragraph:
            # Link sequential paragraphs
            graph.add_edge(previous_paragraph, paragraph, label="SEQUENTIAL")
        previous_paragraph = paragraph

        # Add edges for control flow constructs
        for target in parsed_code[paragraph]["PERFORM"]:
            graph.add_edge(paragraph, target, label="PERFORM")

        for if_block in parsed_code[paragraph]["IF"]:
            condition = if_block["condition"]
            condition_node = f"{paragraph}_IF_{condition.strip()}"
            graph.add_node(condition_node, label=f"IF {condition.strip()}", shape="diamond")
            graph.add_edge(paragraph, condition_node, label="IF")
            graph.add_edge(condition_node, if_block["then"].strip(), label="THEN")
            if if_block["else"]:
                graph.add_edge(condition_node, if_block["else"].strip(), label="ELSE")

        for match in parsed_code[paragraph]["EXEC_SQL"]:
            sql_node = f"{paragraph}_SQL"
            graph.add_node(sql_node, label="SQL", shape="ellipse")
            graph.add_edge(paragraph, sql_node, label="SQL EXEC")

        for match in parsed_code[paragraph]["EXEC_CICS"]:
            cics_node = f"{paragraph}_CICS"
            graph.add_node(cics_node, label="CICS", shape="ellipse")
            graph.add_edge(paragraph, cics_node, label="CICS EXEC")

        for json_op in parsed_code[paragraph]["JSON"]:
            json_node = f"{paragraph}_JSON_{json_op['operation']}"
            graph.add_node(json_node, label=f"JSON {json_op['operation']}", shape="hexagon")
            graph.add_edge(paragraph, json_node, label="JSON OP")

    return graph

# Visualize the control flow graph
def visualize_control_flow(graph, output_file):
    """
    Visualizes the control flow graph using PyVis and saves it as an HTML file.
    """
    net = Network(height="750px", width="100%", directed=True)
    for node, data in graph.nodes(data=True):
        net.add_node(node, label=data.get("label", node), shape=data.get("shape", "box"))
    for edge in graph.edges(data=True):
        net.add_edge(edge[0], edge[1], title=edge[2].get("label", ""))
    net.write_html(output_file)
    with open(output_file, "r") as f:
        html_content = f.read()

# Full control flow analysis pipeline
def main(file_path, output_file):
    parsed_code, paragraphs = parse_cobol_code(file_path)
    control_flow_graph = build_control_flow_graph(parsed_code, paragraphs)
    visualize_control_flow(control_flow_graph, output_file)

# Example usage
file_path = "DB1024_2.cbl"  # Path to your COBOL file
output_file = "control_flow.html"
main(file_path, output_file)


In [52]:
parse_cobol_code(file_path)

({'PROGRAM-ID': {'PERFORM': [],
   'IF': [],
   'EXEC_SQL': [],
   'EXEC_CICS': [],
   'JSON': [],
   'DB2': [],
   'CALL': [],
   'COPY_INCLUDE': []},
  'DB102A': {'PERFORM': [],
   'IF': [],
   'EXEC_SQL': [],
   'EXEC_CICS': [],
   'JSON': [],
   'DB2': [],
   'CALL': [],
   'COPY_INCLUDE': []},
  'AUTHOR': {'PERFORM': [],
   'IF': [],
   'EXEC_SQL': [],
   'EXEC_CICS': [],
   'JSON': [],
   'DB2': [],
   'CALL': [],
   'COPY_INCLUDE': []},
  'INSTALLATION': {'PERFORM': [],
   'IF': [],
   'EXEC_SQL': [],
   'EXEC_CICS': [],
   'JSON': [],
   'DB2': [],
   'CALL': [],
   'COPY_INCLUDE': []},
  'DATE-WRITTEN': {'PERFORM': [],
   'IF': [],
   'EXEC_SQL': [],
   'EXEC_CICS': [],
   'JSON': [],
   'DB2': [],
   'CALL': [],
   'COPY_INCLUDE': []},
  'SECURITY': {'PERFORM': [],
   'IF': [],
   'EXEC_SQL': [],
   'EXEC_CICS': [],
   'JSON': [],
   'DB2': [],
   'CALL': [],
   'COPY_INCLUDE': []},
  'NONE': {'PERFORM': [],
   'IF': [],
   'EXEC_SQL': [],
   'EXEC_CICS': [],
   'JSON': [],
 

# Try 4

In [55]:
import re
import networkx as nx
from pyvis.network import Network

# Enhanced COBOL parsing function
def parse_cobol_code(file_path):
    """
    Parses COBOL code to extract control flow constructs and their relationships.
    Handles fixed-format COBOL standards and advanced constructs.
    """
    with open(file_path, 'r') as file:
        code = file.readlines()

    relationships = {}
    paragraphs = []
    current_paragraph = None

    # Regular expressions for parsing constructs
    paragraph_pattern = re.compile(r'^\s*([0-9A-Z-]+)\.\s*$', re.MULTILINE)
    perform_pattern = re.compile(r'\bPERFORM\s+([0-9A-Z-]+(?:\s+THRU\s+[0-9A-Z-]+)?)', re.IGNORECASE)
    if_pattern = re.compile(r'\bIF\s+(.+?)\s+THEN\s+(.+?)(?:\s+ELSE\s+(.+?))?\s+END-IF', re.IGNORECASE | re.DOTALL)
    exec_sql_pattern = re.compile(r'EXEC\s+SQL\s+(.*?)\s+END-EXEC', re.IGNORECASE | re.DOTALL)
    exec_cics_pattern = re.compile(r'EXEC\s+CICS\s+(.*?)\s+END-EXEC', re.IGNORECASE | re.DOTALL)
    json_pattern = re.compile(r'JSON\s+(PARSE|GENERATE)\s+([A-Z0-9-]+)', re.IGNORECASE)
    call_pattern = re.compile(r'\bCALL\s+["\']?([A-Z0-9-_]+)["\']?', re.IGNORECASE)

    # Normalize code by processing fixed-format columns
    source_code = []
    for line in code:
        # Skip sequence numbers and comments
        sequence_number = line[:6].strip()
        indicator = line[6:7]
        content = line[7:72].strip()

        if indicator in ('*', '/'):  # Comment lines
            continue
        if indicator == '-':  # Continuation line
            source_code[-1] += content
        else:
            source_code.append(content)

    # Process normalized code
    for line in source_code:
        # Match paragraph names
        paragraph_match = paragraph_pattern.match(line)
        if paragraph_match:
            current_paragraph = paragraph_match.group(1)
            paragraphs.append(current_paragraph)
            relationships[current_paragraph] = {
                "PERFORM": [], "IF": [], "EXEC_SQL": [],
                "EXEC_CICS": [], "JSON": [], "CALL": []
            }
            continue

        if current_paragraph:
            # Match PERFORM statements
            for match in perform_pattern.findall(line):
                relationships[current_paragraph]["PERFORM"].append(match.strip())

            # Match IF statements
            for match in if_pattern.findall(line):
                condition, then_part, else_part = match
                relationships[current_paragraph]["IF"].append({
                    "condition": condition.strip(),
                    "then": then_part.strip(),
                    "else": else_part.strip() if else_part else None
                })

            # Match EXEC SQL statements
            for match in exec_sql_pattern.findall(line):
                relationships[current_paragraph]["EXEC_SQL"].append(match.strip())

            # Match EXEC CICS statements
            for match in exec_cics_pattern.findall(line):
                relationships[current_paragraph]["EXEC_CICS"].append(match.strip())

            # Match JSON statements
            for match in json_pattern.findall(line):
                operation, variable = match
                relationships[current_paragraph]["JSON"].append({
                    "operation": operation.strip(),
                    "variable": variable.strip()
                })

            # Match CALL statements
            for match in call_pattern.findall(line):
                relationships[current_paragraph]["CALL"].append(match.strip())

    return relationships, paragraphs

# Build control flow graph from parsed COBOL code
def build_control_flow_graph(parsed_code, paragraphs):
    """
    Constructs a directed graph from COBOL relationships.
    Connects sequential paragraphs and control flow structures.
    """
    graph = nx.DiGraph()
    previous_paragraph = None

    for paragraph in paragraphs:
        graph.add_node(paragraph, label=paragraph, shape="box")
        if previous_paragraph:
            # Link sequential paragraphs
            graph.add_edge(previous_paragraph, paragraph, label="SEQUENTIAL")
        previous_paragraph = paragraph

        # Add edges for control flow constructs
        for target in parsed_code[paragraph]["PERFORM"]:
            graph.add_edge(paragraph, target, label="PERFORM")

        for if_block in parsed_code[paragraph]["IF"]:
            condition = if_block["condition"]
            condition_node = f"{paragraph}_IF_{condition.strip()}"
            graph.add_node(condition_node, label=f"IF {condition.strip()}", shape="diamond")
            graph.add_edge(paragraph, condition_node, label="IF")
            graph.add_edge(condition_node, if_block["then"].strip(), label="THEN")
            if if_block["else"]:
                graph.add_edge(condition_node, if_block["else"].strip(), label="ELSE")

        for sql_op in parsed_code[paragraph]["EXEC_SQL"]:
            sql_node = f"{paragraph}_SQL"
            graph.add_node(sql_node, label="SQL EXEC", shape="ellipse")
            graph.add_edge(paragraph, sql_node, label="SQL EXEC")

        for cics_op in parsed_code[paragraph]["EXEC_CICS"]:
            cics_node = f"{paragraph}_CICS"
            graph.add_node(cics_node, label="CICS EXEC", shape="ellipse")
            graph.add_edge(paragraph, cics_node, label="CICS EXEC")

        for json_op in parsed_code[paragraph]["JSON"]:
            json_node = f"{paragraph}_JSON_{json_op['operation']}"
            graph.add_node(json_node, label=f"JSON {json_op['operation']}", shape="hexagon")
            graph.add_edge(paragraph, json_node, label=f"JSON {json_op['operation']}")

        for call_target in parsed_code[paragraph]["CALL"]:
            call_node = f"{paragraph}_CALL_{call_target}"
            graph.add_node(call_node, label=f"CALL {call_target}", shape="ellipse")
            graph.add_edge(paragraph, call_node, label="CALL")

    return graph

# Visualize the control flow graph
def visualize_control_flow(graph, output_file):
    """
    Visualizes the control flow graph using PyVis and saves it as an HTML file.
    """
    net = Network(height="1000px", width="100%", directed=True)
    for node, data in graph.nodes(data=True):
        net.add_node(node, label=data.get("label", node), shape=data.get("shape", "box"))
    for edge in graph.edges(data=True):
        net.add_edge(edge[0], edge[1], title=edge[2].get("label", ""))
    net.write_html(output_file)
    with open(output_file, "r") as f:
        html_content = f.read()

# Full control flow analysis pipeline
def main(file_path, output_file):
    parsed_code, paragraphs = parse_cobol_code(file_path)
    control_flow_graph = build_control_flow_graph(parsed_code, paragraphs)
    visualize_control_flow(control_flow_graph, output_file)

# Example usage
file_path = "DB1024_2.cbl"  # Path to your COBOL file
output_file = "control_flow.html"
main(file_path, output_file)


# Try 5

In [57]:
import re
import networkx as nx
from pyvis.network import Network

def parse_cobol_code(file_path):
    """
    Parses COBOL code to extract hierarchical relationships (Program, Division,
    Section, Paragraphs) and flow constructs (PERFORM, IF, CALL, etc.).
    """
    with open(file_path, 'r') as file:
        code = file.readlines()

    structure = {"Program": None, "Divisions": {}, "Flow": {}}
    current_division = None
    current_section = None
    current_paragraph = None

    # Regular expressions for parsing COBOL structure and flow
    program_id_pattern = re.compile(r'^\s*PROGRAM-ID\.\s+([A-Z0-9-_]+)', re.IGNORECASE)
    division_pattern = re.compile(r'^\s*([A-Z-]+)\s+DIVISION\.\s*$', re.IGNORECASE)
    section_pattern = re.compile(r'^\s*([A-Z0-9-]+)\s+SECTION\.\s*$', re.IGNORECASE)
    paragraph_pattern = re.compile(r'^\s*([A-Z0-9-]+)\.\s*$', re.IGNORECASE)
    perform_pattern = re.compile(r'\bPERFORM\s+([A-Z0-9-]+(?:\s+THRU\s+[A-Z0-9-]+)?)', re.IGNORECASE)
    if_pattern = re.compile(r'\bIF\s+(.+?)\s+THEN\s+(.+?)(?:\s+ELSE\s+(.+?))?\s+END-IF', re.IGNORECASE | re.DOTALL)

    # Normalize code by processing fixed-format columns
    source_code = []
    for line in code:
        # Skip sequence numbers and comments
        sequence_number = line[:6].strip()
        indicator = line[6:7]
        content = line[7:72].strip()

        if indicator in ('*', '/'):  # Comment lines
            continue
        if indicator == '-':  # Continuation line
            source_code[-1] += content
        else:
            source_code.append(content)

    # Process normalized code
    for line in source_code:
        # Match PROGRAM-ID
        program_match = program_id_pattern.match(line)
        if program_match:
            structure["Program"] = program_match.group(1)
            continue

        # Match Divisions
        division_match = division_pattern.match(line)
        if division_match:
            current_division = division_match.group(1)
            structure["Divisions"][current_division] = {}
            current_section = None
            continue

        # Match Sections
        section_match = section_pattern.match(line)
        if section_match and current_division:
            current_section = section_match.group(1)
            structure["Divisions"][current_division][current_section] = []
            current_paragraph = None
            continue

        # Match Paragraphs
        paragraph_match = paragraph_pattern.match(line)
        if paragraph_match:
            current_paragraph = paragraph_match.group(1)
            if current_division and current_section:
                structure["Divisions"][current_division][current_section].append(current_paragraph)
            structure["Flow"][current_paragraph] = {"PERFORM": [], "IF": []}
            continue

        # Match PERFORM and IF statements within Paragraphs
        if current_paragraph:
            for match in perform_pattern.findall(line):
                structure["Flow"][current_paragraph]["PERFORM"].append(match.strip())

            for match in if_pattern.findall(line):
                condition, then_part, else_part = match
                structure["Flow"][current_paragraph]["IF"].append({
                    "condition": condition.strip(),
                    "then": then_part.strip(),
                    "else": else_part.strip() if else_part else None
                })

    return structure

def build_hierarchical_graph(structure):
    """
    Constructs a hierarchical graph based on the COBOL structure (Program, Division,
    Section, Paragraphs) and flow (PERFORM, IF).
    """
    graph = nx.DiGraph()

    # Add Program Node
    program_node = structure["Program"]
    graph.add_node(program_node, label=f"Program: {program_node}", shape="box")

    # Add Divisions, Sections, and Paragraphs
    for division, sections in structure["Divisions"].items():
        division_node = f"{program_node}_{division}"
        graph.add_node(division_node, label=f"Division: {division}", shape="box")
        graph.add_edge(program_node, division_node)

        for section, paragraphs in sections.items():
            section_node = f"{division_node}_{section}"
            graph.add_node(section_node, label=f"Section: {section}", shape="ellipse")
            graph.add_edge(division_node, section_node)

            for paragraph in paragraphs:
                paragraph_node = f"{section_node}_{paragraph}"
                graph.add_node(paragraph_node, label=f"Paragraph: {paragraph}", shape="box")
                graph.add_edge(section_node, paragraph_node)

    # Add Control Flow for Paragraphs
    for paragraph, flow in structure["Flow"].items():
        for target in flow["PERFORM"]:
            graph.add_edge(f"{program_node}_{paragraph}", f"{program_node}_{target}", label="PERFORM")

        for if_block in flow["IF"]:
            condition = if_block["condition"]
            condition_node = f"{program_node}_{paragraph}_IF_{condition.strip()}"
            graph.add_node(condition_node, label=f"IF {condition.strip()}", shape="diamond")
            graph.add_edge(f"{program_node}_{paragraph}", condition_node, label="IF")
            graph.add_edge(condition_node, f"{program_node}_{if_block['then']}", label="THEN")
            if if_block["else"]:
                graph.add_edge(condition_node, f"{program_node}_{if_block['else']}", label="ELSE")

    return graph

def visualize_hierarchical_graph(graph, output_file):
    """
    Visualizes the hierarchical graph using PyVis and saves it as an HTML file.
    """
    net = Network(height="1000px", width="100%", directed=True, layout=True)
    for node, data in graph.nodes(data=True):
        net.add_node(node, label=data.get("label", node), shape=data.get("shape", "box"))
    for edge in graph.edges(data=True):
        net.add_edge(edge[0], edge[1], title=edge[2].get("label", ""))
    net.write_html(output_file)
    with open(output_file, "r") as f:
        html_content = f.read()

# Full control flow analysis pipeline
def main(file_path, output_file):
    structure  = parse_cobol_code(file_path)
    control_flow_graph = build_control_flow_graph(parsed_code, paragraphs)
    visualize_control_flow(control_flow_graph, output_file)

# Example usage
file_path = "DB1024_2.cbl"  # Path to your COBOL file
output_file = "control_flow.html"
main(file_path, output_file)


KeyError: 'EXEC_SQL'

# Try 6

In [60]:
import re
import networkx as nx
from pyvis.network import Network


def parse_program(code_lines):
    """
    Parse PROGRAM-ID from COBOL file.
    """
    program_id_pattern = re.compile(r'^\s*PROGRAM-ID\.\s+([A-Z0-9-_]+)', re.IGNORECASE)
    for line in code_lines:
        match = program_id_pattern.match(line)
        if match:
            return match.group(1)
    print("WARNING: PROGRAM-ID not found. Using default value.")
    return "DEFAULT_PROGRAM"


def parse_divisions(code_lines):
    """
    Parse DIVISIONS, SECTIONS, and Paragraphs from COBOL file.
    """
    structure = {"Divisions": {}, "Flow": {}}
    current_division = None
    current_section = None
    current_paragraph = None

    division_pattern = re.compile(r'^\s*([A-Z-]+)\s+DIVISION\.\s*$', re.IGNORECASE)
    section_pattern = re.compile(r'^\s*([A-Z0-9-]+)\s+SECTION\.\s*$', re.IGNORECASE)
    paragraph_pattern = re.compile(r'^\s*([A-Z0-9-]+)\.\s*$', re.IGNORECASE)

    for line in code_lines:
        # Match Divisions
        division_match = division_pattern.match(line)
        if division_match:
            current_division = division_match.group(1)
            structure["Divisions"][current_division] = {}
            current_section = None
            continue

        # Match Sections
        section_match = section_pattern.match(line)
        if section_match and current_division:
            current_section = section_match.group(1)
            structure["Divisions"][current_division][current_section] = []
            current_paragraph = None
            continue

        # Match Paragraphs
        paragraph_match = paragraph_pattern.match(line)
        if paragraph_match:
            current_paragraph = paragraph_match.group(1)
            if current_division and current_section:
                structure["Divisions"][current_division][current_section].append(current_paragraph)
            structure["Flow"][current_paragraph] = {"PERFORM": [], "IF": [], "EXEC_SQL": []}
            continue

    return structure


def parse_flow(code_lines, flow_structure):
    """
    Parse control flow constructs (PERFORM, IF, EXEC SQL).
    """
    perform_pattern = re.compile(r'\bPERFORM\s+([A-Z0-9-]+(?:\s+THRU\s+[A-Z0-9-]+)?)', re.IGNORECASE)
    if_pattern = re.compile(r'\bIF\s+(.+?)\s+THEN\s+(.+?)(?:\s+ELSE\s+(.+?))?\s+END-IF', re.IGNORECASE | re.DOTALL)

    current_paragraph = None
    paragraph_pattern = re.compile(r'^\s*([A-Z0-9-]+)\.\s*$', re.IGNORECASE)

    for line in code_lines:
        # Track current paragraph
        paragraph_match = paragraph_pattern.match(line)
        if paragraph_match:
            current_paragraph = paragraph_match.group(1)
            continue

        # Match PERFORM statements
        if current_paragraph:
            for match in perform_pattern.findall(line):
                flow_structure[current_paragraph]["PERFORM"].append(match.strip())

            # Match IF statements
            for match in if_pattern.findall(line):
                condition, then_part, else_part = match
                flow_structure[current_paragraph]["IF"].append({
                    "condition": condition.strip(),
                    "then": then_part.strip(),
                    "else": else_part.strip() if else_part else None
                })

    return flow_structure


def normalize_cobol_code(code_lines):
    """
    Normalizes COBOL code by removing sequence numbers, comments, and handling continuation lines.
    """
    normalized_code = []
    for line in code_lines:
        sequence_number = line[:6].strip()
        indicator = line[6:7]
        content = line[7:72].strip()

        if indicator in ('*', '/'):  # Comment lines
            continue
        if indicator == '-':  # Continuation line
            normalized_code[-1] += content
        else:
            normalized_code.append(content)

    return normalized_code


def build_hierarchical_graph(structure, program_id):
    """
    Constructs a hierarchical graph based on the COBOL structure and flow.
    """
    graph = nx.DiGraph()

    # Add Program Node
    graph.add_node(program_id, label=f"Program: {program_id}", shape="box")

    # Add Divisions, Sections, and Paragraphs
    for division, sections in structure["Divisions"].items():
        division_node = f"{program_id}_{division}"
        graph.add_node(division_node, label=f"Division: {division}", shape="box")
        graph.add_edge(program_id, division_node)

        for section, paragraphs in sections.items():
            section_node = f"{division_node}_{section}"
            graph.add_node(section_node, label=f"Section: {section}", shape="ellipse")
            graph.add_edge(division_node, section_node)

            for paragraph in paragraphs:
                paragraph_node = f"{section_node}_{paragraph}"
                graph.add_node(paragraph_node, label=f"Paragraph: {paragraph}", shape="box")
                graph.add_edge(section_node, paragraph_node)

    # Add Control Flow for Paragraphs
    for paragraph, flow in structure["Flow"].items():
        paragraph_node = f"{program_id}_{paragraph}"

        for target in flow["PERFORM"]:
            target_node = f"{program_id}_{target}"
            graph.add_edge(paragraph_node, target_node, label="PERFORM")

        for if_block in flow["IF"]:
            condition = if_block["condition"]
            condition_node = f"{paragraph_node}_IF_{condition.strip()}"
            graph.add_node(condition_node, label=f"IF {condition.strip()}", shape="diamond")
            graph.add_edge(paragraph_node, condition_node, label="IF")
            graph.add_edge(condition_node, f"{program_id}_{if_block['then']}", label="THEN")
            if if_block["else"]:
                graph.add_edge(condition_node, f"{program_id}_{if_block['else']}", label="ELSE")

    return graph


def visualize_hierarchical_graph(graph, output_file):
    """
    Visualizes the hierarchical graph using PyVis and saves it as an HTML file.
    """
    net = Network(height="1000px", width="100%", directed=True, layout=True)
    for node, data in graph.nodes(data=True):
        net.add_node(node, label=data.get("label", node), shape=data.get("shape", "box"))
    for edge in graph.edges(data=True):
        net.add_edge(edge[0], edge[1], title=edge[2].get("label", ""))

    net.write_html(output_file)
    with open(output_file, "r") as f:
        html_content = f.read()


def main(file_path, output_file):
    with open(file_path, 'r') as file:
        code_lines = file.readlines()

    # Normalize and Parse
    normalized_code = normalize_cobol_code(code_lines)
    program_id = parse_program(normalized_code)
    structure = parse_divisions(normalized_code)
    structure["Flow"] = parse_flow(normalized_code, structure["Flow"])

    # Build and Visualize Graph
    graph = build_hierarchical_graph(structure, program_id)
    visualize_hierarchical_graph(graph, output_file)


# Example Usage
file_path = "DB1024_2.cbl"  # Path to your COBOL file
output_file = "cobol_hierarchy.html"
main(file_path, output_file)




# Try 7

In [62]:
import re
import networkx as nx
from pyvis.network import Network


def parse_program(code_lines):
    """
    Parse PROGRAM-ID from COBOL file.
    """
    program_id_pattern = re.compile(r'^\s*PROGRAM-ID\.\s+([A-Z0-9-_]+)', re.IGNORECASE)
    for line in code_lines:
        match = program_id_pattern.match(line)
        if match:
            return match.group(1)
    print("WARNING: PROGRAM-ID not found. Using default value.")
    return "DEFAULT_PROGRAM"


def parse_divisions(code_lines):
    """
    Parse DIVISIONS, SECTIONS, and Paragraphs from COBOL file.
    """
    structure = {"Divisions": {}, "Flow": {}}
    current_division = None
    current_section = None
    current_paragraph = None

    division_pattern = re.compile(r'^\s*([A-Z-]+)\s+DIVISION\.\s*$', re.IGNORECASE)
    section_pattern = re.compile(r'^\s*([A-Z0-9-]+)\s+SECTION\.\s*$', re.IGNORECASE)
    paragraph_pattern = re.compile(r'^\s*([A-Z0-9-]+)\.\s*$', re.IGNORECASE)

    for line in code_lines:
        # Match Divisions
        division_match = division_pattern.match(line)
        if division_match:
            current_division = division_match.group(1)
            structure["Divisions"][current_division] = {}
            current_section = None
            continue

        # Match Sections
        section_match = section_pattern.match(line)
        if section_match and current_division:
            current_section = section_match.group(1)
            structure["Divisions"][current_division][current_section] = []
            current_paragraph = None
            continue

        # Match Paragraphs
        paragraph_match = paragraph_pattern.match(line)
        if paragraph_match:
            current_paragraph = paragraph_match.group(1)
            if current_division and current_section:
                structure["Divisions"][current_division][current_section].append(current_paragraph)
            structure["Flow"][current_paragraph] = {"PERFORM": [], "IF": [], "EXEC_SQL": []}
            continue

    return structure


def parse_flow(code_lines, flow_structure):
    """
    Parse control flow constructs (PERFORM, IF, EXEC SQL).
    """
    perform_pattern = re.compile(r'\bPERFORM\s+([A-Z0-9-]+(?:\s+THRU\s+[A-Z0-9-]+)?)', re.IGNORECASE)
    if_pattern = re.compile(r'\bIF\s+(.+?)\s+THEN\s+(.+?)(?:\s+ELSE\s+(.+?))?\s+END-IF', re.IGNORECASE | re.DOTALL)

    current_paragraph = None
    paragraph_pattern = re.compile(r'^\s*([A-Z0-9-]+)\.\s*$', re.IGNORECASE)

    for line in code_lines:
        # Track current paragraph
        paragraph_match = paragraph_pattern.match(line)
        if paragraph_match:
            current_paragraph = paragraph_match.group(1)
            continue

        # Match PERFORM statements
        if current_paragraph:
            for match in perform_pattern.findall(line):
                flow_structure[current_paragraph]["PERFORM"].append(match.strip())

            # Match IF statements
            for match in if_pattern.findall(line):
                condition, then_part, else_part = match
                flow_structure[current_paragraph]["IF"].append({
                    "condition": condition.strip(),
                    "then": then_part.strip(),
                    "else": else_part.strip() if else_part else None
                })

    return flow_structure


def normalize_cobol_code(code_lines):
    """
    Normalizes COBOL code by removing sequence numbers, comments, and handling continuation lines.
    """
    normalized_code = []
    for line in code_lines:
        sequence_number = line[:6].strip()
        indicator = line[6:7]
        content = line[7:72].strip()

        if indicator in ('*', '/'):  # Comment lines
            continue
        if indicator == '-':  # Continuation line
            normalized_code[-1] += content
        else:
            normalized_code.append(content)

    return normalized_code


def build_hierarchical_graph(structure, program_id):
    """
    Constructs a hierarchical graph based on the COBOL structure and flow.
    """
    graph = nx.DiGraph()

    # Add Program Node
    graph.add_node(program_id, label=f"Program: {program_id}", shape="box", group=0)

    # Add Divisions, Sections, and Paragraphs
    group_id = 1
    for division, sections in structure["Divisions"].items():
        division_node = f"{program_id}_{division}"
        graph.add_node(division_node, label=f"Division: {division}", shape="box", group=group_id)
        graph.add_edge(program_id, division_node)

        for section, paragraphs in sections.items():
            section_node = f"{division_node}_{section}"
            graph.add_node(section_node, label=f"Section: {section}", shape="ellipse", group=group_id + 1)
            graph.add_edge(division_node, section_node)

            for paragraph in paragraphs:
                paragraph_node = f"{section_node}_{paragraph}"
                graph.add_node(paragraph_node, label=f"Paragraph: {paragraph}", shape="box", group=group_id + 2)
                graph.add_edge(section_node, paragraph_node)

    # Add Control Flow for Paragraphs
    for paragraph, flow in structure["Flow"].items():
        paragraph_node = f"{program_id}_{paragraph}"

        for target in flow["PERFORM"]:
            target_node = f"{program_id}_{target}"
            graph.add_edge(paragraph_node, target_node, label="PERFORM")

        for if_block in flow["IF"]:
            condition = if_block["condition"]
            condition_node = f"{paragraph_node}_IF_{condition.strip()}"
            graph.add_node(condition_node, label=f"IF {condition.strip()}", shape="diamond", group=group_id + 3)
            graph.add_edge(paragraph_node, condition_node, label="IF")
            graph.add_edge(condition_node, f"{program_id}_{if_block['then']}", label="THEN")
            if if_block["else"]:
                graph.add_edge(condition_node, f"{program_id}_{if_block['else']}", label="ELSE")

    return graph


def visualize_hierarchical_graph(graph, output_file):
    """
    Visualizes the hierarchical graph using PyVis and saves it as an HTML file.
    """
    net = Network(height="1000px", width="100%", directed=True, layout=True)
    for node, data in graph.nodes(data=True):
        net.add_node(node, label=data.get("label", node), shape=data.get("shape", "box"), group=data.get("group", 0))
    for edge in graph.edges(data=True):
        net.add_edge(edge[0], edge[1], title=edge[2].get("label", ""))
    net.write_html(output_file)
    with open(output_file, "r") as f:
        html_content = f.read()



def main(file_path, output_file):
    with open(file_path, 'r') as file:
        code_lines = file.readlines()

    # Normalize and Parse
    normalized_code = normalize_cobol_code(code_lines)
    program_id = parse_program(normalized_code)
    structure = parse_divisions(normalized_code)
    structure["Flow"] = parse_flow(normalized_code, structure["Flow"])

    # Build and Visualize Graph
    graph = build_hierarchical_graph(structure, program_id)
    visualize_hierarchical_graph(graph, output_file)


# Example Usage
# file_path = "DB1024_2.cbl"  # Path to your COBOL file
file_path = "COCRDLIC.cbl"
output_file = "cobol_hierarchy.html"
main(file_path, output_file)




# Try 8

In [67]:
import re
import networkx as nx
from pyvis.network import Network


def parse_cobol_code(file_path):
    """
    Parses COBOL code to extract hierarchical relationships and flow constructs.
    Handles missing PROGRAM-ID gracefully by assigning a default value.
    """
    with open(file_path, 'r') as file:
        code = file.readlines()

    structure = {"Program": None, "Divisions": {}, "Flow": {}}
    current_division = None
    current_section = None
    current_paragraph = None

    # Regular expressions for parsing COBOL structure and flow
    program_id_pattern = re.compile(r'^\s*PROGRAM-ID\.\s+([A-Z0-9-_]+)', re.IGNORECASE)
    division_pattern = re.compile(r'^\s*([A-Z-]+)\s+DIVISION\.\s*$', re.IGNORECASE)
    section_pattern = re.compile(r'^\s*([A-Z0-9-]+)\s+SECTION\.\s*$', re.IGNORECASE)
    paragraph_pattern = re.compile(r'^\s*([A-Z0-9-]+)\.\s*$', re.IGNORECASE)
    perform_pattern = re.compile(r'\bPERFORM\s+([A-Z0-9-]+(?:\s+THRU\s+[A-Z0-9-]+)?)', re.IGNORECASE)
    if_pattern = re.compile(r'\bIF\s+(.+?)\s+THEN\s+(.+?)(?:\s+ELSE\s+(.+?))?\s+END-IF', re.IGNORECASE | re.DOTALL)

    # Normalize code
    source_code = normalize_cobol_code(code)

    # Process normalized code
    for line in source_code:
        # Match PROGRAM-ID
        program_match = program_id_pattern.match(line)
        if program_match:
            structure["Program"] = program_match.group(1)
            continue

        # Match Divisions
        division_match = division_pattern.match(line)
        if division_match:
            current_division = division_match.group(1)
            structure["Divisions"][current_division] = {}
            current_section = None
            continue

        # Match Sections
        section_match = section_pattern.match(line)
        if section_match and current_division:
            current_section = section_match.group(1)
            structure["Divisions"][current_division][current_section] = []
            current_paragraph = None
            continue

        # Match Paragraphs
        paragraph_match = paragraph_pattern.match(line)
        if paragraph_match:
            current_paragraph = paragraph_match.group(1)
            if current_division and current_section:
                structure["Divisions"][current_division][current_section].append(current_paragraph)
            structure["Flow"][current_paragraph] = {"PERFORM": [], "IF": []}
            continue

        # Match Control Flow Constructs
        if current_paragraph:
            for match in perform_pattern.findall(line):
                structure["Flow"][current_paragraph]["PERFORM"].append(match.strip())

            for match in if_pattern.findall(line):
                condition, then_part, else_part = match
                structure["Flow"][current_paragraph]["IF"].append({
                    "condition": condition.strip(),
                    "then": then_part.strip(),
                    "else": else_part.strip() if else_part else None
                })

    # Assign a default PROGRAM-ID if missing
    if not structure["Program"]:
        print("WARNING: PROGRAM-ID not found. Assigning default value.")
        structure["Program"] = "DEFAULT_PROGRAM"

    return structure


def normalize_cobol_code(code_lines):
    """
    Normalizes COBOL code by removing sequence numbers, comments, and handling continuation lines.
    """
    normalized_code = []
    for line in code_lines:
        sequence_number = line[:6].strip()
        indicator = line[6:7]
        content = line[7:72].strip()

        if indicator in ('*', '/'):  # Comment lines
            continue
        if indicator == '-':  # Continuation line
            normalized_code[-1] += content
        else:
            normalized_code.append(content)

    return normalized_code


def build_collapsible_graph(structure):
    """
    Constructs a collapsible graph based on COBOL hierarchy and control flow.
    """
    graph = nx.DiGraph()

    # Add Program Node
    program_node = structure["Program"]
    graph.add_node(program_node, label=f"Program: {program_node}", shape="box")

    # Add Divisions, Sections, Paragraphs, and Flow
    for division, sections in structure["Divisions"].items():
        division_node = f"{program_node}_{division}"
        graph.add_node(division_node, label=f"Division: {division}", shape="box")
        graph.add_edge(program_node, division_node)

        for section, paragraphs in sections.items():
            section_node = f"{division_node}_{section}"
            graph.add_node(section_node, label=f"Section: {section}", shape="ellipse")
            graph.add_edge(division_node, section_node)

            for paragraph in paragraphs:
                paragraph_node = f"{section_node}_{paragraph}"
                graph.add_node(paragraph_node, label=f"Paragraph: {paragraph}", shape="box")
                graph.add_edge(section_node, paragraph_node)

    # Add Control Flow for Paragraphs
    for paragraph, flow in structure["Flow"].items():
        paragraph_node = f"{program_node}_{paragraph}"

        for target in flow["PERFORM"]:
            target_node = f"{program_node}_{target}"
            graph.add_edge(paragraph_node, target_node, label="PERFORM")

        for if_block in flow["IF"]:
            condition = if_block["condition"]
            condition_node = f"{paragraph_node}_IF_{condition.strip()}"
            graph.add_node(condition_node, label=f"IF {condition.strip()}", shape="diamond")
            graph.add_edge(paragraph_node, condition_node, label="IF")
            graph.add_edge(condition_node, f"{program_node}_{if_block['then']}", label="THEN")
            if if_block["else"]:
                graph.add_edge(condition_node, f"{program_node}_{if_block['else']}", label="ELSE")

    return graph


def visualize_collapsible_graph(graph, output_file):
    """
    Visualizes the collapsible graph using PyVis and saves it as an HTML file.
    """
    net = Network(height="1000px", width="100%", directed=True)
    net.set_options("""
    var options = {
      "layout": {
        "hierarchical": {
          "enabled": true,
          "levelSeparation": 150,
          "nodeSpacing": 200,
          "treeSpacing": 300,
          "blockShifting": true,
          "edgeMinimization": true,
          "parentCentralization": true,
          "direction": "UD", 
          "sortMethod": "directed"
        }
      },
      "physics": {
        "enabled": false
      }
    }
    """)
    for node, data in graph.nodes(data=True):
        net.add_node(node, label=data.get("label", node), shape=data.get("shape", "box"))
    for edge in graph.edges(data=True):
        net.add_edge(edge[0], edge[1], title=edge[2].get("label", ""))

    net.write_html(output_file)
    with open(output_file, "r") as f:
        html_content = f.read()
        
def main(file_path, output_file):
    with open(file_path, 'r') as file:
        code_lines = file.readlines()

    # Parse COBOL Structure
    structure = parse_cobol_code(file_path)

    # Build and Visualize Collapsible Graph
    graph = build_collapsible_graph(structure)
    visualize_collapsible_graph(graph, output_file)


# Example Usage
file_path = "COCRDLIC.cbl"  # Path to your COBOL file
output_file = "cobol_collapsible.html"
main(file_path, output_file)





# Try 9

In [83]:
import re
import json


def parse_cobol_code(file_path):
    """
    Parses COBOL code to extract hierarchical relationships and control flow.
    """
    with open(file_path, 'r') as file:
        code = file.readlines()

    structure = {"name": None, "children": []}
    paragraphs = {}

    # Regular expressions for parsing COBOL structure and flow
    program_id_pattern = re.compile(r'^\s*PROGRAM-ID\.\s+([A-Z0-9-_]+)', re.IGNORECASE)
    paragraph_pattern = re.compile(r'^\s*([A-Z0-9-]+)\.\s*$', re.IGNORECASE)
    perform_pattern = re.compile(r'\bPERFORM\s+([A-Z0-9-]+)(?:\s+THRU\s+([A-Z0-9-]+))?', re.IGNORECASE)
    if_pattern = re.compile(r'\bIF\s+(.+?)\s+THEN\s+PERFORM\s+([A-Z0-9-]+)', re.IGNORECASE)

    # Process COBOL code line by line
    current_paragraph = None
    for line in code:
        # Match PROGRAM-ID
        program_match = program_id_pattern.match(line)
        if program_match:
            structure["name"] = program_match.group(1)
            continue

        # Match Paragraphs
        paragraph_match = paragraph_pattern.match(line)
        if paragraph_match:
            current_paragraph = paragraph_match.group(1)
            paragraphs[current_paragraph] = {"name": current_paragraph, "children": []}
            continue

        # Match Perform Statements
        if current_paragraph:
            for match in perform_pattern.findall(line):
                target = match[0]
                if target not in paragraphs:
                    paragraphs[target] = {"name": target, "children": []}
                paragraphs[current_paragraph]["children"].append(paragraphs[target])

            # Match If Conditions
            for match in if_pattern.findall(line):
                condition = f"IF {match[0]}"
                perform_target = match[1]
                condition_node = {"name": condition, "children": []}
                if perform_target not in paragraphs:
                    paragraphs[perform_target] = {"name": perform_target, "children": []}
                condition_node["children"].append(paragraphs[perform_target])
                paragraphs[current_paragraph]["children"].append(condition_node)

    # Build hierarchical structure
    structure["children"] = list(paragraphs.values())
    return structure

def write_d3_collapsible_graph(structure, output_file):
    """
    Writes a collapsible graph using D3.js based on the parsed COBOL structure.
    """
    json_data = json.dumps(structure, indent=2)

    html_template = """
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <script src="https://d3js.org/d3.v7.min.js"></script>
    <title>Interactive COBOL Control Flow</title>
    <style>
        body {{
            margin: 0;
            padding: 0;
            overflow: hidden;
            background-color: white;
        }}
        .node circle {{
            fill: #69b3a2;
            stroke: #555;
            stroke-width: 1.5px;
            cursor: pointer;
            transition: fill 0.3s;
        }}
        .node text {{
            font: 14px sans-serif;
            fill: #000;
            pointer-events: none;
        }}
        .link {{
            fill: none;
            stroke: #d2b48c; /* Light brown lines */
            stroke-width: 1.5px;
        }}
    </style>
</head>
<body>
    <script>
        const data = __DATA_PLACEHOLDER__;

        const width = window.innerWidth;
        const height = window.innerHeight;

        const svg = d3.select("body").append("svg")
            .attr("width", width)
            .attr("height", height)
            .call(d3.zoom().on("zoom", (event) => {
                g.attr("transform", event.transform);
            }))
            .append("g");

        const g = svg.append("g")
            .attr("transform", "translate(50, 50)");

        const tree = d3.tree().size([height - 200, width - 200]);

        const root = d3.hierarchy(data);

        root.descendants().forEach((d, i) => {
            d.id = i;
            d._children = d.children;
            d.children = d.depth < 3 ? d.children : null; // Keep top 3 levels expanded
        });

        const update = (source) => {
            const nodes = root.descendants();
            const links = root.links();

            tree(root);

            // Links
            const link = g.selectAll(".link")
                .data(links, d => d.target.id);

            link.enter().append("line")
                .attr("class", "link")
                .merge(link)
                .attr("x1", d => d.source.y)
                .attr("y1", d => d.source.x)
                .attr("x2", d => d.target.y)
                .attr("y2", d => d.target.x)
                .attr("stroke", "#d2b48c") // Light brown color
                .attr("stroke-width", 1.5); // Thin lines

            link.exit().remove();

            // Nodes
            const node = g.selectAll(".node")
                .data(nodes, d => d.id);

            const nodeEnter = node.enter().append("g")
                .attr("class", "node")
                .attr("transform", d => `translate(${source.y0 || 0},${source.x0 || 0})`)
                .on("click", (event, d) => {
                    d.children = d.children ? null : d._children;
                    update(d);
                });

            nodeEnter.append("circle")
                .attr("r", 6)
                .attr("fill", "#69b3a2");

            nodeEnter.append("text")
                .attr("dy", 3)
                .attr("x", d => d.children || d._children ? -10 : 10)
                .style("text-anchor", d => d.children || d._children ? "end" : "start")
                .text(d => d.data.name);

            const nodeUpdate = nodeEnter.merge(node);

            nodeUpdate.transition()
                .duration(200)
                .attr("transform", d => `translate(${d.y},${d.x})`);

            nodes.forEach(d => {
                d.x0 = d.x;
                d.y0 = d.y;
            });

            node.exit().transition()
                .duration(200)
                .remove();
        };

        update(root);
    </script>
</body>
</html>
    """

    html_content = html_template.replace("__DATA_PLACEHOLDER__", json_data)

    with open(output_file, "w") as file:
        file.write(html_content)



def main(cobol_files, output_file):
    """
    Main function to parse COBOL files and generate a collapsible graph.
    """
    combined_structure = {"name": "Control Flow", "children": []}

    for file_path in cobol_files:
        structure = parse_cobol_code(file_path)
        combined_structure["children"].append(structure)

    write_d3_collapsible_graph(combined_structure, output_file)


# Example Usage
cobol_files = ["COCRDLIC.cbl"]  # Add more COBOL file paths if needed
output_file = "collapsible_cobol_flow.html"
main(cobol_files, output_file)


# Try 10

In [151]:
def parse_cobol_code(file_path):
    """
    Parses COBOL code to extract hierarchical relationships and control flow.
    """
    with open(file_path, 'r') as file:
        code = file.readlines()

    structure = {"name": None, "children": []}
    paragraphs = {}

    # Regular expressions for parsing COBOL structure and flow
    program_id_pattern = re.compile(r'^\s*PROGRAM-ID\.\s+([A-Z0-9-_]+)', re.IGNORECASE)
    paragraph_pattern = re.compile(r'^\s*([A-Z0-9-]+)\.\s*$', re.IGNORECASE)
    perform_pattern = re.compile(r'\bPERFORM\s+([A-Z0-9-]+)(?:\s+THRU\s+([A-Z0-9-]+))?', re.IGNORECASE)
    if_pattern = re.compile(r'\bIF\s+(.+?)\s+THEN\s+PERFORM\s+([A-Z0-9-]+)', re.IGNORECASE)

    # Process COBOL code line by line
    current_paragraph = None
    for line in code:
        # Match PROGRAM-ID
        program_match = program_id_pattern.match(line)
        if program_match:
            structure["name"] = program_match.group(1)
            continue

        # Match Paragraphs
        paragraph_match = paragraph_pattern.match(line)
        if paragraph_match:
            current_paragraph = paragraph_match.group(1)
            paragraphs[current_paragraph] = {"name": current_paragraph, "children": []}
            continue

        # Match Perform Statements
        if current_paragraph:
            for match in perform_pattern.findall(line):
                target = match[0]
                if target not in paragraphs:
                    paragraphs[target] = {"name": target, "children": []}
                paragraphs[current_paragraph]["children"].append(paragraphs[target])

            # Match If Conditions
            for match in if_pattern.findall(line):
                condition = f"IF {match[0]}"
                perform_target = match[1]
                condition_node = {"name": condition, "children": []}
                if perform_target not in paragraphs:
                    paragraphs[perform_target] = {"name": perform_target, "children": []}
                condition_node["children"].append(paragraphs[perform_target])
                paragraphs[current_paragraph]["children"].append(condition_node)

    # Build hierarchical structure
    structure["children"] = list(paragraphs.values())
    return structure
def write_d3_collapsible_graph(structure, output_file):
    """
    Writes a collapsible graph using D3.js based on the parsed COBOL structure.
    """
    json_data = json.dumps(structure, indent=2)

    html_template = """
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <script src="https://d3js.org/d3.v7.min.js"></script>
    <title>Interactive COBOL Control Flow</title>
    <style>
        body {{
            margin: 0;
            padding: 0;
            overflow: hidden;
            background-color: white;
        }}
        .node circle {{
            stroke-width: 2px;
            cursor: pointer;
            transition: fill 0.3s, stroke 0.3s;
        }}
        .node text {{
            font: 14px sans-serif;
            fill: #000;
            pointer-events: none;
        }}
        .link {{
            fill: none;
            stroke: #d2b48c; /* Light brown lines */
            stroke-width: 1.5px;
            marker-end: url(#arrow); /* Add directional arrows */
        }}
    </style>
</head>
<body>
    <script>
        const data = __DATA_PLACEHOLDER__;

        const width = window.innerWidth;
        const height = window.innerHeight;

        const svg = d3.select("body").append("svg")
            .attr("width", width)
            .attr("height", height)
            .call(d3.zoom().on("zoom", (event) => {
                g.attr("transform", event.transform);
            }))
            .append("g");

        const defs = svg.append("defs");
        defs.append("marker")
            .attr("id", "arrow")
            .attr("viewBox", "0 -5 10 10")
            .attr("refX", 10)
            .attr("refY", 0)
            .attr("markerWidth", 6)
            .attr("markerHeight", 6)
            .attr("orient", "auto")
            .append("path")
            .attr("d", "M0,-5L10,0L0,5")
            .attr("fill", "#d2b48c");

        const g = svg.append("g")
            .attr("transform", "translate(50, 50)");

        const tree = d3.tree().size([height - 200, width * 0.8 - 300]); // Reduce width by 20%

        const root = d3.hierarchy(data);

        root.descendants().forEach((d, i) => {
            d.id = i;
            d._children = d.children;
            d.children = d.depth < 3 ? d.children : null; // Keep top 3 levels expanded
        });

        const colorScale = d3.scaleSequential(d3.interpolateCool)
            .domain([0, 10]); // Adjust the domain as per depth levels

        const update = (source) => {
            const nodes = root.descendants();
            const links = root.links();

            tree(root);

            // Links
            const link = g.selectAll(".link")
                .data(links, d => d.target.id);

            link.enter().append("line")
                .attr("class", "link")
                .merge(link)
                .attr("x1", d => d.source.y)
                .attr("y1", d => d.source.x)
                .attr("x2", d => d.target.y)
                .attr("y2", d => d.target.x)
                .attr("stroke", "#d2b48c") // Light brown color
                .attr("stroke-width", 1.5); // Thin lines

            link.exit().remove();

            // Nodes
            const node = g.selectAll(".node")
                .data(nodes, d => d.id);

            const nodeEnter = node.enter().append("g")
                .attr("class", "node")
                .attr("transform", d => `translate(${source.y0 || 0},${source.x0 || 0})`)
                .on("click", (event, d) => {
                    d.children = d.children ? null : d._children;
                    update(d);
                });

            nodeEnter.append("circle")
                .attr("r", 8)
                .attr("fill", d => colorScale(d.depth))
                .attr("stroke", d => (d._children ? "#000" : "#aaa")) // Black border for collapsible nodes
                .attr("stroke-width", d => (d._children ? 2 : 1)); // Thicker border for collapsible nodes

            nodeEnter.append("text")
                .attr("dy", 3)
                .attr("x", d => d.children || d._children ? -10 : 10)
                .style("text-anchor", d => d.children || d._children ? "end" : "start")
                .text(d => d.data.name);

            const nodeUpdate = nodeEnter.merge(node);

            nodeUpdate.transition()
                .duration(200)
                .attr("transform", d => `translate(${d.y},${d.x})`);

            nodes.forEach(d => {
                d.x0 = d.x;
                d.y0 = d.y;
            });

            node.exit().transition()
                .duration(200)
                .remove();
        };

        update(root);
    </script>
</body>
</html>
    """

    html_content = html_template.replace("__DATA_PLACEHOLDER__", json_data)

    with open(output_file, "w") as file:
        file.write(html_content)


In [152]:
def main(cobol_files, output_file):
    """
    Main function to parse COBOL files and generate a collapsible graph.
    """
    combined_structure = {"name": "Control Flow", "children": []}

    for file_path in cobol_files:
        structure = parse_cobol_code(file_path)
        combined_structure["children"].append(structure)

    write_d3_collapsible_graph(combined_structure, output_file)


# Example Usage
cobol_files = ["COCRDLIC.cbl"]  # Add more COBOL file paths if needed
output_file = "collapsible_cobol_flow.html"
main(cobol_files, output_file)

# Try 11#

In [153]:
import re
import json


def parse_cobol_code(file_path):
    """
    Parses COBOL code to extract a hierarchical structure based on syntax,
    ensuring accurate execution flow and handling 'PERFORM' constructs properly.
    """
    with open(file_path, 'r') as file:
        code_lines = file.readlines()

    # Root node containing the full COBOL file
    structure = {"name": "root", "text": "".join(code_lines), "children": []}
    current_division = None
    current_section = None
    current_paragraph = None
    current_text = []

    # Regular expressions for parsing COBOL syntax
    division_pattern = re.compile(r'^\s*([A-Z-]+)\s+DIVISION\.\s*$', re.IGNORECASE)
    section_pattern = re.compile(r'^\s*([A-Z0-9-]+)\s+SECTION\.\s*$', re.IGNORECASE)
    paragraph_pattern = re.compile(r'^\s*([A-Z0-9-]+)\.\s*$', re.IGNORECASE)
    perform_pattern = re.compile(r'\bPERFORM\s+([A-Z0-9-]+)(?:\s+THRU\s+([A-Z0-9-]+))?', re.IGNORECASE)
    if_pattern = re.compile(r'\bIF\s+(.+?)\s+THEN\s+PERFORM\s+([A-Z0-9-]+)', re.IGNORECASE)

    # Helper function to finalize node text
    def finalize_node(node, text_storage):
        if node:
            node["text"] = "\n".join(text_storage).strip()
            text_storage.clear()

    for line in code_lines:
        # Match Division
        division_match = division_pattern.match(line)
        if division_match:
            finalize_node(current_paragraph, current_text)
            finalize_node(current_section, current_text)
            finalize_node(current_division, current_text)
            current_division = {
                "name": f"{division_match.group(1)} DIVISION",
                "text": "",
                "children": []
            }
            structure["children"].append(current_division)
            current_section = None
            current_paragraph = None
            continue

        # Match Section
        section_match = section_pattern.match(line)
        if section_match and current_division:
            finalize_node(current_paragraph, current_text)
            finalize_node(current_section, current_text)
            current_section = {
                "name": f"{section_match.group(1)} SECTION",
                "text": "",
                "children": []
            }
            current_division["children"].append(current_section)
            current_paragraph = None
            continue

        # Match Paragraph
        paragraph_match = paragraph_pattern.match(line)
        if paragraph_match:
            finalize_node(current_paragraph, current_text)
            current_paragraph = {"name": paragraph_match.group(1), "text": "", "children": []}
            if current_section:
                current_section["children"].append(current_paragraph)
            elif current_division and current_division["name"] == "PROCEDURE DIVISION":
                current_division["children"].append(current_paragraph)
            continue

        # Capture text for the current node
        current_text.append(line.strip())

        # Match PERFORM Statements
        if current_division and current_division["name"] == "PROCEDURE DIVISION" and current_paragraph:
            for match in perform_pattern.findall(line):
                target = match[0]
                if match[1]:  # Handle 'THRU' cases
                    # Exclude the last paragraph (completion indicator)
                    target_end = match[1]
                    if target_end not in current_paragraph["children"]:
                        perform_node = {"name": target, "text": line.strip(), "children": []}
                        current_paragraph["children"].append(perform_node)
                else:
                    perform_node = {"name": target, "text": line.strip(), "children": []}
                    current_paragraph["children"].append(perform_node)

            # Match IF Statements
            for match in if_pattern.findall(line):
                condition = f"IF {match[0]}"
                perform_target = match[1]
                condition_node = {"name": condition, "text": line.strip(), "children": []}
                condition_node["children"].append({"name": perform_target, "text": "", "children": []})
                current_paragraph["children"].append(condition_node)

    # Finalize all remaining nodes
    finalize_node(current_paragraph, current_text)
    finalize_node(current_section, current_text)
    finalize_node(current_division, current_text)

    return structure


In [154]:
def write_d3_collapsible_graph(structure, output_file):
    """
    Writes a collapsible graph using D3.js based on the parsed COBOL structure.
    """
    json_data = json.dumps(structure, indent=2)

    html_template = """
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <script src="https://d3js.org/d3.v7.min.js"></script>
    <title>Interactive COBOL Control Flow</title>
    <style>
        body { margin: 0; padding: 0; overflow: hidden; background-color: white; }
        .node circle { stroke-width: 2px; cursor: pointer; transition: fill 0.3s, stroke 0.3s; }
        .node text { font: 14px sans-serif; fill: #000; pointer-events: none; }
        .link { fill: none; stroke: #d2b48c; stroke-width: 1.5px; marker-end: url(#arrow); }
    </style>
</head>
<body>
    <script>
        const data = __DATA_PLACEHOLDER__;

        const width = window.innerWidth;
        const height = window.innerHeight;

        const svg = d3.select("body").append("svg")
            .attr("width", width)
            .attr("height", height)
            .call(d3.zoom().on("zoom", (event) => {
                g.attr("transform", event.transform);
            }))
            .append("g");

        const defs = svg.append("defs");
        defs.append("marker")
            .attr("id", "arrow")
            .attr("viewBox", "0 -5 10 10")
            .attr("refX", 10)
            .attr("refY", 0)
            .attr("markerWidth", 6)
            .attr("markerHeight", 6)
            .attr("orient", "auto")
            .append("path")
            .attr("d", "M0,-5L10,0L0,5")
            .attr("fill", "#d2b48c");

        const g = svg.append("g").attr("transform", "translate(50, 50)");

        const tree = d3.tree().size([height - 200, width * 0.8 - 300]);

        const root = d3.hierarchy(data);

        root.descendants().forEach((d, i) => {
            d.id = i;
            d._children = d.children;
            d.children = d.depth < 3 ? d.children : null;
        });

        const update = (source) => {
            const nodes = root.descendants();
            const links = root.links();

            tree(root);

            const link = g.selectAll(".link").data(links, d => d.target.id);

            link.enter().append("line")
                .attr("class", "link")
                .merge(link)
                .attr("x1", d => d.source.y)
                .attr("y1", d => d.source.x)
                .attr("x2", d => d.target.y)
                .attr("y2", d => d.target.x);

            link.exit().remove();

            const node = g.selectAll(".node").data(nodes, d => d.id);

            const nodeEnter = node.enter().append("g")
                .attr("class", "node")
                .attr("transform", d => `translate(${source.y0 || 0},${source.x0 || 0})`)
                .on("click", (event, d) => {
                    d.children = d.children ? null : d._children;
                    update(d);
                });

            nodeEnter.append("circle")
                .attr("r", 8)
                .style("fill", "#555")
                .style("stroke", "#000")
                .style("stroke-width", 1.5);

            nodeEnter.append("text")
                .attr("dy", 3)
                .attr("x", 10)
                .style("text-anchor", "start")
                .text(d => d.data.name);

            const nodeUpdate = nodeEnter.merge(node);
            nodeUpdate.transition().duration(200).attr("transform", d => `translate(${d.y},${d.x})`);

            node.exit().transition().duration(200).remove();
        };

        update(root);
    </script>
</body>
</html>
    """

    html_content = html_template.replace("__DATA_PLACEHOLDER__", json_data)

    with open(output_file, "w") as file:
        file.write(html_content)

In [155]:
# Example Usage
file_path = "COCRDLIC.cbl"
output_file = "cobol_control_flow.html"

parsed_structure = parse_cobol_code(file_path)
write_d3_collapsible_graph(parsed_structure, output_file)

print(f"Graph saved to {output_file}")

Graph saved to cobol_control_flow.html


# Try 12

In [159]:
# Parse the COBOL file and generate the hierarchical structure
cobol_file_path = "/mnt/data/COCRDLIC.cbl"

def parse_cobol_file(file_path):
    """
    Parses COBOL code into a hierarchical structure following Enterprise COBOL standards.
    Includes support for DB2, JSON, API calls, and containers.
    """
    with open(file_path, 'r') as file:
        code_lines = file.readlines()

    # Root node containing the full COBOL file
    structure = {"name": "root", "text": "".join(code_lines), "children": []}
    current_division = None
    current_section = None
    current_paragraph = None
    current_sentence = []
    current_text = []

    # Regular expressions for parsing COBOL syntax
    division_pattern = re.compile(r'^\s*([A-Z-]+)\s+DIVISION\.\s*$', re.IGNORECASE)
    section_pattern = re.compile(r'^\s*([A-Z0-9-]+)\s+SECTION\.\s*$', re.IGNORECASE)
    paragraph_pattern = re.compile(r'^\s{7}[A-Z0-9-]+\.\s*$', re.IGNORECASE)
    perform_pattern = re.compile(r'\bPERFORM\s+([A-Z0-9-]+)(?:\s+THRU\s+([A-Z0-9-]+))?', re.IGNORECASE)
    if_pattern = re.compile(r'\bIF\s+(.+?)\s+THEN\s+PERFORM\s+([A-Z0-9-]+)', re.IGNORECASE)
    exec_sql_pattern = re.compile(r'EXEC\s+SQL\s+(.*?)\s+END-EXEC', re.IGNORECASE | re.DOTALL)
    json_pattern = re.compile(r'\bJSON\s+(PARSE|GENERATE)\s+([A-Z0-9-]+)', re.IGNORECASE)
    call_pattern = re.compile(r'\bCALL\s+["\']?([A-Z0-9-_]+)["\']?', re.IGNORECASE)

    # Helper functions
    def finalize_node(node, text_storage):
        """Finalize the text content for a node."""
        if node:
            node["text"] = "\n".join(text_storage).strip()
            text_storage.clear()

    def add_sentence_to_paragraph(paragraph, sentence_storage):
        """Add a complete sentence to the paragraph."""
        if paragraph and sentence_storage:
            sentence = {"name": "Sentence", "text": "".join(sentence_storage).strip(), "children": []}
            paragraph["children"].append(sentence)
            sentence_storage.clear()

    for line in code_lines:
        # Ignore sequence numbers (columns 1-6) and notes (columns 73-80)
        source_code = line[7:72].rstrip()

        # Skip comments and debugging lines
        if line[6:7] in ('*', '/', 'D'):
            continue

        # Match Division
        division_match = division_pattern.match(source_code)
        if division_match:
            finalize_node(current_paragraph, current_sentence)
            finalize_node(current_section, current_text)
            finalize_node(current_division, current_text)
            current_division = {
                "name": f"{division_match.group(1)} DIVISION",
                "text": "",
                "children": []
            }
            structure["children"].append(current_division)
            current_section = None
            current_paragraph = None
            continue

        # Match Section
        section_match = section_pattern.match(source_code)
        if section_match and current_division:
            finalize_node(current_paragraph, current_sentence)
            finalize_node(current_section, current_text)
            current_section = {
                "name": f"{section_match.group(1)} SECTION",
                "text": "",
                "children": []
            }
            current_division["children"].append(current_section)
            current_paragraph = None
            continue

        # Match Paragraph
        paragraph_match = paragraph_pattern.match(line)
        if paragraph_match:
            finalize_node(current_paragraph, current_text)
            current_paragraph = {"name": paragraph_match.group(0).strip(), "text": "", "children": []}
            if current_section:
                current_section["children"].append(current_paragraph)
            elif current_division and current_division["name"] == "PROCEDURE DIVISION":
                current_division["children"].append(current_paragraph)
            continue

        # Add text to the current node
        current_text.append(source_code)

        # Match Sentences
        if source_code.endswith('.'):
            add_sentence_to_paragraph(current_paragraph, current_sentence)
        else:
            current_sentence.append(source_code)

        # Match PERFORM Statements
        if current_division and current_division["name"] == "PROCEDURE DIVISION" and current_paragraph:
            for match in perform_pattern.findall(source_code):
                target = match[0]
                if match[1]:  # Handle 'THRU' cases
                    target_end = match[1]
                    perform_node = {"name": target, "text": source_code, "children": []}
                    current_paragraph["children"].append(perform_node)
                else:
                    perform_node = {"name": target, "text": source_code, "children": []}
                    current_paragraph["children"].append(perform_node)

            # Match IF Statements
            for match in if_pattern.findall(source_code):
                condition = f"IF {match[0]}"
                perform_target = match[1]
                condition_node = {"name": condition, "text": source_code, "children": []}
                condition_node["children"].append({"name": perform_target, "text": "", "children": []})
                current_paragraph["children"].append(condition_node)

            # Match CALL Statements
            for match in call_pattern.findall(source_code):
                call_node = {"name": f"CALL {match}", "text": source_code, "children": []}
                current_paragraph["children"].append(call_node)

            # Match EXEC SQL Statements
            for match in exec_sql_pattern.findall(source_code):
                sql_node = {"name": "EXEC SQL", "text": match.strip(), "children": []}
                current_paragraph["children"].append(sql_node)

            # Match JSON Operations
            for match in json_pattern.findall(source_code):
                operation, variable = match
                json_node = {"name": f"JSON {operation} {variable}", "text": source_code, "children": []}
                current_paragraph["children"].append(json_node)

    # Finalize all remaining nodes
    add_sentence_to_paragraph(current_paragraph, current_sentence)
    finalize_node(current_paragraph, current_text)
    finalize_node(current_section, current_text)
    finalize_node(current_division, current_text)

    return structure

# Parse the COBOL file
parsed_structure = parse_cobol_file(file_path)

# Display a part of the parsed structure for validation
parsed_structure_snippet = json.dumps(parsed_structure, indent=2)
parsed_structure_snippet[:1000]  # Displaying first 1000 characters for brevity


'{\n  "name": "root",\n  "text": "      *****************************************************************         \\n      * Program:     COCRDLIC.CBL                                     *         \\n      * Layer:       Business logic                                   *         \\n      * Function:    List Credit Cards                                          \\n      *              a) All cards if no context passed and admin user           \\n      *              b) Only the ones associated with ACCT in COMMAREA          \\n      *                 if user is not admin                                    \\n      ******************************************************************\\n      * Copyright Amazon.com, Inc. or its affiliates.                   \\n      * All Rights Reserved.                                            \\n      *                                                                 \\n      * Licensed under the Apache License, Version 2.0 (the \\"License\\"). \\n      

In [164]:
parsed_structure['children'][2]['children']

[{'name': 'WORKING-STORAGE SECTION',
  'text': "01  WS-MISC-STORAGE.\n\n  05 WS-CICS-PROCESSNG-VARS.\n     07 WS-RESP-CD                          PIC S9(09) COMP\n                                            VALUE ZEROS.\n     07 WS-REAS-CD                          PIC S9(09) COMP\n                                            VALUE ZEROS.\n     07 WS-TRANID                           PIC X(4)\n                                            VALUE SPACES.\n  05 WS-INPUT-FLAG                          PIC X(1).\n    88  INPUT-OK                            VALUES '0'\n                                                   ' '\n                                            LOW-VALUES.\n    88  INPUT-ERROR                         VALUE '1'.\n  05  WS-EDIT-ACCT-FLAG                     PIC X(1).\n    88  FLG-ACCTFILTER-NOT-OK               VALUE '0'.\n    88  FLG-ACCTFILTER-ISVALID             VALUE '1'.\n    88  FLG-ACCTFILTER-BLANK                VALUE ' '.\n  05  WS-EDIT-CARD-FLAG                     P

In [166]:
# Rewriting the function to save the D3.js graph and ensure correct syntax
def write_d3_collapsible_graph(structure, output_file):
    """
    Writes a collapsible graph using D3.js based on the parsed COBOL structure.
    """
    json_data = json.dumps(structure, indent=2)

    html_template = """
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <script src="https://d3js.org/d3.v7.min.js"></script>
    <title>Interactive COBOL Control Flow</title>
    <style>
        body {
            margin: 0;
            padding: 0;
            overflow: hidden;
            background-color: white;
        }
        .node circle {
            stroke-width: 2px;
            cursor: pointer;
            transition: fill 0.3s, stroke 0.3s;
        }
        .node text {
            font: 14px sans-serif;
            fill: #000;
            pointer-events: none;
        }
        .link {
            fill: none;
            stroke: #d2b48c; /* Light brown lines */
            stroke-width: 1.5px;
            marker-end: url(#arrow); /* Add directional arrows */
        }
    </style>
</head>
<body>
    <script>
        const data = __DATA_PLACEHOLDER__;

        const width = window.innerWidth;
        const height = window.innerHeight;

        const svg = d3.select("body").append("svg")
            .attr("width", width)
            .attr("height", height)
            .call(d3.zoom().on("zoom", (event) => {
                g.attr("transform", event.transform);
            }))
            .append("g");

        const defs = svg.append("defs");
        defs.append("marker")
            .attr("id", "arrow")
            .attr("viewBox", "0 -5 10 10")
            .attr("refX", 10)
            .attr("refY", 0)
            .attr("markerWidth", 6)
            .attr("markerHeight", 6)
            .attr("orient", "auto")
            .append("path")
            .attr("d", "M0,-5L10,0L0,5")
            .attr("fill", "#d2b48c");

        const g = svg.append("g")
            .attr("transform", "translate(50, 50)");

        const tree = d3.tree().size([height - 200, width * 0.8 - 300]); // Reduce width by 20%

        const root = d3.hierarchy(data);

        root.descendants().forEach((d, i) => {
            d.id = i;
            d._children = d.children;
            d.children = d.depth < 3 ? d.children : null; // Keep top 3 levels expanded
        });

        const colorScale = d3.scaleSequential(d3.interpolateCool)
            .domain([0, 10]); // Adjust the domain as per depth levels

        const update = (source) => {
            const nodes = root.descendants();
            const links = root.links();

            tree(root);

            // Links
            const link = g.selectAll(".link")
                .data(links, d => d.target.id);

            link.enter().append("line")
                .attr("class", "link")
                .merge(link)
                .attr("x1", d => d.source.y)
                .attr("y1", d => d.source.x)
                .attr("x2", d => d.target.y)
                .attr("y2", d => d.target.x)
                .attr("stroke", "#d2b48c") // Light brown color
                .attr("stroke-width", 1.5); // Thin lines

            link.exit().remove();

            // Nodes
            const node = g.selectAll(".node")
                .data(nodes, d => d.id);

            const nodeEnter = node.enter().append("g")
                .attr("class", "node")
                .attr("transform", d => `translate(${source.y0 || 0},${source.x0 || 0})`)
                .on("click", (event, d) => {
                    d.children = d.children ? null : d._children;
                    update(d);
                });

            nodeEnter.append("circle")
                .attr("r", 8)
                .attr("fill", d => colorScale(d.depth))
                .attr("stroke", d => (d._children ? "#000" : "#aaa")) // Black border for collapsible nodes
                .attr("stroke-width", d => (d._children ? 2 : 1)); // Thicker border for collapsible nodes

            nodeEnter.append("text")
                .attr("dy", 3)
                .attr("x", d => d.children || d._children ? -10 : 10)
                .style("text-anchor", d => d.children || d._children ? "end" : "start")
                .text(d => d.data.name);

            const nodeUpdate = nodeEnter.merge(node);

            nodeUpdate.transition()
                .duration(200)
                .attr("transform", d => `translate(${d.y},${d.x})`);

            nodes.forEach(d => {
                d.x0 = d.x;
                d.y0 = d.y;
            });

            node.exit().transition()
                .duration(200)
                .remove();
        };

        update(root);
    </script>
</body>
</html>
    """

    html_content = html_template.replace("__DATA_PLACEHOLDER__", json_data)

    with open(output_file, "w") as file:
        file.write(html_content)

# Example Usage
file_path = "COCRDLIC.cbl"
output_file = "cobol_control_flow.html"

parsed_structure = parse_cobol_file(file_path)
write_d3_collapsible_graph(parsed_structure, output_file)

print(f"Graph saved to {output_file}")


Graph saved to cobol_control_flow.html


# Try 13

In [167]:
def parse_cobol_file(file_path):
    """
    Parses COBOL code into a hierarchical structure following Enterprise COBOL standards.
    Adds hover text for paragraphs and sentences, and handles EXEC CICS creatively.
    """
    with open(file_path, 'r') as file:
        code_lines = file.readlines()

    # Root node containing the full COBOL file
    structure = {"name": "root", "text": "".join(code_lines), "children": []}
    current_division = None
    current_section = None
    current_paragraph = None
    current_sentence = []
    current_text = []

    # Regular expressions for parsing COBOL syntax
    division_pattern = re.compile(r'^\s*([A-Z-]+)\s+DIVISION\.\s*$', re.IGNORECASE)
    section_pattern = re.compile(r'^\s*([A-Z0-9-]+)\s+SECTION\.\s*$', re.IGNORECASE)
    paragraph_pattern = re.compile(r'^\s{7}[A-Z0-9-]+\.\s*$', re.IGNORECASE)
    exec_cics_pattern = re.compile(r'EXEC\s+CICS\s+(.*?)\s+END-EXEC', re.IGNORECASE | re.DOTALL)
    perform_pattern = re.compile(r'\bPERFORM\s+([A-Z0-9-]+)(?:\s+THRU\s+([A-Z0-9-]+))?', re.IGNORECASE)
    if_pattern = re.compile(r'\bIF\s+(.+?)\s+THEN\s+PERFORM\s+([A-Z0-9-]+)', re.IGNORECASE)
    call_pattern = re.compile(r'\bCALL\s+["\']?([A-Z0-9-_]+)["\']?', re.IGNORECASE)

    # Helper functions
    def finalize_node(node, text_storage):
        """Finalize the text content for a node."""
        if node:
            node["text"] = "\n".join(text_storage).strip()
            text_storage.clear()

    def add_sentence_to_paragraph(paragraph, sentence_storage):
        """Add a complete sentence to the paragraph."""
        if paragraph and sentence_storage:
            sentence_text = "".join(sentence_storage).strip()
            sentence_name = f"S: {sentence_text[:20]}..." if len(sentence_text) > 20 else f"S: {sentence_text}"
            sentence = {"name": sentence_name, "text": sentence_text, "children": []}
            paragraph["children"].append(sentence)
            sentence_storage.clear()

    for line in code_lines:
        # Ignore sequence numbers (columns 1-6) and notes (columns 73-80)
        source_code = line[7:72].rstrip()

        # Skip comments and debugging lines
        if line[6:7] in ('*', '/', 'D'):
            continue

        # Match Division
        division_match = division_pattern.match(source_code)
        if division_match:
            finalize_node(current_paragraph, current_sentence)
            finalize_node(current_section, current_text)
            finalize_node(current_division, current_text)
            current_division = {
                "name": f"{division_match.group(1)} DIVISION",
                "text": "",
                "children": []
            }
            structure["children"].append(current_division)
            current_section = None
            current_paragraph = None
            continue

        # Match Section
        section_match = section_pattern.match(source_code)
        if section_match and current_division:
            finalize_node(current_paragraph, current_sentence)
            finalize_node(current_section, current_text)
            current_section = {
                "name": f"{section_match.group(1)} SECTION",
                "text": "",
                "children": []
            }
            current_division["children"].append(current_section)
            current_paragraph = None
            continue

        # Match Paragraph
        paragraph_match = paragraph_pattern.match(line)
        if paragraph_match:
            finalize_node(current_paragraph, current_text)
            current_paragraph = {"name": paragraph_match.group(0).strip(), "text": "", "children": []}
            if current_section:
                current_section["children"].append(current_paragraph)
            elif current_division and current_division["name"] == "PROCEDURE DIVISION":
                current_division["children"].append(current_paragraph)
            continue

        # Add text to the current node
        current_text.append(source_code)

        # Match Sentences
        if source_code.endswith('.'):
            add_sentence_to_paragraph(current_paragraph, current_sentence)
        else:
            current_sentence.append(source_code)

        # Match EXEC CICS Statements
        for match in exec_cics_pattern.findall(source_code):
            exec_cics_node = {"name": "EXEC CICS", "text": match.strip(), "children": []}
            if current_paragraph:
                current_paragraph["children"].append(exec_cics_node)

        # Match PERFORM Statements
        if current_division and current_division["name"] == "PROCEDURE DIVISION" and current_paragraph:
            for match in perform_pattern.findall(source_code):
                target = match[0]
                perform_node = {"name": target, "text": source_code, "children": []}
                current_paragraph["children"].append(perform_node)

            # Match IF Statements
            for match in if_pattern.findall(source_code):
                condition = f"IF {match[0]}"
                perform_target = match[1]
                condition_node = {"name": condition, "text": source_code, "children": []}
                condition_node["children"].append({"name": perform_target, "text": "", "children": []})
                current_paragraph["children"].append(condition_node)

            # Match CALL Statements
            for match in call_pattern.findall(source_code):
                call_node = {"name": f"CALL {match}", "text": source_code, "children": []}
                current_paragraph["children"].append(call_node)

    # Finalize all remaining nodes
    add_sentence_to_paragraph(current_paragraph, current_sentence)
    finalize_node(current_paragraph, current_text)
    finalize_node(current_section, current_text)
    finalize_node(current_division, current_text)

    return structure


In [168]:
import json

def write_d3_collapsible_graph(structure, output_file):
    """
    Writes a collapsible graph using D3.js based on the parsed COBOL structure.
    Enhancements include hover text, arrow adjustments, and `EXEC CICS` styling.
    """
    json_data = json.dumps(structure, indent=2)

    html_template = """
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <script src="https://d3js.org/d3.v7.min.js"></script>
    <title>Interactive COBOL Control Flow</title>
    <style>
        body {
            margin: 0;
            padding: 0;
            overflow: hidden;
            background-color: white;
        }
        .node circle {
            stroke-width: 2px;
            cursor: pointer;
            transition: fill 0.3s, stroke 0.3s;
        }
        .node text {
            font: 14px sans-serif;
            fill: #000;
            pointer-events: none;
        }
        .link {
            fill: none;
            stroke: #d2b48c;
            stroke-width: 1.5px;
            marker-end: url(#arrow);
        }
        .tooltip {
            position: absolute;
            text-align: center;
            width: auto;
            height: auto;
            padding: 5px;
            font: 12px sans-serif;
            background: lightgrey;
            border: 1px solid black;
            border-radius: 5px;
            pointer-events: none;
            visibility: hidden;
        }
    </style>
</head>
<body>
    <div class="tooltip"></div>
    <script>
        const data = __DATA_PLACEHOLDER__;

        const width = window.innerWidth;
        const height = window.innerHeight;

        const tooltip = d3.select(".tooltip");

        const svg = d3.select("body").append("svg")
            .attr("width", width)
            .attr("height", height)
            .call(d3.zoom().on("zoom", (event) => {
                g.attr("transform", event.transform);
            }))
            .append("g");

        const defs = svg.append("defs");
        defs.append("marker")
            .attr("id", "arrow")
            .attr("viewBox", "0 -5 10 10")
            .attr("refX", 10)
            .attr("refY", 0)
            .attr("markerWidth", 6)
            .attr("markerHeight", 6)
            .attr("orient", "auto")
            .append("path")
            .attr("d", "M0,-5L10,0L0,5")
            .attr("fill", "#d2b48c");

        const g = svg.append("g")
            .attr("transform", "translate(50, 50)");

        const tree = d3.tree().size([height - 200, width * 0.8 - 300]);

        const root = d3.hierarchy(data);

        root.descendants().forEach((d, i) => {
            d.id = i;
            d._children = d.children;
            d.children = d.depth < 3 ? d.children : null;
        });

        const colorScale = d3.scaleSequential(d3.interpolateCool)
            .domain([0, 10]);

        const update = (source) => {
            const nodes = root.descendants();
            const links = root.links();

            tree(root);

            const link = g.selectAll(".link")
                .data(links, d => d.target.id);

            link.enter().append("line")
                .attr("class", "link")
                .merge(link)
                .attr("x1", d => d.source.y + 8)
                .attr("y1", d => d.source.x)
                .attr("x2", d => d.target.y - 8)
                .attr("y2", d => d.target.x)
                .attr("stroke", "#d2b48c")
                .attr("stroke-width", 1.5);

            link.exit().remove();

            const node = g.selectAll(".node")
                .data(nodes, d => d.id);

            const nodeEnter = node.enter().append("g")
                .attr("class", "node")
                .attr("transform", d => `translate(${source.y0 || 0},${source.x0 || 0})`)
                .on("mouseover", (event, d) => {
                    if (d.data.text) {
                        tooltip.style("visibility", "visible")
                            .html(d.data.text.substring(0, 30) + "...");
                    }
                })
                .on("mousemove", (event) => {
                    tooltip.style("top", (event.pageY + 10) + "px")
                        .style("left", (event.pageX + 10) + "px");
                })
                .on("mouseout", () => {
                    tooltip.style("visibility", "hidden");
                })
                .on("click", (event, d) => {
                    d.children = d.children ? null : d._children;
                    update(d);
                });

            nodeEnter.append("circle")
                .attr("r", 8)
                .attr("fill", d => d.data.name === "EXEC CICS" ? "#ffcc00" : colorScale(d.depth))
                .attr("stroke", d => d.data.name === "EXEC CICS" ? "#ff9900" : (d._children ? "#000" : "#aaa"))
                .attr("stroke-width", d => d.data.name === "EXEC CICS" ? 2.5 : (d._children ? 2 : 1));

            nodeEnter.append("text")
                .attr("dy", 3)
                .attr("x", d => d.children || d._children ? -10 : 10)
                .style("text-anchor", d => d.children || d._children ? "end" : "start")
                .text(d => d.data.name);

            const nodeUpdate = nodeEnter.merge(node);

            nodeUpdate.transition()
                .duration(200)
                .attr("transform", d => `translate(${d.y},${d.x})`);

            nodes.forEach(d => {
                d.x0 = d.x;
                d.y0 = d.y;
            });

            node.exit().transition()
                .duration(200)
                .remove();
        };

        update(root);
    </script>
</body>
</html>
    """

    html_content = html_template.replace("__DATA_PLACEHOLDER__", json_data)

    with open(output_file, "w") as file:
        file.write(html_content)


In [169]:

# Example Usage
file_path = "COCRDLIC.cbl"
output_file = "cobol_control_flow.html"

parsed_structure = parse_cobol_file(file_path)
write_d3_collapsible_graph(parsed_structure, output_file)

print(f"Graph saved to {output_file}")


Graph saved to cobol_control_flow.html


# Try 14

In [339]:
def parse_cobol_ast_fixed(file_path):
    """
    Parses COBOL code into a complete hierarchical structure and paragraph dictionary.
    Ensures proper text accumulation, children attachment, and control flow extraction.
    """
    with open(file_path, 'r') as file:
        code_lines = file.readlines()

    structure = {"name": "root", "type": "root", "text": "".join(code_lines), "children": []}
    paragraphs = {}
    current_context = {"division": None, "section": None, "paragraph": None}
    current_sentence = []  # Accumulate lines to form a sentence

    # Reserved keywords and control flow constructs
    reserved_keywords = {"UNTIL", "VARYING", "EXIT", "GO", "TO", "IF", "ELSE", "END-IF"}
    control_flow_keywords = {"PERFORM", "CALL", "EXEC", "GOTO"}

    # Regular expressions for COBOL syntax
    division_pattern = re.compile(r'^\s*([A-Z-]+)\s+DIVISION\.\s*$', re.IGNORECASE)
    section_pattern = re.compile(r'^\s*([A-Z0-9-]+)\s+SECTION\.\s*$', re.IGNORECASE)
    paragraph_pattern = re.compile(r'^\s{7}([A-Z0-9-]+)\.\s*$', re.IGNORECASE)

    def create_node(name, node_type):
        """Create a new node for the AST."""
        return {"name": name, "type": node_type, "text": "", "children": [], "parents": []}

    def attach_to_context(node, context_key):
        """Attach a node to the appropriate context."""
        parent = current_context.get(context_key)
        if parent:
            parent["children"].append(node)
        else:
            structure["children"].append(node)

    def finalize_paragraph():
        """Finalize the current paragraph, storing its data in `paragraphs`."""
        paragraph = current_context["paragraph"]
        if paragraph:
            paragraph["text"] = paragraph["text"].strip()
            paragraphs[paragraph["name"]] = paragraph
            current_context["paragraph"] = None

    def extract_control_flow(sentence, paragraph):
        """Extract control flow constructs and link paragraphs."""
        words = sentence.split()
        for idx, word in enumerate(words):
            if word in control_flow_keywords:
                if word == "PERFORM" and idx + 1 < len(words):
                    target = words[idx + 1]
                    if target in paragraphs:
                        target_node = paragraphs[target]
                        paragraph["children"].append(target_node)
                        target_node["parents"].append(paragraph["name"])

    def process_sentence(sentence):
        """Process a single COBOL sentence."""
        nonlocal current_context

        # Skip reserved keywords
        if any(sentence.strip().startswith(keyword) for keyword in reserved_keywords):
            return

        # Match DIVISION
        division_match = division_pattern.match(sentence)
        if division_match:
            finalize_paragraph()
            division_node = create_node(f"{division_match.group(1)} DIVISION", "division")
            attach_to_context(division_node, "division")
            current_context["division"] = division_node
            current_context["section"] = None
            current_context["paragraph"] = None
            return

        # Match SECTION
        section_match = section_pattern.match(sentence)
        if section_match:
            finalize_paragraph()
            section_node = create_node(f"{section_match.group(1)} SECTION", "section")
            attach_to_context(section_node, "division")
            current_context["section"] = section_node
            current_context["paragraph"] = None
            return

        # Match PARAGRAPH
        paragraph_match = paragraph_pattern.match(sentence)
        if paragraph_match:
            finalize_paragraph()
            paragraph_name = paragraph_match.group(1)
            paragraph_node = create_node(paragraph_name, "paragraph")
            attach_to_context(paragraph_node, "section" if current_context["section"] else "division")
            current_context["paragraph"] = paragraph_node
            paragraphs[paragraph_name] = paragraph_node
            return

        # Add SENTENCE or STATEMENT
        paragraph = current_context["paragraph"]
        if paragraph:
            paragraph["text"] += f" {sentence.strip()}"
            sentence_node = create_node("Sentence", "sentence")
            paragraph["children"].append(sentence_node)

            # Add terminal nodes (statements) under the sentence
            for statement in sentence.split('.'):
                if statement.strip():
                    statement_node = create_node("Statement", "statement")
                    statement_node["text"] = statement.strip()
                    sentence_node["children"].append(statement_node)

            # Extract control flow
            extract_control_flow(sentence, paragraph)

    # Main parsing loop
    for line in code_lines:
        # Ignore sequence numbers (columns 1-6) and comments (columns 73-80)
        source_code = line[7:72].rstrip()
        if line[6:7] in ('*', '/', 'D'):  # Comment or debug line
            continue

        # Accumulate lines until a complete sentence (ends with ".")
        current_sentence.append(source_code)
        if source_code.endswith('.'):
            sentence = " ".join(current_sentence).strip()
            current_sentence.clear()
            process_sentence(sentence)

    finalize_paragraph()  # Finalize the last paragraph

    return {"structure": structure, "paragraphs": paragraphs}


In [340]:
def write_d3_collapsible_graph_with_arrows(structure, paragraphs, output_file):
    """
    Writes a collapsible graph using D3.js based on the parsed COBOL structure.
    Embeds data directly and ensures all features are implemented correctly.
    """
    # Minify JSON data for direct embedding
    json_data = json.dumps(structure)
    paragraph_data = json.dumps(paragraphs)

    html_template = f"""
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <script src="https://d3js.org/d3.v7.min.js"></script>
    <title>Interactive COBOL Control Flow</title>
    <style>
        body {{
            margin: 0;
            padding: 0;
            overflow: hidden;
            background-color: white;
        }}
        .node circle {{
            stroke-width: 2px;
            cursor: pointer;
            transition: fill 0.3s, stroke 0.3s;
        }}
        .node text {{
            font: 14px sans-serif;
            fill: #000;
            pointer-events: none;
        }}
        .link {{
            fill: none;
            stroke: #d2b48c;
            stroke-width: 1.5px;
            marker-end: url(#arrow);
        }}
        .tooltip {{
            position: absolute;
            text-align: center;
            width: auto;
            height: auto;
            padding: 5px;
            font: 12px sans-serif;
            background: lightgrey;
            border: 1px solid black;
            border-radius: 5px;
            pointer-events: none;
            visibility: hidden;
        }}
    </style>
</head>
<body>
    <div class="tooltip"></div>
    <script>
        const data = {json_data};
        const paragraphs = {paragraph_data};

        const width = window.innerWidth;
        const height = window.innerHeight;

        const tooltip = d3.select(".tooltip");

        const svg = d3.select("body").append("svg")
            .attr("width", width)
            .attr("height", height)
            .call(d3.zoom().on("zoom", (event) => {{
                g.attr("transform", event.transform);
            }}))
            .append("g");

        const defs = svg.append("defs");
        defs.append("marker")
            .attr("id", "arrow")
            .attr("viewBox", "0 -5 10 10")
            .attr("refX", 10)
            .attr("refY", 0)
            .attr("markerWidth", 6)
            .attr("markerHeight", 6)
            .attr("orient", "auto")
            .append("path")
            .attr("d", "M0,-5L10,0L0,5")
            .attr("fill", "#d2b48c");

        const g = svg.append("g")
            .attr("transform", "translate(50, 50)");

        const tree = d3.tree().size([height - 200, width - 200]);

        const root = d3.hierarchy(data);

        root.descendants().forEach((d, i) => {{
            d.id = i;
            d.x0 = d.x || 0;
            d.y0 = d.y || 0;
            d._children = d.children;
            d.children = d.depth < 3 ? d.children : null;
        }});

        const colorScale = d3.scaleSequential(d3.interpolateCool)
            .domain([0, 10]);

        const update = (source) => {{
            const nodes = root.descendants();
            const links = root.links();

            tree(root);

            // Links
            const link = g.selectAll(".link")
                .data(links, link => link.target.id);

            link.enter().append("line")
                .attr("class", "link")
                .merge(link)
                .transition()
                .duration(300)
                .attr("x1", d => d.source.y)
                .attr("y1", d => d.source.x)
                .attr("x2", d => d.target.y)
                .attr("y2", d => d.target.x)
                .attr("stroke", "#d2b48c")
                .attr("stroke-width", 1.5);

            link.exit().remove();

            // Add multiple parent arrows
            Object.keys(paragraphs).forEach(paragraph => {{
                if (paragraphs[paragraph].parents) {{
                    paragraphs[paragraph].parents.forEach(parent => {{
                        const sourceNode = nodes.find(n => n.data.name === parent);
                        const targetNode = nodes.find(n => n.data.name === paragraph);
                        if (sourceNode && targetNode) {{
                            g.append("line")
                                .attr("class", "link")
                                .attr("x1", sourceNode.y)
                                .attr("y1", sourceNode.x)
                                .attr("x2", targetNode.y)
                                .attr("y2", targetNode.x)
                                .attr("stroke", "#d2b48c")
                                .attr("stroke-width", 1.5)
                                .attr("marker-end", "url(#arrow)");
                        }}
                    }});
                }}
            }});

            // Nodes
            const node = g.selectAll(".node")
                .data(nodes, node => node.id);

            const nodeEnter = node.enter().append("g")
                .attr("class", "node")
                .attr("transform", d => `translate(${{d.y0}},${{d.x0}})`)
                .on("mouseover", (event, d) => {{
                    if (d.data.text) {{
                        tooltip.style("visibility", "visible")
                            .html(d.data.text.substring(0, 50) + "...");
                    }}
                }})
                .on("mousemove", (event) => {{
                    tooltip.style("top", (event.pageY + 10) + "px")
                        .style("left", (event.pageX + 10) + "px");
                }})
                .on("mouseout", () => {{
                    tooltip.style("visibility", "hidden");
                }})
                .on("click", (event, d) => {{
                    d.children = d.children ? null : d._children;
                    update(d);
                }});

            nodeEnter.append("circle")
                .attr("r", 8)
                .attr("fill", d => d.data.name === "EXEC CICS" ? "#ffcc00" : colorScale(d.depth))
                .attr("stroke", d => d.data.name === "EXEC CICS" ? "#ff9900" : (d._children ? "#000" : "#aaa"))
                .attr("stroke-width", d => d.data.name === "EXEC CICS" ? 2.5 : (d._children ? 2 : 1));

            nodeEnter.append("text")
                .attr("dy", 3)
                .attr("x", d => d.children || d._children ? -10 : 10)
                .style("text-anchor", d => d.children || d._children ? "end" : "start")
                .text(d => d.data.name);

            const nodeUpdate = nodeEnter.merge(node);

            nodeUpdate.transition()
                .duration(300)
                .attr("transform", d => `translate(${{d.y}},${{d.x}})`);

            nodes.forEach(d => {{
                d.x0 = d.x;
                d.y0 = d.y;
            }});

            node.exit().transition()
                .duration(300)
                .remove();
        }};

        update(root);
    </script>
</body>
</html>
    """

    with open(output_file, "w") as file:
        file.write(html_template)



In [341]:
# Example Usage
file_path = "DB1024_2.cbl"
file_path = "COCRDLIC.cbl"
output_file_path = "cobol_control_flow.html"

parsed_structure = parse_cobol_ast_fixed(file_path)

write_d3_collapsible_graph_with_arrows(parsed_structure["structure"], parsed_structure["paragraphs"], output_file_path)

print(f"Graph saved to {output_file_path}")


Graph saved to cobol_control_flow.html


In [342]:
parsed_structure['structure']['children']

[{'name': 'IDENTIFICATION DIVISION',
  'type': 'division',
  'text': '',
  'children': [{'name': 'ENVIRONMENT DIVISION',
    'type': 'division',
    'text': '',
    'children': [{'name': 'INPUT-OUTPUT SECTION',
      'type': 'section',
      'text': '',
      'children': [],
      'parents': []},
     {'name': 'DATA DIVISION',
      'type': 'division',
      'text': '',
      'children': [{'name': 'WORKING-STORAGE SECTION',
        'type': 'section',
        'text': '',
        'children': [],
        'parents': []},
       {'name': 'LINKAGE SECTION',
        'type': 'section',
        'text': '',
        'children': [],
        'parents': []},
       {'name': 'PROCEDURE DIVISION',
        'type': 'division',
        'text': '',
        'children': [],
        'parents': []}],
      'parents': []}],
    'parents': []}],
  'parents': []}]

In [343]:
[a['children'] for a in parsed_structure['structure']['children'][-1]['children']]

[[{'name': 'INPUT-OUTPUT SECTION',
   'type': 'section',
   'text': '',
   'children': [],
   'parents': []},
  {'name': 'DATA DIVISION',
   'type': 'division',
   'text': '',
   'children': [{'name': 'WORKING-STORAGE SECTION',
     'type': 'section',
     'text': '',
     'children': [],
     'parents': []},
    {'name': 'LINKAGE SECTION',
     'type': 'section',
     'text': '',
     'children': [],
     'parents': []},
    {'name': 'PROCEDURE DIVISION',
     'type': 'division',
     'text': '',
     'children': [],
     'parents': []}],
   'parents': []}]]

In [336]:
parsed_structure['paragraphs']

{}