In [1]:
import json
import networkx as nx

# Step 1: Load the JSON file.
# The encoding is pinned to UTF-8 so that reading the file does not depend on
# the platform's locale default.
with open('../dependency_graph.json', 'r', encoding='utf-8') as f:
    graph_data = json.load(f)

# Step 2: Parse nodes and edges.
# Every edge record is reduced to a (source, target) pair.
nodes = graph_data['nodes']
edges = [(edge['source'], edge['target']) for edge in graph_data['edges']]

# Step 3: Analyze the graph.
# Build a directed graph from the parsed nodes and edges.
G = nx.DiGraph()
G.add_nodes_from(nodes)
G.add_edges_from(edges)

# Basic analysis: report the size of the graph.
num_nodes = G.number_of_nodes()
num_edges = G.number_of_edges()
print(f"Number of nodes: {num_nodes}")
print(f"Number of edges: {num_edges}")

Number of nodes: 601
Number of edges: 1063


In [2]:
# Advanced Analysis

# Node with the highest in-degree: rank every node by the score returned from
# nx.in_degree_centrality and keep the best-scoring one.
in_degree_centrality = nx.in_degree_centrality(G)
node_max_in_degree = max(
    in_degree_centrality,
    key=lambda candidate: in_degree_centrality[candidate],
)
print(f"Node with highest in-degree:\n{node_max_in_degree}")

# Node with the highest out-degree: same approach with nx.out_degree_centrality.
out_degree_centrality = nx.out_degree_centrality(G)
node_max_out_degree = max(
    out_degree_centrality,
    key=lambda candidate: out_degree_centrality[candidate],
)
print(f"Node with highest out-degree:\n{node_max_out_degree}")

Node with highest in-degree:
name=fetchItemValue
file=wwpdb/apps/deposit/depui/apilayer.py
line=8814
Node with highest out-degree:
name=updateViewContext
file=wwpdb/apps/deposit/depui/views.py
line=368


In [3]:
# Nodes with exactly one outgoing edge, in graph node order.
nodes_with_one_outgoing_edge = [
    node for node, degree in G.out_degree() if degree == 1
]
print(f"Nodes with only one outgoing edge: {json.dumps(nodes_with_one_outgoing_edge, indent=2)}")

# Nodes with exactly one incoming edge, in graph node order.
nodes_with_one_incoming_edge = [
    node for node, degree in G.in_degree() if degree == 1
]
print(f"Nodes with only one incoming edge: {json.dumps(nodes_with_one_incoming_edge, indent=2)}")

# Nodes that have exactly one incoming edge AND exactly one outgoing edge.
nodes_with_one_incoming_and_outgoing_edge = [
    node
    for node in G.nodes
    if (G.in_degree(node), G.out_degree(node)) == (1, 1)
]
print(f"Nodes with only one incoming and one outgoing edge: {json.dumps(nodes_with_one_incoming_and_outgoing_edge, indent=2)}")

Nodes with only one outgoing edge: [
  "name=__init__\nfile=wwpdb/utils/config/ConfigInfoData.py\nline=646",
  "name=selectRows\nfile=wwpdb/utils/wf/dbapi/DbCommand.py\nline=255",
  "name=exist\nfile=wwpdb/utils/wf/dbapi/WfDbApi.py\nline=593",
  "name=__init__\nfile=wwpdb/apps/deposit/depui/snapshot/OtherData.py\nline=42",
  "name=__init__\nfile=wwpdb/apps/wf_engine/engine/dbAPI.py\nline=13",
  "name=__init__\nfile=wwpdb/utils/wf/DataValueContainer.py\nline=34",
  "name=__init__\nfile=wwpdb/utils/wf/DataSelector.py\nline=35",
  "name=setDepositionDataSetId\nfile=wwpdb/io/locator/DataReference.py\nline=521",
  "name=setContentTypeAndFormat\nfile=wwpdb/io/locator/DataReference.py\nline=414",
  "name=setStorageType\nfile=wwpdb/io/locator/DataReference.py\nline=464",
  "name=setWorkflowInstanceId\nfile=wwpdb/io/locator/DataReference.py\nline=543",
  "name=__isInternalReferenceValid\nfile=wwpdb/io/locator/DataReference.py\nline=784",
  "name=__getInternalPath\nfile=wwpdb/io/locator/DataRefe

In [4]:
# Given that the graph represents a call stack, identify non-branched paths of deep nested calls.
# A walk starts at each node that has exactly one outgoing edge and keeps
# following the single successor for as long as the node it is standing on has
# exactly one incoming and one outgoing edge. The successor that ends the walk
# is still appended, so the last node of a path may itself branch.

# Set copy of the chain-node list: membership is tested on every step of every
# walk, and a set lookup avoids rescanning the whole list each time.
chain_nodes = set(nodes_with_one_incoming_and_outgoing_edge)

# Initialize a list to hold all non-branched paths
non_branched_paths = []

# Iterate over nodes with only one outgoing edge to start paths
for start_node in nodes_with_one_outgoing_edge:
    current_path = [start_node]
    visited = {start_node}  # nodes already on this path, used to stop at cycles
    current_node = start_node

    # Keep walking only while the current node is an unbranched chain link
    while current_node in chain_nodes:
        next_node = next(iter(G.successors(current_node)), None)
        # Compare against None explicitly so a falsy node id (0, "") is still followed
        if next_node is None or next_node in visited:
            break  # End the path if there is no next node or a loop is detected
        current_path.append(next_node)
        visited.add(next_node)
        current_node = next_node

    # Save the path if it has more than one node
    if len(current_path) > 1:
        non_branched_paths.append(current_path)

# Print the identified non-branched paths
for i, path in enumerate(non_branched_paths, 1):
    print(f"Path {i}: {path}")

Path 1: ['name=selectRows\nfile=wwpdb/utils/wf/dbapi/DbCommand.py\nline=255', 'name=makeSqlConstraint\nfile=wwpdb/utils/wf/dbapi/DbCommand.py\nline=104']
Path 2: ['name=__init__\nfile=wwpdb/utils/wf/DataValueContainer.py\nline=34', 'name=__init__\nfile=wwpdb/io/locator/DataReference.py\nline=243']
Path 3: ['name=__latestVersion\nfile=wwpdb/io/locator/DataReference.py\nline=1105', 'name=getReferenceType\nfile=wwpdb/io/locator/DataReference.py\nline=71']
Path 4: ['name=check_complex_errors\nfile=wwpdb/apps/deposit/depui/upload.py\nline=2360', 'name=add_status\nfile=wwpdb/apps/deposit/depui/upload.py\nline=1324']
Path 5: ['name=get_em_map_upload\nfile=wwpdb/apps/deposit/depui/upload.py\nline=2916', 'name=loadFromPickle\nfile=wwpdb/apps/deposit/depui/apilayer.py\nline=3528']
Path 6: ['name=check_block_edit\nfile=wwpdb/apps/deposit/depui/upload.py\nline=2556', 'name=validate_getUploadValidate\nfile=wwpdb/apps/deposit/depui/apilayer.py\nline=6070']
Path 7: ['name=check_em_valid\nfile=wwpdb/a

In [5]:
# Identify the nodes whose "name" attribute is in camel case

# First, define a function to extract the name attribute from a node
import re

def extract_node_attributes(node_string):
    """Parse a node label of the form ``name=...\\nfile=...\\nline=<digits>``.

    Returns a ``(name, file, line)`` tuple of strings taken from the first
    place in ``node_string`` where that layout occurs, or
    ``(None, None, None)`` when it does not occur at all. The line number is
    returned as the matched digit string, not converted to an int.
    """
    found = re.search(r'name=([^\n]+)\nfile=([^\n]+)\nline=(\d+)', node_string)
    if found is None:
        # Layout not present: signal that with a None for every attribute
        return None, None, None
    name, file_path, line_number = found.groups()
    return name, file_path, line_number

# Helper: does the string contain at least one uppercase character?
def has_uppercase(string):
    """Return True if any character of ``string`` is uppercase, else False."""
    for char in string:
        if char.isupper():
            return True
    return False

# Helper: strict lower-camel-case test
def is_camel_case(string):
    """Return True if ``string`` is lower camel case.

    Accepted shape: one or more lowercase ASCII letters, followed by one or
    more humps, each hump being an uppercase ASCII letter and then zero or
    more lowercase ASCII letters. Digits and underscores are not accepted.
    """
    camel_case_pattern = r'^[a-z]+(?:[A-Z][a-z]*)+$'
    return re.match(camel_case_pattern, string) is not None

# Finally, identify the nodes whose "name" attribute is in camel case.
# Each node string is parsed once. A node whose string does not follow the
# "name=...\nfile=...\nline=..." layout yields a name of None and is skipped,
# because the two checks below need a string to inspect.
# NOTE: is_camel_case() can only succeed when the name contains an uppercase
# letter, so in practice has_uppercase() decides membership; both checks are
# kept so the selection stays exactly as before.
parsed_node_names = ((node, extract_node_attributes(node)[0]) for node in nodes)
camel_case_nodes = [
    node
    for node, node_name in parsed_node_names
    if node_name is not None
    and (is_camel_case(node_name) or has_uppercase(node_name))
]
print(f"Nodes with 'name' attribute in camel case: {json.dumps(camel_case_nodes, indent=2)}")
print(f"Number of nodes with 'name' attribute in camel case: {len(camel_case_nodes)}")

Nodes with 'name' attribute in camel case: [
  "name=__addMilestoneVariants\nfile=wwpdb/utils/config/ConfigInfoData.py\nline=743",
  "name=getConfigDictionary\nfile=wwpdb/utils/config/ConfigInfoData.py\nline=740",
  "name=getObject\nfile=wwpdb/utils/wf/dbapi/WfDbApi.py\nline=220",
  "name=checkId\nfile=wwpdb/utils/wf/dbapi/WfDbApi.py\nline=583",
  "name=selectRows\nfile=wwpdb/utils/wf/dbapi/DbCommand.py\nline=255",
  "name=makeSqlConstraint\nfile=wwpdb/utils/wf/dbapi/DbCommand.py\nline=104",
  "name=getUiReqs\nfile=wwpdb/apps/deposit/depui/constants.py\nline=379",
  "name=_generateCCDef\nfile=wwpdb/apps/deposit/depui/apilayer.py\nline=10127",
  "name=initProgress\nfile=wwpdb/apps/deposit/depui/apilayer.py\nline=6473",
  "name=getCifFileName\nfile=wwpdb/apps/deposit/depui/apilayer.py\nline=2154",
  "name=loadFromCif\nfile=wwpdb/apps/deposit/depui/apilayer.py\nline=665",
  "name=saveToPickle\nfile=wwpdb/apps/deposit/depui/apilayer.py\nline=6680",
  "name=setDepositionDataSetId\nfile=wwpd

In [6]:
def suggest_snake_case_name(camel_case_name):
    """Return a snake_case spelling of ``camel_case_name``.

    Leading underscores are kept as they are. In the rest of the name an
    underscore is inserted before an uppercase letter that follows a lowercase
    letter or digit, and before the last uppercase letter of an uppercase run
    when that letter is followed by a lowercase letter (so an acronym stays
    together as one word). The result is then lowercased.
    """
    # Split the name into its run of leading underscores and the remainder
    body = camel_case_name.lstrip('_')
    prefix = camel_case_name[:len(camel_case_name) - len(body)]

    # Mark the word boundaries with underscores; only one of the two groups
    # takes part in any given match, the other contributes nothing
    with_separators = re.sub(
        r'(?<=[a-z0-9])([A-Z])|(?<=[A-Z])([A-Z][a-z])',
        r'_\1\2',
        body,
    )

    return prefix + with_separators.lower()

# Build one rename suggestion per camel-case node.
# A node whose string cannot be parsed (name comes back as None) is left out.
suggested_names = []
for node in camel_case_nodes:
    name, file, line = extract_node_attributes(node)
    if name is None:
        continue
    suggestion = {
        "file": file,
        "line": line,
        "original_name": name,
        "suggested_name": suggest_snake_case_name(name),
    }
    suggested_names.append(suggestion)

print(f"Suggested snake case names: {json.dumps(suggested_names, indent=2)}")
print(f"Number of nodes with camel case names: {len(camel_case_nodes)}")

Suggested snake case names: [
  {
    "file": "wwpdb/utils/config/ConfigInfoData.py",
    "line": "743",
    "original_name": "__addMilestoneVariants",
    "suggested_name": "__add_milestone_variants"
  },
  {
    "file": "wwpdb/utils/config/ConfigInfoData.py",
    "line": "740",
    "original_name": "getConfigDictionary",
    "suggested_name": "get_config_dictionary"
  },
  {
    "file": "wwpdb/utils/wf/dbapi/WfDbApi.py",
    "line": "220",
    "original_name": "getObject",
    "suggested_name": "get_object"
  },
  {
    "file": "wwpdb/utils/wf/dbapi/WfDbApi.py",
    "line": "583",
    "original_name": "checkId",
    "suggested_name": "check_id"
  },
  {
    "file": "wwpdb/utils/wf/dbapi/DbCommand.py",
    "line": "255",
    "original_name": "selectRows",
    "suggested_name": "select_rows"
  },
  {
    "file": "wwpdb/utils/wf/dbapi/DbCommand.py",
    "line": "104",
    "original_name": "makeSqlConstraint",
    "suggested_name": "make_sql_constraint"
  },
  {
    "file": "wwpdb/apps/d

In [7]:
# Filter only the suggested names from "wwpdb/apps/deposit/depui/upload.py".
# The path is held in one variable so the filter and both messages cannot drift apart.
target_file = "wwpdb/apps/deposit/depui/upload.py"
filtered_suggested_names = [
    suggestion for suggestion in suggested_names if suggestion["file"] == target_file
]
print(f"Suggested snake case names in '{target_file}': {json.dumps(filtered_suggested_names, indent=2)}")
print(f"Number of nodes with camel case names in '{target_file}': {len(filtered_suggested_names)}")

Suggested snake case names in 'wwpdb/apps/deposit/depui/upload.py': []
Number of nodes with camel case names in 'wwpdb/apps/deposit/depui/upload.py': 0


In [8]:
# Filter only the suggested names from "wwpdb/apps/deposit/depui/dataObject/getContentEM.py".
# The path is held in one variable so the filter and both messages cannot drift apart.
target_file = "wwpdb/apps/deposit/depui/dataObject/getContentEM.py"
filtered_suggested_names = [
    suggestion for suggestion in suggested_names if suggestion["file"] == target_file
]
print(f"Suggested snake case names in '{target_file}': {json.dumps(filtered_suggested_names, indent=2)}")
print(f"Number of nodes with camel case names in '{target_file}': {len(filtered_suggested_names)}")

Suggested snake case names in 'wwpdb/apps/deposit/depui/dataObject/getContentEM.py': []
Number of nodes with camel case names in 'wwpdb/apps/deposit/depui/dataObject/getContentEM.py': 0


In [9]:
# Filter only the suggested names from "wwpdb/apps/deposit/depui/apilayer.py".
# The path is held in one variable so the filter and both messages cannot drift apart.
target_file = "wwpdb/apps/deposit/depui/apilayer.py"
filtered_suggested_names = [
    suggestion for suggestion in suggested_names if suggestion["file"] == target_file
]
print(f"Suggested snake case names in '{target_file}': {json.dumps(filtered_suggested_names, indent=2)}")
print(f"Number of nodes with camel case names in '{target_file}': {len(filtered_suggested_names)}")

Suggested snake case names in 'wwpdb/apps/deposit/depui/apilayer.py': [
  {
    "file": "wwpdb/apps/deposit/depui/apilayer.py",
    "line": "10127",
    "original_name": "_generateCCDef",
    "suggested_name": "_generate_cc_def"
  },
  {
    "file": "wwpdb/apps/deposit/depui/apilayer.py",
    "line": "6473",
    "original_name": "initProgress",
    "suggested_name": "init_progress"
  },
  {
    "file": "wwpdb/apps/deposit/depui/apilayer.py",
    "line": "2154",
    "original_name": "getCifFileName",
    "suggested_name": "get_cif_file_name"
  },
  {
    "file": "wwpdb/apps/deposit/depui/apilayer.py",
    "line": "665",
    "original_name": "loadFromCif",
    "suggested_name": "load_from_cif"
  },
  {
    "file": "wwpdb/apps/deposit/depui/apilayer.py",
    "line": "6680",
    "original_name": "saveToPickle",
    "suggested_name": "save_to_pickle"
  },
  {
    "file": "wwpdb/apps/deposit/depui/apilayer.py",
    "line": "6769",
    "original_name": "noValidate",
    "suggested_name": "no_