### Config

In [15]:
# Root directory handed to create_call_graph_projects: every sub-folder is
# treated as a project and every entry inside it as one version of that project.
# NOTE(review): absolute, machine-specific path - adjust before running elsewhere.
java_path = '/sise/home/noaradi/new_data_extraction/labels_repo'
# Output directory: one '<project>_<version>.json' edge list is written here
# for each processed version.
save_to_path = '/sise/home/noaradi/meta_model/GraphEmbedding/graphs_new/batch3_manually_defined/'

### Imports

In [1]:
import javalang
import networkx as nx
import matplotlib.pyplot as plt
import os
import json

### Functions

In [2]:
def parse_java_files(path):
    """
    Parse the Java files found under a directory, searching it recursively.

    A file is skipped when its full path (as returned by glob) contains
    'test' or 'Test', or contains 'abstract' in any letter case. Because the
    whole path is checked, a matching directory name excludes every file
    below it.

    Args:
        path (str): The directory path containing Java files.

    Returns:
        list: A list of (filename, parsed_tree) tuples, one per file that was
              read and parsed successfully. Files that are not valid UTF-8
              are skipped silently; files that fail to parse are reported
              with print() and skipped.
    """
    # Imported locally so the function does not rely on a later notebook cell
    # having imported glob before it is called.
    import glob

    parsed_files = []

    # The pattern itself guarantees that every match ends in '.java'.
    for filename in glob.iglob(path + '/**/*.java', recursive=True):
        if 'test' in filename or 'Test' in filename or 'abstract' in filename.lower():
            continue

        # A directory may itself be named '<something>.java'.
        if os.path.isdir(filename):
            print(filename + " is a directory")
            continue

        # Decode explicitly as UTF-8 instead of the platform's locale default,
        # so the same files are accepted or skipped on every machine.
        try:
            with open(filename, 'r', encoding='utf-8') as file:
                source_code = file.read()
        except UnicodeDecodeError:
            continue

        # Best effort: one unparsable file must not abort the whole project.
        try:
            parsed_files.append((filename, javalang.parse.parse(source_code)))
        except Exception as e:
            print(f"Error parsing {filename}: {str(e)}")
            continue

    return parsed_files

In [3]:
def save_graph_to_json(graph, json_file):
    """
    Write the edge list of a graph to a JSON file.

    The file contains a single object of the form {"edges": [[u, v], ...]}
    in which both endpoints of every edge are converted to strings. Only
    edges are stored.

    Args:
        graph (networkx.Graph): The graph whose edges are saved.
        json_file (str): Destination path of the JSON file.
    """
    # Collect the stringified endpoint pairs before touching the file system
    edge_pairs = []
    for source, target in graph.edges:
        edge_pairs.append([str(source), str(target)])

    payload = {"edges": edge_pairs}

    # Serialise the payload to the requested location
    with open(json_file, 'w') as handle:
        json.dump(payload, handle)

In [26]:
def implement_or_extend(tree):
    """
    Return the `implements` and `extends` attributes of the first type in a parsed file.

    Only tree.types[0] is inspected. Each value is None when the file declares
    no types or when the first type has no such attribute.

    Args:
        tree: Parsed file exposing a `types` sequence.

    Returns:
        tuple: (implements, extends)
    """
    if len(tree.types) > 0:
        first_type = tree.types[0]
        return (
            getattr(first_type, 'implements', None),
            getattr(first_type, 'extends', None),
        )
    return None, None

In [6]:
def get_imports(tree):
    """
    Collect the `path` of every entry in tree.imports, in order.

    Args:
        tree: Parsed file exposing an `imports` iterable.

    Returns:
        list: The `path` attribute of each import entry.
    """
    import_paths = []
    for declaration in tree.imports:
        import_paths.append(declaration.path)
    return import_paths

In [7]:
def local_variable_dec_nodes(tree, dict_mapping):
    """
    Add an entry to dict_mapping for local variable declarations in the tree.

    For every declarator of every LocalVariableDeclaration:
      * initializer is a MethodInvocation: the variable name is mapped to the
        `qualifier` of each child of the declarator that has one (the last
        such child wins);
      * any other truthy initializer: the name is mapped to
        initializer.type.name and then overwritten with the declared type
        name (node.type.name) whenever that is available;
      * no initializer: nothing is recorded.

    Args:
        tree: Parsed file (or sub-tree) supporting `filter(node_class)`.
        dict_mapping (dict): Mapping to update in place.

    Returns:
        dict: The same dict_mapping object.
    """
    for _, declaration in tree.filter(javalang.tree.LocalVariableDeclaration):
        for declarator in declaration.declarators:
            initializer = declarator.initializer
            if isinstance(initializer, javalang.tree.MethodInvocation):
                for child in declarator.children:
                    # Children without a qualifier (names, lists, ...) are skipped.
                    # Only AttributeError is expected here; anything else, and in
                    # particular KeyboardInterrupt, must not be swallowed.
                    try:
                        dict_mapping[declarator.name] = child.qualifier
                    except AttributeError:
                        continue
            elif initializer:
                # First guess: the type carried by the initializer expression.
                try:
                    dict_mapping[declarator.name] = initializer.type.name
                except AttributeError:
                    pass
                # The declared type takes precedence when it has a name.
                try:
                    dict_mapping[declarator.name] = declaration.type.name
                except AttributeError:
                    pass
    return dict_mapping

In [8]:
def class_dec_nodes(tree, dict_mapping):
    """
    Add an entry to dict_mapping for assignments found inside class declarations.

    For every StatementExpression inside a ClassDeclaration whose expression
    is an Assignment, the `member` of the assignment's left-hand side
    (`expressionl`) is mapped to `value.type.name`. Assignments lacking any of
    these attributes are ignored.

    Args:
        tree: Parsed file (or sub-tree) supporting `filter(node_class)`.
        dict_mapping (dict): Mapping to update in place.

    Returns:
        dict: The same dict_mapping object.
    """
    for _, class_node in tree.filter(javalang.tree.ClassDeclaration):
        for _, statement in class_node.filter(javalang.tree.StatementExpression):
            assignment = statement.expression
            if not isinstance(assignment, javalang.tree.Assignment):
                continue
            # Not every assignment has a member target or a typed value; those
            # raise AttributeError and are skipped. Other exceptions (notably
            # KeyboardInterrupt) are deliberately not swallowed.
            try:
                dict_mapping[assignment.expressionl.member] = assignment.value.type.name
            except AttributeError:
                pass
    return dict_mapping

In [9]:
def field_dec_nodes(tree, dict_mapping):
    """
    Map every declared field name to the name of its declared type.

    Args:
        tree: Parsed file (or sub-tree) supporting `filter(node_class)`.
        dict_mapping (dict): Mapping to update in place; existing entries with
            the same name are overwritten.

    Returns:
        dict: The same dict_mapping object.
    """
    for _, field in tree.filter(javalang.tree.FieldDeclaration):
        # One declaration can introduce several names sharing a single type.
        dict_mapping.update(
            (declarator.name, field.type.name) for declarator in field.declarators
        )
    return dict_mapping

In [10]:
def formal_param_nodes(tree, dict_mapping):
    """
    Map every formal parameter name to the name of its declared type.

    Args:
        tree: Parsed file (or sub-tree) supporting `filter(node_class)`.
        dict_mapping (dict): Mapping to update in place; existing entries with
            the same name are overwritten.

    Returns:
        dict: The same dict_mapping object.
    """
    parameters = (found for _, found in tree.filter(javalang.tree.FormalParameter))
    for parameter in parameters:
        type_name = parameter.type.name
        dict_mapping[parameter.name] = type_name
    return dict_mapping

In [27]:
def create_call_graph(java_files,project_name):
    """
    Build a method-level call graph from parsed Java files.

    Nodes are identified by keys of the form '<Name>.<method>', where <Name>
    is the file name without directory and extension for declared methods, or
    the resolved qualifier for invoked methods. Each key receives a
    consecutive integer id in nodes_mapping the first time it is seen; the
    graph itself is built on those ids.

    Edges added for every declared method that is not annotated @Deprecated:
      * to the same-named method of the first implemented interface, when
        the file's first type implements anything;
      * to the same-named method of the extended type, when the method is
        annotated @Override;
      * to every invoked method whose qualifier can be resolved and is not
        considered external.

    Annotations are matched anywhere in the method's annotation list, not
    only in first position.

    Args:
        java_files: Iterable of (file_path, tree) tuples, as returned by
            parse_java_files.
        project_name (str): Imports whose path contains this string are
            treated as project-internal; all other imports are external.

    Returns:
        tuple: (graph, nodes_mapping). `graph` is the subgraph of the call
        graph restricted to the declared, non-deprecated methods;
        `nodes_mapping` maps every key seen (including callees that are not
        part of the subgraph) to its integer id.
    """
    call_graph = nx.DiGraph()
    nodes_mapping = {}
    methods_declared = []

    def node_id(key):
        # Ids are handed out consecutively, so the next free id always equals
        # the number of keys registered so far.
        return nodes_mapping.setdefault(key, len(nodes_mapping))

    for file_path, tree in java_files:
        imports = get_imports(tree)
        implement, extends = implement_or_extend(tree)

        # Map variable / field / parameter names to a class name (or qualifier).
        dict_mapping = {}
        dict_mapping = local_variable_dec_nodes(tree, dict_mapping)
        dict_mapping = class_dec_nodes(tree, dict_mapping)
        dict_mapping = field_dec_nodes(tree, dict_mapping)
        dict_mapping = formal_param_nodes(tree, dict_mapping)

        class_name = file_path.split('/')[-1].split('.')[0]
        methods_in_file = {node.name for _, node in tree.filter(javalang.tree.MethodDeclaration)}

        for _, node in tree.filter(javalang.tree.MethodDeclaration):
            # Look at every annotation: '@Override @Deprecated' must be skipped
            # and '@SuppressWarnings @Override' must still count as an override.
            annotation_names = {annotation.name for annotation in (node.annotations or [])}
            if 'Deprecated' in annotation_names:
                continue

            caller_key = f'{class_name}.{node.name}'
            methods_declared.append(caller_key)
            caller_id = node_id(caller_key)

            if implement:
                # Only the first implemented interface is linked.
                interface_key = f'{implement[0].name}.{node.name}'
                call_graph.add_edge(caller_id, node_id(interface_key))

            if extends and 'Override' in annotation_names:
                # `extends` is either a single type or a list of types.
                try:
                    parent_name = extends.name
                except AttributeError:
                    parent_name = extends[0].name
                call_graph.add_edge(caller_id, node_id(f'{parent_name}.{node.name}'))

            for _, method_call in node.filter(javalang.tree.MethodInvocation):
                member = method_call.member
                qualifier = method_call.qualifier
                if not qualifier:
                    # Unqualified calls are kept only when the method is
                    # declared in this very file.
                    if member not in methods_in_file:
                        continue
                    qualifier = class_name
                if '.' in qualifier:
                    # Drop the last dotted component of the qualifier.
                    qualifier = qualifier.rsplit('.', 1)[0]
                if qualifier in dict_mapping:
                    qualifier = dict_mapping[qualifier]

                # A qualifier that appears in an import from outside the
                # project refers to external code.
                external = any(
                    qualifier in importname and project_name not in importname
                    for importname in imports
                )
                if external or qualifier in ['Character','String','Long','Integer']:
                    continue
                call_graph.add_edge(caller_id, node_id(f'{qualifier}.{member}'))

    # Keep only nodes that correspond to methods declared in the parsed files.
    nodes_to_keep = [nodes_mapping[method] for method in methods_declared]
    graph_to_return = nx.subgraph(call_graph, nodes_to_keep)
    return graph_to_return, nodes_mapping


In [16]:
import glob
import pandas as pd
def create_call_graph_projects(java_path,save_to_path,already_written = None):
    """
    Build and save a call graph for every version of every project under java_path.

    Expected layout: java_path/<project>/<version>/... Entries of java_path
    that are not directories are ignored, as are entries of a project folder
    named '.git' or whose name contains '.csv'. Each graph is written to
    '<save_to_path>/<project>_<version>.json'.

    Args:
        java_path (str): Directory containing one sub-folder per project.
        save_to_path (str): Output directory; created when it does not exist.
        already_written (list, optional): Project folder names to skip.
            Defaults to skipping nothing.
    """
    already_written = already_written or []
    # Fail early (or create the folder) instead of after the first long parse.
    os.makedirs(save_to_path, exist_ok=True)

    for folder in sorted(os.listdir(java_path)):
        folder_path = java_path + "/" + folder
        # Check the entry inside java_path, not a same-named path relative to
        # the current working directory, and skip anything that is not a folder.
        if not os.path.isdir(folder_path) or folder in already_written:
            continue
        print(folder)
        for version in os.listdir(folder_path):
            if version == '.git' or '.csv' in version:
                continue
            print(version)
            version_path = folder_path + "/" + version

            java_files = parse_java_files(version_path)
            call_graph, _ = create_call_graph(java_files, folder)
            save_graph_to_json(call_graph, f'{save_to_path}/{folder}_{version}.json')

### Apply call graph extraction

In [None]:
# Build and save one call graph per project version found under java_path;
# no project is skipped.
create_call_graph_projects(java_path, save_to_path, already_written=[])