## USAGE
#### Use the javalang parser to detect code entities and their types in a Java code snippet


In [1]:
import javalang
import pandas as pd

In [2]:
class args:
    """Notebook-level settings, read as plain class attributes."""

    # CSV file that the notebook loads its Java code snippets from.
    java_code_snippets = "./data/sampled_codexglue_data_java.csv"

In [3]:

def identify_entities(java_code):
    """
    Returns entity types based on javalang parser

    The snippet is wrapped in a dummy class so that a bare Java method can be
    parsed, then the resulting AST is walked and one entry is recorded per
    recognised node. The dummy class itself is dropped from the result.

    Parameters:
    java_code (str): Java Method

    Returns:
    entities (list of dicts): Entities with name, type and position. Each dict
        has the keys "entity_name", "entity_type" (one of "CLASS", "FUNCTION",
        "VARIABLE", "VALUE", "LIBRARY", "DATA TYPE") and "position" (a
        (line, column) tuple, or None when the node carries no position).
        Positions refer to the wrapped source: the wrapper prefix sits on the
        first line, so columns on that line are shifted by its length.
    wc (int): error count (0 on success, 1 when the snippet could not be
        tokenized/parsed or was not a string)
    """
    # Non-string input (e.g. NaN produced by pandas for an empty CSV cell)
    # cannot be parsed; count it as an error instead of crashing the caller.
    if not isinstance(java_code, str):
        return [], 1

    # Wrap the method in a dummy class. The closing brace goes on its own line
    # so that a snippet ending in a `//` comment does not swallow it.
    java_code = "class xyz {" + java_code + "\n}"

    entities = []
    wc = 0

    def record(name, entity_type, node):
        # Single place where an entity dict is built from an AST node.
        pos = node.position
        entities.append({
            "entity_name": name,
            "entity_type": entity_type,
            "position": (pos.line, pos.column) if pos else None,
        })

    try:
        # Tokenization is lazy and raises LexerError when consumed, so it has
        # to happen inside the try block for the handler below to see it.
        tokens = list(javalang.tokenizer.tokenize(java_code))
        tree = javalang.parser.Parser(tokens).parse()

        # Visit nodes in the AST
        for _path, node in tree:
            if isinstance(node, javalang.tree.ClassDeclaration):
                record(node.name, "CLASS", node)

            elif isinstance(node, javalang.tree.MethodDeclaration):
                record(node.name, "FUNCTION", node)

            elif isinstance(node, javalang.tree.MethodInvocation):
                record(node.member, "FUNCTION", node)

            elif isinstance(node, javalang.tree.VariableDeclaration):
                # A single declaration may introduce several variables
                # (`int a, b;`); record every declarator, not just the first.
                for declarator in node.declarators:
                    record(declarator.name, "VARIABLE", declarator)

            elif isinstance(node, javalang.tree.MemberReference):
                record(node.member, "VARIABLE", node)

            elif isinstance(node, javalang.tree.FormalParameter):
                record(node.name, "VARIABLE", node)

            elif isinstance(node, javalang.tree.Literal):
                record(node.value, "VALUE", node)

            elif isinstance(node, javalang.tree.Import):
                # Import nodes expose the imported name as `path`.
                record(node.path, "LIBRARY", node)

            elif isinstance(node, (javalang.tree.ReferenceType,
                                   javalang.tree.BasicType)):
                record(node.name, "DATA TYPE", node)

    except javalang.parser.JavaSyntaxError as err:
        wc = wc + 1
        print("syntax error on: "+str(err))
    except javalang.tokenizer.LexerError:
        wc = wc + 1
        print("lexer error")
    except IndexError:
        wc = wc + 1
    except TypeError:
        wc = wc + 1

    # The first recorded entity is the dummy wrapper class; drop it.
    return entities[1:], wc


In [4]:
# Load the snippet CSV configured in `args` and pull out its "code" column
# as a plain Python list (one entry per row).
code_data = pd.read_csv(args.java_code_snippets)
java_code_snippets = list(code_data["code"])

In [7]:

# Run the javalang-based extractor over every snippet and collect the entity
# lists in input order. The per-snippet error count is returned as well but
# is not aggregated here.
javalang_out = []
for snippet in java_code_snippets:
    parsed_ents, error = identify_entities(snippet)
    javalang_out.append(parsed_ents)
    