<a href="https://colab.research.google.com/github/mehenika/Performance-Log-Level/blob/main/eightFeatures.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [16]:
import javalang
import nltk
from collections import Counter
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize

def extract_features(code):
    # Parse the code into an AST
    tree = javalang.parse.parse(code)

    # Extracting features
    average_log_level = calculate_average_log_level(tree)
    first_block_type = get_first_block_type(tree)
    second_block_type = get_second_block_type(tree)
    has_throw_statement = check_throw_statement(tree)
    num_variables = count_variables(tree)
    num_tokens = count_tokens(code)
    text_length = len(code)
    word_similarity = calculate_word_similarity(code)

    # Return the extracted features
    return {
        "average_log_level": average_log_level,
        "first_block_type": first_block_type,
        "second_block_type": second_block_type,
        "has_throw_statement": has_throw_statement,
        "num_variables": num_variables,
        "num_tokens": num_tokens,
        "text_length": text_length,
        "word_similarity": word_similarity
    }

def calculate_average_log_level(tree):
    log_level_count = 0
    log_statement_count = 0
    
    for _, node in tree.filter(javalang.tree.MethodInvocation):
        
        if node.qualifier == 'logger':
            log_statement_count += 1
            if node.member == 'trace':
              log_level_count += 1
            if node.member == 'debug':
              log_level_count += 2
            if node.member == 'info':
              log_level_count += 3
            if node.member == 'warn':
              log_level_count += 4
            if node.member == 'error':
              log_level_count += 5
    
    if log_statement_count > 0:
        average_log_level = log_level_count / log_statement_count
    else:
        average_log_level = 0.0
    
    return average_log_level


def get_first_block_type(tree):
    # Implementation to extract the type of the first block
    # in the code (e.g., if, for, while, etc.)
    for _, node in tree.filter(javalang.tree.BlockStatement):
        if isinstance(node, javalang.tree.IfStatement):
            return 'if'
        elif isinstance(node, javalang.tree.ForStatement):
            return 'for'
        elif isinstance(node, javalang.tree.WhileStatement):
            return 'while'
        # Add more conditions for other block types as needed
    
    return 'unknown'

def get_second_block_type(tree):
    # Implementation to extract the type of the second block
    # in the code (e.g., if, for, while, etc.)
    for _, node in tree.filter(javalang.tree.BlockStatement):
        if isinstance(node, javalang.tree.IfStatement):
            return 'if'
        elif isinstance(node, javalang.tree.ForStatement):
            return 'for'
        elif isinstance(node, javalang.tree.WhileStatement):
            return 'while'
        # Add more conditions for other block types as needed
    return 'unknown'

def check_throw_statement(tree):
    # Implementation to check if the code contains a throw statement
    for _, node in tree:
        if isinstance(node, javalang.tree.ThrowStatement):
            return True
    
    return False


def count_variables(tree):
    # Implementation to count the number of variables in the code
    variable_counter = Counter()

    for _, node in tree:
        if isinstance(node, javalang.tree.LocalVariableDeclaration):
            for variable in node.declarators:
                variable_counter[variable.name] += 1

    return len(variable_counter)


def count_tokens(code):
    # Implementation to count the number of tokens in the code
    tokens = list(javalang.tokenizer.tokenize(code))
    return len(tokens)
    

def calculate_word_similarity(code):
    # Tokenize the code into words
    tokens = word_tokenize(code)

    # Remove stopwords
    stop_words = set(stopwords.words('english'))
    filtered_tokens = [token for token in tokens if token.lower() not in stop_words]

    # Calculate the word similarity
    word_similarity = len(set(filtered_tokens)) / len(tokens)

    return word_similarity


# Example usage
java_code = """
public class MyClass {
        public static void flakyTest(Runnable test, int rerunsOnFailure, String message)
    {
        AssertionError e = runCatchingAssertionError(test);
        if (e == null)
            return;     // success

        logger.info("Test failed. {}", message, e);
        logger.info("Re-running {} times to verify it isn't failing more often than it should.", rerunsOnFailure);

        int rerunsFailed = 0;
        for (int i = 0; i < rerunsOnFailure; ++i)
        {
            AssertionError t = runCatchingAssertionError(test);
            if (t != null)
            {
                ++rerunsFailed;
                e.addSuppressed(t);

                logger.debug("Test failed again, total num failures: {}", rerunsFailed, t);
            }
        }
        if (rerunsFailed > 0)
        {
            logger.error("Test failed in {} of the {} reruns.", rerunsFailed, rerunsOnFailure);
            throw e;
        }

        logger.info("All reruns succeeded. Failure treated as flake.");
    }
    }

"""

features = extract_features(java_code)
print(features)




{'average_log_level': 3.2, 'first_block_type': 'unknown', 'second_block_type': 'unknown', 'has_throw_statement': True, 'num_variables': 3, 'num_tokens': 144, 'text_length': 1039, 'word_similarity': 0.31216931216931215}


In [15]:
import nltk
nltk.download('stopwords')

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.


True

In [12]:
import nltk
nltk.download('punkt')

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.


True

In [2]:
pip install javalang

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting javalang
  Downloading javalang-0.13.0-py3-none-any.whl (22 kB)
Installing collected packages: javalang
Successfully installed javalang-0.13.0
