In [24]:
# Install the third-party dependencies into the environment of the running kernel.
# The explicit %pip magic does not depend on IPython's automagic setting.
%pip install javalang pandas


Note: you may need to restart the kernel to use updated packages.


In [25]:
import os
import javalang
import pandas as pd

def _created_type_name(creator):
    """Return the simple (unqualified) class name instantiated by a ClassCreator node."""
    ref = creator.type
    # A qualified name such as ``java.io.FileInputStream`` is represented as a
    # chain of ReferenceType nodes linked through ``sub_type``; the last link
    # carries the simple class name.
    while getattr(ref, "sub_type", None) is not None:
        ref = ref.sub_type
    return getattr(ref, "name", None)


def extract_features_from_java_file(file_path):
    """Parse one Java source file and count a handful of syntactic features.

    Parameters
    ----------
    file_path : str
        Path to the ``.java`` file to analyse.

    Returns
    -------
    dict or None
        A dict with the keys ``file_name``, ``num_classes``, ``num_methods``,
        ``num_reflections``, ``num_file_operations`` and
        ``num_network_operations``; ``None`` when the file cannot be parsed
        as Java.

    Raises
    ------
    OSError
        If the file cannot be opened or read.
    """
    reflection_members = {"getMethod", "invoke"}
    file_type_names = {"FileInputStream", "FileOutputStream"}
    network_type_names = {"Socket", "ServerSocket"}

    # errors="replace" keeps a stray non-UTF-8 byte (typically inside a comment
    # or string literal) from aborting the whole run with UnicodeDecodeError.
    with open(file_path, "r", encoding="utf-8", errors="replace") as file:
        code = file.read()

    counts = {
        "num_classes": 0,
        "num_methods": 0,
        "num_reflections": 0,
        "num_file_operations": 0,
        "num_network_operations": 0,
    }

    try:
        tree = javalang.parse.parse(code)

        # Single traversal of the AST instead of one filter() pass per feature.
        for _, node in tree:
            if isinstance(node, javalang.tree.ClassDeclaration):
                counts["num_classes"] += 1
            elif isinstance(node, javalang.tree.MethodDeclaration):
                counts["num_methods"] += 1
            elif isinstance(node, javalang.tree.MethodInvocation):
                member = node.member
                if member in reflection_members:
                    counts["num_reflections"] += 1
                # Kept from the original implementation: method calls that
                # happen to carry one of these names are still counted.
                if member in file_type_names:
                    counts["num_file_operations"] += 1
                if member in network_type_names:
                    counts["num_network_operations"] += 1
            elif isinstance(node, javalang.tree.ClassCreator):
                # ``new FileInputStream(...)`` / ``new Socket(...)`` are object
                # creations, not method invocations, so they must be counted here.
                type_name = _created_type_name(node)
                if type_name in file_type_names:
                    counts["num_file_operations"] += 1
                elif type_name in network_type_names:
                    counts["num_network_operations"] += 1
    except (
        javalang.parser.JavaSyntaxError,
        javalang.tokenizer.LexerError,
        RecursionError,  # very deeply nested expressions can exhaust the stack
    ):
        # Best effort: an unparsable file is reported as None so the caller can skip it.
        return None

    return {"file_name": os.path.basename(file_path), **counts}



In [26]:

def main(
    directory="/Users/John/PycharmProjects/John_Pino_CYBR493A_Fall23/In-class Activities/synapse_java",
    output_csv="java_features.csv",
):
    """Extract features from every ``.java`` file in a directory and save them as CSV.

    Parameters
    ----------
    directory : str
        Folder that is scanned (non-recursively) for files ending in ``.java``.
        Replace the default with your own Java files directory.
    output_csv : str
        Path of the CSV file to write; one row per successfully parsed file.

    Raises
    ------
    OSError
        If ``directory`` cannot be listed or ``output_csv`` cannot be written.
    """
    # os.listdir returns entries in arbitrary order; sorting makes the row
    # order of the CSV reproducible between runs and machines.
    java_files = sorted(
        os.path.join(directory, name)
        for name in os.listdir(directory)
        if name.endswith('.java')
    )

    all_features = []
    skipped = []

    for java_file in java_files:
        features = extract_features_from_java_file(java_file)
        if features:
            all_features.append(features)
        else:
            skipped.append(os.path.basename(java_file))

    df = pd.DataFrame(all_features)
    df.to_csv(output_csv, index=False)

    # Make skipped files visible instead of dropping them silently.
    print(f"Wrote {len(all_features)} row(s) to {output_csv}; skipped {len(skipped)} unparsable file(s).")

# Entry point: runs the extraction when this code is executed directly.
# In a Jupyter kernel `__name__` is "__main__", so running this cell calls main() immediately.
if __name__ == "__main__":
    main()