# In [3]:
import pandas as pd
import math
import re

# Java operators and operator-like keywords; every other token is an operand.
_JAVA_OPERATORS = frozenset([
    '+', '-', '*', '/', '%', '++', '--', '==', '!=', '>', '<', '>=', '<=',
    '&&', '||', '!', '&', '|', '^', '~', '<<', '>>', '>>>', '=', '+=', '-=',
    '*=', '/=', '%=', '&=', '|=', '^=', '<<=', '>>=', '>>>=', 'instanceof',
    'new', 'return', 'throw', 'try', 'catch', 'finally', 'if', 'else',
    'switch', 'case', 'default', 'for', 'while', 'do', 'break', 'continue',
    'synchronized', 'this', 'super',
])

# Symbolic operators, longest first: regex alternation takes the first
# alternative that matches, so '>>>=' must be tried before '>>>' or '>'.
_SYMBOLIC_OPERATORS = sorted(
    (op for op in _JAVA_OPERATORS if not op.isalpha()),
    key=lambda op: (-len(op), op),
)

# A token is either a run of word characters or one known symbolic operator.
_TOKEN_PATTERN = re.compile(
    r'\w+|' + '|'.join(re.escape(op) for op in _SYMBOLIC_OPERATORS)
)


def count_operators_and_operands(code):
    """Count occurrences of Java operators and operands in a code snippet.

    The snippet is split into word tokens (identifiers, keywords, digit runs)
    and symbolic operators. Symbolic operators are matched longest-first, so
    adjacent operators such as the ``=`` and ``-`` in ``x=-1`` are counted
    separately instead of being merged into one unknown token.

    Limitations: string literals and comments are tokenized like ordinary
    code, and punctuation outside the operator set (parentheses, braces,
    dots, semicolons, ...) is ignored.

    Args:
        code: Java source text. Any non-string value (for example the NaN
            that pandas uses for an empty CSV cell) is treated as empty.

    Returns:
        A tuple ``(operator_count, operand_count)`` of dicts mapping each
        distinct token to the number of times it occurs.
    """
    operator_count = {}
    operand_count = {}

    # Missing values are not text; report "nothing found" rather than crash.
    if not isinstance(code, str):
        return operator_count, operand_count

    for token in _TOKEN_PATTERN.findall(code):
        bucket = operator_count if token in _JAVA_OPERATORS else operand_count
        bucket[token] = bucket.get(token, 0) + 1

    return operator_count, operand_count

def calculate_halstead_volume(operator_count, operand_count):
    """Return the Halstead volume ``V = N * log2(n)`` for the given counts.

    Args:
        operator_count: Mapping of each distinct operator to its occurrences.
        operand_count: Mapping of each distinct operand to its occurrences.

    Returns:
        The volume as a float, or ``0`` when both mappings are empty
        (the logarithm of an empty vocabulary is undefined).
    """
    # n: vocabulary size = distinct operators + distinct operands.
    vocabulary = len(operator_count) + len(operand_count)
    # N: program length = total operator + total operand occurrences.
    length = sum(operator_count.values()) + sum(operand_count.values())

    if not vocabulary:
        return 0

    return length * math.log2(vocabulary)

def main():
    """Compute the Halstead volume of every snippet in ``testInput.csv``.

    Reads the 'Generated Java Code' column, computes one volume per row, and
    writes ``halstead.csv`` with two columns: 'example' (copied from the
    input's 'Example' column) and 'Halstead_Volume'.
    """
    source = pd.read_csv('testInput.csv')

    # One volume per input row, in row order.
    volumes = [
        calculate_halstead_volume(*count_operators_and_operands(snippet))
        for snippet in source['Generated Java Code']
    ]

    # Keep only the example label alongside its computed volume.
    report = pd.DataFrame({
        'example': source['Example'],
        'Halstead_Volume': volumes,
    })
    report.to_csv('halstead.csv', index=False)

# Run the batch computation only when executed as a script, not on import.
if __name__ == "__main__":
    main()