In [3]:
import pandas as pd

In [8]:
# Load the dataset from a CSV file
file_path = 'data/java_test_dataset.csv'
df = pd.read_csv(file_path)

In [33]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 78388 entries, 0 to 78387
Data columns (total 38 columns):
repo_id                                78388 non-null int64
url                                    78388 non-null object
language                               49422 non-null object
fork_count                             78388 non-null int64
stargazer_count                        78388 non-null int64
focal_class_identifier                 78388 non-null object
focal_class_superclass                 21448 non-null object
focal_class_interfaces                 31320 non-null object
focal_class_fields                     78388 non-null object
focal_class_methods                    78388 non-null object
focal_class_file                       78388 non-null object
focal_method_identifier                78388 non-null object
focal_method_parameters                78388 non-null object
focal_method_modifiers                 75869 non-null object
focal_method_return                    7

In [20]:
def format_java_test_class(row):
    # Class definition
    class_def = f"public class {row['test_class_identifier']}"

    # Superclass
    if pd.notna(row['test_class_superclass']):
        class_def += f" extends {row['test_class_superclass']}"

    # Interfaces
    if pd.notna(row['test_class_interfaces']):
        class_def += f" implements {row['test_class_interfaces']}"

    class_def += " {\n"

    # Fields
    fields = row['test_class_fields'].split(';')  # Assuming fields are separated by semicolons
    for field in fields:
        class_def += f"    {field};\n"

    # Test case
    test_case = f"""
    {row['test_case_modifiers']} {row['test_case_return']} {row['test_case_identifier']}({row['test_case_parameters']}) {{
        {row['test_case_body']}
    }}
    """
    class_def += test_case

    # Closing class definition
    class_def += "\n}"

    return class_def.strip()

In [None]:
def format_java_focal_class(row):
    # Class definition
    class_def = f"public class {row['focal_class_identifier']}"

    # Superclass
    if pd.notna(row['focal_class_superclass']):
        class_def += f" extends {row['focal_class_superclass']}"

    # Interfaces
    if pd.notna(row['focal_class_interfaces']):
        class_def += f" implements {row['focal_class_interfaces']}"

    class_def += " {\n"

    # Fields
    fields = row['focal_class_fields'].split(';')  # Assuming fields are separated by semicolons
    for field in fields:
        class_def += f"    {field};\n"

    # Focal method
    focal_method = f"""
    {row['focal_case_modifiers']} {row['focal_case_return']} {row['focal_case_identifier']}({row['focal_case_parameters']}) {{
        {row['focal_case_body']}
    }}
    """
    class_def += focal_method

    # Closing class definition
    class_def += "\n}"

    return class_def.strip()

In [6]:
# Function to assemble the Java focal class code for each row
def assemble_focal_class_code(row):
    class_template = f"""
public class {row['focal_class_identifier']} {{
    {row['focal_class_fields']}
    
    {row['focal_class_methods']}
}}
"""
    return class_template.strip()

# Function to assemble the Java test case code for each row
def assemble_test_case_code(row):
    # Assuming 'test_case_body' contains formatted test cases
    # and 'focal_class_identifier' is used to name the test class
    test_class_template = f"""
class {row['focal_class_identifier']}Test {{
    {row['test_case_body']}
}}
"""
    return test_class_template.strip()

# Apply the functions to each row to create the separate Java code parts
df['focal_class_code'] = df.apply(assemble_focal_class_code, axis=1)
df['test_case_code'] = df.apply(assemble_test_case_code, axis=1)

# Optionally, save to a new CSV file
new_file_path = 'java_test_dataset_assembeled.csv'
df[['focal_class_code', 'test_case_code']].to_csv(new_file_path, index=False)

print(f"Dataset with separated Java focal class and test case code saved to {new_file_path}")


Dataset with separated Java focal class and test case code saved to java_test_dataset_assembeled.csv


In [32]:
df.head()

Unnamed: 0,repo_id,url,language,fork_count,stargazer_count,focal_class_identifier,focal_class_superclass,focal_class_interfaces,focal_class_fields,focal_class_methods,...,test_case_parameters,test_case_modifiers,test_case_return,test_case_body,test_case_signature,test_case_full_signature,test_case_class_method_signature,test_case_testcase,test_case_constructor,test_case_invocations
0,58314354,https://github.com/bytefish/JavaElasticSearchE...,Java,11,0,IgnoreMissingValuesConverter,,implements ITypeConverter<Float>,[{'original_string': 'private List<String> mis...,[{'identifier': 'IgnoreMissingValuesConverter'...,...,(),@Test public,void,@Test\n public void returns_null_if_value_i...,void returns_null_if_value_is_missing(),@Test public void returns_null_if_value_is_mis...,IgnoreMissingValuesConverterTest.returns_null_...,True,False,"['assertEquals', 'convert', 'assertEquals', 'c..."
1,58314354,https://github.com/bytefish/JavaElasticSearchE...,Java,11,0,DateUtilities,,,[],"[{'identifier': 'from', 'parameters': '(LocalD...",...,(),@Test public,void,@Test\n public void generated_date_has_utc_...,void generated_date_has_utc_offset_when_none_i...,@Test public void generated_date_has_utc_offse...,DateUtilitiesTest.generated_date_has_utc_offse...,True,False,"['of', 'of', 'atTime', 'atOffset', 'from', 'as..."
2,58314354,https://github.com/bytefish/JavaElasticSearchE...,Java,11,0,DateUtilities,,,[],"[{'identifier': 'from', 'parameters': '(LocalD...",...,(),@Test public,void,@Test\n public void generated_date_has_give...,void generated_date_has_given_offset_when_offs...,@Test public void generated_date_has_given_off...,DateUtilitiesTest.generated_date_has_given_off...,True,False,"['of', 'of', 'ofHours', 'atTime', 'atOffset', ..."
3,58314354,https://github.com/bytefish/JavaElasticSearchE...,Java,11,0,DateUtilities,,,[],"[{'identifier': 'from', 'parameters': '(LocalD...",...,(),@Test public,void,@Test\n public void generated_date_has_give...,void generated_date_has_given_timezone_when_gi...,@Test public void generated_date_has_given_tim...,DateUtilitiesTest.generated_date_has_given_tim...,True,False,"['of', 'of', 'atTime', 'ofHours', 'atOffset', ..."
4,58314354,https://github.com/bytefish/JavaElasticSearchE...,Java,11,0,LocalWeatherDataConverter,,,[],"[{'identifier': 'convert', 'parameters': '(csv...",...,(),@Test public,void,@Test\n public void testConvert() throws Ex...,void testConvert(),@Test public void testConvert(),LocalWeatherDataConverterTest.testConvert(),True,False,"['setWban', 'setDate', 'of', 'setTime', 'of', ..."


In [45]:
print(format_java_test_class(df.iloc[0]))

public class IgnoreMissingValuesConverterTest {
    [];

    @Test public void returns_null_if_value_is_missing(()) {
        @Test
    public void returns_null_if_value_is_missing() throws Exception {

        IgnoreMissingValuesConverter converter = new IgnoreMissingValuesConverter("M", "m");

        Assert.assertEquals(null, converter.convert(""));
        Assert.assertEquals(null, converter.convert("M"));
        Assert.assertEquals(null, converter.convert("m"));

        Assert.assertEquals(1.0f, converter.convert("1.0"), 1e-3);
    }
    }
    
}


In [46]:
print(format_java_focal_class(df.iloc[0]))

No match found for method: '[{'identifier': 'IgnoreMissingValuesConverter', 'parameters': '(String... missingValueRepresentation)', 'modifiers': 'public', 'return': '', 'signature': ' IgnoreMissingValuesConverter(String... missingValueRepresentation)', 'full_signature': 'public  IgnoreMissingValuesConverter(String... missingValueRepresentation)', 'class_method_signature': 'IgnoreMissingValuesConverter.IgnoreMissingValuesConverter(String... missingValueRepresentation)', 'testcase': False, 'constructor': True}, {'identifier': 'IgnoreMissingValuesConverter', 'parameters': '(List<String> missingValueRepresentation)', 'modifiers': 'public', 'return': '', 'signature': ' IgnoreMissingValuesConverter(List<String> missingValueRepresentation)', 'full_signature': 'public  IgnoreMissingValuesConverter(List<String> missingValueRepresentation)', 'class_method_signature': 'IgnoreMissingValuesConverter.IgnoreMissingValuesConverter(List<String> missingValueRepresentation)', 'testcase': False, 'construc

In [36]:
import pandas as pd

row_to_print = df.iloc[0]

for column in df.columns:
    print(f"\n\033[1m{column}\033[0m: {row_to_print[column]}\n")



[1mrepo_id[0m: 58314354


[1murl[0m: https://github.com/bytefish/JavaElasticSearchExperiment


[1mlanguage[0m: Java


[1mfork_count[0m: 11


[1mstargazer_count[0m: 0


[1mfocal_class_identifier[0m: IgnoreMissingValuesConverter


[1mfocal_class_superclass[0m: nan


[1mfocal_class_interfaces[0m: implements ITypeConverter<Float>


[1mfocal_class_fields[0m: [{'original_string': 'private List<String> missingValueRepresentation;', 'modifier': 'private', 'type': 'List<String>', 'declarator': 'missingValueRepresentation', 'var_name': 'missingValueRepresentation'}]


[1mfocal_class_methods[0m: [{'identifier': 'IgnoreMissingValuesConverter', 'parameters': '(String... missingValueRepresentation)', 'modifiers': 'public', 'return': '', 'signature': ' IgnoreMissingValuesConverter(String... missingValueRepresentation)', 'full_signature': 'public  IgnoreMissingValuesConverter(String... missingValueRepresentation)', 'class_method_signature': 'IgnoreMissingValuesConverter.IgnoreMiss