In [1]:
import re
import os
import pandas as pd
from collections import defaultdict
import matplotlib.pyplot as plt
import numpy as np
from typing import Dict
plt.style.use('default')
CSV_EXTENSION = '.csv'
SINGLE_FILE_KEY = 'single_file'
import warnings
from get_comment_dist import get_comment_to_ckpt_by_task
warnings.filterwarnings('ignore')

def process_horizon_categories_output(row):
    """Extract usage/definition records from a horizon-categories string.

    Every occurrence of
    ``<type> '<name>' used at line <N> is defined at line <M>`` in *row*
    becomes one ``(type, usage_line, def_line)`` tuple, with both line
    numbers converted to ``int``. Anything that is not a string (for example
    a NaN cell) yields an empty list.
    """
    if not isinstance(row, str):
        return []

    record_pattern = r"(?P<type>\w+) '.*?' used at line (?P<usage_line>\d+) is defined at line (?P<def_line>\d+)"

    # re.findall returns one 3-tuple of strings per match (one per group).
    return [
        (kind, int(used_at), int(defined_at))
        for kind, used_at, defined_at in re.findall(record_pattern, row)
    ]

def expand_lists_to_rows(df, column):
    """Explode a column of ``(type, usage_line, def_line)`` tuples into rows.

    Each list element in *column* gets its own row (the original index label
    is repeated). The tuple is then unpacked into ``Type``, ``usage_line``
    and ``def_line`` columns, ``abs_diff`` is set to
    ``|usage_line - def_line|``, and *column* itself is dropped from the
    returned frame.
    """
    exploded = df.explode(column)

    # Build the three component columns on the exploded index so that they
    # line up row-for-row when assigned back.
    components = pd.DataFrame(exploded[column].tolist(), index=exploded.index)
    exploded[['Type', 'usage_line', 'def_line']] = components

    line_gap = exploded['usage_line'] - exploded['def_line']
    exploded['abs_diff'] = line_gap.abs()

    return exploded.drop(columns=[column])

def process_max_range(expanded_df, groupby_col=['code_task', 'start_line', 'end_line']):
    """Keep, for every group, only the row with the largest ``abs_diff``.

    Rows with NaN in any of the *groupby_col* columns are discarded first and
    the index is reset, so the returned frame carries positions from the
    cleaned frame as its index labels. Two progress lines are printed; note
    that the "Original DataFrame" count is taken after the NaN drop.
    """
    cleaned = expanded_df.dropna(subset=groupby_col).reset_index(drop=True)

    # Index label of the max-abs_diff row inside each group.
    winners = cleaned.groupby(groupby_col)['abs_diff'].idxmax()

    print(f"Original DataFrame: {len(cleaned)} rows")
    print(f"DataFrame after groupby: {len(winners)} rows")

    return cleaned.loc[winners]

def extract_info(csv_file):
    """Split a result file name into its underscore-separated components.

    Everything before the first ``.csv`` is split on ``_``; the pieces map to
    ``model_name`` (piece 0), ``gen_mode`` (pieces 1-2 re-joined), ``task``
    (piece 3) and ``time`` (all remaining pieces re-joined). A name with
    fewer than four pieces raises ``IndexError``.
    """
    stem = csv_file.partition('.csv')[0]
    tokens = stem.split('_')

    return dict(
        model_name=tokens[0],
        gen_mode='_'.join(tokens[1:3]),
        task=tokens[3],
        time='_'.join(tokens[4:]),
    )

def group_by_res(folder_path, group_by_keys=['model_name','gen_mode'], group_gpt_4_turbo=True, debug_print=False):
    """Load every ``.csv`` in *folder_path* and concatenate them per group.

    The group key of a file is the ``_``-joined values of *group_by_keys*
    taken from ``extract_info(file_name)``. When *group_gpt_4_turbo* is true
    and ``'model_name'`` is one of the grouping keys, the dated name
    ``gpt-4-turbo-2024-04-09`` is folded into ``gpt-4-turbo``.

    Returns a ``defaultdict`` mapping each group key to one DataFrame made by
    concatenating the group's files with a fresh index.
    """
    groups = defaultdict(list)

    # Pass 1: bucket the file names by group key.
    for file_name in os.listdir(folder_path):
        if not file_name.endswith('.csv'):
            continue

        info = extract_info(file_name)
        group_key = '_'.join(info[field] for field in group_by_keys)

        if group_gpt_4_turbo and 'model_name' in group_by_keys and 'gpt-4-turbo' in group_key:
            group_key = group_key.replace('gpt-4-turbo-2024-04-09', 'gpt-4-turbo')

        groups[group_key].append(file_name)

    # Pass 2: replace each bucket of names with the concatenated data.
    for group_key in list(groups):
        file_names = groups[group_key]
        if debug_print:
            print(f"Grouped by {group_by_keys} and combined {', '.join(file_names)}")
        frames = [pd.read_csv(os.path.join(folder_path, name)) for name in file_names]
        groups[group_key] = pd.concat(frames, ignore_index=True)

    return groups

def groupby_sanity_check(grouped_dfs):
    """Print each group key followed by the length of its value."""
    for name, frame in grouped_dfs.items():
        print(f"Group Key: {name}")
        # The length line is followed by two blank lines.
        print(len(frame), end="\n\n\n")
        
def process_folder(folder_path: str) -> Dict[str, pd.DataFrame]:
    """Load result CSVs and reduce each group to one row per task span.

    *folder_path* is either a directory (its CSV files are grouped through
    ``group_by_res``) or a single ``.csv`` file (stored under
    ``SINGLE_FILE_KEY``). For every group the function:

    1. drops rows with NaN in the two horizon columns,
    2. parses ``horizon_categories_output`` and expands the parsed records
       into rows (``Type``, ``usage_line``, ``def_line``, ``abs_diff``),
    3. drops rows with NaN in any parsed column or in the module-level
       ``col_to_check`` column,
    4. keeps the max-``abs_diff`` row per group via ``process_max_range``.

    Row counts for each step are printed.

    Returns:
        Mapping of group key to the processed DataFrame.

    Raises:
        ValueError: if *folder_path* is neither a directory nor a CSV file.
    """
    if os.path.isdir(folder_path):
        grouped_dfs = group_by_res(folder_path)
    elif os.path.isfile(folder_path) and folder_path.endswith(CSV_EXTENSION):
        grouped_dfs = {SINGLE_FILE_KEY: pd.read_csv(folder_path)}
    else:
        raise ValueError(f"Invalid path: {folder_path}. Path must be a directory containing CSV files or a CSV file.")

    processed_dfs = {}
    for group_key, df in grouped_dfs.items():
        init_row_num = len(df)
        initial_cols_to_check = ['horizon_categories_output', 'horizon_freq_analysis']

        df_after_initial_drop = df.dropna(subset=initial_cols_to_check)

        # Use assign() rather than item assignment: it returns a new frame,
        # so no column is written onto the dropna() result and no
        # SettingWithCopyWarning can be triggered.
        df_after_initial_drop = df_after_initial_drop.assign(
            Processed=df_after_initial_drop['horizon_categories_output'].apply(process_horizon_categories_output)
        )

        expanded_df = expand_lists_to_rows(df_after_initial_drop, 'Processed')

        # col_to_check is the module-level name of the pass-ratio column.
        additional_cols_to_check = ['horizon_categories_output', 'horizon_freq_analysis', 'Type', 'usage_line', 'def_line', 'abs_diff', col_to_check]
        expanded_df_filtered = expanded_df.dropna(subset=additional_cols_to_check)

        print("#############################################")
        print(f"Original DataFrame: {init_row_num} rows")
        print(f"Rows dropped due to NaN in initial columns {initial_cols_to_check}: {init_row_num - len(df_after_initial_drop)}")
        print(f"Rows after expansion: {len(expanded_df)} rows")
        print(f"Rows dropped due to NaN in additional columns {additional_cols_to_check}: {len(expanded_df) - len(expanded_df_filtered)}")

        processed_dfs[group_key] = process_max_range(expanded_df_filtered)

    return processed_dfs

def convert_to_float(val: str) -> float:
    """Convert a number or a ``"num/denom"`` fraction string to ``float``.

    Anything ``float()`` accepts is converted directly. Otherwise the value
    is treated as a fraction with exactly one ``/``. Any value that cannot be
    interpreted this way yields ``0.0``: malformed fractions, non-numeric
    parts, or a denominator equal to zero in any spelling (``"0"``,
    ``"0.0"``, ``"00"``, ...).

    Raises:
        TypeError: if *val* is neither a string nor a number (propagated
            from ``float``).
    """
    try:
        return float(val)
    except ValueError:
        pass

    parts = val.split('/')
    if len(parts) != 2:
        return 0.0

    numerator, denominator = parts
    try:
        return float(numerator) / float(denominator)
    except (ValueError, ZeroDivisionError):
        # Non-numeric pieces or a zero denominator: fall back like any other
        # unparseable value instead of crashing.
        return 0.0
    
def draw_histogram(df, group_key, show_pass_dist=False, bins=30):
    """Plot a histogram of ``df['abs_diff']`` titled with *group_key*.

    The range between the smallest and largest ``abs_diff`` is cut into
    *bins* equal-width bins. With *show_pass_dist* the module-level
    ``col_to_check`` column is converted in place with ``convert_to_float``
    and the rows where it equals 1 are overlaid in red on the same bins.
    """
    plt.figure(dpi=400)

    recall_distances = df['abs_diff']
    edges = np.linspace(recall_distances.min(), recall_distances.max(), bins + 1)

    plt.hist(recall_distances, bins=edges, edgecolor='black', alpha=0.5, label='All data')

    if show_pass_dist:
        # Note: this overwrites the column on the caller's DataFrame.
        df[col_to_check] = df[col_to_check].apply(convert_to_float)
        passed_distances = df.loc[df[col_to_check] == 1, 'abs_diff']
        plt.hist(passed_distances, bins=edges, edgecolor='black', color='red', alpha=0.5, label='gen_code_passed')

    plt.title(group_key)
    plt.xlabel('Recall Distance')
    plt.ylabel('Frequency')
    plt.legend()
    plt.show()

def plot_histograms(groups, show_pass_dist=True):
    """Announce and draw one ``draw_histogram`` figure per entry of *groups*."""
    for name, frame in groups.items():
        print(f"Plotting histogram for group: {name}")
        draw_histogram(frame, name, show_pass_dist)

def get_difference(row):
    """Return ``t[0] - t[1]`` for the first tuple whose head equals ``start_line``.

    *row* must provide ``'start_line'`` and ``'list_of_tuples'``. ``None`` is
    returned when no tuple in the list starts with ``start_line``.
    """
    target = row['start_line']
    candidates = [pair for pair in row['list_of_tuples'] if pair[0] == target]
    if not candidates:
        return None
    first = candidates[0]
    return first[0] - first[1]


def classify_and_calculate(df, program_type, task_type, short_range, medium_range, report_err_bar=True):
    """Bucket rows by comment distance and summarise the pass rate per bucket.

    Side effects on *df*: adds the columns ``list_of_tuples``,
    ``comment_dist`` and ``range_class`` and overwrites the module-level
    ``col_to_check`` column with its ``convert_to_float`` values.

    Args:
        df: frame with at least ``code_task``, ``start_line`` and the
            ``col_to_check`` column.
        program_type, task_type: forwarded unchanged to
            ``get_comment_to_ckpt_by_task``.
        short_range: distances ``<= short_range`` are 'Short', larger ones
            are 'Long'.
        medium_range: accepted but never read in this function.
        report_err_bar: when true, rates and error bars come from
            ``cal_err_bar``; otherwise a plain percentage is computed.

    Returns:
        ``(percentages, result_df)``. NOTE(review): ``percentages`` is on a
        0-1 scale when ``report_err_bar`` is true (it is multiplied by 100
        only while formatting) but on a 0-100 scale otherwise.
        ``result_df`` has the columns ``range_class``, ``total_counts``,
        ``passed_counts`` and a pre-formatted string ``percentages``.
    """
    global col_to_check
    def classify_abs_diff(x):
        # The final branch is only reached when neither comparison is true
        # (e.g. x is NaN).
        if x <= short_range:
            return 'Short'
        elif x > short_range:
            return 'Long'
        else:
            return None
    # presumably a list of (line, line) tuples per task; get_difference() below
    # matches tuple[0] against start_line — TODO confirm against get_comment_dist
    df['list_of_tuples'] = df['code_task'].apply(lambda x: get_comment_to_ckpt_by_task(x, program_type, task_type))
    df['comment_dist'] = df.apply(get_difference, axis=1)
    df['range_class'] = df['comment_dist'].apply(classify_abs_diff)
    df[col_to_check] = df[col_to_check].apply(convert_to_float)

    # Rows whose range_class is None/NaN are left out of both counts by groupby.
    total_counts = df.groupby('range_class').size()
    # A row counts as passed only when the converted ratio is exactly 1.
    pass_counts = df[df[col_to_check] == 1].groupby('range_class').size()
    # Reindex the pass_counts to include all range classes & fill 0 for NaN values
    pass_counts = pass_counts.reindex(total_counts.index, fill_value=0)
    if report_err_bar:
        percentages, err_bar = cal_err_bar(pass_counts, total_counts)
        # Plain array: looked up by position in the formatting step below.
        err_bar = err_bar.values
    else:
        percentages = pass_counts / total_counts * 100
        err_bar = None

    result_df = pd.DataFrame({
        'range_class': total_counts.index,
        'total_counts': total_counts.values,
        'passed_counts': pass_counts.values,
        'percentages': percentages.values
    })

    # 'Medium' is listed as a category but classify_abs_diff never produces it.
    result_df['range_class'] = pd.Categorical(result_df['range_class'], categories=['Short', 'Medium', 'Long'], ordered=True)
    result_df = result_df.sort_values('range_class')
    if report_err_bar:
        # row.name is the pre-sort index label, i.e. the row's original
        # position, which is what indexes the positional err_bar array.
        result_df['percentages'] = result_df.apply(lambda row: f"{row['percentages']*100:.1f} ± {err_bar[row.name]*100:.1f}", axis=1)
    else:
        result_df['percentages'] = result_df['percentages'].apply(lambda x: f"{x:.1f}%")

    return percentages, result_df

# percentages, result_df = classify_and_calculate(combined_df['gpt-4-turbo_with_afterlines'], 30, 100)
# result_df

def process_all_dfs(combined_df, program_type, task_type, short_range, medium_range):
    """Run ``classify_and_calculate`` on every entry and stack the summaries.

    Each per-entry summary table gets the entry's key inserted as a leading
    ``key`` column; the tables are then concatenated with a fresh index.
    """
    summaries = []
    for key in combined_df:
        _, summary = classify_and_calculate(
            combined_df[key], program_type, task_type, short_range, medium_range
        )
        summary.insert(0, 'key', key)
        summaries.append(summary)
    return pd.concat(summaries, ignore_index=True)

def split_df(final_df):
    """Split *final_df* into one frame per distinct ``key`` value.

    Keys appear in order of first occurrence; each sub-frame keeps its
    original index labels and has the ``key`` column removed.
    """
    key_column = final_df['key']
    tables = {}
    for key in key_column.unique():
        tables[key] = final_df[key_column == key].drop(columns='key')
    return tables

def process_and_plot(folder_path, col_to_check):
    """Process *folder_path* with *col_to_check* as the pass-ratio column.

    ``process_folder`` reads the module-level ``col_to_check``, so the
    argument is published there first. Previously it was silently ignored.
    The assignment persists after the call.

    Returns:
        The mapping produced by ``process_folder(folder_path)``.
    """
    # A `global col_to_check` statement is a SyntaxError here because the
    # parameter has the same name, so the module namespace is updated
    # through globals() instead.
    globals()['col_to_check'] = col_to_check
    combined_df = process_folder(folder_path)
    return combined_df

def process_and_display(combined_df, program_type, task_type, short_range, medium_range):
    """Summarise every entry of *combined_df*, display the tables, return them.

    The per-key tables are shown in the order given by splitting each key at
    its last underscore and sorting on the (head, tail) pair.
    """
    def _split_at_last_underscore(name):
        pieces = name.rsplit('_', 1)
        return (pieces[0], pieces[1])

    sub_dfs = split_df(
        process_all_dfs(combined_df, program_type, task_type, short_range, medium_range)
    )

    for key in sorted(sub_dfs, key=_split_at_last_underscore):
        print(f"Key: {key}")
        display(sub_dfs[key])

    return sub_dfs

def bootstrap_resampling(pass_count, total_count, num_resamples=10000):
    """Estimate a pass rate and its ~95% half-width by parametric bootstrap.

    Each resample draws *total_count* pass/fail outcomes with pass
    probability ``pass_count / total_count``. Only the number of passes per
    resample is needed, and that number follows a
    Binomial(total_count, p) distribution, so it is sampled directly instead
    of building a ``num_resamples x total_count`` matrix of 0/1 draws and
    summing its rows. The distribution of the result is unchanged, but the
    exact numbers under a fixed NumPy seed differ from the matrix version.

    Args:
        pass_count: number of passing items.
        total_count: number of items; must be non-zero.
        num_resamples: number of bootstrap resamples.

    Returns:
        ``(mean_rate, 1.96 * std_rate)`` over the resampled pass rates, both
        on a 0-1 scale.
    """
    performance = pass_count / total_count

    # One binomial draw per resample == row sum of the former 0/1 matrix.
    resample_pass_counts = np.random.binomial(total_count, performance, size=num_resamples)
    resample_performances = resample_pass_counts / total_count

    avg_performance = resample_performances.mean()
    std_dev_performance = resample_performances.std()

    return avg_performance, 1.96 * std_dev_performance

def cal_err_bar(pass_counts, total_counts, num_resamples=10000):
    """Bootstrap a pass rate and error bar for each (pass, total) pair.

    The pairs are taken in lock-step from *pass_counts* and *total_counts*.
    Both results are returned as Series labelled with
    ``total_counts.index``: the resampled mean rates first, the error bars
    second.
    """
    estimates = [
        bootstrap_resampling(passed, total, num_resamples)
        for passed, total in zip(pass_counts, total_counts)
    ]
    mean_rates = [mean for mean, _ in estimates]
    half_widths = [width for _, width in estimates]

    labels = total_counts.index
    return pd.Series(mean_rates, index=labels), pd.Series(half_widths, index=labels)

def save_df_dict_to_csv_with_keys(df_dict, output_file_name):
    """Write all frames of *df_dict* into one CSV, each preceded by a key row.

    For every ``key -> DataFrame`` entry a one-row separator is emitted that
    holds *key* in the frame's first column and a missing value in the
    others, followed by the frame's own rows. The pieces are collected and
    concatenated once, rather than growing a DataFrame inside the loop. An
    empty *df_dict* still produces a file, written from an empty DataFrame.

    Args:
        df_dict: mapping of key to DataFrame, written in iteration order.
        output_file_name: destination path passed to ``DataFrame.to_csv``.
    """
    pieces = []
    for key, df in df_dict.items():
        columns = list(df.columns)
        key_row = pd.DataFrame(
            {col: [key] if col == columns[0] else [pd.NA] for col in columns}
        )
        pieces.append(key_row)
        pieces.append(df)

    # pd.concat raises on an empty list, so fall back to an empty frame.
    final_df = pd.concat(pieces, ignore_index=True) if pieces else pd.DataFrame()
    final_df.to_csv(output_file_name, index=False)

def analyze_language_results(language, analysis_type):
    """Run the comment-distance analysis for one language / task pair.

    Looks up the input folder, output file name and range thresholds for
    ``settings[language][analysis_type]``, processes the folder, displays the
    per-key summary tables and saves them to the configured CSV file. The
    pass-ratio column is taken from the module-level ``col_to_check``.
    """
    print(f"################### Analyzing {language} {analysis_type} ###################")
    print()

    settings = {
        "Python": {
            "Completion": {
                "folder_path": "../Analysis_Results/storage_server/Python_all_res/Completion/4th_post_process_reason_update/Update_labels",
                "save_file_name": "Python_Completion_grouped_by_comment_dist.csv",
                "short_range": 10,
                "medium_range": 30,
            },
            "infilling": {
                "folder_path": "../Analysis_Results/storage_server/Python_all_res/Infilling/4th_post_process_reason_update/Update_labels",
                "save_file_name": "Python_Infilling_grouped_by_comment_dist.csv",
                "short_range": 10,
                "medium_range": 30,
            },
        },
        "Java": {
            "Completion": {
                "folder_path": "../Analysis_Results/storage_server/Java_all_res/Completion/4th_post_process_reason_update/Update_labels",
                "save_file_name": "Java_Completion_grouped_by_comment_dist.csv",
                "short_range": 10,
                "medium_range": 30,
            },
            "infilling": {
                "folder_path": "../Analysis_Results/storage_server/Java_all_res/Infilling/4th_post_process_reason_update/Update_labels",
                "save_file_name": "Java_Infilling_grouped_by_comment_dist.csv",
                "short_range": 10,
                "medium_range": 30,
            },
        },
    }

    chosen = settings[language][analysis_type]
    task_type = f"checkpoint_LN_{analysis_type}"

    combined_df = process_and_plot(chosen['folder_path'], col_to_check)
    dict_dfs = process_and_display(
        combined_df, language, task_type, chosen['short_range'], chosen['medium_range']
    )
    save_df_dict_to_csv_with_keys(dict_dfs, chosen['save_file_name'])

# Name of the pass-ratio column. It is read as a module-level global by
# process_folder, draw_histogram, classify_and_calculate and
# analyze_language_results, so it must be defined before they are called.
col_to_check = 'post_process_pass_ratio'
# Example usage: run the analysis for each language / task combination.
# Each call prints its progress, displays the summary tables and writes one CSV.
analyze_language_results("Python", "Completion")
analyze_language_results("Python", "infilling")
analyze_language_results("Java", "Completion")
analyze_language_results("Java", "infilling")


################### Analyzing Python Completion ###################

#############################################
Original DataFrame: 212 rows
Rows dropped due to NaN in initial columns ['horizon_categories_output', 'horizon_freq_analysis']: 2
Rows after expansion: 1175 rows
Rows dropped due to NaN in additional columns ['horizon_categories_output', 'horizon_freq_analysis', 'Type', 'usage_line', 'def_line', 'abs_diff', 'post_process_pass_ratio']: 3
Original DataFrame: 1172 rows
DataFrame after groupby: 207 rows
#############################################
Original DataFrame: 212 rows
Rows dropped due to NaN in initial columns ['horizon_categories_output', 'horizon_freq_analysis']: 2
Rows after expansion: 1175 rows
Rows dropped due to NaN in additional columns ['horizon_categories_output', 'horizon_freq_analysis', 'Type', 'usage_line', 'def_line', 'abs_diff', 'post_process_pass_ratio']: 3
Original DataFrame: 1172 rows
DataFrame after groupby: 207 rows
#################################

Unnamed: 0,range_class,total_counts,passed_counts,percentages
2,Short,105,56,53.3 ± 9.5
3,Long,102,37,36.3 ± 9.3


Key: Meta-Llama-3-8B-Instruct_no_afterlines


Unnamed: 0,range_class,total_counts,passed_counts,percentages
14,Short,105,37,35.2 ± 9.2
15,Long,102,27,26.5 ± 8.6


Key: claude-3-haiku-20240307_no_afterlines


Unnamed: 0,range_class,total_counts,passed_counts,percentages
8,Short,105,37,35.3 ± 9.0
9,Long,102,20,19.6 ± 7.7


Key: claude-3-opus-20240229_no_afterlines


Unnamed: 0,range_class,total_counts,passed_counts,percentages
6,Short,105,34,32.5 ± 9.0
7,Long,102,16,15.7 ± 7.1


Key: claude-3-sonnet-20240229_no_afterlines


Unnamed: 0,range_class,total_counts,passed_counts,percentages
18,Short,105,36,34.3 ± 9.0
19,Long,102,20,19.6 ± 7.6


Key: deepseek-coder-1.3b-instruct_no_afterlines


Unnamed: 0,range_class,total_counts,passed_counts,percentages
4,Short,105,14,13.4 ± 6.5
5,Long,102,11,10.8 ± 6.0


Key: deepseek-coder-7b-instruct_no_afterlines


Unnamed: 0,range_class,total_counts,passed_counts,percentages
12,Short,105,51,48.5 ± 9.5
13,Long,102,41,40.2 ± 9.4


Key: gpt-3.5-turbo-0125_no_afterlines


Unnamed: 0,range_class,total_counts,passed_counts,percentages
10,Short,105,62,59.1 ± 9.5
11,Long,102,46,45.0 ± 9.7


Key: gpt-4-turbo_no_afterlines


Unnamed: 0,range_class,total_counts,passed_counts,percentages
16,Short,105,67,63.8 ± 9.2
17,Long,102,49,48.0 ± 9.7


Key: phi-3-mini-4k_no_afterlines


Unnamed: 0,range_class,total_counts,passed_counts,percentages
0,Short,105,13,12.4 ± 6.4
1,Long,102,3,2.9 ± 3.3


################### Analyzing Python infilling ###################

#############################################
Original DataFrame: 382 rows
Rows dropped due to NaN in initial columns ['horizon_categories_output', 'horizon_freq_analysis']: 2
Rows after expansion: 1447 rows
Rows dropped due to NaN in additional columns ['horizon_categories_output', 'horizon_freq_analysis', 'Type', 'usage_line', 'def_line', 'abs_diff', 'post_process_pass_ratio']: 6
Original DataFrame: 1441 rows
DataFrame after groupby: 374 rows
#############################################
Original DataFrame: 382 rows
Rows dropped due to NaN in initial columns ['horizon_categories_output', 'horizon_freq_analysis']: 2
Rows after expansion: 1447 rows
Rows dropped due to NaN in additional columns ['horizon_categories_output', 'horizon_freq_analysis', 'Type', 'usage_line', 'def_line', 'abs_diff', 'post_process_pass_ratio']: 6
Original DataFrame: 1441 rows
DataFrame after groupby: 374 rows
##################################

Unnamed: 0,range_class,total_counts,passed_counts,percentages
8,Short,236,155,65.7 ± 6.0
9,Long,138,49,35.5 ± 8.0


Key: Meta-Llama-3-8B-Instruct_with_afterlines


Unnamed: 0,range_class,total_counts,passed_counts,percentages
18,Short,236,71,30.1 ± 5.9
19,Long,138,31,22.5 ± 7.0


Key: claude-3-haiku-20240307_with_afterlines


Unnamed: 0,range_class,total_counts,passed_counts,percentages
14,Short,236,98,41.5 ± 6.4
15,Long,138,31,22.5 ± 7.0


Key: claude-3-opus-20240229_with_afterlines


Unnamed: 0,range_class,total_counts,passed_counts,percentages
4,Short,236,135,57.2 ± 6.3
5,Long,138,46,33.3 ± 7.9


Key: claude-3-sonnet-20240229_with_afterlines


Unnamed: 0,range_class,total_counts,passed_counts,percentages
16,Short,236,134,56.8 ± 6.3
17,Long,138,49,35.6 ± 8.0


Key: deepseek-coder-1.3b-instruct_with_afterlines


Unnamed: 0,range_class,total_counts,passed_counts,percentages
2,Short,236,24,10.2 ± 3.9
3,Long,138,6,4.3 ± 3.4


Key: deepseek-coder-7b-instruct_with_afterlines


Unnamed: 0,range_class,total_counts,passed_counts,percentages
10,Short,236,33,14.0 ± 4.5
11,Long,138,8,5.8 ± 3.9


Key: gpt-3.5-turbo-0125_with_afterlines


Unnamed: 0,range_class,total_counts,passed_counts,percentages
0,Short,236,96,40.7 ± 6.3
1,Long,138,36,26.0 ± 7.3


Key: gpt-4-turbo_with_afterlines


Unnamed: 0,range_class,total_counts,passed_counts,percentages
6,Short,236,178,75.5 ± 5.5
7,Long,138,79,57.3 ± 8.2


Key: phi-3-mini-4k_with_afterlines


Unnamed: 0,range_class,total_counts,passed_counts,percentages
12,Short,236,13,5.5 ± 2.9
13,Long,138,5,3.7 ± 3.1


################### Analyzing Java Completion ###################

#############################################
Original DataFrame: 286 rows
Rows dropped due to NaN in initial columns ['horizon_categories_output', 'horizon_freq_analysis']: 8
Rows after expansion: 1274 rows
Rows dropped due to NaN in additional columns ['horizon_categories_output', 'horizon_freq_analysis', 'Type', 'usage_line', 'def_line', 'abs_diff', 'post_process_pass_ratio']: 0
Original DataFrame: 1274 rows
DataFrame after groupby: 278 rows
#############################################
Original DataFrame: 286 rows
Rows dropped due to NaN in initial columns ['horizon_categories_output', 'horizon_freq_analysis']: 8
Rows after expansion: 1274 rows
Rows dropped due to NaN in additional columns ['horizon_categories_output', 'horizon_freq_analysis', 'Type', 'usage_line', 'def_line', 'abs_diff', 'post_process_pass_ratio']: 0
Original DataFrame: 1274 rows
DataFrame after groupby: 278 rows
###################################

Unnamed: 0,range_class,total_counts,passed_counts,percentages
10,Short,193,114,59.1 ± 6.9
11,Long,85,36,42.3 ± 10.5


Key: Meta-Llama-3-8B-Instruct_no_afterlines


Unnamed: 0,range_class,total_counts,passed_counts,percentages
2,Short,193,63,32.6 ± 6.5
3,Long,85,10,11.7 ± 6.8


Key: claude-3-haiku-20240307_no_afterlines


Unnamed: 0,range_class,total_counts,passed_counts,percentages
8,Short,193,99,51.3 ± 7.1
9,Long,85,40,47.1 ± 10.7


Key: claude-3-opus-20240229_no_afterlines


Unnamed: 0,range_class,total_counts,passed_counts,percentages
16,Short,193,134,69.4 ± 6.5
17,Long,85,58,68.2 ± 9.9


Key: claude-3-sonnet-20240229_no_afterlines


Unnamed: 0,range_class,total_counts,passed_counts,percentages
4,Short,193,117,60.6 ± 6.9
5,Long,85,38,44.7 ± 10.6


Key: deepseek-coder-1.3b-instruct_no_afterlines


Unnamed: 0,range_class,total_counts,passed_counts,percentages
6,Short,193,37,19.1 ± 5.6
7,Long,85,8,9.4 ± 6.1


Key: deepseek-coder-7b-instruct_no_afterlines


Unnamed: 0,range_class,total_counts,passed_counts,percentages
18,Short,193,95,49.2 ± 7.1
19,Long,85,22,25.9 ± 9.4


Key: gpt-3.5-turbo-0125_no_afterlines


Unnamed: 0,range_class,total_counts,passed_counts,percentages
14,Short,193,101,52.3 ± 7.0
15,Long,85,17,20.0 ± 8.5


Key: gpt-4-turbo_no_afterlines


Unnamed: 0,range_class,total_counts,passed_counts,percentages
0,Short,193,133,69.0 ± 6.5
1,Long,85,39,45.9 ± 10.6


Key: phi-3-mini-4k_no_afterlines


Unnamed: 0,range_class,total_counts,passed_counts,percentages
12,Short,193,25,13.0 ± 4.8
13,Long,85,5,5.9 ± 5.0


################### Analyzing Java infilling ###################

#############################################
Original DataFrame: 283 rows
Rows dropped due to NaN in initial columns ['horizon_categories_output', 'horizon_freq_analysis']: 5
Rows after expansion: 1272 rows
Rows dropped due to NaN in additional columns ['horizon_categories_output', 'horizon_freq_analysis', 'Type', 'usage_line', 'def_line', 'abs_diff', 'post_process_pass_ratio']: 0
Original DataFrame: 1272 rows
DataFrame after groupby: 278 rows
#############################################
Original DataFrame: 283 rows
Rows dropped due to NaN in initial columns ['horizon_categories_output', 'horizon_freq_analysis']: 5
Rows after expansion: 1272 rows
Rows dropped due to NaN in additional columns ['horizon_categories_output', 'horizon_freq_analysis', 'Type', 'usage_line', 'def_line', 'abs_diff', 'post_process_pass_ratio']: 0
Original DataFrame: 1272 rows
DataFrame after groupby: 278 rows
####################################

Unnamed: 0,range_class,total_counts,passed_counts,percentages
16,Short,206,121,58.8 ± 6.7
17,Long,72,35,48.7 ± 11.5


Key: Meta-Llama-3-8B-Instruct_with_afterlines


Unnamed: 0,range_class,total_counts,passed_counts,percentages
12,Short,206,52,25.2 ± 5.9
13,Long,72,15,20.7 ± 9.3


Key: claude-3-haiku-20240307_with_afterlines


Unnamed: 0,range_class,total_counts,passed_counts,percentages
10,Short,206,71,34.5 ± 6.5
11,Long,72,14,19.3 ± 9.1


Key: claude-3-opus-20240229_with_afterlines


Unnamed: 0,range_class,total_counts,passed_counts,percentages
18,Short,206,159,77.2 ± 5.7
19,Long,72,44,61.1 ± 11.3


Key: claude-3-sonnet-20240229_with_afterlines


Unnamed: 0,range_class,total_counts,passed_counts,percentages
8,Short,206,132,64.1 ± 6.6
9,Long,72,28,39.0 ± 11.2


Key: deepseek-coder-1.3b-instruct_with_afterlines


Unnamed: 0,range_class,total_counts,passed_counts,percentages
4,Short,206,12,5.8 ± 3.2
5,Long,72,2,2.8 ± 3.8


Key: deepseek-coder-7b-instruct_with_afterlines


Unnamed: 0,range_class,total_counts,passed_counts,percentages
2,Short,206,15,7.3 ± 3.6
3,Long,72,1,1.4 ± 2.7


Key: gpt-3.5-turbo-0125_with_afterlines


Unnamed: 0,range_class,total_counts,passed_counts,percentages
6,Short,206,59,28.6 ± 6.2
7,Long,72,15,20.8 ± 9.5


Key: gpt-4-turbo_with_afterlines


Unnamed: 0,range_class,total_counts,passed_counts,percentages
0,Short,206,159,77.2 ± 5.8
1,Long,72,48,66.7 ± 10.9


Key: phi-3-mini-4k_with_afterlines


Unnamed: 0,range_class,total_counts,passed_counts,percentages
14,Short,206,19,9.2 ± 3.9
15,Long,72,3,4.2 ± 4.6


In [7]:
def test_comment_distance_classification(folder_path, program_type, task_type, short_range=10):
    """Label every row of each result table as 'Short' or 'Long' by comment distance.

    For each key yielded by ``process_and_plot(folder_path, col_to_check)`` the
    matching table is annotated in place with three new columns
    (``list_of_tuples``, ``comment_dist``, ``range_class``), the
    ``col_to_check`` column is passed through ``convert_to_float``, the number
    of rows lacking a ``comment_dist`` is printed, and the table is shown with
    ``display``.

    Args:
        folder_path: Forwarded unchanged to ``process_and_plot``.
        program_type: Forwarded to ``get_comment_to_ckpt_by_task``.
        task_type: Forwarded to ``get_comment_to_ckpt_by_task``.
        short_range: Inclusive upper bound of the 'Short' class; anything
            strictly greater is 'Long'.

    Note:
        ``col_to_check`` is read from the enclosing (notebook) scope, not
        passed as an argument.
    """

    def _range_label(distance):
        # A value that satisfies neither comparison (e.g. NaN) gets no label.
        if distance <= short_range:
            return 'Short'
        if distance > short_range:
            return 'Long'
        return None

    def _comment_pairs_for(task_name):
        return get_comment_to_ckpt_by_task(task_name, program_type, task_type)

    tables_by_key = process_and_plot(folder_path, col_to_check)
    for key in tables_by_key:
        print("################ Analyzing", key, "################")
        table = tables_by_key[key]

        # Derive the comment distance for each row, then bucket it.
        table['list_of_tuples'] = table['code_task'].apply(_comment_pairs_for)
        table['comment_dist'] = table.apply(get_difference, axis=1)
        table['range_class'] = table['comment_dist'].apply(_range_label)
        table[col_to_check] = table[col_to_check].apply(convert_to_float)

        # Report how many rows ended up without a comment distance.
        missing_mask = table['comment_dist'].isnull()
        missing_total = len(table[missing_mask])
        print(f"Number of rows with NaN values in 'comment_dist': {missing_total}")

        display(table)

# Select which result set to analyze.
language = "Java"
analysis_type = "Completion"

# Per-language / per-analysis configuration: where the labeled result CSVs
# live, the output file name, and the distance thresholds.
# NOTE(review): the inner keys are "Completion" (capitalized) and "infilling"
# (lowercase); `analysis_type` must match the key exactly or the lookup below
# raises KeyError. Confirm which spelling downstream code expects before
# normalizing.
settings = {
    "Python": {
        "Completion": {
            "folder_path": "../Analysis_Results/storage_server/Python_all_res/Completion/4th_post_process_reason_update/Update_labels",
            "save_file_name": "Python_Completion_grouped_by_comment_dist.csv",
            "short_range": 10,
            "medium_range": 30,
        },
        "infilling": {
            "folder_path": "../Analysis_Results/storage_server/Python_all_res/Infilling/4th_post_process_reason_update/Update_labels",
            "save_file_name": "Python_Infilling_grouped_by_comment_dist.csv",
            "short_range": 10,
            "medium_range": 30,
        },
    },
    "Java": {
        "Completion": {
            "folder_path": "../Analysis_Results/storage_server/Java_all_res/Completion/4th_post_process_reason_update/Update_labels",
            "save_file_name": "Java_Completion_grouped_by_comment_dist.csv",
            "short_range": 10,
            "medium_range": 30,
        },
        "infilling": {
            "folder_path": "../Analysis_Results/storage_server/Java_all_res/Infilling/4th_post_process_reason_update/Update_labels",
            "save_file_name": "Java_Infilling_grouped_by_comment_dist.csv",
            "short_range": 10,
            "medium_range": 30,
        },
    },
}

program_type = language
task_type = f"checkpoint_LN_{analysis_type}"

# Unpack the chosen configuration into module-level names.
setting = settings[language][analysis_type]
folder_path = setting['folder_path']
save_file_name = setting['save_file_name']
short_range = setting['short_range']
medium_range = setting['medium_range']

# Pass the configured threshold rather than a hard-coded literal so that
# editing `settings` actually changes the classification.
test_comment_distance_classification(folder_path, program_type, task_type, short_range=short_range)


Grouped by ['model_name', 'gen_mode'] and combined gpt-4-turbo_no_afterlines_MTreeTester_06_03_04_07.csv, gpt-4-turbo_no_afterlines_FactorizationTester_06_03_04_05.csv, gpt-4-turbo_no_afterlines_SparseArrayTester_06_03_03_58.csv, gpt-4-turbo_no_afterlines_CounterTester_06_03_04_02.csv, gpt-4-turbo_no_afterlines_RNGTester_06_03_04_02.csv, gpt-4-turbo_no_afterlines_DoubleVectorTester_06_03_04_00.csv, gpt-4-turbo_no_afterlines_TopKTester_06_03_04_01.csv, gpt-4-turbo_no_afterlines_DoubleMatrixTester_06_03_04_06.csv
Grouped by ['model_name', 'gen_mode'] and combined Meta-Llama-3-8B-Instruct_no_afterlines_TopKTester_06_03_04_05.csv, Meta-Llama-3-8B-Instruct_no_afterlines_FactorizationTester_06_03_04_03.csv, Meta-Llama-3-8B-Instruct_no_afterlines_SparseArrayTester_06_03_04_03.csv, Meta-Llama-3-8B-Instruct_no_afterlines_DoubleVectorTester_06_03_03_58.csv, Meta-Llama-3-8B-Instruct_no_afterlines_RNGTester_06_03_04_03.csv, Meta-Llama-3-8B-Instruct_no_afterlines_CounterTester_06_03_04_06.csv, Meta

Unnamed: 0,start_line,end_line,code_task,before,between,after,gen_code_dict,gen_code_process_steps,selected_last_post_process_step,gen_code_selected,...,post_process_eval_res,post_process_pass_ratio,2nd_post_process_steps,Type,usage_line,def_line,abs_diff,list_of_tuples,comment_dist,range_class
465,76,77,CounterTester,"['import junit.framework.TestCase;', 'import j...","[' return myWord.compareTo(w.myWord);', '...","["" // If the counts are not equal, subtract...",{'original_code': 'return myWord.compareTo(w.m...,"['cleaned_code', 'trimmed_code', 'indented_code']",cleaned_code,"['return myWord.compareTo(w.myWord);', ' }'...",...,['Success: Final output matches the expected r...,1.0,indented_code,Global_Variable,76,13,63,"[(76, 74), (79, 78), (107, 104), (116, 115), (...",2,Short
468,79,80,CounterTester,"['import junit.framework.TestCase;', 'import j...","[' return w.myCount - myCount;', ' }']","['}', '', '', '/**', ' * This class is used to...",{'original_code': 'return myCount - w.myCount;...,"['cleaned_code', 'trimmed_code', 'indented_code']",cleaned_code,['return myCount - w.myCount;'],...,['Error: '],0.0,indented_code,Global_Variable,79,14,65,"[(76, 74), (79, 78), (107, 104), (116, 115), (...",1,Short
471,107,111,CounterTester,"['import junit.framework.TestCase;', 'import j...","[' ones.remove (addMe);', ' WordWrap...","['', ' // first check to see if the word is...",{'original_code': 'WordWrapper existing = word...,"['cleaned_code', 'trimmed_code', 'indented_code']",cleaned_code,['WordWrapper existing = wordsIHaveSeen.get(ad...,...,['Error: '],0.0,indented_code,Class,108,11,97,"[(76, 74), (79, 78), (107, 104), (116, 115), (...",3,Short
479,116,117,CounterTester,"['import junit.framework.TestCase;', 'import j...",[' WordWrapper temp = wordsIHaveSeen.get ...,"['', ' // find the slot that we go to in ...",{'original_code': 'WordWrapper existing = word...,"['cleaned_code', 'trimmed_code', 'indented_code']",cleaned_code,['WordWrapper existing = wordsIHaveSeen.get(ad...,...,['Error: Compilation error: /tmp/tmpjuonmgzd/C...,0.0,indented_code,Class,116,11,105,"[(76, 74), (79, 78), (107, 104), (116, 115), (...",1,Short
489,150,156,CounterTester,"['import junit.framework.TestCase;', 'import j...","[' int which = extractedWords.size ();', ...","[' } else {', ' // If k is less than t...","{'original_code': 'return null;', 'cleaned_cod...","['cleaned_code', 'trimmed_code', 'indented_code']",cleaned_code,['return null;'],...,['Error: '],0.0,indented_code,Global_Variable,151,94,57,"[(76, 74), (79, 78), (107, 104), (116, 115), (...",2,Short
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1005,541,541,TopKTester,"['import junit.framework.TestCase;', 'import j...",[' k = numInserts;'],"[' ', ' // make sure the list is the rig...",{'original_code': 'k = numInserts; \n\nfor (in...,"['cleaned_code', 'trimmed_code', 'indented_code']",cleaned_code,"['k = numInserts; ', 'for (int i = 0; i < k; i...",...,['Success: Final output matches the expected r...,1.0,"['cleaned_code', 'trimmed_code', 'indented_code']",Variable,541,481,60,"[(167, 23), (190, 189), (212, 189), (215, 189)...",2,Short
1007,559,559,TopKTester,"['import junit.framework.TestCase;', 'import j...",[' reverseOrNot = controlTest[1];'],"[' ', ' // create a list of random ints'...",{'original_code': 'reverseOrNot = controlTest[...,"['cleaned_code', 'trimmed_code', 'indented_code']",cleaned_code,"['reverseOrNot = controlTest[1];', ' ITopKM...",...,['Success: Final output matches the expected r...,1.0,trimmed_code,Variable,559,553,6,"[(167, 23), (190, 189), (212, 189), (215, 189)...",4,Short
1009,566,566,TopKTester,"['import junit.framework.TestCase;', 'import j...",[' list[i] = numInserts - 1 - i;'],"[' else', ' list[i] = i; ', ' }...",{'original_code': ' list[i] = numInsert...,"['cleaned_code', 'trimmed_code', 'indented_code']",cleaned_code,"[' list[i] = numInserts - 1 - i;', ' ...",...,['Success: Final output matches the expected r...,1.0,trimmed_code,Variable,566,553,13,"[(167, 23), (190, 189), (212, 189), (215, 189)...",5,Short
1012,568,568,TopKTester,"['import junit.framework.TestCase;', 'import j...",[' list[i] = i; '],"[' }', ' ', ' // if we are looking fo...",{'original_code': ' list[i] = i; \n ...,"['cleaned_code', 'trimmed_code', 'indented_code']",cleaned_code,"[' list[i] = i; ', ' }']",...,['Error: Compilation error: /tmp/tmpkcemi5jr/T...,0.0,indented_code,Variable,568,564,4,"[(167, 23), (190, 189), (212, 189), (215, 189)...",7,Short


################ Analyzing Meta-Llama-3-8B-Instruct_no_afterlines ################
Number of rows with NaN values in 'comment_dist': 0


Unnamed: 0,start_line,end_line,code_task,before,between,after,gen_code_dict,gen_code_process_steps,selected_last_post_process_step,gen_code_selected,...,post_process_eval_res,post_process_pass_ratio,2nd_post_process_steps,Type,usage_line,def_line,abs_diff,list_of_tuples,comment_dist,range_class
637,76,77,CounterTester,"['import junit.framework.TestCase;', 'import j...","[' return myWord.compareTo(w.myWord);', '...","["" // If the counts are not equal, subtract...","{'original_code': ""Here is the Java code to co...","['cleaned_code', 'trimmed_code', 'indented_code']",cleaned_code,"['public int compareTo(WordWrapper w) {', ' ...",...,['Error: '],0.0,indented_code,Global_Variable,76,13,63,"[(76, 74), (79, 78), (107, 104), (116, 115), (...",2,Short
640,79,80,CounterTester,"['import junit.framework.TestCase;', 'import j...","[' return w.myCount - myCount;', ' }']","['}', '', '', '/**', ' * This class is used to...","{'original_code': ""Here is the completed Java ...","['cleaned_code', 'trimmed_code', 'indented_code']",cleaned_code,"['public int compareTo(WordWrapper w) {', ' ...",...,['Error: Compilation error: /tmp/tmpksxey74g/C...,0.0,indented_code,Global_Variable,79,14,65,"[(76, 74), (79, 78), (107, 104), (116, 115), (...",1,Short
643,107,111,CounterTester,"['import junit.framework.TestCase;', 'import j...","[' ones.remove (addMe);', ' WordWrap...","['', ' // first check to see if the word is...",{'original_code': 'Here is the completed Java ...,"['cleaned_code', 'trimmed_code', 'indented_code']",cleaned_code,"['public void insert(String addMe) {', ' if...",...,['Error: Problem reading file\njava.lang.NullP...,0.0,indented_code,Class,108,11,97,"[(76, 74), (79, 78), (107, 104), (116, 115), (...",3,Short
651,116,117,CounterTester,"['import junit.framework.TestCase;', 'import j...",[' WordWrapper temp = wordsIHaveSeen.get ...,"['', ' // find the slot that we go to in ...",{'original_code': 'public void insert(String a...,"['cleaned_code', 'trimmed_code', 'indented_code']",cleaned_code,"['public void insert(String addMe) {', ' if...",...,['Error: Compilation error: /tmp/tmpec4a2vlr/C...,0.0,indented_code,Class,116,11,105,"[(76, 74), (79, 78), (107, 104), (116, 115), (...",1,Short
661,150,156,CounterTester,"['import junit.framework.TestCase;', 'import j...","[' int which = extractedWords.size ();', ...","[' } else {', ' // If k is less than t...",{'original_code': 'public String getKthMostFre...,"['cleaned_code', 'trimmed_code', 'indented_code']",cleaned_code,"['public String getKthMostFrequent(int k) {', ...",...,['Error: '],0.0,indented_code,Global_Variable,151,94,57,"[(76, 74), (79, 78), (107, 104), (116, 115), (...",2,Short
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
158,541,541,TopKTester,"['import junit.framework.TestCase;', 'import j...",[' k = numInserts;'],"[' ', ' // make sure the list is the rig...",{'original_code': 'Here is the completed Java ...,"['cleaned_code', 'trimmed_code', 'indented_code']",cleaned_code,"['public void testInserts(int numInserts, int ...",...,['Error: Compilation error: /tmp/tmp1ugabl0_/T...,0.0,indented_code,Variable,541,481,60,"[(167, 23), (190, 189), (212, 189), (215, 189)...",2,Short
160,559,559,TopKTester,"['import junit.framework.TestCase;', 'import j...",[' reverseOrNot = controlTest[1];'],"[' ', ' // create a list of random ints'...",{'original_code': 'Here is the completed Java ...,"['cleaned_code', 'trimmed_code', 'indented_code']",cleaned_code,"['public void testInserts(int numInserts, int ...",...,['Error: Compilation error: /tmp/tmptyazdq_m/T...,0.0,indented_code,Variable,559,553,6,"[(167, 23), (190, 189), (212, 189), (215, 189)...",4,Short
162,566,566,TopKTester,"['import junit.framework.TestCase;', 'import j...",[' list[i] = numInserts - 1 - i;'],"[' else', ' list[i] = i; ', ' }...",{'original_code': 'Here is the completed Java ...,"['cleaned_code', 'trimmed_code', 'indented_code']",cleaned_code,"['public void testInserts(int numInserts, int ...",...,['Success: Final output matches the expected r...,1.0,trimmed_code,Variable,566,553,13,"[(167, 23), (190, 189), (212, 189), (215, 189)...",5,Short
165,568,568,TopKTester,"['import junit.framework.TestCase;', 'import j...",[' list[i] = i; '],"[' }', ' ', ' // if we are looking fo...",{'original_code': 'Here is the completed Java ...,"['cleaned_code', 'trimmed_code', 'indented_code']",cleaned_code,"['public void testBalance(int numInserts, int ...",...,['Success: Final output matches the expected r...,1.0,indented_code,Variable,568,564,4,"[(167, 23), (190, 189), (212, 189), (215, 189)...",7,Short


################ Analyzing claude-3-sonnet-20240229_no_afterlines ################
Number of rows with NaN values in 'comment_dist': 0


Unnamed: 0,start_line,end_line,code_task,before,between,after,gen_code_dict,gen_code_process_steps,selected_last_post_process_step,gen_code_selected,...,post_process_eval_res,post_process_pass_ratio,2nd_post_process_steps,Type,usage_line,def_line,abs_diff,list_of_tuples,comment_dist,range_class
1076,76,77,CounterTester,"['import junit.framework.TestCase;', 'import j...","[' return myWord.compareTo(w.myWord);', '...","["" // If the counts are not equal, subtract...",{'original_code': '# --BEGIN MISSING CODE--\n ...,"['cleaned_code', 'trimmed_code', 'indented_code']",cleaned_code,"[' return myWord.compareTo(w.myWord);', '...",...,['Success: Final output matches the expected r...,1.0,indented_code,Global_Variable,76,13,63,"[(76, 74), (79, 78), (107, 104), (116, 115), (...",2,Short
1079,79,80,CounterTester,"['import junit.framework.TestCase;', 'import j...","[' return w.myCount - myCount;', ' }']","['}', '', '', '/**', ' * This class is used to...",{'original_code': '# --BEGIN MISSING CODE--\nr...,"['cleaned_code', 'trimmed_code', 'indented_code']",cleaned_code,['return w.myCount - myCount;'],...,['Success: Final output matches the expected r...,1.0,indented_code,Global_Variable,79,14,65,"[(76, 74), (79, 78), (107, 104), (116, 115), (...",1,Short
1082,107,111,CounterTester,"['import junit.framework.TestCase;', 'import j...","[' ones.remove (addMe);', ' WordWrap...","['', ' // first check to see if the word is...",{'original_code': '# --BEGIN MISSING CODE--\n ...,"['cleaned_code', 'trimmed_code', 'indented_code']",cleaned_code,[' WordWrapper temp = wordsIHaveSeen.get(...,...,['Error: Problem reading file\njava.lang.NullP...,0.0,indented_code,Class,108,11,97,"[(76, 74), (79, 78), (107, 104), (116, 115), (...",3,Short
1090,116,117,CounterTester,"['import junit.framework.TestCase;', 'import j...",[' WordWrapper temp = wordsIHaveSeen.get ...,"['', ' // find the slot that we go to in ...",{'original_code': '# --BEGIN MISSING CODE--\n ...,"['cleaned_code', 'trimmed_code', 'indented_code']",cleaned_code,[' WordWrapper temp = wordsIHaveSeen.get(...,...,['Success: Final output matches the expected r...,1.0,trimmed_code,Class,116,11,105,"[(76, 74), (79, 78), (107, 104), (116, 115), (...",1,Short
1100,150,156,CounterTester,"['import junit.framework.TestCase;', 'import j...","[' int which = extractedWords.size ();', ...","[' } else {', ' // If k is less than t...",{'original_code': '# --BEGIN MISSING CODE--\n ...,"['cleaned_code', 'trimmed_code', 'indented_code']",cleaned_code,"[' return null;', ' } else {', ' ...",...,['Error: '],0.0,indented_code,Global_Variable,151,94,57,"[(76, 74), (79, 78), (107, 104), (116, 115), (...",2,Short
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
738,541,541,TopKTester,"['import junit.framework.TestCase;', 'import j...",[' k = numInserts;'],"[' ', ' // make sure the list is the rig...",{'original_code': '# --BEGIN MISSING CODE--\n ...,"['cleaned_code', 'trimmed_code', 'indented_code']",cleaned_code,"[' k = numInserts;', 'for (int i = 0; i < k...",...,['Success: Final output matches the expected r...,1.0,trimmed_code,Variable,541,481,60,"[(167, 23), (190, 189), (212, 189), (215, 189)...",2,Short
740,559,559,TopKTester,"['import junit.framework.TestCase;', 'import j...",[' reverseOrNot = controlTest[1];'],"[' ', ' // create a list of random ints'...",{'original_code': '# --BEGIN MISSING CODE--\n ...,"['cleaned_code', 'trimmed_code', 'indented_code']",cleaned_code,"[' reverseOrNot = controlTest[1];', ' ...",...,['Success: Final output matches the expected r...,1.0,trimmed_code,Variable,559,553,6,"[(167, 23), (190, 189), (212, 189), (215, 189)...",4,Short
742,566,566,TopKTester,"['import junit.framework.TestCase;', 'import j...",[' list[i] = numInserts - 1 - i;'],"[' else', ' list[i] = i; ', ' }...",{'original_code': '# --BEGIN MISSING CODE--\n ...,"['cleaned_code', 'trimmed_code', 'indented_code']",cleaned_code,"[' list[i] = numInserts - 1 - i;', ' ...",...,['Success: Final output matches the expected r...,1.0,trimmed_code,Variable,566,553,13,"[(167, 23), (190, 189), (212, 189), (215, 189)...",5,Short
745,568,568,TopKTester,"['import junit.framework.TestCase;', 'import j...",[' list[i] = i; '],"[' }', ' ', ' // if we are looking fo...",{'original_code': '# --BEGIN MISSING CODE--\n ...,"['cleaned_code', 'trimmed_code', 'indented_code']",cleaned_code,"[' list[i] = i;', ' }', ' if (ran...",...,['Success: Final output matches the expected r...,1.0,trimmed_code,Variable,568,564,4,"[(167, 23), (190, 189), (212, 189), (215, 189)...",7,Short


################ Analyzing deepseek-coder-1.3b-instruct_no_afterlines ################
Number of rows with NaN values in 'comment_dist': 0


Unnamed: 0,start_line,end_line,code_task,before,between,after,gen_code_dict,gen_code_process_steps,selected_last_post_process_step,gen_code_selected,...,post_process_eval_res,post_process_pass_ratio,2nd_post_process_steps,Type,usage_line,def_line,abs_diff,list_of_tuples,comment_dist,range_class
258,76,77,CounterTester,"['import junit.framework.TestCase;', 'import j...","[' return myWord.compareTo(w.myWord);', '...","["" // If the counts are not equal, subtract...",{'original_code': 'The missing code is as foll...,"['cleaned_code', 'trimmed_code', 'indented_code']",cleaned_code,"['', ' return w.myWord.compareTo(myWord);',...",...,['Error: '],0.0,indented_code,Global_Variable,76,13,63,"[(76, 74), (79, 78), (107, 104), (116, 115), (...",2,Short
261,79,80,CounterTester,"['import junit.framework.TestCase;', 'import j...","[' return w.myCount - myCount;', ' }']","['}', '', '', '/**', ' * This class is used to...","{'original_code': ""Here is the missing code:\n...","['cleaned_code', 'trimmed_code', 'indented_code']",cleaned_code,"['', ' return w.myCount - myCount;', '}', '...",...,['Success: Final output matches the expected r...,1.0,indented_code,Global_Variable,79,14,65,"[(76, 74), (79, 78), (107, 104), (116, 115), (...",1,Short
264,107,111,CounterTester,"['import junit.framework.TestCase;', 'import j...","[' ones.remove (addMe);', ' WordWrap...","['', ' // first check to see if the word is...",{'original_code': 'The missing code is as foll...,"['cleaned_code', 'trimmed_code', 'indented_code']",cleaned_code,[' WordWrapper existingWord = wordsIHaveS...,...,['Error: '],0.0,indented_code,Class,108,11,97,"[(76, 74), (79, 78), (107, 104), (116, 115), (...",3,Short
272,116,117,CounterTester,"['import junit.framework.TestCase;', 'import j...",[' WordWrapper temp = wordsIHaveSeen.get ...,"['', ' // find the slot that we go to in ...",{'original_code': 'The missing code is as foll...,"['cleaned_code', 'trimmed_code', 'indented_code']",cleaned_code,"['', ' wordsIHaveSeen.get(addMe).incCount...",...,['Error: Compilation error: /tmp/tmpbffexxnt/C...,0.0,indented_code,Class,116,11,105,"[(76, 74), (79, 78), (107, 104), (116, 115), (...",1,Short
282,150,156,CounterTester,"['import junit.framework.TestCase;', 'import j...","[' int which = extractedWords.size ();', ...","[' } else {', ' // If k is less than t...",{'original_code': 'Here is the complete Java c...,"['cleaned_code', 'trimmed_code', 'indented_code']",cleaned_code,['class WordWrapper implements Comparable<Word...,...,"[""Error: Compilation error: /tmp/tmp4uomf6rs/C...",0.0,indented_code,Global_Variable,151,94,57,"[(76, 74), (79, 78), (107, 104), (116, 115), (...",2,Short
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
473,541,541,TopKTester,"['import junit.framework.TestCase;', 'import j...",[' k = numInserts;'],"[' ', ' // make sure the list is the rig...",{'original_code': 'The missing code is as foll...,"['cleaned_code', 'trimmed_code', 'indented_code']",cleaned_code,"['', '// --END MISSING CODE--', ' ArrayList...",...,['Error: Compilation error: /tmp/tmp48ga32ee/T...,0.0,indented_code,Variable,541,481,60,"[(167, 23), (190, 189), (212, 189), (215, 189)...",2,Short
475,559,559,TopKTester,"['import junit.framework.TestCase;', 'import j...",[' reverseOrNot = controlTest[1];'],"[' ', ' // create a list of random ints'...",{'original_code': 'The code provided is incomp...,"['cleaned_code', 'trimmed_code', 'indented_code']",cleaned_code,"['public class AVLNode <T> {', ' Data <T> v...",...,"[""Error: Compilation error: /tmp/tmpop7y_sn0/T...",0.0,indented_code,Variable,559,553,6,"[(167, 23), (190, 189), (212, 189), (215, 189)...",4,Short
477,566,566,TopKTester,"['import junit.framework.TestCase;', 'import j...",[' list[i] = numInserts - 1 - i;'],"[' else', ' list[i] = i; ', ' }...",{'original_code': 'The code you provided is in...,"['cleaned_code', 'trimmed_code', 'indented_code']",cleaned_code,"['public class AVLNode <T> {', ' T val;', ' ...",...,"[""Error: Compilation error: /tmp/tmpj2x_1dva/T...",0.0,indented_code,Variable,566,553,13,"[(167, 23), (190, 189), (212, 189), (215, 189)...",5,Short
480,568,568,TopKTester,"['import junit.framework.TestCase;', 'import j...",[' list[i] = i; '],"[' }', ' ', ' // if we are looking fo...",{'original_code': 'The missing code is:\n\n```...,"['cleaned_code', 'trimmed_code', 'indented_code']",cleaned_code,"['', '// --END MISSING CODE--', ' }', ' ...",...,['Error: Compilation error: /tmp/tmp_j4hi1bk/T...,0.0,indented_code,Variable,568,564,4,"[(167, 23), (190, 189), (212, 189), (215, 189)...",7,Short


################ Analyzing claude-3-haiku-20240307_no_afterlines ################
Number of rows with NaN values in 'comment_dist': 0


Unnamed: 0,start_line,end_line,code_task,before,between,after,gen_code_dict,gen_code_process_steps,selected_last_post_process_step,gen_code_selected,...,post_process_eval_res,post_process_pass_ratio,2nd_post_process_steps,Type,usage_line,def_line,abs_diff,list_of_tuples,comment_dist,range_class
666,76,77,CounterTester,"['import junit.framework.TestCase;', 'import j...","[' return myWord.compareTo(w.myWord);', '...","["" // If the counts are not equal, subtract...",{'original_code': '# --BEGIN MISSING CODE--\n ...,"['cleaned_code', 'trimmed_code', 'indented_code']",cleaned_code,"[' return myWord.compareTo(w.myWord);', '...",...,['Success: Final output matches the expected r...,1.0,indented_code,Global_Variable,76,13,63,"[(76, 74), (79, 78), (107, 104), (116, 115), (...",2,Short
669,79,80,CounterTester,"['import junit.framework.TestCase;', 'import j...","[' return w.myCount - myCount;', ' }']","['}', '', '', '/**', ' * This class is used to...",{'original_code': '# --BEGIN MISSING CODE--\nr...,"['cleaned_code', 'trimmed_code', 'indented_code']",cleaned_code,"['return w.myCount - myCount;', '}']",...,['Success: Final output matches the expected r...,1.0,"['cleaned_code', 'trimmed_code', 'indented_code']",Global_Variable,79,14,65,"[(76, 74), (79, 78), (107, 104), (116, 115), (...",1,Short
672,107,111,CounterTester,"['import junit.framework.TestCase;', 'import j...","[' ones.remove (addMe);', ' WordWrap...","['', ' // first check to see if the word is...",{'original_code': '# --BEGIN MISSING CODE--\n ...,"['cleaned_code', 'trimmed_code', 'indented_code']",cleaned_code,"[' String oldVal = ones.get(addMe);', ' ...",...,['Error: Problem reading file\njava.lang.NullP...,0.0,indented_code,Class,108,11,97,"[(76, 74), (79, 78), (107, 104), (116, 115), (...",3,Short
680,116,117,CounterTester,"['import junit.framework.TestCase;', 'import j...",[' WordWrapper temp = wordsIHaveSeen.get ...,"['', ' // find the slot that we go to in ...","{'original_code': ""# --BEGIN MISSING CODE--\n ...","['cleaned_code', 'trimmed_code', 'indented_code']",cleaned_code,[' wordsIHaveSeen.get(addMe).incCount();'...,...,['Error: Compilation error: /tmp/tmpolqagvv3/C...,0.0,indented_code,Class,116,11,105,"[(76, 74), (79, 78), (107, 104), (116, 115), (...",1,Short
690,150,156,CounterTester,"['import junit.framework.TestCase;', 'import j...","[' int which = extractedWords.size ();', ...","[' } else {', ' // If k is less than t...",{'original_code': '# --BEGIN MISSING CODE--\nr...,"['cleaned_code', 'trimmed_code', 'indented_code']",cleaned_code,"['return null;', '} else {', ' return extra...",...,['Error: '],0.0,indented_code,Global_Variable,151,94,57,"[(76, 74), (79, 78), (107, 104), (116, 115), (...",2,Short
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
342,541,541,TopKTester,"['import junit.framework.TestCase;', 'import j...",[' k = numInserts;'],"[' ', ' // make sure the list is the rig...",{'original_code': '# --BEGIN MISSING CODE--\n ...,"['cleaned_code', 'trimmed_code', 'indented_code']",cleaned_code,"[' k = Math.min(k, numInserts);', ' ...",...,['Success: Final output matches the expected r...,1.0,trimmed_code,Variable,541,481,60,"[(167, 23), (190, 189), (212, 189), (215, 189)...",2,Short
344,559,559,TopKTester,"['import junit.framework.TestCase;', 'import j...",[' reverseOrNot = controlTest[1];'],"[' ', ' // create a list of random ints'...",{'original_code': '# --BEGIN MISSING CODE--\n ...,"['cleaned_code', 'trimmed_code', 'indented_code']",cleaned_code,"[' reverseOrNot = controlTest[1];', ' ...",...,['Success: Final output matches the expected r...,1.0,trimmed_code,Variable,559,553,6,"[(167, 23), (190, 189), (212, 189), (215, 189)...",4,Short
346,566,566,TopKTester,"['import junit.framework.TestCase;', 'import j...",[' list[i] = numInserts - 1 - i;'],"[' else', ' list[i] = i; ', ' }...","{'original_code': ""# --BEGIN MISSING CODE--\n ...","['cleaned_code', 'trimmed_code', 'indented_code']",cleaned_code,"[' if (reverseOrNot)', ' list[i] =...",...,['Success: Final output matches the expected r...,1.0,trimmed_code,Variable,566,553,13,"[(167, 23), (190, 189), (212, 189), (215, 189)...",5,Short
349,568,568,TopKTester,"['import junit.framework.TestCase;', 'import j...",[' list[i] = i; '],"[' }', ' ', ' // if we are looking fo...","{'original_code': ""# --BEGIN MISSING CODE--\n ...","['cleaned_code', 'trimmed_code', 'indented_code']",cleaned_code,"[' list[i] = i; ', ' }', ' if (ra...",...,['Success: Final output matches the expected r...,1.0,trimmed_code,Variable,568,564,4,"[(167, 23), (190, 189), (212, 189), (215, 189)...",7,Short


################ Analyzing Meta-Llama-3-70B-Instruct_no_afterlines ################
Number of rows with NaN values in 'comment_dist': 0


Unnamed: 0,start_line,end_line,code_task,before,between,after,gen_code_dict,gen_code_process_steps,selected_last_post_process_step,gen_code_selected,...,post_process_eval_res,post_process_pass_ratio,2nd_post_process_steps,Type,usage_line,def_line,abs_diff,list_of_tuples,comment_dist,range_class
1129,76,77,CounterTester,"['import junit.framework.TestCase;', 'import j...","[' return myWord.compareTo(w.myWord);', '...","["" // If the counts are not equal, subtract...",{'original_code': 'return myWord.compareTo(w.m...,"['cleaned_code', 'trimmed_code', 'indented_code']",cleaned_code,['return myWord.compareTo(w.myWord);'],...,['Success: Final output matches the expected r...,1.0,indented_code,Global_Variable,76,13,63,"[(76, 74), (79, 78), (107, 104), (116, 115), (...",2,Short
1132,79,80,CounterTester,"['import junit.framework.TestCase;', 'import j...","[' return w.myCount - myCount;', ' }']","['}', '', '', '/**', ' * This class is used to...",{'original_code': 'return w.myCount - myCount;...,"['cleaned_code', 'trimmed_code', 'indented_code']",cleaned_code,['return w.myCount - myCount;'],...,['Success: Final output matches the expected r...,1.0,indented_code,Global_Variable,79,14,65,"[(76, 74), (79, 78), (107, 104), (116, 115), (...",1,Short
1135,107,111,CounterTester,"['import junit.framework.TestCase;', 'import j...","[' ones.remove (addMe);', ' WordWrap...","['', ' // first check to see if the word is...",{'original_code': 'WordWrapper ww = wordsIHave...,"['cleaned_code', 'trimmed_code', 'indented_code']",cleaned_code,['WordWrapper ww = wordsIHaveSeen.get(addMe);'...,...,['Error: '],0.0,indented_code,Class,108,11,97,"[(76, 74), (79, 78), (107, 104), (116, 115), (...",3,Short
1143,116,117,CounterTester,"['import junit.framework.TestCase;', 'import j...",[' WordWrapper temp = wordsIHaveSeen.get ...,"['', ' // find the slot that we go to in ...",{'original_code': 'WordWrapper temp = wordsIHa...,"['cleaned_code', 'trimmed_code', 'indented_code']",cleaned_code,['WordWrapper temp = wordsIHaveSeen.get(addMe)...,...,['Success: Final output matches the expected r...,1.0,"['cleaned_code', 'trimmed_code', 'indented_code']",Class,116,11,105,"[(76, 74), (79, 78), (107, 104), (116, 115), (...",1,Short
1153,150,156,CounterTester,"['import junit.framework.TestCase;', 'import j...","[' int which = extractedWords.size ();', ...","[' } else {', ' // If k is less than t...","{'original_code': 'return null;', 'cleaned_cod...","['cleaned_code', 'trimmed_code', 'indented_code']",cleaned_code,['return null;'],...,['Error: '],0.0,indented_code,Global_Variable,151,94,57,"[(76, 74), (79, 78), (107, 104), (116, 115), (...",2,Short
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
934,541,541,TopKTester,"['import junit.framework.TestCase;', 'import j...",[' k = numInserts;'],"[' ', ' // make sure the list is the rig...","{'original_code': 'k = numInserts;', 'cleaned_...","['cleaned_code', 'trimmed_code', 'indented_code']",cleaned_code,['k = numInserts;'],...,['Success: Final output matches the expected r...,1.0,"['cleaned_code', 'trimmed_code', 'indented_code']",Variable,541,481,60,"[(167, 23), (190, 189), (212, 189), (215, 189)...",2,Short
936,559,559,TopKTester,"['import junit.framework.TestCase;', 'import j...",[' reverseOrNot = controlTest[1];'],"[' ', ' // create a list of random ints'...",{'original_code': ' reverseOrNot = contro...,"['cleaned_code', 'trimmed_code', 'indented_code']",cleaned_code,"[' reverseOrNot = controlTest[1];', ' ...",...,['Success: Final output matches the expected r...,1.0,trimmed_code,Variable,559,553,6,"[(167, 23), (190, 189), (212, 189), (215, 189)...",4,Short
938,566,566,TopKTester,"['import junit.framework.TestCase;', 'import j...",[' list[i] = numInserts - 1 - i;'],"[' else', ' list[i] = i; ', ' }...",{'original_code': 'list[i] = numInserts - 1 - ...,"['cleaned_code', 'trimmed_code', 'indented_code']",cleaned_code,"['list[i] = numInserts - 1 - i;', ' lis...",...,['Success: Final output matches the expected r...,1.0,trimmed_code,Variable,566,553,13,"[(167, 23), (190, 189), (212, 189), (215, 189)...",5,Short
941,568,568,TopKTester,"['import junit.framework.TestCase;', 'import j...",[' list[i] = i; '],"[' }', ' ', ' // if we are looking fo...",{'original_code': 'else\n list[i] = i; ...,"['cleaned_code', 'trimmed_code', 'indented_code']",cleaned_code,"[' list[i] = i; ', ' }', ' if (ra...",...,['Success: Final output matches the expected r...,1.0,trimmed_code,Variable,568,564,4,"[(167, 23), (190, 189), (212, 189), (215, 189)...",7,Short


################ Analyzing phi-3-mini-4k_no_afterlines ################
Number of rows with NaN values in 'comment_dist': 0


Unnamed: 0,start_line,end_line,code_task,before,between,after,gen_code_dict,gen_code_process_steps,selected_last_post_process_step,gen_code_selected,...,post_process_eval_res,post_process_pass_ratio,2nd_post_process_steps,Type,usage_line,def_line,abs_diff,list_of_tuples,comment_dist,range_class
454,76,77,CounterTester,"['import junit.framework.TestCase;', 'import j...","[' return myWord.compareTo(w.myWord);', '...","["" // If the counts are not equal, subtract...",{'original_code': 'public int compareTo (WordW...,"['cleaned_code', 'trimmed_code', 'indented_code']",cleaned_code,"['public int compareTo (WordWrapper w) {', ' ...",...,['Error: Compilation error: /tmp/tmp9pz56aut/C...,0.0,indented_code,Global_Variable,76,13,63,"[(76, 74), (79, 78), (107, 104), (116, 115), (...",2,Short
457,79,80,CounterTester,"['import junit.framework.TestCase;', 'import j...","[' return w.myCount - myCount;', ' }']","['}', '', '', '/**', ' * This class is used to...",{'original_code': '} // Closing brace for the ...,"['cleaned_code', 'trimmed_code', 'indented_code']",cleaned_code,['} // Closing brace for the WordWrapper class'],...,['Error: Compilation error: /tmp/tmprggjwgmo/C...,0.0,indented_code,Global_Variable,79,14,65,"[(76, 74), (79, 78), (107, 104), (116, 115), (...",1,Short
460,107,111,CounterTester,"['import junit.framework.TestCase;', 'import j...","[' ones.remove (addMe);', ' WordWrap...","['', ' // first check to see if the word is...",{'original_code': 'public void insert(String a...,"['cleaned_code', 'trimmed_code', 'indented_code']",cleaned_code,"['public void insert(String addMe) {', ' Wo...",...,"[""Error: Compilation error: /tmp/tmp3a4f8mjg/C...",0.0,indented_code,Class,108,11,97,"[(76, 74), (79, 78), (107, 104), (116, 115), (...",3,Short
468,116,117,CounterTester,"['import junit.framework.TestCase;', 'import j...",[' WordWrapper temp = wordsIHaveSeen.get ...,"['', ' // find the slot that we go to in ...",{'original_code': 'public void insert(String a...,"['cleaned_code', 'trimmed_code', 'indented_code']",cleaned_code,"['public void insert(String addMe) {', ' if...",...,['Error: Compilation error: /tmp/tmp16fp_l2h/C...,0.0,indented_code,Class,116,11,105,"[(76, 74), (79, 78), (107, 104), (116, 115), (...",1,Short
478,150,156,CounterTester,"['import junit.framework.TestCase;', 'import j...","[' int which = extractedWords.size ();', ...","[' } else {', ' // If k is less than t...",{'original_code': 'public String getKthMostFre...,"['cleaned_code', 'trimmed_code', 'indented_code']",cleaned_code,"['public String getKthMostFrequent(int k) {', ...",...,['Error: Compilation error: /tmp/tmpsp6vxayy/C...,0.0,indented_code,Global_Variable,151,94,57,"[(76, 74), (79, 78), (107, 104), (116, 115), (...",2,Short
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1263,541,541,TopKTester,"['import junit.framework.TestCase;', 'import j...",[' k = numInserts;'],"[' ', ' // make sure the list is the rig...",{'original_code': '```java\n// Ensure the corr...,"['cleaned_code', 'trimmed_code', 'indented_code']",cleaned_code,"['', '// Ensure the correct number of elements...",...,['Error: '],0.0,indented_code,Variable,541,481,60,"[(167, 23), (190, 189), (212, 189), (215, 189)...",2,Short
1265,559,559,TopKTester,"['import junit.framework.TestCase;', 'import j...",[' reverseOrNot = controlTest[1];'],"[' ', ' // create a list of random ints'...",{'original_code': 'private void testBalance (i...,"['cleaned_code', 'trimmed_code', 'indented_code']",cleaned_code,"['private void testBalance (int numInserts, in...",...,['Error: Compilation error: /tmp/tmpl__5qna9/T...,0.0,indented_code,Variable,559,553,6,"[(167, 23), (190, 189), (212, 189), (215, 189)...",4,Short
1267,566,566,TopKTester,"['import junit.framework.TestCase;', 'import j...",[' list[i] = numInserts - 1 - i;'],"[' else', ' list[i] = i; ', ' }...",{'original_code': '// --BEGIN MISSING CODE--\n...,"['cleaned_code', 'trimmed_code', 'indented_code']",cleaned_code,"['if (!randomOrNot) {', ' for (int j = 0; j...",...,['Error: '],0.0,indented_code,Variable,566,553,13,"[(167, 23), (190, 189), (212, 189), (215, 189)...",5,Short
1270,568,568,TopKTester,"['import junit.framework.TestCase;', 'import j...",[' list[i] = i; '],"[' }', ' ', ' // if we are looking fo...",{'original_code': 'private void testBalance (i...,"['cleaned_code', 'trimmed_code', 'indented_code']",cleaned_code,"['private void testBalance (int numInserts, in...",...,['Success: Final output matches the expected r...,1.0,indented_code,Variable,568,564,4,"[(167, 23), (190, 189), (212, 189), (215, 189)...",7,Short


################ Analyzing gpt-3.5-turbo-0125_no_afterlines ################
Number of rows with NaN values in 'comment_dist': 0


Unnamed: 0,start_line,end_line,code_task,before,between,after,gen_code_dict,gen_code_process_steps,selected_last_post_process_step,gen_code_selected,...,post_process_eval_res,post_process_pass_ratio,2nd_post_process_steps,Type,usage_line,def_line,abs_diff,list_of_tuples,comment_dist,range_class
1217,76,77,CounterTester,"['import junit.framework.TestCase;', 'import j...","[' return myWord.compareTo(w.myWord);', '...","["" // If the counts are not equal, subtract...",{'original_code': ' return myWord.compareTo...,"['cleaned_code', 'trimmed_code', 'indented_code']",cleaned_code,"[' return myWord.compareTo(w.myWord);', ' ...",...,['Success: Final output matches the expected r...,1.0,indented_code,Global_Variable,76,13,63,"[(76, 74), (79, 78), (107, 104), (116, 115), (...",2,Short
1220,79,80,CounterTester,"['import junit.framework.TestCase;', 'import j...","[' return w.myCount - myCount;', ' }']","['}', '', '', '/**', ' * This class is used to...",{'original_code': ' return myCount - w....,"['cleaned_code', 'trimmed_code', 'indented_code']",cleaned_code,"[' return myCount - w.myCount;', ' }...",...,['Error: '],0.0,indented_code,Global_Variable,79,14,65,"[(76, 74), (79, 78), (107, 104), (116, 115), (...",1,Short
1223,107,111,CounterTester,"['import junit.framework.TestCase;', 'import j...","[' ones.remove (addMe);', ' WordWrap...","['', ' // first check to see if the word is...",{'original_code': ' WordWrapper ww = word...,"['cleaned_code', 'trimmed_code', 'indented_code']",cleaned_code,[' WordWrapper ww = wordsIHaveSeen.get(ad...,...,['Error: Problem reading file\njava.lang.NullP...,0.0,indented_code,Class,108,11,97,"[(76, 74), (79, 78), (107, 104), (116, 115), (...",3,Short
1231,116,117,CounterTester,"['import junit.framework.TestCase;', 'import j...",[' WordWrapper temp = wordsIHaveSeen.get ...,"['', ' // find the slot that we go to in ...",{'original_code': ' WordWrapper temp = wo...,"['cleaned_code', 'trimmed_code', 'indented_code']",cleaned_code,[' WordWrapper temp = wordsIHaveSeen.get(...,...,"[""Error: Compilation error: /tmp/tmp59xuonmp/C...",0.0,indented_code,Class,116,11,105,"[(76, 74), (79, 78), (107, 104), (116, 115), (...",1,Short
1241,150,156,CounterTester,"['import junit.framework.TestCase;', 'import j...","[' int which = extractedWords.size ();', ...","[' } else {', ' // If k is less than t...",{'original_code': ' return null;\n }...,"['cleaned_code', 'trimmed_code', 'indented_code']",cleaned_code,"[' return null;', ' }', ' return ...",...,['Error: '],0.0,indented_code,Global_Variable,151,94,57,"[(76, 74), (79, 78), (107, 104), (116, 115), (...",2,Short
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
557,541,541,TopKTester,"['import junit.framework.TestCase;', 'import j...",[' k = numInserts;'],"[' ', ' // make sure the list is the rig...",{'original_code': '```java\nif (k > numInserts...,"['cleaned_code', 'trimmed_code', 'indented_code']",cleaned_code,"['', 'if (k > numInserts) {', ' assertEquals(...",...,['Error: '],0.0,indented_code,Variable,541,481,60,"[(167, 23), (190, 189), (212, 189), (215, 189)...",2,Short
559,559,559,TopKTester,"['import junit.framework.TestCase;', 'import j...",[' reverseOrNot = controlTest[1];'],"[' ', ' // create a list of random ints'...",{'original_code': '```java\nreverseOrNot = con...,"['cleaned_code', 'trimmed_code', 'indented_code']",cleaned_code,"['reverseOrNot = controlTest[1];', 'ITopKMachi...",...,['Success: Final output matches the expected r...,1.0,trimmed_code,Variable,559,553,6,"[(167, 23), (190, 189), (212, 189), (215, 189)...",4,Short
561,566,566,TopKTester,"['import junit.framework.TestCase;', 'import j...",[' list[i] = numInserts - 1 - i;'],"[' else', ' list[i] = i; ', ' }...",{'original_code': 'for (int i = 0; i < numInse...,"['cleaned_code', 'trimmed_code', 'indented_code']",cleaned_code,"['for (int i = 0; i < numInserts; i++) {', ' ...",...,['Success: Final output matches the expected r...,1.0,trimmed_code,Variable,566,553,13,"[(167, 23), (190, 189), (212, 189), (215, 189)...",5,Short
564,568,568,TopKTester,"['import junit.framework.TestCase;', 'import j...",[' list[i] = i; '],"[' }', ' ', ' // if we are looking fo...",{'original_code': '```java\nlist[i] = i;\n```\...,"['cleaned_code', 'trimmed_code', 'indented_code']",cleaned_code,"['', 'list[i] = i;', '']",...,['Success: Final output matches the expected r...,1.0,"['cleaned_code', 'trimmed_code', 'indented_code']",Variable,568,564,4,"[(167, 23), (190, 189), (212, 189), (215, 189)...",7,Short


################ Analyzing claude-3-opus-20240229_no_afterlines ################
Number of rows with NaN values in 'comment_dist': 0


Unnamed: 0,start_line,end_line,code_task,before,between,after,gen_code_dict,gen_code_process_steps,selected_last_post_process_step,gen_code_selected,...,post_process_eval_res,post_process_pass_ratio,2nd_post_process_steps,Type,usage_line,def_line,abs_diff,list_of_tuples,comment_dist,range_class
1217,76,77,CounterTester,"['import junit.framework.TestCase;', 'import j...","[' return myWord.compareTo(w.myWord);', '...","["" // If the counts are not equal, subtract...",{'original_code': 'Here is the generated code ...,"['cleaned_code', 'trimmed_code', 'indented_code']",cleaned_code,"[' return myWord.compareTo(w.myWord);', '...",...,['Success: Final output matches the expected r...,1.0,indented_code,Global_Variable,76,13,63,"[(76, 74), (79, 78), (107, 104), (116, 115), (...",2,Short
1220,79,80,CounterTester,"['import junit.framework.TestCase;', 'import j...","[' return w.myCount - myCount;', ' }']","['}', '', '', '/**', ' * This class is used to...",{'original_code': 'Here is the missing code to...,"['cleaned_code', 'trimmed_code', 'indented_code']",cleaned_code,"[' return w.myCount - myCount;', ' }']",...,['Success: Final output matches the expected r...,1.0,"['cleaned_code', 'trimmed_code', 'indented_code']",Global_Variable,79,14,65,"[(76, 74), (79, 78), (107, 104), (116, 115), (...",1,Short
1223,107,111,CounterTester,"['import junit.framework.TestCase;', 'import j...","[' ones.remove (addMe);', ' WordWrap...","['', ' // first check to see if the word is...",{'original_code': 'Here is the generated code ...,"['cleaned_code', 'trimmed_code', 'indented_code']",cleaned_code,"[' }', ' ones.put (addMe, addMe);', ' ...",...,['Error: '],0.0,indented_code,Class,108,11,97,"[(76, 74), (79, 78), (107, 104), (116, 115), (...",3,Short
1231,116,117,CounterTester,"['import junit.framework.TestCase;', 'import j...",[' WordWrapper temp = wordsIHaveSeen.get ...,"['', ' // find the slot that we go to in ...",{'original_code': 'Here is the generated code ...,"['cleaned_code', 'trimmed_code', 'indented_code']",cleaned_code,[' WordWrapper temp = wordsIHaveSeen.get ...,...,"[""Error: Compilation error: /tmp/tmpymxkys4m/C...",0.0,indented_code,Class,116,11,105,"[(76, 74), (79, 78), (107, 104), (116, 115), (...",1,Short
1241,150,156,CounterTester,"['import junit.framework.TestCase;', 'import j...","[' int which = extractedWords.size ();', ...","[' } else {', ' // If k is less than t...",{'original_code': 'Here is the generated code ...,"['cleaned_code', 'trimmed_code', 'indented_code']",cleaned_code,"[' return null;', ' }', ' return ex...",...,['Error: '],0.0,indented_code,Global_Variable,151,94,57,"[(76, 74), (79, 78), (107, 104), (116, 115), (...",2,Short
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1151,541,541,TopKTester,"['import junit.framework.TestCase;', 'import j...",[' k = numInserts;'],"[' ', ' // make sure the list is the rig...",{'original_code': 'Here is the missing Java co...,"['cleaned_code', 'trimmed_code', 'indented_code']",cleaned_code,"[' k = numInserts;', ' for (int i = 0;...",...,['Success: Final output matches the expected r...,1.0,trimmed_code,Variable,541,481,60,"[(167, 23), (190, 189), (212, 189), (215, 189)...",2,Short
1153,559,559,TopKTester,"['import junit.framework.TestCase;', 'import j...",[' reverseOrNot = controlTest[1];'],"[' ', ' // create a list of random ints'...",{'original_code': 'Here is the missing code to...,"['cleaned_code', 'trimmed_code', 'indented_code']",cleaned_code,[' reverseOrNot = controlTest[1];'],...,['Success: Final output matches the expected r...,1.0,"['cleaned_code', 'trimmed_code', 'indented_code']",Variable,559,553,6,"[(167, 23), (190, 189), (212, 189), (215, 189)...",4,Short
1155,566,566,TopKTester,"['import junit.framework.TestCase;', 'import j...",[' list[i] = numInserts - 1 - i;'],"[' else', ' list[i] = i; ', ' }...",{'original_code': 'Here is the missing code to...,"['cleaned_code', 'trimmed_code', 'indented_code']",cleaned_code,"[' list[i] = numInserts - 1 - i;', ' ...",...,['Success: Final output matches the expected r...,1.0,trimmed_code,Variable,566,553,13,"[(167, 23), (190, 189), (212, 189), (215, 189)...",5,Short
1158,568,568,TopKTester,"['import junit.framework.TestCase;', 'import j...",[' list[i] = i; '],"[' }', ' ', ' // if we are looking fo...",{'original_code': 'Here is the missing code to...,"['cleaned_code', 'trimmed_code', 'indented_code']",cleaned_code,"[' list[i] = i; ', ' }', ' if (ra...",...,['Success: Final output matches the expected r...,1.0,trimmed_code,Variable,568,564,4,"[(167, 23), (190, 189), (212, 189), (215, 189)...",7,Short


################ Analyzing deepseek-coder-7b-instruct_no_afterlines ################
Number of rows with NaN values in 'comment_dist': 0


Unnamed: 0,start_line,end_line,code_task,before,between,after,gen_code_dict,gen_code_process_steps,selected_last_post_process_step,gen_code_selected,...,post_process_eval_res,post_process_pass_ratio,2nd_post_process_steps,Type,usage_line,def_line,abs_diff,list_of_tuples,comment_dist,range_class
365,76,77,CounterTester,"['import junit.framework.TestCase;', 'import j...","[' return myWord.compareTo(w.myWord);', '...","["" // If the counts are not equal, subtract...",{'original_code': '```java\n return myWor...,"['cleaned_code', 'trimmed_code', 'indented_code']",cleaned_code,"['', ' return myWord.compareTo(w.myWord);...",...,['Error: Compilation error: /tmp/tmpvdx_hb1m/C...,0.0,indented_code,Global_Variable,76,13,63,"[(76, 74), (79, 78), (107, 104), (116, 115), (...",2,Short
368,79,80,CounterTester,"['import junit.framework.TestCase;', 'import j...","[' return w.myCount - myCount;', ' }']","['}', '', '', '/**', ' * This class is used to...",{'original_code': '```java\n return myCount...,"['cleaned_code', 'trimmed_code', 'indented_code']",cleaned_code,"['', ' return myCount - w.myCount;', ' }',...",...,['Error: '],0.0,indented_code,Global_Variable,79,14,65,"[(76, 74), (79, 78), (107, 104), (116, 115), (...",1,Short
371,107,111,CounterTester,"['import junit.framework.TestCase;', 'import j...","[' ones.remove (addMe);', ' WordWrap...","['', ' // first check to see if the word is...",{'original_code': 'Here is the missing code:\n...,"['cleaned_code', 'trimmed_code', 'indented_code']",cleaned_code,"['', ' String existing = ones.get(addMe);...",...,['Error: Problem reading file\njava.lang.NullP...,0.0,indented_code,Class,108,11,97,"[(76, 74), (79, 78), (107, 104), (116, 115), (...",3,Short
379,116,117,CounterTester,"['import junit.framework.TestCase;', 'import j...",[' WordWrapper temp = wordsIHaveSeen.get ...,"['', ' // find the slot that we go to in ...",{'original_code': '```java\n wordsIHaveSe...,"['cleaned_code', 'trimmed_code', 'indented_code']",cleaned_code,"['', ' wordsIHaveSeen.get(addMe).incCount...",...,['Error: Compilation error: /tmp/tmpbprubfnt/C...,0.0,indented_code,Class,116,11,105,"[(76, 74), (79, 78), (107, 104), (116, 115), (...",1,Short
389,150,156,CounterTester,"['import junit.framework.TestCase;', 'import j...","[' int which = extractedWords.size ();', ...","[' } else {', ' // If k is less than t...",{'original_code': '```java\n return new S...,"['cleaned_code', 'trimmed_code', 'indented_code']",cleaned_code,"['', ' return new String (extractedWords....",...,['Error: Compilation error: /tmp/tmpmfw3rb6v/C...,0.0,indented_code,Global_Variable,151,94,57,"[(76, 74), (79, 78), (107, 104), (116, 115), (...",2,Short
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
158,541,541,TopKTester,"['import junit.framework.TestCase;', 'import j...",[' k = numInserts;'],"[' ', ' // make sure the list is the rig...",{'original_code': '```java\n for (int i = 0...,"['cleaned_code', 'trimmed_code', 'indented_code']",cleaned_code,"['', ' for (int i = 0; i < k && i < retVal....",...,['Error: '],0.0,indented_code,Variable,541,481,60,"[(167, 23), (190, 189), (212, 189), (215, 189)...",2,Short
160,559,559,TopKTester,"['import junit.framework.TestCase;', 'import j...",[' reverseOrNot = controlTest[1];'],"[' ', ' // create a list of random ints'...",{'original_code': '```java\n reverseOrNot...,"['cleaned_code', 'trimmed_code', 'indented_code']",cleaned_code,"[' reverseOrNot = controlTest[1];', ' ...",...,['Success: Final output matches the expected r...,1.0,indented_code,Variable,559,553,6,"[(167, 23), (190, 189), (212, 189), (215, 189)...",4,Short
162,566,566,TopKTester,"['import junit.framework.TestCase;', 'import j...",[' list[i] = numInserts - 1 - i;'],"[' else', ' list[i] = i; ', ' }...","{'original_code': ""```java\n list[i] = ...","['cleaned_code', 'trimmed_code', 'indented_code']",cleaned_code,"[' list[i] = numInserts - 1 - i;', ' ...",...,"[""Error: Compilation error: /tmp/tmp3fdu6o8p/T...",0.0,indented_code,Variable,566,553,13,"[(167, 23), (190, 189), (212, 189), (215, 189)...",5,Short
165,568,568,TopKTester,"['import junit.framework.TestCase;', 'import j...",[' list[i] = i; '],"[' }', ' ', ' // if we are looking fo...","{'original_code': ""```java\n list[i] = ...","['cleaned_code', 'trimmed_code', 'indented_code']",cleaned_code,"[' list[i] = i; ', ' }', ' }', ...",...,['Success: Final output matches the expected r...,1.0,trimmed_code,Variable,568,564,4,"[(167, 23), (190, 189), (212, 189), (215, 189)...",7,Short
