In [1]:
import pandas as pd

In [2]:
def _count_refactoring_combos(df):
    """Count rows per refactoring type and change-type/effect-size combination.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with the columns ``effect_size_interpretation``,
        ``refactorings`` and ``change_type``.  The input frame is not
        modified.

    Returns
    -------
    pandas.DataFrame
        Integer table indexed by individual refactoring, with one column per
        ``"<change_type>_<effect_size_interpretation>"`` value; combinations
        that never occur are 0.
    """
    exploded = (
        # Keep only rows with a large or small effect size.
        df.loc[df["effect_size_interpretation"].isin(["large", "small"])]
        # .assign builds a new frame, so nothing is written onto a filtered
        # slice (which is what raises SettingWithCopyWarning).
        .assign(
            # regex=False is essential: without it pandas compiles any
            # multi-character pattern as a regular expression, and " + " as a
            # regex means "two or more spaces", so "A + B" would never split.
            refactorings=lambda d: d["refactorings"].str.split(" + ", regex=False),
            change_effect_combo=lambda d: (
                d["change_type"] + "_" + d["effect_size_interpretation"]
            ),
        )
        # One row per individual refactoring so each is counted separately.
        .explode("refactorings")
    )

    return (
        exploded.groupby(["refactorings", "change_effect_combo"])
        .size()
        # fill_value=0 keeps the counts integral instead of going through NaN.
        .unstack(fill_value=0)
        .astype(int)
    )


def refactoring_unique_count(
    file_path, output_path="../../refactorings_change_effect_combo.csv"
):
    """Tabulate refactorings against change-type/effect-size combos from a CSV.

    Reads ``file_path``, keeps rows whose ``effect_size_interpretation`` is
    ``"large"`` or ``"small"``, splits ``refactorings`` on the literal
    separator ``" + "``, and counts how often each individual refactoring
    occurs for every ``"<change_type>_<effect_size_interpretation>"`` value.
    The resulting table is printed and written to ``output_path`` as CSV.

    Parameters
    ----------
    file_path : str or path-like
        CSV file to load with ``pandas.read_csv``.
    output_path : str or path-like, optional
        Destination of the CSV report.  Defaults to the location this
        function has always written to.

    Raises
    ------
    KeyError
        If one of the required columns is missing from the dataset.
    """
    result = _count_refactoring_combos(pd.read_csv(file_path))

    # Display the full table (to_string avoids pandas' row/column truncation).
    print(result.to_string())
    result.to_csv(output_path)

In [None]:
# Run the report on the refactoring-annotated dataset.
# The relative path is resolved against the kernel's working directory.
file_path = "../data/dataset_with_refactorings.csv"
refactoring_unique_count(file_path=file_path)