In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
from collections import defaultdict
from matplotlib.ticker import FixedLocator, FixedFormatter

In [None]:
# Global seaborn styling; applies to every figure created in the cells below.
sns.set_theme(style="whitegrid", context="paper", font_scale=1.2)

In [None]:
# Color-blind-friendly palette
# NOTE(review): 'Paired' is a general qualitative palette; seaborn also ships a
# dedicated 'colorblind' palette — confirm 'Paired' meets the accessibility goal.
color_blind_palette = sns.color_palette('Paired')
ai_color = color_blind_palette[0]     # always passed first in `palette=[ai_color, human_color]`
human_color = color_blind_palette[1]  # always passed second in `palette=[ai_color, human_color]`

In [None]:
# Refactoring category names. Only the keys are used in the visible code (as
# candidate column names for numeric coercion); every value is an empty list.
# NOTE(review): apart from "Other", these keys differ from the column names in
# `categories_to_visualize_boxplots` — confirm which naming the CSV actually uses.
REFACTORING_CATEGORIES = {
    "Readability Improvements": [],
    "Structural Changes": [],
    "Parameter Adjustments": [],
    "Method Modifications": [],
    "Other": []
}

In [None]:
# Load CSV data
# `df` holds the aggregated repository metrics and feeds the box plots below.
csv_file = "aggregated_repo_metrics_with_types.csv"
df = pd.read_csv(csv_file)

In [None]:
# Ensure numeric columns for refactoring categories.
# Only REFACTORING_CATEGORIES keys that actually exist as columns are touched;
# values that cannot be parsed become NaN (errors='coerce').
# NOTE(review): the columns plotted later (`categories_to_visualize_boxplots`) use
# different names, so apart from "Other" they are not covered by this step.
for col in [key for key in REFACTORING_CATEGORIES if key in df.columns]:
    df[col] = pd.to_numeric(df[col], errors='coerce')

--------------------------
1) Box Plots
--------------------------

In [None]:

# Columns of `df` drawn as box plots, one figure per entry.
categories_to_visualize_boxplots = [
    "Readability Improvements (Renaming)",
    "Parameter Adjustment",
    "Structural Refactoring (Modularity)",
    "Other"
]

In [None]:
# One box plot per refactoring category, comparing the repository types found in
# the 'Human/AI-Coauthored' column. Each figure is saved as a PNG named after the
# category (spaces replaced by underscores).
for category in categories_to_visualize_boxplots:
    # The numeric-coercion step earlier in the notebook only covers the
    # REFACTORING_CATEGORIES keys, which (apart from "Other") are not the columns
    # plotted here. Coerce the plotted column on a local copy so stray non-numeric
    # cells become NaN instead of turning the y-axis categorical; `df` is untouched.
    plot_data = df[['Human/AI-Coauthored', category]].copy()
    plot_data[category] = pd.to_numeric(plot_data[category], errors='coerce')

    fig = plt.figure(figsize=(7, 3.5), dpi=300)
    sns.boxplot(
        x='Human/AI-Coauthored',
        y=category,
        data=plot_data,
        palette=[ai_color, human_color],
        width=0.6,
        fliersize=3,
        linewidth=1,
        showmeans=False,
        medianprops={"color": "black", "linewidth": 1},
        whiskerprops={"color": "gray"},
        capprops={"color": "gray"},
        # The white facecolor is applied on top of the palette colors.
        boxprops={"edgecolor": "gray", "facecolor": "white"}
    )
    plt.title(f'Distribution of {category} Refactorings', fontsize=10, fontweight='semibold')
    plt.xlabel('Repository Type', fontsize=9)
    plt.ylabel('Refactoring Count', fontsize=9)
    plt.xticks(rotation=0, ha='center', fontsize=7, fontweight='semibold')
    plt.yticks(fontsize=8)
    sns.despine(left=False, bottom=False)
    plt.tight_layout()
    plt.savefig(f'boxplot_{category.replace(" ", "_")}.png', bbox_inches='tight')
    plt.show()
    # Release the figure explicitly so figures do not pile up across iterations
    # on backends that keep them open after show().
    plt.close(fig)

--------------------------
2) Grouped Bar Chart (Category Counts)
--------------------------

In [None]:
# 'Category' values kept for the grouped bar chart; the list order is also the
# left-to-right order of the bar groups (it is passed as `order=` to the plot).
categories_for_barchart = [
    "Naming Improvements",
    "Parameter Modifications",
    "Method Composition",
    "Method Movement",
]

In [None]:
# NOTE(review): this re-reads the same file as the initial load cell and rebinds
# `df`, so the numeric coercion applied to `df` earlier is discarded from here on.
# Confirm the reload is intentional; otherwise this cell is redundant.
csv_file = "aggregated_repo_metrics_with_types.csv"
df = pd.read_csv(csv_file)

In [None]:
# Per-category refactoring counts. The cells below read its 'Category',
# 'AI-Coauthored' and 'Human Written' columns.
category_counts_csv = "refactoring_category_counts.csv"
category_counts_df = pd.read_csv(category_counts_csv)

In [None]:
# Keep only the categories chosen for the bar chart ...
bar_chart_df = category_counts_df.loc[
    category_counts_df['Category'].isin(categories_for_barchart)
]
# ... and reshape wide -> long: one row per (Category, Repo Type) with its Count,
# which is the layout seaborn needs for hue-grouped bars.
bar_chart_melted = pd.melt(
    bar_chart_df,
    id_vars='Category',
    value_vars=['AI-Coauthored', 'Human Written'],
    var_name='Repo Type',
    value_name='Count',
)

In [None]:
# Create Wrapped Labels Manually

def wrap_label(text, width=15):
    """Return ``text`` split onto two lines for use as an axis tick label.

    Any label containing "Readability" is replaced by the fixed two-line label
    "Readability" / "Improvements". Otherwise, a label longer than ``width``
    characters is broken at its first space. Labels that are short enough, or
    that contain no space, are returned unchanged.

    Args:
        text: The label to wrap.
        width: Maximum length (in characters) a label may have before it is split.

    Returns:
        The label, containing at most one inserted newline.
    """
    if "Readability" in text:
        return "Readability\nImprovements"
    if len(text) <= width:
        return text
    # Break only at the first space; everything after it stays on the second line.
    first_word, space, remainder = text.partition(" ")
    return f"{first_word}\n{remainder}" if space else text

In [None]:
# Tick labels for the bar chart: one wrapped label per category, in plotting order.
wrapped_labels = [
    wrap_label(category_name, width=15)
    for category_name in categories_for_barchart
]

In [None]:

# Build, annotate, and save the grouped bar chart in ONE cell.
# The pyplot calls below (title, labels, legend, savefig) act on the "current
# figure". The default inline notebook backend closes figures at the end of every
# cell, so spreading these calls over several cells makes the later ones operate
# on new, empty figures and the saved files no longer contain the chart.
plt.figure(figsize=(8, 4.5), dpi=300)
ax = sns.barplot(
    data=bar_chart_melted,
    x='Category',
    y='Count',
    hue='Repo Type',
    palette=[ai_color, human_color],
    order=categories_for_barchart,
    saturation=0.85,
    width=0.8,
    # Hide error bars. Replaces `errwidth=0`, which is deprecated as of
    # seaborn 0.13; `errorbar` is available in every release that accepts `width`.
    errorbar=None,
)

# Pin one tick per category and show the wrapped (two-line) labels on them.
ax.xaxis.set_major_locator(FixedLocator(np.arange(len(categories_for_barchart))))
ax.xaxis.set_major_formatter(FixedFormatter(wrapped_labels))

plt.title('Refactoring Category Counts by Repository Type', fontsize=11, fontweight='semibold')
plt.xlabel('Refactoring Category', fontsize=8)
plt.ylabel('Number of Refactorings', fontsize=9)
plt.xticks(rotation=0, ha='center', fontsize=8, fontweight='semibold')
plt.yticks(fontsize=8)

# Add value labels on top of bars
for container in ax.containers:
    ax.bar_label(container, fontsize=8, fmt='%d', fontweight='semibold')

plt.legend(title='Repository Type', fontsize=8, frameon=False, loc='upper right')

# Extra bottom margin for the two-line tick labels.
plt.subplots_adjust(bottom=0.22)

plt.savefig('grouped_barchart_ref_categories.png', dpi=300, bbox_inches='tight')
plt.savefig('cat.pdf', dpi=300, bbox_inches='tight')
plt.show()


--------------------------
3) PyRef refactoring types
--------------------------

In [None]:

# Tallies keyed by refactoring type, one dict per repository type.
# defaultdict(int) makes a type that was never seen read as 0.
pyref_type_counts_ai = defaultdict(int)
pyref_type_counts_human = defaultdict(int)

In [None]:
# Count, per repository type, how many repositories have each refactoring type as
# their most common one.

# Start from empty tallies so re-running this cell does not double-count.
pyref_type_counts_ai.clear()
pyref_type_counts_human.clear()

# Iterate the two needed columns directly instead of materialising every row
# with iterrows().
for repo_type, most_common_type in zip(
    df['Human/AI-Coauthored'],
    df['Most Common Refactoring Type'],  # e.g., "Rename Method"
):
    # read_csv turns empty cells (and, with pandas' default NA markers, the text
    # "None") into NaN. NaN passes a plain `!= "None"` check and would become a
    # dictionary key, which later breaks sorted() on mixed float/str keys.
    if pd.isna(most_common_type) or most_common_type == "None":
        continue
    if repo_type == 'AI-Coauthored':
        pyref_type_counts_ai[most_common_type] += 1
    elif repo_type == 'Human Written':
        pyref_type_counts_human[most_common_type] += 1

In [None]:
# Build the plotting records: for every refactoring type seen in either tally,
# emit one record for AI-coauthored and one for human-written repositories
# (in that order). A type missing from one tally reads as 0 via the defaultdict.
unique_types = sorted({*pyref_type_counts_ai, *pyref_type_counts_human})
data_list = [
    {
        'Refactoring Type': refactoring_type,
        'Count': tally[refactoring_type],
        'Repo Type': repo_label,
    }
    for refactoring_type in unique_types
    for repo_label, tally in (
        ('AI-Coauthored', pyref_type_counts_ai),
        ('Human Written', pyref_type_counts_human),
    )
]

In [None]:
# Long-format frame (one row per refactoring type and repository type) so the
# bar plot can group bars by 'Repo Type'.
pyref_type_counts_df = pd.DataFrame(data_list)

In [None]:
# Grouped bar chart: for each PyRef refactoring type, the number of repositories
# (per repository type) whose most common refactoring is that type.
# All pyplot calls stay in this one cell because they act on the current figure.
plt.figure(figsize=(7, 5.5), dpi=300)
sns.barplot(
    data=pyref_type_counts_df,
    x='Refactoring Type',
    y='Count',
    hue='Repo Type',
    palette=[ai_color, human_color],
    order=unique_types  # alphabetical, since unique_types is sorted
)
plt.title('Most Common PyRef Refactoring Types by Repository Type', fontsize=10, fontweight='semibold')
plt.xlabel('PyRef Refactoring Type', fontsize=9)
plt.ylabel('Number of Repositories', fontsize=9)
# NOTE(review): labels are unrotated at 7pt; check for overlap if there are many types.
plt.xticks(rotation=0, ha='center', fontsize=7, fontweight='semibold')
plt.yticks(fontsize=8)
plt.legend(title='Repository Type', fontsize=8, frameon=False, loc='best')
sns.despine(left=False, bottom=False)
plt.tight_layout()
plt.savefig('grouped_barchart_pyref_types.png', bbox_inches='tight')
plt.show()