In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Load all baseline results
cm1_baseline = pd.read_csv('../results/CM1_baseline_results.csv')
pc1_baseline = pd.read_csv('../results/PC1_baseline_results.csv')
jm1_baseline = pd.read_csv('../results/JM1_baseline_results.csv')
kc1_baseline = pd.read_csv('../results/KC1_baseline_results.csv')

# Load all CodeBERT results
cm1_codebert = pd.read_csv('../results/cm1_codebert_results.csv')
pc1_codebert = pd.read_csv('../results/pc1_codebert_results.csv')
jm1_codebert = pd.read_csv('../results/jm1_codebert_results.csv')
kc1_codebert = pd.read_csv('../results/kc1_codebert_results.csv')

print("âœ… All results loaded!")

âœ… All results loaded!


In [2]:
# Combine all results
all_results = pd.concat([
    cm1_baseline, cm1_codebert,
    pc1_baseline, pc1_codebert,
    jm1_baseline, jm1_codebert,
    kc1_baseline, kc1_codebert
], ignore_index=True)

print("="*100)
print("COMPLETE RESULTS TABLE - ALL MODELS, ALL DATASETS")
print("="*100)
print(all_results.to_string(index=False))

# Save
all_results.to_csv('../results/complete_results_table.csv', index=False)
print("\nâœ… Saved: results/complete_results_table.csv")

COMPLETE RESULTS TABLE - ALL MODELS, ALL DATASETS
   Model Dataset  Accuracy  Precision  Recall  F1-Score  AUC-ROC
      RF     CM1    0.8652     0.2500  0.1000    0.1429   0.6810
     SVM     CM1    0.7303     0.2500  0.7000    0.3684   0.7975
     MLP     CM1    0.8876     0.0000  0.0000    0.0000   0.7101
CodeBERT     CM1    0.8876     0.0000  0.0000    0.0000   0.6873
      RF     PC1    0.9005     0.3077  0.2857    0.2963   0.8277
     SVM     PC1    0.7749     0.2041  0.7143    0.3175   0.8093
     MLP     PC1    0.9267     0.5000  0.1429    0.2222   0.7785
CodeBERT     PC1    0.9267     0.0000  0.0000    0.0000   0.7147
      RF     JM1    0.7448     0.4346  0.4378    0.4362   0.7084
     SVM     JM1    0.6983     0.3794  0.5323    0.4431   0.7091
     MLP     JM1    0.7897     0.6392  0.1542    0.2485   0.7066
CodeBERT     JM1    0.7757     0.5172  0.0746    0.1304   0.6607
      RF     KC1    0.7078     0.4231  0.3492    0.3826   0.6096
     SVM     KC1    0.6255     0.3409  0

In [5]:
# Calculate CodeBERT improvements over best baseline
datasets = ['CM1', 'PC1', 'JM1', 'KC1']
metrics = ['Accuracy', 'Precision', 'Recall', 'F1-Score', 'AUC-ROC']

improvements = []

for dataset in datasets:
    # Get baseline results
    baseline = all_results[
        (all_results['Dataset'] == dataset) & 
        (all_results['Model'] != 'CodeBERT')
    ]
    
    # Get CodeBERT results
    codebert = all_results[
        (all_results['Dataset'] == dataset) & 
        (all_results['Model'] == 'CodeBERT')
    ]
    
    if len(baseline) == 0 or len(codebert) == 0:
        continue
    
    improvement_row = {'Dataset': dataset}
    
    for metric in metrics:
        # Convert to float
        baseline[metric] = baseline[metric].astype(float)
        codebert[metric] = codebert[metric].astype(float)
        
        # Best baseline
        best_baseline = baseline[metric].max()
        best_model = baseline.loc[baseline[metric].idxmax(), 'Model']
        
        # CodeBERT value
        codebert_val = codebert[metric].iloc[0]
        
        # Improvement
        improvement = ((codebert_val - best_baseline) / best_baseline) * 100
        
        improvement_row[f'{metric}_Baseline'] = f"{best_baseline:.4f} ({best_model})"
        improvement_row[f'{metric}_CodeBERT'] = f"{codebert_val:.4f}"
        improvement_row[f'{metric}_Improvement'] = f"{improvement:+.2f}%"
    
    improvements.append(improvement_row)

improvement_df = pd.DataFrame(improvements)

print("\n" + "="*120)
print("CODEBERT IMPROVEMENTS OVER BEST BASELINE")
print("="*120)
print(improvement_df.to_string(index=False))
print("="*120)

# Save
improvement_df.to_csv('../results/codebert_improvements.csv', index=False)
print("\nâœ… Saved: results/codebert_improvements.csv")


CODEBERT IMPROVEMENTS OVER BEST BASELINE
Dataset Accuracy_Baseline Accuracy_CodeBERT Accuracy_Improvement Precision_Baseline Precision_CodeBERT Precision_Improvement Recall_Baseline Recall_CodeBERT Recall_Improvement F1-Score_Baseline F1-Score_CodeBERT F1-Score_Improvement AUC-ROC_Baseline AUC-ROC_CodeBERT AUC-ROC_Improvement
    CM1      0.8876 (MLP)            0.8876               +0.00%        0.2500 (RF)             0.0000              -100.00%    0.7000 (SVM)          0.0000           -100.00%      0.3684 (SVM)            0.0000             -100.00%     0.7975 (SVM)           0.6873             -13.82%
    PC1      0.9267 (MLP)            0.9267               +0.00%       0.5000 (MLP)             0.0000              -100.00%    0.7143 (SVM)          0.0000           -100.00%      0.3175 (SVM)            0.0000             -100.00%      0.8277 (RF)           0.7147             -13.65%
    JM1      0.7897 (MLP)            0.7757               -1.77%       0.6392 (MLP)             0

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  baseline[metric] = baseline[metric].astype(float)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  codebert[metric] = codebert[metric].astype(float)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  baseline[metric] = baseline[metric].astype(float)
A value is trying to be set on a copy of a slice from a

In [6]:
# Overall statistics
print("\n" + "="*80)
print("ðŸ“Š SUMMARY STATISTICS")
print("="*80)

for metric in metrics:
    # Get all baseline best values
    baseline_values = []
    codebert_values = []
    
    for dataset in datasets:
        baseline = all_results[
            (all_results['Dataset'] == dataset) & 
            (all_results['Model'] != 'CodeBERT')
        ]
        codebert = all_results[
            (all_results['Dataset'] == dataset) & 
            (all_results['Model'] == 'CodeBERT')
        ]
        
        if len(baseline) > 0 and len(codebert) > 0:
            baseline[metric] = baseline[metric].astype(float)
            codebert[metric] = codebert[metric].astype(float)
            
            baseline_values.append(baseline[metric].max())
            codebert_values.append(codebert[metric].iloc[0])
    
    avg_baseline = sum(baseline_values) / len(baseline_values)
    avg_codebert = sum(codebert_values) / len(codebert_values)
    avg_improvement = ((avg_codebert - avg_baseline) / avg_baseline) * 100
    
    wins = sum(c > b for c, b in zip(codebert_values, baseline_values))
    
    print(f"\n{metric}:")
    print(f"  Average Baseline:  {avg_baseline:.4f}")
    print(f"  Average CodeBERT:  {avg_codebert:.4f}")
    print(f"  Average Improvement: {avg_improvement:+.2f}%")
    print(f"  CodeBERT Wins: {wins}/{len(datasets)} datasets")

print("\n" + "="*80)


ðŸ“Š SUMMARY STATISTICS

Accuracy:
  Average Baseline:  0.8331
  Average CodeBERT:  0.8327
  Average Improvement: -0.05%
  CodeBERT Wins: 1/4 datasets

Precision:
  Average Baseline:  0.4545
  Average CodeBERT:  0.1293
  Average Improvement: -71.55%
  CodeBERT Wins: 0/4 datasets

Recall:
  Average Baseline:  0.6057
  Average CodeBERT:  0.0186
  Average Improvement: -96.92%
  CodeBERT Wins: 0/4 datasets

F1-Score:
  Average Baseline:  0.3816
  Average CodeBERT:  0.0326
  Average Improvement: -91.46%
  CodeBERT Wins: 0/4 datasets

AUC-ROC:
  Average Baseline:  0.7372
  Average CodeBERT:  0.6606
  Average Improvement: -10.39%
  CodeBERT Wins: 0/4 datasets



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  baseline[metric] = baseline[metric].astype(float)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  codebert[metric] = codebert[metric].astype(float)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  baseline[metric] = baseline[metric].astype(float)
A value is trying to be set on a copy of a slice from a

In [7]:
# Create LaTeX-style table for paper
print("\n" + "="*80)
print("ðŸ“„ TABLE FOR PAPER (LaTeX format)")
print("="*80)

print("\n% Performance Comparison Table")
print("\\begin{table}[h]")
print("\\centering")
print("\\caption{Performance Comparison: CodeBERT vs Baseline Models}")
print("\\label{tab:performance}")
print("\\begin{tabular}{|l|c|c|c|c|c|c|}")
print("\\hline")
print("\\textbf{Dataset} & \\textbf{Model} & \\textbf{Accuracy} & \\textbf{Precision} & \\textbf{Recall} & \\textbf{F1} & \\textbf{AUC} \\\\")
print("\\hline")

for dataset in datasets:
    dataset_results = all_results[all_results['Dataset'] == dataset]
    
    for idx, row in dataset_results.iterrows():
        dataset_col = dataset if idx == dataset_results.index[0] else ""
        print(f"{dataset_col} & {row['Model']} & {row['Accuracy']} & {row['Precision']} & {row['Recall']} & {row['F1-Score']} & {row['AUC-ROC']} \\\\")
    
    print("\\hline")

print("\\end{tabular}")
print("\\end{table}")
print("\n" + "="*80)


ðŸ“„ TABLE FOR PAPER (LaTeX format)

% Performance Comparison Table
\begin{table}[h]
\centering
\caption{Performance Comparison: CodeBERT vs Baseline Models}
\label{tab:performance}
\begin{tabular}{|l|c|c|c|c|c|c|}
\hline
\textbf{Dataset} & \textbf{Model} & \textbf{Accuracy} & \textbf{Precision} & \textbf{Recall} & \textbf{F1} & \textbf{AUC} \\
\hline
CM1 & RF & 0.8652 & 0.25 & 0.1 & 0.1429 & 0.681 \\
 & SVM & 0.7303 & 0.25 & 0.7 & 0.3684 & 0.7975 \\
 & MLP & 0.8876 & 0.0 & 0.0 & 0.0 & 0.7101 \\
 & CodeBERT & 0.8876 & 0.0 & 0.0 & 0.0 & 0.6873 \\
\hline
PC1 & RF & 0.9005 & 0.3077 & 0.2857 & 0.2963 & 0.8277 \\
 & SVM & 0.7749 & 0.2041 & 0.7143 & 0.3175 & 0.8093 \\
 & MLP & 0.9267 & 0.5 & 0.1429 & 0.2222 & 0.7785 \\
 & CodeBERT & 0.9267 & 0.0 & 0.0 & 0.0 & 0.7147 \\
\hline
JM1 & RF & 0.7448 & 0.4346 & 0.4378 & 0.4362 & 0.7084 \\
 & SVM & 0.6983 & 0.3794 & 0.5323 & 0.4431 & 0.7091 \\
 & MLP & 0.7897 & 0.6392 & 0.1542 & 0.2485 & 0.7066 \\
 & CodeBERT & 0.7757 & 0.5172 & 0.0746 & 0.1304 & 0