In [2]:
import pandas as pd
import plotnine as pn
import pingouin as pg

# Record the library versions this analysis was run with, one per line.
print(
    "Pandas: %s" % pd.__version__,
    "Plotnine: %s" % pn.__version__,
    "Pingouin: %s" % pg.__version__,
    sep="\n",
)

Pandas: 2.2.3
Plotnine: 0.14.5
Pingouin: 0.5.5


In [None]:
# Reference annotations; later passed as `human_data` to calculate_precision_recall,
# which reads the `path` and `line` columns.
ref = pd.read_csv(filepath_or_buffer='annotations.csv')
ref.head(n=5)

In [None]:
# Algorithm output; later passed as `algorithm_data` to calculate_precision_recall,
# which reads the `path`, `type` and `line` columns.
res = pd.read_csv(filepath_or_buffer='result.csv')
res.head(n=5)

In [None]:
# Preview the per-file sets of annotated lines (the grouping the scoring step relies on).
# A cell renders only its final expression, so the first preview needs an explicit
# display() call; `display` is provided by the IPython kernel without an import.
display(ref.groupby("path")["line"].apply(set).head())
res.head()

In [48]:
def calculate_precision_recall(human_data, algorithm_data):
    """Score each algorithm's reported lines against the human annotations, per file.

    Parameters
    ----------
    human_data : pandas.DataFrame
        Reference annotations; the ``path`` and ``line`` columns are read.
    algorithm_data : pandas.DataFrame
        Algorithm output; the ``path``, ``type`` and ``line`` columns are read.

    Returns
    -------
    tuple of (dict, dict)
        ``(precision_scores, recall_scores)``. Both dicts are keyed by
        ``(path, type)`` for every path found in ``human_data`` combined with
        every ``type`` found in ``algorithm_data``. A score is 0 whenever its
        denominator would be zero (no reported lines for precision, no
        annotated lines for recall).
    """
    # One set of annotated lines per file.
    reference_by_path = human_data.groupby("path")["line"].apply(set)
    # One row per file, one column per algorithm type; combinations that never
    # occur in the algorithm output hold an empty set.
    reported = (
        algorithm_data.groupby(["path", "type"])["line"]
        .apply(set)
        .unstack(fill_value=set())
    )

    precision_scores, recall_scores = {}, {}

    for path, expected in reference_by_path.items():
        # Files the algorithms produced nothing for are scored against an empty set.
        path_is_reported = path in reported.index
        for algorithm_type in reported.columns:
            found = reported.loc[path, algorithm_type] if path_is_reported else set()
            hit_count = len(expected & found)

            key = (path, algorithm_type)
            precision_scores[key] = hit_count / len(found) if found else 0
            recall_scores[key] = hit_count / len(expected) if expected else 0

    return precision_scores, recall_scores

In [None]:
# Score every (path, algorithm type) pair. The function returns two dicts
# (precision, recall); stacking them as two rows and transposing yields one row
# per pair with a `precision` and a `recall` column.
accuracies = (
    pd.DataFrame(calculate_precision_recall(ref, res))
    .transpose()
    .rename(columns={0: 'precision', 1: 'recall'})
)

In [None]:
# Rows at even positions (0, 2, 4, ...). NOTE(review): treated as the LCS scores, which
# assumes exactly two algorithm types with the LCS one ordered first — confirm against `res`.
lcs = accuracies.iloc[0::2]

In [85]:
# Rows at odd positions (1, 3, 5, ...). NOTE(review): treated as the CiDiff scores, which
# assumes exactly two algorithm types with the CiDiff one ordered second — confirm against `res`.
cidiff = accuracies.iloc[1::2]

In [None]:
# Rebuild the score table (one row per (path, algorithm type) pair) and tag each row
# with the algorithm it belongs to.
accuracies = (
    pd.DataFrame(calculate_precision_recall(ref, res))
    .transpose()
    .rename(columns={0: 'precision', 1: 'recall'})
)
# Labels are assigned purely by row position: even rows -> 'lcs', odd rows -> 'cidiff'.
# NOTE(review): this assumes exactly two algorithm types per path, with the LCS one
# ordered first — confirm against the `type` values in `res`.
accuracies['algorithm'] = [
    'lcs' if position % 2 == 0 else 'cidiff'
    for position in range(len(accuracies))
]
accuracies

In [None]:
# Paired t-test comparing the precision of the rows labelled 'lcs' with those labelled 'cidiff'.
pg.ttest(
    accuracies.loc[accuracies['algorithm'] == 'lcs', 'precision'],
    accuracies.loc[accuracies['algorithm'] == 'cidiff', 'precision'],
    paired=True,
)

In [None]:
# Paired t-test comparing the recall of the rows labelled 'lcs' with those labelled 'cidiff'.
pg.ttest(
    accuracies.loc[accuracies['algorithm'] == 'lcs', 'recall'],
    accuracies.loc[accuracies['algorithm'] == 'cidiff', 'recall'],
    paired=True,
)

In [None]:
# Long format: one row per (algorithm, metric) observation, as needed for faceting.
melted = accuracies.melt(
    id_vars='algorithm',
    value_vars=['precision', 'recall'],
    var_name='metric',
    value_name='value',
)
# One violin per algorithm, one facet per metric, each facet with its own axis scales.
# NOTE(review): recent plotnine releases flag `subplots_adjust` as deprecated in favour of
# the panel spacing themeables — confirm it is still honoured by the installed version.
(
    pn.ggplot(melted, pn.aes(x='algorithm', y='value', fill='algorithm'))
    + pn.geom_violin()
    + pn.facet_wrap('~metric', scales='free')
    + pn.theme(subplots_adjust={'wspace': 0.25}, legend_position='none')
)

In [None]:
# Side-by-side descriptive statistics for both algorithms. The merge suffixes
# (' lcs' / ' seed') disambiguate the shared column names before they are given
# their presentation labels.
merged = (
    lcs.describe()
    .merge(
        cidiff.describe(),
        left_index=True,
        right_index=True,
        suffixes=(' lcs', ' seed'),
    )
    .rename(
        columns={
            'precision lcs': 'Precision LCS-diff',
            'recall lcs': 'Recall LCS-diff',
            'precision seed': 'Precision CiDiff',
            'recall seed': 'Recall CiDiff',
        }
    )
)
merged

In [None]:
# Export the summary table as LaTeX source.
with open('accuracies.tex', 'w') as tex_file:
    tex_file.write(merged.to_latex())