In [2]:
import matplotlib.pyplot as plt
import pandas as pd
import plotnine as pn

In [3]:
# Reference annotations (the `human` rows); the cells below treat them as the
# ground truth that every algorithm's flagged lines are compared against.
ref = pd.read_csv(filepath_or_buffer='human.csv')
ref.head(n=5)

Unnamed: 0,path,type,line
0,warrenbuckley/compact-log-format-viewer/run2037-0,human,553
1,warrenbuckley/compact-log-format-viewer/run2037-0,human,501
2,warrenbuckley/compact-log-format-viewer/run2037-0,human,502
3,warrenbuckley/compact-log-format-viewer/run2037-0,human,503
4,warrenbuckley/compact-log-format-viewer/run2037-0,human,504


In [4]:
# Flagged lines per algorithm, with columns path / type / line. The `type`
# column holds the algorithm label that the scoring cell groups by.
res = pd.read_csv('result2.csv')
gpt_res = pd.read_csv('gpt.csv')
keyword_res = pd.read_csv('keyword.csv')

# ignore_index=True gives the combined frame a fresh 0..n-1 index. Without it
# each CSV keeps its own row labels, so the same label would appear up to three
# times and label-based lookups on `res` would be ambiguous.
res = pd.concat([res, gpt_res, keyword_res], ignore_index=True)
res.head()

Unnamed: 0,path,type,line
0,oramasearch/orama/run626-0,seed,483
1,oramasearch/orama/run626-0,seed,484
2,oramasearch/orama/run626-0,seed,485
3,oramasearch/orama/run626-0,seed,486
4,oramasearch/orama/run626-0,seed,487


In [5]:
def calculate_precision_recall(human_data, algorithm_data):
    """Score each algorithm's flagged lines against the human reference, per path.

    Parameters
    ----------
    human_data : DataFrame with at least the columns ``path`` and ``line``.
    algorithm_data : DataFrame with at least the columns ``path``, ``type``
        and ``line``; ``type`` is the algorithm label.

    Returns
    -------
    (precision_scores, recall_scores) : two dicts keyed by ``(path, type)``.
        One entry exists for every path found in ``human_data`` combined with
        every type found in ``algorithm_data``. A score is 0 whenever its
        denominator (the algorithm's set for precision, the human set for
        recall) is empty, which includes paths the algorithms never reported.
    """
    # One set of line numbers per path (reference) and per path/type (algorithms).
    reference_by_path = human_data.groupby("path")["line"].apply(set)
    lines_per_group = algorithm_data.groupby(["path", "type"])["line"].apply(set)
    # Rows are paths, columns are algorithm types; missing pairs become an empty set.
    predicted = lines_per_group.unstack(fill_value=set())
    reported_paths = predicted.index

    precision_scores, recall_scores = {}, {}

    for path, expected in reference_by_path.items():
        path_was_reported = path in reported_paths
        for algorithm_type in predicted.columns:
            if path_was_reported:
                found = predicted.loc[path, algorithm_type]
            else:
                found = set()

            hit_count = len(expected & found)
            key = (path, algorithm_type)
            precision_scores[key] = hit_count / len(found) if found else 0
            recall_scores[key] = hit_count / len(expected) if expected else 0

    return precision_scores, recall_scores

In [6]:
# Score every (path, algorithm) pair, then lay the two score dictionaries out
# as the `precision` and `recall` columns of one frame whose index holds the
# (path, algorithm) keys.
precision_by_key, recall_by_key = calculate_precision_recall(ref, res)
accuracies = pd.DataFrame(
    [precision_by_key, recall_by_key],
    index=['precision', 'recall'],
).transpose()

accuracies

Unnamed: 0,precision,recall
"(MilanCommunity/Milan/run23-0, bigram)",0.008576,1.000000
"(MilanCommunity/Milan/run23-0, drain)",0.012563,1.000000
"(MilanCommunity/Milan/run23-0, gpt)",0.666667,0.400000
"(MilanCommunity/Milan/run23-0, keyword)",0.833333,1.000000
"(MilanCommunity/Milan/run23-0, lcs)",0.008741,1.000000
...,...,...
"(zeabur/zbpack/run552-0, drain)",0.000000,0.000000
"(zeabur/zbpack/run552-0, gpt)",1.000000,0.666667
"(zeabur/zbpack/run552-0, keyword)",0.500000,0.333333
"(zeabur/zbpack/run552-0, lcs)",0.011521,0.833333


In [7]:
def _rows_for_algorithm(scores, algorithm):
    """Return the rows of `scores` whose (path, algorithm) index key names `algorithm`."""
    keep = [key[1] == algorithm for key in scores.index]
    return scores.loc[keep]


# Select rows by the algorithm label stored in each index key instead of by a
# fixed stride of six. A stride only lines up while there are exactly six
# algorithm types in alphabetical order; one missing or extra type would
# silently hand every variable another algorithm's rows.
bigram = _rows_for_algorithm(accuracies, 'bigram')
drain = _rows_for_algorithm(accuracies, 'drain')
gpt = _rows_for_algorithm(accuracies, 'gpt')
keyword = _rows_for_algorithm(accuracies, 'keyword')
lcs = _rows_for_algorithm(accuracies, 'lcs')
seed = _rows_for_algorithm(accuracies, 'seed')

In [9]:
# Summary statistics per algorithm. Each describe() table gets its columns
# suffixed with the algorithm name, then the tables are joined side by side on
# their shared statistic index (count, mean, std, ...). drain is not included.
summaries = [
    frame.describe().add_suffix(f' {label}')
    for label, frame in (
        ('lcs', lcs),
        ('seed', seed),
        ('gpt', gpt),
        ('keyword', keyword),
        ('bigram', bigram),
    )
]

merged = summaries[0]
for summary in summaries[1:]:
    merged = merged.merge(summary, left_index=True, right_index=True)
merged

Unnamed: 0,precision lcs,recall lcs,precision seed,recall seed,precision gpt,recall gpt,precision keyword,recall keyword,precision bigram,recall bigram
count,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0
mean,0.211135,0.932429,0.465967,0.911393,0.873301,0.460418,0.684046,0.412902,0.230557,0.969864
std,0.234657,0.154955,0.318135,0.180397,0.243298,0.306611,0.300717,0.302156,0.222194,0.134975
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,0.025894,0.92,0.165012,0.914189,0.842857,0.213346,0.5,0.166667,0.064094,1.0
50%,0.110429,1.0,0.495283,1.0,1.0,0.4,0.714286,0.333333,0.146573,1.0
75%,0.304869,1.0,0.75,1.0,1.0,0.699231,1.0,0.666667,0.323718,1.0
max,0.882883,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.971014,1.0


In [42]:
# Export the summary table as LaTeX source to accuracies.tex (overwritten on
# every run).
with open('accuracies.tex', mode='w') as tex_file:
    latex_table = merged.to_latex()
    tex_file.write(latex_table)

In [7]:
# NOTE: this rebinds `drain`. The earlier slicing cell uses that name for the
# drain rows of `accuracies`; from here on it is the frame read from
# 'drain2-py.csv'.
drain = pd.read_csv('drain2-py.csv')
all_paths = ref["path"].unique()

# Collect the line numbers of every path once per frame. Filtering both frames
# with a boolean mask inside the loop would rescan all rows for each path.
human_lines_by_path = ref.groupby("path")["line"].apply(set).to_dict()
drain_lines_by_path = drain.groupby("path")["line"].apply(set).to_dict()

results = []
for path in all_paths:
    # A path missing from either frame contributes an empty set.
    human_lines = human_lines_by_path.get(path, set())
    drain_lines = drain_lines_by_path.get(path, set())
    common_lines = human_lines & drain_lines
    only_in_drain = drain_lines - human_lines
    # Share of the human-marked lines that drain also reports (0 if none marked).
    ratio_human_in_drain = len(common_lines) / len(human_lines) if human_lines else 0
    # Share of drain's lines that the human did not mark (0 if drain reports none).
    ratio_drain_not_in_human = len(only_in_drain) / len(drain_lines) if drain_lines else 0
    results.append({
        "path": path,
        "lines_in_human": len(human_lines),
        "lines_in_drain": len(drain_lines),
        "ratio_human_in_drain": ratio_human_in_drain,
        "ratio_drain_not_in_human": ratio_drain_not_in_human
    })

# Bar chart of the share of human-marked lines that drain also reports, one bar
# per path, highest ratio first. Turning `path` into an ordered categorical in
# sorted order makes the x axis follow that ranking.
results_df = (
    pd.DataFrame(results)
    .sort_values(by="ratio_human_in_drain", ascending=False)
    .assign(
        path=lambda frame: pd.Categorical(
            frame['path'], categories=frame['path'], ordered=True
        )
    )
)

# The y axis is ticked only at the three quartiles, each labelled with its value.
quartiles = results_df['ratio_human_in_drain'].quantile([0.25, 0.5, 0.75])
y_labels = [
    f"{name}: {quartiles[level]:.2f}"
    for name, level in (("Q1", 0.25), ("Median", 0.5), ("Q3", 0.75))
]

plot = (
    pn.ggplot(results_df, pn.aes(x="path", y="ratio_human_in_drain"))
    + pn.geom_bar(stat="identity", fill="steelblue")
    + pn.labs(
        title="Ratio of Human in Drain per Path",
        x="Path",
        y="Ratio (Human in Drain)",
    )
    + pn.scale_y_continuous(
        limits=(0, results_df['ratio_human_in_drain'].max()),
        breaks=quartiles.values,
        labels=y_labels,
    )
    + pn.theme_bw()
    + pn.theme(
        axis_text_x=pn.element_text(rotation=90, hjust=1),
        axis_ticks_length=0,
    )
)
plot.save("output3.png", height=10, width=10, dpi=300)

# Bar chart of the share of drain's lines that the human did not mark, one bar
# per path, highest ratio first. Turning `path` into an ordered categorical in
# sorted order makes the x axis follow that ranking.
results_df2 = pd.DataFrame(results)
results_df2 = results_df2.sort_values(by="ratio_drain_not_in_human", ascending=False)
results_df2['path'] = pd.Categorical(results_df2['path'], categories=results_df2['path'], ordered=True)

quartiles = results_df2['ratio_drain_not_in_human'].quantile([0.25, 0.5, 0.75])
# Quartile tick labels; not applied to the axis in this plot, the quartiles are
# drawn as red horizontal lines instead.
y_labels = [f"Q1: {quartiles[0.25]:.2f}", f"Median: {quartiles[0.5]:.2f}", f"Q3: {quartiles[0.75]:.2f}"]

plot = (
        pn.ggplot(results_df2, pn.aes(x="path", y="ratio_drain_not_in_human")) +
        pn.geom_bar(stat="identity", fill="steelblue") +
        pn.labs(
            title="Ratio of Drain Not in Human per Path",
            x="Path",
            y="Ratio (Drain Not in Human)"
        ) +
        pn.scale_y_continuous(
            # Y-axis limits come from this plot's own frame (results_df2), so
            # the cell no longer depends on the frame built for the other plot.
            limits=(0, results_df2['ratio_drain_not_in_human'].max()),
            # Ticks at the three quartiles plus the fixed 0.25 / 0.5 / 0.75 marks.
            breaks=[quartiles[0.25], quartiles[0.5], quartiles[0.75], 0.25, 0.5, 0.75],
        )
        # Mark each quartile with a red horizontal line.
        + pn.geom_hline(yintercept=quartiles, color="red")
        + pn.theme_bw()
        + pn.theme(
            axis_text_x=pn.element_text(rotation=90, hjust=1)
        )
)
plot.save("output4.png", height=10, width=10, dpi=300)

