In [1]:
import numpy as np
import pandas as pd
import glob
from os import path
import re
import seaborn as sns
import altair as alt
import matplotlib.pyplot as plt

# Select seaborn's "white" style for matplotlib/seaborn figures.
# NOTE(review): the charts visible in this part of the notebook are built with Altair,
# which is not styled through seaborn - confirm this setting is still needed.
sns.set_style("white")

In [2]:
# Process the raw per-run result files into a single DataFrame and a CSV.
#
# Every entry directly inside results_ablation/ whose file name contains
# "<acquisition_function>_<seed_size>_<active_batch_size>" contributes one row;
# anything else is reported and skipped.
output_csv = path.join("results_ablation", "results_ablation.csv")
result_columns = ["acquisition_function", "seed_size", "active_batch_size", "accuracy"]
pattern = re.compile(r"(?P<func>(entropy_score|least_confidence|random_score))_(?P<seed_size>\d+)_(?P<active_size>\d+)")

records = []
# glob only treats "**" as recursive when recursive=True is passed, so this lists
# direct children only. sorted() makes the row order independent of the order in
# which the filesystem happens to enumerate the directory.
for fname in sorted(glob.glob("results_ablation/**")):
    # The CSV written at the end of this cell lives in the same directory; skip it
    # so that re-running the cell does not report it as a malformed result file.
    if path.normpath(fname) == path.normpath(output_csv):
        continue
    # Match against the file name only, so directory components can never
    # satisfy (or confuse) the pattern.
    match = pattern.search(path.basename(fname))
    if match:
        records.append({
            "acquisition_function": match.group("func"),
            "seed_size": match.group("seed_size"),            # regex capture: still a string
            "active_batch_size": match.group("active_size"),  # regex capture: still a string
            "accuracy": np.load(fname),
        })
    else:
        print(f"skipping improperly formatted file: {fname}")

# Passing the column list keeps the expected columns present even when no file matched.
results = pd.DataFrame(records, columns=result_columns)
# NOTE(review): "accuracy" holds whatever np.load returned (presumably one array per run);
# to_csv can only store its text representation - confirm the CSV is for inspection only.
results.to_csv(output_csv, index=False)

skipping improperly formatted file: results_ablation\results_ablation.csv


In [3]:
# Show the dtype of every column in the assembled results frame.
results.dtypes

acquisition_function    object
seed_size               object
active_batch_size       object
accuracy                object
dtype: object

In [4]:
# Derive the columns used by the plots and correlations:
#   final_accuracy                - last element of each row's `accuracy` value
#   seed_size / active_batch_size - cast from their current dtype to int
results["final_accuracy"] = results["accuracy"].apply(lambda run_accuracy: run_accuracy[-1])
for size_col in ("seed_size", "active_batch_size"):
    results[size_col] = results[size_col].astype(int)


In [5]:
def plot_against_base(base_chart: alt.Chart):
    """Plot final accuracy against the x encoding of ``base_chart``, one panel per acquisition function.

    Two layers are stacked on top of ``base_chart``: a box plot of
    ``final_accuracy`` and a line through ``mean(final_accuracy)``. The layered
    chart is then faceted into columns by ``acquisition_function``.

    Parameters
    ----------
    base_chart : alt.Chart
        Chart that already carries the data and the x encoding. Its data must
        provide the ``final_accuracy`` and ``acquisition_function`` fields.

    Returns
    -------
    The faceted chart produced by calling ``.facet(column=...)`` on the layered chart.
    """
    # Created locally instead of being read from a notebook-level variable, so the
    # function works regardless of which cells have been run. zero=False lets the
    # axis range follow the data rather than being forced to include 0.
    accuracy_scale = alt.Scale(zero=False)

    # Both layers derive from the chart passed in by the caller, not from a global.
    box = base_chart.mark_boxplot().encode(
        y=alt.Y("final_accuracy:Q", scale=accuracy_scale, title="Accuracy"),
    )
    mean_line = base_chart.mark_line().encode(
        y=alt.Y("mean(final_accuracy):Q", scale=accuracy_scale, title="Mean accuracy"),
    )
    return (box + mean_line).facet(column="acquisition_function:N")

In [6]:
# All else equal, how does final accuracy relate to the seed dataset size?
exclude_zero_point = alt.Scale(zero=False)  # axis range follows the data instead of starting at 0

seed_size_x = alt.X("seed_size:Q", scale=exclude_zero_point, title="Seed dataset size")
base = alt.Chart(results).encode(x=seed_size_x)

# Not drawn at the moment: a dashed orange per-function trend line built with
# transform_regression('seed_size', 'final_accuracy', groupby=['acquisition_function']).
plot_against_base(base)




- The two non-random acquisition functions (entropy score and least confidence) show a strong positive correlation between seed dataset size and accuracy. This makes sense because we expect more data to improve performance.
- This effect is much weaker for the random score function. **TODO:** *why?*

In [8]:
# All else equal, how does final accuracy relate to the active learning batch size?
batch_size_x = alt.X(
    "active_batch_size:Q",
    scale=exclude_zero_point,
    title="Active learning batch size",
)
# `base` is rebound here and now refers to the batch-size chart.
base = alt.Chart(results).encode(x=batch_size_x)

plot_against_base(base)

- The relationship between active learning batch size and accuracy seems much weaker overall than the one for seed dataset size.
- If the relationship is significantly stronger for the non-random acquisition functions than for the random score function, that suggests our choice of acquisition function is actually helping.

In [12]:
# Pearson's r between the numeric columns, computed separately per acquisition function.
# The numeric columns are selected explicitly: `accuracy` is an object-dtype column, and
# pandas >= 2.0 no longer drops non-numeric columns from corr() by default.
numeric_cols = ["seed_size", "active_batch_size", "final_accuracy"]
results.groupby("acquisition_function")[numeric_cols].corr()

Unnamed: 0_level_0,Unnamed: 1_level_0,seed_size,active_batch_size,final_accuracy
acquisition_function,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
entropy_score,seed_size,1.0,0.0,0.845683
entropy_score,active_batch_size,0.0,1.0,0.20713
entropy_score,final_accuracy,0.845683,0.20713,1.0
least_confidence,seed_size,1.0,0.0,0.889154
least_confidence,active_batch_size,0.0,1.0,0.203143
least_confidence,final_accuracy,0.889154,0.203143,1.0
random_score,seed_size,1.0,0.0,0.337486
random_score,active_batch_size,0.0,1.0,-0.086663
random_score,final_accuracy,0.337486,-0.086663,1.0
