# Analyze results from the ensemble model
This notebook loads the results of the ensemble model run
and produces the graphs included in the report and the presentation.

In [2]:
import polars as pl
from polars import DataFrame
from label_legends.result import calculate_scores
from label_legends.util import ROOT

import altair


# Chart rendering: charts open in the system browser.
# Swap in the "png" renderer below for static images instead.
# altair.renderers.enable("png")
altair.renderers.enable("browser")

# Let polars print frames with up to 20 columns and 100 rows.
# Each setter returns the Config class, so the calls can be chained.
pl.Config.set_tbl_cols(20).set_tbl_rows(100)

polars.config.Config

In [3]:
# Per-model evaluation metrics, ordered from highest to lowest recall.
scores_path = ROOT / "resource" / "ensemble_scores_own.csv"
scores = pl.read_csv(scores_path).sort("recall", descending=True)
scores.head(100)

model,precision,recall,fscore,accuracy,tp,tn,fp,fn
str,f64,f64,f64,f64,i64,i64,i64,i64
"""female""",0.714286,0.714286,0.714286,0.733333,20,24,8,8
"""negative""",0.5,0.428571,0.461538,0.533333,12,20,12,16
"""female_negative""",0.769231,0.357143,0.487805,0.65,10,29,3,18
"""swear""",0.75,0.321429,0.45,0.633333,9,29,3,19
"""ensemble""",0.888889,0.285714,0.432432,0.65,8,31,1,20
"""deberta""",0.875,0.25,0.388889,0.633333,7,31,1,21


In [4]:
# Per-sample predictions, plus a derived "pred_female_swear" column that is 1
# only when both "pred_female" and "pred_swear" are non-zero, and 0 otherwise.
# A native boolean expression replaces the former per-row Python lambda
# (`map_elements`), which polars can neither optimise nor parallelise.
predictions = pl.read_csv(ROOT / "resource" / "ensemble_predictions_own.csv").with_columns(
    ((pl.col("pred_female") != 0) & (pl.col("pred_swear") != 0))
    .cast(pl.Int64)
    .alias("pred_female_swear")
)

predictions.head(100)

id,type,label,deberta,female,pred_female,negative,pred_negative,pred_female_negative,swear,pred_swear,pred_ensemble,pred_female_swear
i64,str,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64
1,"""fn""",1,0,1,1,0,0,0,0,0,0,0
2,"""fn""",1,0,1,1,0,0,0,0,0,0,0
3,"""tn""",0,0,0,0,1,1,0,0,0,0,0
4,"""fn""",1,0,1,1,0,0,0,0,0,0,0
5,"""tn""",0,0,0,0,1,1,0,0,0,0,0
6,"""tn""",0,0,0,0,0,0,0,0,0,0,0
7,"""fn""",1,0,1,1,1,1,1,0,0,0,0
8,"""tn""",0,0,0,0,0,0,0,0,0,0,0
9,"""tn""",0,0,0,0,0,0,0,0,0,0,0
10,"""fn""",1,0,0,0,0,0,0,0,0,0,0


In [5]:
# NOTE: disabled code, kept for reference only. When enabled it computes the scores
# for "pred_ensemble" and "pred_female_swear" and writes them, appended to `scores`,
# to resource/ensemble_scores.csv.
#
# scores_ensemble = calculate_scores(predictions["label"], predictions["pred_ensemble"])
# scores_ensemble

# scores_female_swear = calculate_scores(predictions["label"], predictions["pred_female_swear"])
# scores_female_swear
#
# pl.concat([scores, DataFrame([s for s in [{"model": "pred_ensemble"} | scores_ensemble.asdict(), {"model": "pred_female_swear"} | scores_female_swear.asdict()]])]).write_csv(ROOT / "resource" / "ensemble_scores.csv")

In [6]:

# Re-order the score table so the best F-score comes first.
scores = scores.sort("fscore", descending=True)


def barplot(measure: str):
    """Return a bar chart of `measure` for every model in `scores`.

    measure: name of the `scores` column plotted on the y-axis.

    Bars are ordered by descending "fscore" whatever `measure` is, and are
    coloured per model.
    """
    by_fscore = altair.EncodingSortField(field="fscore", order="descending")
    x_axis = altair.X("model:N", sort=by_fscore)
    y_axis = altair.Y(measure)
    return altair.Chart(scores).mark_bar().encode(x_axis, y_axis, color="model")


# Metrics to plot, one bar chart per entry.
# Other columns available in `scores`: "accuracy", "tp", "tn".
cols = ["fscore", "recall", "precision", "fn", "fp"]
charts = [barplot(col) for col in cols]

# `.show()` returns None, so build and keep the chart object first and display
# it in a separate step; otherwise `combined_chart` would end up bound to None.
combined_chart = (
    altair.hconcat(*charts, title="Performance metrics for ensemble models based on DeBERTa")
    .configure_title(fontSize=20, anchor="middle")
    .configure_axis(labelFontSize=15, titleFontSize=15)
    .configure_legend(labelFontSize=15, titleFontSize=15)
)
combined_chart.show()

In [7]:
def selectivity_ratios(model: str):
    """Share of "fn" and "tn" samples for which column `model` equals 1.

    model: name of a column in `predictions`.

    Returns a frame with the columns "model" (the given column name), "type",
    "frac" (rows of that type with `model` == 1 divided by all rows of that
    type) and "samples" (the number of such rows), sorted by ascending
    "samples".
    """
    counts = predictions.group_by("type", model).len()
    # The per-type total must be taken before rows are dropped, so "frac" is
    # computed on the full count table and the filtering happens afterwards.
    with_share = counts.with_columns(
        (pl.col("len") / pl.sum("len").over("type")).alias("frac"),
        pl.lit(model).alias("model"),
    )
    selected = with_share.filter(
        pl.col("type").is_in(["fn", "tn"]) & (pl.col(model) == 1)
    )
    return (
        selected.sort("len")
        .rename({"len": "samples"})
        .select("model", "type", "frac", "samples")
    )
# Columns of `predictions` to compare, and their stacked selectivity ratios.
models = [
    "female",
    "negative",
    "swear",
    "pred_female_negative",
    "pred_female_swear",
    "pred_ensemble",
]
sel_ratios = pl.concat([selectivity_ratios(name) for name in models])

def frac_chart(model: str, scale: str = "frac"):
    """Return a bar chart of one model's rows in `sel_ratios`, one bar per sample type.

    model: value of the "model" column to plot; also used as the x-axis title.
    scale: name of the `sel_ratios` column shown on the y-axis ("frac" by default).

    Bars are ordered by descending "frac" and coloured per sample type.
    """
    model_rows = sel_ratios.filter(pl.col("model") == model)
    by_frac = altair.EncodingSortField(field="frac", order="descending")
    x_axis = altair.X("type", sort=by_frac, title=model)
    y_axis = altair.Y(scale)
    return altair.Chart(model_rows).mark_bar().encode(x_axis, y_axis, color="type")

# One chart per entry in `models`, laid out side by side.
frac_charts = [frac_chart(name) for name in models]

# `.show()` returns None, so build and keep the chart object first and display
# it in a separate step; otherwise `combined_chart` would end up bound to None.
combined_chart = (
    altair.hconcat(*frac_charts, title="Fraction of reconsidered samples")
    .configure_title(fontSize=20, anchor="middle")
    .configure_axis(labelFontSize=15, titleFontSize=15)
    .configure_legend(labelFontSize=15, titleFontSize=15)
)
combined_chart.show()