# 目的
valid_dfを用いて、以下２つの観点でscoreを計算する
- 出力からラベル列を生成する
- 存在するデータから計算する

詳細
- https://www.notion.so/exp011-valid-f5score-e5ae29d2fd704ac799c142b6844a5b16?pvs=4

In [31]:
import polars as pl

In [32]:
# Load the training JSON (later cells read its "document", "tokens" and "labels"
# columns) and the exp012 validation CSV that holds the *_pred columns scored below.
train = pl.read_json("../data/train.json")
data = pl.read_csv("../valid_df/exp012.csv")

In [33]:
# Display the (rows, columns) shape of the training frame as a quick sanity check.
train.shape

(6807, 5)

In [34]:
# Cap DataFrame printing at 10 rows.
# NOTE(review): the previous comment said "show all rows", which this call does not
# do; per the polars docs a negative value is required for that — confirm the intent.
pl.Config.set_tbl_rows(10)

polars.config.Config

# 出力からラベル列を生成する

In [35]:
# Build one row per predicted token: (document id, token index, predicted label).
# The literal string "null" is treated as a missing value in all three columns,
# and the id/index columns are cast to integers.
pred_df = data.select(
    [
        pl.col("document_pred").replace("null", None).cast(pl.Int64),
        pl.col("token_pred").replace("null", None).cast(pl.Int64),
        pl.col("label_pred").replace("null", None),
    ]
)
# Keep only rows where a complete prediction exists, ordered by document id.
pred_df = pred_df.drop_nulls()
pred_df = pred_df.sort("document_pred")

In [36]:
# train_only_valid_document = train.filter(
#     pl.col("document").is_in(
#         pred_df.get_column("document_pred").unique()
#     )
# )

In [37]:
# train_only_valid_document = train_only_valid_document.with_columns(
#     pl.col("tokens").map_elements(len).alias("tokens_len"),
# )

In [38]:
# Attach the number of tokens of every document as "tokens_len".
# The native list namespace is used instead of `map_elements(len)` so the length
# is computed inside polars rather than through a per-row Python callback.
# Assumes "tokens" is a List column (it is loaded from JSON arrays) — TODO confirm.
# Note: the result is an unsigned integer column; it is only consumed as a count.
train_with_token_len = train.with_columns(
    pl.col("tokens").list.len().alias("tokens_len"),
)

In [39]:
# Collapse the per-token predictions into one row per document, holding the list
# of predicted token indices and the matching list of predicted labels.
pred_df_agg_with_len = pred_df.group_by("document_pred").agg(
    [
        pl.col("token_pred"),
        pl.col("label_pred"),
    ]
)
# Bring in each document's token count and gold label sequence from the training
# data. A left join keeps every predicted document even without a match in `train`
# (its "tokens_len"/"labels" would then be null).
# NOTE(review): documents for which the model predicted nothing never appear in
# this frame, so any gold entities they contain are not scored — confirm intended.
pred_df_agg_with_len = pred_df_agg_with_len.join(
    train_with_token_len.select("document", "tokens_len", "labels"),
    how="left",
    left_on="document_pred",
    right_on="document",
)

In [40]:
# Expand the sparse predictions of each document into a dense label sequence:
# start from "O" for every token, then overwrite the predicted positions.
label_pred_alls = []
rows = zip(
    pred_df_agg_with_len["token_pred"].to_list(),
    pred_df_agg_with_len["label_pred"].to_list(),
    pred_df_agg_with_len["tokens_len"].to_list(),
)
for positions, tags, n_tokens in rows:
    dense_labels = ["O"] * n_tokens
    for position, tag in zip(positions, tags):
        # If a position occurs more than once, the last prediction wins.
        dense_labels[position] = tag
    label_pred_alls.append(dense_labels)

# Pair the gold label sequences with the reconstructed predicted sequences.
actual_pred_df = pred_df_agg_with_len.with_columns(
    pl.Series("label_pred_all", label_pred_alls)
)
actual_pred_df = actual_pred_df.select(["labels", "label_pred_all"])

In [69]:
from seqeval.metrics.sequence_labeling import precision_recall_fscore_support

# Gold and predicted label sequences, one inner list per document.
gold_sequences = actual_pred_df["labels"].to_list()
predicted_sequences = actual_pred_df["label_pred_all"].to_list()

# Micro-averaged F-beta for beta=1 and beta=5; index 2 of the returned tuple is
# the f-score.
calculated_f1_score, calculated_f5_score = (
    precision_recall_fscore_support(
        gold_sequences,
        predicted_sequences,
        beta=beta,
        average="micro",
    )[2]
    for beta in (1, 5)
)

calculated_f1_score, calculated_f5_score

(0.9074074074074073, 0.9168105929763961)

In [68]:
from seqeval.metrics import classification_report

# Per-label precision / recall / f1 breakdown of the reconstructed predictions
# against the gold label sequences.
report_text = classification_report(
    actual_pred_df["labels"].to_list(),
    actual_pred_df["label_pred_all"].to_list(),
)
print(report_text)

  _warn_prf(average, modifier, msg_start, len(result))


                precision    recall  f1-score   support

         EMAIL       0.86      1.00      0.92         6
        ID_NUM       0.94      0.89      0.92        19
  NAME_STUDENT       0.91      0.92      0.92       222
     PHONE_NUM       0.33      1.00      0.50         1
STREET_ADDRESS       0.00      0.00      0.00         1
  URL_PERSONAL       0.85      1.00      0.92        17
      USERNAME       0.00      0.00      0.00         1

     micro avg       0.90      0.92      0.91       267
     macro avg       0.56      0.69      0.60       267
  weighted avg       0.90      0.92      0.91       267



# 存在するデータから計算する

In [29]:
def f5_score(precision: float, recall: float, beta: int = 5) -> float:
    """Return the F-beta score for the given precision and recall.

    Computes ``(1 + beta^2) * P * R / (beta^2 * P + R)``. With the default
    ``beta=5`` recall is weighted far more heavily than precision.

    Args:
        precision: Precision value.
        recall: Recall value.
        beta: Weight of recall relative to precision.

    Returns:
        The F-beta score, or ``0.0`` when the denominator is zero (for example
        when both precision and recall are 0) instead of raising
        ``ZeroDivisionError``.
    """
    beta_sq = beta**2
    denominator = beta_sq * precision + recall
    if denominator == 0:
        # No true positives at all: define the score as 0 rather than dividing by 0.
        return 0.0
    return (1 + beta_sq) * (precision * recall) / denominator

In [30]:
# Token-level score computed directly from the rows present in the validation CSV.
# A prediction counts as correct when its (document, token index, token string)
# triple also appears among the actual triples; the label columns are not part of
# the join keys, so the label type is not compared here.
actual_keys = ["document", "token", "token_str"]
pred_keys = ["document_pred", "token_pred", "token_str_pred"]

# Drop nulls row-wise on the selected frame. Applying `drop_nulls()` to each
# column expression inside `select` filters every column independently, which can
# misalign the triples (or fail on differing lengths) when nulls are not in the
# same rows.
# NOTE(review): an earlier cell treats the literal string "null" as missing in the
# *_pred columns; that normalisation is not applied here — confirm whether needed.
actual_df = data.select(actual_keys).drop_nulls().unique()
pred_df = data.select(pred_keys).drop_nulls().unique()

# True positives: triples present on both sides.
correct_num = actual_df.join(
    pred_df,
    left_on=actual_keys,
    right_on=pred_keys,
    how="inner",
).height

# False negatives: actual triples with no matching prediction.
actual_but_not_pred_num = actual_df.join(
    pred_df,
    left_on=actual_keys,
    right_on=pred_keys,
    how="anti",
).height

# False positives: predicted triples with no matching actual triple.
pred_but_not_actual_num = pred_df.join(
    actual_df,
    left_on=pred_keys,
    right_on=actual_keys,
    how="anti",
).height

# Guard against empty prediction / actual sets instead of dividing by zero.
predicted_total = correct_num + pred_but_not_actual_num
actual_total = correct_num + actual_but_not_pred_num
precision = correct_num / predicted_total if predicted_total else 0.0
recall = correct_num / actual_total if actual_total else 0.0

f5_score(precision, recall)

0.9609728956362769