# 目的
- exp012とexp020のvalid_dfにおけるclassification_reportを比較する

In [2]:
import polars as pl

In [3]:
# Ground-truth documents; later cells read its "document", "tokens" and
# "labels" columns.
train = pl.read_json("../data/train.json")

# Per-experiment prediction tables (exp012 first, then exp020); later cells
# read their "document_pred", "token_pred" and "label_pred" columns.
data_12, data_20 = (
    pl.read_csv(csv_path)
    for csv_path in ("../valid_df/exp012.csv", "../valid_df/exp020.csv")
)

In [4]:
# Display the (rows, columns) size of the ground-truth frame.
train.shape

(6807, 5)

In [5]:
# Cap DataFrame display at 10 rows.
# NOTE(review): the original comment read "show all rows", which in polars
# would be set_tbl_rows(-1); the value used here is 10 — confirm which one
# is intended.
pl.Config.set_tbl_rows(10)

polars.config.Config

# exp012

In [6]:
# Clean the exp012 predictions:
#   * the literal string "null" is turned into a real null in every column,
#   * the document / token index columns are cast to Int64,
#   * rows containing any null are dropped,
#   * the result is ordered by document id.
pred_df = (
    data_12.select(
        *(
            pl.col(index_col).replace("null", None).cast(pl.Int64)
            for index_col in ("document_pred", "token_pred")
        ),
        pl.col("label_pred").replace("null", None),
    )
    .drop_nulls()
    .sort(by="document_pred")
)

In [7]:
# Add "tokens_len": the number of tokens in each document.
# Uses polars' native list-length expression instead of map_elements(len),
# which calls back into Python once per row.
# Assumes "tokens" is a List column (as produced by read_json from JSON
# arrays) — TODO confirm. The resulting dtype is an unsigned integer rather
# than Int64; downstream it is only consumed as a Python int.
train_with_token_len = train.with_columns(
    pl.col("tokens").list.len().alias("tokens_len"),
)

In [8]:
# Collapse the predictions to one row per document, holding the list of
# predicted token indices and the list of predicted labels, then left-join
# each document's token count and gold labels from the training frame.
# Only documents that occur in pred_df (i.e. have at least one non-null
# prediction row) are present in the result.
pred_df_agg_with_len = (
    pred_df.group_by("document_pred")
    .agg(pl.col("token_pred", "label_pred"))
    .join(
        train_with_token_len.select("document", "tokens_len", "labels"),
        how="left",
        left_on="document_pred",
        right_on="document",
    )
)

In [9]:
# Expand each document's sparse predictions into a full-length label
# sequence: start from "O" for every token, then overwrite the positions
# listed in token_pred with the corresponding entry of label_pred.
label_pred_alls = []
for positions, tags, n_tokens in zip(
    pred_df_agg_with_len["token_pred"],
    pred_df_agg_with_len["label_pred"],
    pred_df_agg_with_len["tokens_len"],
):
    dense_labels = ["O"] * n_tokens
    for position, tag in zip(positions, tags):
        dense_labels[position] = tag
    label_pred_alls.append(dense_labels)

# Keep only the gold labels and the dense predicted labels, row-aligned per
# document, which is the shape the seqeval calls below consume.
actual_pred_df = pred_df_agg_with_len.with_columns(
    pl.Series(name="label_pred_all", values=label_pred_alls)
).select("labels", "label_pred_all")

In [10]:
from seqeval.metrics.sequence_labeling import precision_recall_fscore_support

# Micro-averaged F1 and F5 over the gold vs. predicted label sequences.
# precision_recall_fscore_support returns (precision, recall, fscore,
# support); index 2 picks the F-beta score. Only `beta` differs between the
# two calls.
calculated_f1_score, calculated_f5_score = (
    precision_recall_fscore_support(
        actual_pred_df["labels"].to_list(),
        actual_pred_df["label_pred_all"].to_list(),
        beta=beta,
        average="micro",
    )[2]
    for beta in (1, 5)
)

calculated_f1_score, calculated_f5_score

(0.9074074074074073, 0.9168105929763961)

In [11]:
from seqeval.metrics import classification_report

# Per-entity-type precision / recall / F1 / support for the frame built
# above; printed so the plain-text table keeps its alignment.
print(
    classification_report(
        y_true=actual_pred_df["labels"].to_list(),
        y_pred=actual_pred_df["label_pred_all"].to_list(),
    )
)

  _warn_prf(average, modifier, msg_start, len(result))


                precision    recall  f1-score   support

         EMAIL       0.86      1.00      0.92         6
        ID_NUM       0.94      0.89      0.92        19
  NAME_STUDENT       0.91      0.92      0.92       222
     PHONE_NUM       0.33      1.00      0.50         1
STREET_ADDRESS       0.00      0.00      0.00         1
  URL_PERSONAL       0.85      1.00      0.92        17
      USERNAME       0.00      0.00      0.00         1

     micro avg       0.90      0.92      0.91       267
     macro avg       0.56      0.69      0.60       267
  weighted avg       0.90      0.92      0.91       267



# exp020

In [12]:
# Clean the exp020 predictions:
#   * the literal string "null" is turned into a real null in every column,
#   * the document / token index columns are cast to Int64,
#   * rows containing any null are dropped,
#   * the result is ordered by document id.
pred_df = (
    data_20.select(
        *(
            pl.col(index_col).replace("null", None).cast(pl.Int64)
            for index_col in ("document_pred", "token_pred")
        ),
        pl.col("label_pred").replace("null", None),
    )
    .drop_nulls()
    .sort(by="document_pred")
)

# Add "tokens_len": the number of tokens in each document (same derivation
# as in the exp012 section, recomputed here).
# Uses polars' native list-length expression instead of map_elements(len),
# which calls back into Python once per row.
# Assumes "tokens" is a List column (as produced by read_json from JSON
# arrays) — TODO confirm. The resulting dtype is an unsigned integer rather
# than Int64; downstream it is only consumed as a Python int.
train_with_token_len = train.with_columns(
    pl.col("tokens").list.len().alias("tokens_len"),
)

# Collapse the predictions to one row per document, holding the list of
# predicted token indices and the list of predicted labels, then left-join
# each document's token count and gold labels from the training frame.
# Only documents that occur in pred_df (i.e. have at least one non-null
# prediction row) are present in the result.
pred_df_agg_with_len = (
    pred_df.group_by("document_pred")
    .agg(pl.col("token_pred", "label_pred"))
    .join(
        train_with_token_len.select("document", "tokens_len", "labels"),
        how="left",
        left_on="document_pred",
        right_on="document",
    )
)

# Expand each document's sparse predictions into a full-length label
# sequence: start from "O" for every token, then overwrite the positions
# listed in token_pred with the corresponding entry of label_pred.
label_pred_alls = []
for positions, tags, n_tokens in zip(
    pred_df_agg_with_len["token_pred"],
    pred_df_agg_with_len["label_pred"],
    pred_df_agg_with_len["tokens_len"],
):
    dense_labels = ["O"] * n_tokens
    for position, tag in zip(positions, tags):
        dense_labels[position] = tag
    label_pred_alls.append(dense_labels)

# Keep only the gold labels and the dense predicted labels, row-aligned per
# document, which is the shape the seqeval calls below consume.
actual_pred_df = pred_df_agg_with_len.with_columns(
    pl.Series(name="label_pred_all", values=label_pred_alls)
).select("labels", "label_pred_all")

from seqeval.metrics.sequence_labeling import precision_recall_fscore_support

# Micro-averaged F1 and F5 over the gold vs. predicted label sequences.
# precision_recall_fscore_support returns (precision, recall, fscore,
# support); index 2 picks the F-beta score. Only `beta` differs between the
# two calls.
calculated_f1_score, calculated_f5_score = (
    precision_recall_fscore_support(
        actual_pred_df["labels"].to_list(),
        actual_pred_df["label_pred_all"].to_list(),
        beta=beta,
        average="micro",
    )[2]
    for beta in (1, 5)
)

calculated_f1_score, calculated_f5_score

(0.8816568047337279, 0.9407479358912092)

In [13]:
from seqeval.metrics import classification_report

# Per-entity-type precision / recall / F1 / support for the frame built
# above; printed so the plain-text table keeps its alignment.
print(
    classification_report(
        y_true=actual_pred_df["labels"].to_list(),
        y_pred=actual_pred_df["label_pred_all"].to_list(),
    )
)

                precision    recall  f1-score   support

         EMAIL       0.80      1.00      0.89         4
        ID_NUM       0.86      1.00      0.92        18
  NAME_STUDENT       0.83      0.97      0.89       268
     PHONE_NUM       1.00      1.00      1.00         1
STREET_ADDRESS       0.00      0.00      0.00         1
  URL_PERSONAL       0.82      0.67      0.74        21
      USERNAME       1.00      1.00      1.00         2

     micro avg       0.83      0.95      0.88       315
     macro avg       0.76      0.80      0.78       315
  weighted avg       0.83      0.95      0.88       315

