# Перерасчет метрики для Llama2
В изначальном подсчете при генерации нескольких пар из одного предложения был добавлен пробел перед запятой. Этот пробел попадал в ответ, из-за чего сравнение с разметкой давало неправильный результат. В этом ноутбуке на основе артефактов, сохранённых в wandb, пересчитываются метрики качества для задачи E2E-ABSA для Llama 2 на датасетах с ресторанами и компьютерами. Затем данные загружаются в wandb в отдельный run.

In [4]:
import wandb
def _f1_from_counts(tp, fn, fp):
    """Return 2*TP / (2*TP + FN + FP), or 0.0 when that denominator is zero."""
    denominator = 2 * tp + fn + fp
    return 2 * tp / denominator if denominator else 0.0


def calc(answer, label):
    """Compute ABSA F1 scores from predictions and labels and log them to W&B.

    Each element of ``answer`` and ``label`` is a string holding a
    comma-separated list of items. Items are stripped of surrounding
    whitespace before comparison, and the text before the first ":" of an
    item is treated as its aspect (items are presumably ``aspect:sentiment``
    pairs -- confirm against the data).

    Counting rules, applied per (prediction, label) row:
      * aspect TP / FN: every labelled aspect that is / is not among the
        predicted aspects;
      * aspect FP: every predicted aspect that is not among the labelled
        aspects (the same count is also added to the pair-level FP);
      * pair TP / FN: every labelled item that is / is not among the
        predicted items (exact string match after stripping).

    Side effects: registers the four metrics with ``wandb.define_metric``,
    logs them with ``wandb.log``, calls ``wandb.finish()`` and prints the
    pair-level counts together with the exact-match percentage.

    Args:
        answer: iterable of prediction strings.
        label: iterable of label strings. Rows are paired with ``zip``, so
            surplus elements of the longer iterable are ignored.

    Returns:
        None. All F1 values and the exact-match percentage fall back to 0
        when there is nothing to count, instead of raising
        ``ZeroDivisionError``.
    """
    wandb.define_metric("F1_A", summary="mean")
    wandb.define_metric("F1_S", summary="mean")
    wandb.define_metric("F1_macro_ABSA", summary="mean")
    wandb.define_metric("F1_micro_ABSA", summary="mean")

    tp_aspect = fn_aspect = fp_aspect = 0
    tp_sent = fn_sent = fp_sent = 0
    total_aspects_lab = 0

    for pred, lb in zip(answer, label):
        y_pred = [item.strip() for item in pred.split(",")]
        y_true = [item.strip() for item in lb.split(",")]
        aspects_pred = [item.split(":")[0] for item in y_pred]
        aspects_true = [item.split(":")[0] for item in y_true]
        total_aspects_lab += len(aspects_true)

        # Sets are used for membership tests only; the counts below still
        # iterate over the lists so duplicated items are counted every time.
        pred_items = set(y_pred)
        pred_aspects = set(aspects_pred)
        true_aspects = set(aspects_true)

        found_aspects = sum(aspect in pred_aspects for aspect in aspects_true)
        tp_aspect += found_aspects
        fn_aspect += len(aspects_true) - found_aspects

        # NOTE(review): pair-level FP only counts predictions whose aspect is
        # absent from the labels. A prediction with a labelled aspect but a
        # non-matching full item shows up as FN_sent only. Kept as is so that
        # previously reported numbers stay comparable -- confirm intent.
        spurious_aspects = sum(
            aspect not in true_aspects for aspect in aspects_pred
        )
        fp_aspect += spurious_aspects
        fp_sent += spurious_aspects

        found_items = sum(item in pred_items for item in y_true)
        tp_sent += found_items
        fn_sent += len(y_true) - found_items

    f1_aspect = _f1_from_counts(tp_aspect, fn_aspect, fp_aspect)
    f1_sent = _f1_from_counts(tp_sent, fn_sent, fp_sent)
    f1_macro = (f1_aspect + f1_sent) / 2
    # Micro average: pool the aspect-level and pair-level counts.
    f1_micro = _f1_from_counts(
        tp_aspect + tp_sent,
        fn_aspect + fn_sent,
        fp_aspect + fp_sent,
    )

    log_dict = {
        "F1_A": f1_aspect,
        "F1_S": f1_sent,
        "F1_macro_ABSA": f1_macro,
        "F1_micro_ABSA": f1_micro,
    }
    wandb.log(log_dict)
    wandb.finish()

    # Share of labelled items reproduced exactly, as a rounded percentage.
    exact_match = (
        round(tp_sent / total_aspects_lab * 100) if total_aspects_lab else 0
    )
    print(f'TP_sent={tp_sent}, FN_sent={fn_sent}, FP_sent={fp_sent},Total_Aspects={total_aspects_lab}, Exact_match={exact_match}')
    

In [5]:
import json

# Open a new W&B run tagged with the id that appears in the source artifact
# name, then fetch that run's predictions table artifact.
run = wandb.init(
    project="absa_fin",
    name="absaR-llama7v2",
    tags=["y1srk895"],
)
artifact = run.use_artifact(
    "shakhova/absa_fin/run-y1srk895-absaRllama7_preds:v0",
    type="run_table",
)
artifact_dir = artifact.download()

[34m[1mwandb[0m:   1 of 1 files downloaded.  


In [6]:
# Read the predictions table (presumably the file fetched by the preceding
# artifact.download() cell).
with open(
    "artifacts/run-y1srk895-absaRllama7_preds-v0/absaR-llama7_preds.table.json"
) as fp:
    template = json.load(fp)

# Column 0 of every row is collected as the prediction, column 1 as the label.
y_pred = [row[0] for row in template["data"]]
y_true = [row[1] for row in template["data"]]

# Recompute the metrics; calc() logs them to W&B and finishes the run.
calc(y_pred, y_true)

0,1
F1_A,▁
F1_S,▁
F1_macro_ABSA,▁
F1_micro_ABSA,▁


TP_sent=259, FN_sent=55, FP_sent=28,Total_Aspects=314, Exact_match=82


In [7]:
# Open a new W&B run tagged with the id that appears in the source artifact
# name, then fetch that run's predictions table artifact.
run = wandb.init(
    project="absa_fin",
    name="absaR-llama7v2",
    tags=["lxlzlbzz"],
)
artifact = run.use_artifact(
    "shakhova/absa_fin/run-lxlzlbzz-absaRllama7_preds:v0",
    type="run_table",
)
artifact_dir = artifact.download()

[34m[1mwandb[0m:   1 of 1 files downloaded.  


In [8]:
# Read the predictions table (presumably the file fetched by the preceding
# artifact.download() cell).
with open(
    "artifacts/run-lxlzlbzz-absaRllama7_preds-v0/absaR-llama7_preds.table.json"
) as fp:
    template = json.load(fp)

# Column 0 of every row is collected as the prediction, column 1 as the label.
y_pred = [row[0] for row in template["data"]]
y_true = [row[1] for row in template["data"]]

# Recompute the metrics; calc() logs them to W&B and finishes the run.
calc(y_pred, y_true)

0,1
F1_A,▁
F1_S,▁
F1_macro_ABSA,▁
F1_micro_ABSA,▁


TP_sent=250, FN_sent=93, FP_sent=65,Total_Aspects=343, Exact_match=73


In [9]:
# Open a new W&B run tagged with the id that appears in the source artifact
# name, then fetch that run's predictions table artifact.
run = wandb.init(
    project="absa_fin",
    name="absaR-llama7v2",
    tags=["ir1b6jlf"],
)
artifact = run.use_artifact(
    "shakhova/absa_fin/run-ir1b6jlf-absaRllama7_preds:v0",
    type="run_table",
)
artifact_dir = artifact.download()

[34m[1mwandb[0m:   1 of 1 files downloaded.  


In [10]:
# Read the predictions table (presumably the file fetched by the preceding
# artifact.download() cell).
with open(
    "artifacts/run-ir1b6jlf-absaRllama7_preds-v0/absaR-llama7_preds.table.json"
) as fp:
    template = json.load(fp)

# Column 0 of every row is collected as the prediction, column 1 as the label.
y_pred = [row[0] for row in template["data"]]
y_true = [row[1] for row in template["data"]]

# Recompute the metrics; calc() logs them to W&B and finishes the run.
calc(y_pred, y_true)

0,1
F1_A,▁
F1_S,▁
F1_macro_ABSA,▁
F1_micro_ABSA,▁


TP_sent=256, FN_sent=58, FP_sent=32,Total_Aspects=314, Exact_match=82


In [11]:
# Open a new W&B run tagged with the id that appears in the source artifact
# name, then fetch that run's predictions table artifact.
# NOTE(review): the run is named "absaR-..." while the artifact name contains
# "absaL" -- confirm which dataset this run actually belongs to.
run = wandb.init(
    project="absa_fin",
    name="absaR-llama7v2",
    tags=["ixp0dw6x"],
)
artifact = run.use_artifact(
    "shakhova/absa_fin/run-ixp0dw6x-absaLllama7_preds:v0",
    type="run_table",
)
artifact_dir = artifact.download()

[34m[1mwandb[0m:   1 of 1 files downloaded.  


In [12]:
# Read the predictions table (presumably the file fetched by the preceding
# artifact.download() cell).
with open(
    "artifacts/run-ixp0dw6x-absaLllama7_preds-v0/absaL-llama7_preds.table.json"
) as fp:
    template = json.load(fp)

# Column 0 of every row is collected as the prediction, column 1 as the label.
y_pred = [row[0] for row in template["data"]]
y_true = [row[1] for row in template["data"]]

# Recompute the metrics; calc() logs them to W&B and finishes the run.
calc(y_pred, y_true)

0,1
F1_A,▁
F1_S,▁
F1_macro_ABSA,▁
F1_micro_ABSA,▁


TP_sent=231, FN_sent=71, FP_sent=48,Total_Aspects=302, Exact_match=76


In [13]:
# Open a new W&B run tagged with the id that appears in the source artifact
# name, then fetch that run's predictions table artifact.
# NOTE(review): the run is named "absaR-..." while the artifact name contains
# "absaL" -- confirm which dataset this run actually belongs to.
run = wandb.init(
    project="absa_fin",
    name="absaR-llama7v2",
    tags=["kikj1txw"],
)
artifact = run.use_artifact(
    "shakhova/absa_fin/run-kikj1txw-absaLllama7_preds:v0",
    type="run_table",
)
artifact_dir = artifact.download()

[34m[1mwandb[0m:   1 of 1 files downloaded.  


In [14]:
# Read the predictions table (presumably the file fetched by the preceding
# artifact.download() cell).
with open(
    "artifacts/run-kikj1txw-absaLllama7_preds-v0/absaL-llama7_preds.table.json"
) as fp:
    template = json.load(fp)

# Column 0 of every row is collected as the prediction, column 1 as the label.
y_pred = [row[0] for row in template["data"]]
y_true = [row[1] for row in template["data"]]

# Recompute the metrics; calc() logs them to W&B and finishes the run.
calc(y_pred, y_true)

0,1
F1_A,▁
F1_S,▁
F1_macro_ABSA,▁
F1_micro_ABSA,▁


TP_sent=266, FN_sent=66, FP_sent=49,Total_Aspects=332, Exact_match=80


In [15]:
# Open a new W&B run tagged with the id that appears in the source artifact
# name, then fetch that run's predictions table artifact.
run = wandb.init(
    project="absa_fin",
    name="MTabsaR-llama7v2",
    tags=["feihuaev"],
)
artifact = run.use_artifact(
    "shakhova/absa_fin/run-feihuaev-MTabsaRllama7_preds:v0",
    type="run_table",
)
artifact_dir = artifact.download()

[34m[1mwandb[0m:   1 of 1 files downloaded.  


In [16]:
# Read the predictions table (presumably the file fetched by the preceding
# artifact.download() cell).
with open(
    "artifacts/run-feihuaev-MTabsaRllama7_preds-v0/MTabsaR-llama7_preds.table.json"
) as fp:
    template = json.load(fp)

# Column 0 of every row is collected as the prediction, column 1 as the label.
y_pred = [row[0] for row in template["data"]]
y_true = [row[1] for row in template["data"]]

# Recompute the metrics; calc() logs them to W&B and finishes the run.
calc(y_pred, y_true)

0,1
F1_A,▁
F1_S,▁
F1_macro_ABSA,▁
F1_micro_ABSA,▁


TP_sent=248, FN_sent=53, FP_sent=37,Total_Aspects=301, Exact_match=82


In [17]:
# Open a new W&B run tagged with the id that appears in the source artifact
# name, then fetch that run's predictions table artifact.
run = wandb.init(
    project="absa_fin",
    name="MTabsaR-llama7v2",
    tags=["dyng9he8"],
)
artifact = run.use_artifact(
    "shakhova/absa_fin/run-dyng9he8-MTabsaRllama7_preds:v0",
    type="run_table",
)
artifact_dir = artifact.download()

[34m[1mwandb[0m:   1 of 1 files downloaded.  


In [18]:
# Read the predictions table (presumably the file fetched by the preceding
# artifact.download() cell).
with open(
    "artifacts/run-dyng9he8-MTabsaRllama7_preds-v0/MTabsaR-llama7_preds.table.json"
) as fp:
    template = json.load(fp)

# Column 0 of every row is collected as the prediction, column 1 as the label.
y_pred = [row[0] for row in template["data"]]
y_true = [row[1] for row in template["data"]]

# Recompute the metrics; calc() logs them to W&B and finishes the run.
calc(y_pred, y_true)

0,1
F1_A,▁
F1_S,▁
F1_macro_ABSA,▁
F1_micro_ABSA,▁


TP_sent=239, FN_sent=54, FP_sent=28,Total_Aspects=293, Exact_match=82


In [19]:
# Open a new W&B run tagged with the id that appears in the source artifact
# name, then fetch that run's predictions table artifact.
run = wandb.init(
    project="absa_fin",
    name="MTabsaR-llama7v2",
    tags=["yzta4uva"],
)
artifact = run.use_artifact(
    "shakhova/absa_fin/run-yzta4uva-MTabsaRllama7_preds:v0",
    type="run_table",
)
artifact_dir = artifact.download()

[34m[1mwandb[0m:   1 of 1 files downloaded.  


In [20]:
# Read the predictions table (presumably the file fetched by the preceding
# artifact.download() cell).
with open(
    "artifacts/run-yzta4uva-MTabsaRllama7_preds-v0/MTabsaR-llama7_preds.table.json"
) as fp:
    template = json.load(fp)

# Column 0 of every row is collected as the prediction, column 1 as the label.
y_pred = [row[0] for row in template["data"]]
y_true = [row[1] for row in template["data"]]

# Recompute the metrics; calc() logs them to W&B and finishes the run.
calc(y_pred, y_true)

0,1
F1_A,▁
F1_S,▁
F1_macro_ABSA,▁
F1_micro_ABSA,▁


TP_sent=260, FN_sent=53, FP_sent=46,Total_Aspects=313, Exact_match=83


In [21]:
# Open a new W&B run tagged with the id that appears in the source artifact
# name, then fetch that run's predictions table artifact.
run = wandb.init(
    project="absa_fin",
    name="MTabsaR-llama7v2",
    tags=["wya5rjd6"],
)
artifact = run.use_artifact(
    "shakhova/absa_fin/run-wya5rjd6-MTabsaRllama7_preds:v0",
    type="run_table",
)
artifact_dir = artifact.download()

[34m[1mwandb[0m:   1 of 1 files downloaded.  


In [22]:
# Read the predictions table (presumably the file fetched by the preceding
# artifact.download() cell).
with open(
    "artifacts/run-wya5rjd6-MTabsaRllama7_preds-v0/MTabsaR-llama7_preds.table.json"
) as fp:
    template = json.load(fp)

# Column 0 of every row is collected as the prediction, column 1 as the label.
y_pred = [row[0] for row in template["data"]]
y_true = [row[1] for row in template["data"]]

# Recompute the metrics; calc() logs them to W&B and finishes the run.
calc(y_pred, y_true)

0,1
F1_A,▁
F1_S,▁
F1_macro_ABSA,▁
F1_micro_ABSA,▁


TP_sent=247, FN_sent=60, FP_sent=40,Total_Aspects=307, Exact_match=80


In [23]:
# Open a new W&B run tagged with the id that appears in the source artifact
# name, then fetch that run's predictions table artifact.
run = wandb.init(
    project="absa_fin",
    name="MTabsaR-llama7v2",
    tags=["sp9traj2"],
)
artifact = run.use_artifact(
    "shakhova/absa_fin/run-sp9traj2-MTabsaRllama7_preds:v0",
    type="run_table",
)
artifact_dir = artifact.download()

[34m[1mwandb[0m:   1 of 1 files downloaded.  


In [24]:
# Read the predictions table (presumably the file fetched by the preceding
# artifact.download() cell).
with open(
    "artifacts/run-sp9traj2-MTabsaRllama7_preds-v0/MTabsaR-llama7_preds.table.json"
) as fp:
    template = json.load(fp)

# Column 0 of every row is collected as the prediction, column 1 as the label.
y_pred = [row[0] for row in template["data"]]
y_true = [row[1] for row in template["data"]]

# Recompute the metrics; calc() logs them to W&B and finishes the run.
calc(y_pred, y_true)

0,1
F1_A,▁
F1_S,▁
F1_macro_ABSA,▁
F1_micro_ABSA,▁


TP_sent=247, FN_sent=72, FP_sent=40,Total_Aspects=319, Exact_match=77


# Laptops

In [25]:
# Open a new W&B run tagged with the id that appears in the source artifact
# name, then fetch that run's predictions table artifact.
run = wandb.init(
    project="absa_fin",
    name="absaL-llama7v2",
    tags=["gpxqe7v1"],
)
artifact = run.use_artifact(
    "shakhova/absa_fin/run-gpxqe7v1-absaLllama7_preds:v0",
    type="run_table",
)
artifact_dir = artifact.download()

[34m[1mwandb[0m:   1 of 1 files downloaded.  


In [26]:
# Read the predictions table (presumably the file fetched by the preceding
# artifact.download() cell).
with open(
    "artifacts/run-gpxqe7v1-absaLllama7_preds-v0/absaL-llama7_preds.table.json"
) as fp:
    template = json.load(fp)

# Column 0 of every row is collected as the prediction, column 1 as the label.
y_pred = [row[0] for row in template["data"]]
y_true = [row[1] for row in template["data"]]

# Recompute the metrics; calc() logs them to W&B and finishes the run.
calc(y_pred, y_true)

0,1
F1_A,▁
F1_S,▁
F1_macro_ABSA,▁
F1_micro_ABSA,▁


TP_sent=229, FN_sent=44, FP_sent=29,Total_Aspects=273, Exact_match=84


In [27]:
# Open a new W&B run tagged with the id that appears in the source artifact
# name, then fetch that run's predictions table artifact.
run = wandb.init(
    project="absa_fin",
    name="absaL-llama7v2",
    tags=["zovf8s0i"],
)
artifact = run.use_artifact(
    "shakhova/absa_fin/run-zovf8s0i-absaLllama7_preds:v0",
    type="run_table",
)
artifact_dir = artifact.download()

[34m[1mwandb[0m:   1 of 1 files downloaded.  


In [28]:
# Read the predictions table (presumably the file fetched by the preceding
# artifact.download() cell).
with open(
    "artifacts/run-zovf8s0i-absaLllama7_preds-v0/absaL-llama7_preds.table.json"
) as fp:
    template = json.load(fp)

# Column 0 of every row is collected as the prediction, column 1 as the label.
y_pred = [row[0] for row in template["data"]]
y_true = [row[1] for row in template["data"]]

# Recompute the metrics; calc() logs them to W&B and finishes the run.
calc(y_pred, y_true)

0,1
F1_A,▁
F1_S,▁
F1_macro_ABSA,▁
F1_micro_ABSA,▁


TP_sent=197, FN_sent=54, FP_sent=22,Total_Aspects=251, Exact_match=78


In [29]:
# Open a new W&B run tagged with the id that appears in the source artifact
# name, then fetch that run's predictions table artifact.
run = wandb.init(
    project="absa_fin",
    name="absaL-llama7v2",
    tags=["xwvohfg2"],
)
artifact = run.use_artifact(
    "shakhova/absa_fin/run-xwvohfg2-absaLllama7_preds:v0",
    type="run_table",
)
artifact_dir = artifact.download()

[34m[1mwandb[0m:   1 of 1 files downloaded.  


In [30]:
# Read the predictions table (presumably the file fetched by the preceding
# artifact.download() cell).
with open(
    "artifacts/run-xwvohfg2-absaLllama7_preds-v0/absaL-llama7_preds.table.json"
) as fp:
    template = json.load(fp)

# Column 0 of every row is collected as the prediction, column 1 as the label.
y_pred = [row[0] for row in template["data"]]
y_true = [row[1] for row in template["data"]]

# Recompute the metrics; calc() logs them to W&B and finishes the run.
calc(y_pred, y_true)

0,1
F1_A,▁
F1_S,▁
F1_macro_ABSA,▁
F1_micro_ABSA,▁


TP_sent=205, FN_sent=54, FP_sent=30,Total_Aspects=259, Exact_match=79


In [31]:
# Open a new W&B run tagged with the id that appears in the source artifact
# name, then fetch that run's predictions table artifact.
run = wandb.init(
    project="absa_fin",
    name="absaL-llama7v2",
    tags=["fxdoiohj"],
)
artifact = run.use_artifact(
    "shakhova/absa_fin/run-fxdoiohj-absaLllama7_preds:v0",
    type="run_table",
)
artifact_dir = artifact.download()

[34m[1mwandb[0m:   1 of 1 files downloaded.  


In [32]:
# Read the predictions table (presumably the file fetched by the preceding
# artifact.download() cell).
with open(
    "artifacts/run-fxdoiohj-absaLllama7_preds-v0/absaL-llama7_preds.table.json"
) as fp:
    template = json.load(fp)

# Column 0 of every row is collected as the prediction, column 1 as the label.
y_pred = [row[0] for row in template["data"]]
y_true = [row[1] for row in template["data"]]

# Recompute the metrics; calc() logs them to W&B and finishes the run.
calc(y_pred, y_true)

0,1
F1_A,▁
F1_S,▁
F1_macro_ABSA,▁
F1_micro_ABSA,▁


TP_sent=218, FN_sent=52, FP_sent=28,Total_Aspects=270, Exact_match=81


In [33]:
# Open a new W&B run tagged with the id that appears in the source artifact
# name, then fetch that run's predictions table artifact.
run = wandb.init(
    project="absa_fin",
    name="absaL-llama7v2",
    tags=["awfju5k6"],
)
artifact = run.use_artifact(
    "shakhova/absa_fin/run-awfju5k6-absaLllama7_preds:v0",
    type="run_table",
)
artifact_dir = artifact.download()

[34m[1mwandb[0m:   1 of 1 files downloaded.  


In [34]:
# Read the predictions table (presumably the file fetched by the preceding
# artifact.download() cell).
with open(
    "artifacts/run-awfju5k6-absaLllama7_preds-v0/absaL-llama7_preds.table.json"
) as fp:
    template = json.load(fp)

# Column 0 of every row is collected as the prediction, column 1 as the label.
y_pred = [row[0] for row in template["data"]]
y_true = [row[1] for row in template["data"]]

# Recompute the metrics; calc() logs them to W&B and finishes the run.
calc(y_pred, y_true)

0,1
F1_A,▁
F1_S,▁
F1_macro_ABSA,▁
F1_micro_ABSA,▁


TP_sent=208, FN_sent=51, FP_sent=34,Total_Aspects=259, Exact_match=80


In [35]:
# Open a new W&B run tagged with the id that appears in the source artifact
# name, then fetch that run's predictions table artifact.
run = wandb.init(
    project="absa_fin",
    name="MTabsaL-llama7v2",
    tags=["bj41dns5"],
)
artifact = run.use_artifact(
    "shakhova/absa_fin/run-bj41dns5-MTabsaLllama7_preds:v0",
    type="run_table",
)
artifact_dir = artifact.download()

[34m[1mwandb[0m:   1 of 1 files downloaded.  


In [36]:
# Read the predictions table (presumably the file fetched by the preceding
# artifact.download() cell).
with open(
    "artifacts/run-bj41dns5-MTabsaLllama7_preds-v0/MTabsaL-llama7_preds.table.json"
) as fp:
    template = json.load(fp)

# Column 0 of every row is collected as the prediction, column 1 as the label.
y_pred = [row[0] for row in template["data"]]
y_true = [row[1] for row in template["data"]]

# Recompute the metrics; calc() logs them to W&B and finishes the run.
calc(y_pred, y_true)

0,1
F1_A,▁
F1_S,▁
F1_macro_ABSA,▁
F1_micro_ABSA,▁


TP_sent=204, FN_sent=40, FP_sent=23,Total_Aspects=244, Exact_match=84


In [37]:
# Open a new W&B run tagged with the id that appears in the source artifact
# name, then fetch that run's predictions table artifact.
run = wandb.init(
    project="absa_fin",
    name="MTabsaL-llama7v2",
    tags=["dbt5pbqy"],
)
artifact = run.use_artifact(
    "shakhova/absa_fin/run-dbt5pbqy-MTabsaLllama7_preds:v0",
    type="run_table",
)
artifact_dir = artifact.download()

[34m[1mwandb[0m:   1 of 1 files downloaded.  


In [38]:
# Read the predictions table (presumably the file fetched by the preceding
# artifact.download() cell).
with open(
    "artifacts/run-dbt5pbqy-MTabsaLllama7_preds-v0/MTabsaL-llama7_preds.table.json"
) as fp:
    template = json.load(fp)

# Column 0 of every row is collected as the prediction, column 1 as the label.
y_pred = [row[0] for row in template["data"]]
y_true = [row[1] for row in template["data"]]

# Recompute the metrics; calc() logs them to W&B and finishes the run.
calc(y_pred, y_true)

0,1
F1_A,▁
F1_S,▁
F1_macro_ABSA,▁
F1_micro_ABSA,▁


TP_sent=191, FN_sent=38, FP_sent=25,Total_Aspects=229, Exact_match=83


In [39]:
# Open a new W&B run tagged with the id that appears in the source artifact
# name, then fetch that run's predictions table artifact.
run = wandb.init(
    project="absa_fin",
    name="MTabsaL-llama7v2",
    tags=["pn0z5vb0"],
)
artifact = run.use_artifact(
    "shakhova/absa_fin/run-pn0z5vb0-MTabsaLllama7_preds:v0",
    type="run_table",
)
artifact_dir = artifact.download()

[34m[1mwandb[0m:   1 of 1 files downloaded.  


In [40]:
# Read the predictions table (presumably the file fetched by the preceding
# artifact.download() cell).
with open(
    "artifacts/run-pn0z5vb0-MTabsaLllama7_preds-v0/MTabsaL-llama7_preds.table.json"
) as fp:
    template = json.load(fp)

# Column 0 of every row is collected as the prediction, column 1 as the label.
y_pred = [row[0] for row in template["data"]]
y_true = [row[1] for row in template["data"]]

# Recompute the metrics; calc() logs them to W&B and finishes the run.
calc(y_pred, y_true)

0,1
F1_A,▁
F1_S,▁
F1_macro_ABSA,▁
F1_micro_ABSA,▁


TP_sent=213, FN_sent=55, FP_sent=35,Total_Aspects=268, Exact_match=79


In [41]:
# Open a new W&B run tagged with the id that appears in the source artifact
# name, then fetch that run's predictions table artifact.
run = wandb.init(
    project="absa_fin",
    name="MTabsaL-llama7v2",
    tags=["4yfnq7mr"],
)
artifact = run.use_artifact(
    "shakhova/absa_fin/run-4yfnq7mr-MTabsaLllama7_preds:v0",
    type="run_table",
)
artifact_dir = artifact.download()

[34m[1mwandb[0m:   1 of 1 files downloaded.  


In [42]:
# Read the predictions table (presumably the file fetched by the preceding
# artifact.download() cell).
with open(
    "artifacts/run-4yfnq7mr-MTabsaLllama7_preds-v0/MTabsaL-llama7_preds.table.json"
) as fp:
    template = json.load(fp)

# Column 0 of every row is collected as the prediction, column 1 as the label.
y_pred = [row[0] for row in template["data"]]
y_true = [row[1] for row in template["data"]]

# Recompute the metrics; calc() logs them to W&B and finishes the run.
calc(y_pred, y_true)

0,1
F1_A,▁
F1_S,▁
F1_macro_ABSA,▁
F1_micro_ABSA,▁


TP_sent=206, FN_sent=57, FP_sent=30,Total_Aspects=263, Exact_match=78


In [43]:
# Open a new W&B run tagged with the id that appears in the source artifact
# name, then fetch that run's predictions table artifact.
run = wandb.init(
    project="absa_fin",
    name="MTabsaL-llama7v2",
    tags=["orgik5ax"],
)
artifact = run.use_artifact(
    "shakhova/absa_fin/run-orgik5ax-MTabsaLllama7_preds:v0",
    type="run_table",
)
artifact_dir = artifact.download()

[34m[1mwandb[0m:   1 of 1 files downloaded.  


In [44]:
# Read the predictions table (presumably the file fetched by the preceding
# artifact.download() cell).
with open(
    "artifacts/run-orgik5ax-MTabsaLllama7_preds-v0/MTabsaL-llama7_preds.table.json"
) as fp:
    template = json.load(fp)

# Column 0 of every row is collected as the prediction, column 1 as the label.
y_pred = [row[0] for row in template["data"]]
y_true = [row[1] for row in template["data"]]

# Recompute the metrics; calc() logs them to W&B and finishes the run.
calc(y_pred, y_true)

0,1
F1_A,▁
F1_S,▁
F1_macro_ABSA,▁
F1_micro_ABSA,▁


TP_sent=203, FN_sent=58, FP_sent=34,Total_Aspects=261, Exact_match=78
