# Baseline Results
Baseline models are:
- bert-large-uncased
- roberta-large
- mpnet-base

In [1]:
import os
import pandas as pd
import sys
# Add the parent directory to the import path; the project import below must stay after this line.
sys.path.append("..")
from baseline.parse_output import get_results_overall_table, get_results_table, parse_predictions

# Baseline model identifiers handed to the result-table and prediction helpers in the cells below.
MODELS = ["bert-large-uncased", "roberta-large", "mpnet-base"]
# NOTE(review): presumably the output directory of the baseline runs; it is not referenced in the
# visible cells of this notebook — confirm whether it is still needed.
BASE_PATH = "../baseline/music-ner-eacl2023/output/shs100k2/complete"


In [2]:
# Show F1 / precision / recall per model, broken down by attribute (Artist, WoA)
# and evaluation scenario (exact, strict, type).
get_results_table(MODELS)


Unnamed: 0_level_0,f1,f1,f1,f1,f1,f1,precision,precision,precision,precision,precision,precision,recall,recall,recall,recall,recall,recall
Attribute,Artist,Artist,Artist,WoA,WoA,WoA,Artist,Artist,Artist,WoA,WoA,WoA,Artist,Artist,Artist,WoA,WoA,WoA
Scenario,exact,strict,type,exact,strict,type,exact,strict,type,exact,strict,type,exact,strict,type,exact,strict,type
Model,Unnamed: 1_level_3,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3,Unnamed: 9_level_3,Unnamed: 10_level_3,Unnamed: 11_level_3,Unnamed: 12_level_3,Unnamed: 13_level_3,Unnamed: 14_level_3,Unnamed: 15_level_3,Unnamed: 16_level_3,Unnamed: 17_level_3,Unnamed: 18_level_3
bert-large-uncased,0.904479,0.899604,0.916318,0.912811,0.904708,0.943012,0.893328,0.888513,0.905021,0.907101,0.899048,0.937113,0.915912,0.910975,0.927901,0.918593,0.910439,0.948986
mpnet-base,0.898158,0.892833,0.91155,0.921381,0.916022,0.951328,0.890518,0.885239,0.903797,0.916012,0.910683,0.945784,0.90593,0.900559,0.919438,0.926814,0.921423,0.956938
roberta-large,0.90344,0.898807,0.915937,0.915952,0.909427,0.94649,0.894549,0.889962,0.906923,0.911202,0.904711,0.941582,0.912509,0.90783,0.925132,0.920752,0.914193,0.95145


In [3]:
# Show the macro- and micro-aggregated counts and scores per scenario for each model.
get_results_overall_table(MODELS)


Unnamed: 0_level_0,macro,macro,macro,macro,macro,macro,macro,macro,macro,macro,...,micro,micro,micro,micro,micro,micro,micro,micro,micro,micro
Scenario,exact,exact,exact,exact,exact,exact,exact,exact,exact,exact,...,type,type,type,type,type,type,type,type,type,type
Metric,actual,correct,f1,incorrect,missed,partial,possible,precision,recall,spurious,...,actual,correct,f1,incorrect,missed,partial,possible,precision,recall,spurious
Model,Unnamed: 1_level_3,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3,Unnamed: 9_level_3,Unnamed: 10_level_3,Unnamed: 11_level_3,Unnamed: 12_level_3,Unnamed: 13_level_3,Unnamed: 14_level_3,Unnamed: 15_level_3,Unnamed: 16_level_3,Unnamed: 17_level_3,Unnamed: 18_level_3,Unnamed: 19_level_3,Unnamed: 20_level_3,Unnamed: 21_level_3
bert-large-uncased,16963.0,15257.0,0.908645,442.0,937.0,0.0,16636.0,0.900214,0.917252,1264.0,...,33926.0,31186.0,0.928182,212.0,1874.0,0.0,33272.0,0.919236,0.937305,2528.0
mpnet-base,16883.0,15226.0,0.90977,437.5,972.5,0.0,16636.0,0.903265,0.916372,1219.5,...,33766.0,31148.0,0.929264,179.0,1945.0,0.0,33272.0,0.922466,0.936163,2439.0
roberta-large,16846.0,15194.0,0.909696,434.5,955.5,0.0,16584.0,0.902875,0.91663,1217.5,...,33692.0,31074.0,0.929524,183.0,1911.0,0.0,33168.0,0.922296,0.936867,2435.0


# Error Analysis
Based on [Batista (2018)](https://www.davidsbatista.net/blog/2018/05/09/Named_Entity_Evaluation/), we can have three types of errors:
- missing entities (False Negatives)
- spurious entities (False Positives)
- incorrect entities (only partially detected entities)


In [4]:
# Load the IOB-tagged dataset. The unfiltered frame keeps its own name so the
# filtering step below never overwrites its own input.
iob_all = pd.read_parquet("../data/intermediate/shs100k2_IOB.parquet")

# Keep only test-split rows of the two evaluated parts, and only the columns
# used by the error analysis.
is_eval_row = (iob_all.split == "TEST") & (iob_all.part.isin(["both_100", "medium"]))
data = iob_all.loc[
    is_eval_row,
    ["set_id", "yt_id", "title", "performer", "TEXT", "IOB"],
].copy()  # explicit copy: new columns are written to this frame below

# Attach each model's predicted tag sequences under an ("IOB", <model>) column key.
# NOTE(review): assumes parse_predictions returns one entry per row of `data`,
# in the same row order — confirm against baseline.parse_output.
for model in MODELS:
    data[("IOB", model)] = parse_predictions(model)



In [9]:
from preprocessing.Utils import B_PREFIX, I_PREFIX, O_LABEL, overlap
from typing import List, Dict, Tuple
from src.Utils import get_missing_ents, get_spurious_ents, get_incorrect_ents

# For every model, derive one column per error category by comparing the gold
# tags in "IOB" with that model's predicted tags in ("IOB", <model>).
for model in MODELS:
    pred_col = ("IOB", model)
    for error_kind, extract in (
        ("missing", get_missing_ents),
        ("spurious", get_spurious_ents),
        ("incorrect", get_incorrect_ents),
    ):
        # The lambda is consumed immediately by apply, so it always sees the
        # current extractor and prediction column.
        data[(error_kind, model)] = data.apply(
            lambda row: extract(row["IOB"], row[pred_col]),
            axis=1,
        )


# Error Analyse Aspekte:
- Artist und Titel gruppieren
    - nach Länge
    - nach POS Tag 
    - "feat"
    - Popularity pro Artist
        - Wikidata
        - Views
    - Sprache der Wörter?
        - WordNet
            - Sind Namen und Artists im WordNet?
    - Top-Down vs. Bottom-Up
        - Bottom-Up: Error Analyse -> ändern

In [10]:
# Inspect the frame: gold tags, per-model predicted tags, and the derived
# missing / spurious / incorrect columns.
data

Unnamed: 0,set_id,yt_id,title,performer,TEXT,IOB,"(IOB, bert-large-uncased)","(IOB, roberta-large)","(IOB, mpnet-base)","(missing, bert-large-uncased)","(spurious, bert-large-uncased)","(incorrect, bert-large-uncased)","(missing, roberta-large)","(spurious, roberta-large)","(incorrect, roberta-large)","(missing, mpnet-base)","(spurious, mpnet-base)","(incorrect, mpnet-base)"
0,21,gORyrU1xQpg,[Yesterday],[marianne faithfull],"[marianne, faithfull, -, yesterday, (, with, l...","[B-Artist, I-Artist, O, B-WoA, O, O, O, O, O, ...","[B-Artist, I-Artist, O, B-WoA, O, O, O, O, O, ...","[B-Artist, I-Artist, O, B-WoA, O, O, O, O, O, ...","[B-Artist, I-Artist, O, B-WoA, O, O, O, O, O, ...",{},{},{},{},{},{},{},{},{}
1,21,jQhC3bMMLmw,[Yesterday],[matt monro],"[matt, monro, -, yesterday, ., nancyfloressant...","[B-Artist, I-Artist, O, B-WoA, O, O, O, O, O, ...","[B-Artist, I-Artist, O, B-WoA, O, O, O, O, O, ...","[B-Artist, I-Artist, O, B-WoA, O, O, O, O, O, ...","[B-Artist, I-Artist, O, B-WoA, O, O, O, O, O, ...",{},{},{},{},{},{},{},{},{}
4,21,eI3-EVEU51s,[Yesterday],"[the hollyridge strings, hollyridge strings]","[hollyridge, strings, -, yesterday, ., boyjohn...","[B-Artist, I-Artist, O, B-WoA, O, O, O, O, O, ...","[B-Artist, I-Artist, O, B-WoA, O, O, O, O, O, ...","[B-Artist, I-Artist, O, B-WoA, O, O, O, O, O, ...","[B-Artist, I-Artist, O, B-WoA, O, O, O, O, O, ...",{},{},{},{},{},{},{},{},{}
5,21,ew1y-gvO_NM,[Yesterday],[cilla black],"[cilla, black, :, :, :, :, :, yesterday, ., .,...","[B-Artist, I-Artist, O, O, O, O, O, B-WoA, O, ...","[B-Artist, I-Artist, O, O, O, O, O, B-WoA, O, ...","[B-Artist, I-Artist, O, O, O, O, O, B-WoA, O, ...","[B-Artist, I-Artist, O, O, O, O, O, B-WoA, O, ...",{},{},{},{},{},{},{},{},{}
6,21,tjYQuEyxRp0,[Yesterday],[sarah vaughan],"[sarah, vaughan, -, yesterday, ., david, speed...","[B-Artist, I-Artist, O, B-WoA, O, O, O, O, O, ...","[B-Artist, I-Artist, O, B-WoA, O, O, O, O, O, ...","[B-Artist, I-Artist, O, B-WoA, O, O, O, O, O, ...","[B-Artist, I-Artist, O, B-WoA, O, O, O, O, O, ...",{},{},{},{},{},{},{},{},{}
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10539,9993,3qcMjKxt-dE,[53rd & 3rd],[ramones],"[ramones, -, 53rd, &, 3rd, ., the, ramones, ♪,...","[B-Artist, O, B-WoA, I-WoA, I-WoA, O, O, B-Art...","[B-Artist, O, B-WoA, I-WoA, I-WoA, O, O, B-Art...","[B-Artist, O, B-WoA, I-WoA, I-WoA, O, O, B-Art...","[B-Artist, O, B-WoA, I-WoA, I-WoA, O, O, B-Art...",{},"{(40, 42): 'WoA', (47, 49): 'WoA', (56, 58): '...",{},{},"{(40, 42): 'WoA', (47, 49): 'WoA', (56, 58): '...",{},{},"{(40, 42): 'WoA', (47, 49): 'WoA', (56, 58): '...",{}
10541,9993,P27xcT6mJP4,[53rd & 3rd],[metallica],"[53rd, &, 3rd, -, metallica, ., funinfuneral13...","[B-WoA, I-WoA, I-WoA, O, B-Artist, O, O, O, O,...","[B-WoA, I-WoA, I-WoA, O, B-Artist, O, O, O, O,...","[B-WoA, I-WoA, I-WoA, O, B-Artist, O, O, O, O,...","[B-WoA, I-WoA, I-WoA, O, B-Artist, O, O, O, O,...",{},"{(39, 41): 'WoA', (46, 48): 'WoA', (55, 57): '...",{},{},"{(39, 41): 'WoA', (46, 48): 'WoA', (55, 57): '...",{},{},"{(39, 41): 'WoA', (46, 48): 'WoA', (55, 57): '...",{}
10543,9994,EVsRN0eDGNc,[Sytten ti],[otto brandenburg],"[otto, brandenburg, -, sytten, ti, ., ., ., ch...","[B-Artist, I-Artist, O, B-WoA, I-WoA, O, O, O,...","[B-Artist, I-Artist, O, B-WoA, I-WoA, O, O, O,...","[B-Artist, I-Artist, O, B-WoA, I-WoA, O, O, O,...","[B-Artist, I-Artist, O, B-WoA, I-WoA, O, O, O,...",{},{},{},{},{},{},{},{},{}
10544,9998,QveJv7V4OZI,[I Don't Care],[ramones],"[ramones, -, i, dont, care, ., the, ramones, ♪...","[B-Artist, O, B-WoA, I-WoA, I-WoA, O, O, B-Art...","[B-Artist, O, B-WoA, I-WoA, I-WoA, O, O, B-Art...","[B-Artist, O, B-WoA, I-WoA, I-WoA, O, O, B-Art...","[B-Artist, O, B-WoA, I-WoA, I-WoA, O, O, B-Art...",{},{},"{(22, 23): 'WoA'}",{},{},{},{},{},{}


In [None]:
def _iob_spans(tags: List[str]) -> Dict[Tuple[int, int], str]:
    """Collect the entity spans of one IOB tag sequence as {(start, end): type}, end inclusive."""
    spans: Dict[Tuple[int, int], str] = {}
    start, label = None, None
    for idx, tag in enumerate(tags):
        prefix, _, ent_type = str(tag).partition("-")
        # An I- tag of the same type extends the currently open entity.
        if prefix == "I" and label is not None and ent_type == label:
            continue
        # Anything else closes the open entity (if any) ...
        if label is not None:
            spans[(start, idx - 1)] = label
            start, label = None, None
        # ... and a B- tag (or a stray I- tag without a matching opener) starts a new one.
        if prefix in ("B", "I") and ent_type:
            start, label = idx, ent_type
    if label is not None:
        spans[(start, len(tags) - 1)] = label
    return spans


def get_missing_ents(IOB_true: List[str], IOB_pred: List[str]) -> Dict[Tuple[int, int], str]:
    """Return the gold entities that the prediction misses completely (false negatives).

    A gold entity counts as missing when no predicted entity, of any type,
    shares a token with it. Partially detected or mistyped entities are not
    reported here.

    NOTE(review): a function with the same name is imported from ``src.Utils``
    earlier in this notebook and this definition shadows it once executed.
    The inclusive end index and the "no overlap at all" rule are assumptions —
    confirm they match the imported implementation.

    Args:
        IOB_true: Gold tags, one per token, e.g. ``["B-Artist", "I-Artist", "O"]``.
        IOB_pred: Predicted tags for the same tokens.

    Returns:
        Mapping ``{(start, end): entity_type}`` of the missed gold entities,
        with token indices and an inclusive ``end``. Empty when nothing is missed.
    """
    predicted = _iob_spans(IOB_pred)
    missing: Dict[Tuple[int, int], str] = {}
    for (g_start, g_end), ent_type in _iob_spans(IOB_true).items():
        overlaps_prediction = any(
            p_start <= g_end and g_start <= p_end for (p_start, p_end) in predicted
        )
        if not overlaps_prediction:
            missing[(g_start, g_end)] = ent_type
    return missing

{'WoA': [(0, 0), (9, 9)], 'Artist': [(2, 3)]}