## Tool - spaCy Entity Linker: Named Entity Linking (NEL)
---

### Load and standardize the data


In [1]:
import pandas as pd
import ast

# CSV holding the entity-linking results to be processed
nel_result = '../../data/results/spacy_entity_linker/FAA_DataModel_20240104104402.csv'

# read the tool output into a DataFrame
data = pd.read_csv(filepath_or_buffer=nel_result)

In [2]:
# Standardize the tool output in a single non-mutating chain, so the frame
# loaded above is not modified in place (no `inplace=True`, no column assignment).
data = (
    data
    # give the raw column codes readable names
    .rename(columns={"c5": "id", "c119": "sample", "c119_entity_linking": "entity_linking"})
    # each cell holds the string representation of a list of dicts;
    # parse it into an actual list of dicts
    .assign(entity_linking=lambda frame: frame['entity_linking'].apply(ast.literal_eval))
    # one row per list element, then rebuild a clean 0..n-1 index
    .explode('entity_linking')
    .reset_index(drop=True)
)

In [3]:
# Expand each entity_linking dict into one column per key
attributes_df = data['entity_linking'].apply(pd.Series)

# Cast the identifier to pandas' nullable integer dtype ('Int64'), which can
# hold missing values. NOTE: 'indentifier' (sic) is the key spelling that
# comes from the input data, so it is kept as-is.
attributes_df['indentifier'] = attributes_df['indentifier'].astype('Int64')

# A stray column named 0 shows up next to the dict keys — presumably from rows
# whose entity_linking is NaN after explode (TODO confirm). Drop it when it is
# present; errors='ignore' avoids a KeyError when no such column exists.
attributes_df = attributes_df.drop(columns=[0], errors='ignore')
attributes_df

Unnamed: 0,indentifier,label,description
0,8015236,William Matthew Prior,American painter
1,6588629,Toledo Airport (Brazil),
2,4698432,Airborne,controversial dietary supplement
3,3951828,agency,capacity of an agent to act in a world
4,6588629,Toledo Airport (Brazil),
...,...,...,...
16891,5421066,Experienced,live album
16892,6203,Avogadro constant,"fundamental physical constant (symbols: L,Nᴀ) ..."
16893,5727902,circa,approximately – should be used with qualifier ...
16894,1431062,Castellón-Costa Azahar Airport,airport


In [4]:
# Put the expanded attribute columns next to the original columns,
# leaving out the now-redundant dictionary column
data = pd.concat(
    objs=[data.drop(columns='entity_linking'), attributes_df],
    axis='columns',
)

data

Unnamed: 0,id,sample,indentifier,label,description
0,19750315005389A,TAILWHEEL COCKED RIGHT PRIOR TO TKOF. ...,8015236,William Matthew Prior,American painter
1,19750419011349A,TOW PLANE BECAME AIRBORNE THEN SETTLED.STUDENT...,6588629,Toledo Airport (Brazil),
2,19750419011349A,TOW PLANE BECAME AIRBORNE THEN SETTLED.STUDENT...,4698432,Airborne,controversial dietary supplement
3,19750419011349A,TOW PLANE BECAME AIRBORNE THEN SETTLED.STUDENT...,3951828,agency,capacity of an agent to act in a world
4,19750419011349A,TOW PLANE BECAME AIRBORNE THEN SETTLED.STUDENT...,6588629,Toledo Airport (Brazil),
...,...,...,...,...,...
16891,20080404840559A,(-23) THE AIRCRAFT EXPERIENCED SEVERE TURBULAN...,5421066,Experienced,live album
16892,20080404840559A,(-23) THE AIRCRAFT EXPERIENCED SEVERE TURBULAN...,6203,Avogadro constant,"fundamental physical constant (symbols: L,Nᴀ) ..."
16893,20080404840559A,(-23) THE AIRCRAFT EXPERIENCED SEVERE TURBULAN...,5727902,circa,approximately – should be used with qualifier ...
16894,20080404840559A,(-23) THE AIRCRAFT EXPERIENCED SEVERE TURBULAN...,1431062,Castellón-Costa Azahar Airport,airport


Save the processed tool output to a file.

In [5]:
# write the processed frame to CSV without the index column
data.to_csv(path_or_buf='../../tool_results/spacy_entitylinker.csv', index=False)

## Gold standard NEL data

### Load the processed gold standard data

In [1]:
import pandas as pd

# processed gold standard NEL annotations
gold_standard_path = '../../gold_standard/processed/nel.csv'

# read the gold standard into a DataFrame
data = pd.read_csv(filepath_or_buffer=gold_standard_path)

data

Unnamed: 0,id,sample,entity,qid
0,19990213001379A,ACFT WAS TAXIING FOR TAKE OFF WHEN IT LOST CON...,ACFT,Q11436
1,19990213001379A,ACFT WAS TAXIING FOR TAKE OFF WHEN IT LOST CON...,DITCH,Q2048319
2,19990213001379A,ACFT WAS TAXIING FOR TAKE OFF WHEN IT LOST CON...,TREE,Q10884
3,19990213001379A,ACFT WAS TAXIING FOR TAKE OFF WHEN IT LOST CON...,LOST CONTROL,Q29017603
4,19800217031649I,"AFTER TAKEOFF, ENGINE QUIT. WING FUEL TANK SUM...",TAKEOFF,Q854248
...,...,...,...,...
496,19870523018729A,CANOPY CAME OPEN ON CLIMBOUT CAUSING AN UNCONT...,PILOT ERROR,Q3057459
497,20030620012809I,(-23) PILOT FAILED TO ASSURE THE OIL FILLER CA...,PILOT,Q2095549
498,20030620012809I,(-23) PILOT FAILED TO ASSURE THE OIL FILLER CA...,OIL FILLER CAP,
499,20030620012809I,(-23) PILOT FAILED TO ASSURE THE OIL FILLER CA...,OIL,Q42962


In [2]:
# text of the first 'sample' row for this report id, rendered without the index
data.loc[data['id'] == '19990213001379A', 'sample'].iloc[0:1].to_string(index=False)

'ACFT WAS TAXIING FOR TAKE OFF WHEN IT LOST CONT...'