In [None]:
%%capture
!pip install gradio
!pip install accelerate -U
!pip install transformers huggingface_hub
!pip install gliner[gpu]

In [None]:
from gliner import GLiNER
import torch

# Use the GPU when PyTorch reports CUDA as available; otherwise use the CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print(device)

# Load the pretrained checkpoint, then move it onto the selected device.
model = GLiNER.from_pretrained("urchade/gliner_multi_pii-v1")
model = model.to(device)

In [None]:
# Short tag -> descriptive label name.
label_mapping = dict(
    ORG="organization",
    PERS="person",
    LOC="location",
    MON="monetary value",
    PCT="percentage",
    DATE="date",
    TIME="timestamp",
    PERIOD="time period",
    JOB="job title",
    DOC="document name",
    QUANT="quantity",
    ART="artifact",
    MISC="miscellaneous",
)

# Descriptive label names, in the order they are declared above.
labels = [*label_mapping.values()]

# Inverse lookup: descriptive label name -> short tag.
reverse_label_mapping = dict(zip(label_mapping.values(), label_mapping.keys()))

In [None]:
from tqdm import tqdm
import pandas as pd
import json

# Load the evaluation set. The text to annotate is taken from the second
# column by position; the previous tuple-unpacking of each row additionally
# required the file to have exactly three columns.
# NOTE(review): confirm the second column of test.csv is the text column.
df = pd.read_csv("test.csv")
texts = df.iloc[:, 1]

# Predict entities for every text and keep them as one JSON string per row,
# each entity reduced to its short tag and the matched text.
results = []
for text in tqdm(texts, total=len(texts)):
    entities = model.predict_entities(text, labels)
    formatted_entities = [
        {"label": reverse_label_mapping[ent["label"]], "text": ent["text"]}
        for ent in entities
    ]
    # ensure_ascii=False writes non-ASCII characters as-is instead of \u escapes.
    results.append(json.dumps(formatted_entities, ensure_ascii=False))

df["pred"] = results

In [None]:
# Write the frame, including the "pred" column, to CSV without the row index.
output_csv = "zero_shot_gliner_multi_pii-v1_4.csv"
df.to_csv(output_csv, index=False)