In [1]:
from transformers import AutoModelForSequenceClassification, AutoTokenizer

# Checkpoint locations. `model_dir` holds the sequence-classification weights;
# `path` is a separate directory that the tokenizer is read from.
# NOTE(review): both are absolute, machine-specific paths -- adjust before
# running this notebook anywhere else.
model_dir = '/mnt/c/Users/Sergi/Desktop/BSC/modelos_entrenados/transformers_rel1_C'
path = '/mnt/c/Users/Sergi/Desktop/BSC/spanish_sapbert_models/sapbert_15_noparents_1epoch'

# The two loads are independent of each other.
tokenizer = AutoTokenizer.from_pretrained(path)
model = AutoModelForSequenceClassification.from_pretrained(model_dir)

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
import torch
def compute_relation(source, target, batch_size=64, min_logit=-8):
    """
    Predict the relation label for each (source, target) entity pair.

    Pairs are tokenized and scored in mini-batches with the module-level
    ``tokenizer`` and ``model``, so a single forward pass never holds more
    than ``batch_size`` pairs.

    Parameters:
    source (str or list): Source entity, or list of source entities.
    target (str or list): Target entity, or list of target entities. Must
        contain as many items as ``source``.
    batch_size (int): Maximum number of pairs sent through the model at once.
    min_logit (float): The top label of a pair is kept only when its raw
        logit is strictly greater than this value.

    Returns:
    list: One inner list per pair. It holds the single top-scoring label, or
        is empty when that label's logit does not exceed ``min_logit``.

    Raises:
    ValueError: If ``batch_size`` is smaller than 1, or if ``source`` and
        ``target`` do not have the same number of items.
    """
    # Position i names the class scored by logit i.
    # NOTE(review): assumed to match the class order the model was trained
    # with -- confirm against model.config.id2label.
    all_labels = ['BROAD', 'EXACT', 'NARROW', 'NO_RELATION']

    if batch_size < 1:
        raise ValueError("batch_size must be at least 1")

    # A bare string is treated as a batch of one, so single-pair calls keep working.
    if isinstance(source, str):
        source = [source]
    if isinstance(target, str):
        target = [target]
    source, target = list(source), list(target)
    if len(source) != len(target):
        raise ValueError(
            "source and target must have the same length "
            f"(got {len(source)} and {len(target)})"
        )

    final_labels = []
    for start in range(0, len(source), batch_size):
        stop = start + batch_size
        tokenized_mention = tokenizer(
            source[start:stop],
            target[start:stop],
            return_tensors='pt',
            padding=True,
            truncation=True,
        )
        with torch.no_grad():
            output = model(**tokenized_mention)

        # Convert the logits to Python lists once per batch rather than once per row.
        for row in output.logits.tolist():
            predscores = dict(zip(all_labels, row))
            # max() returns the first label on ties, matching a stable descending sort.
            top_label = max(predscores, key=predscores.get, default=None)
            if top_label is not None and predscores[top_label] > min_logit:
                final_labels.append([top_label])
            else:
                final_labels.append([])
    return final_labels

In [3]:
import pandas as pd

# Tab-separated test set; its `source` and `target` columns are the entity
# pairs handed to the relation model.
# NOTE(review): absolute, machine-specific path -- adjust before running elsewhere.
path = '/mnt/c/Users/Sergi/Desktop/BSC/Test_files/testset_rel_model.tsv'
data = pd.read_csv(path, sep='\t')
source, target = (data[column].tolist() for column in ('source', 'target'))

In [4]:
# compute_relation returns one list per pair; that list is empty when the top
# logit does not pass the function's threshold, so indexing [0] blindly would
# raise IndexError.
predicted_lists = compute_relation(source, target)
# NOTE(review): an empty prediction is mapped to 'NO_RELATION' on the
# assumption that "no label above threshold" means "no relation" -- confirm.
rels = [labels[0] if labels else 'NO_RELATION' for labels in predicted_lists]

In [5]:
# Spot check on one hand-written pair; the cell displays the returned labels.
compute_relation(
    source="cáncer de pulmón",
    target="sospecha de cáncer de pulmón",
)

[['BROAD']]

In [6]:
# Store the predictions next to the gold labels, aligned row-for-row with `data`.
data['predicted_rel'] = pd.Series(rels, index=data.index)

In [7]:
# Rows where the predicted relation differs from the gold `rel_type`.
mismatch_mask = data["rel_type"].ne(data["predicted_rel"])
data[mismatch_mask]

Unnamed: 0,source,target,rel_type,predicted_rel
0,5-fosfato,piridoxal 5-fosfato,NO_RELATION,BROAD
1,abdomen torax,abdomen,BROAD,NARROW
3,abordaje de articulacion de hombro,artrotomia de articulacion de hombro,NO_RELATION,BROAD
7,aborto ilegal con falla renal,aborto legal con falla renal aguda,NO_RELATION,BROAD
9,aborto legal con embolismo gaseoso,aborto con embolismo gaseoso,NARROW,EXACT
...,...,...,...,...
4991,vulvovaginitis gonococica neonatal,conjuntivitis neonatal gonococica,NO_RELATION,EXACT
4994,y factor de necrosis tumoral,necrosis tumoral,NO_RELATION,EXACT
4997,zona de hombro izquierdo,hombro izquierdo,NARROW,EXACT
4998,zona de un ganglio linfatico,ganglio linfatico regional,NO_RELATION,EXACT


### EVALUATION

In [8]:
# Gold labels and model predictions as two parallel Python lists.
y_test, y_pred = (data[column].tolist() for column in ('rel_type', 'predicted_rel'))

In [9]:
from sklearn.metrics import f1_score, accuracy_score
import numpy as np

# Micro-averaged F1. scikit-learn metrics take (y_true, y_pred), so the gold
# labels go first; the order matters for support-weighted averages such as
# average="weighted".
# For single-label multiclass data micro F1 equals accuracy, which is why the
# two numbers printed below coincide; use average="macro" or "weighted" for a
# score that reflects per-class performance.
f1 = f1_score(y_test, y_pred, average="micro")

# Fraction of pairs whose predicted relation equals the gold relation.
accuracy = accuracy_score(y_test, y_pred)

# Print the calculated metrics
print("F1-score:", f1)
print("Accuracy:", accuracy)

F1-score: 0.6798
Accuracy: 0.6798


In [10]:
from sklearn.metrics import classification_report

In [11]:
# Pin the reported classes explicitly. Without `labels`, `target_names` are
# matched by position against the sorted labels found in the data, which
# silently relies on alphabetical order and on every class being present.
# String labels are used as the row names, so `target_names` is not needed.
report_labels = ['BROAD', 'EXACT', 'NARROW', 'NO_RELATION']
print(classification_report(y_test, y_pred, labels=report_labels))

              precision    recall  f1-score   support

       BROAD       0.42      0.83      0.56       620
       EXACT       0.35      0.92      0.50       367
      NARROW       0.50      0.56      0.53       358
 NO_RELATION       0.97      0.64      0.77      3655

    accuracy                           0.68      5000
   macro avg       0.56      0.74      0.59      5000
weighted avg       0.83      0.68      0.71      5000

