In [1]:
from transformers import AutoModelForSequenceClassification, AutoTokenizer

# Checkpoint locations. The classifier and the tokenizer are loaded from two
# different directories.
# NOTE(review): both are absolute, machine-specific paths — consider deriving
# them from a single configurable base directory.
model_dir = '/mnt/c/Users/Sergi/Desktop/BSC/modelos_entrenados/transformers_rel1_B'
path = '/mnt/c/Users/Sergi/Desktop/BSC/spanish_sapbert_models/sapbert_15_noparents_1epoch'

# Sequence-classification model used by the relation-prediction cells below.
model = AutoModelForSequenceClassification.from_pretrained(model_dir)
# NOTE(review): presumably this tokenizer's vocabulary matches the one the
# classifier was trained with — confirm.
tokenizer = AutoTokenizer.from_pretrained(path)

  from .autonotebook import tqdm as notebook_tqdm


In [13]:
import torch
def compute_relation(source, target, batch_size=None, threshold=-8, verbose=True):
    """
    Predict the relation label for each (source, target) entity pair.

    Relies on the notebook-level ``tokenizer`` and ``model`` objects.

    Parameters:
    source (str or list): A source entity, or a list of source entities.
    target (str or list): A target entity, or a list of target entities,
        aligned position by position with ``source``.
    batch_size (int, optional): Number of pairs sent through the model per
        forward pass. ``None`` (default) processes every pair in one batch.
    threshold (float): The top label is kept only when its logit is strictly
        greater than this value. Defaults to -8.
    verbose (bool): When True (default), print the label -> logit mapping of
        every pair.

    Returns:
    list: One list per input pair. Each inner list holds the highest-scoring
        label, or is empty when that label's logit does not exceed
        ``threshold``.

    Raises:
    ValueError: If ``source`` and ``target`` differ in length, or if
        ``batch_size`` is smaller than 1.
    """
    all_labels = ['BROAD', 'EXACT', 'NARROW', 'NO_RELATION']

    # A single pair may be given as two plain strings; treat it as a batch of one.
    source = [source] if isinstance(source, str) else list(source)
    target = [target] if isinstance(target, str) else list(target)
    if len(source) != len(target):
        raise ValueError(
            f"source and target must have the same length "
            f"(got {len(source)} and {len(target)})"
        )
    if batch_size is not None and batch_size < 1:
        raise ValueError("batch_size must be a positive integer or None")

    final_labels = []
    if not source:
        # Nothing to score; skip the tokenizer/model round trip entirely.
        return final_labels

    step = len(source) if batch_size is None else batch_size
    for start in range(0, len(source), step):
        encoded = tokenizer(
            source[start:start + step],
            target[start:start + step],
            return_tensors='pt',
            padding=True,
            truncation=True,
        )
        with torch.no_grad():
            # Convert the logits to Python lists once per batch rather than
            # once per row.
            batch_scores = model(**encoded).logits.tolist()

        for scores in batch_scores:
            predscores = dict(zip(all_labels, scores))
            if verbose:
                print(predscores)
            # max() returns the first label among ties, i.e. the same pick as
            # a stable descending sort truncated to one element.
            best_label = max(predscores, key=predscores.get)
            final_labels.append(
                [best_label] if predscores[best_label] > threshold else []
            )
    return final_labels

In [3]:
import pandas as pd

# Tab-separated test set; its 'source' and 'target' columns hold the entity pairs.
# NOTE(review): `path` rebinds the name used earlier for the tokenizer directory.
path = '/mnt/c/Users/Sergi/Desktop/BSC/Test_files/testset_rel_model.tsv'
data = pd.read_csv(path, delimiter='\t')
# Plain Python lists, one entry per row, in row order.
source, target = (data[column].tolist() for column in ('source', 'target'))

In [4]:
# compute_relation returns one list per pair; keep only its first (top) label.
# NOTE: an empty inner list (top logit not above the function's threshold)
# would raise IndexError here.
rels = [labels[0] for labels in compute_relation(source, target)]

In [20]:
# Spot check on a single pair passed as two plain strings.
compute_relation(
    source="cáncer de pulmón",
    target="sospecha de cáncer de pulmón",
)

{'BROAD': 1.5332554578781128, 'EXACT': -3.267200231552124, 'NARROW': -5.707673072814941, 'NO_RELATION': -1.59722101688385}


[['BROAD']]

In [5]:
# Attach the predictions as a new column. `rels` was computed from this frame's
# own 'source'/'target' columns, so it has one entry per row in row order.
# NOTE: this mutates `data` in place.
data['predicted_rel'] = rels

In [11]:
# Display the rows where the reference label differs from the prediction.
data.loc[data["rel_type"].ne(data["predicted_rel"])]

Unnamed: 0,source,target,rel_type,predicted_rel
0,5-fosfato,piridoxal 5-fosfato,NO_RELATION,BROAD
1,abdomen torax,abdomen,BROAD,NARROW
3,abordaje de articulacion de hombro,artrotomia de articulacion de hombro,NO_RELATION,BROAD
10,absceso areolar agudo,absceso mamario agudo,NARROW,EXACT
12,absceso de cuello,drenaje de absceso de cuello,NO_RELATION,EXACT
...,...,...,...,...
4987,vitamina C,ingesta de vitamina C,NO_RELATION,EXACT
4988,vitamina C,medicion de vitamina C,NO_RELATION,EXACT
4990,vomito fecal,vomito de materia fecal,NO_RELATION,EXACT
4994,y factor de necrosis tumoral,necrosis tumoral,NO_RELATION,NARROW


### EVALUATION

In [6]:
# Reference labels and model predictions as plain lists for the metric calls.
y_test = data.loc[:, 'rel_type'].tolist()
y_pred = data.loc[:, 'predicted_rel'].tolist()

In [8]:
from sklearn.metrics import f1_score, accuracy_score
import numpy as np

# scikit-learn metrics expect (y_true, y_pred) in that order. Micro-F1 and
# accuracy happen to be symmetric in their arguments, but other averages
# (e.g. "weighted") are not, so the reference labels must come first.

# Micro-averaged F1. For single-label multiclass data this equals accuracy;
# use average="macro" or "weighted" for a per-class view.
f1 = f1_score(y_test, y_pred, average="micro")

# Overall accuracy
accuracy = accuracy_score(y_test, y_pred)

# Print the calculated metrics
print("F1-score:", f1)
print("Accuracy:", accuracy)

F1-score: 0.7352
Accuracy: 0.7352


In [7]:
from sklearn.metrics import classification_report

In [9]:
# Pin the label order explicitly: without `labels=`, `target_names` is matched
# by position against the sorted labels found in the data, which silently
# mislabels rows (or raises) if a class is missing or the label set changes.
report_labels = ['BROAD', 'EXACT', 'NARROW', 'NO_RELATION']
print(classification_report(y_test, y_pred, labels=report_labels, target_names=report_labels))

              precision    recall  f1-score   support

       BROAD       0.51      0.79      0.62       620
       EXACT       0.39      0.89      0.55       367
      NARROW       0.47      0.64      0.54       358
 NO_RELATION       0.97      0.72      0.83      3655

    accuracy                           0.74      5000
   macro avg       0.59      0.76      0.63      5000
weighted avg       0.83      0.74      0.76      5000

