In [1]:
from transformers import AutoModelForSequenceClassification, AutoTokenizer

# Fine-tuned sequence-classification checkpoint used to score (source, target) pairs.
# NOTE(review): absolute, machine-specific path; this cell only runs where that directory exists.
model_dir = '/mnt/c/Users/Sergi/Desktop/BSC/modelos_entrenados/output_1.5M/checkpoint-90000'
model = AutoModelForSequenceClassification.from_pretrained(model_dir)

# The tokenizer is loaded from a different directory than the model checkpoint.
# NOTE(review): presumably the base model the checkpoint was fine-tuned from; confirm the vocabularies match.
# NOTE(review): `path` is reassigned further down in this notebook to the test-set TSV path.
path = '/mnt/c/Users/Sergi/Desktop/BSC/spanish_sapbert_models/sapbert_15_noparents_1epoch'
tokenizer = AutoTokenizer.from_pretrained(path)

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
import torch
def compute_relation(source, target, batch_size=None, threshold=-8, verbose=True):
    """
    Predict the relation label for each (source, target) pair.

    Uses the notebook-level `tokenizer` and `model` globals. For every pair the
    highest-scoring label is selected and kept only if its raw logit is
    strictly greater than `threshold`.

    Parameters:
    source (str or list of str): Source entity, or list of source entities.
    target (str or list of str): Target entity, or list of target entities,
        paired positionally with `source`.
    batch_size (int or None): Number of pairs per forward pass. None (default)
        sends all pairs through the model in a single batch.
    threshold (float): Minimum raw logit (exclusive) the top label must reach
        to be reported. Defaults to -8.
    verbose (bool): When True (default), print the label -> logit mapping of
        every pair.

    Returns:
    list: One list per input pair, holding the top label, or empty when the
        top label does not clear `threshold`.

    Raises:
    ValueError: If `source` and `target` differ in length, or if `batch_size`
        is smaller than 1.
    """
    # NOTE(review): assumes the model's output indices follow this order --
    # TODO confirm against model.config.id2label.
    all_labels = ['BROAD','EXACT','NARROW','NO_RELATION']

    # A single pair of plain strings is treated as a batch of one.
    if isinstance(source, str):
        source = [source]
    if isinstance(target, str):
        target = [target]
    source, target = list(source), list(target)

    if len(source) != len(target):
        raise ValueError(
            f"source and target must have the same length, got {len(source)} and {len(target)}"
        )
    if batch_size is not None and batch_size < 1:
        raise ValueError(f"batch_size must be a positive integer or None, got {batch_size}")
    if not source:
        # Nothing to score; avoid calling the tokenizer with an empty batch.
        return []

    step = len(source) if batch_size is None else batch_size
    final_labels = []
    for start in range(0, len(source), step):
        tokenized_mention = tokenizer(
            source[start:start + step],
            target[start:start + step],
            return_tensors='pt',
            padding=True,
            truncation=True,
        )
        with torch.no_grad():
            logits = model(**tokenized_mention).logits
        # Convert the tensor once per batch rather than once per row.
        for row in logits.tolist():
            predscores = dict(zip(all_labels, row))
            if verbose:
                print(predscores)
            # max() returns the first maximal key, matching a stable descending sort.
            best_label = max(predscores, key=predscores.get)
            final_labels.append([best_label] if predscores[best_label] > threshold else [])
    return final_labels

In [5]:
# Spot-check on a single pair: plain strings are accepted and yield a one-element result list.
compute_relation("cáncer de pulmón","sospecha de cáncer de pulmón")

{'BROAD': 2.92311692237854, 'EXACT': -4.191068649291992, 'NARROW': -8.286438941955566, 'NO_RELATION': -3.2009711265563965}


[['BROAD']]

In [13]:
# Second spot-check: same source term, with the target shortened to "sospecha de cáncer".
compute_relation("cáncer de pulmón","sospecha de cáncer")

{'BROAD': -4.997252464294434, 'EXACT': -7.428595542907715, 'NARROW': -6.014092445373535, 'NO_RELATION': 4.584993362426758}


[['NO_RELATION']]

In [3]:
import pandas as pd

# Tab-separated test set; its 'source' and 'target' columns are pulled out as plain lists.
path = '/mnt/c/Users/Sergi/Desktop/BSC/Test_files/testset_rel_model.tsv'
data = pd.read_csv(path, sep='\t')
source, target = (data[column].tolist() for column in ('source', 'target'))

In [6]:
# Score every (source, target) pair of the test set.
rel_candidates = compute_relation(source, target)
# compute_relation returns one list per pair, and that list is empty when the top
# score does not clear its threshold. Indexing [0] unconditionally would raise
# IndexError on such a row, so fall back to 'NO_RELATION' for it.
rels = [labels[0] if labels else 'NO_RELATION' for labels in rel_candidates]

{'BROAD': 0.38876283168792725, 'EXACT': -1.42830491065979, 'NARROW': -7.213569641113281, 'NO_RELATION': -1.3588294982910156}
{'BROAD': -4.323330879211426, 'EXACT': -2.0483813285827637, 'NARROW': 1.246295690536499, 'NO_RELATION': -2.345996141433716}
{'BROAD': -5.9710307121276855, 'EXACT': -7.3176374435424805, 'NARROW': -7.333452224731445, 'NO_RELATION': 5.457026481628418}
{'BROAD': -2.3626179695129395, 'EXACT': -0.1812899112701416, 'NARROW': -1.4376956224441528, 'NO_RELATION': -1.0587713718414307}
{'BROAD': -3.1399588584899902, 'EXACT': -0.6730526685714722, 'NARROW': -4.465980529785156, 'NO_RELATION': 0.510861873626709}
{'BROAD': -4.223026275634766, 'EXACT': -3.6950926780700684, 'NARROW': -7.778680801391602, 'NO_RELATION': 3.148122787475586}
{'BROAD': -8.745035171508789, 'EXACT': -5.844765663146973, 'NARROW': 3.4719080924987793, 'NO_RELATION': -3.597865104675293}
{'BROAD': 3.0093374252319336, 'EXACT': -3.3112587928771973, 'NARROW': -7.6144914627075195, 'NO_RELATION': -4.534489631652832}

In [7]:
# Attach the predictions as a new column (modifies `data` in place); `rels` needs one entry per row.
data['predicted_rel'] = rels

In [8]:
# Display the rows whose predicted relation differs from the reference `rel_type`.
mismatch_mask = data["rel_type"].ne(data["predicted_rel"])
data.loc[mismatch_mask]

Unnamed: 0,source,target,rel_type,predicted_rel
0,5-fosfato,piridoxal 5-fosfato,NO_RELATION,BROAD
1,abdomen torax,abdomen,BROAD,NARROW
3,abordaje de articulacion de hombro,artrotomia de articulacion de hombro,NO_RELATION,EXACT
7,aborto ilegal con falla renal,aborto legal con falla renal aguda,NO_RELATION,BROAD
10,absceso areolar agudo,absceso mamario agudo,NARROW,NO_RELATION
...,...,...,...,...
4990,vomito fecal,vomito de materia fecal,NO_RELATION,EXACT
4994,y factor de necrosis tumoral,necrosis tumoral,NO_RELATION,NARROW
4997,zona de hombro izquierdo,hombro izquierdo,NARROW,EXACT
4998,zona de un ganglio linfatico,ganglio linfatico regional,NO_RELATION,EXACT


### EVALUATION

In [9]:
# Reference labels and model predictions as plain Python lists for the metric functions.
y_test, y_pred = data['rel_type'].tolist(), data['predicted_rel'].tolist()

In [10]:
from sklearn.metrics import f1_score, accuracy_score
import numpy as np

# Micro-averaged F1. scikit-learn metrics expect the reference labels first:
# (y_true, y_pred). Swapping them happens to be harmless for micro-F1 and
# accuracy, but gives wrong numbers for support-dependent averages such as
# average="weighted", so keep this order when trying other options.
f1 = f1_score(y_test, y_pred, average="micro")

# Fraction of pairs whose predicted label equals the reference label.
accuracy = accuracy_score(y_test, y_pred)

# For single-label multiclass data micro-F1 equals accuracy, so both lines
# print the same value.
print("F1-score:", f1)
print("Accuracy:", accuracy)

F1-score: 0.6862
Accuracy: 0.6862


In [11]:
from sklearn.metrics import classification_report

In [12]:
# Pass the class list as `labels` as well as `target_names`, so every report row
# is tied to its class explicitly instead of depending on scikit-learn's sorted
# order of whichever labels happen to occur in y_test / y_pred.
relation_labels = ['BROAD','EXACT','NARROW','NO_RELATION']
print(classification_report(y_test, y_pred, labels=relation_labels, target_names=relation_labels))

              precision    recall  f1-score   support

       BROAD       0.50      0.82      0.62       620
       EXACT       0.31      0.95      0.46       367
      NARROW       0.49      0.64      0.55       358
 NO_RELATION       0.98      0.64      0.78      3655

    accuracy                           0.69      5000
   macro avg       0.57      0.76      0.60      5000
weighted avg       0.84      0.69      0.72      5000

