In [None]:
import pandas as pd
import transformers
from transformers import AutoTokenizer, EsmForSequenceClassification, EsmTokenizer, pipeline
import sys
from datasets import Dataset, DatasetDict
import numpy as np
import pandas as pd
import torch
import sklearn.metrics

def precision_recall_curve(y_true, pred_scores, thresholds):
    """Compute precision and recall at each decision threshold.

    For every threshold, each score is turned into the string label
    ``"TRUE"`` (score >= threshold) or ``"FALSE"``, and precision/recall are
    computed with scikit-learn using ``"TRUE"`` as the positive class.

    Args:
        y_true: Ground-truth labels; they are compared against
            ``"TRUE"``/``"FALSE"`` predictions, so they should use the same
            string labels.
        pred_scores: Iterable of per-sample scores, aligned with ``y_true``.
        thresholds: Iterable of cut-off values to evaluate.

    Returns:
        A ``(precisions, recalls)`` tuple of lists, one entry per threshold,
        in the order the thresholds were given.
    """
    curve_points = []

    for cutoff in thresholds:
        # Binarise the scores at this cut-off.
        predicted_labels = []
        for score in pred_scores:
            predicted_labels.append("TRUE" if score >= cutoff else "FALSE")

        curve_points.append((
            sklearn.metrics.precision_score(y_true=y_true, y_pred=predicted_labels, pos_label="TRUE"),
            sklearn.metrics.recall_score(y_true=y_true, y_pred=predicted_labels, pos_label="TRUE"),
        ))

    precisions = [point[0] for point in curve_points]
    recalls = [point[1] for point in curve_points]
    return precisions, recalls


def evaluate_testdata(testdata, model_path, sep, *, batch_size=256):
    """Score a labelled test CSV with a fine-tuned ESM classifier and print its AP.

    The CSV must provide the columns ``cdr3_alpha_aa``, ``epitope_aa`` and
    ``label_true_pair``. Each row is turned into one input string
    ``<cdr3_alpha_aa><sep><epitope_aa>``, where every epitope character is
    suffixed with ``"1"`` so that it maps onto the dedicated epitope tokens
    added to the tokenizer. The model is run without gradients, the logits
    are passed through a sigmoid, and column 0 is used as the positive-class
    score for a precision/recall sweep over thresholds 0.20 to 0.65 in steps
    of 0.05. The resulting average precision (AP) is therefore a coarse
    approximation restricted to that threshold range.

    Args:
        testdata: Path (or anything ``pandas.read_csv`` accepts) of the test CSV.
        model_path: Directory/identifier of the fine-tuned
            ``EsmForSequenceClassification`` checkpoint.
        sep: Separator token placed between the CDR3 and epitope sequences;
            it is added to the tokenizer vocabulary.
        batch_size: Number of sequences per forward pass (keyword-only).

    Returns:
        The average precision that is also printed.

    Raises:
        ValueError: If ``batch_size`` is not positive or the CSV has no rows.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be a positive integer")

    # Use the GPU when one is present, otherwise fall back to the CPU so the
    # evaluation still runs on machines without CUDA.
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    model = EsmForSequenceClassification.from_pretrained(model_path)
    tokenizer = AutoTokenizer.from_pretrained("facebook/esm2_t6_8M_UR50D")
    tokenizer.add_tokens([sep])
    epitope_vocab = ["A1", "C1", "D1", "E1", "F1", "G1", "H1", "I1", "K1", "L1", "M1", "N1", "P1", "Q1", "R1", "S1", "T1", "V1", "W1", "Y1"]

    # Original author's note: comment out the next line for the "1 vocab" setup.
    tokenizer.add_tokens(epitope_vocab)
    model.resize_token_embeddings(len(tokenizer))
    # Model and inputs must live on the same device; eval() keeps dropout off.
    model.to(device)
    model.eval()

    raw_df = pd.read_csv(testdata)
    if raw_df.empty:
        raise ValueError(f"no rows found in test data: {testdata}")
    raw_df['label_true_pair'] = raw_df['label_true_pair'].astype('int')

    def insert_1_after_characters(s):
        # "ABC" -> "A1B1C1": tag every residue so it matches an epitope token.
        return '1'.join(s) + '1'

    # Original author's note: comment out the next line for the "1 vocab" setup.
    raw_df['epitope_aa'] = raw_df['epitope_aa'].apply(insert_1_after_characters)

    # Format data
    test_df = pd.DataFrame({'seq': raw_df['cdr3_alpha_aa'] + sep + raw_df['epitope_aa'],
                            'label': raw_df['label_true_pair']})

    dataset = DatasetDict({
        'test': Dataset.from_pandas(test_df)
    })

    def tokenize_function(example):
        # Plain Python lists are returned (no return_tensors) so that each
        # stored example is 1-D and the stacked tensor below is (N, L).
        # NOTE(review): max_length is set to the vocabulary size, as in the
        # original code - confirm this matches the length used in training.
        return tokenizer(example['seq'], max_length=len(tokenizer), padding='max_length', truncation=True)

    tokenized_dataset = dataset.map(tokenize_function, batched=False)
    print(tokenized_dataset['test'])

    input_ids = torch.tensor(tokenized_dataset['test']['input_ids'])
    attention_mask = torch.tensor(tokenized_dataset['test']['attention_mask'])

    # Run inference in mini-batches to bound peak memory on large test sets.
    logit_chunks = []
    with torch.no_grad():
        for start in range(0, input_ids.size(0), batch_size):
            stop = start + batch_size
            out = model(input_ids=input_ids[start:stop].to(device),
                        attention_mask=attention_mask[start:stop].to(device))
            logit_chunks.append(out.logits.cpu())
    logits = torch.cat(logit_chunks)

    # NOTE(review): column 0 of the sigmoid output is treated as the
    # positive-pair score, as in the original code - confirm this matches
    # the classification head the checkpoint was trained with.
    pred_scores = torch.sigmoid(logits)[:, 0].numpy()
    thresholds = np.arange(start=0.2, stop=0.7, step=0.05)

    # precision_recall_curve compares against "TRUE"/"FALSE" string labels,
    # so the integer labels (1 taken as a true pair) are converted to match.
    y_true = np.where(test_df['label'].to_numpy() == 1, "TRUE", "FALSE")
    precisions, recalls = precision_recall_curve(y_true=y_true,
                                                 pred_scores=pred_scores,
                                                 thresholds=thresholds)

    # Close the curve at (recall=0, precision=1) before summing.
    precisions.append(1)
    recalls.append(0)

    precisions = np.array(precisions)
    recalls = np.array(recalls)

    # Sum of recall decrements weighted by precision along the threshold sweep.
    AP = np.sum((recalls[:-1] - recalls[1:]) * precisions[:-1])
    print(AP)
    return AP


# Evaluate the saved checkpoint on the test CSV, using '0' as the separator token.
evaluate_testdata(
    testdata=r'vdjdb_external_negatives_data.csv',
    model_path=r'tmp\vocab1\checkpoint-25800',
    sep='0',
)


**Source of Borrowed Code:**

The following code was adapted from <https://blog.paperspace.com/mean-average-precision/>.
