In [1]:
import torch
import pandas as pd
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from sklearn.metrics import accuracy_score, precision_score, recall_score

In [3]:
# Load the fine-tuned sequence-classification weights from the Kaggle input
# directory, and the tokenizer registered under the "Colorful/RTA" identifier.
model_path = "/kaggle/input/dupbrfinetunedmodel/model"
model = AutoModelForSequenceClassification.from_pretrained(model_path)
tokenizer = AutoTokenizer.from_pretrained("Colorful/RTA")

# Run on the GPU when torch can see one; otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Kept as the last expression of the cell: moves the parameters to `device`
# and lets the notebook display the module structure.
model.to(device)

RobertaForSequenceClassification(
  (roberta): RobertaModel(
    (embeddings): RobertaEmbeddings(
      (word_embeddings): Embedding(50265, 768, padding_idx=1)
      (position_embeddings): Embedding(514, 768, padding_idx=1)
      (token_type_embeddings): Embedding(1, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): RobertaEncoder(
      (layer): ModuleList(
        (0-11): 12 x RobertaLayer(
          (attention): RobertaAttention(
            (self): RobertaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): RobertaSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
             

In [4]:
csv_path = '/kaggle/input/dup-br-universalencoder/dup_br_detection_data/dup_test.csv'
df = pd.read_csv(csv_path)

# Encode the textual 'label' column as integers: 'yes' -> 1, 'no' -> 0.
# An explicit mapping is used instead of "anything that is not 'yes' is 0" so
# that missing or unexpected values (NaN, different casing, stray whitespace,
# already-numeric labels) are reported rather than silently counted as class 0,
# which would distort the evaluation metrics computed later.
label_codes = df['label'].map({'yes': 1, 'no': 0})
unmapped = label_codes.isna()
if unmapped.any():
    unexpected_values = df.loc[unmapped, 'label'].unique().tolist()
    raise ValueError(
        f"Unexpected values in 'label' column (expected 'yes'/'no'): {unexpected_values!r}"
    )
df['label'] = label_codes.astype('int64')

df.head()

Unnamed: 0,bug_id1,bug1,bug_id2,bug2,label
0,14348,"Hi, in many countries (like mine, Italy, or Ar...",29379,If you like to use numeric . instead of normql...,1
1,42461,"When A database has been created, then saved. ...",42460,"When a new database is created, it is not poss...",1
2,103643,I have noticed several times in my calc 310m11...,102506,"When i clip the tekst, then calc is crash.",1
3,14261,[snip]\n\n/client/XRmVirtualDevice -T com.s...,14994,NoDependencies\nc:\OOoSrc\ooo_1.1beta2_src\sol...,1
4,17801,The installation script fails with the undefin...,17286,"I have RedHat 7.2, with kernel 2.4.7-10. I do...",1


In [6]:
# Number of bug-report pairs sent through the model per forward pass.
# Lower this if the accelerator runs out of memory.
BATCH_SIZE = 16

# Make sure dropout is disabled regardless of what earlier cells (or a previous
# kernel session) may have done to the model's training flag.
model.eval()

bug1_texts = df['bug1'].tolist()
bug2_texts = df['bug2'].tolist()
true_labels = df['label'].tolist()
predicted_labels = []

with torch.no_grad():
    for start in range(0, len(df), BATCH_SIZE):
        stop = start + BATCH_SIZE

        # Tokenize a batch of sentence pairs. Padding only up to the longest
        # pair in the batch (instead of always to the model maximum) avoids
        # running attention over hundreds of padding tokens for short inputs;
        # overly long pairs are still truncated to the model maximum.
        tokenized_inputs = tokenizer(
            bug1_texts[start:stop],
            bug2_texts[start:stop],
            padding=True,
            truncation=True,
            max_length=tokenizer.model_max_length,
            return_tensors="pt",
        )
        inputs = {key: value.to(device) for key, value in tokenized_inputs.items()}

        logits = model(**inputs).logits

        # argmax over the raw logits selects the same class as argmax over the
        # softmax probabilities, so the softmax is not needed for prediction.
        predicted_labels.extend(torch.argmax(logits, dim=1).cpu().tolist())

# Calculate evaluation metrics (reported as percentages)
accuracy = accuracy_score(true_labels, predicted_labels) * 100
precision = precision_score(true_labels, predicted_labels) * 100
recall = recall_score(true_labels, predicted_labels) * 100

print(f"Accuracy: {accuracy:.4f}%")
print(f"Precision: {precision:.4f}%")
print(f"Recall: {recall:.4f}%")


Accuracy: 97.3051%
Precision: 97.4576%
Recall: 98.8637%
