In [None]:
!pip install transformers

import pandas as pd
import numpy as np
from functools import partial
import torch
import json
import os
from transformers import ElectraTokenizer, ElectraForSequenceClassification

In [None]:
# Select the compute device once: CUDA when PyTorch can see a GPU, CPU otherwise.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Report which device was chosen.
if device.type == 'cuda':
    print('There are %d GPU(s) available.' % torch.cuda.device_count())
    print('We will use the GPU:', torch.cuda.get_device_name(0))
else:
    print('No GPU available, using the CPU instead.')

In [None]:
# Notebook configuration. The three paths look like placeholders -- set them before running.

# Model id passed to from_pretrained() for both the classifier and the tokenizer.
model_id = 'sultan/BioM-BERT-PubMed-PMC-Large'
# JSON file holding the task 2 results; read with pd.read_json.
task_2_results_df_path = '/path/to/task2/results/df.json'
# JSON file the final predictions are written to.
output_path = '/output/results/path/output.json'
# Fine-tuned checkpoint read with torch.load; it is unpacked as a (model state, optimizer state) pair.
checkpoint_path = '/path/to/finetuned/model/checkpoint'

In [None]:
# Load the results from task 2 (evidence sentence indexes for each input example).
# Downstream cells read the 'Statement' and 'Premise' columns of each row.
task_2_results_df = pd.read_json(task_2_results_df_path)

In [None]:
# Build the 2-label sequence classifier and its tokenizer from the pretrained model id.
bert_model = ElectraForSequenceClassification.from_pretrained(model_id, num_labels=2)
tokenizer = ElectraTokenizer.from_pretrained(model_id)

# The checkpoint is unpacked as a (model state dict, optimizer state) pair; only the
# model weights are needed for inference.
# map_location remaps tensors that were saved from a GPU onto `device`, so the load
# also succeeds on a CPU-only machine instead of failing during deserialization.
# NOTE(review): torch.load unpickles the file -- only load checkpoints from a trusted source.
model_state, optimizer_state = torch.load(checkpoint_path, map_location=device)
bert_model.load_state_dict(model_state)

bert_model.to(device)
# Inference only: make evaluation mode (dropout disabled) explicit.
bert_model.eval()

In [None]:
import math

# Token budget handed to the tokenizer as `max_length`; longer pairs are truncated.
MAX_LEN = 512

# Translates the predicted class index into the label string that is reported.
label_dict = {
    0: 'Entailment',
    1: 'Contradiction',
}

def get_prediction(row):
  """Classify one statement/premise pair and return its label string.

  Args:
    row: mapping-like object (e.g. a DataFrame row) with a 'Statement' string
      and a 'Premise' list of evidence sentences.

  Returns:
    The `label_dict` entry for the class with the highest logit.

  Relies on the notebook-level `tokenizer`, `bert_model`, `device`, `MAX_LEN`
  and `label_dict`.
  """
  hypothesis = row['Statement']
  premise = ' '.join(row['Premise'])  # premise is a list of evidence sentences

  # With return_tensors='pt' every returned tensor already has shape
  # (1, sample_length), i.e. a batch of one, so no reshaping is needed.
  encoding = tokenizer(text=hypothesis,
                       text_pair=premise,
                       add_special_tokens=True,
                       truncation=True,
                       max_length=MAX_LEN,
                       return_tensors='pt',
                       return_token_type_ids=True,
                       return_attention_mask=True)

  model = bert_model

  # Inference only: skip autograd bookkeeping. Inputs are moved to `device`
  # (the same device the model was moved to) so this also works without a GPU.
  with torch.no_grad():
    outputs = model(input_ids=encoding['input_ids'].to(device),
                    attention_mask=encoding['attention_mask'].to(device),
                    token_type_ids=encoding['token_type_ids'].to(device))

  logits = outputs[0].cpu()

  # Take the argmax of the raw logits. Squashing them through a sigmoid first
  # cannot change the winner, but it saturates to 1.0 for large logits and then
  # the tie is resolved in favour of index 0.
  classification = torch.argmax(logits, dim=-1).item()
  return label_dict[classification]

In [None]:
# Classify every row (axis=1 hands each row to get_prediction) and keep the label in a 'Prediction' column.
task_2_results_df['Prediction'] = task_2_results_df.apply(get_prediction, axis=1)

In [None]:
# The official format requires only example ID + predicted label, so keep just the
# 'Prediction' column of the frame the predictions were written to.
# NOTE(review): assumes the DataFrame index holds the example IDs -- confirm.
result_df = task_2_results_df[['Prediction']].copy()
result_df.head()

In [None]:
# Write the predictions as JSON keyed by the DataFrame index: {index: {"Prediction": label}}.
result_df.to_json(output_path, orient='index')