In [None]:
!pip install datasets
!pip install transformers

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting datasets
  Downloading datasets-2.7.1-py3-none-any.whl (451 kB)
[K     |████████████████████████████████| 451 kB 12.0 MB/s 
[?25hCollecting responses<0.19
  Downloading responses-0.18.0-py3-none-any.whl (38 kB)
Collecting xxhash
  Downloading xxhash-3.1.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (212 kB)
[K     |████████████████████████████████| 212 kB 68.4 MB/s 
Collecting huggingface-hub<1.0.0,>=0.2.0
  Downloading huggingface_hub-0.11.1-py3-none-any.whl (182 kB)
[K     |████████████████████████████████| 182 kB 59.7 MB/s 
Collecting multiprocess
  Downloading multiprocess-0.70.14-py38-none-any.whl (132 kB)
[K     |████████████████████████████████| 132 kB 50.1 MB/s 
Collecting urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1
  Downloading urllib3-1.25.11-py2.py3-none-any.whl (127 kB)
[K     |████████████████████████████████| 127 kB 59.0 MB/s 
Installing collected 

### Library Loading

In [None]:
from datasets import load_dataset
import pandas as pd
from transformers import LongformerTokenizerFast
import torch
import string
import re
from collections import Counter
from transformers import LongformerForQuestionAnswering
from transformers import AutoTokenizer, AutoModelForQuestionAnswering

In [None]:
from huggingface_hub import login
# Authenticate against the Hugging Face Hub; in a notebook this call renders the Hub login widget.
login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

### Loading the Covid dataset

In [None]:
def deepset_save():
    """Download the CovidQA train split and persist it as JSON lines.

    Each row is ``[question, context, document_id, answers]``. The frame uses
    the column names ``question``, ``context``, ``id`` and ``answers`` (note
    that the ``id`` column is filled from the dataset's ``document_id`` field)
    and is written to ``train_deepset.jsonl`` in the working directory.

    Returns:
        tuple: ``(df, data)`` where ``df`` is the pandas DataFrame and ``data``
        is the list of row lists it was built from.
    """
    train = load_dataset("covid_qa_deepset", split="train")
    cols = ["question", "context", "id", "answers"]
    # Walk the four dataset columns in lockstep, producing one list per example.
    data = [
        list(record)
        for record in zip(
            train["question"],
            train["context"],
            train["document_id"],
            train["answers"],
        )
    ]
    df = pd.DataFrame(data, columns=cols)
    df.to_json("train_deepset.jsonl", lines=True, orient='records')
    return df, data

In [None]:
# Fetch the training split, write train_deepset.jsonl, and keep both the DataFrame and the raw row lists.
df, data = deepset_save()

Downloading builder script:   0%|          | 0.00/4.80k [00:00<?, ?B/s]

Downloading metadata:   0%|          | 0.00/1.95k [00:00<?, ?B/s]

Downloading readme:   0%|          | 0.00/5.61k [00:00<?, ?B/s]

Downloading and preparing dataset covid_qa_deepset/covid_qa_deepset to /root/.cache/huggingface/datasets/covid_qa_deepset/covid_qa_deepset/1.0.0/fb886523842e312176f92ec8e01e77a08fa15a694f5741af6fc42796ee9c8c46...


Downloading data:   0%|          | 0.00/1.35M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/2019 [00:00<?, ? examples/s]

Dataset covid_qa_deepset downloaded and prepared to /root/.cache/huggingface/datasets/covid_qa_deepset/covid_qa_deepset/1.0.0/fb886523842e312176f92ec8e01e77a08fa15a694f5741af6fc42796ee9c8c46. Subsequent calls will reuse this data.


In [None]:
# Load a Longformer tokenizer and a Longformer QA model (note: from two different Hub checkpoints).
# NOTE(review): none of the cells visible in this notebook reference `tokenizer` or `model` afterwards;
# the evaluation cells load their own tokenizer/model pairs. Confirm whether this cell is still needed.
tokenizer = LongformerTokenizerFast.from_pretrained('valhalla/longformer-base-4096-finetuned-squadv1')
model = LongformerForQuestionAnswering.from_pretrained("abhijithneilabraham/longformer_covid_qa")

Downloading:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/798k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/456k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/150 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/757 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/757 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/595M [00:00<?, ?B/s]

### Score Evaluator

In [None]:
def normalize_answer(s):
    """Canonicalise an answer string for comparison.

    Steps, in order: lower-case, delete every ``string.punctuation``
    character, replace the standalone words ``a``/``an``/``the`` with a
    space, then collapse all runs of whitespace to single spaces.

    Args:
        s: The raw answer text.

    Returns:
        The normalised text (possibly empty).
    """
    lowered = s.lower()
    # Deleting via a translation table removes exactly the characters in string.punctuation.
    unpunctuated = lowered.translate(str.maketrans('', '', string.punctuation))
    without_articles = re.sub(r'\b(a|an|the)\b', ' ', unpunctuated)
    # split() with no argument drops leading/trailing whitespace and merges inner runs.
    return ' '.join(without_articles.split())


def f1_score(prediction, ground_truth):
    """Token-level F1 between a predicted and a reference answer.

    Both strings are normalised with ``normalize_answer`` and split on
    whitespace; the overlap is the multiset intersection of the two token
    lists.

    Args:
        prediction: Predicted answer text.
        ground_truth: Reference answer text.

    Returns:
        The integer ``0`` when no token is shared, otherwise the harmonic
        mean of precision and recall as a float.
    """
    predicted = normalize_answer(prediction).split()
    reference = normalize_answer(ground_truth).split()
    # Counter & Counter keeps each token with the minimum of its two counts.
    shared = Counter(predicted) & Counter(reference)
    num_same = sum(shared.values())
    if not num_same:
        return 0
    precision = 1.0 * num_same / len(predicted)
    recall = 1.0 * num_same / len(reference)
    return (2 * precision * recall) / (precision + recall)


def exact_match_score(prediction, ground_truth):
    """Return True when both answers are equal after ``normalize_answer``."""
    normalized_prediction = normalize_answer(prediction)
    normalized_truth = normalize_answer(ground_truth)
    return normalized_prediction == normalized_truth

### Load the validation split

In [None]:
# Use the last 10% of the train split as the validation set and pull out the columns used for scoring.
val = load_dataset("covid_qa_deepset", split='train[90%:]')
val_contexts = val["context"]
val_questions = val["question"]
val_answers = val["answers"]



### Find Answer

In [None]:
def wrap(s, w):
    """Split a sequence into consecutive slices of length ``w``.

    Args:
        s: Any sliceable sequence (used here with strings).
        w: Slice width; the final slice may be shorter.

    Returns:
        A list of slices in order; empty when ``s`` is empty.
    """
    pieces = []
    for start in range(0, len(s), w):
        pieces.append(s[start:start + w])
    return pieces

In [None]:
def qa(question,text, originalAnswer, maxLength, tokenizer, model):
    """Answer ``question`` from ``text`` by running an extractive QA model per chunk.

    ``text`` is cut into consecutive pieces of at most ``maxLength``
    *characters* (via ``wrap``; this is not a token count). Each piece is
    encoded together with the question, the model's highest-scoring start and
    end positions select a token span, and that span is decoded to a string.

    NOTE: the chunk whose answer is returned is chosen by comparing every
    candidate against ``originalAnswer`` with ``f1_score``. The gold answer
    therefore influences the prediction, so scores computed on the result are
    an oracle upper bound rather than a blind-inference score.

    Args:
        question: Question string.
        text: Context string to search.
        originalAnswer: Reference answer used only to pick the best chunk.
        maxLength: Maximum chunk length in characters.
        tokenizer: Tokenizer providing ``encode_plus`` and ``decode``.
        model: QA model returning ``(start_scores, end_scores)`` when called
            with ``return_dict=False``.

    Returns:
        The decoded answer with the highest F1 against ``originalAnswer``
        (the earliest chunk wins ties), or ``None`` when ``text`` yields no
        chunks.
    """
    best_f1 = None
    result_answer = None
    for chunk in wrap(text, maxLength):
        encoding = tokenizer.encode_plus(question, chunk, return_tensors="pt")
        input_ids = encoding["input_ids"]
        attention_mask = encoding["attention_mask"]

        # Inference only: skip autograd bookkeeping so no graph is kept per chunk.
        with torch.no_grad():
            start_scores, end_scores = model(input_ids, attention_mask=attention_mask,return_dict=False)

        # Plain ints make the slice bounds explicit instead of relying on 0-dim tensors.
        start_index = int(torch.argmax(start_scores))
        end_index = int(torch.argmax(end_scores))

        # An end position before the start position gives an empty span and an empty answer.
        answer_ids = input_ids[0][start_index:end_index + 1].tolist()
        answer = tokenizer.decode(answer_ids)

        f1 = f1_score(answer, originalAnswer)
        if best_f1 is None or best_f1 < f1:
            result_answer = answer
            best_f1 = f1
    return result_answer

In [None]:
# Per-model result accumulators: every evaluation cell appends one value to each list.
# The generator yields six distinct empty lists, one per name.
(
    fiScoresAll,
    emScoresAll,
    almostScoresAll,
    almostCountAll,
    answerPresentAll,
    answerPresentCountAll,
) = ([] for _unused in range(6))


In [None]:
# Evaluate the Longformer CovidQA checkpoint on the validation split and record its metrics.
tokenizerLongFormer = AutoTokenizer.from_pretrained("abhijithneilabraham/longformer_covid_qa")
modelLongFormer = AutoModelForQuestionAnswering.from_pretrained("abhijithneilabraham/longformer_covid_qa")

# p / ans collect the predictions and reference answers; the *_total names are running sums
# that are turned into averages after the loop. All of them are notebook-global names.
p,ans=[],[]
f1_total=0
em_total=0
almost_total_count = 0
almost_total = 0
answerPresent_total_count = 0
answerPresent_total = 0
count=0
for i in range(len(val_contexts)):
  # Only the first reference answer of each example is used.
  answer=val_answers[i]["text"][0]
  # 4096 is the chunk size handed to qa(), which also receives the reference answer.
  pred = qa(val_questions[i], val_contexts[i], answer, 4096, tokenizerLongFormer, modelLongFormer)
  p.append(pred)
  ans.append(answer)
  f1=f1_score(pred,answer)
  em=exact_match_score(pred,answer)

  # "almost": examples with F1 of at least 0.5.
  if f1 >= 0.5:
    almost_total += f1
    almost_total_count += 1
  # "answerPresent": examples sharing at least one token with the reference (F1 > 0).
  if f1 > 0:
    answerPresent_total += f1
    answerPresent_total_count += 1
  if em:
    em_total+=1
  
  f1_total+=f1
  print(em)
  print(f1)
  print("***************** COunt")
  count+=1
  print(count)
    
# Every sum is divided by the total number of examples, including the bucketed F1 sums,
# so almost_total / answerPresent_total are not means within their own bucket.
f1_total=f1_total/count
em_total=em_total/count
almost_total = almost_total/count
almost_total_count = almost_total_count/count
answerPresent_total = answerPresent_total/count
answerPresent_total_count = answerPresent_total_count/count

# Record this model's results in the cross-model lists.
fiScoresAll.append(f1_total)
emScoresAll.append(em_total)
almostScoresAll.append(almost_total)
almostCountAll.append(almost_total_count)
answerPresentAll.append(answerPresent_total)
answerPresentCountAll.append(answerPresent_total_count)
# Printed per example above: the exact-match flag, the F1 value, and the running example count.


True
1.0
***************** COunt
1
True
1.0
***************** COunt
2
False
0.9836065573770492
***************** COunt
3
True
1.0
***************** COunt
4
False
0.03913894324853229
***************** COunt
5
False
0.6363636363636364
***************** COunt
6
True
1.0
***************** COunt
7
True
1.0
***************** COunt
8
False
0.2222222222222222
***************** COunt
9
True
1.0
***************** COunt
10
False
0
***************** COunt
11
True
1.0
***************** COunt
12
True
1.0
***************** COunt
13
False
0.8
***************** COunt
14
True
1.0
***************** COunt
15
False
0.5714285714285715
***************** COunt
16
False
0.10526315789473685
***************** COunt
17
True
1.0
***************** COunt
18
False
0.5
***************** COunt
19
False
0.9836065573770492
***************** COunt
20
True
1.0
***************** COunt
21
True
1.0
***************** COunt
22
True
1.0
***************** COunt
23
True
1.0
***************** COunt
24
True
1.0
***************** COu

In [None]:
# Evaluate the BioBERT CovidQA checkpoint on the validation split and record its metrics.
tokenizerBioBert = AutoTokenizer.from_pretrained("juliusco/biobert-base-cased-v1.1-squad-finetuned-covbiobert")
modelBioBert = AutoModelForQuestionAnswering.from_pretrained("juliusco/biobert-base-cased-v1.1-squad-finetuned-covbiobert")

p,ans=[],[]
f1_total=0
em_total=0
# Reset the bucket accumulators too. Without this they start from whatever a previously
# run evaluation cell left in the kernel (its already-averaged values), which contaminates
# this model's "almost" and "answerPresent" metrics.
almost_total_count = 0
almost_total = 0
answerPresent_total_count = 0
answerPresent_total = 0
count=0
for i in range(len(val_contexts)):
  # Only the first reference answer of each example is used.
  answer=val_answers[i]["text"][0]
  # 512 is the chunk size handed to qa(), which also receives the reference answer.
  pred = qa(val_questions[i], val_contexts[i], answer, 512, tokenizerBioBert, modelBioBert)
  p.append(pred)
  ans.append(answer)
  f1=f1_score(pred,answer)
  em=exact_match_score(pred,answer)

  # "almost": examples with F1 of at least 0.5.
  if f1 >= 0.5:
    almost_total += f1
    almost_total_count += 1
  # "answerPresent": examples sharing at least one token with the reference (F1 > 0).
  if f1 > 0:
    answerPresent_total += f1
    answerPresent_total_count += 1
  if em:
    em_total+=1

  f1_total+=f1
  print(em)
  print(f1)
  print("***************** COunt")
  count+=1
  print(count)

# Every sum is divided by the total number of examples, including the bucketed F1 sums.
f1_total=f1_total/count
em_total=em_total/count
almost_total = almost_total/count
almost_total_count = almost_total_count/count
answerPresent_total = answerPresent_total/count
answerPresent_total_count = answerPresent_total_count/count

# Record this model's results in the cross-model lists.
fiScoresAll.append(f1_total)
emScoresAll.append(em_total)
almostScoresAll.append(almost_total)
almostCountAll.append(almost_total_count)
answerPresentAll.append(answerPresent_total)
answerPresentCountAll.append(answerPresent_total_count)


Downloading:   0%|          | 0.00/365 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/213k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/436k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/112 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/694 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/431M [00:00<?, ?B/s]

False
0.13157894736842105
***************** COunt
1
False
0.7741935483870968
***************** COunt
2
False
0
***************** COunt
3
True
1.0
***************** COunt
4
False
0
***************** COunt
5
False
0
***************** COunt
6
False
0
***************** COunt
7
False
0
***************** COunt
8
False
0
***************** COunt
9
False
0
***************** COunt
10
False
0
***************** COunt
11
True
1.0
***************** COunt
12
False
0.06666666666666668
***************** COunt
13
False
0.047619047619047616
***************** COunt
14
False
0.5714285714285715
***************** COunt
15
False
0
***************** COunt
16
False
0.0625
***************** COunt
17
True
1.0
***************** COunt
18
False
0.08333333333333334
***************** COunt
19
False
0.043478260869565216
***************** COunt
20
False
0.2978723404255319
***************** COunt
21
False
0.05405405405405405
***************** COunt
22
False
0.16216216216216217
***************** COunt
23
False
0
*********

In [None]:
# Evaluate the DistilBERT CovidQA checkpoint on the validation split and record its metrics.
tokenizerDistilBert = AutoTokenizer.from_pretrained("shainahub/covid_qa_distillbert")
modelDistilBert = AutoModelForQuestionAnswering.from_pretrained("shainahub/covid_qa_distillbert")

p,ans=[],[]
f1_total=0
em_total=0
# Reset the bucket accumulators too. Without this they start from whatever a previously
# run evaluation cell left in the kernel (its already-averaged values), which contaminates
# this model's "almost" and "answerPresent" metrics.
almost_total_count = 0
almost_total = 0
answerPresent_total_count = 0
answerPresent_total = 0
count=0
for i in range(len(val_contexts)):
  # Only the first reference answer of each example is used.
  answer=val_answers[i]["text"][0]
  # 512 is the chunk size handed to qa(), which also receives the reference answer.
  pred = qa(val_questions[i], val_contexts[i], answer, 512, tokenizerDistilBert, modelDistilBert)
  p.append(pred)
  ans.append(answer)
  f1=f1_score(pred,answer)
  em=exact_match_score(pred,answer)

  # "almost": examples with F1 of at least 0.5.
  if f1 >= 0.5:
    almost_total += f1
    almost_total_count += 1
  # "answerPresent": examples sharing at least one token with the reference (F1 > 0).
  if f1 > 0:
    answerPresent_total += f1
    answerPresent_total_count += 1
  if em:
    em_total+=1

  f1_total+=f1
  print(em)
  print(f1)
  print("***************** COunt")
  count+=1
  print(count)

# Every sum is divided by the total number of examples, including the bucketed F1 sums.
f1_total=f1_total/count
em_total=em_total/count
almost_total = almost_total/count
almost_total_count = almost_total_count/count
answerPresent_total = answerPresent_total/count
answerPresent_total_count = answerPresent_total_count/count

# Record this model's results in the cross-model lists.
fiScoresAll.append(f1_total)
emScoresAll.append(em_total)
almostScoresAll.append(almost_total)
almostCountAll.append(almost_total_count)
answerPresentAll.append(answerPresent_total)
answerPresentCountAll.append(answerPresent_total_count)

Downloading:   0%|          | 0.00/333 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/232k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/466k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/112 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/561 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/265M [00:00<?, ?B/s]

False
0.09090909090909091
***************** COunt
1
False
0.7999999999999999
***************** COunt
2
False
0.7450980392156863
***************** COunt
3
True
1.0
***************** COunt
4
True
1.0
***************** COunt
5
False
0
***************** COunt
6
False
0.375
***************** COunt
7
False
0.6250000000000001
***************** COunt
8
False
0.2222222222222222
***************** COunt
9
False
0.761904761904762
***************** COunt
10
False
0
***************** COunt
11
True
1.0
***************** COunt
12
False
0.7272727272727273
***************** COunt
13
False
0
***************** COunt
14
False
0.30769230769230765
***************** COunt
15
False
0
***************** COunt
16
False
0.058823529411764705
***************** COunt
17
True
1.0
***************** COunt
18
False
0.10526315789473685
***************** COunt
19
True
1.0
***************** COunt
20
False
0.9032258064516129
***************** COunt
21
False
0.8500000000000001
***************** COunt
22
False
0
**************

In [None]:
# Evaluate the ELECTRA CovidQA checkpoint on the validation split and record its metrics.
tokenizerElectra = AutoTokenizer.from_pretrained("armageddon/electra-base-squad2-covid-qa-deepset")
modelElectra = AutoModelForQuestionAnswering.from_pretrained("armageddon/electra-base-squad2-covid-qa-deepset")

p,ans=[],[]
f1_total=0
em_total=0
# Reset the bucket accumulators too. Without this they start from whatever a previously
# run evaluation cell left in the kernel (its already-averaged values), which contaminates
# this model's "almost" and "answerPresent" metrics.
almost_total_count = 0
almost_total = 0
answerPresent_total_count = 0
answerPresent_total = 0
count=0
for i in range(len(val_contexts)):
  # Only the first reference answer of each example is used.
  answer=val_answers[i]["text"][0]
  # 512 is the chunk size handed to qa(), which also receives the reference answer.
  pred = qa(val_questions[i], val_contexts[i], answer, 512, tokenizerElectra, modelElectra)
  p.append(pred)
  ans.append(answer)
  f1=f1_score(pred,answer)
  em=exact_match_score(pred,answer)

  # "almost": examples with F1 of at least 0.5.
  if f1 >= 0.5:
    almost_total += f1
    almost_total_count += 1
  # "answerPresent": examples sharing at least one token with the reference (F1 > 0).
  if f1 > 0:
    answerPresent_total += f1
    answerPresent_total_count += 1
  if em:
    em_total+=1

  f1_total+=f1
  print(em)
  print(f1)
  print("***************** COunt")
  count+=1
  print(count)

# Every sum is divided by the total number of examples, including the bucketed F1 sums.
f1_total=f1_total/count
em_total=em_total/count
almost_total = almost_total/count
almost_total_count = almost_total_count/count
answerPresent_total = answerPresent_total/count
answerPresent_total_count = answerPresent_total_count/count

# Record this model's results in the cross-model lists.
fiScoresAll.append(f1_total)
emScoresAll.append(em_total)
almostScoresAll.append(almost_total)
almostCountAll.append(almost_total_count)
answerPresentAll.append(answerPresent_total)
answerPresentCountAll.append(answerPresent_total_count)


Downloading:   0%|          | 0.00/501 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/232k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/712k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/112 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/841 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/436M [00:00<?, ?B/s]

False
0.1
***************** COunt
1
False
0.7741935483870968
***************** COunt
2
False
0.18604651162790697
***************** COunt
3
True
1.0
***************** COunt
4
True
1.0
***************** COunt
5
False
0.5217391304347827
***************** COunt
6
False
0.0588235294117647
***************** COunt
7
False
0.6250000000000001
***************** COunt
8
False
0.048780487804878044
***************** COunt
9
False
0.8421052631578948
***************** COunt
10
False
0
***************** COunt
11
True
1.0
***************** COunt
12
True
1.0
***************** COunt
13
False
0
***************** COunt
14
False
0.5714285714285715
***************** COunt
15
True
1.0
***************** COunt
16
False
0.06741573033707865
***************** COunt
17
False
0.6666666666666666
***************** COunt
18
False
0.051948051948051945
***************** COunt
19
False
0.9841269841269841
***************** COunt
20
False
0.8749999999999999
***************** COunt
21
False
0.8500000000000001
***************

In [None]:
# Evaluate the ALBERT CovidQA checkpoint on the validation split and record its metrics.
tokenizerAlbert = AutoTokenizer.from_pretrained("armageddon/albert-squad-v2-covid-qa-deepset")
modelAlbert = AutoModelForQuestionAnswering.from_pretrained("armageddon/albert-squad-v2-covid-qa-deepset")

p,ans=[],[]
f1_total=0
em_total=0
# Reset the bucket accumulators too. Without this they start from whatever a previously
# run evaluation cell left in the kernel (its already-averaged values), which contaminates
# this model's "almost" and "answerPresent" metrics.
almost_total_count = 0
almost_total = 0
answerPresent_total_count = 0
answerPresent_total = 0
count=0
for i in range(len(val_contexts)):
  # Only the first reference answer of each example is used.
  answer=val_answers[i]["text"][0]
  # 512 is the chunk size handed to qa(), which also receives the reference answer.
  pred = qa(val_questions[i], val_contexts[i], answer, 512, tokenizerAlbert, modelAlbert)
  p.append(pred)
  ans.append(answer)
  f1=f1_score(pred,answer)
  em=exact_match_score(pred,answer)

  # "almost": examples with F1 of at least 0.5.
  if f1 >= 0.5:
    almost_total += f1
    almost_total_count += 1
  # "answerPresent": examples sharing at least one token with the reference (F1 > 0).
  if f1 > 0:
    answerPresent_total += f1
    answerPresent_total_count += 1
  if em:
    em_total+=1

  f1_total+=f1
  print(em)
  print(f1)
  print("***************** COunt")
  count+=1
  print(count)

# Every sum is divided by the total number of examples, including the bucketed F1 sums.
f1_total=f1_total/count
em_total=em_total/count
almost_total = almost_total/count
almost_total_count = almost_total_count/count
answerPresent_total = answerPresent_total/count
answerPresent_total_count = answerPresent_total_count/count

# Record this model's results in the cross-model lists.
fiScoresAll.append(f1_total)
emScoresAll.append(em_total)
almostScoresAll.append(almost_total)
almostCountAll.append(almost_total_count)
answerPresentAll.append(answerPresent_total)
answerPresentCountAll.append(answerPresent_total_count)


False
0
***************** COunt
1
True
1.0
***************** COunt
2
False
0.7450980392156863
***************** COunt
3
True
1.0
***************** COunt
4
True
1.0
***************** COunt
5
False
0.6363636363636364
***************** COunt
6
False
0.13636363636363635
***************** COunt
7
True
1.0
***************** COunt
8
False
0.1875
***************** COunt
9
True
1.0
***************** COunt
10
False
0
***************** COunt
11
False
0.14705882352941177
***************** COunt
12
True
1.0
***************** COunt
13
False
0
***************** COunt
14
False
0.5714285714285715
***************** COunt
15
False
0.5714285714285715
***************** COunt
16
False
0.0625
***************** COunt
17
True
1.0
***************** COunt
18
False
0
***************** COunt
19
False
0.9836065573770492
***************** COunt
20
False
0.967741935483871
***************** COunt
21
True
1.0
***************** COunt
22
False
0
***************** COunt
23
False
0
***************** COunt
24
False
0.199999

In [None]:
# Print each metric list on its own line; each list has one entry per evaluation cell that was run,
# in execution order.
for metric_values in (
    fiScoresAll,
    emScoresAll,
    almostScoresAll,
    almostCountAll,
    answerPresentAll,
    answerPresentCountAll,
):
    print(metric_values)

[0.5603908540435424, 0.2798536035926875, 0.35813532278796356, 0.6140720962295781, 0.4136781352975916]
[0.3316831683168317, 0.0891089108910891, 0.14356435643564355, 0.29207920792079206, 0.2722772277227723]
[0.49084708879208416, 0.1704493829240307, 0.3068771468012502, 0.5272089693627102, 0.49661939735283267]
[0.5544554455445545, 0.21561611606705225, 0.3674040401785498, 0.6057792279216759, 0.5525038575639687]
[0.5603908540435424, 0.2826278157414179, 0.35953447039064385, 0.6158519698453734, 0.5274703053252684]
[0.8564356435643564, 0.8210714635820017, 0.5931736211068416, 0.9732335327777566, 0.7028377897662265]
