## Imports

In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
import ast
import json
import os
from pathlib import Path
from subprocess import Popen, PIPE, STDOUT

import pandas as pd
from tqdm import tqdm

from haystack.preprocessor.cleaning import clean_wiki_text
from haystack.preprocessor.utils import convert_files_to_dicts, fetch_archive_from_http
from haystack.reader.farm import FARMReader
from haystack.reader.transformers import TransformersReader
from haystack.utils import print_answers
from haystack.document_store.elasticsearch import ElasticsearchDocumentStore
from haystack.retriever.sparse import ElasticsearchRetriever
from haystack.pipeline import ExtractiveQAPipeline

03/07/2021 09:42:10 - INFO - faiss.loader -   Loading faiss with AVX2 support.
03/07/2021 09:42:10 - INFO - faiss.loader -   Loading faiss.
03/07/2021 09:42:11 - INFO - farm.modeling.prediction_head -   Better speed can be achieved with apex installed from https://www.github.com/nvidia/apex .


## Load data

In [None]:
data = Path('./data/subjqa')
!ls {data}

books  electronics  grocery  movies  restaurants  tripadvisor


### Electronics

In [None]:
electronics_df = pd.read_csv(data/'electronics/splits/test.csv')
electronics_df.head()

Unnamed: 0,item_id,domain,nn_mod,nn_asp,query_mod,query_asp,q_review_id,q_reviews_id,question,question_subj_level,ques_subj_score,is_ques_subjective,review_id,review,human_ans_spans,human_ans_indices,answer_subj_level,ans_subj_score,is_ans_subjective
0,B0000BZOGJ,electronics,double,zipper,wide,strap,19d6980d862e90d9170006eaa8516e58,a26d5bd37e06bc8b284ceea6a1eab28d,What is strap?,5,0.0,False,2e675cc6ca63d7052aa195f41fada781,"I purchased this just this week, and while it holds myHP Pavilion DV9830US 1...",ANSWERNOTFOUND,"(1135, 1149)",1,0.0,False
1,B0000BZOGJ,electronics,double,zipper,wide,strap,19d6980d862e90d9170006eaa8516e58,a26d5bd37e06bc8b284ceea6a1eab28d,What is strap?,1,0.0,False,2e675cc6ca63d7052aa195f41fada781,"I purchased this just this week, and while it holds myHP Pavilion DV9830US 1...",ANSWERNOTFOUND,"(1135, 1149)",1,0.0,False
2,B004T9RR6I,electronics,available,feature,major,issue,9272dfe044ca6bd7285e5e48ffe008a8,23c36d13271938c92b2eb22ebed15f73,Was it issue ?,1,0.0,False,52d167f982a274fd85d37903f857966f,UPDATE (26 June 2012):I got ready to add a USB storage device (Seagate 2TB) ...,ANSWERNOTFOUND,"(8416, 8430)",1,0.0,False
3,B004T9RR6I,electronics,available,feature,major,issue,9272dfe044ca6bd7285e5e48ffe008a8,23c36d13271938c92b2eb22ebed15f73,Was it issue ?,1,0.0,False,52d167f982a274fd85d37903f857966f,UPDATE (26 June 2012):I got ready to add a USB storage device (Seagate 2TB) ...,ANSWERNOTFOUND,"(8416, 8430)",1,0.0,False
4,B004CLYEFK,electronics,thick,wire,enough,length,b67a9e235ed955a51d09c7f081a62cb5,86c3c5151f9afb17b8de96c6dfb37542,What is the length of the cable of a television ?,4,0.0,False,b1112cc98744b1dc26f6c9b80c2b97f0,What more can you say about Micra Digital USB A to USB B Cable other than it...,ANSWERNOTFOUND,"(199, 213)",4,0.0,False


In [None]:
electronics_df["q_review_id"].nunique()

358

In [None]:
electronics_df["q_reviews_id"].nunique()

1194

In [None]:
electronics_df["human_ans_spans"].value_counts()

ANSWERNOTFOUND                                                                  1399
bass is solid and powerful                                                         3
comfortable                                                                        3
the sound quality is great                                                         3
4 and 5 Star reviews                                                               2
                                                                                ... 
speakers are too quiet                                                             1
I do not give it 5 stars because the image quality is not full hd                  1
accurate + bright display                                                          1
The router took about 10 ' to install and has worked like a charm since then       1
the resulting images will be very soft .   It sharpens up nicely by f/4            1
Name: human_ans_spans, Length: 888, dtype: int64

In [None]:
electronics_df.shape

(2345, 19)

## Convert to SQuAD format

We need to convert the data into the following SQuAD format:

```json
{
    "data": [
        {
            "title": "Beyoncé",
            "paragraphs": [
                {
                    "qas": [
                        {
                            "question": "When did Beyonce start becoming popular?",
                            "id": "56be85543aeaaa14008c9063",
                            "answers": [
                                {
                                    "text": "in the late 1990s",
                                    "answer_start": 269
                                }
                            ],
                            "is_impossible": false
                        }
                        ...
                    ],
                    "context": "Beyoncé ..."
                },
                ...
            ]
        }
    ]
}
```

In [None]:
# peek at single row
row = electronics_df.query("human_ans_spans != 'ANSWERNOTFOUND'").iloc[0]
row

item_id                                                                                     B003VAGXWK
domain                                                                                     electronics
nn_mod                                                                                           sleek
nn_asp                                                                                        keyboard
query_mod                                                                                        solid
query_asp                                                                                          key
q_review_id                                                           73e8277fbf438a7ade8f720ddf8a4f47
q_reviews_id                                                          55576d11e04159c488107b442aaff880
question                                                            How are the keys of the  keyboard?
question_subj_level                                                      

So we could have the following field mappings:

* title -> item_id
* question -> question
* id -> q_review_id
* answers -> human_ans_spans
* answer_start -> first element of human_ans_indices
* is_impossible -> `True` if `human_ans_spans == "ANSWERNOTFOUND"`, otherwise `False`
* context -> review

The plan is to group by `item_id` and build the nested data structure from each group. First, let's build up the paragraphs from a single row:

In [None]:
# The indices are stored as a string such as "(421, 453)"; parse it as a literal
# rather than executing the cell content with eval().
ast.literal_eval(row["human_ans_indices"])

(421, 453)

In [None]:
# Build the SQuAD "paragraphs" entry for the single row peeked at above.
# `human_ans_indices` is a string such as "(421, 453)"; ast.literal_eval parses
# the tuple without executing arbitrary code the way eval() would.
pars = [
    {
        "qas": [
            {
                "question": row["question"],
                "id": row["q_review_id"],
                "answers": [
                    {
                        "text": row["human_ans_spans"],
                        "answer_start": ast.literal_eval(row["human_ans_indices"])[0],
                    }
                ],
                # the comparison already yields a boolean
                "is_impossible": row["human_ans_spans"] == "ANSWERNOTFOUND",
            }
        ],
        "context": row["review"],
    }
]
pars

[{'qas': [{'question': 'How are the keys of the  keyboard?',
    'id': '73e8277fbf438a7ade8f720ddf8a4f47',
    'answers': [{'text': 'The illuminated keys are helpful',
      'answer_start': 421}],
    'is_impossible': False}],
  'context': "I was reluctant to try a wireless keyboard, but due to a wire-chomping kitty, decided it was best to go wireless. I'm so glad I did. This keyboard is sleek and stylish. It has a great feel under my fingertips. I was concerned that a wireless keyboard would be &#34;buggy&#34; and not be efficient, but this keyboard is as good as any corded keyboard. It charges easily via USB port and holds a charge for about ten days. The illuminated keys are helpful, if, like me, your eyes aren't as young as they once were. I already had the logitech unifying plug that plugs into my computer for my mouse and touchpad. I turned the keyboard on and the Logitech plug recognized it right away. I highly recommend this keyboard. ANSWERNOTFOUND"}]

In [None]:
sample_df = electronics_df.query("item_id == 'B006ZS5ATM' | item_id == 'B0074BW614'")
sample_df.head(1)

Unnamed: 0,item_id,domain,nn_mod,nn_asp,query_mod,query_asp,q_review_id,q_reviews_id,question,question_subj_level,ques_subj_score,is_ques_subjective,review_id,review,human_ans_spans,human_ans_indices,answer_subj_level,ans_subj_score,is_ans_subjective
0,B006ZS5ATM,electronics,sore,ear,uncomfortable,headphone,adb20314dbbd8196b7e9fb587b78147f,6e1052529424f3a98d303380155c9dde,What do you think about headphone?,5,0.0,False,197e760a49907baeff809b2ccdfe466f,"I had a smaller headset that went on the ear. After a 8 - 10 hour workday, m...",ANSWERNOTFOUND,"(547, 561)",5,0.0,False


In [None]:
sample_df.shape

(108, 19)

In [None]:
sample_df.groupby('item_id')['q_review_id'].count()

item_id
B006ZS5ATM      5
B0074BW614    103
Name: q_review_id, dtype: int64

In [None]:
# NOTE: this cell previously held an unfinished `for row in samp` fragment, which is a
# SyntaxError and stops "Restart & Run All". The row-wise loop is implemented in
# `create_paragraphs`.

In [None]:
def create_paragraphs(group):
    """Convert every row of ``group`` into a SQuAD-style paragraph dict.

    Each row becomes one paragraph holding the review as ``context`` and a
    single-question ``qas`` list with one answer.

    Args:
        group: DataFrame (for example one ``groupby('item_id')`` group) with the
            columns ``question``, ``q_review_id``, ``review`` and
            ``human_ans_indices``. An ``answers_text`` column is used for the
            answer text when present; otherwise the text is sliced out of
            ``review`` with the parsed indices.

    Returns:
        A list with one ``{"qas": [...], "context": ...}`` dict per row.

    Raises:
        ValueError: if ``human_ans_indices`` is not a plain Python literal.
    """
    paragraphs = []

    for _, row in group.iterrows():
        # Indices are stored as a string like "(421, 453)"; literal_eval parses
        # the tuple without executing arbitrary code the way eval() would.
        span = ast.literal_eval(row["human_ans_indices"])
        start_idx = span[0]

        # Fall back to slicing the review so the function also works on a frame
        # that has not been given an `answers_text` column yet.
        if "answers_text" in row.index:
            answer_text = row["answers_text"]
        else:
            answer_text = row["review"][start_idx:span[1]]

        paragraphs.append(
            {
                "qas": [
                    {
                        "question": row["question"],
                        "id": row["q_review_id"],
                        "answers": [{"text": answer_text, "answer_start": start_idx}],
                        # bool() keeps the flag a plain Python bool for json.dump
                        "is_impossible": bool(answer_text == "ANSWERNOTFOUND"),
                    }
                ],
                "context": row["review"],
            }
        )

    return paragraphs

In [None]:
# One record per product: `title` carries the item_id and `paragraphs` the list
# returned by create_paragraphs for that product's rows.
groups = (
    electronics_df
    .groupby('item_id')
    .apply(create_paragraphs)
    .to_frame(name="paragraphs")
    .reset_index()
    .rename(columns={"item_id": 'title'})
)

In [None]:
groups

Unnamed: 0,title,paragraphs
0,B00001P4ZH,"[{'qas': [{'question': 'How is the bass?', 'id': '2543d296da9766d8d17d040ecc..."
1,B00001WRSJ,"[{'qas': [{'question': 'How is the audio bass?', 'id': '6895a59b470d8feee0f3..."
2,B00004SABB,"[{'qas': [{'question': 'How is the time?', 'id': '47110eb7720cffd03bb78f6099..."
3,B00004SB92,"[{'qas': [{'question': 'Is documentation clear?', 'id': '7af7e66deeecf69fc6e..."
4,B00004T8R2,"[{'qas': [{'question': 'How is the sound?', 'id': '54f67a1452a33068b8a6a93bf..."
...,...,...
429,B00GH0N1LM,"[{'qas': [{'question': 'How do I get to the port?', 'id': '96bd81889288ed5d2..."
430,B00GP4BVTO,"[{'qas': [{'question': 'How about profile?', 'id': '12264045ff398038d51f77c3..."
431,B00H3OYSHW,"[{'qas': [{'question': 'Where can I feel my device?', 'id': 'e0ea72e711324b5..."
432,B00HNGB1YS,"[{'qas': [{'question': 'How is the grip?', 'id': 'a541ffd9697ebf0ad9d2264285..."


In [None]:
len(groups.iloc[1]["paragraphs"])

2

In [None]:
groups.to_dict(orient='records')[0]

{'title': 'B006ZS5ATM',
 'paragraphs': [{'qas': [{'question': 'What do you think about headphone?',
     'id': 'adb20314dbbd8196b7e9fb587b78147f',
     'answers': [{'text': 'ANSWERNOTFOUND', 'answer_start': 547}],
     'is_impossible': True}],
   'context': "I had a smaller headset that went on the ear. After a 8 - 10 hour workday, my ears were throbbing red and painfully sore. With this headset, I can go the same shift and not feel any pain on my ear. They may feel a little on the heavy side, but if you adjust the head phone cups just right and stretch them, you will not have any problems.Quality and sound, words cannot explain. Its so clear and loud, the headset holds in the sound. When I am on a call or listening to music, I do not hear nothing else, but what's coming in through the headset. ANSWERNOTFOUND"},
  {'qas': [{'question': 'What is the sound quality?',
     'id': 'a3cd9be41531e7f23a9882c2a3cb15d4',
     'answers': [{'text': 'Sound quality is awesome', 'answer_start': 490}]

In [None]:
# Wrap the per-product records in the top-level {"data": [...]} object.
squad_data = {"data": groups.to_dict(orient='records')}

squad_path = Path("data/subjqa/squad_format/electronics-test.json")
# open(..., 'w') does not create missing folders, so make sure the target exists.
squad_path.parent.mkdir(parents=True, exist_ok=True)

with open(squad_path, 'w', encoding='utf-8') as f:
    json.dump(squad_data, f)

## No Fine-Tuning

In [None]:
reader = FARMReader(model_name_or_path="deepset/roberta-base-squad2", use_gpu=True, context_window_size=500)

03/07/2021 11:00:07 - INFO - farm.utils -   Using device: CUDA 
03/07/2021 11:00:07 - INFO - farm.utils -   Number of GPUs: 1
03/07/2021 11:00:07 - INFO - farm.utils -   Distributed Training: False
03/07/2021 11:00:07 - INFO - farm.utils -   Automatic Mixed Precision: None
Some weights of RobertaModel were not initialized from the model checkpoint at deepset/roberta-base-squad2 and are newly initialized: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
03/07/2021 11:00:21 - INFO - farm.utils -   Using device: CUDA 
03/07/2021 11:00:21 - INFO - farm.utils -   Number of GPUs: 1
03/07/2021 11:00:21 - INFO - farm.utils -   Distributed Training: False
03/07/2021 11:00:21 - INFO - farm.utils -   Automatic Mixed Precision: None
03/07/2021 11:00:21 - INFO - farm.infer -   Got ya 7 parallel workers to do inference ...
03/07/2021 11:00:21 - INFO - farm.infer -    0    0    0

In [None]:
sample_df.query("human_ans_spans == 'think great'")

Unnamed: 0,item_id,domain,nn_mod,nn_asp,query_mod,query_asp,q_review_id,q_reviews_id,question,question_subj_level,ques_subj_score,is_ques_subjective,review_id,review,human_ans_spans,human_ans_indices,answer_subj_level,ans_subj_score,is_ans_subjective,answers_text
729,B0074BW614,electronics,solid,device,fast,processor,1a320a21598606afa533700b395eb71b,54a47a3867b408360b40b58eb93c1de0,How is processor?,1,0.0,False,c687cef56ac10872ef5d0ae0869650db,I've been an iPad user since the original came out. I also have an iPad 3. I...,think great,"(2476, 2497)",1,0.75,True,think this is a great


In [None]:
sample_df.query("human_ans_spans == 'think great'").iloc[0]["review"][2476:2498]

'think this is a great '

In [None]:
def create_answer_spans(row):
    """Return the slice of the review marked by the row's answer indices.

    Args:
        row: mapping-like object (e.g. a DataFrame row) with a ``review``
            string and a ``human_ans_indices`` string holding a
            ``(start, end)`` tuple literal such as ``"(421, 453)"``.

    Returns:
        ``row["review"][start:end]``.

    Raises:
        ValueError: if ``human_ans_indices`` is not a plain Python literal.
    """
    # literal_eval only accepts literals, so a cell containing code is rejected
    # instead of being executed as it would be with eval().
    start_idx, end_idx = ast.literal_eval(row["human_ans_indices"])
    return row["review"][start_idx:end_idx]

In [None]:
electronics_df["answers_text"] = electronics_df.apply(create_answer_spans, axis=1)

In [None]:
sample_df["answers_text"]

0       ANSWERNOTFOUND
1       ANSWERNOTFOUND
24      ANSWERNOTFOUND
45      ANSWERNOTFOUND
51      ANSWERNOTFOUND
             ...      
2272    ANSWERNOTFOUND
2283    ANSWERNOTFOUND
2286              32GB
2333    ANSWERNOTFOUND
2334    ANSWERNOTFOUND
Name: answers_text, Length: 108, dtype: object

In [None]:
reader_eval_results = reader.eval_on_file("data/subjqa/squad_format/", "electronics-test.json", device='cuda')

Preprocessing Dataset data/subjqa/squad_format/electronics-test.json: 100%|██████████| 718/718 [00:01<00:00, 405.97 Dicts/s]
Evaluating: 100%|██████████| 35/35 [00:19<00:00,  1.76it/s]


In [None]:
print("Reader Top-N-Accuracy:", reader_eval_results["top_n_accuracy"])
## Reader Exact Match is the proportion of questions where the predicted answer is exactly the same as the correct answer
print("Reader Exact Match:", reader_eval_results["EM"])
## Reader F1-Score is the average overlap between the predicted answers and the correct answers
print("Reader F1-Score:", reader_eval_results["f1"])

Reader Top-N-Accuracy: 0.7047353760445683
Reader Exact Match: 0.2395543175487465
Reader F1-Score: 0.30428376775021326


In [None]:
print("Reader Top-N-Accuracy:", reader_eval_results["top_n_accuracy"])
## Reader Exact Match is the proportion of questions where the predicted answer is exactly the same as the correct answer
print("Reader Exact Match:", reader_eval_results["EM"])
## Reader F1-Score is the average overlap between the predicted answers and the correct answers
print("Reader F1-Score:", reader_eval_results["f1"])

Reader Top-N-Accuracy: 0.7961620469083156
Reader Exact Match: 0.3390191897654584
Reader F1-Score: 0.409314521578848


### Boot ES

In [None]:
# Download the Elasticsearch 7.9.2 archive (-nc skips the download when the file
# already exists), unpack it, and hand the folder over to the `daemon` user.
! wget -nc https://artifacts.elastic.co/downloads/elasticsearch/elasticsearch-7.9.2-linux-x86_64.tar.gz -q
! tar -xzf elasticsearch-7.9.2-linux-x86_64.tar.gz
! chown -R daemon:daemon elasticsearch-7.9.2

# Start Elasticsearch as a child process. stderr is merged into the stdout pipe,
# and preexec_fn switches the child to uid 1 before the binary is executed.
# NOTE(review): nothing in the visible cells reads from this pipe or stops the
# process - confirm that is intended.
es_server = Popen(['elasticsearch-7.9.2/bin/elasticsearch'],
                   stdout=PIPE, stderr=STDOUT,
                   preexec_fn=lambda: os.setuid(1)  # as daemon
                  )
# wait until ES has started
# (a fixed 30 s pause; it does not verify that the server is actually reachable)
! sleep 30

In [None]:
document_store = ElasticsearchDocumentStore(host="localhost", username="", password="", index="document")

03/05/2021 15:11:00 - INFO - elasticsearch -   HEAD http://localhost:9200/ [status:200 request:0.094s]
03/05/2021 15:11:00 - INFO - elasticsearch -   HEAD http://localhost:9200/document [status:200 request:0.011s]
03/05/2021 15:11:00 - INFO - elasticsearch -   GET http://localhost:9200/document [status:200 request:0.004s]
03/05/2021 15:11:00 - INFO - elasticsearch -   PUT http://localhost:9200/document/_mapping [status:200 request:0.024s]
03/05/2021 15:11:00 - INFO - elasticsearch -   HEAD http://localhost:9200/label [status:200 request:0.002s]


In [None]:
document_store.delete_all_documents()

03/04/2021 21:20:03 - INFO - elasticsearch -   POST http://localhost:9200/document/_delete_by_query [status:200 request:0.970s]


### Index docs

In [None]:
docs = [{"text": row["text"], "meta":{"asin": row["asin"], "is_answerable": row["is_answerable"]}} for _, row in qa_df.iterrows()]
docs[0]

{'text': "Works perfectly and easy to use. Software download also great.The only surprise was that the one I ordered, (USB) doesn't work with an iPad.I was thinking it would work with both PC and iPad. My mistake. I use this with Logic Pro X on an iMac running Mavericks (it's replacing an Mbox) and with a Sony Vaio running Windows 7 and get excellent results (don't forget to install the Windows drivers or you'll run into latency issues). I also use it with the Auria App on my iPad Air. I did appreciate the direct line in switch...I could hear exactly what was being played into the unit without having to route through the computer. That was a nice feature. More recently, I was very happy to get this working with my ipad mini. I did purchase a recommended usb powered hub Belkin model &#34; F4U020&#34; and with that - I'm good to play music into and out of my ipad. Focusrite. An industry standard.I bought this specifically for use with an iPad to do mobile recording. The app I use is Auri

In [None]:
document_store.write_documents(docs)

03/05/2021 15:11:02 - INFO - elasticsearch -   POST http://localhost:9200/_bulk?refresh=wait_for [status:200 request:1.124s]
03/05/2021 15:11:04 - INFO - elasticsearch -   POST http://localhost:9200/_bulk?refresh=wait_for [status:200 request:1.382s]
03/05/2021 15:11:05 - INFO - elasticsearch -   POST http://localhost:9200/_bulk?refresh=wait_for [status:200 request:1.381s]
03/05/2021 15:11:06 - INFO - elasticsearch -   POST http://localhost:9200/_bulk?refresh=wait_for [status:200 request:1.281s]
03/05/2021 15:11:08 - INFO - elasticsearch -   POST http://localhost:9200/_bulk?refresh=wait_for [status:200 request:1.237s]
03/05/2021 15:11:09 - INFO - elasticsearch -   POST http://localhost:9200/_bulk?refresh=wait_for [status:200 request:1.254s]
03/05/2021 15:11:10 - INFO - elasticsearch -   POST http://localhost:9200/_bulk?refresh=wait_for [status:200 request:1.260s]
03/05/2021 15:11:12 - INFO - elasticsearch -   POST http://localhost:9200/_bulk?refresh=wait_for [status:200 request:1.275s]


### Retriever

In [None]:
retriever = ElasticsearchRetriever(document_store=document_store)

### Reader

In [None]:
reader = FARMReader(model_name_or_path="deepset/roberta-base-squad2", use_gpu=True, context_window_size=500)

03/06/2021 10:59:24 - INFO - farm.utils -   Using device: CUDA 
03/06/2021 10:59:24 - INFO - farm.utils -   Number of GPUs: 1
03/06/2021 10:59:24 - INFO - farm.utils -   Distributed Training: False
03/06/2021 10:59:24 - INFO - farm.utils -   Automatic Mixed Precision: None
Some weights of RobertaModel were not initialized from the model checkpoint at deepset/roberta-base-squad2 and are newly initialized: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
03/06/2021 10:59:42 - INFO - farm.utils -   Using device: CUDA 
03/06/2021 10:59:42 - INFO - farm.utils -   Number of GPUs: 1
03/06/2021 10:59:42 - INFO - farm.utils -   Distributed Training: False
03/06/2021 10:59:42 - INFO - farm.utils -   Automatic Mixed Precision: None
03/06/2021 10:59:45 - INFO - farm.infer -   Got ya 7 parallel workers to do inference ...
03/06/2021 10:59:45 - INFO - farm.infer -    0    0    0

In [None]:
# check evaluation on SQuAD v2
reader_eval_results = reader.eval_on_file("data/squad", "dev-v2.0.json", device='cuda')

Preprocessing Dataset data/squad/dev-v2.0.json: 100%|██████████| 1204/1204 [00:07<00:00, 162.32 Dicts/s]
Evaluating: 100%|██████████| 274/274 [02:36<00:00,  1.75it/s]


In [None]:
print("Reader Top-N-Accuracy:", reader_eval_results["top_n_accuracy"])
## Reader Exact Match is the proportion of questions where the predicted answer is exactly the same as the correct answer
print("Reader Exact Match:", reader_eval_results["EM"])
## Reader F1-Score is the average overlap between the predicted answers and the correct answers
print("Reader F1-Score:", reader_eval_results["f1"])

Reader Top-N-Accuracy: 0.9746483618293608
Reader Exact Match: 0.7843005137707403
Reader F1-Score: 0.8260896852846605


In [None]:
# check evaluation on AmazonQA
reader_eval_results = reader.eval_on_file("data/amazon-qa", "val-qar_squad-music.json", device='cuda')

Preprocessing Dataset data/amazon-qa/val-qar_squad-music.json: 100%|██████████| 1150/1150 [00:03<00:00, 371.15 Dicts/s]
Evaluating: 100%|██████████| 133/133 [01:17<00:00,  1.72it/s]


In [None]:
print("Reader Top-N-Accuracy:", reader_eval_results["top_n_accuracy"])
## Reader Exact Match is the proportion of questions where the predicted answer is exactly the same as the correct answer
print("Reader Exact Match:", reader_eval_results["EM"])
## Reader F1-Score is the average overlap between the predicted answers and the correct answers
print("Reader F1-Score:", reader_eval_results["f1"])

Reader Top-N-Accuracy: 0.542608695652174
Reader Exact Match: 0.0008695652173913044
Reader F1-Score: 0.0752376647890378


In [None]:
pipe = ExtractiveQAPipeline(reader, retriever)

In [None]:
query = "Is a snare included?"
# DIY drumkit
asin = "B009VDW4OW"
number_of_answers_to_fetch = 3

# Restrict retrieval to the reviews of one product, then let the reader extract answers.
prediction = pipe.run(query=query, filters={"asin": [asin]}, top_k_retriever=10, top_k_reader=number_of_answers_to_fetch)
print(f"Question: {prediction['query']}")
print("\n")
# Loop over the answers that actually came back: indexing with
# range(number_of_answers_to_fetch) raises IndexError when fewer are returned.
for i, answer in enumerate(prediction['answers'][:number_of_answers_to_fetch]):
    print(f"#{i+1}")
    print(f"Answer: {answer['answer']}")
    print(f"ASIN: {answer['meta']['asin']}")
    print(f"Is answerable?: {answer['meta']['is_answerable']}")
    print(f"Context: {answer['context']}")
    print('\n\n')

03/05/2021 14:39:19 - INFO - elasticsearch -   POST http://localhost:9200/document/_search [status:200 request:0.088s]
Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00,  7.25 Batches/s]
Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 12.66 Batches/s]
Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 12.14 Batches/s]
Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 15.83 Batches/s]
Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 15.97 Batches/s]
Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 15.93 Batches/s]
Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 18.37 Batches/s]
Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 18.49 Batches/s]
Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 15.88 Batches/s]
Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00, 15.86 Batches/s]

Question: Is a snare included?


#1
Answer: this one only came with one
ASIN: B009VDW4OW
Is answerable?: 1
Context: the correct sounds out of it. When I slapped the "bass", it would play a "snare" sound combined with the bass. When I slapped the "snare", I would just get a wood sound.I've also seen images that most cajons come with multiple snares... this one only came with one.I'm really not sure what else to say. I wanted a Cajon to play with.but didn't want to pay 100.00 plus. This was a great option, Easy to put together with the limited tools I had on hand. And cheap enough that I wasn't worried to have 



#2
Answer: this one only came with one
ASIN: B009VDW4OW
Is answerable?: 1
Context: the correct sounds out of it. When I slapped the "bass", it would play a "snare" sound combined with the bass. When I slapped the "snare", I would just get a wood sound.I've also seen images that most cajons come with multiple snares... this one only came with one.I'm really not sure what else to




## Fine-tuning

### Converting to the true SQuAD format

One problem with our SQuAD dataset is that it is composed of _line-separated_ JSON instead of the single JSON object that SQuAD traditionally uses. So instead of having examples like 

```json
{
    "context": "blah blah",
    "qas": [
        {
            "id": 331392,
            "is_impossible": false,
            "question": "blah blah?",
            "answers": [
                {
                    "answer_start": 2881,
                    "text": "blah blah"
                },
                ...
            ],
            "human_answers": [
                "blah blah",
                ...
            ]
        }
    ]
}
```

what we really need is a JSON of the form

```json
{
    "data": [
        {
            "title": "Beyoncé",
            "paragraphs": [
                {
                    "qas": [
                        {
                            "question": "When did Beyonce start becoming popular?",
                            "id": "56be85543aeaaa14008c9063",
                            "answers": [
                                {
                                    "text": "in the late 1990s",
                                    "answer_start": 269
                                }
                            ],
                            "is_impossible": false
                        }
                        ...
                    ],
                    "context": "Beyoncé ..."
                },
                ...
            ]
        }
    ]
}
```

Let's write a function that does the conversion for us. To warm up, let's load a few examples from the training set:

In [None]:
# Collect a handful of raw examples belonging to two specific products.
examples = []

with open(data/"train-qar_squad.jsonl", 'r', encoding='utf-8') as f:
    for line in f:
        # every line of the .jsonl file is parsed as its own JSON document
        ex = json.loads(line)
        qid = ex["qas"][0]["id"]
        # NOTE(review): `qid2asin` is not defined in any cell visible above -
        # presumably a question-id -> ASIN lookup built elsewhere; confirm it
        # exists when running on a fresh kernel.
        asin = qid2asin[qid]
        if asin == "B0057JCYYE" or asin == "B00F9ECDRU":
            examples.append(ex)
        # stop as soon as five matching examples have been gathered
        if len(examples) > 4:
            break
examples

We don't need the human answers, but we do need the mapping from `qid` to `asin` so that we can collect all questions together that belong to the same product.

In [None]:
asin2qas = {}
seen_asin = set()

for ex in examples:
    qid = ex["qas"][0]["id"]
    asin = qid2asin[qid]
    qas = [{k:v for k,v in ex["qas"][0].items() if k != "human_answers"}]
    par = [{"qas": qas, "context": ex["context"]}]

    if asin in seen_asin:
        asin2qas[asin].extend(par)
    else:
        asin2qas[asin] = par
        seen_asin.add(asin)


# asin2qas

In [None]:
squad_data = []

for k,v in asin2qas.items():
    squad_ex = {}
    squad_ex["title"] = k
    squad_ex["paragraphs"] = v
    squad_data.append(squad_ex)
    
squad_data

In [None]:
squad_dict = {"data": squad_data}

In [None]:
with open(data/"train-qar_squad.json", 'w', encoding='utf-8') as f:
    json.dump(squad_dict, f)

In [None]:
# pick out answer fields
with open(data/"val-qar_squad.jsonl", 'r', encoding='utf-8') as f:
    for line in f:
        ex = json.loads(line)
        break

In [None]:
[k for k in ex["qas"][0].keys() if k.startswith("answers")]

['answers_snippet_spans_bleu2',
 'answers_snippet_spans_bleu4',
 'answers_snippet_spans_rouge',
 'answers_sentence_ir',
 'answers_sentence_bleu2',
 'answers_sentence_bleu4']

In [None]:
def convert_to_squad_format(input_file: Path, output_file: Path, category: str = "Musical_Instruments",
                            answer_field: str = "answers_sentence_bleu4"):
    """Convert a JSON-lines QA dump into a SQuAD-style JSON file for one category.

    Each input line is parsed as JSON and must provide ``row["qas"][0]`` (with
    an ``"id"`` entry) and ``row["context"]``. Rows whose question id maps to
    ``category`` in the module-level ``qid2category`` lookup are kept and
    grouped by the product id found in the module-level ``qid2asin`` lookup.
    Every product becomes one ``{"title": asin, "paragraphs": [...]}`` entry
    under the top-level ``"data"`` key of the output document.

    Args:
        input_file: Path of the JSON-lines file to read.
        output_file: Path of the JSON file to write (overwritten if present).
        category: Only questions mapped to this category are converted.
        answer_field: Which of the candidate ``answers_*`` fields is exposed
            under the ``"answers"`` key. Previously all six candidates were
            renamed to ``"answers"`` in one dict comprehension, so the last
            one in key order silently overwrote the others. The default keeps
            the field that comes last in the key order printed earlier in
            this notebook, so existing calls behave as before.

    Raises:
        ValueError: If ``answer_field`` is not one of the known answer fields.
    """
    answer_fields = ('answers_snippet_spans_bleu2', 'answers_snippet_spans_bleu4',
                     'answers_snippet_spans_rouge', 'answers_sentence_ir',
                     'answers_sentence_bleu2', 'answers_sentence_bleu4')
    if answer_field not in answer_fields:
        raise ValueError(f"answer_field must be one of {answer_fields}, got {answer_field!r}")

    asin2qas = {}

    with open(input_file, 'r', encoding='utf-8') as f:
        # tqdm is only used as a progress counter; the line index is not needed.
        for line in tqdm(f):
            row = json.loads(line)
            question = row["qas"][0]
            qid = question["id"]
            if qid2category[qid] != category:
                continue

            # Copy every non-answer field, then expose exactly one answer field.
            qa = {k: v for k, v in question.items() if k not in answer_fields}
            if answer_field in question:
                qa["answers"] = question[answer_field]

            paragraph = {"qas": [qa], "context": row["context"]}
            asin2qas.setdefault(qid2asin[qid], []).append(paragraph)

    squad_data = [{"title": asin, "paragraphs": paragraphs}
                  for asin, paragraphs in asin2qas.items()]

    with open(output_file, 'w', encoding='utf-8') as f:
        json.dump({"data": squad_data}, f)

In [None]:
# Convert the training split, keeping only the chosen category.
category = "Electronics"
convert_to_squad_format(
    input_file=data/'train-qar_squad.jsonl',
    output_file=data/f'train-qar_squad-{category.lower()}.json',
    category=category,
)

455931it [00:13, 34990.08it/s]


In [None]:
# Same conversion for the validation split.
convert_to_squad_format(
    input_file=data/'val-qar_squad.jsonl',
    output_file=data/f'val-qar_squad-{category.lower()}.json',
    category=category,
)

58969it [00:03, 15988.79it/s]


### Load single example

In [None]:
# NOTE(review): the variable is named `val_df`, but the file read here is the
# *train* split — confirm which one was intended.
val_df = pd.read_json(data.joinpath('train-qar_squad-electronics.json'))

In [None]:
# Each row of the single `data` column holds one entry with 'title' and 'paragraphs' keys.
val_df

Unnamed: 0,data
0,"{'title': 'B00009R95M', 'paragraphs': [{'qas': [{'id': 604553, 'is_impossibl..."
1,"{'title': 'B0051GN8GQ', 'paragraphs': [{'qas': [{'id': 698250, 'is_impossibl..."
2,"{'title': 'B00CQ35HBQ', 'paragraphs': [{'qas': [{'id': 639762, 'is_impossibl..."
3,"{'title': 'B00BOYQH44', 'paragraphs': [{'qas': [{'id': 701290, 'is_impossibl..."
4,"{'title': 'B008HODL7K', 'paragraphs': [{'qas': [{'id': 319235, 'is_impossibl..."
...,...
25296,"{'title': 'B005LLFY5Y', 'paragraphs': [{'qas': [{'id': 212671, 'is_impossibl..."
25297,"{'title': 'B0053QC0EU', 'paragraphs': [{'qas': [{'id': 596763, 'is_impossibl..."
25298,"{'title': 'B0068PVBLS', 'paragraphs': [{'qas': [{'id': 525680, 'is_impossibl..."
25299,"{'title': 'B009I9MX5Y', 'paragraphs': [{'qas': [{'id': 632546, 'is_impossibl..."


### Fine-tune model

Either something is wrong with my data preparation or getting the model to generalise is _hard_!

In [None]:
# Baseline reader built from the "distilbert-base-uncased-distilled-squad" checkpoint.
reader = FARMReader(
    model_name_or_path="distilbert-base-uncased-distilled-squad",
    use_gpu=True,
    context_window_size=500,
)

03/06/2021 14:57:00 - INFO - farm.utils -   Using device: CUDA 
03/06/2021 14:57:00 - INFO - farm.utils -   Number of GPUs: 1
03/06/2021 14:57:00 - INFO - farm.utils -   Distributed Training: False
03/06/2021 14:57:00 - INFO - farm.utils -   Automatic Mixed Precision: None
03/06/2021 14:57:10 - INFO - farm.utils -   Using device: CUDA 
03/06/2021 14:57:10 - INFO - farm.utils -   Number of GPUs: 1
03/06/2021 14:57:10 - INFO - farm.utils -   Distributed Training: False
03/06/2021 14:57:10 - INFO - farm.utils -   Automatic Mixed Precision: None
03/06/2021 14:57:11 - INFO - farm.infer -   Got ya 7 parallel workers to do inference ...
03/06/2021 14:57:11 - INFO - farm.infer -    0    0    0    0    0    0    0 
03/06/2021 14:57:11 - INFO - farm.infer -   /w\  /w\  /w\  /w\  /w\  /w\  /w\
03/06/2021 14:57:11 - INFO - farm.infer -   /'\  / \  /'\  /'\  / \  / \  /'\
03/06/2021 14:57:11 - INFO - farm.infer -               


In [None]:
# Directory passed as the data directory to `reader.train` and `eval_on_file` in the cells below.
train_data = "data/amazon-qa/"

In [None]:
# Fine-tune the baseline reader for a single epoch on the electronics split.
reader.train(
    # data location
    data_dir=train_data,
    train_filename="train-qar_squad-electronics.json",
    dev_filename="val-qar_squad-electronics.json",
    # training schedule
    n_epochs=1,
    batch_size=64,
    evaluate_every=1000,
    # hardware / output
    use_gpu=True,
    save_dir="models/haystack/",
)

03/06/2021 14:57:17 - INFO - farm.utils -   Using device: CUDA 
03/06/2021 14:57:17 - INFO - farm.utils -   Number of GPUs: 1
03/06/2021 14:57:17 - INFO - farm.utils -   Distributed Training: False
03/06/2021 14:57:17 - INFO - farm.utils -   Automatic Mixed Precision: None
Preprocessing Dataset data/amazon-qa/train-qar_squad-electronics.json:  88%|████████▊ | 96000/108614 [01:36<00:11, 1131.67 Dicts/s]Token indices sequence length is longer than the specified maximum sequence length for this model (589 > 512). Running this sequence through the model will result in indexing errors
Preprocessing Dataset data/amazon-qa/train-qar_squad-electronics.json: 100%|██████████| 108614/108614 [01:41<00:00, 1068.48 Dicts/s]
Preprocessing Dataset data/amazon-qa/val-qar_squad-electronics.json: 100%|██████████| 13647/13647 [00:15<00:00, 889.83 Dicts/s] 
03/06/2021 14:59:31 - INFO - farm.modeling.optimization -   Loading optimizer `TransformersAdamW`: '{'correct_bias': False, 'weight_decay': 0.01, 'lr':

In [None]:
# Load a reader from "models/haystack", the directory given as `save_dir` to `reader.train` above.
new_reader = FARMReader(model_name_or_path="models/haystack")

03/06/2021 12:40:51 - INFO - farm.utils -   Using device: CUDA 
03/06/2021 12:40:51 - INFO - farm.utils -   Number of GPUs: 1
03/06/2021 12:40:51 - INFO - farm.utils -   Distributed Training: False
03/06/2021 12:40:51 - INFO - farm.utils -   Automatic Mixed Precision: None
03/06/2021 12:40:54 - INFO - farm.utils -   Using device: CUDA 
03/06/2021 12:40:54 - INFO - farm.utils -   Number of GPUs: 1
03/06/2021 12:40:54 - INFO - farm.utils -   Distributed Training: False
03/06/2021 12:40:54 - INFO - farm.utils -   Automatic Mixed Precision: None
03/06/2021 12:40:55 - INFO - farm.infer -   Got ya 7 parallel workers to do inference ...
03/06/2021 12:40:55 - INFO - farm.infer -    0    0    0    0    0    0    0 
03/06/2021 12:40:55 - INFO - farm.infer -   /w\  /w\  /w\  /w\  /w\  /w\  /w\
03/06/2021 12:40:55 - INFO - farm.infer -   /'\  / \  /'\  /'\  / \  / \  /'\
03/06/2021 12:40:55 - INFO - farm.infer -               


In [None]:
# eval
# Use `train_data` instead of repeating the directory literal, so this cell
# reads from the same location as the later evaluation cells.
reader_eval_results = new_reader.eval_on_file(train_data, "val-qar_squad-music.json", device='cuda')

Preprocessing Dataset data/amazon-qa/val-qar_squad-music.json: 100%|██████████| 1150/1150 [00:02<00:00, 390.39 Dicts/s]
Evaluating: 100%|██████████| 133/133 [01:17<00:00,  1.71it/s]


In [None]:
# Metrics reported below:
# - Exact Match is the proportion of questions where the predicted answer is
#   exactly the same as the correct answer.
# - F1-Score is the average overlap between the predicted answers and the
#   correct answers.
for label, metric in (
    ("Reader Top-N-Accuracy:", "top_n_accuracy"),
    ("Reader Exact Match:", "EM"),
    ("Reader F1-Score:", "f1"),
):
    print(label, reader_eval_results[metric])

Reader Top-N-Accuracy: 0.7417391304347826
Reader Exact Match: 0.0
Reader F1-Score: 0.0


In [None]:
# Chain the retriever and the fine-tuned reader into one extractive QA pipeline.
pipe = ExtractiveQAPipeline(
    reader=new_reader,
    retriever=retriever,
)

In [None]:
query = "Is a snare included?"
# DIY drumkit
asin = "B009VDW4OW"
number_of_answers_to_fetch = 3

# Restrict retrieval to documents of one product, then let the reader pick the answers.
prediction = pipe.run(query=query, filters={"asin": [asin]}, top_k_retriever=10, top_k_reader=number_of_answers_to_fetch)
print(f"Question: {prediction['query']}")
print("\n")
# Iterate over the answers that actually came back: indexing with
# range(number_of_answers_to_fetch) raises IndexError when fewer are returned.
for rank, answer in enumerate(prediction['answers'][:number_of_answers_to_fetch], start=1):
    print(f"#{rank}")
    print(f"Answer: {answer['answer']}")
    print(f"ASIN: {answer['meta']['asin']}")
    print(f"Is answerable?: {answer['meta']['is_answerable']}")
    print(f"Context: {answer['context']}")
    print('\n\n')

Traceback (most recent call last):
  File "/root/miniconda3/envs/transformerlab/lib/python3.8/site-packages/urllib3/connection.py", line 156, in _new_conn
    conn = connection.create_connection(
  File "/root/miniconda3/envs/transformerlab/lib/python3.8/site-packages/urllib3/util/connection.py", line 84, in create_connection
    raise err
  File "/root/miniconda3/envs/transformerlab/lib/python3.8/site-packages/urllib3/util/connection.py", line 74, in create_connection
    sock.connect(sa)
ConnectionRefusedError: [Errno 111] Connection refused

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/root/miniconda3/envs/transformerlab/lib/python3.8/site-packages/elasticsearch/connection/http_urllib3.py", line 245, in perform_request
    response = self.pool.urlopen(
  File "/root/miniconda3/envs/transformerlab/lib/python3.8/site-packages/urllib3/connectionpool.py", line 719, in urlopen
    retries = retries.increment(
  File "/ro

ConnectionError: ConnectionError(<urllib3.connection.HTTPConnection object at 0x7ff914bc2d00>: Failed to establish a new connection: [Errno 111] Connection refused) caused by: NewConnectionError(<urllib3.connection.HTTPConnection object at 0x7ff914bc2d00>: Failed to establish a new connection: [Errno 111] Connection refused)

### Evaluation

#### New reader

In [None]:
reader_eval_results = new_reader.eval_on_file(train_data, "val-qar_squad-music.json", device='cuda')

# Metrics reported below:
# - Top-N-Accuracy is the proportion of predicted answers that match with
#   their corresponding correct answer.
# - Exact Match is the proportion of questions where the predicted answer is
#   exactly the same as the correct answer.
# - F1-Score is the average overlap between the predicted answers and the
#   correct answers.
for label, metric in (
    ("Reader Top-N-Accuracy:", "top_n_accuracy"),
    ("Reader Exact Match:", "EM"),
    ("Reader F1-Score:", "f1"),
):
    print(label, reader_eval_results[metric])

Preprocessing Dataset data/amazon-qa/val-qar_squad-music.json: 100%|██████████| 1828/1828 [00:03<00:00, 507.82 Dicts/s]
Evaluating: 100%|██████████| 238/238 [02:16<00:00,  1.74it/s]


Reader Top-N-Accuracy: 0.5
Reader Exact Match: 0.0
Reader F1-Score: 0.0


#### SQuAD reader

In [None]:
# Score the baseline on the same validation file used for the fine-tuned
# reader in the "New reader" cell; the original evaluated it on
# "train-qar_squad-music.json", so the two sets of metrics covered different data.
reader_eval_results = reader.eval_on_file(train_data, "val-qar_squad-music.json", device='cuda')

## Reader Top-N-Accuracy is the proportion of predicted answers that match with their corresponding correct answer
print("Reader Top-N-Accuracy:", reader_eval_results["top_n_accuracy"])
## Reader Exact Match is the proportion of questions where the predicted answer is exactly the same as the correct answer
print("Reader Exact Match:", reader_eval_results["EM"])
## Reader F1-Score is the average overlap between the predicted answers and the correct answers
print("Reader F1-Score:", reader_eval_results["f1"])

Preprocessing Dataset data/amazon-qa/train-qar_squad-music.json: 100%|██████████| 2100/2100 [00:03<00:00, 664.52 Dicts/s]
Evaluating: 100%|██████████| 210/210 [02:00<00:00,  1.75it/s]


Reader Top-N-Accuracy: 0.0
Reader Exact Match: 0.0
Reader F1-Score: 0.0
