# Setup and Importing

In [1]:
# NOTE(review): the star import hides where names come from. `setup_notebook`
# is presumably provided by `utils` — consider importing it explicitly once
# it is confirmed that no other cell relies on further names from `utils`.
from utils import *

# Per the saved output below, this call reports the transformers and
# datasets versions in use.
setup_notebook()

Using transformers v4.25.1
Using datasets v2.11.0


In [2]:
!apt-get install -qq git-lfs

import os
os.environ['TOKENIZERS_PARALLELISM']='false'

import warnings, logging
warnings.simplefilter('ignore')
logging.disable(logging.WARNING)

# Haystack

## Initializing Haystack

In [3]:
if not os.path.exists('elasticsearch-7.9.2'):
    ! wget https://artifacts.elastic.co/downloads/elasticsearch/elasticsearch-7.9.2-linux-x86_64.tar.gz -q
    ! tar -xzf elasticsearch-7.9.2-linux-x86_64.tar.gz
    ! chown -R daemon:daemon elasticsearch-7.9.2

In [4]:
%%bash --bg

# Launch the unpacked Elasticsearch server as the `daemon` user.
# The `--bg` flag on the cell magic runs this script in the background.
sudo -u daemon -- elasticsearch-7.9.2/bin/elasticsearch

In [5]:
import time
import urllib.error
import urllib.request


def wait_for_elasticsearch(url="http://localhost:9200/", timeout=120.0, poll_interval=1.0):
    """Block until the Elasticsearch HTTP endpoint responds.

    A fixed sleep is either wasteful (the server is usually up sooner) or too
    short (slow machines), so the endpoint is polled instead.

    Args:
        url: HTTP address to probe.
        timeout: Maximum number of seconds to wait overall.
        poll_interval: Seconds to pause between two probes.

    Raises:
        TimeoutError: If the endpoint did not respond within ``timeout`` seconds.
    """
    deadline = time.monotonic() + timeout
    while True:
        try:
            with urllib.request.urlopen(url, timeout=5):
                return
        except urllib.error.HTTPError as err:
            # The server answered; only 5xx means it is still starting up.
            if err.code < 500:
                return
        except OSError:
            # Connection refused / reset: the server is not listening yet.
            pass

        if time.monotonic() >= deadline:
            raise TimeoutError(
                f"Elasticsearch did not respond at {url} within {timeout:.0f} seconds"
            )
        time.sleep(poll_interval)


wait_for_elasticsearch()

In [6]:
# Query the root endpoint of the local Elasticsearch node; `?pretty` asks for
# an indented JSON reply (node name, cluster and version info, as shown below).
!curl -X GET "localhost:9200/?pretty"

{
  "name" : "nsh6lp2hal",
  "cluster_name" : "elasticsearch",
  "cluster_uuid" : "84RYn7Q3Rr-3lyKlq_dCdw",
  "version" : {
    "number" : "7.9.2",
    "build_flavor" : "default",
    "build_type" : "tar",
    "build_hash" : "d34da0ea4a966c4e49417f2da2f244e3e97b4e6e",
    "build_date" : "2020-09-23T00:45:33.626720Z",
    "build_snapshot" : false,
    "lucene_version" : "8.6.2",
    "minimum_wire_compatibility_version" : "6.8.0",
    "minimum_index_compatibility_version" : "6.0.0-beta1"
  },
  "tagline" : "You Know, for Search"
}


In [7]:
from haystack.document_stores import ElasticsearchDocumentStore

# Connection settings are read from the environment so that no secret is
# stored in the notebook:
#   ELASTICSEARCH_HOST      (default: "localhost")
#   ELASTICSEARCH_USERNAME  (default: "elastic")
#   ELASTICSEARCH_PASSWORD  (default: empty)
# NOTE(review): the password that used to be hardcoded in this cell must be
# treated as leaked and rotated if it was ever valid for a real cluster.
host = os.environ.get("ELASTICSEARCH_HOST", "localhost")
es_username = os.environ.get("ELASTICSEARCH_USERNAME", "elastic")
es_password = os.environ.get("ELASTICSEARCH_PASSWORD", "")

document_store = ElasticsearchDocumentStore(
    host=host,
    port=9200,
    username=es_username,
    password=es_password,
    index='documents',
)

In [8]:
# Sanity check: report how many documents the store currently holds.
doc_count = document_store.get_document_count()
print(f"We have {doc_count} documents in the document store.")

We have 2981 documents i document store.


In [9]:
from haystack.nodes import BM25Retriever

# Sparse (BM25) retriever that searches the Elasticsearch-backed document store.
bm25_retriever = BM25Retriever(
    document_store=document_store,
)

In [10]:
from haystack.nodes import FARMReader

# Windowing settings handed to the reader as `max_seq_len` and `doc_stride`.
max_seq_length = 384
doc_stride = 128

# Reader loaded from 'mdeberta-model'; `return_no_answer=True` is passed so
# the reader may also produce a "no answer" prediction.
reader = FARMReader(
    model_name_or_path='mdeberta-model',
    progress_bar=False,
    max_seq_len=max_seq_length,
    doc_stride=doc_stride,
    return_no_answer=True,
)

In [11]:
from haystack.pipelines import ExtractiveQAPipeline

# Retriever-then-reader pipeline: BM25 selects candidate documents and the
# reader extracts answers from them.
pipe = ExtractiveQAPipeline(
    reader=reader,
    retriever=bm25_retriever,
)

In [12]:
import gradio as gr

def qa_model(question):
    """Answer ``question`` with the extractive QA pipeline.

    The pipeline is run with ``top_k=3`` for both the retriever and the
    reader, and the text of the first returned answer is used.

    Args:
        question: Free-text question typed by the user.

    Returns:
        str: The first answer's text. An empty string is returned when the
        question is blank, when the pipeline yields no answers, or when the
        first answer carries no text, so the UI never receives ``None`` and
        never crashes on an empty result list.
    """
    # A blank query cannot retrieve anything useful, so skip the pipeline.
    if not question or not question.strip():
        return ''

    preds = pipe.run(
        query=question,
        params={'Retriever': {'top_k': 3}, 'Reader': {'top_k': 3}},
    )

    # Guard against a missing or empty answer list instead of indexing blindly.
    answers = preds.get('answers') or []
    if not answers:
        return ''
    return answers[0].answer or ''

# Wrap `qa_model` in a minimal web UI: one text input, one textbox output.
app = gr.Interface(
    fn=qa_model,
    inputs=["text"],
    outputs=["textbox"],
)

# share=True additionally serves the app through a public gradio.live link,
# so anyone holding that URL can query the model.
app.launch(share=True)

Running on local URL:  http://127.0.0.1:7860
Running on public URL: https://1959adc70ce4afdf9d.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades (NEW!), check out Spaces: https://huggingface.co/spaces


