## NLP (Natural Language Processing)

In [5]:
!pip install farm-haystack[inference]



In [1]:
from haystack.document_stores import InMemoryDocumentStore
from haystack.pipelines import Pipeline, ExtractiveQAPipeline
from haystack.nodes import DensePassageRetriever, TextConverter, PreProcessor, FARMReader
from haystack.utils import print_answers
from haystack import document_stores
import os, openai

In [2]:
doc_store = InMemoryDocumentStore()

In [3]:
retriever = DensePassageRetriever(document_store=doc_store)

  return self.fget.__get__(instance, owner)()


In [4]:
text_converter = TextConverter()

In [5]:
preprocessor = PreProcessor(
    split_length=200,
    split_overlap=100
)

In [6]:
pipe = Pipeline()

In [7]:
pipe.add_node(component=text_converter, name="converter", inputs=["File"])
pipe.add_node(component=preprocessor, name="preprocessor", inputs=["converter"])
pipe.add_node(component=doc_store, name="store", inputs=["preprocessor"])

In [8]:
folder_name = "data\DnD Classes"
file_paths = [os.path.join(folder_name, filename) for filename in os.listdir(folder_name)]

pipe.run(file_paths=file_paths)

Converting files: 100%|██████████| 6/6 [00:00<00:00, 45.25it/s]
Preprocessing: 100%|██████████| 6/6 [00:00<00:00, 19.06docs/s]


{'documents': [<Document: {'content': "Class - Fighter\n\nLevel 1 - Fighting Style: You adopt a particular style of fighting as your specialty. Choose one of the following options: Archery, Defense, Dueling, Great Weapon Fighting, Protection, Two-Weapon Fighting. You can't take the same Fighting Style option more than once, even if you get to choose again.; Second Wind: You have a limited well of stamina that you can draw on to protect yourself from harm. On your turn, you can use a bonus action to regain hit points equal to 1d10 + your fighter level. Once you use this feature, you must finish a short or long rest before you can use it again.\n\nLevel 2 - Action Surge: Starting at 2nd level, you can push yourself beyond your normal limits for a moment. On your turn, you can take one additional action. Once you use this feature, you must finish a short or long rest before you can use it again. Starting at 17th level, you can use it twice before a rest, but only once on the same turn.\n\

In [9]:
for doc in doc_store.get_all_documents_generator():
    print(doc)
    break

<Document: id=1d1625192fe6e5e8d9886e9ea552f4ff, content='Class - Fighter

Level 1 - Fighting Style: You adopt a particular style of fighting as your specialt...'>


In [10]:
len(doc_store.get_all_documents())

673

In [11]:
doc_store.update_embeddings(retriever=retriever)

Documents Processed: 10000 docs [02:11, 75.91 docs/s]          


In [12]:
reader = FARMReader(model_name_or_path="deepset/roberta-base-squad2", return_no_answer=False, top_k=100)

In [13]:
pipe_qa = ExtractiveQAPipeline(reader=reader, retriever=retriever)

In [72]:
ans = pipe_qa.run(query="How many times does the fighter attack at level 20?", params={"Retriever": {"top_k": 20}})

Inferencing Samples: 100%|██████████| 1/1 [00:07<00:00,  7.26s/ Batches]


In [73]:
answer = ans["answers"][0]
print(answer.answer)

four


In [74]:
context = ans["answers"][0]
print(context.context)

 you may instead take a feat.

Level 20 - Extra Attack (3): At 20th level, you can attack four times whenever you take the Attack action on your turn.


In [75]:
from transformers import MBartForConditionalGeneration, MBart50TokenizerFast

answer_en = str(answer.answer)

model = MBartForConditionalGeneration.from_pretrained("facebook/mbart-large-50-many-to-many-mmt")
tokenizer = MBart50TokenizerFast.from_pretrained("facebook/mbart-large-50-many-to-many-mmt")

# translate the text
tokenizer.src_lang = "en_XX"
encoded_en = tokenizer(answer_en, return_tensors="pt")
generated_tokens = model.generate(
    **encoded_en,
    forced_bos_token_id=tokenizer.lang_code_to_id["it_IT"]
)
tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)



['quattro.']

In [78]:
print_answers(ans, details="medium")

'Query: How many times does the fighter attack at level 20?'
'Answers:'
[   {   'answer': 'four',
        'context': ' you may instead take a feat.\n'
                   '\n'
                   'Level 20 - Extra Attack (3): At 20th level, you can attack '
                   'four times whenever you take the Attack action on your '
                   'turn.',
        'score': 0.7411504983901978},
    {   'answer': '10d6',
        'context': 'e use of feats, you may select a feat instead.\n'
                   '\n'
                   'Level 20 - Sneak Attack: 10d6; Stroke of Luck: At 20th '
                   'level, you have an uncanny knack for succeeding',
        'score': 0.3277077078819275},
    {   'answer': 'up to 1 hour',
        'context': 'el: 2; Cast Time: 1 Action; Range: 60 Feet; Duration: '
                   'Concentration, up to 1 hour; Components: Verbal and '
                   'somatic; Spell Description: You conjure a ma',
        'score': 0.030639030039310455},
    {