# Import Libraries

In [1]:
from sentence_transformers import SentenceTransformer
from pypdf import PdfReader
import os
import faiss

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
import pre_processing

# Load Embedding Model

In [3]:
# Identifier of the sentence-embedding model handed to SentenceTransformer.
check_point = 'mixedbread-ai/mxbai-embed-large-v1'
# The same model instance is reused further down to embed each user question,
# so queries and the indexed document share one embedding space.
embedding_model = SentenceTransformer(check_point)



In [4]:
# Set the `model` attribute on the pre_processing module to the loaded embedder.
# NOTE(review): presumably pre_processing.create_embedding reads this attribute - confirm in that module.
pre_processing.model = embedding_model

# Process Doc

In [5]:
# Open the PDF that will be parsed and indexed. Both paths are absolute and
# specific to this workspace; adjust them when running the notebook elsewhere.
# Alternative input document, kept so the source paper can be switched quickly:
#reader = PdfReader('/workspaces/digital_research_guide/1706.03762v7.pdf')
reader = PdfReader('/workspaces/digital_research_guide/Novel_Encoder_Training_for_Neural_Machine_Translation_in_Low_Resource_Settings.pdf')

In [6]:
# Section markers passed to pre_processing.parese_doc together with the reader.
# NOTE(review): presumably parsing starts at `first_section` and everything from
# `ignore_after` onwards is dropped - confirm against pre_processing.
first_section = "Abstract"
ignore_after = "References"

In [7]:
# Parse the PDF into `context_list`; the chat loop below indexes into it and joins
# the selected entries as strings, so it is an indexable collection of text chunks.
# (`parese_doc` is the name as spelled in the pre_processing module.)
context_list = pre_processing.parese_doc(reader,first_section,ignore_after)
# The return value is not used here.
# NOTE(review): presumably this embeds the chunks and writes the 'doc.index' file
# that is loaded with faiss.read_index further down - confirm in pre_processing.
pre_processing.create_embedding(context_list)

# Linking ONNX Model

In [8]:
import onnxruntime_genai as og

In [9]:
# Load the ONNX Runtime GenAI model from a local directory (absolute, workspace-specific path).
model = og.Model('/workspaces/digital_research_guide/cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4')
# Tokenizer bound to that model; used to encode the prompt in the chat loop.
tokenizer = og.Tokenizer(model)
# Streaming decoder; the chat loop feeds it one generated token at a time and prints the result.
tokenizer_stream = tokenizer.create_stream()

In [10]:
# Prompt wrapper; the chat loop fills the {input} placeholder via str.format(input=...).
# NOTE(review): the <|user|>/<|end|>/<|assistant|> tags presumably match the chat
# format expected by the loaded model - confirm against the model card.
chat_template = '<|user|>\n{input} <|end|>\n<|assistant|>'

In [11]:
# Keyword options unpacked into GeneratorParams.set_search_options() by the chat loop.
# No 'max_length' entry is set; add search_options['max_length'] = 4000 to
# re-enable the limit that was previously tried here.
search_options = {'temperature': 1}

In [12]:
# Current working directory of the kernel; used below to locate the 'doc.index' file.
base_path = os.getcwd()

In [13]:
# Location of the serialized FAISS index, resolved against the working directory.
index_path = os.path.join(base_path, 'doc.index')
# Read the index from disk; the chat loop searches it with the query embedding.
index: faiss.IndexFlatL2 = faiss.read_index(index_path)

In [14]:
# Interactive retrieval-augmented chat loop.
# For every question: embed it, look up the nearest document chunk(s) in the
# FAISS index, append them to the question as context, and stream the model's
# answer token by token. Entering an empty line ends the loop.
while True:
    text = input("Input: ")
    if not text:
        # An empty line is the exit signal for this cell.
        print("Error, input cannot be empty")
        break

    # Embed the question as a single-row matrix, the shape index.search() expects.
    query_embedding = embedding_model.encode(text).reshape(1, -1)
    top_k = 1
    _distances, neighbor_ids = index.search(query_embedding, top_k)

    # FAISS fills missing neighbours with the label -1 (e.g. when the index holds
    # fewer than top_k vectors). Skip those, otherwise context_list[-1] would
    # silently return the last chunk as if it were a match.
    retrieved_chunks = [context_list[idx] for idx in neighbor_ids[0] if idx >= 0]
    context = '. '.join(retrieved_chunks)

    # Only append the context clause when something was actually retrieved.
    if retrieved_chunks:
        augmented_text = text + " With respect to context: " + context
    else:
        augmented_text = text

    prompt = chat_template.format(input=augmented_text)
    print(prompt)

    input_tokens = tokenizer.encode(prompt)

    params = og.GeneratorParams(model)
    params.try_use_cuda_graph_with_max_batch_size(1)
    params.set_search_options(**search_options)
    params.input_ids = input_tokens
    generator = og.Generator(model, params)

    print()
    print("Output: ", end='', flush=True)

    try:
        # Generate one token per iteration and print it as soon as it is decoded.
        while not generator.is_done():
            generator.compute_logits()
            generator.generate_next_token()
            new_token = generator.get_next_tokens()[0]
            print(tokenizer_stream.decode(new_token), end='', flush=True)
    except KeyboardInterrupt:
        # Interrupting the kernel aborts this answer only; the loop then asks again.
        print("  --control+c pressed, aborting generation--")
    print()
    print()

    # Drop the reference so the generator can be released before the next
    # question creates a new one.
    del generator

<|user|>
describe the paper. With respect to context: Source and Target vocabulary size 272 is 40K each. 273 •Helsinki-NLP/opus-100 de-en subset : 274 German-Engish parallel corpus. Training set 275 size 1M. Source and Target vocabulary size is 276 30K each. 277 3Dataset #EpochsBLEU Score Running Time SOTA ( µ±σ) Our ( µ±σ) Improvement SOTA Our Improvement en-fr 20 30.07±0.41 31 .29±0.15 4 .05% 6.10 6 .60 8 .19% af-en 20 31.06±0.39 34 .90±0.42 12 .36% 5.78 6 .60 14 .10% en-hi 11 10.98±0.24 12 .21±0.11 11 .20% 10.78 11 .40 5 .75% de-en 6 16.09±0.75 16 .77±0.43 4 .22% 9.63 10 .20 5 .91% Table 1: Comparison of BLEU score on different datasets. Dataset SOTA BLEU ( µ)# Epochs Running Time SOTA Our Improvement SOTA Our Improvement en-fr 30.07 20 18 10.00% 6.10 6 .00 1 .64% af-en 31.06 20 12 40.00% 5.78 4 .00 30 .79% en-hi 10.98 11 7 36.36% 10.78 7 .31 32 .18% de-en 16.09 6 5 16.66% 9.63 9 .00 6 .54% Table 2: Comparison of training time to achieve SOTA results on different datasets. 6 Experim