### Downloading of the required libraries

In [1]:
pip install --upgrade torch

Note: you may need to restart the kernel to use updated packages.




In [2]:
pip install --upgrade transformers


Note: you may need to restart the kernel to use updated packages.




In [3]:
pip install --upgrade torchvision

Note: you may need to restart the kernel to use updated packages.




In [4]:
!pip install faiss-cpu



### Importing the required Libraries

In [5]:
import transformers
from transformers import DPRContextEncoderTokenizer,DPRContextEncoder,DPRQuestionEncoderTokenizer,DPRQuestionEncoder

  from .autonotebook import tqdm as notebook_tqdm


In [6]:
import numpy as np
import torch
from transformers import AutoModelForCausalLM,AutoTokenizer
import pandas as pd

### Loading The Data (The Company Policies)

In [7]:
with open('companyPolicies.txt','r') as f:
    text_input=f.read()

In [8]:
text_input



In [9]:
def split_para(text):
    text=[p.strip() for p in text.split("\n")]
    return text

In [10]:
text_input=split_para(text_input)
paragraphs=text_input

In [11]:
text_input

['1.\tCode of Conduct',
 '',
 'Our Code of Conduct outlines the fundamental principles and ethical standards that guide every member of our organization. We are committed to maintaining a workplace that is built on integrity, respect, and accountability.',
 'Integrity: We hold ourselves to the highest ethical standards. This means acting honestly and transparently in all our interactions, whether with colleagues, clients, or the broader community. We respect and protect sensitive information, and we avoid conflicts of interest.',
 "Respect: We embrace diversity and value each individual's contributions. Discrimination, harassment, or any form of disrespectful behavior is unacceptable. We create an inclusive environment where differences are celebrated and everyone is treated with dignity and courtesy.",
 'Accountability: We take responsibility for our actions and decisions. We follow all relevant laws and regulations, and we strive to continuously improve our practices. We report any p

### Loading The DPR Context Encoder

In [12]:
context_tokenizer=DPRContextEncoderTokenizer.from_pretrained('facebook/dpr-ctx_encoder-single-nq-base')
context_tokenizer

DPRContextEncoderTokenizer(name_or_path='facebook/dpr-ctx_encoder-single-nq-base', vocab_size=30522, model_max_length=1000000000000000019884624838656, padding_side='right', truncation_side='right', special_tokens={'unk_token': '[UNK]', 'sep_token': '[SEP]', 'pad_token': '[PAD]', 'cls_token': '[CLS]', 'mask_token': '[MASK]'}, added_tokens_decoder={
	0: AddedToken("[PAD]", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
	100: AddedToken("[UNK]", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
	101: AddedToken("[CLS]", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
	102: AddedToken("[SEP]", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
	103: AddedToken("[MASK]", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
}
)

In [13]:
text=[("How are you","I am Fine"),("Code of Conduct","Our Code of Conduct outlines the fundamental principles and ethical standards that guide every member of our organization.")]
tokenized_text=context_tokenizer(text,return_tensors='pt',padding=True,turnucation=True,max_length=256)
tokenized_text



{'input_ids': tensor([[  101,   100,  2024,  2017,   102,   100,  2572,   100,   102,     0,
             0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
             0,     0,     0,     0,     0],
        [  101,   100,  1997,   100,   102,   100,   100,  1997,   100, 22106,
          1996,  8050,  6481,  1998, 12962,  4781,  2008,  5009,  2296,  2266,
          1997,  2256,  3029,  1012,   102]]), 'token_type_ids': tensor([[0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0],
        [0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
         1]]), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0],
        [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
         1]])}

In [None]:
context_encoder=DPRContextEncoder.from_pretrained('facebook/dpr-ctx_encoder-single-nq-base')

In [None]:
encoded_text=context_encoder(**tokenized_text)

In [None]:
encoded_text

### Function to perform this encoding on Given Company Policies

In [None]:
def encode_context(text_input,context_tokenizer,context_encoder):
    tokenized_text=context_tokenizer(text_input,return_tensors='pt',padding=True,turnucation=True,max_length=256)
    encoded_text=context_encoder(**tokenized_text)
    return encoded_text

In [None]:
text_input=encode_context(text_input,context_tokenizer,context_encoder)

In [None]:
text_input

In [None]:
emb=text_input.pooler_output
emb.shape

### Making FAISS Instance

In [None]:
import faiss
embedding_dim=text_input.pooler_output.shape[1]   
embedding_dim

In [None]:
index=faiss.IndexFlatL2(embedding_dim)

In [None]:
context_embeddings_np=np.array(text_input.pooler_output.detach()).astype('float32')

In [None]:
index.add(context_embeddings_np)

### Example Query and response using Faiss

In [None]:
querry="Code of Conduct"

In [None]:
question_tokenizer=DPRQuestionEncoderTokenizer.from_pretrained('facebook/dpr-ctx_encoder-single-nq-base')
question_encoder=question_encoder=DPRQuestionEncoder.from_pretrained('facebook/dpr-ctx_encoder-single-nq-base')

In [None]:
tokenized_querry=question_tokenizer(querry,padding=True,turnucation=True,return_tensors='pt',max_len=256)
encoded_querry=question_encoder(**tokenized_querry).pooler_output.detach().numpy().astype('float32')

In [None]:
encoded_querry

In [None]:
D,I=index.search(encoded_querry,k=5)

In [None]:
I

In [None]:
for i in I[0]:
    print(paragraphs(i))