In [6]:
from langchain.vectorstores import FAISS
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.document_loaders import PyPDFLoader, DirectoryLoader
from langchain.embeddings import HuggingFaceEmbeddings

DATA_PATH = 'BillingManagement/'
DB_FAISS_PATH = 'vectorstores2/db_faiss'

In [2]:
loader = DirectoryLoader(DATA_PATH, glob='*.pdf', loader_cls=PyPDFLoader)
documents = loader.load()
documents[:10]

[Document(metadata={'source': 'BillingManagement/BillingManagement.pdf', 'page': 0}, page_content='Billing : .API Supported Packages vJune-23'),
 Document(metadata={'source': 'BillingManagement/BillingManagement.pdf', 'page': 1}, page_content=' | Contents | ii\nContents\nBilling : .API Supported Packages vJune-23 .......................................................33\nBilling : .About Billing vOctober-23 .....................................................................33\nKey Terminology ................................................................................................................................................36\nAttachments: ........................................................................................................................................................37\nBilling : .Account Locations v202402.2.0 ..............................................................37\nTo add a new location ......................................................

In [19]:
text_splitter = RecursiveCharacterTextSplitter(chunk_size = 1000, chunk_overlap = 100)
texts = text_splitter.split_documents(documents)
texts[:10]

[Document(metadata={'source': 'BillingManagement/BillingManagement.pdf', 'page': 0}, page_content='Billing : .API Supported Packages vJune-23'),
 Document(metadata={'source': 'BillingManagement/BillingManagement.pdf', 'page': 1}, page_content='| Contents | ii\nContents\nBilling : .API Supported Packages vJune-23 .......................................................33\nBilling : .About Billing vOctober-23 .....................................................................33\nKey Terminology ................................................................................................................................................36\nAttachments: ........................................................................................................................................................37\nBilling : .Account Locations v202402.2.0 ..............................................................37\nTo add a new location .......................................................

In [8]:
embeddings = HuggingFaceEmbeddings(model_name='sentence-transformers/all-MiniLM-L6-v2', model_kwargs = { "device": "cpu" })
embeddings

  warn_deprecated(


HuggingFaceEmbeddings(client=SentenceTransformer(
  (0): Transformer({'max_seq_length': 256, 'do_lower_case': False}) with Transformer model: BertModel 
  (1): Pooling({'word_embedding_dimension': 384, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
  (2): Normalize()
), model_name='sentence-transformers/all-MiniLM-L6-v2', cache_folder=None, model_kwargs={'device': 'cpu'}, encode_kwargs={}, multi_process=False, show_progress=False)

In [5]:
vdb_chunks  =  FAISS.from_documents(texts, embeddings)
vdb_chunks.save_local(DB_FAISS_PATH, index_name="base_and_adjacent")
vdb_chunks

<langchain_community.vectorstores.faiss.FAISS at 0x7a213db01930>

In [9]:
vdb_chunks  =   FAISS.load_local(DB_FAISS_PATH, embeddings, index_name="base_and_adjacent", allow_dangerous_deserialization=True)
vdb_chunks

<langchain_community.vectorstores.faiss.FAISS at 0x7573af2d7340>

In [10]:
retriever = vdb_chunks.as_retriever()
retriever

VectorStoreRetriever(tags=['FAISS', 'HuggingFaceEmbeddings'], vectorstore=<langchain_community.vectorstores.faiss.FAISS object at 0x7573af2d7340>)

In [11]:
from langchain.llms import CTransformers

llm = CTransformers(
    model="llama-2-7b-chat.ggmlv3.q8_0.bin",
    model_type="llama",
    config={
        'max_new_tokens': 600,
        'temperature': 0.01,
        'context_length': 700,
    }
)

llm

CTransformers(client=<ctransformers.llm.LLM object at 0x7573b064a7d0>, model='llama-2-7b-chat.ggmlv3.q8_0.bin', model_type='llama', config={'max_new_tokens': 600, 'temperature': 0.01, 'context_length': 700})

In [17]:
from transformers import LlamaTokenizer, AutoModelForCausalLM

tokenizer = LlamaTokenizer.from_pretrained("hf-internal-testing/llama-tokenizer")
tokenizer

LlamaTokenizer(name_or_path='hf-internal-testing/llama-tokenizer', vocab_size=32000, model_max_length=2048, is_fast=False, padding_side='right', truncation_side='right', special_tokens={'bos_token': '<s>', 'eos_token': '</s>', 'unk_token': '<unk>'}, clean_up_tokenization_spaces=False),  added_tokens_decoder={
	0: AddedToken("<unk>", rstrip=False, lstrip=False, single_word=False, normalized=True, special=True),
	1: AddedToken("<s>", rstrip=False, lstrip=False, single_word=False, normalized=True, special=True),
	2: AddedToken("</s>", rstrip=False, lstrip=False, single_word=False, normalized=True, special=True),
}

In [18]:
def split_text(text, max_tokens, tokenizer):
    tokens = tokenizer.tokenize(text)
    chunks = [' '.join(tokens[i:i+max_tokens]) for i in range(0, len(tokens), max_tokens)]
    return chunks

# Function to process question and ensure context length is respected
def retrieve_with_chunks(question, retriever, llm, tokenizer, max_context_length=700):
    documents = retriever.get_relevant_documents(question)
    all_texts = " ".join([doc.page_content for doc in documents])
    chunks = split_text(all_texts, max_context_length, tokenizer)
    results = []
    for chunk in chunks:
        # Ensure that chunk is decoded back to string from tokens
        chunk_text = tokenizer.convert_tokens_to_string(chunk.split())
        result = llm(chunk_text)
        results.append(result)
    return results

In [19]:
question = "What is Billing Management?"
results = retrieve_with_chunks(question, retriever, llm, tokenizer)
for result in results:
    print(result)

  warn_deprecated(


KeyboardInterrupt: 