In [None]:
import os
import warnings

os.environ['KPM_DUPLICATE_LIB_OK'] = 'True'
warnings.filterwarnings('ignore')

In [1]:
from langchain.document_loaders import PyPDFLoader

loader = PyPDFLoader("docs\\health supplements\\3.health_supplements_side_effects.pdf") 
docs = loader.load()
len(docs)

11

In [2]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

text_sppliter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
chunks = text_sppliter.split_documents(docs)
len(chunks)

52

In [3]:
import tiktoken

encoding = tiktoken.encoding_for_model('gpt-4o-mini')
len(encoding.encode(chunks[0].page_content))

200

In [5]:
from langchain_ollama import OllamaEmbeddings
import faiss
from langchain_community.vectorstores import FAISS
from langchain_community.docstore.in_memory import InMemoryDocstore

In [None]:
base_url = "http://localhost:11434"

model_name = 'nomic-embed-text'

embeddings = OllamaEmbeddings(model='phi3', base_url=base_url)

In [10]:
vector = embeddings.embed_query('hello world')
len(vector)

3072

In [11]:
index = faiss.IndexFlatL2(len(vector))
index.ntotal, index.d

(0, 3072)

In [12]:
vector_store = FAISS(
    embedding_function=embeddings,
    index=index,
    docstore=InMemoryDocstore(),
    index_to_docstore_id={})

In [13]:
vector_store.index.ntotal, vector_store.index.d

(0, 3072)

In [14]:
ids = vector_store.add_documents(documents=chunks)

In [16]:
len(ids), vector_store.index.ntotal, vector_store.index.d

(52, 52, 3072)

In [17]:
vector_store.index_to_docstore_id

{0: 'd46146da-b041-4063-a7f0-19c155dbae8f',
 1: '823ac91d-5df1-4de8-91d8-78a410bf1c3a',
 2: 'cde06469-6797-4c77-8ab6-960eaffcfa61',
 3: '6b302d39-ac75-455a-9f5f-bc92c0437df1',
 4: '1bd9be02-f6e8-4b81-9900-d49749906dee',
 5: 'ebb52402-b861-4be5-a73f-41b5f3ab3c03',
 6: '69f66a01-8c10-4e49-9698-ee2ceb17c765',
 7: 'f10c13ca-c71f-40c8-9613-e8065dccf74d',
 8: '27e3a7fa-cce2-434e-8394-efa04df5b65f',
 9: '088cf094-bfe9-4469-84d4-d828ff816b4a',
 10: '846f9810-1089-4f4c-aeab-df7cd64180cd',
 11: '07588485-2cb6-42a6-8b7f-580ec6a27f02',
 12: 'be8a4357-efdc-4b97-b8d8-2db3b00fccae',
 13: '7aa86453-93f1-4d05-a005-53f1796b5364',
 14: 'd118f9ef-e8fa-4a56-8b42-daa2cfc7c433',
 15: '92bcf56c-0ea1-48c3-82bd-6d49e77e74da',
 16: '0b7bf9b7-3387-48d4-8f31-2955791f9d6f',
 17: 'fa1abcc0-de1e-4550-a45d-ba1338c86a0d',
 18: '086254c7-4210-4251-a179-c0b4a5f06999',
 19: 'ac58da79-7c5a-422e-b3c2-84ba629bcbee',
 20: '4f94e565-5bd5-46dc-a560-a85bf56f9aca',
 21: '6fb52f8c-34cc-4bcc-8097-9e25da313efd',
 22: 'd9f19f59-8077-

In [20]:
question = "talk about suplements"
answers = vector_store.search(query=question, k=5, search_type='similarity')

In [21]:
answers

[Document(metadata={'source': 'docs\\health supplements\\3.health_supplements_side_effects.pdf', 'page': 10}, page_content='difficult to elucidate mechanisms, although the authors of the above case study speculated \nthat black cohosh regulates heart rate via activation of serotonin receptors, consistent with \nexperimental results (117). Both garlic and ginkgo biloba use have been involved in several \ncases of excessive bleeding. For example, a 71 year-old man had persistent surgical bleeding \nthat was attributed to indulgent garlic ingestion prior to the operation (118). Furthermore, \naged garlic extract inhibits platelet aggregation (119). Gingkolide B, an active component of \ngingko biloba, has been shown to inhibit platelet aggregating factor, and men and women \ntaking ginkgo biloba have suffered spontaneous bleeding (120).\nCompared to the above outcomes, more is known about potential herb-drug interactions. \nPharmacologically active compounds in botanicals are, like drugs,

In [24]:
for a in answers:
    print('page: ', a.metadata.get('page'))
    print(a.page_content)
    print('\n')

page:  10
difficult to elucidate mechanisms, although the authors of the above case study speculated 
that black cohosh regulates heart rate via activation of serotonin receptors, consistent with 
experimental results (117). Both garlic and ginkgo biloba use have been involved in several 
cases of excessive bleeding. For example, a 71 year-old man had persistent surgical bleeding 
that was attributed to indulgent garlic ingestion prior to the operation (118). Furthermore, 
aged garlic extract inhibits platelet aggregation (119). Gingkolide B, an active component of 
gingko biloba, has been shown to inhibit platelet aggregating factor, and men and women 
taking ginkgo biloba have suffered spontaneous bleeding (120).
Compared to the above outcomes, more is known about potential herb-drug interactions. 
Pharmacologically active compounds in botanicals are, like drugs, substrates of 
metabolizing enzymes. As such, induction or suppression of relevant metabolizing enzymes


page:  6
supplem

In [25]:
vector_store.save_local('docs\\health supplements\\')