### Embeddings (Ollama: nomic-embed-text)

In [3]:
from langchain_community.embeddings import OllamaEmbeddings
# Build the embedding client for the "nomic-embed-text" model served through Ollama.
embeddings = OllamaEmbeddings(
    model="nomic-embed-text",
)

# Sanity check: embed one short string and preview its first five components.
query_result = embeddings.embed_query("Test query.")
query_result[:5]

[0.5855991840362549,
 0.4734927713871002,
 -3.261312961578369,
 -0.27085670828819275,
 2.32326078414917]

### PDF Loader

In [4]:
from langchain_community.document_loaders import PyPDFLoader
# Point the loader at the source paper (path is relative to the notebook's working directory).
loader = PyPDFLoader(
    "docs/Understanding_LLMs.pdf",
)

# Load the PDF and split it into Document chunks, then preview the first five.
pages = loader.load_and_split()
pages[:5]

[Document(page_content='Understanding LLMs: A Comprehensive Overview from Training\nto Inference\nYiheng Liua, Hao Hea, Tianle Hana, Xu Zhanga, Mengyuan Liua, Jiaming Tiana,\nYutong Zhangb, Jiaqi Wangc, Xiaohui Gaod, Tianyang Zhongd, Yi Pane, Shaochen Xue,\nZihao Wue, Zhengliang Liue, Xin Zhangb, Shu Zhangc, Xintao Hud, Tuo Zhangd,\nNing Qianga, Tianming Liueand Bao Gea\naSchool of Physics and Information Technology, Shaanxi Normal University, Xi’an, 710119, Shaanxi, China\nbInstitute of Medical Research, Northwestern Polytechnical University, Xi’an, 710072, Shaanxi, China\ncSchool of Computer Science, Northwestern Polytechnical University, Xi’an, 710072, Shaanxi, China\ndSchool of Automation, Northwestern Polytechnical University, Xi’an, 710072, Shaanxi, China\neSchool of Computing, The University of Georgia, Athens, 30602, USA\nARTICLE INFO\nKeywords :\nLarge Language Models\nTraining\nInference\nSurveyABSTRACT\nThe introduction of ChatGPT has led to a significant increase in the uti

### Chroma DB setup

In [6]:
import os
import shutil
from langchain_chroma import Chroma
# Single source of truth for the vector store's on-disk location, so the cleanup
# check, the removal, and the persist directory can never drift apart.
CHROMA_DIR = './chroma_db'

# Remove whatever a previous run left behind so the store is rebuilt from scratch.
# NOTE(review): when re-running this cell in a live kernel, an earlier `db` object
# may still reference files under this directory - confirm Chroma tolerates that.
if os.path.exists(CHROMA_DIR):
    shutil.rmtree(CHROMA_DIR)

# Embed every chunk in `pages` with `embeddings` and persist the result under CHROMA_DIR.
db = Chroma.from_documents(pages, embeddings, persist_directory=CHROMA_DIR)

In [7]:

# Run a similarity search against the vector store with a sample question.
query = "What is LLM?"
docs = db.similarity_search(query)

# Show the retrieved documents as the cell's output (nothing is printed explicitly).
docs

[Document(page_content='A Comprehensive Overview from Training to Inference\nTable 5\nList of open source LLMs.\nLLM Size (B) Links\nT5 [68] 11B https://github.com/google-research/text-to-text-transfer-transformer\nCodeGen [81] 16B https://github.com/salesforce/CodeGen\nMOSS [203] 16B https://github.com/OpenLMLab/MOSS\nGLM [37] 130B https://github.com/THUDM/GLM\nChatGLM [37] 6B https://github.com/THUDM/ChatGLM3\nChatYuan [204] 0.7B https://github.com/clue-ai/ChatYuan\nOPT [83] 175B https://github.com/facebookresearch/metaseq\nBLOOM [38] 176B https://huggingface.co/bigscience/bloom\nLLaMA [9] 65B https://github.com/facebookresearch/llama\nCodeGeeX [82] 13B https://github.com/THUDM/CodeGeeX\nBaichuan [205] 13B https://github.com/baichuan-inc/Baichuan2\nAquila 7B https://github.com/FlagAI-Open/FlagAI/tree/master/examples/Aquila\nMiniGPT-4 [206] 25B https://github.com/Vision-CAIR/MiniGPT-4\nVicuna [207] 13B https://github.com/lm-sys/FastChat\nLLMs is expected to continue expanding, thereby