In [10]:
from langchain_community.document_loaders import TextLoader
loader = TextLoader("file.txt")


In [11]:
text_doc = loader.load()
text_doc

[Document(page_content="Python is a versatile and powerful programming language that has gained immense popularity in recent years. Known for its simplicity and readability, Python is often the first choice for beginners and experienced developers alike. Created by Guido van Rossum and first released in 1991, Python has since evolved into a robust language used in various fields such as web development, data science, artificial intelligence, and more. One of the key reasons for Python's popularity is its easy-to-understand syntax, which emphasizes readability and reduces the cost of program maintenance. Python's design philosophy emphasizes code readability with its notable use of significant whitespace. It provides constructs that enable clear programming on both small and large scales.\n\nPython supports multiple programming paradigms, including object-oriented, imperative, functional, and procedural programming. This versatility allows developers to choose the approach that best sui

In [12]:
from langchain_community.document_loaders import PyPDFLoader
loader = PyPDFLoader('python.pdf')
pdf_content = loader.load()
pdf_content

[Document(page_content='', metadata={'source': 'python.pdf', 'page': 0}),
 Document(page_content='Learning Python', metadata={'source': 'python.pdf', 'page': 1}),
 Document(page_content='', metadata={'source': 'python.pdf', 'page': 2}),
 Document(page_content='FOURTH EDITION\nLearning Python\nMark Lutz\nBeijing •Cambridge •Farnham •Köln •Sebastopol •Taipei •Tokyo', metadata={'source': 'python.pdf', 'page': 3}),
 Document(page_content='Learning Python, Fourth Edition\nby Mark Lutz\nCopyright © 2009 Mark Lutz. All rights reserved.\nPrinted in the United States of America.\nPublished by O’Reilly Media, Inc., 1005 Gravenstein Highway North, Sebastopol, CA 95472.\nO’Reilly books may \nbe purchased for educational, business, or sales promotional use. Online editions\nare also available for most titles ( http://my.safaribooksonline.com). For more information, contact our\ncorporate/institutional sales department: (800) 998-9938 or corporate@oreilly.com.\nEditor: Julie Steele\nProduction Edito

In [13]:
from langchain.text_splitter import RecursiveCharacterTextSplitter
text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=100)
documents = text_splitter.split_documents(text_doc)
documents

[Document(page_content="Python is a versatile and powerful programming language that has gained immense popularity in recent years. Known for its simplicity and readability, Python is often the first choice for beginners and experienced developers alike. Created by Guido van Rossum and first released in 1991, Python has since evolved into a robust language used in various fields such as web development, data science, artificial intelligence, and more. One of the key reasons for Python's popularity is its", metadata={'source': 'file.txt'}),
 Document(page_content="science, artificial intelligence, and more. One of the key reasons for Python's popularity is its easy-to-understand syntax, which emphasizes readability and reduces the cost of program maintenance. Python's design philosophy emphasizes code readability with its notable use of significant whitespace. It provides constructs that enable clear programming on both small and large scales.", metadata={'source': 'file.txt'}),
 Docume

In [17]:
from langchain_community.embeddings import OllamaEmbeddings
embeddings = OllamaEmbeddings(model="mxbai-embed-large")

In [30]:
from langchain_community.vectorstores import Chroma
embeddings = OllamaEmbeddings(model="llama2-uncensored")
db = Chroma.from_documents(documents[:5], embedding=embeddings)

In [33]:
query = 'Python supports multiple programming paradigms, including'
result = db.similarity_search(query)
result[0].page_content

"Python is a versatile and powerful programming language that has gained immense popularity in recent years. Known for its simplicity and readability, Python is often the first choice for beginners and experienced developers alike. Created by Guido van Rossum and first released in 1991, Python has since evolved into a robust language used in various fields such as web development, data science, artificial intelligence, and more. One of the key reasons for Python's popularity is its"

In [34]:
from langchain_community.vectorstores import FAISS
embeddings = OllamaEmbeddings(model="llama2-uncensored")
db1 = FAISS.from_documents(documents[:10], embedding=embeddings)

In [36]:
query = 'Python supports multiple programming paradigms, including'
result = db1.similarity_search(query)
result[0].page_content

"Python is a versatile and powerful programming language that has gained immense popularity in recent years. Known for its simplicity and readability, Python is often the first choice for beginners and experienced developers alike. Created by Guido van Rossum and first released in 1991, Python has since evolved into a robust language used in various fields such as web development, data science, artificial intelligence, and more. One of the key reasons for Python's popularity is its"