In [19]:
from langchain_community.llms import Ollama
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_community.document_loaders import PyPDFLoader

In [12]:

llm = Ollama(model = 'tinyllama')
output_parser = StrOutputParser()


In [11]:
prompt = ChatPromptTemplate.from_messages(
    [
        ('system',"hi act like a Doctor!"),
        ("user","{input}")
    ]
)

In [18]:
chain = prompt | llm | output_parser

res = chain.invoke(
    {
        "input":"tell me a joke of medicine in 5 words!"
    }
)
print(res)

1. Painkillers for sore throat and headache
2. Antibiotics to kill bacteria
3. Nausea relievers that make you feel lightheaded
4. Anti-inflammatory medication to soothe inflamed muscles
5. Blood pressure drops to reduce fever

Hope this helps!


In [33]:
loader = PyPDFLoader('only_amendment_case_001.pdf')
pages = loader.load()
pages

[Document(page_content='', metadata={'source': 'only_amendment_case_001.pdf', 'page': 0}),
 Document(page_content='', metadata={'source': 'only_amendment_case_001.pdf', 'page': 1})]

In [46]:
from typing import AsyncIterator, Iterator

from langchain_core.document_loaders import BaseLoader
from langchain_core.documents import Document


class CustomDocumentLoader(BaseLoader):
    """An example document loader that reads a file line by line."""

    def __init__(self, file_path: str) -> None:
        """Initialize the loader with a file path.

        Args:
            file_path: The path to the file to load.
        """
        self.file_path = file_path

    def lazy_load(self) -> Iterator[Document]:  # <-- Does not take any arguments
        """A lazy loader that reads a file line by line.

        When you're implementing lazy load methods, you should use a generator
        to yield documents one by one.
        """
        with open(self.file_path, encoding="utf-8") as f:
            line_number = 0
            for line in f:
                yield Document(
                    page_content=line,
                    metadata={"line_number": line_number, "source": self.file_path},
                )
                line_number += 1

    # alazy_load is OPTIONAL.
    # If you leave out the implementation, a default implementation which delegates to lazy_load will be used!
    async def alazy_load(
        self,
    ) -> AsyncIterator[Document]:  # <-- Does not take any arguments
        """An async lazy loader that reads a file line by line."""
        # Requires aiofiles
        # Install with `pip install aiofiles`
        # https://github.com/Tinche/aiofiles
        import aiofiles

        async with aiofiles.open(self.file_path, encoding="utf-8") as f:
            line_number = 0
            async for line in f:
                yield Document(
                    page_content=line,
                    metadata={"line_number": line_number, "source": self.file_path},
                )
                line_number += 1

In [47]:
with open("./meow.txt", "w", encoding="utf-8") as f:
    quality_content = "meow meow🐱 \n meow meow🐱 \n meow😻😻"
    f.write(quality_content)

loader = CustomDocumentLoader("./meow.txt")

In [48]:
## Test out the lazy load interface
for doc in loader.lazy_load():
    print()
    print(type(doc))
    print(doc)


<class 'langchain_core.documents.base.Document'>
page_content='meow meow🐱 \n' metadata={'line_number': 0, 'source': './meow.txt'}

<class 'langchain_core.documents.base.Document'>
page_content=' meow meow🐱 \n' metadata={'line_number': 1, 'source': './meow.txt'}

<class 'langchain_core.documents.base.Document'>
page_content=' meow😻😻' metadata={'line_number': 2, 'source': './meow.txt'}


In [45]:
from langchain_community.document_loaders import PyMuPDFLoader
loader = PyMuPDFLoader('only_amendment_case_001.pdf')
pages = loader.lazy_load()
next(iter(pages))

Document(page_content='', metadata={'source': 'only_amendment_case_001.pdf', 'file_path': 'only_amendment_case_001.pdf', 'page': 0, 'total_pages': 2, 'format': 'PDF 1.3', 'title': '', 'author': '', 'subject': '', 'keywords': '', 'creator': '', 'producer': 'pypdf', 'creationDate': '', 'modDate': '', 'trapped': ''})

In [29]:
from langchain_community.document_loaders import MathpixPDFLoader
loader = MathpixPDFLoader("data/only_amendment_case_001.pdf")

ValueError: Did not find mathpix_api_key, please add an environment variable `MATHPIX_API_KEY` which contains it, or pass `mathpix_api_key` as a named parameter.