In [1]:
"""
Task 7: Loaders & Parsers
- Explore document loaders
- Implement OutputFixParser
LLM: Ollama (llama3)
"""

from langchain_ollama import ChatOllama
from langchain_community.document_loaders import TextLoader
from langchain_core.prompts import PromptTemplate
from langchain_core.output_parsers import JsonOutputParser
from langchain_classic.output_parsers import OutputFixingParser
from pydantic import BaseModel, Field


# -------------------------------------------------
# Create Local LLM
# -------------------------------------------------
llm = ChatOllama(
    model="llama3",
    temperature=0
)


# =================================================
# DOCUMENT LOADER
# =================================================
def load_document():
    print("\n========== DOCUMENT LOADER ==========")

    loader = TextLoader("sample.txt")
    documents = loader.load()

    for doc in documents:
        print("Content:\n", doc.page_content)


# =================================================
# OUTPUT PARSER 
# =================================================
class SummarySchema(BaseModel):
    topic: str = Field(description="Main topic")
    summary: str = Field(description="Short summary")
    keywords: list[str] = Field(description="Important keywords")


def parse_with_output_parser():
    print("\n========== OUTPUT PARSER ==========")

    parser = JsonOutputParser(pydantic_object=SummarySchema)

    prompt = PromptTemplate(
        template=(
            "Summarize the following text.\n"
            "{format_instructions}\n"
            "{text}"
        ),
        input_variables=["text"],
        partial_variables={
            "format_instructions": parser.get_format_instructions()
        }
    )

    chain = prompt | llm | parser

    result = chain.invoke(
        {
            "text": (
                "LangChain is a framework used to build "
                "LLM-powered applications."
            )
        }
    )

    print(result)


# =================================================
# OUTPUT FIX PARSER 
# =================================================
def parse_with_output_fix_parser():
    print("\n========== OUTPUT FIX PARSER ==========")

    base_parser = JsonOutputParser(
        pydantic_object=SummarySchema
    )

    fix_parser = OutputFixingParser.from_llm(
        parser=base_parser,
        llm=llm
    )

    prompt = PromptTemplate(
        template=(
            "Give a summary in JSON format with fields: "
            "topic, summary, keywords.\n"
            "{text}"
        ),
        input_variables=["text"]
    )

    chain = prompt | llm | fix_parser

    result = chain.invoke(
        {
            "text": (
                "LangChain helps developers build AI "
                "applications using large language models."
            )
        }
    )

    print(result)


# -------------------------------------------------
# Main Execution
# -------------------------------------------------
if __name__ == "__main__":
    load_document()
    parse_with_output_parser()
    parse_with_output_fix_parser()




Content:
 LangChain is a framework for building applications using large language models.
It helps with chaining prompts, managing memory, tools, and agents.
It is widely used for building RAG and agent-based systems.


{'properties': {'topic': {'description': 'Main topic', 'title': 'Topic', 'type': 'string'}, 'summary': {'description': 'Short summary', 'title': 'Summary', 'type': 'string'}, 'keywords': {'description': 'Important keywords', 'items': {'type': 'string'}, 'title': 'Keywords', 'type': 'array'}}}

{'topic': 'Natural Language Processing', 'summary': 'LangChain helps developers build AI applications using large language models.', 'keywords': ['NLP', 'AI', 'Language Models']}
