### Import

In [None]:
import os
import sys
import json
from pathlib import Path
from langchain.prompts.prompt import PromptTemplate
from langchain.prompts.few_shot import FewShotPromptTemplate

from dotenv import load_dotenv
# Load variables from the .env file one directory above the working directory.
load_dotenv("../.env")
# os.environ.get returns None when a variable is unset; nothing below checks
# for that, so a missing CODE_PATH / DOC_PATH surfaces later as an import or
# path error.
code_path = os.environ.get("CODE_PATH")
doc_path = os.environ.get("DOC_PATH")
# Extend the import search path before the project imports below run.
# NOTE(review): load_doc and model presumably live under CODE_PATH — confirm.
sys.path.append(code_path)

from load_doc import load_text
from model import get_ollama_chat

### Example

In [None]:
def get_last_lines(text: str, row: int, col: int) -> str:
    """Return the bottom-right corner of *text*.

    Keeps at most the last ``row`` lines of ``text``. From each kept line,
    only the characters at or beyond column ``width - col`` are retained,
    where ``width`` is the length of the longest kept line. Cropping is by
    column position, not by each line's own length, so a line shorter than
    the offset becomes an empty string.

    Args:
        text: Source text; split with ``str.splitlines``.
        row: Maximum number of trailing lines to keep.
        col: Maximum number of right-most columns to keep.

    Returns:
        The cropped lines joined with newlines. An empty string is returned
        when ``text`` has no lines or ``row`` is not positive.
    """
    # ``lines[-0:]`` would be the whole list, so handle a non-positive row
    # count explicitly instead of slicing with it.
    if row <= 0:
        return ""

    lines = text.splitlines()[-row:]
    if not lines:
        # ``max()`` of an empty sequence raises ValueError.
        return ""

    # Clamp at zero: when ``col`` exceeds the widest line the offset would be
    # negative, and a negative slice start counts from the END of each line,
    # truncating it instead of keeping it whole.
    start = max(max(len(line) for line in lines) - col, 0)
    return "\n".join(line[start:] for line in lines)

In [None]:
# Load the test configuration from <DOC_PATH>/TEST/TEST.json.
file_path = os.path.join(doc_path, "TEST", "TEST.json")
# Decode explicitly as UTF-8 (the standard JSON encoding) rather than relying
# on the platform's locale-dependent default.
with open(file_path, "r", encoding="utf-8") as file:
    test_config = json.load(file)

# print(test_config)

In [None]:
# Path of the example document named by the config's "example_doc" entry,
# inside the TXT folder under DOC_PATH.
file_path = os.path.join(doc_path, "TXT", test_config["example_doc"])
# print(file_path)

In [None]:
# Load the example document with the project's load_text helper; the cells
# below read its text through `.page_content`.
example_doc = load_text(str(file_path))
# print(example_doc.page_content)

In [None]:
# print(get_last_lines(example_doc.page_content, 20, 100))

# Build the single few-shot example: the bottom-right excerpt of the example
# document paired with the expected answer taken from the test config.
example_resp = test_config["response"]
examples = [{
    "content": get_last_lines(example_doc.page_content, 20, 100),
    # Single quotes inside the replacement fields: reusing the enclosing
    # double quote is a SyntaxError on Python versions before 3.12.
    "response": (
        f"DOCUMENT TITLE: {example_resp['title']}\n"
        f"JOB NO.: {example_resp['project_no']}\n"
        f"DOCUMENT NO.: {example_resp['doc_no']}\n"
        f"REV: {example_resp['revision']}"
    )
}]

In [None]:
# Template that renders one few-shot example: the document excerpt between
# --CONTENT-- markers, followed by the expected extracted information.
example_prompt = PromptTemplate(
    template=(
        "Example document:\n\n"
        "--CONTENT--\n{content}\n--CONTENT--\n\n"
        "Example document information:\n\n"
        "{response}"
    ),
    input_variables=["content", "response"],
)

# Render the first example to eyeball the formatted result.
prompt_val = example_prompt.invoke(examples[0])
print(prompt_val.text)

### Prompt

In [None]:
# Instruction text used as the prompt prefix. The literal mixes "\n" escapes
# with real line breaks, so several sentences are followed by a blank line in
# the rendered prompt.
instruction = (
    """---INSTRUCTION--- \nExtract document title, job no., document no. and rev information. 
Document title may span multiple lines.\n
Follow the examples below to identify where such information in the document are embedded.\n
Provide a response with the extracted information in the format as shown in the examples."""
)

# Suffix template; its only input variable is "context".
prompt_template = "---CONTEXT---\n{context}"

# Few-shot prompt: prefix, then the formatted examples, then the suffix.
prompt = FewShotPromptTemplate(
    prefix=instruction, # instruction text placed before the examples
    
    # each entry of `examples` is formatted with `example_prompt`
    # and inserted between "prefix" and "suffix"
    examples=examples,
    example_prompt=example_prompt,

    suffix=prompt_template, # the document excerpt to extract from
    input_variables=["context"],
)

### LLM

In [None]:
# File names listed under "docs" in the test config.
file_names = test_config["docs"]

In [None]:
# Chat model returned by the project's get_ollama_chat helper (imported from `model`).
llm = get_ollama_chat()

In [None]:
# Compose the few-shot prompt with the chat model: the formatted prompt is
# passed to the model when the chain is invoked.
chain = prompt | llm

### Extract

In [None]:
# Pick one document from the configured list (hard-coded index 4) and load it
# from the TXT folder under DOC_PATH.
file_name = file_names[4]
file_path = os.path.join(doc_path, "TXT", file_name)
doc = load_text(str(file_path))
# Keep only the last 20 lines and the right-most 150 columns as the prompt
# context (the few-shot example above uses 100 columns).
context = get_last_lines(doc.page_content, 20, 150)
print(context)

In [None]:
# Run the chain on the excerpt.
# NOTE(review): the temperature override presumably only takes effect if
# get_ollama_chat exposes "temperature" as a configurable field — confirm.
response = chain.invoke({"context": context}, 
    config={"configurable": {"temperature": 0.1}})

In [None]:
# Show which file was processed, then the model's reply text.
print(file_name, "\n")
print(response.content)