In [1]:
import os
from dotenv import load_dotenv

# Load variables from a local .env file (if one exists) into the process environment.
load_dotenv()

# The chat models built later in this notebook are created without an explicit
# key, so they presumably pick it up from the environment. Fail fast here with a
# clear message instead of surfacing an opaque client error further down.
openai_api_key = os.getenv("OPENAI_API_KEY")
if not openai_api_key:
    raise RuntimeError(
        "OPENAI_API_KEY is not set; add it to your .env file or export it "
        "before running this notebook."
    )

In [12]:
from langchain_community.document_loaders import PyPDFLoader
from langchain.chat_models import init_chat_model
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain.chains.summarize import load_summarize_chain

# Question answering over the contract: load the PDF, put its whole text into the
# prompt, and let the chat model answer a question typed by the user.

# Load the contract and flatten every page's text into one string.
pdf_path = "contract.pdf"
loader = PyPDFLoader(pdf_path)
pages = loader.load()
full_text = "\n".join(page.page_content for page in pages)

# A PDF without extractable text (e.g. a scanned document) would make the model
# answer from nothing, so stop before spending an API call.
if not full_text.strip():
    raise ValueError(f"No extractable text found in {pdf_path!r}.")

llm = init_chat_model(
    model="gpt-4o-mini",
    temperature=0,
    model_provider="openai",
)

# The human message must end with an "Answer:" cue. A trailing "Summary:" cue
# steers the model into summarising the contract no matter what was asked.
# The contract text and the question are kept in clearly separated sections.
prompt = ChatPromptTemplate.from_messages([
    ("system", "You are a lawyer and helpful assistant."),
    (
        "human",
        "Answer the question based on the contract text below.\n\n"
        "Contract:\n{text}\n\n"
        "Question: {question}\n\n"
        "Answer:",
    ),
])

parser = StrOutputParser()

# prompt -> chat model -> plain string
chain = prompt | llm | parser

# Read the question up front so an empty entry is rejected before calling the model.
question = input("ask").strip()
if not question:
    raise ValueError("Please enter a question about the contract.")

response = chain.invoke({"question": question, "text": full_text})

print("\n🤖 Bot-reply:\n", response)


🤖 Bot-reply:
 **Summary of Contract Agreement**

**Parties Involved:**
- **Company:** ABC Corporation, a Delaware corporation with its principal office in Wilmington, DE.
- **Contractor:** John Doe, an individual residing in Springfield, IL.

**Key Terms:**

1. **Engagement:** The Company engages the Contractor to perform specific services outlined in Exhibit A, which the Contractor accepts.

2. **Term:** The Agreement is effective from July 1, 2025, to December 31, 2025, unless terminated earlier.

3. **Compensation:** The Contractor will receive $5,000 per month, payable on the last business day of each month.

4. **Confidentiality:** The Contractor must keep all proprietary information confidential and cannot disclose it without the Company's written consent.

5. **Intellectual Property:** Any intellectual property created by the Contractor while performing the Services will belong exclusively to the Company.

6. **Termination:** Either party can terminate the Agreement with thi

In [13]:
# Summarise the contract with LangChain's summarize chain in "stuff" mode.

# 1. Load the PDF document
pdf_path = "contract.pdf"  # file is expected in the notebook's working directory
loader = PyPDFLoader(pdf_path)
pages = loader.load()
# The page documents are passed to the chain as-is; no manual joining is needed.

# A PDF without extractable text (e.g. a scanned document) would make the model
# summarise nothing, so stop before spending an API call.
if not any(page.page_content.strip() for page in pages):
    raise ValueError(f"No extractable text found in {pdf_path!r}.")

# 2. LLM
llm = init_chat_model(
    model="gpt-4o-mini",
    temperature=0,
    model_provider="openai",
)

# 3. Prompt with roles
# NOTE(review): the {text} placeholder is assumed to match the chain's document
# variable name - confirm against the chain's configuration if it is renamed.
stuff_prompt = ChatPromptTemplate.from_messages([
    ("system", "You are a lawyer and helpful assistant always making some jokes."),
    ("human", "Summarize the following text:\n\n{text}\n\nSummary:")
])

# 4. Build the chain (simple "stuff" mode)
chain = load_summarize_chain(
    llm,
    chain_type="stuff",
    prompt=stuff_prompt
)

# 5. Invoke the chain; the result is a dict and the summary is read from "output_text"
summary = chain.invoke({"input_documents": pages})

# 6. Output
print("\n📄 Summary:\n", summary["output_text"])


📄 Summary:
 This Contract Agreement, effective July 1, 2025, is between ABC Corporation and Contractor John Doe. The Contractor will provide specified services until December 31, 2025, for a monthly fee of $5,000. The Contractor must maintain confidentiality regarding proprietary information and all intellectual property created will belong to the Company. Either party can terminate the agreement with 30 days' notice. The Contractor is classified as an independent contractor, not an employee. The agreement is governed by Delaware law. 

And remember, if you ever need to terminate a contract, just give 30 days' notice—unless it's a bad haircut, then you might want to act faster!


In [20]:
# Inspect which keys the summarize chain's result dict exposes.
summary.keys()

dict_keys(['input_documents', 'output_text'])