<a href="https://colab.research.google.com/github/joshuaalpuerto/ML-guide/blob/main/Contract_analyzer.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install -qU langchain --progress-bar off
!pip install -qU langchainhub --progress-bar off
!pip install -qU fireworks-ai --progress-bar off
!pip install -qU openai --progress-bar off
!pip install -qU langchain-fireworks --progress-bar off
!pip install -qU  pypdf --progress-bar off

In [None]:
# @title load fireworks API key
# Mount Google Drive so the credentials file stored there can be read.
import json
import os

from google.colab import drive

drive.mount('/content/drive')

# env.json is parsed into `env`; later cells read the Fireworks key from it as well.
with open('/content/drive/MyDrive/env/env.json') as env_file:
    env = json.loads(env_file.read())

# Also expose the key through the process environment.
os.environ["FIREWORKS_API_KEY"] = env['fireworks.ai']['apiKey']

Mounted at /content/drive


In [None]:
from langchain.globals import set_llm_cache, set_debug
# `langchain.cache` is a deprecated re-export; import the cache from langchain_community directly.
from langchain_community.cache import InMemoryCache

# Register a process-wide in-memory LLM cache for the duration of this kernel session.
set_llm_cache(InMemoryCache())

# Debug mode is currently ON. Switch this to False when you are not debugging,
# otherwise the extra logging makes the conversations hard to read.
set_debug(True)

In [None]:
from langchain_fireworks import Fireworks

# Build the Fireworks LLM client that the summarization chains below share.
# The plain `Fireworks` integration is used on purpose: per the original author,
# the ChatFireworks integration does not pass functions through properly for function calling.
llm = Fireworks(
    model="accounts/fireworks/models/mixtral-8x7b-instruct",
    fireworks_api_key=env['fireworks.ai']['apiKey'],  # key loaded from env.json in an earlier cell
    base_url="https://api.fireworks.ai/inference/v1/completions",
    temperature=0,
    max_tokens=30000,
)

In [None]:
from langchain_community.document_loaders import PyPDFLoader

# Read the purchase agreement from Drive and break it into documents;
# `splits` is what the summarization chains receive as "input_documents".
loader = PyPDFLoader(
    "/content/drive/MyDrive/datasets/PDFS/Purchase-agreement-1.pdf"
)
splits = loader.load_and_split()

# General summarization

In [None]:
from langchain.prompts import PromptTemplate
from langchain.chains.summarize import load_summarize_chain

# Initial prompt: summarizes one part of the contract, substituted for `{text}`.
prompt_template = (
    "<s>[INST]You are a real estate advisor and expert at analyzing contracts and agreements for property agreement.\n"
    "Your goal is to analyze and summarize part of the contract below, \n"
    "You must include the most relevant information for the client "
    "to prevent any surprises or gotchas before signing the contract. return only the summary nothing else.:\n"
    "------------\n"
    "{text}\n"
    "------------\n"
    "DETAILED SUMMARY:[/INST]"
)

prompt = PromptTemplate.from_template(prompt_template)

# Refine prompt: receives the running summary as `{existing_answer}` and more contract text as `{text}`.
refine_template = (
    "<s>[INST]You are a real estate advisor and expert at analyzing contracts and agreements for property agreement.\n"
    "Your job is to produce a final summary of property agreement.\n"
    "We have provided an existing summary up to a certain point: {existing_answer}\n"
    # The trailing space matters: adjacent literals are concatenated with no separator.
    "We have the opportunity to refine the existing summary "
    "(only if needed) with some more context below.\n"
    "------------\n"
    "{text}\n"
    "------------\n"
    "Given the new context, refine the original summary to provide a thorough overview of the obligations, "
    "party details, termination clauses that the client needs to be aware of before signing the contract.[/INST]"
)
refine_prompt = PromptTemplate.from_template(refine_template)

# "refine" chain: the summary is built up incrementally across the document splits.
# Intermediate steps are kept so each refinement can be inspected afterwards.
chain = load_summarize_chain(
    llm=llm,
    chain_type="refine",
    question_prompt=prompt,
    refine_prompt=refine_prompt,
    return_intermediate_steps=True,
    input_key="input_documents",
    output_key="output_text",
)

# Use `invoke` rather than calling the chain object directly (the direct call style is deprecated).
# `return_only_outputs=True` keeps the input documents out of `result`.
result = chain.invoke({"input_documents": splits}, return_only_outputs=True)

In [None]:
# Display the chain output: the final summary together with the intermediate steps.
result

In [None]:
# Print each intermediate output recorded by the chain, in order.
intermediate_steps = result['intermediate_steps']
for step in intermediate_steps:
    print(step)

# Risk assessment

In [None]:
from langchain.prompts import PromptTemplate
from langchain.chains.summarize import load_summarize_chain

# Initial prompt: summarizes one part of the contract (substituted for `{text}`),
# focusing on risks the buyer should be aware of.
prompt_template = (
    "<s>[INST]You are a real estate advisor and expert at analyzing contracts and agreements for property agreement.\n"
    "Your goal is to analyze and summarize part of the contract below, \n"
    "You must include the most relevant information related to risks that the buyer needs to be aware of, "
    "return only the summary nothing else.:\n"
    "------------\n"
    "{text}\n"
    "------------\n"
    "DETAILED SUMMARY:[/INST]"
)

prompt = PromptTemplate.from_template(prompt_template)

# Refine prompt: receives the running summary as `{existing_answer}` and more contract text as `{text}`.
refine_template = (
    "<s>[INST]You are a real estate advisor and expert at analyzing contracts and agreements for property agreement.\n"
    "Your job is to produce a final summary of property agreement.\n"
    "We have provided an existing summary up to a certain point: {existing_answer}\n"
    # The trailing space matters: adjacent literals are concatenated with no separator.
    "We have the opportunity to refine the existing summary "
    "(only if needed) with some more context below.\n"
    "------------\n"
    "{text}\n"
    "------------\n"
    "Given the new context, refine the original summary to provide a thorough overview of the obligations, "
    "risks and termination clauses that the client needs to be aware of before signing the contract.[/INST]"
)
refine_prompt = PromptTemplate.from_template(refine_template)

# "refine" chain: the summary is built up incrementally across the document splits.
# Intermediate steps are kept so each refinement can be inspected afterwards.
chain = load_summarize_chain(
    llm=llm,
    chain_type="refine",
    question_prompt=prompt,
    refine_prompt=refine_prompt,
    return_intermediate_steps=True,
    input_key="input_documents",
    output_key="output_text",
)

# Use `invoke` rather than calling the chain object directly (the direct call style is deprecated).
# NOTE: this rebinds `result`, replacing the output of any earlier chain run stored under that name.
result = chain.invoke({"input_documents": splits}, return_only_outputs=True)