# Extract Contract Information

## Prompts

In [1]:
# System prompt: gives the model a legal-contract-reviewer persona and tells it
# that users usually want their answers as JSON. It is sent as the `system`
# message of every Bedrock Converse call in this notebook.
system_prompt = """
You are a seasoned legal expert specializing in the meticulous review and analysis of commercial contracts. 
Your expertise lies in identifying critical elements within legal documents, assessing compliance with legal standards, 
and ensuring that contracts serve the best interests of the parties involved. 
Your approach is thorough, detail-oriented, and guided by a deep understanding of legal principles and commercial practices.
You will be presented with contracts and be asked questions by users who usually need their output in JSON format.
"""

In [2]:
# Extraction prompt: the user-turn instruction sent together with each contract PDF.
# It has three parts:
#   1. Questions 1-9 about contract metadata (type, parties, dates, renewal, governing law).
#   2. Question 10: for every listed clause type, a Yes/No flag plus verbatim excerpts.
#   3. The target JSON structure the model must fill in and return as its only output.
#
# NOTE(review): question 3 asks for the Agreement Date, but the JSON structure below
#   has no matching field, so that answer has nowhere to go — confirm whether an
#   "agreement_date" key should be added.
# NOTE(review): the structure contains "agreement_name", which none of the questions
#   ask for explicitly — the model has to infer it from the document.
# NOTE(review): "exists" is described as the quoted placeholder "boolean"; presumably
#   a real JSON true/false is wanted — confirm against the downstream consumer.
# The prompt text is behaviour (it is what the model reads), so wording and
# whitespace are intentionally left untouched here.
extraction_prompt = """
Generate a valid JSON document. Do not include anything else other than the JSON document
Using the Answers to the following questions and The schema of the resulting JSON file ( which is specified further down)
In your answers, Use information exclusively on this contract. 

1) What type of contract is this? 
2) Who are the parties and their roles? Where are they incorporated? Name state and country (use ISO 3166 Country name)
3) What is the Agreement Date? (if absolute date is mentioned use yyyy-mm-dd)
4) What is the Effective date? (if absolute date is mentioned use yyyy-mm-dd)
5) What is the expiration date? (if absolute date is mentioned use yyyy-mm-dd)
6) What is the Renewal Term ? 
7) What is the Notice Period To Terminate Renewal? 
8) What is the governing law ? 
Name the state and country (use ISO 3166 Country name)
9) If multiple countries are in the governing law, what is the most favoured country? if there is only one country just repeat the same information for governing law 

10) For each of the contract clause types, extract the following: 
a) A Yes/No that indicates if you think the clause is found in this contract 
b) A list of full (long) excerpts, directly taken from the contract that give you reason to believe that this this clause type exists. 
 

The only Contract Clause types are: Competitive Restriction Exception, Non-Compete, Exclusivity, No-Solicit Of Customers,
No-Solicit Of Employees, Non-Disparagement, Termination For Convenience, Rofr/Rofo/Rofn, Change Of Control, 
Anti-Assignment, Revenue/Profit Sharing, Price Restrictions, Minimum Commitment,Volume Restriction, 
IP Ownership Assignment, Joint IP Ownership, License grant, Non-Transferable License, 
Affiliate License-Licensor, Affiliate License-Licensee,Unlimited/All-You-Can-Eat-License,Irrevocable Or Perpetual License, 
Source Code Escrow, Post-Termination Services, Audit Rights, Uncapped Liability, Cap On Liability, Liquidated Damages, 
Warranty Duration, Insurance, Covenant Not To Sue, Third Party Beneficiary.

Finally, Using the answers to the questions above, provide your final answer in a JSON document.
Make sure the JSON document is VALID and adheres to the correct format. 
 
The JSON document has the following structure: 

{
  "agreement": {
    "agreement_name": "string",
    "agreement_type": "string",
    "effective_date": "string",
    "expiration_date": "string",
    "renewal_term": "string",
    "Notice_period_to_Terminate_Renewal": "string",
    "parties": [
      {
        "role": "string",
        "name": "string",
        "incorporation_country": "string",
        "incorporation_state": "string"
      }
    ],
    "governing_law": {
      "country": "string",
      "state": "string",
      "most_favored_country": "string"
    },
    "clauses": [
      {
        "clause_type": "string",
        "exists": "boolean",
        "excerpts": ["string"]
      }
    ]
  }
}
Ensure the JSON is valid and correctly formatted.



"""

## Test Example Extraction

In [3]:
import boto3, json

# Create a boto3 session; no credentials, profile or region are passed explicitly here.
session = boto3.Session()
# Bedrock runtime client; this is the object the `converse` calls in this notebook go through.
bedrock = session.client(service_name='bedrock-runtime')

In [4]:
# Read the sample contract into memory as raw bytes.
with open("./input/AtnInternational.pdf", "rb") as pdf_handle:
    doc_bytes = pdf_handle.read()

# Single user turn: the PDF as a document block, then the extraction instructions.
# The raw bytes are handed over as-is; no manual base64 step is done here.
doc_message = dict(
    role="user",
    content=[
        {
            "document": dict(
                name="AtnInternational_pdf",
                format="pdf",
                source={"bytes": doc_bytes},
            )
        },
        {"text": extraction_prompt},
    ],
)

In [5]:
# Ask the model to extract the contract data from the sample document.
# temperature=0 is set so the sampling is as deterministic as the service allows.
response = bedrock.converse(
    messages=[doc_message],
    system=[{"text": system_prompt}],
    modelId="us.anthropic.claude-3-5-sonnet-20241022-v2:0",
    inferenceConfig=dict(temperature=0),
)

In [6]:
# Inspect which keys the first content block of the reply carries.
response['output']['message']['content'][0].keys()

dict_keys(['text'])

In [7]:
# Pull the generated text out of the first content block and show it verbatim.
response_text = response['output']['message']['content'][0]['text']
print(response_text)

{
  "agreement": {
    "agreement_name": "Network Build and Maintenance Agreement",
    "agreement_type": "Services and Construction Agreement",
    "effective_date": "2019-07-31",
    "expiration_date": "Not Specified",
    "renewal_term": "Not Specified",
    "Notice_period_to_Terminate_Renewal": "Not Specified",
    "parties": [
      {
        "role": "Vendor",
        "name": "Commnet Wireless, LLC",
        "incorporation_country": "United States",
        "incorporation_state": "Delaware"
      },
      {
        "role": "Customer",
        "name": "AT&T Mobility LLC",
        "incorporation_country": "United States",
        "incorporation_state": "Delaware"
      }
    ],
    "governing_law": {
      "country": "United States",
      "state": "New York",
      "most_favored_country": "United States"
    },
    "clauses": [
      {
        "clause_type": "Change Of Control",
        "exists": true,
        "excerpts": ["If Vendor, prior to Location Acceptance at all Cell Sites 

## Extract All Contracts and Save to JSON

In [8]:
# helper function
def process_pdf(
    source_dir: str,
    file_name: str,
    model_id: str = "us.anthropic.claude-3-5-sonnet-20241022-v2:0",
) -> str:
    """Send one PDF contract to Bedrock and return the model's raw text reply.

    The PDF is attached as a document block, followed by ``extraction_prompt``
    as the text instruction; ``system_prompt`` is sent as the system message.
    The function relies on the notebook-level names ``bedrock``,
    ``system_prompt`` and ``extraction_prompt``, and on the ``os`` and ``re``
    modules being imported before it is called.

    Args:
        source_dir: Directory that contains the PDF.
        file_name: File name of the PDF inside ``source_dir``.
        model_id: Bedrock model / inference-profile ID to call. Defaults to
            the model used throughout this notebook.

    Returns:
        The ``text`` field of the first content block of the reply. The text
        is returned as-is; it is not parsed or validated as JSON here.

    Raises:
        OSError: If the PDF cannot be opened or read.
        Any exception raised by ``bedrock.converse`` propagates unchanged.
    """
    with open(os.path.join(source_dir, file_name), "rb") as doc_file:
        doc_bytes = doc_file.read()

    # Every non-alphanumeric character (including the dot of ".pdf") becomes "_".
    # Presumably this is to satisfy the service's restrictions on document
    # names -- confirm against the Converse API reference.
    document_name = re.sub(r'[^a-zA-Z0-9]', '_', file_name)

    doc_message = {
        "role": "user",
        "content": [
            {
                "document": {
                    "name": document_name,
                    "format": "pdf",
                    "source": {
                        # Raw bytes are passed directly; no manual base64 step.
                        "bytes": doc_bytes
                    }
                }
            },
            { "text": extraction_prompt }
        ]
    }
    response = bedrock.converse(
        modelId=model_id,
        system=[{ "text": system_prompt}],
        messages=[doc_message],
        inferenceConfig={
            "temperature": 0
        },
    )
    return response['output']['message']['content'][0]['text']

def extract_json_from_string(input_string):
    """Parse a model reply into a Python object, tolerating common wrappers.

    The reply is tried, in order, as:
      1. the whole (whitespace-stripped) text,
      2. the contents of the first Markdown code fence
         (```json ... ``` or a bare ``` ... ```), if there is one,
      3. the span from the first "{" to the last "}", which covers replies
         that put prose before or after the JSON document.

    Args:
        input_string: Raw text returned by the model.

    Returns:
        The decoded JSON value (normally a dict), or ``None`` when the input
        is not a non-empty string or none of the candidates is valid JSON.
        A short diagnostic is printed in the ``None`` case; no exception is
        raised.
    """
    if not isinstance(input_string, str) or not input_string.strip():
        print("No valid JSON block found.")
        return None

    text = input_string.strip()
    candidates = [text]

    # Code fence anywhere in the reply, with or without the "json" tag.
    fenced = re.search(r'```(?:json)?\s*(.*?)\s*```', text, flags=re.DOTALL | re.IGNORECASE)
    if fenced:
        candidates.append(fenced.group(1))

    # Outermost braces: last resort for replies with surrounding prose or an
    # unterminated fence.
    first_brace, last_brace = text.find('{'), text.rfind('}')
    if first_brace != -1 and last_brace > first_brace:
        candidates.append(text[first_brace:last_brace + 1])

    last_error = None
    for candidate in candidates:
        try:
            return json.loads(candidate)
        except json.JSONDecodeError as e:
            # Keep the most recent error: it comes from the most narrowed-down
            # candidate and is therefore the most useful one to report.
            last_error = e

    print(f"Error parsing JSON: {last_error}")
    return None

def save_json_string_to_file(json_string, file_path):
    """Write ``json_string`` to ``file_path``, replacing any existing file.

    The string is written verbatim; it is not parsed or validated, so the
    function is also usable for raw (possibly non-JSON) model replies.

    Args:
        json_string: Text to write.
        file_path: Destination path. The parent directory must already exist.

    Raises:
        OSError: If the file cannot be opened for writing.
    """
    # Explicit UTF-8 so the result does not depend on the platform's default
    # encoding; contract excerpts can contain non-ASCII characters.
    with open(file_path, 'w', encoding='utf-8') as file:
        file.write(json_string)
    

In [10]:
# Create Directories if not already exist
import os 

# ./debug/ receives the raw model replies, ./output/ the parsed JSON files.
# exist_ok=True makes the cell safe to re-run and avoids the
# check-then-create race of a separate os.path.exists() test.
for d in ['./debug/', './output/']:
    os.makedirs(d, exist_ok=True)

In [11]:
import os
import re
from tqdm import tqdm

source_dir = './input/'
file_names = [filename for filename in os.listdir(source_dir) if filename.endswith('.pdf')]

# Contracts whose reply could not be parsed; kept so they can be inspected or retried.
failed_files = []

for file_name in tqdm(file_names):
    # Extract content from PDF using LLM
    response = process_pdf(source_dir, file_name)

    # Always log the complete raw reply first, so a failed parse can be debugged.
    # (os.path.join avoids the stray space that used to end up in the file name.)
    debug_path = os.path.join('./debug/', 'complete_response_' + file_name + '.json')
    save_json_string_to_file(response, debug_path)

    # extract_json_from_string reports failure by returning None, not by raising,
    # so the result has to be checked explicitly. Without this check a failed
    # parse would be written to ./output/ as the literal "null".
    contract_json = extract_json_from_string(response)
    if contract_json is None:
        print(f"Failed to decode JSON for {file_name}; raw response saved to {debug_path}")
        failed_files.append(file_name)
        continue

    # Store as valid JSON so it can be imported into a KG later
    json_string = json.dumps(contract_json, indent=4)
    save_json_string_to_file(json_string, './output/' + file_name + '.json')
    

100%|██████████| 10/10 [03:38<00:00, 21.82s/it]
