In [4]:
from ibm_watsonx_ai import APIClient
from ibm_watsonx_ai import Credentials
from ibm_watsonx_ai.foundation_models import ModelInference
from ibm_watsonx_ai.metanames import GenTextParamsMetaNames as GenParams
from ibm_watsonx_ai.foundation_models.utils.enums import DecodingMethods
from dotenv import load_dotenv
import os

load_dotenv()  # load IBMLOCATION / APIKEY / PROJECTID / SPACEID from a local .env file, if present

# Fail fast with a readable message when a setting this cell relies on is absent.
# os.getenv() returns None for an unset variable, which would otherwise only
# surface later as an error raised from inside the SDK calls below.
# SPACEID is read below but not checked here, so it stays optional.
_required_settings = ("IBMLOCATION", "APIKEY", "PROJECTID")
_missing_settings = [name for name in _required_settings if not os.getenv(name)]
if _missing_settings:
    raise RuntimeError(
        "Missing required environment variables: " + ", ".join(_missing_settings)
    )

credentials = Credentials(
                   url = os.getenv('IBMLOCATION'),
                   api_key = os.getenv('APIKEY')
                  )

project_id = os.getenv('PROJECTID')
space_id = os.getenv('SPACEID')

client = APIClient(credentials, project_id = project_id)

client.spaces.list(limit=10)
client.set.default_project(project_id)

# Last expression of the cell: shown as the cell output once every call above returned.
'SUCCESS'

In [5]:
# Generation parameters handed to ModelInference below and passed again on the
# generate() call in a later cell.
gen_parms = {
    # NOTE(review): SAMPLE presumably selects sampling-based decoding, so repeated
    # runs may return different text — confirm, and consider a fixed seed if the
    # annotations need to be reproducible.
    GenParams.DECODING_METHOD: DecodingMethods.SAMPLE,
    # Presumably the cap on generated tokens (per the parameter name) — TODO confirm
    # that 5000 is within the selected model's limit.
    GenParams.MAX_NEW_TOKENS: 5000
}
# Model identifier looked up on the client's TextModels listing.
model_id = client.foundation_models.TextModels.GRANITE_3_8B_INSTRUCT
# NOTE(review): verify=False presumably turns off TLS certificate verification for
# the SDK's HTTP calls — confirm against the SDK docs and avoid outside local testing.
verify = False

# Inference handle bound to the project; the later annotation cell calls
# model.generate() on it.
model = ModelInference(
  model_id=model_id,
  credentials=credentials,
  params=gen_parms,
  project_id=project_id,  
  verify=verify,
)


In [6]:
import docx2txt
# Pull the plain text out of the NDA Word document.
text = docx2txt.process("nda.docx")
#print(text)

# Free-form "annotate inline" prompt; the document text is appended after a blank line.
prompt_txt = (
    "As a legal assistant, find not needed sentences in the following text. Please generate an annotated version "
    "of the document that highlights changes inline in a human-readable format, "
    "please generate the document directly and do not output the changes and provide a download link, "
    "if you cannot fullfill this task let me know. \n \n"
    + text
)

# Plain-text generation call, currently disabled:
#generated_text_response = model.generate_text(prompt=prompt_txt, params=gen_parms)
#print("Output from generate_text() method:")
#print(generated_text_response)

In [7]:
#prompt = (
#        "Analyze the following NDA document and generate inline annotations that highlight key differences or changes. " \
#        "Provide the annotations in a structured compact (no newline  or \n characters) we would like to have plain-JSON format " \
#        "Please ensure you always output valid json, do not abort in the middle of processing. Generate the root element:" \
#        "'ANNOTATIONS' and the following element per change: 'page', 'section', 'annotation_type', 'reason', 'comment'. " \
#        "\n\n" + text
#    )

# Instruction block asking for a single JSON object rooted at 'ANNOTATIONS';
# the NDA text is appended as the final fragment.
prompt = "".join((
    "You are given a Non-Disclosure Agreement (NDA) document. Your task is to analyze the document and generate inline annotations ",
    "that highlight any key differences or changes. Each annotation must be a JSON object with the following keys: 'page', 'section', ",
    "'annotation_type', 'reason', and 'comment'. All annotations should be wrapped in a single JSON object with a root key 'ANNOTATIONS' ",
    "whose value is an array of these annotation objects. Your output must be valid, compact JSON (i.e., no newline characters or extra whitespace), ",
    "and must not include any additional text. \n\n",
    text,
))

# Full response from the model call; a later cell reads
# annotated_output["results"][0]["generated_text"] from it.
annotated_output = model.generate(prompt=prompt, params=gen_parms)

# Heading line followed by the raw response on the next line.
print("Annotated NDA Document:", annotated_output, sep="\n")

Annotated NDA Document:
{'model_id': 'ibm/granite-3-8b-instruct', 'model_version': '1.1.0', 'created_at': '2025-03-30T18:02:42.553Z', 'results': [{'generated_text': '\n\n\n\n\n\n\nThe attached statement of material changes (`Statement of Material Changes`) contains the revisions changed for the new version of this document\xa0\n\n\n\nANNOTATIONS:\n\n{\n  "ANNOTATIONS": [\n    {\n      "page": 1,\n      "section": "Definitions and interpretation",\n      "annotation_type": "change",\n      "reason": "New parties added",\n      "comment": "The Recipient Section has been expanded to include HUBER+SUHNER AG and clarified the residences of the Disclosers."\n    },\n    {\n      "page": 2,\n      "section": "Confidential Information",\n      "annotation_type": "change",\n      "reason": "Clarification of Permitted Purpose",\n      "comment": "The Permitted Purpose is now defined as explicitly considering, evaluating, advising, and negotiating on the Proposed Acquisition."\n    },\n    {\n   

In [8]:
import json
from docx import Document
from docxcompose.composer import Composer

def parse_annotations(generated_text):
    """
    Extract the annotation list from the raw text returned by the model.

    The JSON payload is located by the first '{' in the text. Lines that start
    with ``//`` are dropped, and the remainder is decoded with
    ``json.JSONDecoder.raw_decode``, which stops at the end of the first
    complete JSON value. Prose before *and after* the JSON is therefore
    tolerated.

    Accepted payload shapes:
      * an object with a root key "ANNOTATIONS" or "annotations";
      * a bare top-level array whose '[' directly precedes the first '{'.

    Args:
        generated_text: Text produced by the model (str).

    Returns:
        ``{"annotations": <value>}`` on success, or ``None`` when the JSON is
        malformed or truncated (the decoder error is printed).

    Raises:
        ValueError: if the text contains no '{', or the decoded JSON carries
            no annotations in any of the accepted shapes.
    """
    def _decode_from(offset):
        # Decode the first complete JSON value starting at `offset`, ignoring
        # //-comment lines and whatever follows the value.
        fragment = generated_text[offset:].strip()
        kept_lines = [
            line for line in fragment.splitlines()
            if not line.strip().startswith("//")
        ]
        value, _end = json.JSONDecoder().raw_decode("\n".join(kept_lines))
        return value

    # Find the first JSON object
    start = generated_text.find("{")
    if start == -1:
        raise ValueError("No JSON object found in the generated text.")

    try:
        parsed = _decode_from(start)
    except json.JSONDecodeError as e:
        print("Error parsing JSON:", e)
        return None

    # Normalize the key to always be 'annotations'
    for key in ("ANNOTATIONS", "annotations"):
        if key in parsed:
            return {"annotations": parsed[key]}

    # The first '{' may be the first element of a top-level array; in that
    # case a '[' sits right before it with nothing but whitespace in between.
    bracket = generated_text.rfind("[", 0, start)
    if bracket != -1 and not generated_text[bracket + 1:start].strip():
        try:
            parsed = _decode_from(bracket)
        except json.JSONDecodeError as e:
            print("Error parsing JSON:", e)
            return None
        if isinstance(parsed, list):
            return {"annotations": parsed}

    raise ValueError("Parsed JSON does not contain annotations.")

def create_annotations_doc(annotations, output_file="annotations.docx"):
    """
    Write the annotations into a new document and return its path.

    The document starts with a level-1 heading "Inline Annotations". Each entry
    under ``annotations["annotations"]`` becomes one paragraph with a
    "<Label>: <value>" line for every field whose value is truthy, in the order
    page, section, annotation type, reason, comment. An entry with no truthy
    field still produces an (empty) paragraph.

    Args:
        annotations: Mapping with an "annotations" key holding the entries;
            a missing key is treated as an empty list.
        output_file: Path the document is saved to.

    Returns:
        ``output_file``, unchanged.
    """
    # (field key, line prefix) in output order.
    field_prefixes = (
        ("page", "Page: "),
        ("section", "Section: "),
        ("annotation_type", "Annotation Type: "),
        ("reason", "Reason: "),
        ("comment", "Comment: "),
    )

    report = Document()
    report.add_heading("Inline Annotations", level=1)

    for entry in annotations.get("annotations", []):
        # Read every field first, then keep only the ones that carry a value.
        field_values = [(prefix, entry.get(key, "")) for key, prefix in field_prefixes]
        rendered = [f"{prefix}{value}" for prefix, value in field_values if value]
        report.add_paragraph("\n".join(rendered))

    report.save(output_file)
    return output_file

def merge_documents(original_docx, annotations_docx, output_docx):
    """
    Append the annotations document to the original NDA and save the result.

    Both inputs are opened with ``Document``. The annotations document is
    appended after the original's content through a ``Composer`` built on the
    original, the combined document is saved to ``output_docx``, and a
    confirmation line is printed.

    Args:
        original_docx: Path of the original NDA document.
        annotations_docx: Path of the document holding the annotations.
        output_docx: Path the merged document is written to.
    """
    # Open both inputs before composing.
    base_doc = Document(original_docx)
    appendix_doc = Document(annotations_docx)

    merger = Composer(base_doc)
    merger.append(appendix_doc)
    merger.save(output_docx)

    confirmation = f"Merged document saved as {output_docx}"
    print(confirmation)

if __name__ == "__main__":

    # Extract the generated text from the API response.
    generated_text = annotated_output["results"][0]["generated_text"]
    print("generated_text:")
    print(generated_text)

    # Parse the annotations from the generated text.
    annotations_dict = parse_annotations(generated_text)
    if annotations_dict is None:
        print("Failed to parse annotations. Exiting.")
        # Raise SystemExit directly instead of calling exit(): `exit` is the
        # interactive-shell helper installed by `site` and is not meant for
        # programs. NOTE(review): inside a notebook kernel it is IPython's own
        # object, which presumably does not halt the cell at this point, so
        # the steps below could still run with annotations_dict = None — confirm.
        raise SystemExit(1)

    print("annotations_dict:")
    print(annotations_dict)

    # Create a DOCX file containing the annotations.
    annotations_docx = create_annotations_doc(annotations_dict, "annotations.docx")

    # Specify your original NDA DOCX file (ensure 'nda.docx' exists).
    original_docx = "nda.docx"

    # Merge the original NDA document with the annotations document.
    merge_documents(original_docx, annotations_docx, "annotated_nda.docx")


generated_text:







The attached statement of material changes (`Statement of Material Changes`) contains the revisions changed for the new version of this document 



ANNOTATIONS:

{
  "ANNOTATIONS": [
    {
      "page": 1,
      "section": "Definitions and interpretation",
      "annotation_type": "change",
      "reason": "New parties added",
      "comment": "The Recipient Section has been expanded to include HUBER+SUHNER AG and clarified the residences of the Disclosers."
    },
    {
      "page": 2,
      "section": "Confidential Information",
      "annotation_type": "change",
      "reason": "Clarification of Permitted Purpose",
      "comment": "The Permitted Purpose is now defined as explicitly considering, evaluating, advising, and negotiating on the Proposed Acquisition."
    },
    {
      "page": 3,
      "section": "Return of Confidential Information",
      "annotation_type": "change",
      "reason": "Deadline specified",
      "comment": "The Recipient is now re