In [8]:
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from helpers.llm_integrations import get_llm
from dotenv import load_dotenv

# Load variables from a local .env file into the process environment before the
# LLM client is built below (presumably this is where the API key comes from —
# confirm against helpers.llm_integrations).
load_dotenv()

# System prompt for the alignment-suggestion chain.
#
# Placeholders filled at invoke time: {document1}, {document2}, {explanation},
# {target_documents}, {k}. Literal braces in the JSON example are doubled
# ({{ / }}) so the f-string style template renders them as single braces.
#
# The JSON example must itself be valid JSON once rendered: the model tends to
# mirror it, and the reply is parsed with json.loads further down. The
# suggestions array is therefore the direct value of "document1_suggestions"
# (an array wrapped in bare object braces is not parseable).
#
# NOTE(review): the bullet list asks for "the original text" in each
# suggestion, but the example only shows "version" and "modification".
# Confirm which schema downstream consumers expect and align the two.
instruction = """
You are an expert in multilingual document comparison and modification. Your task is to analyze two documents in different languages and suggest changes to align them while preserving the original language and style of the target document(s).

Here are the two documents and their respective languages:

Document 1:
<document1>
{document1}
</document1>

Document 2:
<document2>
{document2}
</document2>

Now, here is the explanation of the differences between the two documents:
<comparison_explanation>
{explanation}
</comparison_explanation>

The document(s) that need to be modified are:
<target_document>
{target_documents}
</target_document>

Your task is to analyze the differences explained in the comparison explanation and suggest specific changes to make the target document(s) match the other document as closely as possible, while maintaining the original language and style of the target document(s).

Please follow these steps:

1. Analyze the documents and the comparison explanation.
2. Identify the key differences between the documents.
3. Generate {k} suggestions for modifying the target document(s).
4. Ensure that your suggested changes are appropriate for the language and context of the target document(s).
5. Consider any cultural or linguistic nuances that need to be addressed.

After your analysis, provide your suggestions in JSON format. The JSON structure should only include suggestions for the specified target document(s). Each suggestion should include the following:
- The original text
- The suggested modification (which should be the complete updated version of the original text)
- A version number (v1, v2, etc.)

Here's an example of the expected JSON structure (do not use this content in your response, it's just for illustration):

```json
{{
  "document1_suggestions": [
    {{
      "version": "v1",
      "modification": "Modified text here"
    }},
    {{
      "version": "v2",
      "modification": "Another modified text"
    }}
  ]
}}
```

Important notes:
1. Generate exactly {k} suggestions per target document. If both documents are targets, provide {k} suggestions for each.
2. Only include suggestions for the specified target document(s) in your JSON output.
3. Do not include explanations for the suggestions.

After completing your suggestions, review them to ensure they accurately address the differences noted in the comparison explanation and are appropriate for the target document's language and context.
"""

# The system message carries the whole task; the user turn only triggers it.
user_input = "Begin your analysis now."

prompt = ChatPromptTemplate.from_messages(
    [
        ("system", instruction),
        ("user", user_input),
    ]
)

# Pipeline: rendered prompt -> chat model -> plain string.
llm = get_llm("gpt-4o")
output_parser = StrOutputParser()

chain = prompt | llm | output_parser

  from .autonotebook import tqdm as notebook_tqdm


In [14]:
# Sample inputs for a manual run of the chain.
# document1: English version of the fees clause (fills {document1} in the prompt).
document1 = """
The Cardholder agrees to pay all Fees, commissions and/or charges incurred in this clause and authorise the Bank to debit the Account, at any time notwithstanding that such debiting may cause the Account to be overdrawn. The following Fees, commissions and/or charges is imposed at the following rate or such other rate as the Bank shall at its discretion vary from time to time by giving twenty-one (21) calendar days’ prior notice to the Cardholder for transactions effected by use of the Debit Card. For the full list of fees and charges, please visit our website www.hlb.com.my/dc1 or scan here:"""

# document2: Malay-language version of the same clause (fills {document2}).
document2 = """
Pemegang Kad bersetuju untuk membayar semua fi, komisen dan/atau caj yang dikenakan dalam klausa ini dan membenarkan Bank untuk mendebit Akaun Pemegang Kad, tanpa mengambilkira pendebitan tersebut mungkin menyebabkan Akaun terlebihguna. Bayaran, komisen dan/atau caj berikut dikenakan pada kadar yang dinyatakan atau kadar lain yang ditetapkan, yang boleh dipinda oleh Bank untuk membuat pemindahan dari semasa ke semasa dengan memberi dua puluh satu (21) hari kalendar notis terlebih dahulu kepada Pemegang Kad untuk transaksi yang dilaksanakan melalui penggunaan Kad Debit. Untuk senarai fi dan caj yang lengkap, sila layari laman web kami www.hlb.com.my/dc2 atau imbas di sini:
"""

# explanation: free-text description of the discrepancy to resolve (fills
# {explanation}); here it covers only the differing URLs.
explanation = """
The URLs provided for the full list of fees and charges differ between the two documents. Document 1 refers to www.hlb.com.my/dc1, while Document 2 refers to www.hlb.com.my/dc2. This discrepancy could lead to confusion or misdirection for the cardholder seeking information.
"""

# Both documents are named as targets, so the prompt asks for k suggestions for each.
target_documents = "Document 1, Document 2"

In [15]:
# Fill every prompt placeholder and run the chain once; `k` is the number of
# suggestions requested per target document.
result = chain.invoke(
    dict(
        document1=document1,
        document2=document2,
        explanation=explanation,
        target_documents=target_documents,
        k=2,
    )
)

In [16]:
import json
from helpers.document_processor import DocumentProcessor
from IPython.display import JSON

# The model reply is expected to be JSON, possibly wrapped in a Markdown fence;
# remove_code_fences presumably strips that wrapper (see helpers.document_processor)
# so the remainder can be parsed.
document_processor = DocumentProcessor()
unfenced_output = document_processor.remove_code_fences(result)
final_result = json.loads(unfenced_output)

# Last expression of the cell: render the parsed suggestions with IPython's JSON display.
JSON(final_result)

<IPython.core.display.JSON object>