In [None]:
!pip install python-docx ipywidgets langchain-google-genai

In [1]:
import ipywidgets as widgets
from IPython.display import display, Markdown

uploader_v1 = widgets.FileUpload(accept=".txt,.docx", multiple=False)
uploader_v2 = widgets.FileUpload(accept=".txt,.docx", multiple=False)

display(uploader_v1)
display(uploader_v2)


FileUpload(value=(), accept='.txt,.docx', description='Upload')

FileUpload(value=(), accept='.txt,.docx', description='Upload')

In [2]:
from docx import Document
import os

def read_file_content(uploaded_file):
    if isinstance(uploaded_file, dict):
        file_info = next(iter(uploaded_file.values()))
    elif isinstance(uploaded_file, (tuple, list)):
        file_info = uploaded_file[0]
    else:
        raise ValueError("Unexpected uploaded_file type:", type(uploaded_file))

    name = file_info['name']  
    content = file_info['content']

    with open(name, 'wb') as f:
        f.write(content)

    text = ""
    if name.endswith('.txt'):
        with open(name, 'r', encoding='utf-8', errors='ignore') as f:
            text = f.read()
    elif name.endswith('.docx'):
        doc = Document(name)
        text = "\n".join([p.text for p in doc.paragraphs])

    os.remove(name)
    return text


In [3]:
text_v1 = read_file_content(uploader_v1.value)
text_v2 = read_file_content(uploader_v2.value)

In [4]:
from langchain_google_genai import ChatGoogleGenerativeAI

def ai_semantic_diff(text_v1, text_v2):
#     prompt = f"""
# You are a professional document analyst. Compare the following two versions of a file.
# List all differences in **content**, **meaning**, and **tone**.

# Version 1:
# {text_v1}

# Version 2:
# {text_v2}

# Provide your answer in this format:
# - Added content:
# - Removed content:
# - Modified content:
# - Summary of overall changes:
# """
    prompt = f"""
You are a professional document analyst. Compare the following two versions of a file.
List all differences in **content**, **meaning**, and **tone**.

Version 1:
{text_v1}

Version 2:
{text_v2}

Provide your answer in this format:
- Modified content:
- Summary of overall changes:
"""
    llm = ChatGoogleGenerativeAI(
        model="gemini-2.5-flash",
        temperature=0,
        google_api_key = "AIzaSyC3e7DiyNIhs1FllYUP2I1Zbr6mS9DXNAU"
    )

    result = llm.invoke(prompt)
    return getattr(result, "content", str(result))



In [5]:
import difflib
from IPython.display import Markdown, display


diff = difflib.unified_diff(
        text_v1.splitlines(),
        text_v2.splitlines(),
        fromfile="Version 1",
        tofile="Version 2",
        lineterm=""
    )
simple_diff = "\n".join(diff)
ai_report = ai_semantic_diff(text_v1, text_v2)

final_report = f"""
# 🔍 AI-Powered Change Report

## 🧠 Semantic Summary
{ai_report}

---

## 🧩 Technical Line Differences
"""

display(Markdown(final_report))



# 🔍 AI-Powered Change Report

## 🧠 Semantic Summary
Here's a comparison of the two versions:

**Modified content:**

*   **Document Title:**
    *   **Version 1:** "SOP 1: HR Policy – Standard Operating Procedure"
    *   **Version 2:** "SOP 2: HR Policy – Revised Version (Minor Changes)"
    *   *Difference:* The SOP number is incremented, and the description explicitly states it's a "Revised Version (Minor Changes)" instead of "Standard Operating Procedure."
*   **Version Number:**
    *   **Version 1:** "1.0"
    *   **Version 2:** "1.1"
    *   *Difference:* Version number is incremented to reflect a minor update.
*   **New Section in Version 2:** "Changes Made from SOP 1:"
    *   **Version 1:** N/A
    *   **Version 2:** Includes a bulleted list summarizing key changes:
        *   "Added a Policy Review Timeline (every 6 months)."
        *   "Included a Feedback Mechanism section."
        *   "Simplified communication methods for clarity."
    *   *Difference:* Version 2 adds a new section providing a high-level overview of the updates.
*   **Section 4. Procedure Title:**
    *   **Version 1:** "4. Procedure"
    *   **Version 2:** "4. Procedure (Revised)"
    *   *Difference:* Version 2 explicitly labels the section as "Revised."
*   **4.1 Policy Drafting:**
    *   **Version 1:** "HR drafts new policies or updates existing ones based on organizational needs or legal changes."
    *   **Version 2:** "HR drafts or updates policies every 6 months or as required."
    *   *Difference:* Version 2 introduces a specific, proactive review timeline ("every 6 months") in addition to reactive updates.
*   **4.2 Internal Review:**
    *   **Version 1:** "HR circulates the draft for review by department heads and management."
    *   **Version 2:** "HR circulates drafts to department heads for comments via email or HR system."
    *   *Difference:* Version 2 specifies the method of circulation ("via email or HR system") and narrows the primary reviewers for "comments" to "department heads," omitting "and management" from this step.
*   **4.3 Approval:**
    *   **Version 1:** "Final policy is reviewed and approved by top management or authorized personnel."
    *   **Version 2:** "Management approves the final version before rollout."
    *   *Difference:* Version 2 simplifies the language, removing "reviewed and" and specifying "Management" rather than "top management or authorized personnel," and adds "before rollout" for clarity on timing.
*   **4.4 Communication:**
    *   **Version 1:** "Approved policy is shared with all employees through email, HR portal, or meetings."
    *   **Version 2:** "HR communicates changes via email, intranet, and town hall meetings."
    *   *Difference:* Version 2 specifies that "HR communicates changes" (focusing on updates) and refines the communication channels from "HR portal" to "intranet" and "meetings" to "town hall meetings."
*   **4.5 Feedback Collection (New):**
    *   **Version 1:** N/A (This was "Implementation" in V1)
    *   **Version 2:** "Employees can submit feedback or suggestions through an online HR portal or suggestion box."
    *   *Difference:* Version 2 introduces an entirely new step for collecting employee feedback, which was not present in Version 1.
*   **4.6 Implementation & Monitoring:**
    *   **Version 1:** (This was split into 4.5 Implementation and 4.6 Monitoring & Review)
        *   4.5 Implementation: "HR ensures all employees acknowledge receipt and understanding of the policy."
        *   4.6 Monitoring & Review: "HR reviews the policy annually or as needed to ensure relevance and compliance."
    *   **Version 2:** "HR tracks acknowledgment and ensures all employees understand the policy updates."
    *   *Difference:* Version 2 combines the previous two steps into one, focusing on tracking acknowledgment and understanding of "policy updates." The explicit "annual review" frequency is removed from this step, as the review cycle is now stated in 4.1.
*   **7. Revision History:**
    *   **Version 1:** Contains only the "1.0 | [Date] | Initial Release | [Name]" entry.
    *   **Version 2:** Adds a new entry: "1.1 | [Date] | Added review cycle & feedback system | [Name]".
    *   *Difference:* Version 2 updates the revision history to document the changes made in version 1.1.

**Summary of overall changes:**

*   **Content:** Version 2 introduces a proactive policy review timeline (every 6 months), a formal employee feedback mechanism, and refines communication channels. It also streamlines and clarifies several procedural steps, combining some and making others more specific. The document's metadata is updated to reflect its status as a minor revision.
*   **Meaning:** The shift from Version 1 to Version 2 indicates a move towards a more structured, proactive, and inclusive HR policy management process. The introduction of a mandatory review cycle and a feedback mechanism signifies a greater emphasis on continuous improvement and employee engagement. The changes also aim for clearer, more specific procedural instructions.
*   **Tone:** Version 2 maintains a professional and formal tone but becomes slightly more **prescriptive** (e.g., "every 6 months") and **transparent** (e.g., "Changes Made from SOP 1" section, formal feedback mechanism). It also feels more **streamlined** and **modern** with updated communication methods and concise language.

---

## 🧩 Technical Line Differences


In [6]:
with open("AI_change_report.md", "w", encoding="utf-8") as f:
    f.write(final_report)
