In [1]:
import pandas as pd
import os
import json
import sys
import ipywidgets as widgets
from IPython.display import display, clear_output
from datetime import datetime

pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
pd.set_option('display.max_colwidth', None)
pd.set_option('display.max_rows', None)

PROJECT_ROOT = os.path.abspath(os.path.join(os.getcwd(), '..'))
if PROJECT_ROOT not in sys.path:
    sys.path.append(PROJECT_ROOT)
    print(f"Added to sys.path: {PROJECT_ROOT}")
from scraper.llm_scraper import call_gemini # Ensure this line works after path modification


DATA_DIR = "/Users/juankostelec/Google_drive/Projects/legal-assistant-bot/data/"
METADATA_PATH = os.path.join(DATA_DIR, "raw", "agreement_metadata.csv")
VALIDITY_ANALYSIS_PATH = os.path.join(DATA_DIR, "raw", "agreement_validity.csv") # Or "agreement_validity_analysis_v1.csv"
CATEGORY_COL_TO_FILTER = "category"
VALUE_TO_FILTER = "Commercials" 

# Global list to store all agreement titles for "impacted_agreements" fix
ALL_AGREEMENT_TITLES = []
try:
    if os.path.exists(METADATA_PATH):
        meta_df_for_titles = pd.read_csv(METADATA_PATH)
        if 'agreement_title' in meta_df_for_titles.columns:
            ALL_AGREEMENT_TITLES = meta_df_for_titles['agreement_title'].dropna().tolist()
            print(f"Loaded {len(ALL_AGREEMENT_TITLES)} agreement titles for AI Fix context.")
        else:
            print("Warning: 'agreement_title' column not found in metadata CSV. AI Fix for impacted_agreements may be impaired.")
    else:
        print(f"Warning: Metadata file not found at {METADATA_PATH}. ALL_AGREEMENT_TITLES will be empty.")
except Exception as e:
    print(f"Error loading all agreement titles: {e}")

Added to sys.path: /Users/juankostelec/Google_drive/Projects/legal-assistant-bot
Loaded 141 agreement titles for AI Fix context.


In [2]:
# --- Prompts and Schemas for AI Fix ---

# For valid_from
PROMPT_FIX_VALIDITY_FROM = """
Given the title and full text of a legal agreement, please extract the exact start date of its validity period.
Also, provide a direct quote or a context of at least 50 words from the agreement text that justifies this start date.

Format the date as YYYY/MM/DD. If a specific day or month is not mentioned, use XX (e.g., YYYY/XX/XX or YYYY/MM/XX).
If the date cannot be determined, return an empty string for 'valid_from' and an empty string for 'justification'.

Agreement Title: {title}
Agreement Info: {info}
Full Agreement Text:
```text
{text}
```
Response Format: JSON object with keys "valid_from" (string) and "justification" (string).
"""

SCHEMA_FIX_VALIDITY_FROM = {
    "type": "object",
    "properties": {
        "valid_from": {"type": "string", "description": "The extracted start date (YYYY/MM/DD or with XX)."},
        "justification": {"type": "string", "description": "Quote or context justifying the start date."}
    },
    "required": ["valid_from", "justification"],
}

# For valid_to
PROMPT_FIX_VALID_TO = """
Given the title and full text of a legal agreement, please extract the exact end date of its validity period.
Also, provide a direct quote or a context of at least 50 words from the agreement text that justifies this end date.

Format the date as YYYY/MM/DD. If a specific day or month is not mentioned, use XX (e.g., YYYY/XX/XX or YYYY/MM/XX).
If the agreement is indefinite or the end date cannot be determined, return an empty string or "Indefinite" for 'valid_to' and an empty string for 'justification'.

Agreement Title: {title}
Agreement Info: {info}
Full Agreement Text:
```text
{text}
```
Response Format: JSON object with keys "valid_to" (string) and "justification" (string).
"""

SCHEMA_FIX_VALID_TO = {
    "type": "object",
    "properties": {
        "valid_to": {"type": "string", "description": "The extracted end date (YYYY/MM/DD or with XX, or 'Indefinite')."},
        "justification": {"type": "string", "description": "Quote or context justifying the end date."}
    },
    "required": ["valid_to", "justification"],
}

# For impacted_agreements
PROMPT_FIX_IMPACTED_AGREEMENTS = """
Given the title, full text of a primary legal agreement, and a list of all possible agreement titles, identify which of these other agreements are directly amended, superseded, referenced, or otherwise impacted by the primary agreement.

For each impacted agreement found, provide:
1. Its exact title from the provided list of all possible agreement titles.
2. A direct quote or a context of at least 50 words from the primary agreement text that justifies why this specific agreement is considered impacted.

Return a list of objects. Each object should have "impacted_agreement_title" (string) and "justification" (string).
If no other agreements are impacted, return an empty list.

Primary Agreement Title: {title}
Primary Agreement Info: {info}
Full Primary Agreement Text:
```text
{text}
```

List of All Possible Agreement Titles for reference:
{all_titles}

Response Format: JSON list of objects. Each object: {"impacted_agreement_title": "...", "justification": "..."}
"""

SCHEMA_FIX_IMPACTED_AGREEMENTS = {
    "type": "object",
    "properties": {
        "impacted_agreements_justified": {
            "type": "array",
            "items": {
                "type": "object",
                "properties": {
                    "impacted_agreement_title": {"type": "string", "description": "The exact title of the impacted agreement."},
                    "justification": {"type": "string", "description": "Quote or context justifying the impact."}
                },
                "required": ["impacted_agreement_title", "justification"]
            }
        }
    },
    "required": ["impacted_agreements_justified"],
}

LLM_MODEL_FOR_FIX = "gemini-2.5-pro-preview-03-25"

In [3]:
def analyze_agreements_by_category(metadata_csv_path, validity_csv_path, category_column, category_value):
    """
    Loads agreement metadata and validity analysis, merges them, and filters by a specific category.

    Args:
        metadata_csv_path (str): Path to the agreement metadata CSV file.
        validity_csv_path (str): Path to the agreement validity analysis CSV file.
        category_column (str): The name of the column in the metadata CSV to filter by.
        category_value (str): The specific category value to filter for.

    Returns:
        pandas.DataFrame: A DataFrame containing the merged and filtered agreement data.
    """
    try:
        # Load the datasets
        metadata_df = pd.read_csv(metadata_csv_path)
        validity_df = pd.read_csv(validity_csv_path)

        # Ensure 'agreement_id' is present for merging
        if "agreement_id" not in metadata_df.columns:
            print("Error: 'agreement_id' column not found in metadata CSV. Please ensure it exists (e.g., by running the preprocessing script).")
            return pd.DataFrame()
        if "agreement_id" not in validity_df.columns:
            print("Error: 'agreement_id' column not found in validity analysis CSV.")
            return pd.DataFrame()

        # Merge the dataframes
        merged_df = pd.merge(metadata_df, validity_df, on="agreement_id", how="inner")
        
        # TODO: Ensure that the agreement_title_x and agreement_title_y are the same and then drop the agreement_title_y column, rename the agreement_title_x column to agreement_title
        if merged_df["agreement_title_x"].equals(merged_df["agreement_title_y"]):
            merged_df = merged_df.drop(columns=["agreement_title_y"])
            merged_df = merged_df.rename(columns={"agreement_title_x": "agreement_title"})
        else:
            raise ValueError("agreement_title_x and agreement_title_y are not the same")


        # Filter by the specified category
        if category_column not in merged_df.columns:
            print(f"Error: Category column '{category_column}' not found in the merged data.")
            return pd.DataFrame()
        
        filtered_df = merged_df[merged_df[category_column] == category_value]

        if filtered_df.empty:
            print(f"No agreements found for category '{category_column}' with value '{category_value}'.")

        return filtered_df

    except FileNotFoundError as e:
        print(f"Error: File not found. Please check the paths. Details: {e}")
        return pd.DataFrame()
    except Exception as e:
        print(f"An unexpected error occurred: {e}")
        return pd.DataFrame()


In [4]:
ANNOTATION_DIR = "/Users/juankostelec/Google_drive/Projects/legal-assistant-bot/data/validation"
if not os.path.exists(ANNOTATION_DIR):
    os.makedirs(ANNOTATION_DIR)
    print(f"Created annotation directory: {ANNOTATION_DIR}")

ANNOTATION_FILE = os.path.join(ANNOTATION_DIR, "annotations.csv")
ANNOTATION_COLUMNS = [
    "agreement_id", "agreement_title", "category",
    "llm_validity_from", "human_validity_from_is_correct", "human_validity_from_corrected",
    "llm_valid_to", "human_valid_to_is_correct", "human_valid_to_corrected",
    "llm_impacted_agreements", "human_impacted_agreements_is_correct", "human_impacted_agreements_corrected",
    "annotation_notes", "last_annotated_timestamp"
]

# This will be taken from the 'VALUE_TO_FILTER' in the cell where 'filtered_agreements' is created.
# Initialize here as a placeholder. It will be updated by the UI creation function.
current_category_for_annotation = 'UnknownCategory'
if 'VALUE_TO_FILTER' in locals():
    current_category_for_annotation = VALUE_TO_FILTER

def load_or_initialize_annotations():
    if os.path.exists(ANNOTATION_FILE):
        try:
            print(f"Loading existing annotations from {ANNOTATION_FILE}")
            df = pd.read_csv(ANNOTATION_FILE, keep_default_na=False, na_values=['']) # Treat empty strings as empty, not NaN by default for corrected fields
            # Ensure all defined columns exist, add if missing
            for col in ANNOTATION_COLUMNS:
                if col not in df.columns:
                    # Set default for boolean columns to True, others to None (which becomes pd.NA or empty string later)
                    default_val = True if col in ['human_validity_from_is_correct', 'human_valid_to_is_correct', 'human_impacted_agreements_is_correct'] else None
                    df[col] = default_val
            return df[ANNOTATION_COLUMNS] # Ensure column order and selection
        except pd.errors.EmptyDataError:
            print(f"Annotation file {ANNOTATION_FILE} is empty. Initializing new one.")
            return pd.DataFrame(columns=ANNOTATION_COLUMNS)
        except Exception as e:
            print(f"Error loading annotations: {e}. Re-initializing.")
            return pd.DataFrame(columns=ANNOTATION_COLUMNS)
    else:
        print(f"No existing annotation file found. Initializing new one at {ANNOTATION_FILE}")
        return pd.DataFrame(columns=ANNOTATION_COLUMNS)

annotations_df = load_or_initialize_annotations()

def save_single_annotation(annotation_data):
    global annotations_df
    
    new_row_df = pd.DataFrame([annotation_data], columns=ANNOTATION_COLUMNS)

    # Convert boolean columns from potential strings/objects to actual booleans for new_row_df
    bool_cols = ['human_validity_from_is_correct', 'human_valid_to_is_correct', 'human_impacted_agreements_is_correct']
    for col in bool_cols:
        if col in new_row_df.columns:
             # Handle potential string 'True'/'False' or actual booleans
            new_row_df[col] = new_row_df[col].apply(lambda x: x if isinstance(x, bool) else str(x).lower() == 'true')

    # Ensure existing annotations_df also has correct boolean types for comparison
    for col in bool_cols:
        if col in annotations_df.columns:
            annotations_df[col] = annotations_df[col].astype(bool)
            
    existing_index = annotations_df[annotations_df["agreement_id"] == annotation_data["agreement_id"]].index

    if not existing_index.empty:
        idx_to_update = existing_index[0]
        for col in ANNOTATION_COLUMNS:
            annotations_df.loc[idx_to_update, col] = new_row_df.at[0, col]
        print(f"Updated annotation for agreement_id: {annotation_data['agreement_id']}")
    else:
        annotations_df = pd.concat([annotations_df, new_row_df], ignore_index=True)
        print(f"Added new annotation for agreement_id: {annotation_data['agreement_id']}")
    
    try:
        # When saving, replace pd.NA with empty strings for CSV readability where appropriate
        save_df = annotations_df.copy()
        for col in ['human_validity_from_corrected', 'human_valid_to_corrected', 'human_impacted_agreements_corrected', 'annotation_notes']:
            save_df[col] = save_df[col].fillna('')

        save_df.to_csv(ANNOTATION_FILE, index=False)
        print(f"Annotations saved to {ANNOTATION_FILE}")
    except Exception as e:
        # Assuming output_area is defined in the UI cell.
        # This function might be called from outside UI, so check:
        if 'output_area' in globals():
             with output_area:
                print(f"Error saving annotations to CSV: {e}")
        else:
            print(f"Error saving annotations to CSV: {e}")


Loading existing annotations from /Users/juankostelec/Google_drive/Projects/legal-assistant-bot/data/validation/annotations.csv


In [5]:
# --- Annotation UI --- #
current_index_widget = widgets.IntText(value=0, description='Index:', disabled=False, layout=widgets.Layout(width='150px'))
total_agreements_widget = widgets.IntText(value=0, description='Total:', disabled=True, layout=widgets.Layout(width='150px'))
go_to_button = widgets.Button(description="Go to Index")

agreement_title_widget = widgets.Textarea(description='Title:', disabled=True, layout=widgets.Layout(width='98%', height='50px'))
llm_vf_widget = widgets.Text(description='LLM From:', disabled=True, layout=widgets.Layout(width='auto'))
llm_vt_widget = widgets.Text(description='LLM To:', disabled=True, layout=widgets.Layout(width='auto'))
llm_ia_widget = widgets.Textarea(description='LLM Impacted:', disabled=True, layout=widgets.Layout(width='98%', height='80px'))
agreement_url_widget = widgets.HTML(value="", layout=widgets.Layout(margin='5px 0 10px 0'))

vf_correct_widget = widgets.Checkbox(value=True, description='Date From Correct', indent=False)
vf_corrected_widget = widgets.Text(description='Corrected From:', layout=widgets.Layout(width='auto'))
ai_fix_vf_button = widgets.Button(description="AI Fix From", tooltip="Use AI to suggest a correction for Validity From", button_style='info', icon='magic', disabled=True)
vt_correct_widget = widgets.Checkbox(value=True, description='Date To Correct', indent=False)
vt_corrected_widget = widgets.Text(description='Corrected To:', layout=widgets.Layout(width='auto'))
ai_fix_vt_button = widgets.Button(description="AI Fix To", tooltip="Use AI to suggest a correction for Valid To", button_style='info', icon='magic', disabled=True)
ia_correct_widget = widgets.Checkbox(value=True, description='Impacted Correct', indent=False)
ia_corrected_widget = widgets.Textarea(description='Corrected Impacted (JSON list):', layout=widgets.Layout(width='98%', height='80px'))
ai_fix_ia_button = widgets.Button(description="AI Fix Impacted", tooltip="Use AI to suggest a correction for Impacted Agreements", button_style='info', icon='magic', disabled=True)
notes_widget = widgets.Textarea(description='Notes:', layout=widgets.Layout(width='98%', height='60px'))

save_button = widgets.Button(description="Save Annotation", button_style='success')
prev_button = widgets.Button(description="Previous")
next_button = widgets.Button(description="Next")
output_area = widgets.Output() # For messages and errors
ai_justification_area = widgets.HTML(
    value="<p><i>AI Fix Justification will appear here...</i></p>",
    layout=widgets.Layout(width='98%', height='150px', border='1px solid #ccc', padding='5px', margin_top='10px', overflow_y='auto')
)
def get_existing_annotation_for_agreement(agreement_id):
    global annotations_df
    # Ensure annotations_df is the most recent version
    # annotations_df = load_or_initialize_annotations() # Optional: reload on every check
    existing = annotations_df[annotations_df['agreement_id'] == agreement_id]
    if not existing.empty:
        return existing.iloc[0].to_dict()
    return None

def display_agreement(index_to_display, df_to_annotate_ui):
    global current_category_for_annotation # Ensure it's accessible and updatable

    if not (0 <= index_to_display < len(df_to_annotate_ui)):
        with output_area:
            clear_output(wait=True)
            print(f"Invalid index: {index_to_display}. Must be between 0 and {len(df_to_annotate_ui)-1}.")
        return

    current_index_widget.value = index_to_display
    agreement = df_to_annotate_ui.iloc[index_to_display]
    agreement_id = agreement['agreement_id']
    
    current_category_for_annotation = str(agreement.get('category', 'UnknownCategory'))

    agreement_title_widget.value = str(agreement.get('agreement_title', 'N/A'))
    llm_vf_widget.value = str(agreement.get('valid_fro', ''))
    llm_vt_widget.value = str(agreement.get('valid_to', ''))
    
    llm_impacted_str = str(agreement.get('impacted_agreements', '[]'))
    try:
        # Try json.loads first for valid JSON strings, then fallback to eval for list-like strings
        if isinstance(llm_impacted_str, str) and llm_impacted_str.startswith('[') and llm_impacted_str.endswith(']'):
            try:
                parsed_list = json.loads(llm_impacted_str)
            except json.JSONDecodeError:
                parsed_list = eval(llm_impacted_str) # CAUTION with eval
        else:
            parsed_list = eval(llm_impacted_str) # CAUTION with eval

        llm_ia_widget.value = json.dumps(parsed_list, indent=2) if isinstance(parsed_list, list) else llm_impacted_str
    except: # Fallback if all parsing fails
        llm_ia_widget.value = llm_impacted_str
        
    # --- Handling agreement_url ---
    url = agreement.get('agreement_url', '')
    if url and isinstance(url, str) and (url.startswith('http://') or url.startswith('https://') or url.startswith('file:/')):
        agreement_url_widget.value = f'<a href="{url}" target="_blank" style="font-size:small; word-break:break-all;">Open Agreement Document: {url}</a>'
    else:
        agreement_url_widget.value = f'<span style="font-size:small;">URL: {url if url else "Not available"}</span>'

    existing_data = get_existing_annotation_for_agreement(agreement_id)
    if existing_data:
        vf_correct_widget.value = bool(existing_data.get('human_validity_from_is_correct', True))
        vf_corrected_widget.value = str(existing_data.get('human_validity_from_corrected', '')) if pd.notna(existing_data.get('human_validity_from_corrected')) else ''
        vt_correct_widget.value = bool(existing_data.get('human_valid_to_is_correct', True))
        vt_corrected_widget.value = str(existing_data.get('human_valid_to_corrected', '')) if pd.notna(existing_data.get('human_valid_to_corrected')) else ''
        ia_correct_widget.value = bool(existing_data.get('human_impacted_agreements_is_correct', True))
        ia_corrected_widget.value = str(existing_data.get('human_impacted_agreements_corrected', '[]')) if pd.notna(existing_data.get('human_impacted_agreements_corrected')) else '[]'
        notes_widget.value = str(existing_data.get('annotation_notes', '')) if pd.notna(existing_data.get('annotation_notes')) else ''
    else: 
        vf_correct_widget.value = True
        vf_corrected_widget.value = ''
        vt_correct_widget.value = True
        vt_corrected_widget.value = ''
        ia_correct_widget.value = True
        ia_corrected_widget.value = llm_ia_widget.value 
        notes_widget.value = ''
    
    toggle_corrected_fields() 
    with output_area:
        clear_output(wait=True)
        print(f"Displaying ({index_to_display + 1}/{len(df_to_annotate_ui)}): {agreement_title_widget.value} (ID: {agreement_id}) - Category: {current_category_for_annotation}")

def toggle_corrected_fields(*args):
    vf_corrected_widget.disabled = vf_correct_widget.value
    ai_fix_vf_button.disabled = vf_correct_widget.value # Enable AI fix if checkbox is unchecked

    vt_corrected_widget.disabled = vt_correct_widget.value
    ai_fix_vt_button.disabled = vt_correct_widget.value # Enable AI fix if checkbox is unchecked

    ia_corrected_widget.disabled = ia_correct_widget.value
    ai_fix_ia_button.disabled = ia_correct_widget.value # Enable AI fix if checkbox is unchecked

    if vf_correct_widget.value: vf_corrected_widget.value = ''
    if vt_correct_widget.value: vt_corrected_widget.value = ''
    if ia_correct_widget.value: ia_corrected_widget.value = llm_ia_widget.value

vf_correct_widget.observe(toggle_corrected_fields, 'value')
vt_correct_widget.observe(toggle_corrected_fields, 'value')
ia_correct_widget.observe(toggle_corrected_fields, 'value')

def read_markdown_content(markdown_path):
    if not markdown_path or not isinstance(markdown_path, str) or not os.path.exists(markdown_path):
        return None
    try:
        with open(markdown_path, 'r', encoding='utf-8') as f:
            # Consider reading only first N characters if files are very large, like in preprocessing
            # For now, reading full content for better context in focused fix
            return f.read() 
    except Exception as e:
        print(f"Error reading markdown file {markdown_path}: {e}")
        return None


# --- AI Fix Button Click Handlers (Updated for Justification) ---
def on_ai_fix_vf_clicked(b):
    index = current_index_widget.value
    agreement = filtered_agreements_for_ui.iloc[index]
    title = agreement.get('agreement_title', 'N/A')
    info = agreement.get('agreement_info', '')
    markdown_path = agreement.get('markdown_path')
    
    with output_area:
        clear_output(wait=True)
        print(f"Attempting AI Fix for 'Validity From' for: {title}...")
    ai_justification_area.value = "<p><i>Fetching AI suggestion for Validity From...</i></p>" # Update justification area
        
    agreement_text = read_markdown_content(markdown_path)
    if not agreement_text:
        with output_area: # Append message
            print(f"Error: Could not read markdown content from {markdown_path} for AI Fix.")
        vf_corrected_widget.value = "Error: Could not read text"
        ai_justification_area.value = "<p style='color:red;'>Error: Could not read agreement text.</p>"
        return

    prompt = PROMPT_FIX_VALIDITY_FROM.format(title=title, info=info, text=agreement_text)
    # Using the specified Gemini Pro model
    ai_response = call_gemini(prompt=prompt, model_name=LLM_MODEL_FOR_FIX, response_schema=SCHEMA_FIX_VALIDITY_FROM) 
    
    current_output_messages = []
    if ai_response and 'valid_fro' in ai_response and 'justification' in ai_response:
        vf_corrected_widget.value = ai_response['valid_fro']
        current_output_messages.append(f"AI Fix for 'Validity From' successful. Suggested: {ai_response['valid_fro']}")
        justification_html = f"<b>Justification for Validity From:</b><br><pre style='white-space: pre-wrap; word-wrap: break-word; background-color: #f0f0f0; padding: 5px; border-radius: 3px;'>{ai_response['justification']}</pre>"
        ai_justification_area.value = justification_html
    else:
        vf_corrected_widget.value = "AI Fix Failed"
        current_output_messages.append("AI Fix for 'Validity From' failed or returned an unexpected response.")
        current_output_messages.append(f"LLM Response: {json.dumps(ai_response, indent=2)}") # Pretty print JSON
        ai_justification_area.value = "<p style='color:red;'>AI Fix for Validity From failed.</p>"
    
    with output_area: # Update with all messages
        clear_output(wait=True)
        for msg in current_output_messages:
            print(msg)

def on_ai_fix_vt_clicked(b):
    index = current_index_widget.value
    agreement = filtered_agreements_for_ui.iloc[index]
    title = agreement.get('agreement_title', 'N/A')
    info = agreement.get('agreement_info', '')
    markdown_path = agreement.get('markdown_path')

    with output_area:
        clear_output(wait=True)
        print(f"Attempting AI Fix for 'Valid To' for: {title}...")
    ai_justification_area.value = "<p><i>Fetching AI suggestion for Valid To...</i></p>"

    agreement_text = read_markdown_content(markdown_path)
    if not agreement_text:
        with output_area:
            print(f"Error: Could not read markdown content from {markdown_path} for AI Fix.")
        vt_corrected_widget.value = "Error: Could not read text"
        ai_justification_area.value = "<p style='color:red;'>Error: Could not read agreement text.</p>"
        return

    prompt = PROMPT_FIX_VALID_TO.format(title=title, info=info, text=agreement_text)
    ai_response = call_gemini(prompt=prompt, model_name=LLM_MODEL_FOR_FIX, response_schema=SCHEMA_FIX_VALID_TO)

    current_output_messages = []
    if ai_response and 'valid_to' in ai_response and 'justification' in ai_response:
        vt_corrected_widget.value = ai_response['valid_to']
        current_output_messages.append(f"AI Fix for 'Valid To' successful. Suggested: {ai_response['valid_to']}")
        justification_html = f"<b>Justification for Valid To:</b><br><pre style='white-space: pre-wrap; word-wrap: break-word; background-color: #f0f0f0; padding: 5px; border-radius: 3px;'>{ai_response['justification']}</pre>"
        ai_justification_area.value = justification_html
    else:
        vt_corrected_widget.value = "AI Fix Failed"
        current_output_messages.append("AI Fix for 'Valid To' failed or returned an unexpected response.")
        current_output_messages.append(f"LLM Response: {json.dumps(ai_response, indent=2)}")
        ai_justification_area.value = "<p style='color:red;'>AI Fix for Valid To failed.</p>"
        
    with output_area:
        clear_output(wait=True)
        for msg in current_output_messages:
            print(msg)


def on_ai_fix_ia_clicked(b):
    index = current_index_widget.value
    agreement = filtered_agreements_for_ui.iloc[index]
    title = agreement.get('agreement_title', 'N/A')
    info = agreement.get('agreement_info', '')
    markdown_path = agreement.get('markdown_path')
    global ALL_AGREEMENT_TITLES

    with output_area:
        clear_output(wait=True)
        print(f"Attempting AI Fix for 'Impacted Agreements' for: {title}...")
    ai_justification_area.value = "<p><i>Fetching AI suggestion for Impacted Agreements...</i></p>"
    
    if not ALL_AGREEMENT_TITLES:
        with output_area: print("Warning: List of all agreement titles is empty. AI Fix for impacted agreements might be inaccurate.")

    agreement_text = read_markdown_content(markdown_path)
    if not agreement_text:
        with output_area:
            print(f"Error: Could not read markdown content from {markdown_path} for AI Fix.")
        ia_corrected_widget.value = "Error: Could not read text"
        ai_justification_area.value = "<p style='color:red;'>Error: Could not read agreement text.</p>"
        return
        
    formatted_titles = "\\n".join([f"- {t}" for t in ALL_AGREEMENT_TITLES])
    prompt = PROMPT_FIX_IMPACTED_AGREEMENTS.format(title=title, info=info, text=agreement_text, all_titles=formatted_titles)
    ai_response = call_gemini(prompt=prompt, model_name=LLM_MODEL_FOR_FIX, response_schema=SCHEMA_FIX_IMPACTED_AGREEMENTS)

    current_output_messages = []
    justification_html_parts = ["<b>Justifications for Impacted Agreements:</b>"]
    if ai_response and 'impacted_agreements_justified' in ai_response and isinstance(ai_response['impacted_agreements_justified'], list):
        extracted_titles = [item.get('impacted_agreement_title') for item in ai_response['impacted_agreements_justified'] if item.get('impacted_agreement_title')]
        ia_corrected_widget.value = json.dumps(extracted_titles, indent=2)
        current_output_messages.append(f"AI Fix for 'Impacted Agreements' successful. Suggested: {json.dumps(extracted_titles)}")
        
        if not ai_response['impacted_agreements_justified']:
            justification_html_parts.append("<p><i>No impacted agreements found by AI.</i></p>")
        else:
            for item in ai_response['impacted_agreements_justified']:
                just_title = item.get('impacted_agreement_title', 'N/A')
                just_text = item.get('justification', 'No justification provided.')
                justification_html_parts.append(f"<p><b>{just_title}:</b> <pre style='white-space: pre-wrap; word-wrap: break-word; background-color: #f0f0f0; padding: 5px; border-radius: 3px;'>{just_text}</pre></p>")
        ai_justification_area.value = "".join(justification_html_parts)
    else:
        ia_corrected_widget.value = "AI Fix Failed" # Keep this as a plain list
        current_output_messages.append("AI Fix for 'Impacted Agreements' failed or returned an unexpected response.")
        current_output_messages.append(f"LLM Response: {json.dumps(ai_response, indent=2)}")
        ai_justification_area.value = "<p style='color:red;'>AI Fix for Impacted Agreements failed.</p>"
        
    with output_area:
        clear_output(wait=True)
        for msg in current_output_messages:
            print(msg)

def on_save_button_clicked(b):
    index = current_index_widget.value
    if 'filtered_agreements_for_ui' not in globals() or filtered_agreements_for_ui.empty:
        with output_area: clear_output(wait=True); print("Error: No data loaded in the UI to save.")
        return
    agreement = filtered_agreements_for_ui.iloc[index]
    
    corrected_ia_val_str = ia_corrected_widget.value
    if not ia_correct_widget.value: # Only validate if user claims it's a correction
        try:
            json.loads(corrected_ia_val_str)
        except json.JSONDecodeError:
            with output_area:
                clear_output(wait=True) 
                print(f"ERROR: Corrected Impacted Agreements must be a valid JSON list string. You entered: {corrected_ia_val_str}")
            return
            
    annotation = {
        "agreement_id": agreement['agreement_id'],
        "agreement_title": str(agreement.get('agreement_title', 'N/A')),
        "category": current_category_for_annotation, # Use the category of the currently displayed agreement
        "llm_validity_from": llm_vf_widget.value,
        "human_validity_from_is_correct": bool(vf_correct_widget.value),
        "human_validity_from_corrected": vf_corrected_widget.value if not vf_correct_widget.value else pd.NA,
        "llm_valid_to": llm_vt_widget.value,
        "human_valid_to_is_correct": bool(vt_correct_widget.value),
        "human_valid_to_corrected": vt_corrected_widget.value if not vt_correct_widget.value else pd.NA,
        "llm_impacted_agreements": llm_ia_widget.value, # This should be the JSON string from LLM output
        "human_impacted_agreements_is_correct": bool(ia_correct_widget.value),
        "human_impacted_agreements_corrected": corrected_ia_val_str if not ia_correct_widget.value else pd.NA,
        "annotation_notes": notes_widget.value,
        "last_annotated_timestamp": datetime.now().isoformat()
    }
    with output_area:
        clear_output(wait=True)
        save_single_annotation(annotation) # This will print save status
        # print(f"Annotation for '{agreement['agreement_title']}' (ID: {agreement['agreement_id']}) processed.") # Redundant if save_single_annotation prints
        
        if index + 1 < len(filtered_agreements_for_ui):
            print(f"Moving to next agreement ({index + 1} of {len(filtered_agreements_for_ui)-1})...")
            display_agreement(index + 1, filtered_agreements_for_ui)
        elif index + 1 == len(filtered_agreements_for_ui):
            print("All agreements in this filter have been processed. Annotation complete for this set!")

def on_prev_button_clicked(b):
    if 'filtered_agreements_for_ui' in globals():
        display_agreement(current_index_widget.value - 1, filtered_agreements_for_ui)

def on_next_button_clicked(b):
    if 'filtered_agreements_for_ui' in globals():
        display_agreement(current_index_widget.value + 1, filtered_agreements_for_ui)

def on_go_to_button_clicked(b):
    if 'filtered_agreements_for_ui' in globals():
        display_agreement(current_index_widget.value, filtered_agreements_for_ui)


ai_fix_vf_button.on_click(on_ai_fix_vf_clicked)
ai_fix_vt_button.on_click(on_ai_fix_vt_clicked)
ai_fix_ia_button.on_click(on_ai_fix_ia_clicked)
save_button.on_click(on_save_button_clicked)
prev_button.on_click(on_prev_button_clicked)
next_button.on_click(on_next_button_clicked)
go_to_button.on_click(on_go_to_button_clicked)

# Global reference to the DataFrame being annotated by the UI
filtered_agreements_for_ui = pd.DataFrame()
preview_area = widgets.Output() # Output widget for the DataFrame preview

def create_annotation_ui(df_to_annotate):
    global annotations_df, filtered_agreements_for_ui, current_category_for_annotation, preview_area
    
    if not isinstance(df_to_annotate, pd.DataFrame) or df_to_annotate.empty:
        clear_output(wait=True)
        display(widgets.HTML("<b style='color:red;'>No agreements to annotate. Please ensure the 'filtered_agreements' DataFrame is populated from the previous cell.</b>"))
        return
    
    filtered_agreements_for_ui = df_to_annotate.copy() 
    
    annotations_df = load_or_initialize_annotations() 
    
    total_agreements_widget.value = len(filtered_agreements_for_ui)
    current_index_widget.max = len(filtered_agreements_for_ui) -1 
    current_index_widget.min = 0
    current_index_widget.value = 0

    if not filtered_agreements_for_ui.empty:
        current_category_for_annotation = str(filtered_agreements_for_ui.iloc[0].get('category', 'UnknownCategory'))
    elif 'VALUE_TO_FILTER' in locals(): 
         current_category_for_annotation = VALUE_TO_FILTER
    else:
        current_category_for_annotation = "UnknownCategory"

    # --- Add DataFrame Preview ---
    with preview_area:
        clear_output(wait=True)
        print("Preview of Filtered Agreements for Annotation:")
        columns_to_preview = ['agreement_id', 'agreement_title', 'valid_from', 'valid_to', 'impacted_agreements', 'agreement_url']
        # Ensure all requested columns exist in the DataFrame before trying to display them
        existing_preview_columns = [col for col in columns_to_preview if col in filtered_agreements_for_ui.columns]
        if not existing_preview_columns:
            print("Warning: None of the specified preview columns exist in the DataFrame.")
        else:
            # Display the head of the DataFrame with existing preview columns
            display(filtered_agreements_for_ui[existing_preview_columns].head())
            if len(existing_preview_columns) < len(columns_to_preview):
                missing_cols = set(columns_to_preview) - set(existing_preview_columns)
                print(f"Warning: The following preview columns were not found and are not displayed: {missing_cols}")

    display_agreement(0, filtered_agreements_for_ui) 

    navigation_controls = widgets.HBox([prev_button, next_button, current_index_widget, go_to_button, total_agreements_widget], layout=widgets.Layout(margin_bottom='10px'))
    
    llm_outputs_section = widgets.VBox([
        widgets.HTML("<h2>LLM Output:</h2>"), agreement_title_widget, agreement_url_widget, 
        widgets.HBox([llm_vf_widget, llm_vt_widget], layout=widgets.Layout(justify_content='flex-start')), 
        llm_ia_widget
    ], layout=widgets.Layout(border='1px solid #ddd', padding='10px', margin_bottom='10px', background_color='#f9f9f9'))
    

    human_inputs_section = widgets.VBox([
        widgets.HTML("<h2>Your Annotation:</h2>"),
        widgets.GridBox(
            [vf_correct_widget, vf_corrected_widget, ai_fix_vf_button, # Added AI fix button
             vt_correct_widget, vt_corrected_widget, ai_fix_vt_button], # Added AI fix button
            layout=widgets.Layout(grid_template_columns="auto auto auto auto auto auto", gap="5px", align_items="center") # Adjusted grid
        ),
        widgets.HTML("<hr style='margin:10px 0;'>"),
        widgets.HBox([ia_correct_widget, ai_fix_ia_button], layout=widgets.Layout(align_items="center")), # Grouped checkbox and button
        ia_corrected_widget,
        widgets.HTML("<hr style='margin:10px 0;'>"),
        notes_widget,
        save_button
    ], layout=widgets.Layout(border='1px solid #ddd', padding='10px', background_color='#f9f9f9'))
    
    clear_output(wait=True) 
    display(
        preview_area, 
        navigation_controls, 
        llm_outputs_section, 
        human_inputs_section, 
        ai_justification_area,  # Added AI Justification Area here
        output_area
    )

KeyError: '"impacted_agreement_title"'

KeyError: '"impacted_agreement_title"'

KeyError: '"impacted_agreement_title"'

2025-05-16 08:33:36 - INFO - --- Calling Gemini (gemini-2.5-pro-preview-03-25) ---
2025-05-16 08:33:36 - INFO - AFC is enabled with max remote calls: 10.
2025-05-16 08:34:02 - INFO - HTTP Request: POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-pro-preview-03-25:generateContent "HTTP/1.1 200 OK"
2025-05-16 08:34:02 - INFO - AFC remote call 1 is done.


KeyError: '"impacted_agreement_title"'

In [6]:
!jupyter nbextension enable --py widgetsnbextension --sys-prefix

filtered_agreements = analyze_agreements_by_category(
    metadata_csv_path=METADATA_PATH,
    validity_csv_path=VALIDITY_ANALYSIS_PATH,
    category_column=CATEGORY_COL_TO_FILTER,
    category_value=VALUE_TO_FILTER
)
# print("\nFiltered Agreements:")
# display(filtered_agreements[["agreement_title", "valid_from", "valid_to", "impacted_agreements", "agreement_url"]])


# --- Cell to run the annotation UI ---
# This cell should be run AFTER the cell that defines and populates 'filtered_agreements'.
if 'filtered_agreements' in locals() and isinstance(filtered_agreements, pd.DataFrame) and not filtered_agreements.empty:
    print(f"Starting annotation UI for {len(filtered_agreements)} agreements...")
    # Pass the currently filtered agreements to the UI
    create_annotation_ui(filtered_agreements) 
else:
    clear_output(wait=True)
    display(widgets.VBox([
        widgets.HTML("<b style='color:red;'>'filtered_agreements' is not loaded or is empty.</b>"),
        widgets.HTML("Please ensure the previous cell that calls <code>analyze_agreements_by_category</code> has been run successfully and returned data.")
    ]))


Output()

HBox(children=(Button(description='Previous', style=ButtonStyle()), Button(description='Next', style=ButtonSty…

VBox(children=(HTML(value='<h2>LLM Output:</h2>'), Textarea(value='2022 Commercials Contract Memorandum of Agr…

VBox(children=(HTML(value='<h2>Your Annotation:</h2>'), GridBox(children=(Checkbox(value=True, description='Da…

HTML(value='<p><i>AI Fix Justification will appear here...</i></p>', layout=Layout(border_bottom='1px solid #c…

Output()