In [None]:
!pip install gradio
!pip install anthropic
!pip install langgraph langchain langchain_core

Collecting gradio
  Downloading gradio-5.31.0-py3-none-any.whl.metadata (16 kB)
Collecting aiofiles<25.0,>=22.0 (from gradio)
  Downloading aiofiles-24.1.0-py3-none-any.whl.metadata (10 kB)
Collecting fastapi<1.0,>=0.115.2 (from gradio)
  Downloading fastapi-0.115.12-py3-none-any.whl.metadata (27 kB)
Collecting ffmpy (from gradio)
  Downloading ffmpy-0.5.0-py3-none-any.whl.metadata (3.0 kB)
Collecting gradio-client==1.10.1 (from gradio)
  Downloading gradio_client-1.10.1-py3-none-any.whl.metadata (7.1 kB)
Collecting groovy~=0.1 (from gradio)
  Downloading groovy-0.1.2-py3-none-any.whl.metadata (6.1 kB)
Collecting pydub (from gradio)
  Downloading pydub-0.25.1-py2.py3-none-any.whl.metadata (1.4 kB)
Collecting python-multipart>=0.0.18 (from gradio)
  Downloading python_multipart-0.0.20-py3-none-any.whl.metadata (1.8 kB)
Collecting ruff>=0.9.3 (from gradio)
  Downloading ruff-0.11.12-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (25 kB)
Collecting safehttpx<0.2.0,>=0.1.

In [None]:
import gradio as gr
import pandas as pd
import tempfile
import os
from anthropic import Anthropic
from openai import OpenAI
import re
import traceback
import io
from contextlib import redirect_stdout
import numpy as np
import logging
import json
from typing import List, Optional, Dict, Any, Union
from langchain_core.messages import AIMessage, HumanMessage
from langgraph.graph import StateGraph, END

In [None]:
# Set up logging
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger('rent_roll_analyzer')

# Global variables and API keys
# Keys are read from the environment so that no secret is stored in the
# notebook. A missing variable yields None; the functions below pass these
# values straight to the OpenAI / Anthropic client constructors.
DEFAULT_OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
DEFAULT_ANTHROPIC_API_KEY = os.environ.get("ANTHROPIC_API_KEY")

In [None]:
from typing import TypedDict, List, Optional, Union, Dict, Any

# Define the state as a TypedDict
class AgentState(TypedDict, total=False):
    """State dictionary passed between the agent's node functions.

    Declared with ``total=False``, so every key is optional as far as the
    type checker is concerned.
    """
    # Conversation history; entries are read as {"role": ..., "content": ...}.
    messages: List[Dict[str, str]]
    # The loaded rent roll, or None when no file has been loaded.
    df: Optional[pd.DataFrame]
    # Issue descriptions; generate_text_response lists them in its prompt.
    issues: List[str]
    # NOTE(review): not read or written by the nodes visible here - confirm usage.
    execution_plan: Optional[str]
    # Set by determine_action when the chosen action is "ask_clarification".
    needs_clarification: bool
    # Question text stored by ask_clarification.
    clarification_question: Optional[str]
    # Set by determine_action when the chosen action is "generate_code".
    generate_code: bool
    # NOTE(review): presumably output captured from executed code - confirm.
    code_execution_results: Optional[str]
    # Text of the reply produced by the node that handled the turn.
    final_response: Optional[str]
    anthropic_client: Optional[Any]  # For Claude API
    openai_client: Optional[Any]     # For OpenAI API


In [None]:
def read_rent_roll_simple(file_path):
    """
    Read a rent roll Excel file whose column headers sit below a preamble.

    The sheet is first read without a header. The first row whose first cell
    is the text "Current" (surrounding whitespace ignored) is treated as a
    marker: the row right after it supplies the column names and every row
    after that is data. Empty header cells are named "NaN", so several empty
    header cells produce several columns that share that name.

    Args:
        file_path: Path (or file-like object) accepted by ``pd.read_excel``.

    Returns:
        A DataFrame built from the rows below the header row. If the marker
        is missing, or it is on the last row so no header row follows it, the
        file is re-read with ``pd.read_excel(file_path)`` and that is returned.
    """
    # Read the raw Excel file with no header
    raw_df = pd.read_excel(file_path, header=None)

    # Locate the marker by position; with header=None the index is positional.
    header_row = None
    if raw_df.shape[1] > 0:
        for position, first_cell in enumerate(raw_df.iloc[:, 0].tolist()):
            if isinstance(first_cell, str) and first_cell.strip() == "Current":
                header_row = position + 1  # Headers are in the row after "Current"
                break

    if header_row is None:
        logger.warning("Could not find header row with 'Current' marker. Falling back to standard loading.")
        return pd.read_excel(file_path)

    # A marker on the very last row has no header row after it; indexing it
    # would raise IndexError, so use the same fallback as a missing marker.
    if header_row >= len(raw_df):
        logger.warning("Found 'Current' marker on the last row with no header row after it. Falling back to standard loading.")
        return pd.read_excel(file_path)

    # Use "NaN" for empty header cells, the string form of the value otherwise
    headers = ["NaN" if pd.isna(val) else str(val) for val in raw_df.iloc[header_row]]

    # Everything below the header row is data
    data_rows = raw_df.iloc[(header_row + 1):].values
    result_df = pd.DataFrame(data_rows, columns=headers)

    logger.info(f"Successfully loaded rent roll with {len(result_df)} rows using specialized loader")
    return result_df

In [None]:
def _parse_numbered_issues(response_text):
    """Split a numbered-list reply into one string per numbered item.

    A line starts a new item when, after stripping whitespace and an optional
    markdown heading/bold prefix, it begins with 1-3 digits, a dot and
    whitespace. Other non-blank lines are appended to the item in progress;
    text before the first numbered item is ignored.
    """
    issues = []
    current_issue = ""

    for line in (response_text or "").split('\n'):
        stripped = line.strip()
        if not stripped:
            continue
        # Match on the stripped line so indented items are still recognised.
        if re.match(r'(?:#{1,6}\s*|\*{1,2})?\d{1,3}\.\s', stripped):
            if current_issue:
                issues.append(current_issue)
            current_issue = stripped
        elif current_issue:
            # Continue building the current issue
            current_issue += " " + stripped

    # Add the last issue if there is one
    if current_issue:
        issues.append(current_issue)

    return issues


def analyze_rent_roll_gpt(file_path, api_key):
    """
    Analyzes a CRE rent roll Excel file by sending the data rows to GPT-4.

    The file is loaded with ``read_rent_roll_simple``, serialised to CSV and
    sent to the "gpt-4o" chat model; the numbered list in the reply is split
    into one string per issue.

    Args:
        file_path: Path of the Excel rent roll to analyse.
        api_key: OpenAI API key used to create the client.

    Returns:
        A list of issue strings. The list is empty when the file cannot be
        loaded, holds a single "No issues detected by GPT-4." entry when the
        reply has no numbered items, and holds a single failure message when
        the API call raises.
    """
    # Load the rent roll
    try:
        df = read_rent_roll_simple(file_path)
        logger.info("File loaded successfully for GPT analysis.")
    except Exception as e:
        logger.error(f"Error loading file: {e}")
        return []

    # Initialize OpenAI client
    client = OpenAI(api_key=api_key)

    # Convert the DataFrame to CSV string format
    csv_data = df.to_csv(index=False)
    logger.info(f"Converted DataFrame to CSV with {len(df)} rows and {len(df.columns)} columns")

    # Enhance the system prompt to focus on general rent roll issues
    system_prompt = """
    You are a Commercial Real Estate rent roll expert specializing in identifying data quality, formatting, and consistency issues.

    When analyzing any CRE rent roll, rigorously check for these common categories of issues:

    1. DUPLICATE OR REDUNDANT ENTRIES: Look for any repeated charges, fees, or line items
    2. INCONSISTENT TERMINOLOGY: Identify any unclear, non-standard, or ambiguous descriptions
    3. DATE ANOMALIES: Flag any suspicious or illogical date patterns across move-in, lease start/end
    4. RENT DISCREPANCIES: Identify deviations between market rent values and actual charged amounts
    5. CALCULATION INCONSISTENCIES: Check if component charges properly sum to totals
    6. EXCEL ARTIFACTS: Identify any visible formulas, function calls, or spreadsheet mechanics
    7. FORMATTING IRREGULARITIES: Notice inconsistent data entry patterns or splitting of information
    8. BALANCE ANOMALIES: Identify unusual balances, especially negative values
    9. OCCUPANCY MISMATCHES: Look for occupied units with zero rent or vacant units with charges
    10. UNIT IDENTIFICATION PATTERNS: Check for inconsistencies in unit numbering or identification

    Be extremely thorough and specific in your analysis. Report ALL issues you find, regardless of how minor they may seem.
    DO NOT return "No issues detected" unless you've comprehensively analyzed the data for each category above.
    """

    # Use a simplified prompt focused on analyzing the raw CSV data
    prompt = (
        f"Please analyze this Commercial Real Estate rent roll data in CSV format and identify ALL potential issues "
        f"that could affect data quality, accuracy, or decision-making.\n\n{csv_data}\n\n"

        f"Based on your expertise in CRE rent rolls, provide a numbered list of ALL issues you can identify, including but not limited to:\n\n"

        f"- Any duplicate or redundant charges\n"
        f"- Unclear, non-standard, or inconsistent descriptions\n"
        f"- Suspicious or illogical date patterns\n"
        f"- Inconsistencies between market rent and actual rent values\n"
        f"- Calculation errors where components don't match totals\n"
        f"- Spreadsheet artifacts like visible formulas\n"
        f"- Inconsistent data entry patterns\n"
        f"- Unusual balance values\n"
        f"- Occupancy status mismatches\n"
        f"- Inconsistent unit numbering or identification\n\n"

        f"IMPORTANT: For each issue found, please reference the specific unit(s) affected and explain why it's problematic. "
        f"Be comprehensive - rent roll accuracy is critical for CRE investment and property management decisions."
    )

    try:
        logger.info("Sending request to GPT-4 for analysis...")
        response = client.chat.completions.create(
            model="gpt-4o",
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": prompt}
            ],
            max_tokens=2000,
            temperature=0.3
        )
        response_text = response.choices[0].message.content
        logger.info("Received response from GPT-4.")

        # Split the reply into its numbered items
        issues = _parse_numbered_issues(response_text)

        if not issues:
            issues.append("No issues detected by GPT-4.")

        logger.info(f"Identified {len(issues)} issues in the rent roll")
        return issues

    except Exception as e:
        logger.error(f"Error calling GPT-4 for analysis: {e}")
        logger.error(traceback.format_exc())
        return ["Failed to analyze rent roll due to API error."]

In [None]:
def determine_action(state):
    """Decide whether to answer directly, ask for clarification, or generate code.

    The latest user message, plus column types and a three-row preview of the
    dataframe, are sent to the "gpt-4o" chat model, which is asked to reply
    with a JSON object naming one of three actions.

    Args:
        state: Agent state mapping. Reads "messages", "df" and, when present,
            "openai_client"; missing keys are treated as empty / None.

    Returns:
        A shallow copy of ``state`` with boolean "needs_clarification" and
        "generate_code" set. Both are False (a plain text response) when the
        reply cannot be parsed, names an unknown action, or the call fails.
    """
    messages = state.get("messages") or []
    # Only the last message counts, and only if the user wrote it.
    user_message = ""
    if messages and messages[-1].get("role") == "user":
        user_message = messages[-1].get("content", "")
    df = state.get("df")

    # Get column information for context
    if df is not None:
        try:
            # Pairing columns with df.dtypes also works for duplicate column
            # names, where df[col] would return a DataFrame instead of a Series.
            column_info_str = "\n".join(
                f"- {col}: {dtype}" for col, dtype in zip(df.columns, df.dtypes)
            )
            df_preview = df.head(3).to_string()
        except Exception as e:
            logger.error(f"Error getting column info: {e}")
            column_info_str = "Error retrieving column information"
            df_preview = "Error retrieving data preview"
    else:
        column_info_str = "No dataframe loaded"
        df_preview = "No data available"

    # Use GPT-4 to analyze the query and determine the best action
    prompt = f"""
    User query: {user_message}

    Dataframe information:
    - Rows: {len(df) if df is not None else 'No data loaded'}
    - Columns: {column_info_str}

    Data preview:
    {df_preview}

    Analyze the user query and determine the most appropriate action:
    1. If the query is ambiguous or lacks specificity, choose "ask_clarification"
    2. If the query can be answered with a simple explanation without analysis, choose "text_response"
    3. If the query requires data analysis, calculations, or visualizations, choose "generate_code"

    Respond with a JSON object containing:
    {{"action": "ask_clarification" | "text_response" | "generate_code", "reason": "brief explanation"}}
    """

    # Default to a text response unless the model clearly picks something else.
    action = "text_response"
    try:
        # Reuse the client carried in the state when there is one; creating
        # it inside the try keeps a construction failure on the fallback path.
        client = state.get("openai_client") or OpenAI(api_key=DEFAULT_OPENAI_API_KEY)

        response = client.chat.completions.create(
            model="gpt-4o",
            messages=[
                {"role": "system", "content": "You are a decision-making agent for a rent roll analysis system. Output ONLY a JSON object with the determined action and reason."},
                {"role": "user", "content": prompt}
            ],
            max_tokens=500,
            temperature=0.2
        )

        response_text = response.choices[0].message.content or ""

        # Extract JSON from the response (first "{" through last "}")
        json_match = re.search(r'\{.*\}', response_text, re.DOTALL)
        if json_match:
            action_data = json.loads(json_match.group(0))
            requested = action_data.get("action", "text_response")
            if requested in ("ask_clarification", "text_response", "generate_code"):
                action = requested

        logger.info(f"Determined action using GPT-4: {action}")
    except Exception as e:
        logger.error(f"Error in determine_action: {e}")
        # Default to text response on error
        action = "text_response"

    # Create a new state dict with updated values
    new_state = dict(state)
    new_state["needs_clarification"] = action == "ask_clarification"
    new_state["generate_code"] = action == "generate_code"
    return new_state

In [None]:

def ask_clarification(state: AgentState) -> Dict:
    """Generate a clarification question for the user using GPT-4.

    Args:
        state: Agent state mapping. Reads "messages" and, when present,
            "openai_client"; a missing or empty history is treated as an
            empty user question.

    Returns:
        A shallow copy of ``state`` in which "clarification_question" and
        "final_response" hold the question and "messages" is a new list with
        the question appended as an assistant message. A generic question is
        used when the model call fails or returns no text.
    """
    messages = state.get("messages") or []
    # Only the last message counts, and only if the user wrote it.
    user_message = ""
    if messages and messages[-1].get("role") == "user":
        user_message = messages[-1].get("content", "")

    try:
        # Reuse the client carried in the state when there is one; creating
        # it inside the try keeps a construction failure on the fallback path.
        client = state.get("openai_client") or OpenAI(api_key=DEFAULT_OPENAI_API_KEY)

        response = client.chat.completions.create(
            model="gpt-4o",
            messages=[
                {"role": "system", "content": """You are a commercial real estate rent roll analyst.
                Generate a clear, specific clarification question to better understand
                what the user is asking about their rent roll data."""},
                {"role": "user", "content": f"My question is: {user_message}"}
            ],
            max_tokens=300,
            temperature=0.3
        )

        clarification_question = response.choices[0].message.content
        if clarification_question is None:
            raise ValueError("model returned no clarification text")

        logger.info(f"Generated clarification question using GPT-4: {clarification_question[:50]}...")
    except Exception as e:
        logger.error(f"Error in ask_clarification: {e}")
        # Fallback to a generic clarification question
        clarification_question = "Could you please clarify what specific aspect of the rent roll you'd like me to analyze?"

    # Create a new state dict with updated values; the history list is copied
    # so the caller's list is not mutated.
    new_state = dict(state)
    new_state["clarification_question"] = clarification_question
    new_state["final_response"] = clarification_question
    new_state["messages"] = list(messages) + [
        {"role": "assistant", "content": clarification_question}
    ]
    return new_state

In [None]:
def generate_text_response(state):
    """Generate a simple text response to the user query using GPT-4.

    A system prompt is built from the dataframe (column names, per-column
    statistics for the first 10 columns, a three-row preview) and the known
    issues, then sent with the non-system conversation messages to the
    "gpt-4o" chat model.

    Args:
        state: Agent state mapping. Reads "messages", "df", "issues" and, when
            present, "openai_client"; missing keys are treated as empty / None.

    Returns:
        A shallow copy of ``state`` in which "final_response" holds the reply
        and "messages" is a new list with the reply appended as an assistant
        message. A generic apology is used when the model call fails.
    """
    messages = state.get("messages") or []
    df = state.get("df")
    issues = state.get("issues") or []

    # Prepare context for GPT-4
    issues_text = "\n".join([f"- {issue}" for issue in issues])

    # Get column and data preview for context
    if df is not None:
        # Column labels are not guaranteed to be strings, so convert each one
        # before joining.
        column_info = ", ".join(str(col) for col in df.columns)
        data_stats = []
        for col in df.columns[:10]:  # Limit to first 10 columns to avoid token limits
            try:
                if pd.api.types.is_numeric_dtype(df[col]):
                    stat = f"- {col}: min={df[col].min()}, max={df[col].max()}, mean={df[col].mean():.2f}, null={df[col].isna().sum()}"
                else:
                    unique_vals = df[col].nunique()
                    stat = f"- {col}: unique values={unique_vals}, null={df[col].isna().sum()}"
                data_stats.append(stat)
            except Exception:
                # Best effort: one bad column must not prevent a reply.
                data_stats.append(f"- {col}: [error calculating stats]")
        data_stats_str = "\n".join(data_stats)
        df_preview = df.head(3).to_string()
    else:
        column_info = "No columns available"
        data_stats_str = "No data statistics available"
        df_preview = "No data preview available"

    system_prompt = f"""You are a commercial real estate rent roll analyst.
    The rent roll data has {len(df) if df is not None else 0} rows and
    {len(df.columns) if df is not None else 0} columns.

    Column information: {column_info}

    Data statistics:
    {data_stats_str}

    Data preview:
    {df_preview}

    Identified issues:
    {issues_text}

    Provide a concise, informative answer to the user's question.
    Focus on being helpful and direct, with only 1-2 paragraphs.
    Do not include code or detailed analysis unless absolutely necessary.
    """

    # Drop system messages from the history; the system prompt above replaces them
    filtered_messages = [msg for msg in messages if msg["role"] != "system"]

    try:
        # Reuse the client carried in the state when there is one; creating
        # it inside the try keeps a construction failure on the fallback path.
        client = state.get("openai_client") or OpenAI(api_key=DEFAULT_OPENAI_API_KEY)

        response = client.chat.completions.create(
            model="gpt-4o",
            messages=[
                {"role": "system", "content": system_prompt},
                *filtered_messages
            ],
            max_tokens=1000,
            temperature=0.3
        )

        text_response = response.choices[0].message.content
        if text_response is None:
            raise ValueError("model returned no text")

        logger.info(f"Generated text response using GPT-4: {text_response[:50]}...")
    except Exception as e:
        logger.error(f"Error in generate_text_response: {e}")
        # Fallback to a generic response
        text_response = "I'm sorry, I'm having trouble analyzing your rent roll data right now. Could you try rephrasing your question?"

    # Create a new state dict with updated values; the history list is copied
    # so the caller's list is not mutated.
    new_state = dict(state)
    new_state["final_response"] = text_response
    new_state["messages"] = list(messages) + [
        {"role": "assistant", "content": text_response}
    ]
    return new_state

In [None]:
def trim_dataframe_output(output_text, max_rows=20, max_chars=None):
    """
    Cut a block of text down to its leading lines.

    Args:
        output_text: The text to shorten
        max_rows: How many lines to keep from the top (default: 20)
        max_chars: Ignored; accepted only so existing callers keep working

    Returns:
        The text unchanged when it has at most max_rows lines; otherwise the
        first max_rows lines followed by a one-line truncation notice
    """
    all_lines = output_text.split('\n')

    # More lines than allowed: keep the head and append the notice.
    if len(all_lines) > max_rows:
        notice = f"... [output truncated, showing first {max_rows} lines only] ..."
        return '\n'.join(all_lines[:max_rows] + [notice])

    return output_text

In [None]:

from datetime import datetime
def save_dataframe_version(df, operation_description=""):
    """Save the current state of the dataframe as both CSV and Excel files.

    Files are written to the "rent_roll_versions" directory (created when
    missing) as ``rent_roll_<version_name>.csv`` / ``.xlsx``. The version name
    is ``v_<YYYYmmdd_HHMMSS>``; if files with that name already exist (two
    saves within the same second) a numeric suffix ``_1``, ``_2``, ... is
    appended so an earlier version is never overwritten.

    When a module-level ``app_state`` exists, a metadata entry for the new
    version is appended to ``app_state["df_versions"]``.

    Args:
        df: The dataframe to save
        operation_description: A string describing what operation was performed

    Returns:
        version_name: The name of the version that was saved
    """
    import os
    from datetime import datetime

    # Create versions directory if it doesn't exist
    versions_dir = "rent_roll_versions"
    os.makedirs(versions_dir, exist_ok=True)

    def _version_paths(name):
        # CSV and Excel paths for a given version name.
        return (
            os.path.join(versions_dir, f"rent_roll_{name}.csv"),
            os.path.join(versions_dir, f"rent_roll_{name}.xlsx"),
        )

    # Generate version name with timestamp
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    version_name = f"v_{timestamp}"
    csv_filename, excel_filename = _version_paths(version_name)

    # The timestamp has one-second resolution, so pick a free name rather
    # than silently overwriting a version saved earlier in the same second.
    suffix = 1
    while os.path.exists(csv_filename) or os.path.exists(excel_filename):
        version_name = f"v_{timestamp}_{suffix}"
        csv_filename, excel_filename = _version_paths(version_name)
        suffix += 1

    # Save as CSV
    df.to_csv(csv_filename, index=False)

    # Save as Excel
    df.to_excel(excel_filename, index=False, engine='openpyxl')

    # Add version metadata to the registry
    if 'app_state' in globals():
        version_info = {
            'name': version_name,
            'description': operation_description,
            'timestamp': timestamp,
            'filename': csv_filename,  # Keep CSV as primary for backward compatibility
            'excel_filename': excel_filename,  # Add Excel filename
            'is_original': len(app_state["df_versions"]) == 0  # First one is original
        }
        app_state["df_versions"].append(version_info)

    print(f"✓ Saved dataframe version {version_name}: {operation_description}")
    print(f"  - CSV: {csv_filename}")
    print(f"  - Excel: {excel_filename}")

    # Return the version name for reference
    return version_name

def get_versions_info_for_prompt():
    """Generate version information for the Claude prompt.

    Reads the version registry in ``app_state["df_versions"]`` and renders a
    numbered history in which exactly one entry is tagged ORIGINAL (the first
    entry flagged ``is_original``, or the first entry when none is flagged)
    and exactly one is tagged LATEST (the last entry).

    Returns:
        "No versions available yet." when the registry is empty, otherwise a
        multi-line text block with the history, the original and latest
        version names, and the total count.
    """
    versions = app_state["df_versions"]
    if not versions:
        return "No versions available yet."

    # Identify entries by position rather than by dict equality, so two
    # entries with identical contents are not both tagged.
    original_idx = next(
        (i for i, version in enumerate(versions) if version.get('is_original')),
        0,
    )
    latest_idx = len(versions) - 1

    original = versions[original_idx]
    latest = versions[latest_idx]

    # Format all versions
    all_versions = []
    for i, version in enumerate(versions):
        status = []
        if i == original_idx:
            status.append("ORIGINAL")
        if i == latest_idx:
            status.append("LATEST")

        status_str = f" ({', '.join(status)})" if status else ""
        all_versions.append(f"{i+1}. {version['name']}{status_str}: {version['description']}")

    versions_text = "\n".join(all_versions)

    return f"""
DATAFRAME VERSION HISTORY:
{versions_text}

Original version: {original['name']}
Latest version: {latest['name']}
Total versions: {len(versions)}
"""
def generate_code_and_execute(state: AgentState) -> Dict:
    """
    Generate and execute code using a two-step AI approach:
    1. Use GPT-4 to create an optimal prompt for Claude
    2. Have Claude generate the code based on this optimized prompt
    3. Execute the code and handle errors with up to 3 retries
    """
    messages = state["messages"]
    df = state["df"]

    # Get OpenAI client from state or create new one
    openai_client = state.get("openai_client") or OpenAI(api_key=DEFAULT_OPENAI_API_KEY)
    # Get Anthropic client from state or create new one
    anthropic_client = state.get("anthropic_client") or Anthropic(api_key=DEFAULT_ANTHROPIC_API_KEY)

    # Get column information for context
    column_info = ", ".join(df.columns) if df is not None else "No columns available"

    # Create FULL dataframe content for GPT-4.1 (instead of just a sample)
    if df is not None:
        # Convert entire dataframe to string representation
        df_full_content = df.to_string(index=False)

        # Also get CSV format for better structure
        df_csv_content = df.to_csv(index=False)

        # Prepare comprehensive data summary
        df_summary = f"""
FULL DATAFRAME CONTENT:
{df_full_content}

CSV FORMAT:
{df_csv_content}

DATAFRAME STATISTICS:
- Shape: {df.shape}
- Columns: {list(df.columns)}
- Data types: {dict(df.dtypes)}
- Memory usage: {df.memory_usage(deep=True).sum()} bytes
- Null values per column: {dict(df.isnull().sum())}
"""
    else:
        df_summary = "No data available"

    # Create versions directory if it doesn't exist
    versions_dir = "rent_roll_versions"
    os.makedirs(versions_dir, exist_ok=True)

    # Print initial state for debugging
    print(f"\n==== STARTING CODE GENERATION ====")
    print(f"User query: {messages[-1]['content'] if messages[-1]['role'] == 'user' else 'No user query found'}")
    print(f"Dataframe has {len(df) if df is not None else 0} rows and {len(df.columns) if df is not None else 0} columns")
    print(f"Sending FULL dataframe to GPT-4.1 (not just sample)")

    try:
        # First, use GPT-4 to create the optimal prompt for Claude
        print("\n==== STEP 1: GENERATING PROMPT WITH GPT-4 (WITH FULL DATAFRAME) ====")
        versions_info = get_versions_info_for_prompt()

        # System prompt for GPT-4 to create a Claude prompt
        gpt_system_prompt = f"""You are an expert at creating prompts for Claude AI to generate code.
        Your task is to analyze the user query history and convert it into an optimal prompt for Claude to generate Python code that analyzes a rent roll dataframe.

        CRITICAL INFORMATION: The dataframe is ALREADY LOADED and available as 'df'.
        It contains REAL DATA with {len(df)} rows and {len(df.columns)} columns.

        HERE IS THE COMPLETE DATAFRAME CONTENT (ALL ROWS AND COLUMNS):
        {df_summary}

        # IMPORTANT: DATAFRAME VERSION MANAGEMENT
        {versions_info}

        # IMPORTANT VERSION IDENTIFICATION:
        - Versions are stored in chronological order by timestamp
        - The original version is always the first one saved (earliest timestamp)
        - The latest version is always the most recent one saved (latest timestamp)
        - When a user says "original dataframe," load the version with the earliest timestamp
        - When a user says "latest version," use the current df (which is already the latest)
        - When a user specifies a version by name (e.g., "v_20250518_112345"), load that exact version

        ALL versions are saved as CSV files in the "rent_roll_versions" directory.
        For example, to load a specific version:

        ```python
        # To load a specific version (e.g., the original version)
        import pandas as pd
        import os

        # Example: Load the original version
        original_version_name = "{{app_state["df_versions"][0]['name'] if app_state["df_versions"] else "v_example"}}"
        original_file_path = os.path.join("rent_roll_versions", f"rent_roll_{{original_version_name}}.csv")
        original_df = pd.read_csv(original_file_path)

        print(f"Loaded original version: {{original_version_name}}")
        print(f"Shape: {{original_df.shape}}")

        # You can either work with this as a separate dataframe, or replace the current df:
        # df = original_df  # This would replace the current df with the original
        ```

        If you make any changes to the dataframe, ALWAYS save a new version using save_dataframe_version().

        Some important guidelines to include in your prompt to Claude:
        1. The variable 'df' is ALREADY DEFINED and CONTAINS DATA. Claude must not say "I need to see the data first"
        2. Claude should explain its approach step by step before showing code
        3. Code must be wrapped in ```python and ``` blocks
        4. Code MUST display ALL rows in the output when showing tables (no limiting rows)
        5. Claude should not attempt to clean data unless specifically requested
        6. Code should include proper error handling
        7. IMPORTANT: After performing any analysis or showing results, Claude should ALWAYS call the save_dataframe_version() function to maintain version history, even if no changes were made to the dataframe.
        8. CRITICAL: Claude should NOT use try-except blocks in its code. Any errors should be allowed to propagate naturally. This ensures that our retry system can properly handle errors.

        Your output will be directly sent to Claude, so format it as a complete system prompt.
        Include any table formatting functions that might be useful.

        Make sure to include these helper functions in your prompt:

        ```python
        # For tabular display with proper formatting (PREFERRED METHOD):
        def print_formatted_table(df, title=None): #Print a dataframe with proper formatting without modifying data
            if title:
                print(f"\\n{{title}}")
                print("=" * 80)

            # Create a display copy (doesn't change original df)
            display_df = df.copy()

            # Set pandas display options for better readability
            # Show ALL rows - no limits
            pd.set_option('display.max_rows', None)
            pd.set_option('display.max_columns', None)
            pd.set_option('display.width', 1000)
            pd.set_option('display.colheader_justify', 'left')
            pd.set_option('display.precision', 2)

            # Display the dataframe - ALL rows will be shown
            print(display_df)

            # Reset display options to default
            pd.reset_option('display.max_rows')
            pd.reset_option('display.max_columns')
            pd.reset_option('display.width')
            pd.reset_option('display.colheader_justify')
            pd.reset_option('display.precision')
        ```

        ```python
        # For bordered table display with precise control:
        def print_bordered_table(df, title=None): #Print a dataframe with borders for better readability - SHOWS ALL ROWS
            if title:
                print(f"\\n{{title}}")
                print("=" * 80)

            if len(df) == 0:
                print("No data available")
                return

            # Create a display copy (doesn't change original data)
            display_df = df.copy()

            # Calculate column widths for display purposes only
            col_widths = {{}}
            for col in display_df.columns:
                # Convert values to string only for width calculation
                col_values = display_df[col].astype(str)
                max_data_width = col_values.str.len().max()
                col_widths[col] = max(len(str(col)), max_data_width) + 2  # +2 for padding

            # Create header row
            header = "| " + " | ".join(str(col).ljust(col_widths[col]) for col in display_df.columns) + " |"
            separator = "+" + "+".join("-" * (col_widths[col] + 2) for col in display_df.columns) + "+"

            # Print header
            print(separator)
            print(header)
            print(separator)

            # Print ALL rows - NO LIMIT
            for i in range(len(display_df)):
                row = display_df.iloc[i]
                row_str = "| " + " | ".join(str(val).ljust(col_widths[col]) for col, val in row.items()) + " |"
                print(row_str)

            print(separator)
            print(f"Total rows: {{len(display_df)}}")
        ```

        ```python
        # Function to save dataframe versions
        def save_dataframe_version(df, operation_description=""):
            \"\"\"Save the current state of the dataframe as both CSV and Excel files.

            This function should be called whenever you make changes to the dataframe,
            or after generating analysis results, to maintain version history.

            Args:
                df: The dataframe to save
                operation_description: A string describing what operation was performed

            Returns:
                version_name: The name of the version that was saved
            \"\"\"
            import os
            from datetime import datetime

            # Create versions directory if it doesn't exist
            versions_dir = "rent_roll_versions"
            os.makedirs(versions_dir, exist_ok=True)

            # Generate version name with timestamp
            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
            version_name = f"v_{{timestamp}}"

            # Create filenames for both CSV and Excel
            csv_filename = os.path.join(versions_dir, f"rent_roll_{{version_name}}.csv")
            excel_filename = os.path.join(versions_dir, f"rent_roll_{{version_name}}.xlsx")

            # Save as CSV
            df.to_csv(csv_filename, index=False)

            # Save as Excel
            df.to_excel(excel_filename, index=False, engine='openpyxl')

            print(f"✓ Saved dataframe version {{version_name}}: {{operation_description}}")
            print(f"  - CSV: {{csv_filename}}")
            print(f"  - Excel: {{excel_filename}}")

            # Return the version name for reference
            return version_name
        ```
        """

        # Filter out system messages and DON'T trim dataframe outputs in the conversation history
        filtered_messages = []
        for msg in messages:
            if msg["role"] != "system":
                # Don't trim here anymore
                filtered_messages.append({"role": msg["role"], "content": msg["content"]})

        # Convert the messages to the format expected by OpenAI
        gpt_messages = [{"role": "system", "content": gpt_system_prompt}]
        for msg in filtered_messages:
            gpt_messages.append(msg)

        # Add a final message explaining the task clearly
        gpt_messages.append({
            "role": "user",
            "content": "Based on this conversation history and the COMPLETE dataframe content provided above, create the optimal Claude prompt to generate Python code for rent roll analysis. The prompt should emphasize that the dataframe already exists and is loaded as 'df', that ALL rows should be displayed when requested, and that versions should be saved with save_dataframe_version() function. You have access to the ENTIRE dataframe, not just a sample."
        })

        # Get the optimized prompt from GPT-4
        gpt_response = openai_client.chat.completions.create(
            model="gpt-4.1",
            messages=gpt_messages,
            max_tokens=4000,  # Increased token limit to handle larger dataframes
            temperature=0.3
        )

        claude_system_prompt = gpt_response.choices[0].message.content

        # Print the generated prompt for debugging
        print("\n==== GPT-4 GENERATED PROMPT FOR CLAUDE ====")
        print(claude_system_prompt[:500] + "..." if len(claude_system_prompt) > 500 else claude_system_prompt)
        print("==== END OF PROMPT (TRUNCATED) ====\n")

        logger.info("Generated optimized prompt for Claude using GPT-4 with full dataframe")

        # Now use the GPT-4 generated prompt to ask Claude for code
        print("\n==== STEP 2: SENDING TO CLAUDE FOR CODE GENERATION ====")
        logger.info("Sending optimized prompt to Claude for code generation")

        # Prepare messages for Claude with the full dataframe content
        claude_messages = filtered_messages.copy()

        # Add the complete dataframe content to help Claude understand the data exists
        full_data_message = {
            "role": "user",
            "content": f"Here is the COMPLETE dataframe that's already loaded as 'df':\n\n{df_summary}\n\nPlease process my request using this FULL dataset and remember to save versions with save_dataframe_version()."
        }
        claude_messages.append(full_data_message)

        # Try to get code from Claude
        claude_response = anthropic_client.messages.create(
            model="claude-3-7-sonnet-20250219",
            system=claude_system_prompt,
            messages=claude_messages,
            max_tokens=4000,  # Increased to handle more complex responses
            temperature=0.3
        )

        # Extract the response text from Claude
        response_text = claude_response.content[0].text

        # Print Claude's response for debugging
        print("\n==== CLAUDE'S RESPONSE ====")
        print(response_text[:500] + "..." if len(response_text) > 500 else response_text)
        print("==== END OF CLAUDE RESPONSE (TRUNCATED) ====\n")

        # Extract code blocks
        code_blocks = re.findall(r'```python\s*(.*?)\s*```', response_text, re.DOTALL)

        # Print extracted code blocks for debugging
        print(f"\n==== EXTRACTED {len(code_blocks)} CODE BLOCKS ====")
        for i, block in enumerate(code_blocks):
            print(f"\n-- Code Block {i+1} --")
            print(block[:200] + "..." if len(block) > 200 else block)

        # If no code blocks are found, add emergency code
        if len(code_blocks) == 0:
            emergency_code = """
            # Emergency code to display the dataframe
            pd.set_option('display.max_rows', None)
            pd.set_option('display.max_columns', None)
            pd.set_option('display.width', 1000)

            print("\\n=== RENT ROLL DATA ===\\n")
            print(f"Displaying all {len(df)} rows and {len(df.columns)} columns\\n")

            # Print the entire dataframe
            print(df)

            # Save a version of the dataframe
            from datetime import datetime
            import os

            # Create versions directory if it doesn't exist
            versions_dir = "rent_roll_versions"
            os.makedirs(versions_dir, exist_ok=True)

            # Generate version name with timestamp
            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
            version_name = f"v_{timestamp}"

            # Create filename
            filename = os.path.join(versions_dir, f"rent_roll_{version_name}.csv")

            # Save dataframe
            df.to_csv(filename, index=False)

            print(f"✓ Saved dataframe version {version_name}: Emergency display of data")
            """
            code_blocks.append(emergency_code)
            print("\n-- Added Emergency Code Block --")
            print("Emergency code added since Claude didn't generate code")

        # Rest of the function remains the same (helper functions, execution loop, etc.)
        # Define helper functions
        def print_formatted_table(df, title=None):
            if title:
                print(f"\n{title}")
                print("=" * 80)

            # Create a display copy (doesn't change original df)
            display_df = df.copy()

            # Set pandas display options for better readability
            # Show ALL rows - no limits
            pd.set_option('display.max_rows', None)
            pd.set_option('display.max_columns', None)
            pd.set_option('display.width', 1000)
            pd.set_option('display.colheader_justify', 'left')
            pd.set_option('display.precision', 2)

            # Display the dataframe - ALL rows will be shown
            print(display_df)

            # Reset display options to default
            pd.reset_option('display.max_rows')
            pd.reset_option('display.max_columns')
            pd.reset_option('display.width')
            pd.reset_option('display.colheader_justify')
            pd.reset_option('display.precision')

        def print_bordered_table(df, title=None):
            if title:
                print(f"\n{title}")
                print("=" * 80)

            if len(df) == 0:
                print("No data available")
                return

            # Create a display copy (doesn't change original data)
            display_df = df.copy()

            # Calculate column widths for display purposes only
            col_widths = {}
            for col in display_df.columns:
                # Convert values to string only for width calculation
                col_values = display_df[col].astype(str)
                max_data_width = col_values.str.len().max()
                col_widths[col] = max(len(str(col)), max_data_width) + 2  # +2 for padding

            # Create header row
            header = "| " + " | ".join(str(col).ljust(col_widths[col]) for col in display_df.columns) + " |"
            separator = "+" + "+".join("-" * (col_widths[col] + 2) for col in display_df.columns) + "+"

            # Print header
            print(separator)
            print(header)
            print(separator)

            # Print ALL rows - NO LIMIT
            for i in range(len(display_df)):
                row = display_df.iloc[i]
                row_str = "| " + " | ".join(str(val).ljust(col_widths[col]) for col, val in row.items()) + " |"
                print(row_str)

            print(separator)
            print(f"Total rows: {len(display_df)}")

        # Add to globals_dict before executing code
        globals_dict = {
            "df": df,
            "pd": pd,
            "np": np,
            "os": os,                   # Add os for folder creation
            "datetime": datetime,       # Add datetime for timestamp
            "versions_dir": versions_dir,  # Pass the versions directory
            "print_formatted_table": print_formatted_table,  # Add the helper function
            "print_bordered_table": print_bordered_table,    # Add the helper function
            "save_dataframe_version": save_dataframe_version  # Make sure this is defined too
        }

        execution_results = ""
        all_executed_successfully = False
        max_retries = 5  # Maximum number of retries
        retry_count = 0  # Initialize retry counter
        failed_code = ""  # Store the failed code for context
        error_msg = ""    # Store the error message

        print("\n==== STEP 3: EXECUTING CODE WITH RETRIES ====")

        # Main retry loop (rest of the execution code remains the same)
        while not all_executed_successfully and retry_count <= max_retries:
            # If this is a retry attempt (not the first try)
            if retry_count > 0:
                print(f"\n==== RETRY ATTEMPT {retry_count}/{max_retries} ====")

                # Create a retry message with more details each time
                retry_message = {
                    "role": "user",
                    "content": f"""The code you provided failed with this error: {error_msg}

                    Here is the code that failed:
                    ```python
                    {failed_code}
                    ```

                    This is retry attempt {retry_count} of {max_retries}.

                    {"After multiple attempts, please try a completely different approach." if retry_count >= 2 else "Please fix this specific error."}
                    IMPORTANT: DO NOT use try-except blocks in your code. Allow any errors to propagate naturally so our system can detect them.
                    Please fix this code to handle the specific error while maintaining the requirement to show ALL rows in the output and saving a version with save_dataframe_version().
                    Return the corrected code wrapped in ```python and ``` blocks."""
                }

                # Add this feedback to the messages
                fix_messages = claude_messages.copy()
                fix_messages.append({"role": "assistant", "content": response_text})
                fix_messages.append(retry_message)

                # Get Claude's fixed code
                retry_response = anthropic_client.messages.create(
                    model="claude-3-7-sonnet-20250219",
                    system=claude_system_prompt,
                    messages=fix_messages,
                    max_tokens=3500,
                    temperature=0.3
                )

                retry_text = retry_response.content[0].text
                print(f"\n==== CLAUDE'S FIX SUGGESTION (ATTEMPT {retry_count}) ====")
                print(retry_text[:500] + "..." if len(retry_text) > 500 else retry_text)

                # Extract the fixed code blocks
                fixed_code_blocks = re.findall(r'```python\s*(.*?)\s*```', retry_text, re.DOTALL)

                if fixed_code_blocks:
                    # Use the first fixed code block
                    code_to_execute = fixed_code_blocks[0]

                    # Update response text to include the fix explanation
                    fix_explanation = f"\n\n**🔧 Code Fix (Attempt {retry_count}):**\n"
                    fix_explanation += f"The code encountered an error. Here's the fix for retry attempt {retry_count}:\n"
                    fix_explanation += "\n```python\n" + code_to_execute + "\n```\n"

                    if retry_count == 1:
                        # First retry - add to original response
                        response_text = response_text + fix_explanation
                    else:
                        # Subsequent retries - replace previous fix explanation
                        prev_fix_marker = f"**🔧 Code Fix (Attempt {retry_count-1}):**"
                        if prev_fix_marker in response_text:
                            # Replace previous fix with new one
                            response_text = response_text.replace(
                                prev_fix_marker,
                                f"**🔧 Code Fix (Attempt {retry_count}):**"
                            )
                        else:
                            # Just append this fix
                            response_text = response_text + fix_explanation
                else:
                    # If no code blocks found in retry, try emergency code
                    code_to_execute = f"""
                    # Emergency code for retry {retry_count}
                    print(f"\\n=== EMERGENCY DISPLAY (RETRY {retry_count}) ===\\n")
                    print(f"DataFrame shape: {{df.shape}}")
                    print("\\nColumn names:")
                    for col in df.columns:
                        print(f"- {{col}}")

                    print("\\nFirst 10 rows:")
                    print(df.head(10))

                    save_dataframe_version(df, f"Emergency display after retry {retry_count}")
                    """
                    print(f"No code blocks found in retry. Using emergency code.")
            else:
                # Initial execution (not a retry)
                # Run the original code block
                if code_blocks:
                    code_to_execute = code_blocks[0]  # Use the first code block
                else:
                    # This should not happen due to the earlier check, but just in case
                    code_to_execute = """
                    print("No code blocks found. Displaying basic dataframe info.")
                    print(f"DataFrame shape: {df.shape}")
                    print(df.head())
                    save_dataframe_version(df, "Automatic save after initial execution")
                    """

            # Execute the current code
            print(f"\n{'Executing' if retry_count == 0 else 'Retrying'} code...")
            output_buffer = io.StringIO()
            try:
                # Store the code in case it fails
                failed_code = code_to_execute

                with redirect_stdout(output_buffer):
                    exec(code_to_execute, globals_dict)

                execution_output = output_buffer.getvalue()
                print(f"Execution {'successful' if retry_count == 0 else 'fixed on retry ' + str(retry_count)}! Output length: {len(execution_output)} characters")
                print(execution_output[:200] + "..." if len(execution_output) > 200 else execution_output)

                # ONLY trim the execution output for storing, not the entire response
                trimmed_output = trim_dataframe_output(execution_output, max_rows=20)

                # Format the results message based on retry count
                if retry_count == 0:
                    results_msg = "**✅ Code Execution Results:**"
                else:
                    results_msg = f"**✅ Code Execution Results (After Fix Attempt {retry_count}):**"

                execution_results = f"\n\n{results_msg}\n```\n{trimmed_output}\n```\n"

                # Check if a version was saved
                if "✓ Saved dataframe version" not in execution_output:
                    # Auto-save a version
                    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
                    version_name = f"v_{timestamp}"
                    csv_filename = os.path.join(versions_dir, f"rent_roll_{version_name}.csv")
                    excel_filename = os.path.join(versions_dir, f"rent_roll_{version_name}.xlsx")

                    # Save both CSV and Excel
                    df.to_csv(csv_filename, index=False)
                    df.to_excel(excel_filename, index=False, engine='openpyxl')

                    save_message = f"✓ Saved dataframe version {version_name}: Automatic save after {'execution' if retry_count == 0 else 'retry ' + str(retry_count)}"
                    print(save_message)
                    print(f"  - CSV: {csv_filename}")
                    print(f"  - Excel: {excel_filename}")
                    execution_results += f"\n{save_message}\n"

                # Mark as successful and break the retry loop
                all_executed_successfully = True
                logger.info(f"Successfully executed code {'' if retry_count == 0 else 'on retry ' + str(retry_count)}")
                break

            except Exception as e:
                # Execution failed
                error_msg = f"Error: {str(e)}"
                print(f"Execution failed with error: {error_msg}")

                # Log the error
                if retry_count == 0:
                    execution_results = f"\n\n**❌ Code Execution Failed:**\n```\n{error_msg}\n```\n"
                else:
                    execution_results = f"\n\n**❌ Code Execution Failed (Retry {retry_count}):**\n```\n{error_msg}\n```\n"

                logger.error(f"Code execution failed on {'initial attempt' if retry_count == 0 else 'retry ' + str(retry_count)}: {e}")
                logger.error(traceback.format_exc())

                # Increment retry counter
                retry_count += 1

                # If we've hit max retries and still failed, try emergency display as last resort
                if retry_count > max_retries:
                    print("\n==== MAX RETRIES REACHED, TRYING EMERGENCY DISPLAY ====")

                    # Create emergency display code
                    emergency_code = """
                    try:
                        print("\\n=== EMERGENCY FALLBACK DISPLAY ===\\n")
                        print(f"DataFrame shape: {df.shape}")
                        print("\\nColumn names:")
                        for col in df.columns:
                            print(f"- {col}")

                        print("\\nFirst 10 rows:")
                        print(df.head(10))

                        # Try to show some basic stats about numeric columns
                        try:
                            numeric_cols = df.select_dtypes(include=['number']).columns
                            if len(numeric_cols) > 0:
                                print("\\nBasic statistics for numeric columns:")
                                print(df[numeric_cols].describe())
                        except Exception as stats_err:
                            print(f"Could not generate statistics: {stats_err}")

                        # Save version - both CSV and Excel
                        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
                        version_name = f"v_{timestamp}_emergency"
                        csv_filename = os.path.join(versions_dir, f"rent_roll_{version_name}.csv")
                        excel_filename = os.path.join(versions_dir, f"rent_roll_{version_name}.xlsx")

                        df.to_csv(csv_filename, index=False)
                        df.to_excel(excel_filename, index=False, engine='openpyxl')

                        print(f"✓ Saved dataframe version {version_name}: Emergency display after all retries failed")
                        print(f"  - CSV: {csv_filename}")
                        print(f"  - Excel: {excel_filename}")
                    except Exception as e_inner:
                        print(f"Even emergency display failed: {e_inner}")
                    """

                    output_buffer = io.StringIO()
                    try:
                        with redirect_stdout(output_buffer):
                            exec(emergency_code, globals_dict)

                        emergency_output = output_buffer.getvalue()
                        # Only trim the emergency output, not the whole response
                        execution_results += f"\n\n**⚠️ Emergency Data Display (After {max_retries} Failed Retries):**\n```\n{trim_dataframe_output(emergency_output, max_rows=20)}\n```\n"
                    except Exception as e_final:
                        print(f"Emergency fallback also failed: {e_final}")
                        execution_results += f"\n\n**❌ All Recovery Attempts Failed**\n"

        # Add a note about the hybrid approach and retry attempts
        if retry_count > 0 and all_executed_successfully:
            hybrid_note = f"\n\n**📝 Note:** This analysis was performed using a hybrid approach with GPT-4 and Claude. The code was successfully fixed after {retry_count} retry attempts. GPT-4 received the complete dataframe for optimal context."
        elif retry_count > max_retries:
            hybrid_note = f"\n\n**📝 Note:** This analysis was attempted using a hybrid approach with GPT-4 and Claude, but all {max_retries} retry attempts failed. Some basic information was displayed as a fallback."
        else:
            hybrid_note = "\n\n**📝 Note:** This analysis was performed using a hybrid approach: GPT-4 optimized the prompt with full dataframe context, and Claude generated and executed the code for detailed rent roll analysis."

        # Combine the response and execution results
        full_response = response_text + execution_results + hybrid_note

        print("\n==== FINAL RESPONSE GENERATED ====")
        print(f"Original response length: {len(full_response)} characters")
        print(f"Retry attempts: {retry_count}")
        print(f"Execution successful: {all_executed_successfully}")

        # Create a new state dict with updated values
        new_state = dict(state)
        new_state["code_execution_results"] = execution_results
        new_state["final_response"] = full_response  # Don't trim the full response

        # Add the response to the messages - don't trim it here either
        new_messages = state["messages"].copy()
        new_messages.append({"role": "assistant", "content": full_response})
        new_state["messages"] = new_messages

        logger.info("Code generation and execution complete using hybrid GPT-4/Claude approach with full dataframe")
        print("\n==== CODE GENERATION COMPLETE ====")

        return new_state

    except Exception as e:
        logger.error(f"Error in hybrid code generation: {e}")
        logger.error(traceback.format_exc())
        print(f"\n==== ERROR IN CODE GENERATION ====\n{e}\n{traceback.format_exc()}")

        # Try to save a version even on error
        try:
            # Generate version name with timestamp
            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
            version_name = f"v_{timestamp}_system_error"

            # Create filenames
            csv_filename = os.path.join(versions_dir, f"rent_roll_{version_name}.csv")
            excel_filename = os.path.join(versions_dir, f"rent_roll_{version_name}.xlsx")

            # Save both formats
            df.to_csv(csv_filename, index=False)
            df.to_excel(excel_filename, index=False, engine='openpyxl')

            save_message = f"✓ Saved dataframe version {version_name}: System error - {str(e)[:100]}"
            print(save_message)
            print(f"  - CSV: {csv_filename}")
            print(f"  - Excel: {excel_filename}")
        except Exception as save_error:
            print(f"Failed to save error version: {save_error}")

        # Fallback to a generic response
        fallback_response = f"""
        I'm sorry, I encountered an issue while generating and executing code for your request.

        **Technical Details:** {str(e)}

        Could you try asking your question in a different way? For complex analyses, it sometimes helps to break down your request into smaller, more specific questions.
        """

        new_state = dict(state)
        new_state["final_response"] = fallback_response

        new_messages = state["messages"].copy()
        new_messages.append({"role": "assistant", "content": fallback_response})
        new_state["messages"] = new_messages

        return new_state

In [None]:
# Build the LangGraph workflow
def create_agentic_rent_roll_analyzer():
    """Assemble and compile the LangGraph workflow for the rent roll agent.

    The graph starts at ``determine_action`` and then branches once:
    to ``ask_clarification`` when the state has a truthy
    ``needs_clarification``, otherwise to ``generate_code_and_execute`` when
    ``generate_code`` is truthy, otherwise to ``generate_text_response``.
    Each of those three nodes is connected straight to ``END``.

    Returns:
        The compiled workflow object produced by ``StateGraph.compile()``.
    """
    graph = StateGraph(AgentState)

    # Every node is registered under the name of the function that backs it.
    node_handlers = (
        ("determine_action", determine_action),
        ("ask_clarification", ask_clarification),
        ("generate_text_response", generate_text_response),
        ("generate_code_and_execute", generate_code_and_execute),
    )
    for node_name, handler in node_handlers:
        graph.add_node(node_name, handler)

    graph.set_entry_point("determine_action")

    def pick_next_node(state):
        # Clarification wins over code generation; plain text is the default.
        if state.get("needs_clarification"):
            return "ask_clarification"
        if state.get("generate_code"):
            return "generate_code_and_execute"
        return "generate_text_response"

    graph.add_conditional_edges("determine_action", pick_next_node)

    # All three downstream nodes terminate the run.
    for terminal_node in (
        "ask_clarification",
        "generate_text_response",
        "generate_code_and_execute",
    ):
        graph.add_edge(terminal_node, END)

    return graph.compile()


In [None]:
def upload_rent_roll(file, anthropic_api_key, openai_api_key, auto_analyze):
    """Process the uploaded rent roll file and initialize the chat.

    Copies the upload to a temporary ``.xlsx`` file, loads it into a
    dataframe, optionally runs the GPT issue analysis, replaces the global
    ``app_state``, saves the initial dataframe version, and builds both the
    system message and an HTML preview.

    Args:
        file: The uploaded file. Either an object exposing its path as
            ``.name`` or a plain path string; falsy when nothing was uploaded.
        anthropic_api_key: Anthropic key; ``DEFAULT_ANTHROPIC_API_KEY`` is used
            when this is empty.
        openai_api_key: OpenAI key; ``DEFAULT_OPENAI_API_KEY`` is used when
            this is empty.
        auto_analyze: When truthy, ``analyze_rent_roll_gpt`` is run on the file
            and its result is used as the issues list.

    Returns:
        A 4-tuple on every path (no file, success, error):
        ``(status message, preview HTML or None, chat visibility update,
        version dropdown update)``.
    """
    import html  # local import: used only to escape issue text in the preview

    global app_state

    logger.info("Starting rent roll upload and processing")

    # Use the default API keys if none are provided
    anthropic_key = anthropic_api_key if anthropic_api_key else DEFAULT_ANTHROPIC_API_KEY
    openai_key = openai_api_key if openai_api_key else DEFAULT_OPENAI_API_KEY
    logger.info("API keys configured")

    # Validate inputs. This path must return the same number of values as the
    # success and error paths below, since all of them feed the same outputs.
    if not file:
        logger.warning("No file uploaded")
        return (
            "Please upload a rent roll Excel file.",
            None,
            gr.update(visible=False),
            gr.update(choices=[], value=None),
        )

    # Initialised before the try so the error handler can test it directly.
    file_path = None

    try:
        # Save the uploaded file to a temporary location
        temp_file = tempfile.NamedTemporaryFile(delete=False, suffix='.xlsx')
        temp_file.close()
        file_path = temp_file.name
        logger.info(f"Created temporary file: {file_path}")

        # Copy the uploaded file to our temporary location. Accept either an
        # object carrying the path in `.name` or a bare path string.
        source_path = getattr(file, "name", file)
        with open(source_path, 'rb') as src_file, open(file_path, 'wb') as dst_file:
            dst_file.write(src_file.read())
        logger.info("File copied to temporary location")

        # Use our improved rent roll loader
        try:
            logger.info("Loading rent roll with specialized loader...")
            rent_roll_df = read_rent_roll_simple(file_path)
        except Exception as e:
            logger.warning(f"Error with specialized loader: {e}. Falling back to standard loading.")
            # Fallback to basic loading
            rent_roll_df = pd.read_excel(file_path)
            logger.info("Fallback: Loaded rent roll with default pandas settings")

        logger.info(f"Loaded rent roll data: {len(rent_roll_df)} rows, {len(rent_roll_df.columns)} columns")

        # Auto-analyze with GPT if selected
        if auto_analyze:
            logger.info("Auto-analyze option selected. Calling GPT for analysis...")
            issues_list = analyze_rent_roll_gpt(file_path, openai_key)  # Use OpenAI key for this
            logger.info(f"GPT analysis complete. Found {len(issues_list)} issues.")
        else:
            # Create empty issues list if not auto-analyzing
            issues_list = []
            logger.info("No auto-analysis performed.")

        # Initialize the global app state with version tracking
        app_state = {
            "df": rent_roll_df,
            "issues": issues_list,
            "anthropic_client": Anthropic(api_key=anthropic_key),
            "openai_client": OpenAI(api_key=openai_key),
            "system_message": "",  # Will be populated below
            "df_versions": []  # Initialize empty version registry
        }

        # Save the initial version
        initial_version = save_dataframe_version(rent_roll_df, "Initial upload - original dataset")
        logger.info(f"Created initial dataframe version: {initial_version}")

        # Create system message with data understanding
        column_info = []
        for col in rent_roll_df.columns:
            try:
                dtype_str = str(rent_roll_df[col].dtype)
                column_info.append(f"- {col}: {dtype_str}")
            except Exception as e:
                column_info.append(f"- {col}: [Error determining type: {str(e)}]")
        column_info_str = "\n".join(column_info)

        # Calculate basic stats about the data
        data_stats = []
        for col in rent_roll_df.columns:
            try:
                if pd.api.types.is_numeric_dtype(rent_roll_df[col]):
                    stat = f"- {col}: min={rent_roll_df[col].min()}, max={rent_roll_df[col].max()}, mean={rent_roll_df[col].mean():.2f}, null={rent_roll_df[col].isna().sum()}"
                else:
                    unique_vals = rent_roll_df[col].nunique()
                    stat = f"- {col}: unique values={unique_vals}, null={rent_roll_df[col].isna().sum()}"
                data_stats.append(stat)
            except Exception:
                # Best effort: one bad column must not block the upload, but
                # KeyboardInterrupt/SystemExit are no longer swallowed here.
                data_stats.append(f"- {col}: [error calculating stats]")
        data_stats_str = "\n".join(data_stats)

        # Format issues for display
        issues_text = "\n".join([f"- {issue}" for issue in issues_list])

        system_message = f"""
        You are a Commercial Real Estate rent roll assistant that has analyzed a rent roll and found the following issues:

        {issues_text}

        The rent roll data has {len(rent_roll_df)} rows and {len(rent_roll_df.columns)} columns.

        Column information:
        {column_info_str}

        Data statistics:
        {data_stats_str}

        When helping the user, follow these critical guidelines:
        1. DO NOT generate placeholder code with fake column names. Work ONLY with the actual columns from the dataframe.
        2. NEVER assume column names that don't exist in the actual data.
        3. Always start by examining the first few rows to understand the meaning of each column.
        4. If you can't identify which columns contain certain information, clearly state this limitation.
        5. DO NOT proceed with analysis using made-up column names that don't exist in the data.

        The entire dataframe is available as 'df' in the execution environment.

        Important instructions for code and calculations:
        1. ALWAYS share your chain of thought reasoning in your responses. For each analysis:
          - Begin with "**Thinking through this step by step:**" in bold
          - Clearly explain your understanding of the request
          - Describe your approach to solving the problem
          - Outline the data exploration steps you'll take
          - Explain why you're choosing specific columns and methods
          - Discuss any challenges you anticipate with the data structure
          This chain of thought should be visible to the user in your chat responses.
        """

        # Save the system message to the app state
        app_state["system_message"] = system_message

        # Clean up the temporary file
        os.unlink(file_path)
        logger.info("Temporary file removed")

        # Generate a preview of the data and issues
        preview_html = f"""
        <h3>Rent Roll Preview</h3>
        <p>Successfully loaded rent roll with {len(rent_roll_df)} rows and {len(rent_roll_df.columns)} columns.</p>
        {rent_roll_df.head(5).fillna('').to_html(index=False)}

        <h3>Identified Issues</h3>
        <ol>
        """

        # Format each issue for the HTML preview
        for issue in issues_list:
            # If issue starts with a number (like "1. Issue"), strip the number
            if issue and issue[0].isdigit() and ". " in issue[:5]:
                issue = issue[issue.find(". ")+2:]
            # Issue text is model-generated from uploaded data, so escape it
            # before embedding it in markup.
            preview_html += f"<li>{html.escape(str(issue))}</li>"

        preview_html += """
        </ol>
        <p>You can now start asking questions in the chat below!</p>
        <p><strong>Note:</strong> This application uses GPT-4 for decision making and text responses,
        and Claude AI specifically for code generation and execution.</p>
        """
        version_choices = get_version_choices()
        # Make the chat interface visible
        logger.info("Setup complete. Ready for chat interaction.")
        return (
            "Rent roll loaded successfully! You can now start chatting.",
            preview_html,
            gr.update(visible=True),  # chatbot visibility
            gr.update(choices=version_choices, value=version_choices[-1] if version_choices else None)  # version dropdown
        )

    except Exception as e:
        logger.error(f"Error during rent roll processing: {e}")
        logger.error(traceback.format_exc())
        # Remove the temporary copy if it was created and not yet deleted.
        if file_path and os.path.exists(file_path):
            os.unlink(file_path)
            logger.info("Cleaned up temporary file after error")
        return f"Error: {str(e)}", None, gr.update(visible=False), gr.update(choices=[], value=None)

In [None]:
def load_latest_version_for_editing():
    """Return the current dataframe, prepared for the data editor, plus a status line.

    Reads the module-level ``app_state``. The working dataframe is copied and
    the copy's missing values are replaced with empty strings, so the
    dataframe stored in ``app_state`` is not modified.

    Returns:
        tuple: ``(df, message)``. ``df`` is the filled copy, or ``None`` when
        nothing is loaded or an error occurred. ``message`` names the latest
        saved version, states that no versions exist yet, or carries the
        error text.
    """
    global app_state

    if app_state is None or app_state.get("df") is None:
        return None, "No data loaded. Please upload a rent roll first."

    try:
        # The working dataframe is the latest state; copy it before filling.
        df = app_state["df"].copy()
        df = df.fillna('')

        # "df_versions" is not guaranteed to be present (get_version_choices
        # checks for it as well), so read it with .get(); subscripting raised
        # KeyError and turned a valid load into an error reply.
        versions = app_state.get("df_versions")
        if versions:
            latest_version = versions[-1]
            version_info = f"Loaded version: {latest_version['name']} - {latest_version['description']}"
        else:
            version_info = "Loaded current data (no versions saved yet)"

        logger.info(f"Loaded dataframe for editing: {df.shape}")
        return df, version_info
    except Exception as e:
        logger.error(f"Error loading data for editing: {e}")
        return None, f"Error loading data: {str(e)}"

def save_edited_dataframe(edited_df, description):
    """Save the edited table as a new dataframe version and make it current.

    Args:
        edited_df: The edited table. Either a pandas DataFrame or any object
            accepted by ``pd.DataFrame(...)``; it is converted before use.
        description: Free-text description for the new version. A default is
            substituted when it is empty or only whitespace.

    Returns:
        tuple: ``(status_message, update)``. On success ``update`` is
        ``gr.update(value=<saved dataframe>)``; when there is nothing to save
        or saving fails it is an empty ``gr.update()``.
    """
    global app_state

    if edited_df is None:
        return "No data to save", gr.update()

    try:
        # Normalise to a DataFrame *before* touching DataFrame-only attributes.
        # The emptiness check used to run first (and outside the try), so any
        # non-DataFrame input raised AttributeError and never got converted.
        if not isinstance(edited_df, pd.DataFrame):
            edited_df = pd.DataFrame(edited_df)

        if edited_df.empty:
            return "No data to save", gr.update()

        # Fall back to a generic description when none was typed in.
        if not description or not str(description).strip():
            description = "Manual edits via data editor"

        # Save as new version
        version_name = save_dataframe_version(edited_df, description)

        # The edited table becomes the working dataframe.
        app_state["df"] = edited_df

        logger.info(f"Saved edited dataframe as version {version_name}")

        # Return success message and update the view
        return f"✅ Successfully saved as version {version_name}", gr.update(value=edited_df)

    except Exception as e:
        logger.error(f"Error saving edited dataframe: {e}")
        return f"❌ Error saving: {str(e)}", gr.update()

def load_specific_version(version_name):
    """Load a saved dataframe version from disk for editing.

    Args:
        version_name: A bare version name, or a dropdown label as built by
            ``get_version_choices()`` (the name followed by " (ORIGINAL)" or
            " (LATEST)"). The status marker is removed before the lookup.

    Returns:
        tuple: ``(df, message)``. ``df`` is the CSV content with missing
        values replaced by empty strings, or ``None`` when nothing was
        selected, the file does not exist, or reading failed.
    """
    global app_state

    if not version_name:
        return None, "Please select a version to load"

    try:
        # Dropdown labels carry a status marker that is not part of the file
        # name on disk; left in place, the CSV lookup can never succeed for
        # the first and last versions.
        version_name = str(version_name)
        for status_marker in (" (ORIGINAL)", " (LATEST)"):
            if version_name.endswith(status_marker):
                version_name = version_name[:-len(status_marker)]
                break

        # Find the version file
        versions_dir = "rent_roll_versions"
        csv_filename = os.path.join(versions_dir, f"rent_roll_{version_name}.csv")

        if os.path.exists(csv_filename):
            df = pd.read_csv(csv_filename)
            df = df.fillna('')
            logger.info(f"Loaded version {version_name} for editing")
            return df, f"Loaded version: {version_name}"
        else:
            return None, f"Version file not found: {version_name}"

    except Exception as e:
        logger.error(f"Error loading version {version_name}: {e}")
        return None, f"Error loading version: {str(e)}"

def get_version_choices():
    """Build the dropdown labels for the saved dataframe versions.

    Each label is the version's ``name``; the first version is tagged
    " (ORIGINAL)" and the last one " (LATEST)". With a single version the
    ORIGINAL tag wins. Returns an empty list when ``app_state`` is unset or
    holds no versions.
    """
    global app_state

    if not app_state or "df_versions" not in app_state:
        return []

    versions = app_state["df_versions"]
    if not versions:
        return []

    last_position = len(versions) - 1

    def _status_tag(position):
        # Position 0 is checked first so a lone version reads as ORIGINAL.
        if position == 0:
            return " (ORIGINAL)"
        if position == last_position:
            return " (LATEST)"
        return ""

    return [
        f"{entry['name']}{_status_tag(position)}"
        for position, entry in enumerate(versions)
    ]

def refresh_version_dropdown():
    """Return a ``gr.update`` that repopulates the version dropdown.

    The choices come from ``get_version_choices()``; the selected value is
    the last choice, or ``None`` when there are no versions.
    """
    available = get_version_choices()
    if not available:
        return gr.update(choices=[], value=None)
    # Default the selection to the most recent entry.
    newest = available[-1]
    return gr.update(choices=available, value=newest)

In [None]:
class SessionRecorder:
    """Record a copiloting session to a text log and an in-memory summary.

    A session is active between ``start_session_recording`` and
    ``finalize_session``; while active, ``current_session_file`` holds the
    path of the text log and ``current_session_data`` holds the structured
    record. Every ``record_*`` method is a no-op when no session is active.
    """

    def __init__(self):
        # Creates the log directory (relative to the working directory) if needed.
        self.sessions_dir = "copiloting_sessions"
        os.makedirs(self.sessions_dir, exist_ok=True)
        self.current_session_file = None
        self.current_session_data = {}

    def _append_to_log(self, lines):
        """Append the given strings, in order, to the active session's text log."""
        with open(self.current_session_file, 'a', encoding='utf-8') as f:
            f.writelines(lines)

    def start_session_recording(self, rent_roll_filename):
        """Start recording the entire copiloting session.

        Args:
            rent_roll_filename: Name of the rent roll file, stored in the
                session record and written to the log header.

        Returns:
            str: The new session id (``session_<YYYYmmdd_HHMMSS>``).
        """
        # One timestamp for the id, the stored start time and the header, so
        # the three can never disagree across a second boundary.
        started = datetime.now()
        session_id = f"session_{started.strftime('%Y%m%d_%H%M%S')}"
        self.current_session_file = os.path.join(self.sessions_dir, f"{session_id}.txt")

        # Initialize session data
        self.current_session_data = {
            "session_id": session_id,
            "start_time": started.isoformat(),
            "rent_roll_file": rent_roll_filename,
            "conversation_history": [],
            "code_executions": [],
            "dataframe_versions": [],
            "issues_found": [],
            "user_goals": []
        }

        # Write session header to text file ('w' truncates any existing log
        # with the same id).
        with open(self.current_session_file, 'w', encoding='utf-8') as f:
            f.write("=== RENT ROLL COPILOTING SESSION ===\n")
            f.write(f"Session ID: {session_id}\n")
            f.write(f"Started: {started.strftime('%Y-%m-%d %H:%M:%S')}\n")
            f.write(f"Rent Roll File: {rent_roll_filename}\n")
            f.write("=" * 50 + "\n\n")

        print(f"📝 Started session recording: {session_id}")
        return session_id

    def record_conversation_turn(self, user_message, ai_response, action_type, code_executed=None, version_saved=None):
        """Record one user/assistant exchange in the log and the session data.

        Args:
            user_message: The user's message.
            ai_response: The assistant's reply (a string).
            action_type: Label describing the kind of turn.
            code_executed: Code associated with the turn, if any. When given,
                the turn is also added to ``code_executions`` with the reply
                truncated to 200 characters.
            version_saved: Name of a dataframe version saved in this turn, if any.
        """
        if not self.current_session_file:
            return

        timestamp = datetime.now().strftime('%H:%M:%S')
        turn_data = {
            "timestamp": timestamp,
            "user_message": user_message,
            "ai_response": ai_response,
            "action_type": action_type,
            "code_executed": code_executed,
            "version_saved": version_saved
        }

        # Add to session data
        self.current_session_data["conversation_history"].append(turn_data)

        # Append to text file immediately
        log_lines = [
            f"[{timestamp}] USER: {user_message}\n",
            f"Action Type: {action_type}\n",
        ]
        if code_executed:
            log_lines.append(f"CODE EXECUTED:\n```python\n{code_executed}\n```\n")
        log_lines.append(f"AI RESPONSE: {ai_response}\n")
        if version_saved:
            log_lines.append(f"VERSION SAVED: {version_saved}\n")
        log_lines.append("-" * 80 + "\n\n")
        self._append_to_log(log_lines)

        # Track code executions separately
        if code_executed:
            self.current_session_data["code_executions"].append({
                "timestamp": timestamp,
                "code": code_executed,
                "purpose": user_message,
                "result": ai_response[:200] + "..." if len(ai_response) > 200 else ai_response
            })

    def record_dataframe_version(self, version_name, description, shape, columns):
        """Record a saved dataframe version (name, description, shape, columns)."""
        # Without an active session current_session_data is {} and the append
        # below raised KeyError; behave like record_conversation_turn instead.
        if not self.current_session_file:
            return

        version_info = {
            "timestamp": datetime.now().strftime('%H:%M:%S'),
            "version_name": version_name,
            "description": description,
            "shape": shape,
            "columns": columns
        }

        self.current_session_data["dataframe_versions"].append(version_info)

        # Append to text file
        self._append_to_log([
            f"VERSION SAVED: {version_name}\n",
            f"Description: {description}\n",
            f"Shape: {shape}\n",
            f"Columns: {columns}\n",
            "-" * 40 + "\n\n",
        ])

    def record_issue_found(self, issue_description, severity="medium"):
        """Record an issue found during analysis, tagged with ``severity``."""
        # Same guard as the other record_* methods: no session, nothing to do.
        if not self.current_session_file:
            return

        issue_info = {
            "timestamp": datetime.now().strftime('%H:%M:%S'),
            "description": issue_description,
            "severity": severity
        }

        self.current_session_data["issues_found"].append(issue_info)

        self._append_to_log([
            f"ISSUE FOUND [{severity.upper()}]: {issue_description}\n",
            "-" * 40 + "\n\n",
        ])

    def finalize_session(self):
        """End the active session, write its summary, and return its data.

        Returns:
            dict | None: A shallow copy of the session record with
            ``end_time`` and ``duration_minutes`` added, or ``None`` when no
            session is active. The recorder is reset afterwards.
        """
        if not self.current_session_file:
            return None

        end_time = datetime.now()
        duration = end_time - datetime.fromisoformat(self.current_session_data["start_time"])

        # Write session summary
        data = self.current_session_data
        self._append_to_log([
            "\n" + "=" * 50 + "\n",
            "SESSION SUMMARY\n",
            "=" * 50 + "\n",
            f"Session Duration: {duration.total_seconds()/60:.1f} minutes\n",
            f"Total Conversations: {len(data['conversation_history'])}\n",
            f"Code Executions: {len(data['code_executions'])}\n",
            f"Versions Created: {len(data['dataframe_versions'])}\n",
            f"Issues Found: {len(data['issues_found'])}\n",
            f"Ended: {end_time.strftime('%Y-%m-%d %H:%M:%S')}\n",
        ])

        # Update session data
        self.current_session_data["end_time"] = end_time.isoformat()
        self.current_session_data["duration_minutes"] = duration.total_seconds() / 60

        session_data = self.current_session_data.copy()

        # Reset for next session
        self.current_session_file = None
        self.current_session_data = {}

        print(f"✅ Session recording finalized: {session_data['session_id']}")
        return session_data

# Global session recorder: the single SessionRecorder instance that other
# functions in this file reach through `global session_recorder`.
# Constructing it creates the "copiloting_sessions" directory if it is missing.
session_recorder = SessionRecorder()

In [None]:
class EnhancedTemplateManager:
    """Create, list, summarise, load and delete rent-roll workflow templates.

    A template is a ``<template_id>.json`` metadata file in ``templates_dir``
    plus three companion files named in its ``"files"`` entry: the starting
    and final dataframes (CSV content stored under a ``.txt`` name) and the
    plain-text session context that was sent for analysis.
    """

    def __init__(self):
        # Creates the templates directory (relative to the working directory) if needed.
        self.templates_dir = "rent_roll_templates"
        os.makedirs(self.templates_dir, exist_ok=True)
        self.current_session = None

    def create_template_from_session(self, session_data, starting_df, final_df, template_name):
        """Generate comprehensive template from complete session data using GPT-4.

        The session is summarised into text, sent to the ``gpt-4.1`` chat
        model for analysis, and the result is written to disk together with
        both dataframes. A failure of the analysis step (client creation,
        request, or reading the reply) is not fatal: the template is still
        written, with an ``{"error": ..., "fallback_analysis": ...}`` analysis.

        Args:
            session_data: Session record dict (as returned by
                ``SessionRecorder.finalize_session``).
            starting_df: DataFrame at the start of the session.
            final_df: DataFrame at the end of the session.
            template_name: Human-readable template name.

        Returns:
            dict: The template metadata that was written to the JSON file.
        """
        print("🤖 Analyzing session with GPT-4.1 to generate instructions...")

        # Prepare comprehensive session context for GPT-4
        session_context = self._prepare_session_context(session_data)
        analysis_prompt = self._build_analysis_prompt(session_context, starting_df, final_df)

        try:
            # The client is created inside the try so that a missing or
            # invalid key yields the fallback analysis instead of aborting.
            client = OpenAI(api_key=DEFAULT_OPENAI_API_KEY)
            response = client.chat.completions.create(
                model="gpt-4.1",  # Using latest GPT-4
                messages=[
                    {"role": "system", "content": "You are an expert data analyst who creates reusable workflow templates from analysis sessions. Provide detailed, actionable instructions."},
                    {"role": "user", "content": analysis_prompt}
                ],
                max_tokens=4000,
                temperature=0.3
            )

            # Extract the analysis
            workflow_analysis = self._parse_analysis_response(response.choices[0].message.content)

        except Exception as e:
            print(f"❌ Error with GPT-4 analysis: {e}")
            workflow_analysis = {"error": str(e), "fallback_analysis": "Manual analysis required"}

        # Create template with all data
        template_id = f"template_{datetime.now().strftime('%Y%m%d_%H%M%S')}"

        # Save dataframes as text files
        starting_df_file = f"{template_id}_starting_df.txt"
        final_df_file = f"{template_id}_final_df.txt"
        session_file = f"{template_id}_session.txt"

        starting_df_path = os.path.join(self.templates_dir, starting_df_file)
        final_df_path = os.path.join(self.templates_dir, final_df_file)
        session_path = os.path.join(self.templates_dir, session_file)

        # Save dataframes
        starting_df.to_csv(starting_df_path, index=False)
        final_df.to_csv(final_df_path, index=False)

        # Save raw session data
        with open(session_path, 'w', encoding='utf-8') as f:
            f.write(session_context)

        # Create comprehensive template
        template_data = {
            "template_id": template_id,
            "template_name": template_name,
            "created_date": datetime.now().isoformat(),
            "source_session_id": session_data.get("session_id", "unknown"),
            "source_file_name": session_data.get("rent_roll_file", "unknown"),

            "files": {
                "starting_dataframe": starting_df_file,
                "final_dataframe": final_df_file,
                "raw_session": session_file
            },

            "session_summary": {
                "duration_minutes": session_data.get("duration_minutes", 0),
                "total_conversations": len(session_data.get("conversation_history", [])),
                "code_executions": len(session_data.get("code_executions", [])),
                "versions_created": len(session_data.get("dataframe_versions", [])),
                "issues_found": len(session_data.get("issues_found", []))
            },

            "gpt4_analysis": workflow_analysis,

            "raw_workflow_steps": session_data.get("conversation_history", []),
            "code_executions": session_data.get("code_executions", []),
            "dataframe_changes": session_data.get("dataframe_versions", []),
            "issues_identified": session_data.get("issues_found", [])
        }

        # Save template metadata (default=str keeps non-JSON values serialisable)
        template_json_path = os.path.join(self.templates_dir, f"{template_id}.json")
        with open(template_json_path, 'w', encoding='utf-8') as f:
            json.dump(template_data, f, indent=2, default=str)

        print(f"✅ Comprehensive template created: {template_id}")
        print(f"📁 Starting DF: {starting_df_path}")
        print(f"📁 Final DF: {final_df_path}")
        print(f"📁 Session Data: {session_path}")
        print(f"📋 Template: {template_json_path}")

        return template_data

    def _build_analysis_prompt(self, session_context, starting_df, final_df):
        """Build the analysis prompt from the session text and both dataframes."""
        return f"""
        You are an expert at analyzing data analysis workflows and creating reusable instruction templates.

        I will provide you with a complete copiloting session where a user worked on a rent roll analysis.
        Your task is to:
        1. Analyze the entire workflow
        2. Identify the key transformation patterns
        3. Create step-by-step instructions that can be applied to similar rent roll files
        4. Generate reusable code templates with placeholders
        5. Document the business logic and decision points

        SESSION DATA:
        {session_context}

        STARTING DATAFRAME INFO:
        - Shape: {starting_df.shape}
        - Columns: {list(starting_df.columns)}
        - Sample data: {starting_df.head(2).to_string()}

        FINAL DATAFRAME INFO:
        - Shape: {final_df.shape}
        - Columns: {list(final_df.columns)}
        - Sample data: {final_df.head(2).to_string()}

        Please generate a comprehensive analysis in the following JSON format:
        {{
            "workflow_summary": "Brief description of what was accomplished",
            "key_transformations": [
                {{
                    "step_name": "Clean Tenant Names",
                    "description": "Standardize tenant name formatting",
                    "business_rule": "All tenant names should be Title Case with no extra whitespace",
                    "code_template": "df['{{column_name}}'] = df['{{original_column}}'].str.strip().str.title()",
                    "parameters": ["column_name", "original_column"],
                    "conditions": "Apply when tenant names have inconsistent formatting"
                }}
            ],
            "data_quality_improvements": [
                "List of data quality issues that were resolved"
            ],
            "reusable_patterns": [
                "Pattern 1: Column standardization",
                "Pattern 2: Missing value handling"
            ],
            "business_insights": [
                "Key insights discovered during analysis"
            ],
            "prerequisites": [
                "What conditions must be met for this template to work"
            ],
            "instructions_for_reuse": [
                "Step 1: Upload new rent roll file",
                "Step 2: Map columns (if different names)",
                "Step 3: Apply transformations in order"
            ]
        }}
        """

    @staticmethod
    def _parse_analysis_response(gpt_analysis):
        """Extract the JSON object from the model reply, or wrap the raw text.

        Returns the parsed JSON when the outermost ``{...}`` span of the
        reply is valid JSON, otherwise ``{"analysis": <raw reply>}``.
        """
        json_match = re.search(r'{.*}', gpt_analysis, re.DOTALL)
        if json_match:
            try:
                return json.loads(json_match.group(0))
            except ValueError:
                # json.JSONDecodeError is a ValueError. Catch only that, not
                # everything (the bare except also swallowed KeyboardInterrupt).
                pass
        return {"analysis": gpt_analysis}

    @staticmethod
    def _clip(value, limit):
        """Return at most ``limit`` characters of ``value``; ``None`` becomes 'N/A'."""
        text = 'N/A' if value is None else str(value)
        return text[:limit]

    def _prepare_session_context(self, session_data):
        """Render the session record as plain text for the analysis prompt.

        The AI response of each turn is cut to 300 characters and the code of
        each execution summary to 100 characters; "..." is appended in both
        cases regardless of whether anything was cut.
        """
        context = f"""
COPILOTING SESSION ANALYSIS
============================

Session ID: {session_data.get('session_id', 'N/A')}
Duration: {session_data.get('duration_minutes', 0):.1f} minutes
Rent Roll File: {session_data.get('rent_roll_file', 'N/A')}

CONVERSATION HISTORY:
"""

        for i, conv in enumerate(session_data.get('conversation_history', []), 1):
            context += f"""
--- Conversation {i} [{conv.get('timestamp', 'N/A')}] ---
USER QUERY: {conv.get('user_message', 'N/A')}
ACTION TYPE: {conv.get('action_type', 'N/A')}
"""
            if conv.get('code_executed'):
                context += f"CODE EXECUTED:\n{conv['code_executed']}\n"

            # A stored None used to crash the slice; _clip maps it to 'N/A'.
            context += f"AI RESPONSE: {self._clip(conv.get('ai_response'), 300)}...\n"

            if conv.get('version_saved'):
                context += f"VERSION SAVED: {conv['version_saved']}\n"

            context += "\n"

        context += "\nCODE EXECUTIONS SUMMARY:\n"
        for code_exec in session_data.get('code_executions', []):
            context += f"- [{code_exec.get('timestamp')}] {code_exec.get('purpose', 'N/A')}\n"
            context += f"  Code: {self._clip(code_exec.get('code'), 100)}...\n"

        context += "\nISSUES IDENTIFIED:\n"
        for issue in session_data.get('issues_found', []):
            context += f"- [{issue.get('timestamp')}] {issue.get('description', 'N/A')}\n"

        context += "\nDATAFRAME VERSIONS:\n"
        for version in session_data.get('dataframe_versions', []):
            context += f"- {version.get('version_name', 'N/A')}: {version.get('description', 'N/A')}\n"

        return context

    def load_template_dataframes(self, template_id):
        """Load a template's metadata and its starting and final dataframes.

        Returns:
            tuple: ``(template_data, starting_df, final_df)``, or
            ``(None, None, None)`` if anything could not be read.
        """
        try:
            # Load template metadata
            template_json_path = os.path.join(self.templates_dir, f"{template_id}.json")
            with open(template_json_path, 'r', encoding='utf-8') as f:
                template_data = json.load(f)

            # Load starting dataframe
            starting_df_path = os.path.join(self.templates_dir, template_data["files"]["starting_dataframe"])
            starting_df = pd.read_csv(starting_df_path)

            # Load final dataframe
            final_df_path = os.path.join(self.templates_dir, template_data["files"]["final_dataframe"])
            final_df = pd.read_csv(final_df_path)

            return template_data, starting_df, final_df

        except Exception as e:
            print(f"❌ Error loading template: {e}")
            return None, None, None

    def list_templates(self):
        """List all readable templates, newest first.

        Files in ``templates_dir`` ending in ``.json`` that cannot be parsed
        or lack the required keys are skipped with a warning instead of
        making the whole listing come back empty.

        Returns:
            list[dict]: One summary dict per template, sorted by
            ``created_date`` descending; ``[]`` if the directory cannot be read.
        """
        try:
            json_files = [f for f in os.listdir(self.templates_dir) if f.endswith('.json')]
        except Exception as e:
            print(f"❌ Error listing templates: {e}")
            return []

        templates = []
        for json_file in json_files:
            template_path = os.path.join(self.templates_dir, json_file)
            try:
                with open(template_path, 'r', encoding='utf-8') as f:
                    template_data = json.load(f)

                templates.append({
                    "template_id": template_data["template_id"],
                    "template_name": template_data["template_name"],
                    "created_date": template_data["created_date"],
                    "source_file": template_data["source_file_name"],
                    "steps_count": len(template_data.get("raw_workflow_steps", [])),
                    "gpt4_analysis_available": "gpt4_analysis" in template_data
                })
            except Exception as e:
                print(f"Warning: Skipping template file {json_file}: {e}")

        # str() keeps the sort well-defined even if a file stored a non-string date.
        return sorted(templates, key=lambda x: str(x["created_date"]), reverse=True)

    def get_template_summary(self, template_id):
        """Get a human-readable summary of a template.

        Returns the summary text, or an error message string if the template
        cannot be read.
        """
        try:
            template_json_path = os.path.join(self.templates_dir, f"{template_id}.json")
            with open(template_json_path, 'r', encoding='utf-8') as f:
                template_data = json.load(f)

            summary = f"""
📋 Template: {template_data.get('template_name', 'Unknown')}
🆔 ID: {template_data.get('template_id', 'Unknown')}
📅 Created: {template_data.get('created_date', 'Unknown')}
📁 Source File: {template_data.get('source_file_name', 'Unknown')}

📊 Session Summary:
• Duration: {template_data.get('session_summary', {}).get('duration_minutes', 0):.1f} minutes
• Conversations: {template_data.get('session_summary', {}).get('total_conversations', 0)}
• Code Executions: {template_data.get('session_summary', {}).get('code_executions', 0)}
• Versions Created: {template_data.get('session_summary', {}).get('versions_created', 0)}
• Issues Found: {template_data.get('session_summary', {}).get('issues_found', 0)}

🤖 GPT-4 Analysis: {'✅ Available' if 'gpt4_analysis' in template_data else '❌ Not Available'}
"""

            # Add GPT-4 analysis summary if available
            if 'gpt4_analysis' in template_data and isinstance(template_data['gpt4_analysis'], dict):
                gpt_analysis = template_data['gpt4_analysis']

                if 'workflow_summary' in gpt_analysis:
                    summary += f"\n🔍 Workflow Summary:\n{gpt_analysis['workflow_summary']}\n"

                if 'key_transformations' in gpt_analysis:
                    transformations = gpt_analysis['key_transformations']
                    summary += f"\n🔧 Key Transformations ({len(transformations)}):\n"
                    for i, transform in enumerate(transformations[:3], 1):  # Show first 3
                        summary += f"{i}. {transform.get('step_name', 'Unknown')}: {transform.get('description', 'No description')}\n"
                    if len(transformations) > 3:
                        summary += f"... and {len(transformations) - 3} more\n"

                if 'prerequisites' in gpt_analysis:
                    summary += "\n📋 Prerequisites:\n"
                    for prereq in gpt_analysis['prerequisites'][:3]:  # Show first 3
                        summary += f"• {prereq}\n"

            return summary

        except Exception as e:
            return f"❌ Error getting template summary: {str(e)}"

    def delete_template(self, template_id):
        """Delete a template and all its associated files.

        Removes the metadata JSON and every existing file listed under its
        ``"files"`` entry. Individual deletion failures are printed as
        warnings and do not stop the remaining deletions.

        Returns:
            str: A status message including the number of files removed.
        """
        try:
            template_json_path = os.path.join(self.templates_dir, f"{template_id}.json")

            if not os.path.exists(template_json_path):
                return f"❌ Template {template_id} not found"

            # Load template to get file list
            with open(template_json_path, 'r', encoding='utf-8') as f:
                template_data = json.load(f)

            files_to_delete = [template_json_path]  # The main template file

            # Add dataframe and session files
            if 'files' in template_data:
                for filename in template_data['files'].values():
                    file_path = os.path.join(self.templates_dir, filename)
                    if os.path.exists(file_path):
                        files_to_delete.append(file_path)

            # Delete all files
            deleted_count = 0
            for file_path in files_to_delete:
                try:
                    os.remove(file_path)
                    deleted_count += 1
                except Exception as e:
                    print(f"Warning: Could not delete {file_path}: {e}")

            return f"✅ Template {template_id} deleted successfully. Removed {deleted_count} files."

        except Exception as e:
            return f"❌ Error deleting template: {str(e)}"

# Global enhanced template manager: the single EnhancedTemplateManager
# instance that other functions in this file reach through
# `global enhanced_template_manager`. Constructing it creates the
# "rent_roll_templates" directory if it is missing.
enhanced_template_manager = EnhancedTemplateManager()

In [None]:
def create_template_from_current_session():
    """Create a template from the current copiloting session.

    Finalizes the active recording session and hands its data, together with
    the starting and final dataframes, to ``enhanced_template_manager``.

    The starting dataframe is read from the first saved version's CSV. The
    path is taken from the version record's ``"filename"`` or
    ``"csv_filename"`` entry, or else from the conventional
    ``rent_roll_versions/rent_roll_<name>.csv`` location. If none of those
    files exists, the current dataframe is used as the starting point, the
    same fallback that applies when no versions exist at all.

    Returns:
        str: A success message with the template id and number of recorded
        conversation turns, or an error message.
    """
    global app_state, session_recorder, enhanced_template_manager

    if not session_recorder.current_session_file:
        return "❌ No active session to create template from"

    if app_state is None or app_state.get("df") is None:
        return "❌ No dataframe loaded"

    try:
        # Current dataframe is the final version
        final_df = app_state["df"]

        # Get starting dataframe (first version); fall back to the current one.
        starting_df = final_df
        versions = app_state.get("df_versions")
        if versions:
            first_version = versions[0]
            candidate_paths = [
                first_version.get("filename"),
                first_version.get("csv_filename"),
            ]
            if first_version.get("name"):
                # Same naming scheme load_specific_version uses on disk.
                candidate_paths.append(
                    os.path.join("rent_roll_versions", f"rent_roll_{first_version['name']}.csv")
                )

            # A missing path used to reach pd.read_csv(None) and abort with an
            # opaque error; pick the first candidate that actually exists.
            starting_df_path = next(
                (path for path in candidate_paths if path and os.path.exists(path)),
                None,
            )
            if starting_df_path:
                starting_df = pd.read_csv(starting_df_path)
            else:
                logger.warning(
                    "First version file not found; using the current dataframe as the template's starting point"
                )

        # Finalize current session
        session_data = session_recorder.finalize_session()

        if session_data is None:
            return "❌ Error finalizing session"

        # Generate template name suggestion
        template_name = f"Rent Roll Process {datetime.now().strftime('%Y-%m-%d %H:%M')}"

        # Create comprehensive template
        template_data = enhanced_template_manager.create_template_from_session(
            session_data=session_data,
            starting_df=starting_df,
            final_df=final_df,
            template_name=template_name
        )

        return f"✅ Template created successfully!\nTemplate ID: {template_data['template_id']}\nSteps captured: {len(session_data.get('conversation_history', []))}"

    except Exception as e:
        # Log like the sibling handlers do, so the failure is not only in the UI text.
        logger.error(f"Error creating template: {e}")
        return f"❌ Error creating template: {str(e)}"

In [None]:
# Global state for the application (Not part of graph state)
app_state = {
    "df": None,  # working DataFrame; None until a rent roll is loaded
    # Saved version records, oldest first. Initialised here because other code
    # in this file reads the key directly (app_state["df_versions"]).
    "df_versions": [],
    "anthropic_client": None,
    "openai_client": None,  # Added for GPT-4
    "issues": [],
    "system_message": ""
}
# Enhanced Chat Function with Complete Session Recording and Template Generation

def chat(message, history):
    """
    Enhanced chat function with comprehensive session recording and template generation.
    Records every interaction, code execution, and dataframe change for template creation.
    """
    global app_state, session_recorder, enhanced_template_manager

    logger.info(f"Received chat message: {message[:50]}...")

    # Check if system is ready
    if app_state is None or app_state["df"] is None:
        logger.warning("Chat attempted before setup is complete")
        return history + [(message, "Please upload a rent roll file and set up your API keys first.")]

    # Start session recording if not already started
    if not session_recorder.current_session_file:
        rent_roll_filename = getattr(app_state, 'original_filename', 'uploaded_rent_roll.xlsx')
        session_id = session_recorder.start_session_recording(rent_roll_filename)
        logger.info(f"Started new session recording: {session_id}")

        # Record initial dataframe state
        if app_state.get("df_versions") and len(app_state["df_versions"]) > 0:
            first_version = app_state["df_versions"][0]
            session_recorder.record_dataframe_version(
                version_name=first_version["name"],
                description=first_version["description"],
                shape=list(app_state["df"].shape),
                columns=list(app_state["df"].columns)
            )

    # Get previous messages from history
    prev_messages = []
    if history:
        for user_msg, assistant_msg in history:
            prev_messages.append({"role": "user", "content": user_msg})
            prev_messages.append({"role": "assistant", "content": assistant_msg})

    # Create message list without system message
    all_messages = []
    all_messages.extend(prev_messages)

    # Add the current user message
    all_messages.append({"role": "user", "content": message})

    # Create a state dictionary for the graph
    state = {
        "messages": all_messages,
        "system_message": app_state["system_message"],
        "df": app_state["df"],
        "issues": app_state["issues"],
        "needs_clarification": False,
        "generate_code": False,
        "execution_plan": None,
        "clarification_question": None,
        "code_execution_results": None,
        "final_response": None,
        "anthropic_client": app_state["anthropic_client"],
        "openai_client": app_state["openai_client"]
    }

    try:
        # Create the workflow if not already created
        if not hasattr(chat, "workflow"):
            chat.workflow = create_agentic_rent_roll_analyzer()
            logger.info("Created agentic workflow")

        # Run the workflow with the current state
        logger.info("Running agentic workflow")
        result = chat.workflow.invoke(state)

        # Get the final response from the result state
        final_response = result.get("final_response", "I'm sorry, I couldn't process your request.")
        logger.info(f"Received final response from workflow: {final_response[:50]}...")

        # === ENHANCED SESSION RECORDING ===

        # 1. Determine action type based on response content and workflow state
        action_type = "analysis"  # default

        if result.get("needs_clarification"):
            action_type = "clarification"
        elif result.get("generate_code"):
            action_type = "data_processing"
        elif "error" in final_response.lower() or "sorry" in final_response.lower():
            action_type = "error_handling"
        elif "```python" in final_response:
            action_type = "code_execution"
        elif any(keyword in message.lower() for keyword in ["clean", "fix", "correct", "standardize"]):
            action_type = "data_cleaning"
        elif any(keyword in message.lower() for keyword in ["calculate", "compute", "sum", "average"]):
            action_type = "calculation"
        elif any(keyword in message.lower() for keyword in ["find", "show", "display", "list"]):
            action_type = "data_exploration"
        elif any(keyword in message.lower() for keyword in ["chart", "graph", "plot", "visualize"]):
            action_type = "visualization"

        # 2. Extract executed code from response
        code_executed = None
        code_blocks = re.findall(r'```python\s*(.*?)\s*```', final_response, re.DOTALL)
        if code_blocks:
            # Combine all code blocks if multiple
            code_executed = "\n\n# --- Next Code Block ---\n\n".join(code_blocks)

        # 3. Check if a new dataframe version was saved
        version_saved = None
        if "✓ Saved dataframe version" in final_response:
            version_match = re.search(r'version (v_\w+)', final_response)
            if version_match:
                version_saved = version_match.group(1)
                logger.info(f"Detected new version saved: {version_saved}")

        # 4. Detect if issues were found or resolved
        if any(keyword in final_response.lower() for keyword in ["issue", "problem", "error", "missing", "duplicate"]):
            issue_description = message + " - " + final_response[:100] + "..."
            severity = "high" if any(word in final_response.lower() for word in ["critical", "error", "failed"]) else "medium"
            session_recorder.record_issue_found(issue_description, severity)

        # 5. Extract any business insights or patterns
        insights = []
        if "found" in final_response.lower() and any(word in final_response.lower() for word in ["units", "rent", "tenant"]):
            insights.append(f"Business insight from query: {message}")

        # 6. Record the complete conversation turn with enhanced metadata
        session_recorder.record_conversation_turn(
            user_message=message,
            ai_response=final_response,
            action_type=action_type,
            code_executed=code_executed,
            version_saved=version_saved
        )

        # 7. Record dataframe version details if saved
        if version_saved:
            # Find the latest version info
            latest_version = None
            if app_state.get("df_versions"):
                for version in app_state["df_versions"]:
                    if version["name"] == version_saved:
                        latest_version = version
                        break

            if latest_version:
                session_recorder.record_dataframe_version(
                    version_name=version_saved,
                    description=latest_version.get("description", "Auto-saved during copiloting"),
                    shape=list(app_state["df"].shape),
                    columns=list(app_state["df"].columns)
                )
            else:
                # Fallback if version not found in registry
                session_recorder.record_dataframe_version(
                    version_name=version_saved,
                    description="Auto-saved during copiloting session",
                    shape=list(app_state["df"].shape),
                    columns=list(app_state["df"].columns)
                )

        # 8. Track user goals and patterns
        user_goals = []
        if any(keyword in message.lower() for keyword in ["clean", "standardize", "fix"]):
            user_goals.append("Data cleaning and standardization")
        if any(keyword in message.lower() for keyword in ["analyze", "find", "calculate"]):
            user_goals.append("Data analysis and insights")
        if any(keyword in message.lower() for keyword in ["chart", "graph", "visualize"]):
            user_goals.append("Data visualization")

        if user_goals:
            session_recorder.current_session_data.setdefault("user_goals", []).extend(user_goals)

        # 9. Log session statistics
        if session_recorder.current_session_data:
            total_turns = len(session_recorder.current_session_data.get("conversation_history", []))
            total_code = len(session_recorder.current_session_data.get("code_executions", []))
            logger.info(f"Session stats - Turns: {total_turns}, Code executions: {total_code}")

        # Use the correct format for Gradio chatbot
        history_list = list(history) if history else []
        history_list.append((message, final_response))

        logger.info("Chat response processing complete with session recording")
        return history_list

    except Exception as e:
        logger.error(f"Error processing chat: {e}")
        logger.error(traceback.format_exc())

        # Record the error in session
        error_message = f"Error getting response: {str(e)}"

        if session_recorder.current_session_file:
            session_recorder.record_conversation_turn(
                user_message=message,
                ai_response=error_message,
                action_type="system_error",
                code_executed=None,
                version_saved=None
            )

            # Record as a system issue
            session_recorder.record_issue_found(
                f"System error during processing: {str(e)}",
                severity="high"
            )

        # Handle errors properly in the chat history format
        history_list = list(history) if history else []
        history_list.append((message, error_message))
        return history_list


def create_template_from_current_session(template_name_input=""):
    """
    Create a comprehensive template from the current copiloting session.

    The active session is finalized through ``session_recorder.finalize_session()``
    and its data, together with the starting and final dataframes, is passed to
    ``enhanced_template_manager.create_template_from_session``.

    Args:
        template_name_input: Optional name for the template. ``None``, an empty
            string or whitespace-only text causes a name to be generated from
            the session's ``rent_roll_file`` entry and the current date.

    Returns:
        str: A user-facing status message: a success summary, or a message
        starting with "❌" when there is no active session, no dataframe is
        loaded, or template creation fails.
    """
    global app_state, session_recorder, enhanced_template_manager

    if not session_recorder.current_session_file:
        return "❌ No active copiloting session found. Please start chatting with the system first."

    # .get() so a state dict without a "df" key is reported as "no dataframe"
    # instead of raising KeyError outside the try block.
    if app_state is None or app_state.get("df") is None:
        return "❌ No dataframe loaded. Cannot create template."

    # Normalize the requested name BEFORE the session is finalized: a bad input
    # (e.g. None) must not be able to fail after finalize_session() has already
    # ended the recording.
    requested_name = "" if template_name_input is None else str(template_name_input).strip()

    try:
        logger.info("Starting template creation from current session...")

        # 1. Get starting dataframe (first version saved)
        starting_df = None
        if app_state.get("df_versions") and len(app_state["df_versions"]) > 0:
            # Load the original/first version
            first_version = app_state["df_versions"][0]
            starting_df_path = first_version.get("filename") or first_version.get("csv_filename")
            if starting_df_path and os.path.exists(starting_df_path):
                starting_df = pd.read_csv(starting_df_path)
                logger.info(f"Loaded starting dataframe from: {starting_df_path}")
            else:
                # Try to construct the path from the version name
                versions_dir = "rent_roll_versions"
                csv_filename = os.path.join(versions_dir, f"rent_roll_{first_version['name']}.csv")
                if os.path.exists(csv_filename):
                    starting_df = pd.read_csv(csv_filename)
                    logger.info(f"Loaded starting dataframe from: {csv_filename}")

        # Fallback: use current dataframe if no versions found
        if starting_df is None:
            starting_df = app_state["df"].copy()
            logger.warning("Using current dataframe as starting point (no version history found)")

        # 2. Current dataframe is the final version
        final_df = app_state["df"].copy()

        # 3. Finalize current session to get complete session data
        logger.info("Finalizing current session...")
        session_data = session_recorder.finalize_session()

        if session_data is None:
            return "❌ Error finalizing session data."

        # 4. Generate template name if not provided
        if requested_name:
            template_name = requested_name
        else:
            rent_roll_file = session_data.get('rent_roll_file', 'Unknown')
            timestamp = datetime.now().strftime('%Y-%m-%d')
            template_name = f"Rent Roll Process - {rent_roll_file} - {timestamp}"

        # 5. Create comprehensive template using GPT-4.1 analysis
        logger.info("Creating template with GPT-4.1 analysis...")
        template_data = enhanced_template_manager.create_template_from_session(
            session_data=session_data,
            starting_df=starting_df,
            final_df=final_df,
            template_name=template_name
        )

        # 6. Prepare success message with details
        session_stats = session_data.get('session_summary', {})
        success_message = f"""✅ Template Created Successfully!

📋 Template Details:
• Template ID: {template_data['template_id']}
• Template Name: {template_name}
• Source File: {session_data.get('rent_roll_file', 'Unknown')}

📊 Session Summary:
• Duration: {session_stats.get('duration_minutes', 0):.1f} minutes
• Conversations: {session_stats.get('total_conversations', 0)}
• Code Executions: {session_stats.get('code_executions', 0)}
• Versions Created: {session_stats.get('versions_created', 0)}
• Issues Found: {session_stats.get('issues_found', 0)}

📁 Files Created:
• Starting Dataframe: {template_data['files']['starting_dataframe']}
• Final Dataframe: {template_data['files']['final_dataframe']}
• Session Recording: {template_data['files']['raw_session']}
• Template Metadata: {template_data['template_id']}.json

🤖 GPT-4.1 Analysis: {'✅ Completed' if 'gpt4_analysis' in template_data else '❌ Failed'}

This template can now be applied to similar rent roll files using the Template Manager."""

        logger.info(f"Template creation completed: {template_data['template_id']}")
        return success_message

    except Exception as e:
        error_msg = f"❌ Error creating template: {str(e)}"
        logger.error(f"Template creation failed: {e}")
        logger.error(traceback.format_exc())
        return error_msg


def end_current_session():
    """
    Manually end the current copiloting session without creating a template.
    Useful for starting fresh or when session gets too long.

    Returns:
        str: A user-facing status message. When the recorder returns session
        data, the message lists the session id, duration, and the number of
        conversations, code executions and dataframe versions found in it.
    """
    global session_recorder

    if not session_recorder.current_session_file:
        return "ℹ️ No active session to end."

    try:
        session_data = session_recorder.finalize_session()

        if not session_data:
            return "⚠️ Session ended but no data was saved."

        # `or` fallbacks: a key that is present but None must not make the
        # summary formatting fail after the session has already been finalized.
        session_stats = {
            'duration': session_data.get('duration_minutes', 0) or 0,
            'conversations': len(session_data.get('conversation_history', []) or []),
            'code_executions': len(session_data.get('code_executions', []) or []),
            'versions': len(session_data.get('dataframe_versions', []) or [])
        }

        return f"""✅ Session Ended Successfully

            📊 Final Session Statistics:
            • Session ID: {session_data.get('session_id', 'Unknown')}
            • Duration: {session_stats['duration']:.1f} minutes
            • Total Conversations: {session_stats['conversations']}
            • Code Executions: {session_stats['code_executions']}
            • Dataframe Versions: {session_stats['versions']}

            💾 Session data saved to: {session_data.get('session_id', 'unknown')}.txt

            You can now start a new session or create a template from this completed session."""

    except Exception as e:
        # Log with traceback like the other callbacks in this file do, so the
        # failure is diagnosable and not only shown in the UI.
        logger.error(f"Error ending session: {e}")
        logger.error(traceback.format_exc())
        return f"❌ Error ending session: {str(e)}"


# Additional helper function to get session status
def get_current_session_status():
    """
    Describe the state of the session recorder as a user-facing string.

    Returns a short notice when no recording file is set or when no session
    data has been collected yet; otherwise returns a multi-line summary with
    the session id, elapsed minutes since ``start_time`` and the sizes of the
    recorded history lists. Any exception is reported as an error string.
    """
    global session_recorder

    if not session_recorder.current_session_file:
        return "📴 No active session recording"

    try:
        snapshot = session_recorder.current_session_data
        if not snapshot:
            return "📹 Session recording active but no data collected yet"

        # Elapsed time is measured from the recorded ISO start time; a missing
        # start time falls back to "now".
        started_iso = snapshot.get('start_time', datetime.now().isoformat())
        began_at = datetime.fromisoformat(started_iso)
        elapsed_minutes = (datetime.now() - began_at).total_seconds() / 60

        session_label = snapshot.get('session_id', 'Unknown')
        n_turns = len(snapshot.get('conversation_history', []))
        n_code_runs = len(snapshot.get('code_executions', []))
        n_versions = len(snapshot.get('dataframe_versions', []))
        n_issues = len(snapshot.get('issues_found', []))
        recording_path = session_recorder.current_session_file

        return f"""📹 Session Recording Active

            📊 Current Statistics:
            • Session ID: {session_label}
            • Duration: {elapsed_minutes:.1f} minutes
            • Conversations: {n_turns}
            • Code Executions: {n_code_runs}
            • Versions Created: {n_versions}
            • Issues Found: {n_issues}

            📁 Recording File: {recording_path}

            All interactions are being automatically recorded for template creation."""

    except Exception as e:
        return f"❌ Error getting session status: {str(e)}"

In [None]:
def view_data():
    """
    Build an HTML preview of the loaded rent roll.

    Returns a plain notice string when no dataframe is loaded; otherwise an
    HTML fragment with a heading, the row/column counts and the first 10 rows
    (missing values shown as empty strings, index column omitted).
    """
    global app_state  # state lives in app_state, accessed as a dict

    logger.info("View data requested")

    frame = None if app_state is None else app_state["df"]
    if frame is None:
        logger.warning("View data requested but no data is loaded")
        return "No rent roll data loaded yet."

    row_count = len(frame)
    logger.info(f"Generating HTML preview of data with {row_count} rows")

    column_count = len(frame.columns)
    preview_table = frame.head(10).fillna('').to_html(index=False)

    return f"""
    <h3>Rent Roll Data</h3>
    <p>{row_count} rows × {column_count} columns</p>
    {preview_table}
    """

In [None]:

def clear_chat():
    """Log the reset and return a new, empty chat history list for Gradio."""
    logger.info("Clearing chat history")
    fresh_history = list()
    return fresh_history

In [None]:
def view_dataframe_versions():
    """
    Return HTML showing all versions of the rent roll dataframe.

    Scans the ``rent_roll_versions`` directory (relative to the current working
    directory) for ``.csv`` files whose name contains ``rent_roll``. For each
    file it collects size, modification time and row/column counts, and looks
    up a description and ``is_original`` flag in ``app_state["df_versions"]``
    by version name.

    Returns:
        str: An HTML table of the versions, or a plain message when the
        directory is missing, unreadable or contains no version files.
    """
    global app_state
    # Imported under an alias because `html` is used below as the name of the
    # output buffer.
    from html import escape as _escape_html

    logger.info("View dataframe versions requested")

    versions_dir = "rent_roll_versions"

    if not os.path.exists(versions_dir):
        logger.warning("No versions directory found")
        return "No version history found. Please save a version first."

    # Get all files in the versions directory
    try:
        all_files = os.listdir(versions_dir)
        # Match any CSV file containing rent_roll in the name
        version_files = [f for f in all_files if f.endswith('.csv') and 'rent_roll' in f]
    except Exception as e:
        logger.error(f"Error reading versions directory: {e}")
        return f"Error listing versions: {str(e)}"

    if not version_files:
        logger.warning("No version files found in directory")
        return f"No version files found in the versions directory ({versions_dir})."

    # Version registry from app_state; tolerate app_state being None (nothing
    # loaded yet) or having no/empty "df_versions" so files on disk still list.
    known_versions = (app_state or {}).get("df_versions") or []

    # Extract version information
    versions = []
    for file in version_files:
        # Extract the version name from the filename
        if file.startswith('rent_roll_v_'):
            version_name = file.replace('rent_roll_', '').replace('.csv', '')
        else:
            version_name = os.path.splitext(file)[0].replace('rent_roll_', '')

        # Get file stats
        try:
            file_path = os.path.join(versions_dir, file)
            file_stats = os.stat(file_path)
            file_size = file_stats.st_size
            modified_time = datetime.fromtimestamp(file_stats.st_mtime).strftime("%Y-%m-%d %H:%M:%S")

            # Try to get row and column counts
            df_info = ""
            try:
                temp_df = pd.read_csv(file_path)
                df_info = f"{len(temp_df)} rows × {len(temp_df.columns)} columns"
            except Exception:
                # An unreadable CSV is shown as such, not treated as fatal.
                df_info = "Unable to read file"

            # If we have version info in app_state
            description = ""
            is_original = False

            for v in known_versions:
                if v.get("name") == version_name:
                    description = v.get("description", "")
                    is_original = v.get("is_original", False)
                    break

            # If not found in app_state, use fallback description
            if not description and os.path.exists(file_path):
                description = "Found in directory"

            versions.append({
                'version_name': version_name,
                'file_size': file_size,
                'modified_time': modified_time,
                'df_info': df_info,
                'description': description,
                'is_original': is_original,
                'file_path': file_path
            })
        except Exception as e:
            logger.error(f"Error processing version file {file}: {e}")
            versions.append({
                'version_name': version_name,
                'file_size': 0,
                'modified_time': 'Error',
                'df_info': f"Error: {str(e)}",
                'description': '',
                'is_original': False,
                'file_path': os.path.join(versions_dir, file)
            })

    # Sort versions by modification time. The timestamps are
    # "%Y-%m-%d %H:%M:%S" strings, so lexicographic order is chronological;
    # entries that failed above carry 'Error' and sort after real timestamps.
    versions.sort(key=lambda x: x['modified_time'])

    # Create basic HTML table without zebra striping
    html = """
    <h3 style="color: white;">Rent Roll Dataframe Version History</h3>
    """

    html += f"""
    <p style="color: white;">Found {len(versions)} version(s) in {versions_dir}</p>
    <table border="1" cellpadding="5" cellspacing="0" style="width: 100%; border-collapse: collapse; color: white;">
        <thead style="background-color: #009879;">
            <tr>
                <th style="text-align: left; padding: 10px;">Version Name</th>
                <th style="text-align: left; padding: 10px;">Status</th>
                <th style="text-align: left; padding: 10px;">Created</th>
                <th style="text-align: left; padding: 10px;">Size</th>
                <th style="text-align: left; padding: 10px;">Data</th>
                <th style="text-align: left; padding: 10px;">Description</th>
            </tr>
        </thead>
        <tbody>
    """

    for i, v in enumerate(versions):
        # No alternating rows - all cells have the same background and text color
        # Always use dark background with white text for all rows

        # Determine status badge
        if i == 0 or v.get('is_original'):
            status_html = '<span style="background-color: #3949ab; color: white; padding: 3px 6px; border-radius: 3px; display: inline-block;">ORIGINAL</span>'
        elif i == len(versions) - 1:
            status_html = '<span style="background-color: #43a047; color: white; padding: 3px 6px; border-radius: 3px; display: inline-block;">LATEST</span>'
        else:
            # Middle version with orange badge
            status_html = f'<span style="background-color: #f57c00; color: white; padding: 3px 6px; border-radius: 3px; display: inline-block;">v{i+1}</span>'

        # File names, descriptions and error text are free-form, so they are
        # HTML-escaped before being placed into the markup.
        safe_name = _escape_html(str(v['version_name']))
        safe_info = _escape_html(str(v['df_info']))
        safe_description = _escape_html(str(v['description']))

        # All rows have dark background and white text
        html += f"""
        <tr style="background-color: #25292e; color: white; border-bottom: 1px solid #333;">
            <td style="padding: 10px;"><code style="font-family: monospace; font-weight: bold;">{safe_name}</code></td>
            <td style="padding: 10px;">{status_html}</td>
            <td style="padding: 10px;">{v['modified_time']}</td>
            <td style="padding: 10px;">{round(v['file_size']/1024, 2)} KB</td>
            <td style="padding: 10px;">{safe_info}</td>
            <td style="padding: 10px;">{safe_description}</td>
        </tr>
        """

    html += """
        </tbody>
    </table>
    """

    logger.info(f"Generated version history display with {len(versions)} versions")
    return html

In [None]:
def _describe_dataframe(df):
    """Summarize a dataframe (shape, columns, dtypes, 5-row sample, null counts) for the prompt."""
    return {
        "shape": df.shape,
        "columns": list(df.columns),
        "dtypes": dict(df.dtypes.astype(str)),
        "sample_data": df.head(5).to_string(),
        "null_counts": dict(df.isnull().sum()),
    }


def _sample_cell_changes(original_df, modified_df, max_rows=10):
    """Compare the first `max_rows` rows cell by cell and return the cells whose values differ."""
    changed_cells = []
    for i in range(min(max_rows, len(original_df))):
        # Fetch each row once instead of once per column.
        orig_row = original_df.iloc[i]
        mod_row = modified_df.iloc[i]
        for col in original_df.columns:
            try:
                orig_val = orig_row[col]
                mod_val = mod_row[col]

                orig_missing = pd.isna(orig_val)
                mod_missing = pd.isna(mod_val)

                # Two missing values count as "unchanged".
                if orig_missing and mod_missing:
                    continue

                # A value appearing/disappearing, or a different stripped
                # string form, counts as a change.
                if orig_missing or mod_missing or str(orig_val).strip() != str(mod_val).strip():
                    changed_cells.append({
                        "row": i,
                        "column": col,
                        "original": str(orig_val),
                        "modified": str(mod_val)
                    })
            except Exception:
                # Best effort: a cell that cannot be compared is skipped.
                continue
    return changed_cells


def analyze_dataframe_changes_with_gpt4(original_df, modified_df, user_description=""):
    """
    Ask an OpenAI chat model to describe the differences between two dataframes.

    Builds a prompt from summaries of both dataframes plus a sample of changed
    cells (first 10 rows, only when shape and column order are identical),
    sends it to the ``gpt-4o`` chat-completions model and parses the JSON
    object in the reply.

    Args:
        original_df: Dataframe before the edit.
        modified_df: Dataframe after the edit.
        user_description: Free-text description supplied by the user; embedded
            in the prompt and in the fallback results.

    Returns:
        dict: The parsed analysis with ``raw_gpt_response`` added. When the
        reply holds no parseable JSON object, or any step raises, a fallback
        dict with ``change_summary``, ``change_type`` and
        ``session_description`` (plus ``parsing_error`` or ``error``) is
        returned instead. This function does not raise.
    """
    try:
        client = OpenAI(api_key=DEFAULT_OPENAI_API_KEY)

        # Prepare comparison data for GPT-4
        original_info = _describe_dataframe(original_df)
        modified_info = _describe_dataframe(modified_df)

        # Detect specific changes
        shape_changed = original_df.shape != modified_df.shape
        columns_changed = set(original_df.columns) != set(modified_df.columns)

        # Sample comparison for data changes: only meaningful when both frames
        # line up cell for cell.
        changed_cells = []
        if original_df.shape == modified_df.shape and list(original_df.columns) == list(modified_df.columns):
            changed_cells = _sample_cell_changes(original_df, modified_df, max_rows=10)
        data_changes_detected = bool(changed_cells)

        # Prepare analysis prompt for GPT-4
        analysis_prompt = f"""
        You are an expert data analyst specializing in tracking and documenting dataframe modifications.

        I will provide you with before/after information about a rent roll dataframe that was edited manually.
        Your task is to analyze the changes and provide a comprehensive summary.

        USER PROVIDED DESCRIPTION: "{user_description}"

        ORIGINAL DATAFRAME INFO:
        - Shape: {original_info['shape']}
        - Columns: {original_info['columns']}
        - Data Types: {original_info['dtypes']}
        - Null Counts: {original_info['null_counts']}
        - Sample Data:
        {original_info['sample_data']}

        MODIFIED DATAFRAME INFO:
        - Shape: {modified_info['shape']}
        - Columns: {modified_info['columns']}
        - Data Types: {modified_info['dtypes']}
        - Null Counts: {modified_info['null_counts']}
        - Sample Data:
        {modified_info['sample_data']}

        DETECTED CHANGES:
        - Shape Changed: {shape_changed}
        - Columns Changed: {columns_changed}
        - Data Changes Detected: {data_changes_detected}
        - Sample Cell Changes: {changed_cells[:5]}  # First 5 changes

        Please provide a comprehensive analysis in the following JSON format:
        {{
            "change_summary": "Brief overview of what was modified",
            "change_type": "data_edit|structure_change|mixed",
            "structural_changes": {{
                "rows_added": 0,
                "rows_removed": 0,
                "columns_added": [],
                "columns_removed": [],
                "columns_renamed": []
            }},
            "data_modifications": {{
                "cells_changed": 0,
                "columns_affected": [],
                "common_patterns": [],
                "data_quality_impact": "improved|degraded|neutral"
            }},
            "business_impact": {{
                "rent_calculations_affected": false,
                "tenant_information_updated": false,
                "occupancy_status_changed": false,
                "financial_data_modified": false
            }},
            "recommendations": [
                "List of recommendations based on the changes made"
            ],
            "session_description": "Detailed description for session recording"
        }}
        """

        # Get GPT-4 analysis
        response = client.chat.completions.create(
            model="gpt-4o",  # Latest GPT-4
            messages=[
                {"role": "system", "content": "You are an expert data analyst who specializes in tracking dataframe modifications and their business impact. Provide detailed, accurate analysis in JSON format."},
                {"role": "user", "content": analysis_prompt}
            ],
            max_tokens=2000,
            temperature=0.2
        )

        gpt_analysis = response.choices[0].message.content

        # Try to extract JSON from response. The pattern only decides whether
        # the reply contains anything brace-delimited; decoding then starts at
        # the first "{" and stops at the end of that JSON object, so trailing
        # prose that happens to contain "}" no longer breaks parsing.
        json_match = re.search(r'{.*}', gpt_analysis, re.DOTALL)
        if json_match:
            try:
                analysis_result, _ = json.JSONDecoder().raw_decode(gpt_analysis[json_match.start():])
                analysis_result["raw_gpt_response"] = gpt_analysis
                return analysis_result
            except json.JSONDecodeError:
                # Fallback if JSON parsing fails
                return {
                    "change_summary": f"Manual edits made via data editor: {user_description}",
                    "change_type": "data_edit",
                    "session_description": f"User made manual changes to dataframe via edit tab. Description: {user_description}",
                    "raw_gpt_response": gpt_analysis,
                    "parsing_error": "Failed to parse JSON response"
                }
        else:
            return {
                "change_summary": f"Manual edits made: {user_description}",
                "change_type": "data_edit",
                "session_description": f"User edited dataframe manually. Changes: {user_description}",
                "raw_gpt_response": gpt_analysis,
                "parsing_error": "No JSON found in response"
            }

    except Exception as e:
        logger.error(f"Error in GPT-4 dataframe analysis: {e}")
        return {
            "change_summary": f"Manual dataframe edits: {user_description}",
            "change_type": "data_edit",
            "session_description": f"User made manual changes via edit data tab. Description: {user_description}. Error in analysis: {str(e)}",
            "error": str(e)
        }


def save_edited_dataframe_enhanced(edited_df, description):
    """
    Save a manually edited dataframe as a new version and record the edit.

    The edited data is compared with ``app_state["df"]`` through
    ``analyze_dataframe_changes_with_gpt4``, saved with
    ``save_dataframe_version``, and stored as the new ``app_state["df"]``.
    When a session recording is active, the edit and its analysis are appended
    to the session file and recorded through ``session_recorder``.

    Args:
        edited_df: The edited table. A ``pandas.DataFrame``, or any value
            ``pandas.DataFrame(...)`` accepts (it is converted before use).
        description: User-provided description of the edit. When empty, the
            analysis' ``change_summary`` is used instead.

    Returns:
        tuple: ``(status_message, gradio_update)``. On success the update
        carries the saved dataframe as ``value``; otherwise it is an empty
        ``gr.update()``.
    """
    global app_state, session_recorder

    if edited_df is None:
        return "No data to save", gr.update()

    try:
        # Convert BEFORE touching DataFrame-only attributes: checking `.empty`
        # on a list/dict payload would raise AttributeError.
        if not isinstance(edited_df, pd.DataFrame):
            edited_df = pd.DataFrame(edited_df)

        if edited_df.empty:
            return "No data to save", gr.update()

        # Get the original dataframe for comparison
        original_df = app_state["df"].copy()

        logger.info("Analyzing dataframe changes with GPT-4.1...")
        print("🤖 Analyzing changes with GPT-4.1...")

        # Use GPT-4.1 to analyze the differences
        change_analysis = analyze_dataframe_changes_with_gpt4(
            original_df=original_df,
            modified_df=edited_df,
            user_description=description
        )

        # Generate a meaningful description if not provided
        if not description:
            description = change_analysis.get("change_summary", "Manual edits via data editor")

        # Save as new version
        version_name = save_dataframe_version(edited_df, description)

        # Update the app state with the edited dataframe
        app_state["df"] = edited_df

        # Record this in the copiloting session if active
        if session_recorder.current_session_file:
            session_description = change_analysis.get("session_description", f"Manual data edits: {description}")

            # Create detailed session entry
            session_entry = f"""
MANUAL DATA EDIT SESSION
========================
Timestamp: {datetime.now().strftime('%H:%M:%S')}
Edit Description: {description}
Version Saved: {version_name}

GPT-4.1 CHANGE ANALYSIS:
{'-' * 40}
Change Summary: {change_analysis.get('change_summary', 'N/A')}
Change Type: {change_analysis.get('change_type', 'N/A')}

Structural Changes:
{json.dumps(change_analysis.get('structural_changes', {}), indent=2)}

Data Modifications:
{json.dumps(change_analysis.get('data_modifications', {}), indent=2)}

Business Impact:
{json.dumps(change_analysis.get('business_impact', {}), indent=2)}

Recommendations:
{chr(10).join([f"• {rec}" for rec in change_analysis.get('recommendations', [])])}

Original DataFrame Shape: {original_df.shape}
Modified DataFrame Shape: {edited_df.shape}
{'-' * 80}
"""

            # Append to session file
            with open(session_recorder.current_session_file, 'a', encoding='utf-8') as f:
                f.write(session_entry + "\n")

            # Record in session data structure
            session_recorder.record_conversation_turn(
                user_message=f"MANUAL EDIT: {description}",
                ai_response=session_description,
                action_type="manual_data_edit",
                code_executed=None,
                version_saved=version_name
            )

            # Record the dataframe version change
            session_recorder.record_dataframe_version(
                version_name=version_name,
                description=description,
                shape=list(edited_df.shape),
                columns=list(edited_df.columns)
            )

            # Record any issues found by GPT-4
            if change_analysis.get('data_modifications', {}).get('data_quality_impact') == 'degraded':
                session_recorder.record_issue_found(
                    f"Data quality may have degraded due to manual edits: {description}",
                    severity="medium"
                )

            logger.info("Manual edit recorded in copiloting session")

        # Log the changes
        logger.info(f"Saved edited dataframe as version {version_name}")

        # Create detailed success message
        success_message = f"""✅ Successfully saved as version {version_name}

🤖 GPT-4.1 Analysis Summary:
{change_analysis.get('change_summary', 'Changes analyzed')}

📊 Change Details:
• Change Type: {change_analysis.get('change_type', 'Unknown')}
• Original Shape: {original_df.shape}
• New Shape: {edited_df.shape}

📝 Session Recording: {'✅ Recorded' if session_recorder.current_session_file else '❌ No active session'}
"""

        # Add recommendations if available
        if change_analysis.get('recommendations'):
            success_message += f"\n💡 Recommendations:\n"
            for rec in change_analysis['recommendations'][:3]:  # Show first 3
                success_message += f"• {rec}\n"

        return success_message, gr.update(value=edited_df)

    except Exception as e:
        error_msg = f"❌ Error saving: {str(e)}"
        logger.error(f"Error saving edited dataframe: {e}")
        logger.error(traceback.format_exc())

        # Still try to record the error in session
        if session_recorder.current_session_file:
            session_recorder.record_conversation_turn(
                user_message=f"MANUAL EDIT FAILED: {description}",
                ai_response=error_msg,
                action_type="manual_edit_error",
                code_executed=None,
                version_saved=None
            )

        return error_msg, gr.update()


def load_latest_version_for_editing_enhanced():
    """Hand the current dataframe to the editor and note the action in the session log.

    Reads the module-level ``app_state`` and ``session_recorder``.

    Returns:
        tuple: ``(dataframe, status_text)``. The dataframe is a blank-filled
        copy of ``app_state["df"]``; it is ``None`` when nothing is loaded or
        when loading fails, in which case ``status_text`` explains why.
    """
    global app_state, session_recorder

    nothing_loaded = app_state is None or app_state["df"] is None
    if nothing_loaded:
        return None, "No data loaded. Please upload a rent roll first."

    try:
        # Work on a copy so editing never touches the dataframe held in app_state;
        # missing values are shown as empty cells.
        editable = app_state["df"].copy().fillna('')

        # Describe which saved version (if any) this data corresponds to.
        saved_versions = app_state["df_versions"]
        if saved_versions:
            newest = saved_versions[-1]
            loaded_label = newest['name']
            version_info = f"Loaded version: {newest['name']} - {newest['description']}"
        else:
            loaded_label = 'Current'
            version_info = "Loaded current data (no versions saved yet)"

        # Only write to the session transcript when a session is being recorded.
        session_path = session_recorder.current_session_file
        if session_path:
            session_entry = f"""
DATA EDITING SESSION STARTED
============================
Timestamp: {datetime.now().strftime('%H:%M:%S')}
Action: User loaded dataframe for manual editing
Version Loaded: {loaded_label}
DataFrame Shape: {editable.shape}
DataFrame Columns: {list(editable.columns)}
{'-' * 80}
"""

            with open(session_path, 'a', encoding='utf-8') as transcript:
                transcript.write(session_entry + "\n")

            session_recorder.record_conversation_turn(
                user_message="LOAD FOR EDITING: User opened data editor",
                ai_response="Dataframe loaded for manual editing",
                action_type="load_for_editing",
                code_executed=None,
                version_saved=None
            )

        logger.info(f"Loaded dataframe for editing: {editable.shape}")
        return editable, version_info

    except Exception as exc:
        error_msg = f"Error loading data: {str(exc)}"
        logger.error(f"Error loading data for editing: {exc}")

        # Keep the failure visible in the recorded session as well.
        if session_recorder.current_session_file:
            session_recorder.record_conversation_turn(
                user_message="LOAD FOR EDITING FAILED",
                ai_response=error_msg,
                action_type="load_editing_error",
                code_executed=None,
                version_saved=None
            )

        return None, error_msg


def load_specific_version_enhanced(version_name):
    """Load a saved rent roll version from disk for editing and record the action.

    Args:
        version_name: Dropdown label of the version; anything from the first
            ``" ("`` onwards (status indicators) is ignored.

    Returns:
        tuple: ``(dataframe, status_text)``. The dataframe is read from
        ``rent_roll_versions/rent_roll_<name>.csv`` with missing values
        replaced by empty strings; it is ``None`` when no version was
        selected, the file does not exist, or loading fails.
    """
    global app_state, session_recorder

    if not version_name:
        return None, "Please select a version to load"

    try:
        # Strip status indicators such as " (latest)" from the dropdown label.
        clean_version_name = version_name.split(" (")[0]
        csv_filename = os.path.join("rent_roll_versions", f"rent_roll_{clean_version_name}.csv")

        # Missing file: report it (and record it when a session is active).
        if not os.path.exists(csv_filename):
            error_msg = f"Version file not found: {clean_version_name}"

            if session_recorder.current_session_file:
                session_recorder.record_conversation_turn(
                    user_message=f"LOAD VERSION FAILED: {clean_version_name}",
                    ai_response=error_msg,
                    action_type="load_version_error",
                    code_executed=None,
                    version_saved=None
                )

            return None, error_msg

        loaded = pd.read_csv(csv_filename).fillna('')

        session_path = session_recorder.current_session_file
        if session_path:
            session_entry = f"""
SPECIFIC VERSION LOADED FOR EDITING
===================================
Timestamp: {datetime.now().strftime('%H:%M:%S')}
Version Loaded: {clean_version_name}
DataFrame Shape: {loaded.shape}
DataFrame Columns: {list(loaded.columns)}
File Path: {csv_filename}
{'-' * 80}
"""

            # Human-readable transcript entry first, then the structured turn.
            with open(session_path, 'a', encoding='utf-8') as transcript:
                transcript.write(session_entry + "\n")

            session_recorder.record_conversation_turn(
                user_message=f"LOAD SPECIFIC VERSION: {clean_version_name}",
                ai_response=f"Loaded version {clean_version_name} for editing",
                action_type="load_specific_version",
                code_executed=None,
                version_saved=None
            )

        logger.info(f"Loaded version {clean_version_name} for editing")
        return loaded, f"Loaded version: {clean_version_name}"

    except Exception as exc:
        error_msg = f"Error loading version: {str(exc)}"
        logger.error(f"Error loading version {version_name}: {exc}")

        if session_recorder.current_session_file:
            session_recorder.record_conversation_turn(
                user_message=f"LOAD VERSION ERROR: {version_name}",
                ai_response=error_msg,
                action_type="load_version_error",
                code_executed=None,
                version_saved=None
            )

        return None, error_msg


# Update the Gradio event handlers to use enhanced functions
def setup_enhanced_edit_data_handlers():
    """Wire the Edit Data tab buttons to the enhanced (session-recording) handlers.

    Relies on the Gradio components and handler functions being available as
    module-level names; call it from the Gradio interface setup.
    """
    # Refresh button: repopulate the version dropdown.
    refresh_versions_btn.click(refresh_version_dropdown, outputs=[version_dropdown])

    # Load button: read the selected version into the editable grid.
    load_version_btn.click(
        load_specific_version_enhanced,
        inputs=[version_dropdown],
        outputs=[editable_df, edit_status],
    )

    # Save button: persist the edits, then refresh the dropdown so the newly
    # saved version becomes selectable.
    save_event = save_changes_btn.click(
        save_edited_dataframe_enhanced,
        inputs=[editable_df, save_description],
        outputs=[save_status, editable_df],
    )
    save_event.then(refresh_version_dropdown, outputs=[version_dropdown])

    # Optional: a "Load Latest" button can use the enhanced loader as well:
    # load_latest_btn.click(
    #     load_latest_version_for_editing_enhanced,
    #     outputs=[editable_df, edit_status]
    # )

In [None]:
class TemplateApplicationEngine:
    """Replays a saved rent roll template against a new rent roll, step by step.

    The active session is kept in ``self.current_application`` (a dict, or
    ``None`` when idle). For each recorded workflow step an OpenAI model is
    asked to adapt the step to the new data, an Anthropic model is asked to
    write the code, and that code is executed against the working dataframe.
    Progress is appended to a per-session text log inside
    ``self.application_sessions_dir``.
    """

    def __init__(self):
        """Set directory names and make sure the session output folder exists."""
        self.templates_dir = "rent_roll_templates"
        self.application_sessions_dir = "template_applications"
        os.makedirs(self.application_sessions_dir, exist_ok=True)
        # Dict describing the active session, or None when no session is running.
        self.current_application = None

    @staticmethod
    def _clip_text(value, limit):
        """Return ``value`` as text cut to ``limit`` characters; ``None`` becomes 'N/A'."""
        if value is None:
            return 'N/A'
        return str(value)[:limit]

    def start_template_application(self, template_id, new_rent_roll_file, new_rent_roll_df):
        """Initialize a new template application session.

        Args:
            template_id: Identifier passed to ``enhanced_template_manager``.
            new_rent_roll_file: Path/name of the new rent roll (used for logging).
            new_rent_roll_df: Dataframe of the new rent roll; it is copied, so
                the caller's object is never modified.

        Returns:
            tuple: ``(session_id, status_text)``; ``session_id`` is ``None``
            when the template could not be loaded.
        """

        # Load the template data
        template_data, starting_template_df, final_template_df = enhanced_template_manager.load_template_dataframes(template_id)

        if template_data is None:
            return None, "❌ Failed to load template data"

        # Create application session
        app_session_id = f"app_{datetime.now().strftime('%Y%m%d_%H%M%S')}"

        self.current_application = {
            "session_id": app_session_id,
            "template_id": template_id,
            "template_data": template_data,
            "starting_template_df": starting_template_df,
            "final_template_df": final_template_df,
            "new_rent_roll_file": new_rent_roll_file,
            "new_rent_roll_df": new_rent_roll_df.copy(),
            "current_df": new_rent_roll_df.copy(),
            "step_results": [],
            "current_step": 0,
            "total_steps": len(template_data.get("raw_workflow_steps", [])),
            "completed_steps": [],
            "failed_steps": [],
            "log_file": None
        }

        # Create log file
        log_filename = f"{app_session_id}_application_log.txt"
        self.current_application["log_file"] = os.path.join(self.application_sessions_dir, log_filename)

        # Write initial log
        with open(self.current_application["log_file"], 'w', encoding='utf-8') as f:
            f.write("=== TEMPLATE APPLICATION SESSION ===\n")
            f.write(f"Session ID: {app_session_id}\n")
            f.write(f"Template ID: {template_id}\n")
            f.write(f"Template Name: {template_data.get('template_name', 'Unknown')}\n")
            f.write(f"New Rent Roll File: {new_rent_roll_file}\n")
            f.write(f"Started: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n")
            f.write(f"Total Steps to Execute: {self.current_application['total_steps']}\n")
            f.write("=" * 60 + "\n\n")

        return app_session_id, "✅ Template application session started successfully"

    def get_next_step_to_execute(self):
        """Return ``(step_dict, progress_text)`` for the next step.

        ``step_dict`` is ``None`` when there is no active session or when every
        step has already been processed; ``progress_text`` then says which.
        """
        if not self.current_application:
            return None, "No active application session"

        current_step = self.current_application["current_step"]
        workflow_steps = self.current_application["template_data"].get("raw_workflow_steps", [])

        if current_step >= len(workflow_steps):
            return None, "All steps completed"

        return workflow_steps[current_step], f"Step {current_step + 1} of {len(workflow_steps)}"

    def execute_next_step_with_ai(self):
        """Execute the next template step using GPT-4.1 + Claude 3.7.

        Returns:
            str: A status message for the step, the finalization summary when
            no steps remain, or an error message. The step counter advances
            exactly once per call, whether the step succeeded or failed.
        """
        if not self.current_application:
            return "❌ No active application session"

        # Get next step
        next_step, step_info = self.get_next_step_to_execute()
        if next_step is None:
            return self._finalize_application()

        current_step_num = self.current_application["current_step"] + 1

        try:
            # Log step start
            with open(self.current_application["log_file"], 'a', encoding='utf-8') as f:
                f.write(f"\n--- EXECUTING STEP {current_step_num} ---\n")
                f.write(f"Timestamp: {datetime.now().strftime('%H:%M:%S')}\n")
                f.write(f"Original User Query: {next_step.get('user_message', 'N/A')}\n")
                f.write(f"Original Action Type: {next_step.get('action_type', 'N/A')}\n")
                f.write(f"Original Code: {next_step.get('code_executed', 'N/A')}\n")
                f.write("-" * 40 + "\n")

            # Use GPT-4.1 to analyze step and create optimal prompt for Claude
            step_analysis = self._analyze_step_with_gpt4(next_step, current_step_num)

            # Use Claude 3.7 to execute the adapted step
            execution_result = self._execute_step_with_claude(step_analysis, current_step_num)

            # Record results
            step_result = {
                "step_number": current_step_num,
                "original_step": next_step,
                "gpt4_analysis": step_analysis,
                "claude_execution": execution_result,
                "success": execution_result.get("success", False),
                "timestamp": datetime.now().isoformat()
            }

            self.current_application["step_results"].append(step_result)

            if execution_result.get("success", False):
                self.current_application["completed_steps"].append(current_step_num)
                # Update current dataframe if changes were made
                if execution_result.get("updated_df") is not None:
                    self.current_application["current_df"] = execution_result["updated_df"]
            else:
                self.current_application["failed_steps"].append(current_step_num)

            # Move to next step
            self.current_application["current_step"] += 1

            # Log step completion
            with open(self.current_application["log_file"], 'a', encoding='utf-8') as f:
                f.write(f"Step {current_step_num} {'✅ SUCCESS' if execution_result.get('success') else '❌ FAILED'}\n")
                f.write(f"Result: {execution_result.get('summary', 'No summary')}\n")
                f.write("-" * 40 + "\n\n")

            # Prepare status message
            total_steps = self.current_application["total_steps"]
            completed = len(self.current_application["completed_steps"])
            failed = len(self.current_application["failed_steps"])

            # _clip_text keeps a recorded step with user_message=None from
            # raising here, after the step has already been executed.
            status_msg = f"""🔄 Step {current_step_num}/{total_steps} {'✅ Completed' if execution_result.get('success') else '❌ Failed'}

            **Step Details:**
            • Original Query: {self._clip_text(next_step.get('user_message'), 100)}...
            • Action Type: {next_step.get('action_type', 'N/A')}

            **Execution Results:**
            {execution_result.get('summary', 'No summary available')}

            **Progress:**
            • Completed: {completed}/{total_steps}
            • Failed: {failed}/{total_steps}
            • Remaining: {total_steps - current_step_num}

            {'🎉 All steps completed!' if current_step_num >= total_steps else '⏭️ Ready for next step'}"""

            return status_msg

        except Exception as e:
            error_msg = f"❌ Error executing step {current_step_num}: {str(e)}"

            # Log error
            with open(self.current_application["log_file"], 'a', encoding='utf-8') as f:
                f.write(f"❌ ERROR in step {current_step_num}: {str(e)}\n")
                f.write("-" * 40 + "\n\n")

            session = self.current_application
            # The failure may have happened after the step was already recorded
            # and the counter advanced (e.g. while writing the log). Do not
            # count the step twice and do not skip the following step.
            already_recorded = (
                current_step_num in session["completed_steps"]
                or current_step_num in session["failed_steps"]
            )
            if not already_recorded:
                session["failed_steps"].append(current_step_num)
            if session["current_step"] < current_step_num:
                session["current_step"] = current_step_num

            return error_msg

    def _analyze_step_with_gpt4(self, step_data, step_number):
        """Use GPT-4.1 to analyze the template step and adapt it for the new CRE rent roll.

        Args:
            step_data: Recorded workflow step (dict) from the template.
            step_number: 1-based number of the step, used in the prompt.

        Returns:
            dict: The JSON object parsed from the model's reply. When the reply
            has no parseable JSON the dict holds ``"analysis"`` (raw reply) and
            ``"error"``; when the API call fails it holds ``"error"`` and
            ``"fallback"``.
        """

        client = OpenAI(api_key=DEFAULT_OPENAI_API_KEY)

        # Prepare context for GPT-4
        template_context = f"""
            CRE RENT ROLL TEMPLATE APPLICATION CONTEXT:
            ==========================================

            Current Step: {step_number}/{self.current_application['total_steps']}
            Template Name: {self.current_application['template_data'].get('template_name', 'Unknown')}

            ORIGINAL TEMPLATE DATAFRAMES:
            - Starting Template DF Shape: {self.current_application['starting_template_df'].shape}
            - Starting Template Columns: {list(self.current_application['starting_template_df'].columns)}
            - Final Template DF Shape: {self.current_application['final_template_df'].shape}
            - Final Template Columns: {list(self.current_application['final_template_df'].columns)}

            NEW CRE RENT ROLL TO PROCESS:
            - New Rent Roll DF Shape: {self.current_application['current_df'].shape}
            - New Rent Roll Columns: {list(self.current_application['current_df'].columns)}
            - New Rent Roll Sample:
            {self.current_application['current_df'].head(3).to_string()}

            ORIGINAL STEP FROM TEMPLATE:
            - User Query: {step_data.get('user_message', 'N/A')}
            - Action Type: {step_data.get('action_type', 'N/A')}
            - Original Code: {step_data.get('code_executed', 'N/A')}
            - AI Response: {self._clip_text(step_data.get('ai_response'), 300)}...

            PREVIOUS COMPLETED STEPS:
            {[f"Step {i}: Success" for i in self.current_application['completed_steps']]}

            TEMPLATE GPT-4 ANALYSIS (if available):
            {json.dumps(self.current_application['template_data'].get('gpt4_analysis', {}), indent=2)[:1000]}...
            """

        analysis_prompt = f"""
            You are an expert at adapting commercial real estate (CRE) rent roll analysis workflows to new datasets.

            Your task is to analyze the original template step and adapt it for the new CRE rent roll data, considering typical
            commercial real estate data structures, tenant classification, lease terms, and occupancy analysis patterns.

            {template_context}

            Please provide a comprehensive analysis in JSON format:
            {{
                "step_adaptation": {{
                    "can_execute": true/false,
                    "reason": "Why this step can or cannot be executed",
                    "column_mapping": {{"original_column": "new_column"}},
                    "parameter_adjustments": ["List of parameter changes needed"],
                    "prerequisites": ["What must be true before this step"]
                }},
                "claude_prompt": "Detailed prompt for Claude 3.7 to execute this CRE rent roll analysis step",
                "expected_outcome": "What this step should accomplish for CRE analysis",
                "validation_criteria": ["How to verify the step succeeded"],
                "business_context": "Why this step is important for commercial real estate rent roll processing and analysis"
            }}

            Focus on CRE-specific considerations:
            1. Mapping columns from template to new rent roll (tenant names, lease dates, rent amounts, square footage)
            2. Adapting any hardcoded values or assumptions for different CRE properties
            3. Ensuring the business logic remains sound for commercial lease analysis
            4. Creating clear instructions for Claude that understand CRE data patterns
            5. Handling typical CRE data variations (vacant spaces, lease expirations, rent per SF calculations)
            """

        try:
            response = client.chat.completions.create(
                model="gpt-4.1",
                messages=[
                    {"role": "system", "content": "You are an expert commercial real estate data analyst who adapts CRE rent roll processing workflows to new datasets. Provide detailed, actionable analysis in JSON format that considers CRE-specific data patterns and business requirements."},
                    {"role": "user", "content": analysis_prompt}
                ],
                max_tokens=3000,
                temperature=0.2
            )

            gpt_response = response.choices[0].message.content

            # The reply may wrap the JSON in prose or code fences, so take the
            # span from the first "{" to the last "}".
            json_match = re.search(r'{.*}', gpt_response, re.DOTALL)
            if not json_match:
                return {"analysis": gpt_response, "error": "No JSON found"}

            try:
                return json.loads(json_match.group(0))
            except json.JSONDecodeError:
                return {"analysis": gpt_response, "error": "JSON parsing failed"}

        except Exception as e:
            return {"error": str(e), "fallback": "GPT-4 analysis failed"}

    def _execute_step_with_claude(self, step_analysis, step_number):
        """Use Claude 3.7 to execute the adapted step.

        Args:
            step_analysis: Dict produced by ``_analyze_step_with_gpt4``.
            step_number: 1-based number of the step, used in messages.

        Returns:
            dict: Always contains ``"success"`` and ``"summary"``. On a
            successful run it also carries ``"updated_df"`` (the ``df``
            variable after execution); on failure ``"updated_df"`` is ``None``
            or absent and error details are included.
        """

        adaptation = step_analysis.get("step_adaptation", {})
        if not adaptation.get("can_execute", False):
            # When the analysis itself failed there is no "reason"; surface the
            # recorded analysis error instead of hiding it.
            reason = adaptation.get('reason') or step_analysis.get('error') or 'Unknown reason'
            return {
                "success": False,
                "summary": f"Step {step_number} cannot be executed: {reason}",
                "error": "Step cannot be executed"
            }

        claude_client = Anthropic(api_key=DEFAULT_ANTHROPIC_API_KEY)

        # Prepare comprehensive prompt for Claude
        claude_prompt = step_analysis.get("claude_prompt", "Execute the data processing step")

        # Add current dataframe context
        df_context = f"""
        CURRENT DATAFRAME STATUS:
        ========================
        Shape: {self.current_application['current_df'].shape}
        Columns: {list(self.current_application['current_df'].columns)}
        Data Types: {dict(self.current_application['current_df'].dtypes.astype(str))}

        Sample Data:
        {self.current_application['current_df'].head(5).to_string()}

        The dataframe is already loaded as 'df' variable.
        """

        full_claude_prompt = f"""
        {claude_prompt}

        {df_context}

        IMPORTANT INSTRUCTIONS:
        1. The dataframe 'df' is already loaded and available
        2. Execute the required data processing step
        3. Show your work step by step
        4. Use proper error handling
        5. Display results clearly
        6. If you modify the dataframe, ensure it's properly updated

        Expected Outcome: {step_analysis.get('expected_outcome', 'Process the data as required')}

        Validation Criteria:
        {chr(10).join([f"• {criteria}" for criteria in step_analysis.get('validation_criteria', [])])}

        Please provide your code in ```python``` blocks and explain your approach.
        """

        try:
            # Execute with Claude
            claude_messages = [
                {"role": "user", "content": full_claude_prompt}
            ]

            claude_response = claude_client.messages.create(
                model="claude-3-7-sonnet-20250219",  # Claude 3.7 Sonnet
                messages=claude_messages,
                max_tokens=4000,
                temperature=0.3
            )

            response_text = claude_response.content[0].text

            # Extract and execute code
            code_blocks = re.findall(r'```python\s*(.*?)\s*```', response_text, re.DOTALL)

            if not code_blocks:
                return {
                    "success": False,
                    "summary": f"Step {step_number}: No executable code generated by Claude",
                    "response": response_text,
                    "error": "No code blocks found"
                }

            # Execute the code
            execution_success = False
            execution_output = ""
            updated_df = None

            # Create execution environment. The code runs on a copy, so a
            # failed step leaves the session dataframe untouched.
            exec_globals = {
                "df": self.current_application["current_df"].copy(),
                "pd": pd,
                "np": np,
                "datetime": datetime,
                "os": os
            }

            output_buffer = io.StringIO()

            try:
                # SECURITY NOTE(review): this exec() runs model-generated code
                # with full interpreter and filesystem access (os is exposed).
                # Only run it in a trusted/sandboxed environment.
                with redirect_stdout(output_buffer):
                    for code_block in code_blocks:
                        exec(code_block, exec_globals)

                candidate_df = exec_globals.get("df")
                # Generated code may delete or rebind 'df'; accepting anything
                # but a DataFrame would break every later step and finalization.
                if not isinstance(candidate_df, pd.DataFrame):
                    raise TypeError("'df' is not a pandas DataFrame after execution")

                execution_output = output_buffer.getvalue()
                updated_df = candidate_df
                execution_success = True

            except Exception as e:
                execution_output = f"Execution error: {str(e)}"
                execution_success = False

            # Prepare result
            result = {
                "success": execution_success,
                "summary": f"Step {step_number}: {'✅ Successfully executed' if execution_success else '❌ Execution failed'}",
                "claude_response": response_text,
                "executed_code": "\n\n".join(code_blocks),
                "execution_output": execution_output,
                "updated_df": updated_df if execution_success else None
            }

            return result

        except Exception as e:
            return {
                "success": False,
                "summary": f"Step {step_number}: Claude execution failed - {str(e)}",
                "error": str(e)
            }

    def _finalize_application(self):
        """Finalize the template application session.

        Saves the working dataframe as ``<session_id>_final_result.csv``,
        appends a summary to the session log, clears the active session and
        returns the summary message.
        """
        if not self.current_application:
            return "No active session to finalize"

        total_steps = self.current_application["total_steps"]
        completed = len(self.current_application["completed_steps"])
        failed = len(self.current_application["failed_steps"])
        # A template without workflow steps has total_steps == 0; report 0%
        # instead of raising ZeroDivisionError.
        success_rate = (completed / total_steps) * 100 if total_steps else 0.0

        # Save final results
        final_df = self.current_application["current_df"]
        session_id = self.current_application["session_id"]

        # Save final dataframe
        final_df_path = os.path.join(self.application_sessions_dir, f"{session_id}_final_result.csv")
        final_df.to_csv(final_df_path, index=False)

        # Write final summary to log
        with open(self.current_application["log_file"], 'a', encoding='utf-8') as f:
            f.write(f"\n{'=' * 60}\n")
            f.write("TEMPLATE APPLICATION COMPLETED\n")
            f.write(f"{'=' * 60}\n")
            f.write(f"Total Steps: {total_steps}\n")
            f.write(f"Completed Successfully: {completed}\n")
            f.write(f"Failed: {failed}\n")
            f.write(f"Success Rate: {success_rate:.1f}%\n")
            f.write(f"Final Result Saved: {final_df_path}\n")
            f.write(f"Completed: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n")

        success_msg = f"""🎉 Template Application Completed!

📊 Final Results:
• Total Steps: {total_steps}
• Successfully Completed: {completed}
• Failed: {failed}
• Success Rate: {success_rate:.1f}%

📁 Output Files:
• Final Processed Data: {final_df_path}
• Application Log: {self.current_application["log_file"]}

📈 Final Dataframe:
• Shape: {final_df.shape}
• Columns: {len(final_df.columns)}

The template has been successfully applied to your new rent roll!"""

        # Reset application state
        self.current_application = None

        return success_msg

    def get_application_status(self):
        """Return a human-readable progress report for the active session."""
        if not self.current_application:
            return "📴 No active template application session"

        total = self.current_application["total_steps"]
        current = self.current_application["current_step"]
        completed = len(self.current_application["completed_steps"])
        failed = len(self.current_application["failed_steps"])

        return f"""📋 Template Application Status

🎯 Template: {self.current_application['template_data'].get('template_name', 'Unknown')}
📁 Processing: {self.current_application['new_rent_roll_file']}

📊 Progress:
• Current Step: {current}/{total}
• Completed: {completed}
• Failed: {failed}
• Remaining: {total - current}

🔄 Status: {'🎉 Completed' if current >= total else '⏳ In Progress'}"""

# Global template application engine: the single shared instance used by the
# Template Application tab functions below. Constructing it creates the
# "template_applications" output directory if it does not exist yet.
template_app_engine = TemplateApplicationEngine()

# Functions for the Template Application tab

def load_template_for_application(template_id):
    """Fetch a template's summary and a short preview of its workflow steps.

    Args:
        template_id: Identifier of the template; its JSON is expected at
            ``rent_roll_templates/<template_id>.json``.

    Returns:
        tuple: ``(summary, steps_preview, status_text)``. The preview lists at
        most the first five steps (each query cut to 80 characters) followed by
        a count of the remaining ones.
    """
    if not template_id:
        return "Please enter a template ID", "", ""

    try:
        template_summary = enhanced_template_manager.get_template_summary(template_id)

        template_json_path = os.path.join("rent_roll_templates", f"{template_id}.json")
        if not os.path.exists(template_json_path):
            return template_summary, "", "❌ Template file not found"

        with open(template_json_path, 'r') as handle:
            template_data = json.load(handle)

        workflow_steps = template_data.get("raw_workflow_steps", [])

        # One numbered line per previewed step.
        preview_lines = [
            f"{position}. {step.get('user_message', 'N/A')[:80]}...\n"
            for position, step in enumerate(workflow_steps[:5], 1)
        ]

        hidden_count = len(workflow_steps) - 5
        if hidden_count > 0:
            preview_lines.append(f"... and {hidden_count} more steps\n")

        status_text = f"✅ Template loaded: {len(workflow_steps)} steps found"
        return template_summary, "".join(preview_lines), status_text

    except Exception as exc:
        return f"❌ Error loading template: {str(exc)}", "", "Failed to load"

def start_template_application_session(template_id, new_rent_roll_file):
    """Start applying a template to a newly uploaded rent roll.

    Args:
        template_id: Identifier of the template to apply.
        new_rent_roll_file: The uploaded Excel file. Either an object exposing
            the file path as ``.name`` (e.g. a Gradio upload) or a plain path
            (``str`` / ``os.PathLike``).

    Returns:
        str: A status message describing the started session, or an error
        message when validation or loading fails.
    """
    if not template_id:
        return "❌ Please select a template first"

    if not new_rent_roll_file:
        return "❌ Please upload a new rent roll file"

    try:
        # Plain paths are used as-is; upload objects carry the path in `.name`.
        # (Checked in this order because pathlib paths also have a `.name`,
        # which is only the final path component.)
        if isinstance(new_rent_roll_file, (str, os.PathLike)):
            rent_roll_path = os.fspath(new_rent_roll_file)
        else:
            rent_roll_path = new_rent_roll_file.name

        # Load the new rent roll
        new_df = pd.read_excel(rent_roll_path)

        # Start application session
        session_id, status = template_app_engine.start_template_application(
            template_id=template_id,
            new_rent_roll_file=rent_roll_path,
            new_rent_roll_df=new_df
        )

        if session_id:
            return f"✅ Session started: {session_id}\n\n{status}\n\n📊 New Rent Roll Info:\n• Shape: {new_df.shape}\n• Columns: {list(new_df.columns)}\n\n🎯 Ready to execute {template_app_engine.current_application['total_steps']} template steps!"
        else:
            return status

    except Exception as e:
        return f"❌ Error starting application: {str(e)}"

def execute_next_template_step():
    """Run the next template step on the shared engine.

    Returns:
        str: The engine's status message, or an error message if the engine
        raised.
    """
    try:
        return template_app_engine.execute_next_step_with_ai()
    except Exception as exc:
        return f"❌ Error executing step: {str(exc)}"

def get_template_application_status():
    """Return the shared engine's progress report for the active session."""
    status_text = template_app_engine.get_application_status()
    return status_text

def execute_all_remaining_steps(max_steps=20):
    """Execute all remaining steps in sequence with guaranteed finalization.

    Args:
        max_steps: Upper bound on the number of steps executed by one call
            (keeps a single UI request from running too long).

    Returns:
        str: A summary made of the last few step results, the finalization
        outcome, and the expected output file locations.
    """
    session = template_app_engine.current_application
    if not session:
        return "❌ No active application session"

    print("🚀 Starting batch execution of all remaining steps...")

    # Captured up front: finalization resets current_application to None, so
    # the id would no longer be available when the summary is built.
    session_id = session.get("session_id", "unknown")

    results = []
    step_count = 0

    initial_total_steps = session["total_steps"]
    initial_current_step = session["current_step"]

    print(f"📊 Will execute steps {initial_current_step + 1} through {initial_total_steps}")

    # Execute all remaining steps
    while step_count < max_steps:
        active = template_app_engine.current_application
        if not active or active["current_step"] >= active["total_steps"]:
            break

        current_step_before = active["current_step"]
        step_result = template_app_engine.execute_next_step_with_ai()
        step_count += 1

        active = template_app_engine.current_application
        if not active:
            # The engine closed the session during this call; keep the result
            # and stop instead of indexing into None.
            results.append(f"Step {current_step_before + 1}: {step_result[:100]}...")
            break

        current_step_after = active["current_step"]

        print(f"🔄 Executed step {current_step_after}/{initial_total_steps}")
        results.append(f"Step {current_step_after}: {step_result[:100]}...")

        # Safety check: if step didn't advance, break to avoid infinite loop
        if current_step_before == current_step_after:
            print("⚠️ Step didn't advance, breaking loop")
            break

    # ✅ GUARANTEED FINALIZATION: Always check if we need to finalize
    if template_app_engine.current_application:
        current_step = template_app_engine.current_application["current_step"]
        total_steps = template_app_engine.current_application["total_steps"]

        print(f"🔍 Final check - Current step: {current_step}, Total steps: {total_steps}")

        if current_step >= total_steps:
            print("✅ All steps completed, triggering finalization...")
            try:
                finalization_result = template_app_engine._finalize_application()
                results.append(f"✅ FINALIZATION SUCCESS: {finalization_result[:200]}...")
                print("🎉 Finalization completed successfully!")
            except Exception as e:
                error_msg = f"❌ Finalization failed: {str(e)}"
                results.append(error_msg)
                print(f"💥 Finalization error: {e}")
        else:
            incomplete_msg = f"⚠️ Not all steps completed: {current_step}/{total_steps}"
            results.append(incomplete_msg)
            print(incomplete_msg)
    else:
        results.append("⚠️ Application session ended unexpectedly")
        print("💥 Application session is None - may have been finalized already")

    # Create final summary
    final_summary = "\n".join(results[-4:])  # Show last 4 results

    # Only warn about the cap when it actually left steps unexecuted.
    leftover = template_app_engine.current_application
    cap_left_work = bool(leftover) and leftover["current_step"] < leftover["total_steps"]
    if step_count >= max_steps and cap_left_work:
        final_summary += f"\n\n⚠️ Stopped after {max_steps} steps to prevent timeout"

    # Add file location information
    final_summary += f"\n\n📁 Files should be saved in:"
    final_summary += f"\n  • template_applications/{session_id}_final_result.csv"
    final_summary += f"\n  • rent_roll_versions/rent_roll_v_*_template_result.csv"

    print("🏁 Batch execution completed")
    return final_summary

# Add this new tab to your Gradio interface - place this BEFORE the run section

def add_template_application_tab():
    """Build the "Apply Template" tab and wire up its event handlers.

    Creates a ``gr.Tab`` holding the template-ID box, the rent-roll file
    upload, the execute/status buttons and the status/progress displays, then
    connects each button to its module-level handler
    (``load_template_for_application``, ``start_template_application_session``,
    ``execute_next_template_step``, ``execute_all_remaining_steps`` and
    ``get_template_application_status``).

    Takes no arguments and returns nothing; every component is a local of
    this function.

    NOTE(review): presumably this must be called inside an active
    ``gr.Blocks()`` context for the tab to be attached to an app — confirm
    against the caller.
    """

    with gr.Tab("Apply Template"):
        gr.Markdown("""
        ### 🎯 Apply Saved Templates to New Rent Rolls

        Use your saved templates to automatically process similar rent roll files.
        The system will adapt the template steps to work with your new data.
        """)

        # Top row: inputs on the left, read-only template preview on the right
        with gr.Row():
            with gr.Column(scale=1):
                gr.Markdown("#### 1. Select Template")
                template_id_select = gr.Textbox(
                    label="Template ID",
                    placeholder="e.g., template_20250526_143022",
                    lines=1
                )
                load_template_btn = gr.Button("📂 Load Template", variant="secondary")

                gr.Markdown("#### 2. Upload New Rent Roll")
                new_rent_roll_file = gr.File(
                    label="New Rent Roll File (.xlsx, .xls)",
                    file_types=[".xlsx", ".xls"]
                )

                start_application_btn = gr.Button("🚀 Start Application", variant="primary", size="lg")

            with gr.Column(scale=2):
                gr.Markdown("#### Template Details")
                template_details_display = gr.HTML(label="Template Information")

                gr.Markdown("#### Workflow Steps Preview")
                template_steps_preview = gr.Textbox(
                    label="Steps to Execute",
                    lines=8,
                    interactive=False
                )

        gr.Markdown("---")

        # Bottom row: execution controls and results on the left, progress on the right
        with gr.Row():
            with gr.Column(scale=2):
                gr.Markdown("#### 3. Execute Template Steps")

                with gr.Row():
                    execute_next_btn = gr.Button("▶️ Execute Next Step", variant="primary")
                    execute_all_btn = gr.Button("⏭️ Execute All Steps", variant="secondary")
                    status_btn = gr.Button("📊 Check Status")

                application_status = gr.Textbox(
                    label="Application Status & Results",
                    lines=15,
                    interactive=False
                )

            with gr.Column(scale=1):
                gr.Markdown("#### Progress Tracking")

                progress_info = gr.HTML(
                    label="Current Progress",
                    value="<p>No active session</p>"
                )

                gr.Markdown("""
                #### 💡 How It Works:
                1. **Select Template**: Choose a saved template
                2. **Upload File**: New rent roll to process
                3. **Auto-Adaptation**: GPT-4.1 adapts each step
                4. **Claude Execution**: Claude 3.7 runs the code
                5. **Step-by-Step**: Execute one or all steps
                6. **Results**: Get processed rent roll

                #### ⚙️ AI Workflow:
                - **GPT-4.1**: Analyzes & adapts template steps
                - **Claude 3.7**: Generates & executes code
                - **Auto-Mapping**: Matches columns intelligently
                - **Error Recovery**: Handles step failures gracefully
                """)

        # Event handlers for Template Application tab
        # Loading a template fills the details pane, the steps preview and the status box.
        load_template_btn.click(
            load_template_for_application,
            inputs=[template_id_select],
            outputs=[template_details_display, template_steps_preview, application_status]
        )

        # Start / execute actions write their result to the status box, then a
        # chained call refreshes the progress panel from the current session status.
        start_application_btn.click(
            start_template_application_session,
            inputs=[template_id_select, new_rent_roll_file],
            outputs=[application_status]
        ).then(
            get_template_application_status,
            outputs=[progress_info]
        )

        execute_next_btn.click(
            execute_next_template_step,
            outputs=[application_status]
        ).then(
            get_template_application_status,
            outputs=[progress_info]
        )

        execute_all_btn.click(
            execute_all_remaining_steps,
            outputs=[application_status]
        ).then(
            get_template_application_status,
            outputs=[progress_info]
        )

        # "Check Status" shows the same status text in the main results box
        # (not in the progress panel).
        status_btn.click(
            get_template_application_status,
            outputs=[application_status]
        )

In [None]:
# Initialize the global agent state (starts as None; nothing is loaded yet)
agent_state = None
# Extra CSS handed to gr.Blocks(css=...) just below: keeps <pre>/<code> content
# inside bot chat messages unwrapped and lets wide code blocks scroll
# horizontally instead of overflowing the message bubble.
custom_css = """
.chatbot-container .message-wrap .message.bot pre {
    white-space: pre !important;
    overflow-x: auto !important;
    max-width: 100% !important;
}
.chatbot-container .message-wrap .message.bot code {
    white-space: pre !important;
}
"""

with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue"), css=custom_css) as demo:
    # Page title and subtitle, shown above all tabs
    gr.Markdown("# Agentic Commercial Real Estate Rent Roll Analyzer")
    gr.Markdown("## Hybrid AI System: GPT-4 for Decision Making & Claude for Code Generation")

    # "Setup" tab: file upload, optional API keys and the auto-analyze toggle on
    # the left; load status and data preview on the right. The upload button is
    # wired to upload_rent_roll near the end of this gr.Blocks context.
    with gr.Tab("Setup"):
        with gr.Row():
            with gr.Column():
                file_input = gr.File(label="Upload Rent Roll Excel File (.xlsx, .xls)")

                # Add separate API key inputs for OpenAI and Anthropic
                # (type="password" masks what the user types)
                anthropic_api_key = gr.Textbox(
                    label="Anthropic API Key (Optional - for code generation)",
                    placeholder="Leave blank to use the default API key",
                    type="password"
                )

                openai_api_key = gr.Textbox(
                    label="OpenAI API Key (Optional - for decision making and text responses)",
                    placeholder="Leave blank to use the default API key",
                    type="password"
                )

                # Updated auto-analyze checkbox (checked by default)
                auto_analyze = gr.Checkbox(
                    label="Automatically analyze for issues using GPT-4",
                    value=True,
                    info="When checked, GPT-4 will automatically identify issues in your rent roll"
                )

                upload_button = gr.Button("Load Rent Roll & Start Chat", variant="primary")

            with gr.Column():
                result = gr.Textbox(label="Status")
                preview = gr.HTML(label="Data Preview")

    # "Chat" tab: session buttons, version-history view, the chatbot itself and
    # the template-creation accordion.
    with gr.Tab("Chat"):
        # Session management buttons
        with gr.Row():
            view_versions_btn = gr.Button("View Version History")
            create_template_btn = gr.Button("🎯 Create Template from Session", variant="primary")
            end_session_btn = gr.Button("🔚 End Current Session")
            session_status_btn = gr.Button("📊 Session Status")

        # Target for the "View Version History" output
        data_view = gr.HTML()
        chatbot = gr.Chatbot(label="Agentic Rent Roll Analysis Chat", height=500, type="tuples")

        with gr.Row():
            with gr.Column(scale=4):
                msg = gr.Textbox(label="Your question", placeholder="Ask about the rent roll...", lines=2)
            with gr.Column(scale=1):
                send_btn = gr.Button("Send", variant="primary")

        clear_btn = gr.Button("Clear Chat History")

        # Template creation input
        with gr.Accordion("Template Creation", open=False):
            template_name_input = gr.Textbox(
                label="Template Name (Optional)",
                placeholder="e.g., 'Monthly Rent Roll Cleanup Process'",
                lines=1
            )
            template_status = gr.Textbox(label="Template Creation Status", interactive=False, lines=5)

        # Set up event handlers with proper return values for Gradio chatbot
        # Submitting the textbox and clicking Send run the same `chat` handler
        # with (message, history) and write the result back to the chatbot.
        msg.submit(
            chat,
            inputs=[msg, chatbot],
            outputs=[chatbot]
        ).then(
            lambda: "", None, msg  # Clear the message box after sending
        )

        send_btn.click(
            chat,
            inputs=[msg, chatbot],
            outputs=[chatbot]
        ).then(
            lambda: "", None, msg  # Clear the message box after sending
        )

        clear_btn.click(clear_chat, None, chatbot)

        # Enhanced event handlers for session management
        view_versions_btn.click(view_dataframe_versions, None, data_view)

        # The three session actions below all report into `template_status`,
        # the textbox inside the "Template Creation" accordion.
        create_template_btn.click(
            lambda template_name: create_template_from_current_session(template_name),
            inputs=[template_name_input],
            outputs=[template_status]
        )

        end_session_btn.click(
            end_current_session,
            outputs=[template_status]
        )

        session_status_btn.click(
            get_current_session_status,
            outputs=[template_status]
        )

    # "Edit Data" tab: pick a saved version, edit it in a spreadsheet-like grid,
    # then save the edits with an optional description.
    with gr.Tab("Edit Data"):
        gr.Markdown("""
        ### 📝 Edit Rent Roll Data

        You can directly edit cells in the table below, just like in Excel.
        - Click on any cell to edit it
        - Use Tab or arrow keys to navigate
        - Changes are analyzed by GPT-4.1 and recorded in your session
        - All changes are automatically saved to session history
        """)

        with gr.Row():
            with gr.Column(scale=3):
                # Version selector
                # get_version_choices() runs once, while the UI is being built;
                # the dropdown is repopulated later through
                # refresh_version_dropdown (Refresh button and after each save).
                version_dropdown = gr.Dropdown(
                    label="Select Version to Edit",
                    choices=get_version_choices(),
                    value=None,
                    interactive=True
                )

            with gr.Column(scale=1):
                refresh_versions_btn = gr.Button("🔄 Refresh Versions", size="sm")
                with gr.Row():
                    load_latest_btn = gr.Button("📂 Load Latest", variant="secondary", size="sm")
                    load_version_btn = gr.Button("📂 Load Selected", variant="primary", size="sm")

        # Status display
        edit_status = gr.Textbox(label="Status", interactive=False)

        # The editable dataframe
        # NOTE(review): column_widths supplies 20 entries — this looks like an
        # assumed upper bound on the number of columns; confirm for wider files.
        editable_df = gr.Dataframe(
            label="Editable Data (Click any cell to edit) - Changes tracked by AI",
            interactive=True,
            wrap=True,
            max_height=500,
            column_widths=["100px"] * 20,
        )

        # Save controls
        with gr.Row():
            with gr.Column(scale=3):
                save_description = gr.Textbox(
                    label="Description of Changes (GPT-4.1 will analyze if left blank)",
                    placeholder="e.g., 'Updated rent for units 101-105' or leave blank for AI analysis",
                    lines=2
                )

            with gr.Column(scale=1):
                save_changes_btn = gr.Button("💾 Save & Analyze Changes", variant="primary", size="lg")

        save_status = gr.Textbox(label="Save Status & AI Analysis", interactive=False, lines=8)

        # Quick actions section
        with gr.Accordion("Quick Actions", open=False):
            gr.Markdown("""
            ### Bulk Operations
            Use these buttons for common bulk edits:
            """)

            with gr.Row():
                # Add quick action buttons here in future
                # These four are placeholders: created disabled
                # (interactive=False), not bound to names, and not wired to
                # any handler.
                gr.Button("🧹 Clean Empty Rows", size="sm", interactive=False)
                gr.Button("💵 Round All Currency", size="sm", interactive=False)
                gr.Button("📅 Fix Date Formats", size="sm", interactive=False)
                gr.Button("🔢 Recalculate Totals", size="sm", interactive=False)

        # Enhanced session tracking notice
        gr.Markdown("""
        ### 🤖 AI-Powered Change Tracking
        - **GPT-4.1 Analysis**: Every edit is analyzed for business impact
        - **Session Recording**: All changes saved to copiloting session
        - **Template Ready**: Manual edits become part of reusable workflows
        - **Quality Assurance**: AI detects data quality improvements/issues
        """)

        # Event handlers for Edit Data tab with enhanced functions
        refresh_versions_btn.click(
            refresh_version_dropdown,
            outputs=[version_dropdown]
        )

        # Both load actions fill the grid and the status line.
        load_latest_btn.click(
            load_latest_version_for_editing_enhanced,  # ← Enhanced function
            outputs=[editable_df, edit_status]
        )

        load_version_btn.click(
            load_specific_version_enhanced,  # ← Enhanced function
            inputs=[version_dropdown],
            outputs=[editable_df, edit_status]
        )

        # Saving returns (status text, dataframe) in that order, so the grid is
        # also rewritten with whatever the save handler returns.
        save_changes_btn.click(
            save_edited_dataframe_enhanced,  # ← Enhanced function
            inputs=[editable_df, save_description],
            outputs=[save_status, editable_df]
        ).then(
            refresh_version_dropdown,  # Refresh the dropdown after saving
            outputs=[version_dropdown]
        )

        # Instructions
        gr.Markdown("""
        ---
        ### 💡 How to Use Enhanced Edit Data:
        1. **Load Data**: Click "Load Latest" or select a specific version
        2. **Edit Cells**: Click on any cell and type to edit (just like Excel!)
        3. **Navigate**: Use Tab, Enter, or arrow keys to move between cells
        4. **Save Changes**: Enter a description (optional) and click "Save & Analyze Changes"
        5. **AI Analysis**: GPT-4.1 will analyze your changes and provide insights

        ### ⚠️ Enhanced Features:
        - **Automatic Analysis**: AI understands what you changed and why
        - **Business Impact**: Get insights on how changes affect rent calculations
        - **Session Integration**: All edits become part of your copiloting history
        - **Template Building**: Manual edits are included in reusable templates
        - **Quality Checks**: AI warns if changes might impact data quality
        """)

    # "Apply Template" tab.
    # Built by add_template_application_tab() (defined in an earlier cell), which
    # creates the tab's components and wires its event handlers. It is called
    # here, inside the gr.Blocks context, so the tab is rendered between
    # "Edit Data" and "Template Manager". Keeping the layout in that one helper
    # avoids maintaining two copies of the same ~110 lines of UI code.
    add_template_application_tab()

    # "Template Manager" tab: list saved templates, view one, or delete one by ID.
    with gr.Tab("Template Manager"):
        gr.Markdown("""
        ### 📋 Template Management System

        Create, view, and apply reusable rent roll processing templates.
        Templates capture your complete workflow including conversations, code, and manual edits.
        """)

        with gr.Row():
            with gr.Column(scale=2):
                gr.Markdown("#### Available Templates")
                template_list = gr.HTML(label="Template List")
                refresh_templates_btn = gr.Button("🔄 Refresh Template List")

            with gr.Column(scale=2):
                gr.Markdown("#### Template Details")
                template_details = gr.HTML(label="Template Summary")

        with gr.Row():
            template_id_input = gr.Textbox(
                label="Template ID",
                placeholder="e.g., template_20250526_143022"
            )
            with gr.Column():
                view_template_btn = gr.Button("👁️ View Template", variant="secondary")
                delete_template_btn = gr.Button("🗑️ Delete Template", variant="stop")

        template_action_status = gr.Textbox(label="Status", interactive=False, lines=3)

        # Template management event handlers
        # list_available_templates is defined further down in this cell, after the
        # gr.Blocks context, so it is wrapped in a lambda: the name is only
        # looked up when the button is actually clicked.
        refresh_templates_btn.click(
            lambda: list_available_templates(),
            outputs=[template_list]
        )

        # View / delete short-circuit to a prompt message when the ID box is empty.
        view_template_btn.click(
            lambda template_id: enhanced_template_manager.get_template_summary(template_id) if template_id else "Please enter a template ID",
            inputs=[template_id_input],
            outputs=[template_details]
        )

        delete_template_btn.click(
            lambda template_id: enhanced_template_manager.delete_template(template_id) if template_id else "Please enter a template ID",
            inputs=[template_id_input],
            outputs=[template_action_status]
        ).then(
            lambda: list_available_templates(),  # Refresh list after deletion
            outputs=[template_list]
        )

    # Initially hide the chat interface
    # (done by setting the attribute after construction; the chatbot is also an
    # output of upload_rent_roll below — presumably that handler makes it
    # visible again once a file is loaded; confirm in upload_rent_roll)
    chatbot.visible = False

    # Updated upload button event with both API keys and version dropdown
    # Wired here rather than in the Setup tab because its outputs include
    # components created in later tabs (chatbot, version_dropdown).
    upload_button.click(
        upload_rent_roll,
        inputs=[file_input, anthropic_api_key, openai_api_key, auto_analyze],
        outputs=[result, preview, chatbot, version_dropdown]
    )

    # Updated style and help info
    # Rendered below the tab strip, outside any tab.
    gr.Markdown("""
    ## How to use this Enhanced Agentic Rent Roll Analyzer:

    ### 🚀 **NEW: Template Application System**

    #### **Workflow Overview:**
    1. **Create Templates** (Chat Tab): Work through your rent roll analysis normally
    2. **Save Templates**: Click "Create Template from Session" to save your workflow
    3. **Apply Templates** (Apply Template Tab): Use saved templates on new rent roll files
    4. **Automated Processing**: GPT-4.1 + Claude 3.7 adapt and execute each step

    ### 📋 **Step-by-Step Guide:**

    #### **Phase 1: Create Your First Template**
    1. **Setup Tab**: Upload your rent roll Excel file
    2. **Chat Tab**: Interact normally - ask questions, get analysis, make changes
    3. **Edit Data Tab**: Make any manual edits (tracked by AI)
    4. **Create Template**: Click "🎯 Create Template from Session"

    #### **Phase 2: Apply Template to New Files**
    1. **Apply Template Tab**: Select your saved template ID
    2. **Upload New File**: Choose a similar rent roll file
    3. **Start Application**: Click "🚀 Start Application"
    4. **Execute Steps**: Run "▶️ Execute Next Step" or "⏭️ Execute All Steps"

    ### 🤖 **AI Workflow in Template Application:**

    **For Each Template Step:**
    1. **GPT-4.1 Analysis**:
       - Analyzes original template step
       - Maps columns from template to new file
       - Adapts parameters and business rules
       - Creates optimized prompt for Claude

    2. **Claude 3.7 Execution**:
       - Receives adapted instructions
       - Generates appropriate Python code
       - Executes data processing
       - Returns results and updates dataframe

    3. **Validation & Progress**:
       - Validates step completion
       - Records success/failure
       - Logs detailed results
       - Moves to next step

    ### 🔧 **Key Features:**

    - **Intelligent Adaptation**: Automatically maps different column names
    - **Business Logic Preservation**: Maintains the intent of original analysis
    - **Error Recovery**: Handles failures gracefully and continues
    - **Progress Tracking**: Real-time status of template application
    - **Complete Logging**: Detailed logs of every step and decision

    ### 💡 **Use Cases:**

    - **Monthly Processing**: Apply same cleanup to each month's rent roll
    - **Property Portfolios**: Use one template across multiple properties
    - **Team Workflows**: Share proven analysis methods
    - **Quality Assurance**: Ensure consistent processing standards
    - **Time Savings**: Automate repetitive analysis tasks

    ### ⚡ **Quick Start:**

    1. Upload rent roll → Chat about analysis → Create template
    2. Get template ID from Template Manager
    3. Go to Apply Template → Enter template ID → Upload new file → Execute!

    The system transforms your one-time analysis into reusable, intelligent automation!
    """)

# Additional helper functions for Template Manager tab (keeping existing)
def list_available_templates():
    """Render the saved templates as an HTML fragment for the Template Manager tab.

    Reads template metadata from ``enhanced_template_manager.list_templates()``
    and emits one card per template showing its name, ID, creation date (first
    10 characters of ``created_date``), source file, step count and whether a
    GPT-4 analysis is available.

    Every metadata value is HTML-escaped before being interpolated: template
    names and source file names come from user input, so unescaped markup
    could break the page or inject script into it.

    Returns:
        str: One of
            * a ``<p>`` hint when no templates exist,
            * a scrollable ``<div>`` containing one card per template,
            * a ``<p>`` error message if anything raises (never propagates).
    """
    # Function-scope import so this cell does not depend on a top-level
    # `import html`; importing only `escape` also avoids shadowing the module
    # with a local variable.
    from html import escape

    try:
        templates = enhanced_template_manager.list_templates()

        if not templates:
            return "<p>No templates available yet. Create your first template by using the 'Create Template from Session' button in the Chat tab.</p>"

        cards = []
        for template in templates:
            gpt_status = "🤖 GPT-4 Analysis" if template.get('gpt4_analysis_available') else "📝 Basic Info"

            # Escape each field once, up front, so the markup below stays readable.
            name = escape(str(template['template_name']))
            template_id = escape(str(template['template_id']))
            created = escape(str(template['created_date'][:10]))
            source = escape(str(template['source_file']))
            steps = escape(str(template['steps_count']))

            cards.append(f"""
            <div style='border: 1px solid #ddd; margin: 10px 0; padding: 15px; border-radius: 8px; background-color: #f9f9f9;'>
                <h4 style='margin: 0 0 10px 0; color: #333;'>{name}</h4>
                <p style='margin: 5px 0; color: #666;'><strong>ID:</strong> <code>{template_id}</code></p>
                <p style='margin: 5px 0; color: #666;'><strong>Created:</strong> {created}</p>
                <p style='margin: 5px 0; color: #666;'><strong>Source:</strong> {source}</p>
                <p style='margin: 5px 0; color: #666;'><strong>Steps:</strong> {steps} workflow steps</p>
                <p style='margin: 5px 0;'><span style='background-color: #e3f2fd; padding: 2px 6px; border-radius: 4px; font-size: 12px;'>{gpt_status}</span></p>
            </div>
            """)

        # Single join instead of repeated string concatenation in the loop.
        return "<div style='max-height: 400px; overflow-y: auto;'>" + "".join(cards) + "</div>"

    except Exception as e:
        # Best-effort UI helper: show the problem in the panel instead of
        # failing the Gradio callback. The message is escaped as well because
        # it may echo template data.
        return f"<p>Error loading templates: {escape(str(e))}</p>"

  chatbot = gr.Chatbot(label="Agentic Rent Roll Analysis Chat", height=500, type="tuples")


In [None]:
# Run the application
if __name__ == "__main__":
    logger.info("Starting Agentic Rent Roll Analyzer application")
    # debug=True keeps this cell running so errors and logs stay visible (see the
    # "This cell will run indefinitely" notice in the cell output); the shutdown
    # log below is presumably only written once the server is stopped.
    demo.launch(debug=True)
    logger.info("Application shutdown")

It looks like you are running Gradio on a hosted a Jupyter notebook. For the Gradio app to work, sharing must be enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. This cell will run indefinitely so that you can see errors and logs. To turn off, set debug=False in launch().
* Running on public URL: https://47fe40fa2df241777f.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)




✓ Saved dataframe version v_20250530_022110: Initial upload - original dataset
  - CSV: rent_roll_versions/rent_roll_v_20250530_022110.csv
  - Excel: rent_roll_versions/rent_roll_v_20250530_022110.xlsx


  state[block._id] = block.__class__(**kwargs)


📝 Started session recording: session_20250530_022207

==== STARTING CODE GENERATION ====
User query: add a new column residents where we classify the tenants into occupied or vacant based on whether the corresponding least start date is empty for which it is vacant or not for which it will be occupied, apply for all the tenants
Dataframe has 34 rows and 9 columns
Sending FULL dataframe to GPT-4.1 (not just sample)

==== STEP 1: GENERATING PROMPT WITH GPT-4 (WITH FULL DATAFRAME) ====

==== GPT-4 GENERATED PROMPT FOR CLAUDE ====
You are a Python code generation assistant. The rent roll dataframe is ALREADY LOADED and available as the variable df. It contains ALL rows and columns of real data (34 rows, 9 columns). You have access to the entire dataframe, not just a sample. The columns are:

['2600 W Big Beaver, Troy Michigan -- Rent Roll', 'Unnamed: 1', 'Unnamed: 2', 'Unnamed: 3', 'Current Rent Roll: 9/1/2024', 'Unnamed: 5', 'Unnamed: 6', 'Unnamed: 7', 'Unnamed: 8']

Your task is as follo

Unnamed: 0,"2600 W Big Beaver, Troy Michigan -- Rent Roll",Unnamed: 1,Unnamed: 2,Unnamed: 3,Current Rent Roll: 9/1/2024,Unnamed: 5,Unnamed: 6,Unnamed: 7,Unnamed: 8
0,,,,,,,,,
1,,,,,,,,,
2,,,,,,,,,
3,,Floor,Lease Start Date,Lease Expiration,RSF,RSF%,Annual Rent,Monthly Rent,Rent PSF
4,AT&T,LL,2024-09-01 00:00:00,2029-08-31 00:00:00,295,0.000703,10110,842.52,34.27
5,Care Tech Solutions,LL,2023-06-01 00:00:00,2028-05-31 00:00:00,8472,0.020199,160968,13414,19
6,Care Tech Solutions,LL,2023-06-01 00:00:00,2028-05-31 00:00:00,1847,0.004404,23088,1923.96,12.5
7,Clear Rate Communication,LL,2017-06-27 00:00:00,2026-08-26 00:00:00,5212,0.012426,76877,6406.42,14.75
8,Aras,1st Fl,2019-09-05 00:00:00,2028-03-31 00:00:00,16759,0.039956,465062,38755.19,27.75
9,Dickinson Wright,3rd Fl,2011-08-01 00:00:00,2029-07-31 00:00:00,67798,0.161642,1923429,160285.77,28.37


Unnamed: 0,Tenant,Floor,Lease Start Date,Lease Expiration,RSF,RSF%,Annual Rent,Monthly Rent,Rent PSF
0,,,,,,,,,
1,,,,,,,,,
2,,,,,,,,,
3,AT&T,LL,2024-09-01 00:00:00,2029-08-31 00:00:00,295.0,0.000703,10110.0,842.52,34.27
4,Care Tech Solutions,LL,2023-06-01 00:00:00,2028-05-31 00:00:00,8472.0,0.020199,160968.0,13414.0,19.0
5,Care Tech Solutions,LL,2023-06-01 00:00:00,2028-05-31 00:00:00,1847.0,0.004404,23088.0,1923.96,12.5
6,Clear Rate Communication,LL,2017-06-27 00:00:00,2026-08-26 00:00:00,5212.0,0.012426,76877.0,6406.42,14.75
7,Aras,1st Fl,2019-09-05 00:00:00,2028-03-31 00:00:00,16759.0,0.039956,465062.0,38755.19,27.75
8,Dickinson Wright,3rd Fl,2011-08-01 00:00:00,2029-07-31 00:00:00,67798.0,0.161642,1923429.0,160285.77,28.37
9,Dickinson Wright,4th Fl,2016-06-01 00:00:00,2029-07-31 00:00:00,19310.0,0.046038,588955.0,49079.58,30.5


🔄 Executed step 1/5


Unnamed: 0,"2600 W Big Beaver, Troy Michigan -- Rent Roll",Unnamed: 1,Unnamed: 2,Unnamed: 3,Current Rent Roll: 9/1/2024,Unnamed: 5,Unnamed: 6,Unnamed: 7,Unnamed: 8
0,,,,,,,,,
1,,,,,,,,,
2,,,,,,,,,
3,,Floor,Lease Start Date,Lease Expiration,RSF,RSF%,Annual Rent,Monthly Rent,Rent PSF
4,AT&T,LL,2024-09-01 00:00:00,2029-08-31 00:00:00,295,0.000703,10110,842.52,34.27
5,Care Tech Solutions,LL,2023-06-01 00:00:00,2028-05-31 00:00:00,8472,0.020199,160968,13414,19
6,Care Tech Solutions,LL,2023-06-01 00:00:00,2028-05-31 00:00:00,1847,0.004404,23088,1923.96,12.5
7,Clear Rate Communication,LL,2017-06-27 00:00:00,2026-08-26 00:00:00,5212,0.012426,76877,6406.42,14.75
8,Aras,1st Fl,2019-09-05 00:00:00,2028-03-31 00:00:00,16759,0.039956,465062,38755.19,27.75
9,Dickinson Wright,3rd Fl,2011-08-01 00:00:00,2029-07-31 00:00:00,67798,0.161642,1923429,160285.77,28.37


Unnamed: 0,"2600 W Big Beaver, Troy Michigan -- Rent Roll",Unnamed: 1,Unnamed: 2,Unnamed: 3,Current Rent Roll: 9/1/2024,Unnamed: 5,Unnamed: 6,Unnamed: 7,Unnamed: 8
0,,,,,,,,,
1,,,,,,,,,
2,,,,,,,,,
3,,Floor,Lease Start Date,Lease Expiration,RSF,RSF%,Annual Rent,Monthly Rent,Rent PSF
4,AT&T,LL,2024-09-01 00:00:00,2029-08-31 00:00:00,295,0.000703,10110,842.52,34.27
5,Care Tech Solutions,LL,2023-06-01 00:00:00,2028-05-31 00:00:00,8472,0.020199,160968,13414,19
6,Care Tech Solutions,LL,2023-06-01 00:00:00,2028-05-31 00:00:00,1847,0.004404,23088,1923.96,12.5
7,Clear Rate Communication,LL,2017-06-27 00:00:00,2026-08-26 00:00:00,5212,0.012426,76877,6406.42,14.75
8,Aras,1st Fl,2019-09-05 00:00:00,2028-03-31 00:00:00,16759,0.039956,465062,38755.19,27.75
9,Dickinson Wright,3rd Fl,2011-08-01 00:00:00,2029-07-31 00:00:00,67798,0.161642,1923429,160285.77,28.37


🔄 Executed step 2/5
🔄 Executed step 3/5
🔄 Executed step 4/5
🔄 Executed step 5/5
🔍 Final check - Current step: 5, Total steps: 5
✅ All steps completed, triggering finalization...
🎉 Finalization completed successfully!
🏁 Batch execution completed
🚀 Starting batch execution of all remaining steps...
📊 Will execute steps 1 through 5
🔄 Executed step 1/5
🔄 Executed step 2/5
🔄 Executed step 3/5


Unnamed: 0,"2600 W Big Beaver, Troy Michigan -- Rent Roll",Unnamed: 1,Unnamed: 2,Unnamed: 3,Current Rent Roll: 9/1/2024,Unnamed: 5,Unnamed: 6,Unnamed: 7,Unnamed: 8,residents
0,,,,,,,,,,
1,,,,,,,,,,
2,,,,,,,,,,
3,,Floor,Lease Start Date,Lease Expiration,RSF,RSF%,Annual Rent,Monthly Rent,Rent PSF,
4,AT&T,LL,2024-09-01 00:00:00,2029-08-31 00:00:00,295,0.000703,10110,842.52,34.27,occupied
5,Care Tech Solutions,LL,2023-06-01 00:00:00,2028-05-31 00:00:00,8472,0.020199,160968,13414,19,occupied
6,Care Tech Solutions,LL,2023-06-01 00:00:00,2028-05-31 00:00:00,1847,0.004404,23088,1923.96,12.5,occupied
7,Clear Rate Communication,LL,2017-06-27 00:00:00,2026-08-26 00:00:00,5212,0.012426,76877,6406.42,14.75,occupied
8,Aras,1st Fl,2019-09-05 00:00:00,2028-03-31 00:00:00,16759,0.039956,465062,38755.19,27.75,occupied
9,Dickinson Wright,3rd Fl,2011-08-01 00:00:00,2029-07-31 00:00:00,67798,0.161642,1923429,160285.77,28.37,occupied


🔄 Executed step 4/5
🔄 Executed step 5/5
🔍 Final check - Current step: 5, Total steps: 5
✅ All steps completed, triggering finalization...
🎉 Finalization completed successfully!
🏁 Batch execution completed
🚀 Starting batch execution of all remaining steps...
📊 Will execute steps 1 through 5
🔄 Executed step 1/5


Unnamed: 0,"2600 W Big Beaver, Troy Michigan -- Rent Roll",Unnamed: 1,Unnamed: 2,Unnamed: 3,Current Rent Roll: 9/1/2024,Unnamed: 5,Unnamed: 6,Unnamed: 7,Unnamed: 8,residents
0,,,,,,,,,,
1,,,,,,,,,,
2,,,,,,,,,,
3,,Floor,Lease Start Date,Lease Expiration,RSF,RSF%,Annual Rent,Monthly Rent,Rent PSF,
4,AT&T,LL,2024-09-01 00:00:00,2029-08-31 00:00:00,295,0.000703,10110,842.520000,34.270000,occupied
5,Care Tech Solutions,LL,2023-06-01 00:00:00,2028-05-31 00:00:00,8472,0.020199,160968,13414,19,occupied
6,Care Tech Solutions,LL,2023-06-01 00:00:00,2028-05-31 00:00:00,1847,0.004404,23088,1923.960000,12.500000,occupied
7,Clear Rate Communication,LL,2017-06-27 00:00:00,2026-08-26 00:00:00,5212,0.012426,76877,6406.420000,14.750000,occupied
8,Aras,1st Fl,2019-09-05 00:00:00,2028-03-31 00:00:00,16759,0.039956,465062,38755.190000,27.750000,occupied
9,Dickinson Wright,3rd Fl,2011-08-01 00:00:00,2029-07-31 00:00:00,67798,0.161642,1923429,160285.770000,28.370000,occupied


🔄 Executed step 2/5
🔄 Executed step 3/5
🔄 Executed step 4/5
🔄 Executed step 5/5
🔍 Final check - Current step: 5, Total steps: 5
✅ All steps completed, triggering finalization...
🎉 Finalization completed successfully!
🏁 Batch execution completed


Summarize the key user-guided instructions and solutions from this rent roll copiloting session.

For each step, briefly describe:

What the user wanted to accomplish (without quoting them directly).

How the request was addressed or solved by the copilot.

Avoid excessive detail, don’t repeat the user’s exact instructions, and keep each summary concise and clear.

In [None]:
# Destructive cleanup: removes the whole /content/rent_roll_versions folder
# (the saved dataframe versions written during the session).
# NOTE(review): the backup cell that follows copies 'rent_roll_versions' to
# Google Drive; if this cell runs first there is nothing left to copy —
# confirm the intended order before using "Run all".
!rm -rf /content/rent_roll_versions

Copy any files generated during the session to Google Drive.

In [None]:
import os
import shutil
from google.colab import drive

# Back up the folders generated during this session to Google Drive.
#
# Each folder listed in `folders_to_save` is copied from /content/ into
# `gdrive_destination`, replacing any copy already stored there. The outcome of
# every folder is tracked explicitly so the final summary reflects what
# happened in THIS run, not whatever happens to exist in Drive already.

# Mount Google Drive
print("Mounting Google Drive...")
drive.mount('/content/drive')
print("Google Drive mounted successfully!")

# Define the source folders (excluding sample_data)
folders_to_save = [
    'copiloting_sessions',
    'rent_roll_templates',
    'rent_roll_versions',
    'template_applications'
]

# Define the destination path in Google Drive
# You can change this path to wherever you want to save in your Drive
gdrive_destination = '/content/drive/MyDrive/CRE_AI_agent/'

# Create the destination directory if it doesn't exist
os.makedirs(gdrive_destination, exist_ok=True)
print(f"Created destination directory: {gdrive_destination}")

# Folders that were actually copied during this run. The summary below relies
# on this set instead of probing the destination, because a stale copy from an
# earlier run would otherwise be reported as a success.
copied_folders = set()

# Copy each folder to Google Drive
for folder in folders_to_save:
    source_path = f'/content/{folder}'
    destination_path = os.path.join(gdrive_destination, folder)

    if not os.path.exists(source_path):
        print(f"  ⚠️  Warning: {folder} not found in /content/")
        continue

    print(f"Copying {folder}...")
    try:
        # Remove destination folder if it already exists so the Drive copy
        # mirrors the session folder exactly (no leftover files).
        if os.path.exists(destination_path):
            shutil.rmtree(destination_path)
            print(f"  Removed existing {folder} folder")

        # Copy the folder
        shutil.copytree(source_path, destination_path)
    except OSError as exc:
        # shutil.Error is an OSError subclass, so partial-copy failures land
        # here too. Report and move on so one bad folder does not prevent the
        # remaining folders from being backed up.
        print(f"  ✗ Failed to copy {folder}: {exc}")
        continue

    copied_folders.add(folder)
    print(f"  ✓ Successfully copied {folder} to Google Drive")

print("\n" + "="*50)
print("COPY OPERATION COMPLETED!")
print("="*50)
print(f"All folders have been saved to: {gdrive_destination}")
print("\nFolders copied:")
for folder in folders_to_save:
    if folder in copied_folders:
        print(f"  ✓ {folder}")
    else:
        print(f"  ✗ {folder} (failed)")

# Optional: List the contents of the destination folder
print(f"\nContents of {gdrive_destination}:")
try:
    contents = os.listdir(gdrive_destination)
    for item in contents:
        print(f"  📁 {item}")
except OSError as exc:
    # Only filesystem errors are expected here; anything else (including
    # KeyboardInterrupt) should propagate instead of being silently swallowed.
    print(f"  Unable to list contents: {exc}")

Mounting Google Drive...
Mounted at /content/drive
Google Drive mounted successfully!
Created destination directory: /content/drive/MyDrive/CRE_AI_agent/
Copying copiloting_sessions...
  ✓ Successfully copied copiloting_sessions to Google Drive
Copying rent_roll_templates...
  ✓ Successfully copied rent_roll_templates to Google Drive
Copying rent_roll_versions...
  ✓ Successfully copied rent_roll_versions to Google Drive
Copying template_applications...
  ✓ Successfully copied template_applications to Google Drive

COPY OPERATION COMPLETED!
All folders have been saved to: /content/drive/MyDrive/CRE_AI_agent/

Folders copied:
  ✓ copiloting_sessions
  ✓ rent_roll_templates
  ✓ rent_roll_versions
  ✓ template_applications

Contents of /content/drive/MyDrive/CRE_AI_agent/:
  📁 copiloting_sessions
  📁 rent_roll_templates
  📁 rent_roll_versions
  📁 template_applications
