In [None]:
!pip install gradio
!pip install anthropic
!pip install langgraph langchain langchain_core

Collecting anthropic
  Downloading anthropic-0.54.0-py3-none-any.whl.metadata (25 kB)
Downloading anthropic-0.54.0-py3-none-any.whl (288 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m288.8/288.8 kB[0m [31m4.8 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: anthropic
Successfully installed anthropic-0.54.0
Collecting langgraph
  Downloading langgraph-0.4.8-py3-none-any.whl.metadata (6.8 kB)
Collecting langgraph-checkpoint>=2.0.26 (from langgraph)
  Downloading langgraph_checkpoint-2.1.0-py3-none-any.whl.metadata (4.2 kB)
Collecting langgraph-prebuilt>=0.2.0 (from langgraph)
  Downloading langgraph_prebuilt-0.2.2-py3-none-any.whl.metadata (4.5 kB)
Collecting langgraph-sdk>=0.1.42 (from langgraph)
  Downloading langgraph_sdk-0.1.70-py3-none-any.whl.metadata (1.5 kB)
Collecting ormsgpack>=1.10.0 (from langgraph-checkpoint>=2.0.26->langgraph)
  Downloading ormsgpack-1.10.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (43 kB)
[

In [None]:
import gradio as gr
import pandas as pd
import tempfile
import glob
import os
from anthropic import Anthropic
from openai import OpenAI
import re
import traceback
import io
from contextlib import redirect_stdout
import numpy as np
import logging
import json
from typing import List, Optional, Dict, Any, Union
from langchain_core.messages import AIMessage, HumanMessage
from langgraph.graph import StateGraph, END

In [None]:
def smart_dataframe_compression(df, max_tokens=8000, max_rows=None):
    """
    Compress a DataFrame into a compact text table that fits a rough token budget.

    Pipeline: drop all-empty rows/columns, replace NaN with '', drop columns that
    are more than 80% blank, cap string cells at 50 characters, pick a sample of
    rows (head + evenly spaced middle + tail), render with ``to_string``,
    collapse whitespace, and finally hard-truncate the text if it is still too
    long. Tokens are estimated as ``len(text) // 4``.

    Columns are addressed by position throughout, so frames with duplicate
    column labels are processed instead of raising.

    Args:
        df: Pandas DataFrame (``None`` or an empty frame is accepted).
        max_tokens: Approximate token budget for the rendered text (default 8000).
        max_rows: Maximum rows to include. When ``None`` it is derived from
            ``max_tokens`` and the rendered size of the first three rows.
            Negative values are treated as 0.

    Returns:
        dict with ``"compressed_data"`` (the rendered text) and ``"metadata"``
        (shapes, compression ratio, estimated tokens, selection method, counts).
    """
    if df is None or df.empty:
        return {
            "compressed_data": "Empty dataframe",
            "metadata": {"original_shape": (0, 0), "compression_ratio": 0, "method": "empty"}
        }

    original_shape = df.shape

    # Steps 1-2: remove completely empty rows/columns, then blank out remaining NaN
    cleaned_df = df.dropna(how='all').dropna(axis=1, how='all').fillna('')

    # Step 3: remove columns that are mostly empty (>80% empty).
    # Positional access keeps this working when column labels repeat.
    mostly_empty_threshold = 0.8
    row_count = len(cleaned_df)
    if row_count > 0:
        keep_positions = [
            pos for pos in range(cleaned_df.shape[1])
            if (cleaned_df.iloc[:, pos] == '').sum() / row_count <= mostly_empty_threshold
        ]
    else:
        keep_positions = list(range(cleaned_df.shape[1]))
    # .copy() so the in-place cell truncation below never writes into a view
    cleaned_df = cleaned_df.iloc[:, keep_positions].copy()

    # Step 4: truncate long cell values to prevent individual cells from bloating
    def truncate_cell_value(val, max_chars=50):
        if isinstance(val, str) and len(val) > max_chars:
            return val[:max_chars] + "..."
        return val

    for pos in range(cleaned_df.shape[1]):
        column = cleaned_df.iloc[:, pos]
        if column.dtype == 'object':
            # Every value in an object column is stringified before truncation
            truncated = column.apply(lambda x: truncate_cell_value(str(x)))
            cleaned_df.iloc[:, pos] = truncated.to_numpy()

    # Step 5: determine the row budget from the token limit
    if max_rows is None:
        sample_rows = min(3, len(cleaned_df))
        if sample_rows > 0:
            sample_string = cleaned_df.head(sample_rows).to_string(index=False)
            tokens_per_row = len(sample_string) // 4 // sample_rows  # Rough estimate
            max_rows = min(len(cleaned_df), max_tokens // max(tokens_per_row, 10))
        else:
            max_rows = 0
    max_rows = max(int(max_rows), 0)

    # Step 6: select representative rows (not just first rows)
    total_rows = len(cleaned_df)
    if total_rows <= max_rows:
        final_df = cleaned_df
        selection_method = "all_rows"
    else:
        # Split the budget into head / middle / tail without ever exceeding it
        # (for max_rows >= 3 this is max_rows // 3 rows at each end).
        first_rows = min(max(1, max_rows // 3), max_rows)
        last_rows = min(max(1, max_rows // 3), max_rows - first_rows)
        middle_rows = max_rows - first_rows - last_rows

        rows_selected = set(range(first_rows))

        # Middle rows, evenly distributed between the head and tail slices
        middle_start = first_rows
        middle_end = total_rows - last_rows
        if middle_rows > 0 and middle_end > middle_start:
            middle_indices = np.linspace(middle_start, middle_end - 1, middle_rows, dtype=int)
            rows_selected.update(middle_indices.tolist())

        rows_selected.update(range(total_rows - last_rows, total_rows))

        rows_selected = sorted(rows_selected)
        final_df = cleaned_df.iloc[rows_selected]
        selection_method = f"smart_selection_{len(rows_selected)}_rows"

    # Step 7: convert to string
    df_string = final_df.to_string(index=False, max_cols=None)

    # Step 8: collapse runs of spaces and drop blank lines
    df_string = re.sub(r' +', ' ', df_string)
    lines = [line.strip() for line in df_string.split('\n') if line.strip()]
    df_string = '\n'.join(lines)

    # Step 9: final trimming if still too long
    estimated_tokens = len(df_string) // 4
    if estimated_tokens > max_tokens:
        char_limit = max(max_tokens * 4, 0)
        df_string = df_string[:char_limit] + "\n... [truncated to fit context limit]"
        final_method = selection_method + "_and_truncated"
    else:
        final_method = selection_method

    # Compression ratio relative to the untouched frame's plain rendering
    original_size = max(len(df.to_string()), 1)
    compressed_size = len(df_string)
    compression_ratio = compressed_size / original_size

    return {
        "compressed_data": df_string,
        "metadata": {
            "original_shape": original_shape,
            "cleaned_shape": cleaned_df.shape,
            "final_shape": final_df.shape,
            "compression_ratio": compression_ratio,
            "estimated_tokens": len(df_string) // 4,
            "method": final_method,
            "rows_selected": len(final_df),
            "columns_kept": len(final_df.columns),
            "empty_rows_removed": original_shape[0] - cleaned_df.shape[0],
            "empty_columns_removed": original_shape[1] - cleaned_df.shape[1]
        }
    }

In [None]:
import tiktoken

class ConversationHistoryManager:
    """
    Keeps a chat history (a list of ``{"role": ..., "content": ...}`` dicts)
    within a token budget.

    Histories above ``compression_threshold`` tokens are shortened by
    summarising older messages through an OpenAI chat model and keeping the
    most recent messages verbatim; histories above ``EMERGENCY_TOKEN_LIMIT``
    are cut to the last ``EMERGENCY_KEEP_MESSAGES`` messages without any API call.

    ``max_context_tokens`` is stored on the instance but not consulted by any
    method of this class.
    """

    # Above this size no summarisation is attempted; the history is simply cut.
    EMERGENCY_TOKEN_LIMIT = 28000
    EMERGENCY_KEEP_MESSAGES = 8

    def __init__(self, max_context_tokens=20000, compression_threshold=8000):
        self.max_context_tokens = max_context_tokens
        self.compression_threshold = compression_threshold
        self.encoding = tiktoken.encoding_for_model("gpt-4")

    def estimate_tokens(self, text):
        """Return the token count of ``str(text)`` under the configured encoding."""
        return len(self.encoding.encode(str(text)))

    def get_conversation_size(self, messages):
        """Return the total token count of all message contents."""
        return sum(self.estimate_tokens(msg.get('content', '')) for msg in messages)

    def _summarize_chunk(self, messages_chunk, openai_client):
        """Summarise a chunk of messages via the chat API; raises on any failure."""
        conversation_text = ""
        for msg in messages_chunk:
            role = msg.get('role', 'unknown')
            content = msg.get('content', '')
            # str() so a non-string role (e.g. None) cannot break .upper()
            conversation_text += f"\n[{str(role).upper()}]: {content}\n"

        compression_prompt = f"""
        You are an expert at summarizing technical conversations about commercial real estate rent roll analysis.

        Please compress the following conversation history into a concise paragraph that preserves:
        1. Key decisions made by the AI system
        2. Important data analysis findings
        3. User requests and their outcomes
        4. Any dataframe modifications or versions created
        5. Business insights discovered
        6. Technical approaches used (GPT-4 vs Claude, etc.)

        Original conversation ({self.estimate_tokens(conversation_text)} tokens):
        {conversation_text}

        Compress this into a single, information-dense paragraph of approximately 50-80 words that captures the essence and key outcomes.

        Compressed summary:
        """

        response = openai_client.chat.completions.create(
            model="gpt-4o",  # gpt-4o rather than gpt-4.1 to avoid rate limits
            messages=[
                {"role": "system", "content": "You are an expert at compressing technical conversations while preserving critical information."},
                {"role": "user", "content": compression_prompt}
            ],
            max_tokens=100,
            temperature=0.2
        )
        return response.choices[0].message.content.strip()

    def compress_conversation_chunk(self, messages_chunk, openai_client):
        """
        Summarise a chunk of conversation history.

        Never raises: on failure a placeholder string starting with
        "[Compression failed: ...]" is returned instead of a summary.
        """
        try:
            return self._summarize_chunk(messages_chunk, openai_client)
        except Exception as e:
            print(f"⚠️ Compression failed: {str(e)}")
            return f"[Compression failed: {str(e)}] Conversation chunk with {len(messages_chunk)} messages covering rent roll analysis."

    def compress_history_if_needed(self, messages, openai_client):
        """
        Return ``messages`` unchanged when small enough, otherwise a shortened list.

        Order of checks: emergency cut (no API call), then summarisation with
        plain truncation as the fallback if summarisation raises.
        """
        current_size = self.get_conversation_size(messages)
        print(f"🔧 Conversation size: {current_size} tokens")

        # Emergency truncation for very large conversations (near API limit)
        if current_size > self.EMERGENCY_TOKEN_LIMIT:
            print(f"🚨 Emergency truncation: {current_size} -> keeping last 8 messages")
            if len(messages) > self.EMERGENCY_KEEP_MESSAGES:
                return messages[-self.EMERGENCY_KEEP_MESSAGES:]
            return messages

        if current_size > self.compression_threshold:
            print(f"🔄 Applying compression/truncation...")

            try:
                compressed = self._compress_with_fallback(messages, openai_client)
                final_size = self.get_conversation_size(compressed)
                print(f"✅ Compressed: {current_size} -> {final_size} tokens")
                return compressed
            except Exception as e:
                print(f"❌ Compression failed: {e}, using truncation")
                # Fallback: keep the most recent 40% of messages (at least 6)
                keep_messages = max(6, int(len(messages) * 0.4))
                truncated = messages[-keep_messages:]
                final_size = self.get_conversation_size(truncated)
                print(f"✂️ Truncated: {current_size} -> {final_size} tokens")
                return truncated

        # No compression needed
        return messages

    def _compress_with_fallback(self, messages, openai_client):
        """
        Summarise older messages and keep recent ones; truncate if that does not help.

        Strategy 1 (more than 10 messages): keep the last ``min(8, len // 2)``
        messages verbatim and replace the older ones with "[COMPRESSED HISTORY]"
        system messages, one per chunk. Chunks whose summarisation fails are
        dropped rather than replaced by an error placeholder. The result is
        used only if it is at least 20% smaller than the input.
        Strategy 2: keep the last third of the messages (at least 5).
        """
        current_size = self.get_conversation_size(messages)

        if len(messages) > 10:
            recent_count = min(8, len(messages) // 2)  # half the history, capped at 8
            recent_messages = messages[-recent_count:]
            older_messages = messages[:-recent_count]

            compressed_older = []
            if older_messages:
                # Roughly three chunks, never fewer than 3 messages per chunk
                chunk_size = max(3, len(older_messages) // 3)
                for i in range(0, len(older_messages), chunk_size):
                    chunk = older_messages[i:i+chunk_size]
                    try:
                        compressed_summary = self._summarize_chunk(chunk, openai_client)
                    except Exception as e:
                        print(f"⚠️ Chunk compression failed: {e}")
                        # Skip this chunk if compression fails
                        continue
                    compressed_older.append({
                        "role": "system",
                        "content": f"[COMPRESSED HISTORY]: {compressed_summary}"
                    })

            result = compressed_older + recent_messages

            # Accept only an effective compression (at least 20% reduction)
            new_size = self.get_conversation_size(result)
            if new_size < current_size * 0.8:
                return result

        # Strategy 2: aggressive truncation if compression didn't work well
        keep_messages = max(5, len(messages) // 3)
        return messages[-keep_messages:]

# Module-level conversation manager instance, built with explicit thresholds:
# 20000 max context tokens and an 8000-token compression threshold
# (raised from the earlier 4000 / 2000 settings).
conversation_manager = ConversationHistoryManager(max_context_tokens=20000, compression_threshold=8000)

In [None]:
# Configure logging (INFO and above, timestamped records) and create the
# named logger used by the functions in this notebook.
logging.basicConfig(format='%(asctime)s - %(name)s - %(levelname)s - %(message)s', level=logging.INFO)
logger = logging.getLogger('rent_roll_analyzer')

# Global variables and API keys

In [None]:
from typing import TypedDict, List, Optional, Union, Dict, Any

# Define the state as a TypedDict
class AgentState(TypedDict, total=False):
    """State dictionary passed to and returned by the agent step functions.

    ``total=False`` makes every key optional, so a given state may lack any of
    the keys below.
    """
    # Chat history as {"role": ..., "content": ...} dicts; the step functions read the last entry
    messages: List[Dict[str, str]]
    # Loaded rent roll, or None when no data is loaded
    df: Optional[pd.DataFrame]
    # Issue descriptions; rendered as a bullet list in the text-response prompt
    issues: List[str]
    # NOTE(review): not read or written in the visible part of the notebook - confirm usage
    execution_plan: Optional[str]
    # Set by determine_action: True when the chosen action is "ask_clarification"
    needs_clarification: bool
    # Question produced by ask_clarification
    clarification_question: Optional[str]
    # Set by determine_action: True when the chosen action is "generate_code"
    generate_code: bool
    # NOTE(review): not read or written in the visible part of the notebook - confirm usage
    code_execution_results: Optional[str]
    # Text returned to the user (set by ask_clarification / generate_text_response)
    final_response: Optional[str]
    anthropic_client: Optional[Any]  # For Claude API
    openai_client: Optional[Any]     # For OpenAI API


In [None]:
def read_rent_roll_simple(file_path):
    """
    Read a rent roll Excel file whose real header row follows a "Current" marker.

    The sheet is read without a header. The first row whose first cell is the
    text "Current" (surrounding whitespace ignored) is treated as a marker and
    the row directly after it supplies the column names. Blank header cells
    are named "NaN", so the result may contain duplicate column labels.

    If no marker is found, or the marker sits on the last row so that no header
    row follows it, the file is re-read with pandas' default header handling.

    Args:
        file_path: Path (or file-like object) accepted by ``pd.read_excel``.

    Returns:
        DataFrame holding the rows after the header row. The specialised path
        builds it from raw values and does no dtype inference.
    """
    # Read the raw Excel file with no header
    raw = pd.read_excel(file_path, header=None)

    # Locate the marker by position in the first column
    first_column = raw.iloc[:, 0] if raw.shape[1] > 0 else []
    marker_position = None
    for position, cell in enumerate(first_column):
        if isinstance(cell, str) and cell.strip() == "Current":
            marker_position = position
            break

    if marker_position is None:
        logger.warning("Could not find header row with 'Current' marker. Falling back to standard loading.")
        return pd.read_excel(file_path)

    header_row = marker_position + 1  # Headers are in the row after "Current"
    if header_row >= len(raw):
        # Marker is the last row: there is no header row to read
        logger.warning("Found 'Current' marker on the last row with no header row after it. Falling back to standard loading.")
        return pd.read_excel(file_path)

    # Use "NaN" for empty header cells
    headers = ["NaN" if pd.isna(val) else str(val) for val in raw.iloc[header_row]]

    # Everything after the header row is data
    data_rows = raw.iloc[(header_row + 1):].values
    result_df = pd.DataFrame(data_rows, columns=headers)

    logger.info(f"Successfully loaded rent roll with {len(result_df)} rows using specialized loader")
    return result_df

In [None]:
def analyze_rent_roll_gpt(file_path, api_key):
    """
    Analyze a CRE rent roll Excel file by sending its rows to an OpenAI chat model.

    The file is loaded with ``read_rent_roll_simple``, serialised to CSV and
    sent in full together with a fixed checklist of issue categories. The
    reply is split into one string per numbered item.

    Args:
        file_path: Path to the rent roll Excel file.
        api_key: OpenAI API key used to build the client.

    Returns:
        list[str]: one entry per issue. ``[]`` if the file could not be loaded,
        ``["No issues detected by GPT-4."]`` if the reply had no numbered
        items, and ``["Failed to analyze rent roll due to API error."]`` if the
        API call or the parsing raised.
    """
    # Load the rent roll
    try:
        df = read_rent_roll_simple(file_path)
        logger.info("File loaded successfully for GPT analysis.")
    except Exception as e:
        logger.error(f"Error loading file: {e}")
        return []

    # Initialize OpenAI client
    client = OpenAI(api_key=api_key)

    # Convert the DataFrame to CSV string format
    csv_data = df.to_csv(index=False)
    logger.info(f"Converted DataFrame to CSV with {len(df)} rows and {len(df.columns)} columns")

    # System prompt listing the general rent roll issue categories
    system_prompt = """
    You are a Commercial Real Estate rent roll expert specializing in identifying data quality, formatting, and consistency issues.

    When analyzing any CRE rent roll, rigorously check for these common categories of issues:

    1. DUPLICATE OR REDUNDANT ENTRIES: Look for any repeated charges, fees, or line items
    2. INCONSISTENT TERMINOLOGY: Identify any unclear, non-standard, or ambiguous descriptions
    3. DATE ANOMALIES: Flag any suspicious or illogical date patterns across move-in, lease start/end
    4. RENT DISCREPANCIES: Identify deviations between market rent values and actual charged amounts
    5. CALCULATION INCONSISTENCIES: Check if component charges properly sum to totals
    6. EXCEL ARTIFACTS: Identify any visible formulas, function calls, or spreadsheet mechanics
    7. FORMATTING IRREGULARITIES: Notice inconsistent data entry patterns or splitting of information
    8. BALANCE ANOMALIES: Identify unusual balances, especially negative values
    9. OCCUPANCY MISMATCHES: Look for occupied units with zero rent or vacant units with charges
    10. UNIT IDENTIFICATION PATTERNS: Check for inconsistencies in unit numbering or identification

    Be extremely thorough and specific in your analysis. Report ALL issues you find, regardless of how minor they may seem.
    DO NOT return "No issues detected" unless you've comprehensively analyzed the data for each category above.
    """

    # User prompt carrying the raw CSV data
    prompt = (
        f"Please analyze this Commercial Real Estate rent roll data in CSV format and identify ALL potential issues "
        f"that could affect data quality, accuracy, or decision-making.\n\n{csv_data}\n\n"

        "Based on your expertise in CRE rent rolls, provide a numbered list of ALL issues you can identify, including but not limited to:\n\n"

        "- Any duplicate or redundant charges\n"
        "- Unclear, non-standard, or inconsistent descriptions\n"
        "- Suspicious or illogical date patterns\n"
        "- Inconsistencies between market rent and actual rent values\n"
        "- Calculation errors where components don't match totals\n"
        "- Spreadsheet artifacts like visible formulas\n"
        "- Inconsistent data entry patterns\n"
        "- Unusual balance values\n"
        "- Occupancy status mismatches\n"
        "- Inconsistent unit numbering or identification\n\n"

        "IMPORTANT: For each issue found, please reference the specific unit(s) affected and explain why it's problematic. "
        "Be comprehensive - rent roll accuracy is critical for CRE investment and property management decisions."
    )

    try:
        logger.info("Sending request to GPT-4 for analysis...")
        response = client.chat.completions.create(
            model="gpt-4o",
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": prompt}
            ],
            max_tokens=2000,
            temperature=0.3
        )
        # The content can be None; treat that as an empty reply
        response_text = response.choices[0].message.content or ""
        logger.info("Received response from GPT-4.")

        issues = _parse_numbered_issues(response_text)
        if not issues:
            issues.append("No issues detected by GPT-4.")

        logger.info(f"Identified {len(issues)} issues in the rent roll")
        return issues

    except Exception as e:
        logger.error(f"Error calling GPT-4 for analysis: {e}")
        logger.error(traceback.format_exc())
        return ["Failed to analyze rent roll due to API error."]


def _parse_numbered_issues(response_text):
    """Split a reply into one string per numbered item ("1. ...", "2) ...").

    A line starts a new item when, after optional indentation and markdown
    markers (``*`` / ``#``), it begins with a 1-3 digit number followed by
    "." or ")" and whitespace. Other non-blank lines are appended to the
    current item; text before the first numbered item is ignored.
    """
    issues = []
    current_issue = ""

    for line in response_text.split('\n'):
        stripped = line.strip()
        if not stripped:
            continue
        if re.match(r'[*#]*\s*\d{1,3}[.)]\s', stripped):
            if current_issue:
                issues.append(current_issue)
            current_issue = stripped
        elif current_issue:
            current_issue += " " + stripped

    if current_issue:
        issues.append(current_issue)
    return issues

In [None]:
def determine_action(state):
    """
    Decide whether to answer directly, ask for clarification, or generate code.

    The last message (if it is a user message), the dataframe's column dtypes
    and a three-row preview are sent to an OpenAI chat model, which is asked to
    return a JSON object with an ``action`` key.

    Args:
        state: Agent state mapping. ``messages`` and ``df`` are read with
            ``.get``, so either may be missing; an empty history is treated as
            an empty user query.

    Returns:
        dict: shallow copy of ``state`` with ``needs_clarification`` and
        ``generate_code`` set. Both are False when the chosen action is
        "text_response", when the reply cannot be parsed or names an unknown
        action, or when building the client / calling the API fails.
    """
    valid_actions = ("ask_clarification", "text_response", "generate_code")

    messages = state.get("messages") or []
    last_message = messages[-1] if messages else {}
    user_message = last_message.get("content", "") if last_message.get("role") == "user" else ""
    df = state.get("df")

    # Get column information for context
    if df is not None:
        try:
            # zip over columns/dtypes is positional, so duplicate labels are fine
            column_info_str = "\n".join(
                f"- {name}: {dtype}" for name, dtype in zip(df.columns, df.dtypes)
            )
            df_preview = df.head(3).to_string()
        except Exception as e:
            logger.error(f"Error getting column info: {e}")
            column_info_str = "Error retrieving column information"
            df_preview = "Error retrieving data preview"
    else:
        column_info_str = "No dataframe loaded"
        df_preview = "No data available"

    # Use GPT-4 to analyze the query and determine the best action
    prompt = f"""
    User query: {user_message}

    Dataframe information:
    - Rows: {len(df) if df is not None else 'No data loaded'}
    - Columns: {column_info_str}

    Data preview:
    {df_preview}

    Analyze the user query and determine the most appropriate action:
    1. If the query is ambiguous or lacks specificity, choose "ask_clarification"
    2. If the query can be answered with a simple explanation without analysis, choose "text_response"
    3. If the query requires data analysis, calculations, or visualizations, choose "generate_code"

    Respond with a JSON object containing:
    {{"action": "ask_clarification" | "text_response" | "generate_code", "reason": "brief explanation"}}
    """

    new_state = dict(state)  # Create a copy
    try:
        # Built inside the try so a client construction failure also falls back
        client = OpenAI(api_key=DEFAULT_OPENAI_API_KEY)
        response = client.chat.completions.create(
            model="gpt-4o",
            messages=[
                {"role": "system", "content": "You are a decision-making agent for a rent roll analysis system. Output ONLY a JSON object with the determined action and reason."},
                {"role": "user", "content": prompt}
            ],
            max_tokens=500,
            temperature=0.2
        )

        response_text = response.choices[0].message.content or ""

        # Extract JSON from the response; default to a text response if absent
        action = "text_response"
        json_match = re.search(r'{.*}', response_text, re.DOTALL)
        if json_match:
            action_data = json.loads(json_match.group(0))
            action = action_data.get("action", "text_response")
        if action not in valid_actions:
            action = "text_response"

        logger.info(f"Determined action using GPT-4: {action}")

        new_state["needs_clarification"] = action == "ask_clarification"
        new_state["generate_code"] = action == "generate_code"
    except Exception as e:
        logger.error(f"Error in determine_action: {e}")
        # Default to text response on error
        new_state["needs_clarification"] = False
        new_state["generate_code"] = False
    return new_state

In [None]:

def ask_clarification(state: AgentState) -> Dict:
    """
    Generate a clarification question for the user using an OpenAI chat model.

    Args:
        state: Agent state. The last entry of ``messages`` is used as the
            user's question when its role is "user"; a missing or empty
            history is treated as an empty question.

    Returns:
        dict: shallow copy of ``state`` in which ``clarification_question`` and
        ``final_response`` hold the question and ``messages`` is a new list
        with the question appended as an assistant message. If building the
        client or calling the API fails, or the reply is empty, a generic
        question is used instead.
    """
    messages = state.get("messages") or []
    last_message = messages[-1] if messages else {}
    user_message = last_message.get("content", "") if last_message.get("role") == "user" else ""

    try:
        # Built inside the try so a client construction failure also falls back
        client = OpenAI(api_key=DEFAULT_OPENAI_API_KEY)
        response = client.chat.completions.create(
            model="gpt-4o",
            messages=[
                {"role": "system", "content": """You are a commercial real estate rent roll analyst.
                Generate a clear, specific clarification question to better understand
                what the user is asking about their rent roll data."""},
                {"role": "user", "content": f"My question is: {user_message}"}
            ],
            max_tokens=300,
            temperature=0.3
        )

        clarification_question = response.choices[0].message.content
        if not clarification_question:
            raise ValueError("empty clarification response")
        logger.info(f"Generated clarification question using GPT-4: {clarification_question[:50]}...")
    except Exception as e:
        logger.error(f"Error in ask_clarification: {e}")
        # Fallback to a generic clarification question
        clarification_question = "Could you please clarify what specific aspect of the rent roll you'd like me to analyze?"

    # Build the updated state once for both the success and fallback paths
    new_state = dict(state)
    new_state["clarification_question"] = clarification_question
    new_state["final_response"] = clarification_question
    new_state["messages"] = list(messages) + [{"role": "assistant", "content": clarification_question}]
    return new_state

In [None]:
def generate_text_response(state):
    """
    Generate a short text answer to the user's query using an OpenAI chat model.

    The system prompt is built from the dataframe (column names, per-column
    statistics for the first 10 columns, a three-row preview) and the list of
    identified issues. System messages in the history are not forwarded; all
    other messages are sent after the freshly built system prompt.

    Args:
        state: Agent state. ``messages``, ``df`` and ``issues`` are read with
            ``.get``, so any of them may be missing.

    Returns:
        dict: shallow copy of ``state`` in which ``final_response`` holds the
        answer and ``messages`` is a new list with the answer appended as an
        assistant message. On any API failure a fixed apology is used instead.
    """
    messages = state.get("messages") or []
    df = state.get("df")
    issues = state.get("issues") or []

    # Prepare context for GPT-4
    issues_text = "\n".join([f"- {issue}" for issue in issues])

    # Get column and data preview for context
    if df is not None:
        # Column labels may be non-strings (ints, tuples), so stringify them
        column_info = ", ".join(str(col) for col in df.columns)
        data_stats = []
        # Limit to first 10 columns to avoid token limits; positional access
        # keeps duplicate column labels from breaking the statistics.
        for pos, col in enumerate(df.columns[:10]):
            try:
                series = df.iloc[:, pos]
                if pd.api.types.is_numeric_dtype(series):
                    stat = f"- {col}: min={series.min()}, max={series.max()}, mean={series.mean():.2f}, null={series.isna().sum()}"
                else:
                    unique_vals = series.nunique()
                    stat = f"- {col}: unique values={unique_vals}, null={series.isna().sum()}"
                data_stats.append(stat)
            except Exception:
                data_stats.append(f"- {col}: [error calculating stats]")
        data_stats_str = "\n".join(data_stats)
        df_preview = df.head(3).to_string()
    else:
        column_info = "No columns available"
        data_stats_str = "No data statistics available"
        df_preview = "No data preview available"

    system_prompt = f"""You are a commercial real estate rent roll analyst.
    The rent roll data has {len(df) if df is not None else 0} rows and
    {len(df.columns) if df is not None else 0} columns.

    Column information: {column_info}

    Data statistics:
    {data_stats_str}

    Data preview:
    {df_preview}

    Identified issues:
    {issues_text}

    Provide a concise, informative answer to the user's question.
    Focus on being helpful and direct, with only 1-2 paragraphs.
    Do not include code or detailed analysis unless absolutely necessary.
    """

    # Drop system messages from the history; the prompt above replaces them
    filtered_messages = [msg for msg in messages if msg.get("role") != "system"]

    try:
        # Built inside the try so a client construction failure also falls back
        client = OpenAI(api_key=DEFAULT_OPENAI_API_KEY)
        response = client.chat.completions.create(
            model="gpt-4.1",
            messages=[
                {"role": "system", "content": system_prompt},
                *filtered_messages
            ],
            max_tokens=1000,
            temperature=0.3
        )

        text_response = response.choices[0].message.content
        if not text_response:
            raise ValueError("empty response from model")
        logger.info(f"Generated text response using GPT-4: {text_response[:50]}...")
    except Exception as e:
        logger.error(f"Error in generate_text_response: {e}")
        # Fallback to a generic response
        text_response = "I'm sorry, I'm having trouble analyzing your rent roll data right now. Could you try rephrasing your question?"

    # Build the updated state once for both the success and fallback paths
    new_state = dict(state)
    new_state["final_response"] = text_response
    new_state["messages"] = list(messages) + [{"role": "assistant", "content": text_response}]
    return new_state

In [None]:
def trim_dataframe_output(output_text, max_rows=20, max_chars=None):
    """
    Keep only the first ``max_rows`` lines of a block of text.

    Args:
        output_text: The text output
        max_rows: Maximum number of lines to keep (default: 20)
        max_chars: Not used, kept for compatibility

    Returns:
        ``output_text`` unchanged when it has at most ``max_rows`` lines;
        otherwise the first ``max_rows`` lines followed by a truncation notice.
    """
    all_lines = output_text.split('\n')

    if len(all_lines) > max_rows:
        notice = f"... [output truncated, showing first {max_rows} lines only] ..."
        return '\n'.join(all_lines[:max_rows] + [notice])

    return output_text

In [None]:

from datetime import datetime
def save_dataframe_version(df, operation_description=""):
    """Save the current state of the dataframe as both CSV and Excel files.

    Files are written to the ``rent_roll_versions`` directory (created if
    needed) as ``rent_roll_<version>.csv`` / ``.xlsx``. The version name is
    ``v_<YYYYmmdd_HHMMSS>``; if files for that name already exist (two saves
    within the same second) a numeric suffix ``_1``, ``_2``, ... is appended so
    earlier versions are never overwritten.

    When a module-level ``app_state`` exists, a metadata record is appended to
    ``app_state["df_versions"]``; the first record is flagged ``is_original``.

    Args:
        df: The dataframe to save
        operation_description: A string describing what operation was performed

    Returns:
        version_name: The name of the version that was saved
    """
    import os
    from datetime import datetime

    # Create versions directory if it doesn't exist
    versions_dir = "rent_roll_versions"
    os.makedirs(versions_dir, exist_ok=True)

    # Generate version name with timestamp
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    base_name = f"v_{timestamp}"

    def _paths_for(name):
        return (
            os.path.join(versions_dir, f"rent_roll_{name}.csv"),
            os.path.join(versions_dir, f"rent_roll_{name}.xlsx"),
        )

    # Pick the first name whose files do not exist yet
    version_name = base_name
    csv_filename, excel_filename = _paths_for(version_name)
    collision = 0
    while os.path.exists(csv_filename) or os.path.exists(excel_filename):
        collision += 1
        version_name = f"{base_name}_{collision}"
        csv_filename, excel_filename = _paths_for(version_name)

    # Save as CSV
    df.to_csv(csv_filename, index=False)

    # Save as Excel
    df.to_excel(excel_filename, index=False, engine='openpyxl')

    # Add version metadata to the registry
    if 'app_state' in globals():
        version_info = {
            'name': version_name,
            'description': operation_description,
            'timestamp': timestamp,
            'filename': csv_filename,  # Keep CSV as primary for backward compatibility
            'excel_filename': excel_filename,
            'is_original': len(app_state["df_versions"]) == 0  # First one is original
        }
        app_state["df_versions"].append(version_info)

    print(f"✓ Saved dataframe version {version_name}: {operation_description}")
    print(f"  - CSV: {csv_filename}")
    print(f"  - Excel: {excel_filename}")

    # Return the version name for reference
    return version_name

def get_versions_info_for_prompt():
    """Generate version information for the Claude prompt.

    Reads the registry in the module-level ``app_state["df_versions"]`` and
    renders one numbered line per version, tagging the original and the latest
    entry, followed by a short summary.

    Returns:
        str: ``"No versions available yet."`` when the registry is empty,
            otherwise the formatted multi-line version history.
    """
    versions = app_state["df_versions"]
    if not versions:
        return "No versions available yet."

    # Position of the original version: the first entry flagged 'is_original',
    # falling back to the first entry when none is flagged.
    original_index = next(
        (i for i, v in enumerate(versions) if v.get('is_original')), 0
    )
    # The latest version is always the last registry entry.
    latest_index = len(versions) - 1

    original = versions[original_index]
    latest = versions[latest_index]

    # Format all versions. Tags are assigned by position, not by dict equality,
    # so registry entries that happen to hold identical values are not all
    # labelled ORIGINAL/LATEST.
    all_versions = []
    for i, version in enumerate(versions):
        status = []
        if i == original_index:
            status.append("ORIGINAL")
        if i == latest_index:
            status.append("LATEST")

        status_str = f" ({', '.join(status)})" if status else ""
        all_versions.append(f"{i+1}. {version['name']}{status_str}: {version['description']}")

    versions_text = "\n".join(all_versions)

    return f"""
DATAFRAME VERSION HISTORY:
{versions_text}

Original version: {original['name']}
Latest version: {latest['name']}
Total versions: {len(versions)}
"""


In [None]:
import os
import json
import traceback
import pandas as pd
from datetime import datetime
from openai import OpenAI
import re
def validate_dataframe_changes_with_gpt4(original_df, new_df, user_query, executed_code=None):
    """
    Use GPT-4.1 to validate if the dataframe changes align with user intent.

    Each dataframe is described to the model by its shape, columns, dtypes, null
    counts and a strategic sample: first 5 rows + up to 10 middle rows + last 5
    rows. Dataframes with 20 rows or fewer are sent in full.
    All inputs and outputs are logged to a per-call file in the
    ``validation_logs`` directory.

    Args:
        original_df: Dataframe before the change.
        new_df: Dataframe after the change.
        user_query: The user's request the change is supposed to fulfil.
        executed_code: Optional source code that produced ``new_df``; included in
            the prompt when provided.

    Returns:
        dict: The JSON object parsed from the model response, extended with
            ``sampling_method``, ``original_sample_info``, ``new_sample_info`` and
            ``log_file``. If the API call fails or the response cannot be parsed,
            a fallback dict is returned instead with ``validation_result`` set to
            ``"WARNING"`` and ``recommendation`` set to ``"MANUAL_REVIEW"``.
    """
    # Keep every import at the top of the function. Importing `json` further
    # down (inside the try block) makes it a function-local name that is still
    # unbound when the API call fails, which crashed the error handler with
    # UnboundLocalError instead of returning the fallback result.
    import json
    import os
    import re
    import traceback
    from datetime import datetime

    import pandas as pd

    # Create logging directory
    logs_dir = "validation_logs"
    os.makedirs(logs_dir, exist_ok=True)

    # Create unique log file with timestamp (trimmed to millisecond precision)
    timestamp = datetime.now().strftime('%Y%m%d_%H%M%S_%f')[:-3]
    log_file = os.path.join(logs_dir, f"validation_{timestamp}.log")

    def log_to_file(content, section_title=""):
        """Append content to the log file, optionally under a section banner."""
        try:
            with open(log_file, 'a', encoding='utf-8') as f:
                if section_title:
                    f.write(f"\n{'='*60}\n")
                    f.write(f"{section_title}\n")
                    f.write(f"{'='*60}\n")
                f.write(str(content))
                f.write("\n")
                f.flush()
        except Exception as e:
            # Logging is best-effort: never let it abort the validation itself.
            print(f"Failed to write to log file: {e}")

    def build_fallback(issue, summary, **extra):
        """Build the WARNING / MANUAL_REVIEW result used when validation cannot complete."""
        result = {
            "validation_result": "WARNING",
            "intent_fulfilled": None,
            "data_integrity_intact": None,
            "logical_consistency": None,
            "schema_appropriate": None,
            "issues_found": [issue],
            "positive_changes": [],
            "summary": summary,
            "confidence": "low",
            "recommendation": "MANUAL_REVIEW",
            "sampling_method": "strategic",
            "log_file": log_file,
        }
        result.update(extra)
        return result

    # Log initial information
    log_to_file("VALIDATION SESSION STARTED", "VALIDATION LOG")
    log_to_file(f"Timestamp: {datetime.now()}")
    log_to_file(f"Log file: {log_file}")

    # Log input parameters
    log_to_file(f"User Query: {user_query}", "INPUT PARAMETERS")
    log_to_file(f"Original DataFrame Shape: {original_df.shape}")
    log_to_file(f"New DataFrame Shape: {new_df.shape}")
    log_to_file(f"Executed Code: {executed_code if executed_code else 'None'}")

    print(f"📝 Logging validation session to: {log_file}")

    client = OpenAI(api_key=DEFAULT_OPENAI_API_KEY)

    def get_strategic_sample(df, max_sample_rows=20):
        """Get strategic sample: first 5, middle 10, last 5 rows.

        Returns a tuple ``(sample_df, sample_type, total_rows)`` where
        ``sample_type`` is "complete" when the frame is returned as-is and
        "sampled" otherwise.
        """
        total_rows = len(df)

        if total_rows <= max_sample_rows:
            # If dataframe is small, return it as-is
            return df, "complete", total_rows

        pieces = [df.head(5)]

        if total_rows > 10:  # Need at least 10 rows to have a meaningful middle
            middle_start = max(5, total_rows // 2 - 5)  # Don't overlap with first 5
            middle_end = min(total_rows - 5, total_rows // 2 + 5)  # Don't overlap with last 5
            # If overlap would occur, just use first and last
            if middle_end > middle_start:
                pieces.append(df.iloc[middle_start:middle_end])

        pieces.append(df.tail(5))
        sampled_df = pd.concat(pieces, ignore_index=False)

        # Remove any duplicate indices (keeps the first row seen per index label)
        sampled_df = sampled_df[~sampled_df.index.duplicated(keep='first')]

        return sampled_df, "sampled", total_rows

    def log_sampling(label, total_rows, sample_type, sample, section_title=""):
        """Log how one dataframe was sampled."""
        log_to_file(label, section_title)
        log_to_file(f"  Total rows: {total_rows}")
        log_to_file(f"  Sample type: {sample_type}")
        log_to_file(f"  Sample size: {len(sample)}")
        log_to_file(f"  Sample indices: {list(sample.index)}")

    def describe_frame(df, sample, sample_type, total_rows):
        """Collect the structural facts and sampled rows that go into the prompt."""
        return {
            "shape": df.shape,
            "total_rows": total_rows,
            "sample_type": sample_type,
            "sample_size": len(sample),
            "columns": list(df.columns),
            "dtypes": dict(df.dtypes.astype(str)),
            "sampled_data": sample.to_string(max_rows=None, max_cols=None),
            "null_counts": dict(df.isnull().sum())
        }

    def log_frame_info(label, section_title, info):
        """Log the description of one dataframe."""
        log_to_file(label, section_title)
        log_to_file(f"Shape: {info['shape']}")
        log_to_file(f"Columns: {info['columns']}")
        log_to_file(f"Data Types: {info['dtypes']}")
        log_to_file(f"Null Counts: {info['null_counts']}")
        log_to_file("Sampled Data:")
        log_to_file(info['sampled_data'])

    # Get strategic samples of both dataframes
    original_sample, orig_sample_type, orig_total_rows = get_strategic_sample(original_df)
    new_sample, new_sample_type, new_total_rows = get_strategic_sample(new_df)

    # Log sampling information
    log_sampling("Original DataFrame Sampling:", orig_total_rows, orig_sample_type,
                 original_sample, "SAMPLING INFORMATION")
    log_sampling("New DataFrame Sampling:", new_total_rows, new_sample_type, new_sample)

    # Prepare dataframe information for GPT-4.1 with samples
    original_info = describe_frame(original_df, original_sample, orig_sample_type, orig_total_rows)
    new_info = describe_frame(new_df, new_sample, new_sample_type, new_total_rows)

    # Log dataframe details
    log_frame_info("Original DataFrame Info:", "ORIGINAL DATAFRAME", original_info)
    log_frame_info("New DataFrame Info:", "NEW DATAFRAME", new_info)

    # Create comprehensive validation prompt with sampling info
    validation_prompt = f"""
    You are an expert data analyst validator. Your job is to verify if dataframe changes correctly fulfill the user's request.

    USER'S ORIGINAL REQUEST:
    "{user_query}"

    ORIGINAL DATAFRAME:
    Shape: {original_info['shape']} (Total: {orig_total_rows} rows)
    Sample Type: {orig_sample_type} ({original_info['sample_size']} rows shown)
    Columns: {original_info['columns']}
    Data Types: {original_info['dtypes']}
    Null Counts: {original_info['null_counts']}

    Strategic Sample Data (First 5 + Middle 10 + Last 5 rows):
    {original_info['sampled_data']}

    NEW/MODIFIED DATAFRAME:
    Shape: {new_info['shape']} (Total: {new_total_rows} rows)
    Sample Type: {new_sample_type} ({new_info['sample_size']} rows shown)
    Columns: {new_info['columns']}
    Data Types: {new_info['dtypes']}
    Null Counts: {new_info['null_counts']}

    Strategic Sample Data (First 5 + Middle 10 + Last 5 rows):
    {new_info['sampled_data']}

    {f'''
    CODE THAT WAS EXECUTED:
    {executed_code}
    ''' if executed_code else ''}

    VALIDATION TASK:
    Analyze the dataframe changes based on the strategic samples provided and determine:

    1. INTENT ALIGNMENT: Did the changes accomplish what the user requested?
    2. DATA INTEGRITY: Are there any data corruption, loss, or invalid values in the samples?
    3. LOGICAL CONSISTENCY: Do the changes make logical sense for this type of data?
    4. SCHEMA VALIDATION: Are column types and structures still appropriate?
    5. SAMPLE ANALYSIS: Based on the strategic sampling, do the changes appear consistent?

    NOTE: You are analyzing strategic samples (first 5 + middle ~10 + last 5 rows) of the full dataset.
    The samples should be representative of the overall changes.

    Provide your analysis in this exact JSON format:
    {{
        "validation_result": "PASS" | "FAIL" | "WARNING",
        "intent_fulfilled": true/false,
        "data_integrity_intact": true/false,
        "logical_consistency": true/false,
        "schema_appropriate": true/false,
        "issues_found": [
            "List any specific issues discovered in the samples"
        ],
        "positive_changes": [
            "List what was done correctly"
        ],
        "summary": "Brief overall assessment based on strategic sampling",
        "confidence": "high" | "medium" | "low",
        "recommendation": "PROCEED" | "RETRY" | "MANUAL_REVIEW",
        "sampling_note": "Analysis based on strategic samples from {orig_total_rows} and {new_total_rows} total rows"
    }}

    Be thorough in examining the sample data provided, keeping in mind this represents the broader dataset.
    """

    # Log the full prompt being sent to GPT-4
    log_to_file(validation_prompt, "GPT-4 PROMPT")

    # Rough token estimate for logging only (word count scaled by 1.3)
    estimated_tokens = len(validation_prompt.split()) * 1.3
    log_to_file(f"Estimated token count: {estimated_tokens:.0f}", "TOKEN ESTIMATION")

    try:
        # Log API call attempt
        log_to_file("Sending request to GPT-4...", "API CALL")

        # Call GPT-4.1 for validation
        response = client.chat.completions.create(
            model="gpt-4.1",
            messages=[
                {"role": "system", "content": "You are a meticulous data validation expert. Analyze dataframe changes based on strategic samples and provide detailed validation results in JSON format."},
                {"role": "user", "content": validation_prompt}
            ],
            max_tokens=2000,
            temperature=0.1  # Low temperature for consistent validation
        )

        validation_response = response.choices[0].message.content

        # Log GPT-4 response
        log_to_file(validation_response, "GPT-4 RESPONSE")
        log_to_file(f"Response length: {len(validation_response)} characters")

        # Log token usage if available
        if hasattr(response, 'usage') and response.usage:
            log_to_file("Actual token usage:", "TOKEN USAGE")
            log_to_file(f"  Prompt tokens: {response.usage.prompt_tokens}")
            log_to_file(f"  Completion tokens: {response.usage.completion_tokens}")
            log_to_file(f"  Total tokens: {response.usage.total_tokens}")

        # Extract JSON from response: greedy match from the first '{' to the last '}'
        json_match = re.search(r'{.*}', validation_response, re.DOTALL)

        if not json_match:
            log_to_file("No JSON found in GPT-4 response", "PARSING ERROR")
            fallback_result = build_fallback(
                "No JSON found in validation response",
                "Validation failed to produce structured output",
                raw_response=validation_response,
            )
            log_to_file(json.dumps(fallback_result, indent=2), "FALLBACK RESULT")
            return fallback_result

        log_to_file("JSON found in response, attempting to parse...", "JSON PARSING")
        try:
            validation_result = json.loads(json_match.group(0))
        except json.JSONDecodeError as e:
            log_to_file(f"JSON parsing failed: {str(e)}", "PARSING ERROR")
            fallback_result = build_fallback(
                "JSON parsing failed",
                "Validation response could not be parsed",
                raw_response=validation_response,
            )
            log_to_file(json.dumps(fallback_result, indent=2), "FALLBACK RESULT")
            return fallback_result

        # Add sampling metadata
        validation_result["sampling_method"] = "strategic"
        validation_result["original_sample_info"] = {
            "total_rows": orig_total_rows,
            "sample_type": orig_sample_type,
            "sample_size": len(original_sample)
        }
        validation_result["new_sample_info"] = {
            "total_rows": new_total_rows,
            "sample_type": new_sample_type,
            "sample_size": len(new_sample)
        }
        validation_result["log_file"] = log_file

        # Log successful parsing and final result
        log_to_file("JSON parsing successful!", "PARSING SUCCESS")
        log_to_file(json.dumps(validation_result, indent=2), "FINAL VALIDATION RESULT")
        log_to_file("Validation completed successfully!", "SESSION END")

        return validation_result

    except Exception as e:
        # Log the exception details
        log_to_file(f"Exception occurred: {str(e)}", "EXCEPTION ERROR")
        log_to_file(f"Exception type: {type(e).__name__}")
        log_to_file(f"Full traceback:\n{traceback.format_exc()}", "FULL TRACEBACK")

        # Fallback validation result on API failure
        fallback_result = build_fallback(
            f"Validation API failed: {str(e)}",
            "Could not perform validation due to API error",
            error=str(e),
        )

        log_to_file(json.dumps(fallback_result, indent=2), "ERROR FALLBACK RESULT")
        log_to_file("Validation session ended with error", "SESSION END")

        return fallback_result


# Function to check and read validation logs
def check_and_read_validation_logs():
    """List the validation logs on disk and print the newest one.

    Looks for ``validation_*.log`` files inside the ``validation_logs``
    directory, prints a size/modification-time listing ordered newest first and
    then prints the content of the most recent file (head and tail only when it
    is longer than 5000 characters).

    Returns:
        list: Paths of the log files, newest first; an empty list when the
            directory is missing or holds no matching files.
    """
    import os
    import glob
    from datetime import datetime

    logs_dir = "validation_logs"
    separator = "─" * 80

    print(f"🔍 Checking for validation logs in: {os.path.abspath(logs_dir)}")

    # Guard: nothing to do without the directory
    if not os.path.exists(logs_dir):
        print(f"❌ Logs directory does not exist: {logs_dir}")
        return []

    # Guard: directory exists but holds no validation logs
    matches = glob.glob(os.path.join(logs_dir, "validation_*.log"))
    if not matches:
        print(f"❌ No validation log files found in {logs_dir}")
        return []

    # Newest first, by modification time
    ordered = sorted(matches, key=os.path.getmtime, reverse=True)

    print(f"✅ Found {len(ordered)} validation log files:")

    for position, path in enumerate(ordered, start=1):
        try:
            details = os.stat(path)
            modified = datetime.fromtimestamp(details.st_mtime)
            print(f"  {position}. {os.path.basename(path)} ({details.st_size:,} bytes, {modified.strftime('%Y-%m-%d %H:%M:%S')})")
        except Exception as e:
            print(f"  {position}. {os.path.basename(path)} (error reading stats: {e})")

    # Show content of most recent log
    newest = ordered[0]
    print(f"\n📖 Content of most recent log file: {os.path.basename(newest)}")
    print(separator)

    try:
        with open(newest, 'r', encoding='utf-8') as handle:
            text = handle.read()

        if len(text) <= 5000:
            print(text)
        else:
            # Large log: print the first and the last 2500 characters only
            print(text[:2500])
            print(f"\n... [TRUNCATED - showing first 2500 of {len(text)} characters] ...")
            print(text[-2500:])

    except Exception as e:
        print(f"❌ Could not read log file: {e}")

    print(separator)

    return ordered


# Confirmation banner shown once this cell has defined the validation helpers.
print("\n".join((
    "✅ NEW VALIDATION FUNCTION WITH LOGGING AND SAMPLING LOADED!",
    "📝 This function will create logs in 'validation_logs' directory",
    "🎯 Token usage should be dramatically reduced with strategic sampling",
    "🔧 Run 'check_and_read_validation_logs()' after validation to see logs",
)))


✅ NEW VALIDATION FUNCTION WITH LOGGING AND SAMPLING LOADED!
📝 This function will create logs in 'validation_logs' directory
🎯 Token usage should be dramatically reduced with strategic sampling
🔧 Run 'check_and_read_validation_logs()' after validation to see logs


In [None]:


def format_validation_message(validation_result):
    """
    Format validation result into a user-friendly message

    The input is typically JSON produced by a language model, so fields may be
    missing, ``null`` or of an unexpected type; those cases are rendered with
    the same defaults as a missing field rather than raising.

    Args:
        validation_result: dict with the validation fields (``validation_result``,
            ``summary``, ``confidence``, ``recommendation``, the four boolean
            checks, ``issues_found`` and ``positive_changes``).

    Returns:
        str: Markdown-formatted message: status header, summary block, the four
            validation checks, then optional issue and positive-change lists.
    """
    def _text(key, default):
        """Return the field as text, treating a missing or null value as `default`."""
        value = validation_result.get(key)
        return default if value is None else str(value)

    def _items(key):
        """Return the field as a list; a scalar (e.g. a bare string) becomes one item."""
        value = validation_result.get(key)
        if not value:
            return []
        if isinstance(value, (list, tuple)):
            return list(value)
        # Iterating a bare string would emit one bullet per character.
        return [value]

    result = validation_result.get("validation_result", "UNKNOWN")

    if result == "PASS":
        emoji = "✅"
        status = "VALIDATION PASSED"
    elif result == "FAIL":
        emoji = "❌"
        status = "VALIDATION FAILED"
    elif result == "WARNING":
        emoji = "⚠️"
        status = "VALIDATION WARNING"
    else:
        emoji = "❓"
        status = "VALIDATION UNCLEAR"

    message = f"\n\n{emoji} **{status}**\n"
    message += f"**Summary:** {_text('summary', 'No summary available')}\n"
    # .title() is only safe on text; a null confidence used to raise AttributeError.
    message += f"**Confidence:** {_text('confidence', 'unknown').title()}\n"
    message += f"**Recommendation:** {_text('recommendation', 'MANUAL_REVIEW')}\n\n"

    # Add specific validation checks
    checks = [
        ("Intent Fulfilled", validation_result.get("intent_fulfilled")),
        ("Data Integrity", validation_result.get("data_integrity_intact")),
        ("Logical Consistency", validation_result.get("logical_consistency")),
        ("Schema Appropriate", validation_result.get("schema_appropriate"))
    ]

    message += "**Validation Checks:**\n"
    for check_name, check_result in checks:
        # Identity checks on purpose: anything other than a real boolean is "Unknown".
        if check_result is True:
            message += f"• ✅ {check_name}: Passed\n"
        elif check_result is False:
            message += f"• ❌ {check_name}: Failed\n"
        else:
            message += f"• ❓ {check_name}: Unknown\n"

    # Add issues if any
    issues = _items("issues_found")
    if issues:
        message += f"\n**Issues Found ({len(issues)}):**\n"
        for issue in issues:
            message += f"• ⚠️ {issue}\n"

    # Add positive changes if any
    positives = _items("positive_changes")
    if positives:
        message += f"\n**Positive Changes ({len(positives)}):**\n"
        for positive in positives:
            message += f"• ✅ {positive}\n"

    return message


def execute_code_blocks(code_blocks, globals_dict):
    """
    Run a sequence of Python source strings and collect what they print.

    Each block is executed with ``exec`` against ``globals_dict`` while its
    stdout is captured. Execution stops at the first block that raises an
    ``Exception``; later blocks are not run. Progress is printed as it goes.

    NOTE(review): ``exec`` runs the given source with no sandboxing — callers
    must only pass code they are prepared to execute.

    Args:
        code_blocks: Sequence of source strings, executed in order.
        globals_dict: Namespace passed to ``exec`` (shared by all blocks).

    Returns:
        tuple: ``(execution_results, all_executed_successfully, failed_code,
            error_msg)`` — the accumulated report text, whether every block ran
            without error, and the source and message of the failing block
            (both empty strings when nothing failed).
    """
    import io
    import contextlib

    block_total = len(code_blocks)
    report_chunks = []
    completed = 0
    failing_source = ""
    failure_text = ""

    for number, source in enumerate(code_blocks, start=1):
        print(f"\n--- Executing Code Block {number}/{block_total} ---")

        # Fresh buffer per block so outputs are reported separately
        buffer = io.StringIO()

        try:
            with contextlib.redirect_stdout(buffer):
                exec(source, globals_dict)

            printed = buffer.getvalue()

            if not printed.strip():
                print("Code executed successfully (no output)")
                report_chunks.append(f"\n--- Code Block {number} executed successfully (no output) ---\n")
            else:
                print(printed)
                report_chunks.append(f"\n--- Output from Code Block {number} ---\n{printed}\n")

            completed += 1
            print(f"✓ Code Block {number} executed successfully")

        except Exception as exc:
            failure_text = str(exc)
            failing_source = source
            print(f"✗ Code Block {number} failed with error: {failure_text}")
            report_chunks.append(f"\n--- Code Block {number} FAILED ---\nError: {failure_text}\n")
            break  # First failure aborts the remaining blocks

    return "".join(report_chunks), completed == block_total, failing_source, failure_text

def generate_code_and_execute(state: AgentState) -> Dict:
    """
    Enhanced version: Generate and execute code using a two-step AI approach with GPT-4.1 validation
    and support for version-specific dataframes with conversation history management
    """
    print("Starting code generation with version support...")

    messages = state["messages"]
    df = state["df"]  # This will be version-specific if user requested a specific version
    selected_context = state.get("selected_context")
    version_context = state.get("version_context", "")
    using_specific_version = state.get("using_specific_version", False)
    requested_version = state.get("requested_version")

    # Store original dataframe for validation
    original_df = df.copy()

    # Get OpenAI client from state or create new one
    openai_client = state.get("openai_client") or OpenAI(api_key=DEFAULT_OPENAI_API_KEY)
    # Get Anthropic client from state or create new one
    anthropic_client = state.get("anthropic_client") or Anthropic(api_key=DEFAULT_ANTHROPIC_API_KEY)

    # Get column information for context
    column_info = ", ".join(df.columns) if df is not None else "No columns available"

    # Create SAMPLE dataframe content for GPT-4.1 (first 5 rows instead of full dataset)
    if df is not None:
        # Get first 5 rows for GPT-4.1 context
        sample_df = df.head(5)

        # Use smart compression for the sample
        compression_result = smart_dataframe_compression(sample_df, max_tokens=500)
        df_sample_content = compression_result['compressed_data']

        # Prepare comprehensive data summary with sample data and version info
        df_summary = f"""
DATAFRAME CONTENT {'(SPECIFIC VERSION)' if using_specific_version else '(CURRENT VERSION)'}:
{version_context if version_context else ''}

SAMPLE DATAFRAME CONTENT (First 5 rows):
{df_sample_content}

FULL DATAFRAME STATISTICS:
- Shape: {df.shape}
- Columns: {list(df.columns)}
- Data types: {dict(df.dtypes)}
- Memory usage: {df.memory_usage(deep=True).sum()} bytes
- Null values per column: {dict(df.isnull().sum())}

{'VERSION-SPECIFIC ANALYSIS CONTEXT:' if using_specific_version else ''}
{f'Working with version: {requested_version}' if using_specific_version else 'Working with current/latest version'}

NOTE: This is a sample of the first 5 rows. The complete dataframe has {len(df)} rows.
"""
    else:
        df_summary = "No data available"

    # Create versions directory if it doesn't exist
    versions_dir = "rent_roll_versions"
    os.makedirs(versions_dir, exist_ok=True)

    # Print initial state for debugging
    print(f"\n==== STARTING CODE GENERATION WITH VERSION SUPPORT ====")
    print(f"User query: {messages[-1]['content'] if messages[-1]['role'] == 'user' else 'No user query found'}")
    print(f"Dataframe has {len(df) if df is not None else 0} rows and {len(df.columns) if df is not None else 0} columns")
    print(f"Using specific version: {using_specific_version}")
    if using_specific_version:
        print(f"Requested version: {requested_version}")
    print(f"Sending FIRST 5 ROWS to GPT-4.1 (sample instead of full dataset)")
    if selected_context:
        print(f"Selected message context: {selected_context[:100]}...")

    try:
        # First, use GPT-4 to create the optimal prompt for Claude
        print("\n==== STEP 1: GENERATING PROMPT WITH GPT-4 (WITH VERSION CONTEXT) ====")
        versions_info = get_versions_info_for_prompt()

        # Enhanced system prompt for GPT-4 to create a Claude prompt with version awareness
        gpt_system_prompt = f"""You are an expert at creating prompts for Claude AI to generate code.
        Your task is to analyze the user query history and convert it into an optimal prompt for Claude to generate Python code that analyzes a rent roll dataframe.

        CRITICAL INFORMATION: The dataframe is ALREADY LOADED and available as 'df'.
        {'This is a SPECIFIC VERSION of the dataframe as requested by the user.' if using_specific_version else 'This is the current/latest version of the dataframe.'}

        {f'''
        VERSION-SPECIFIC CONTEXT:
        ========================
        The user specifically requested version: {requested_version}
        This version has been loaded and is available as 'df'.

        {version_context}

        IMPORTANT: All analysis should be performed on this specific version, not the current/latest version.
        Make sure Claude understands it's working with the requested version.
        ''' if using_specific_version else ''}

        HERE IS A SAMPLE OF THE DATAFRAME CONTENT (FIRST 5 ROWS OUT OF {len(df)} TOTAL ROWS):
        {df_summary}

        IMPORTANT: This is only a sample of the first 5 rows to give you context about the data structure and content.
        The actual dataframe that Claude will work with contains ALL {len(df)} rows.

        {f'''
        SELECTED MESSAGE CONTEXT:
        ========================
        The user has selected a previous assistant message as context for this request. This means they want to build upon or reference previous analysis:

        SELECTED CONTEXT: {selected_context}

        When creating the Claude prompt, make sure to:
        1. Reference this previous context appropriately
        2. Build upon the analysis mentioned in the context
        3. Ensure continuity between the previous analysis and the current request
        4. Help Claude understand what was previously discussed or analyzed
        ''' if selected_context else ''}

        # IMPORTANT: DATAFRAME VERSION MANAGEMENT
        {versions_info}

        Some important guidelines to include in your prompt to Claude:
        1. The variable 'df' is ALREADY DEFINED and CONTAINS ALL {len(df)} ROWS OF DATA from {'version ' + requested_version if using_specific_version else 'the current version'}
        2. Claude should explain its approach step by step before showing code
        3. Code must be wrapped in ```python and ``` blocks
        4. Code MUST display ALL rows in the output when showing tables (no limiting rows)
        5. Claude should not attempt to clean data unless specifically requested
        6. Code should include proper error handling
        7. IMPORTANT: After performing any analysis or showing results, Claude should ALWAYS call the save_dataframe_version() function to maintain version history
        8. CRITICAL: Claude should NOT use try-except blocks in its code. Any errors should be allowed to propagate naturally
        {f'''
        9. VERSION AWARENESS: Claude is working with version {requested_version}, not the current/latest version
        10. VERSION CONTEXT: Include reference to the version context when appropriate
        ''' if using_specific_version else ''}
        {f'''
        9. CONTEXT AWARENESS: Reference and build upon the selected previous message context when appropriate
        10. CONTINUITY: Ensure the analysis flows logically from the previous context
        ''' if selected_context and not using_specific_version else ''}

        Your output will be directly sent to Claude, so format it as a complete system prompt.
        Include any table formatting functions that might be useful.

        Make sure to include these helper functions in your prompt:

        ```python
        # For tabular display with proper formatting (PREFERRED METHOD):
        def print_formatted_table(df, title=None):
            if title:
                print(f"\\n{{title}}")
                print("=" * 80)

            # Create a display copy (doesn't change original df)
            display_df = df.copy()

            # Set pandas display options for better readability
            # Show ALL rows - no limits
            pd.set_option('display.max_rows', None)
            pd.set_option('display.max_columns', None)
            pd.set_option('display.width', 1000)
            pd.set_option('display.colheader_justify', 'left')
            pd.set_option('display.precision', 2)

            # Display the dataframe - ALL rows will be shown
            print(display_df)

            # Reset display options to default
            pd.reset_option('display.max_rows')
            pd.reset_option('display.max_columns')
            pd.reset_option('display.width')
            pd.reset_option('display.colheader_justify')
            pd.reset_option('display.precision')
        ```

        ```python
        # Function to save dataframe versions
        def save_dataframe_version(df, operation_description=""):
            import os
            from datetime import datetime

            # Create versions directory if it doesn't exist
            versions_dir = "rent_roll_versions"
            os.makedirs(versions_dir, exist_ok=True)

            # Generate version name with timestamp
            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
            version_name = f"v_{{timestamp}}"

            # Create filenames for both CSV and Excel
            csv_filename = os.path.join(versions_dir, f"rent_roll_{{version_name}}.csv")
            excel_filename = os.path.join(versions_dir, f"rent_roll_{{version_name}}.xlsx")

            # Save as CSV
            df.to_csv(csv_filename, index=False)

            # Save as Excel
            df.to_excel(excel_filename, index=False, engine='openpyxl')

            print(f"✓ Saved dataframe version {{version_name}}: {{operation_description}}")
            print(f"  - CSV: {{csv_filename}}")
            print(f"  - Excel: {{excel_filename}}")

            # Return the version name for reference
            return version_name
        ```
        """

        # Filter out system messages and apply conversation history management
        filtered_messages = []
        for msg in messages:
            if msg["role"] != "system":
                filtered_messages.append({"role": msg["role"], "content": msg["content"]})

        # ✅ APPLY CONVERSATION HISTORY MANAGEMENT HERE
        print(f"Original conversation size: {len(filtered_messages)} messages")

        # Check if conversation manager exists and apply compression
        if 'conversation_manager' in globals() and conversation_manager is not None:
            try:
                print("🔧 Applying conversation history compression...")
                initial_size = conversation_manager.get_conversation_size(filtered_messages)
                print(f"Initial token count: {initial_size}")

                optimized_messages = conversation_manager.compress_history_if_needed(
                    filtered_messages, openai_client
                )

                final_size = conversation_manager.get_conversation_size(optimized_messages)
                print(f"Compressed token count: {final_size}")
                print(f"Compression ratio: {final_size/initial_size:.2%} of original size")
                print(f"Messages after compression: {len(optimized_messages)}")

                filtered_messages = optimized_messages

            except Exception as e:
                print(f"⚠️ Conversation compression failed: {str(e)}")
                print("Falling back to recent message truncation...")
                # Fallback: Keep only recent messages
                filtered_messages = filtered_messages[-10:] if len(filtered_messages) > 10 else filtered_messages
        else:
            print("⚠️ No conversation manager found, using simple truncation...")
            # Simple fallback: Keep only recent messages to avoid token limits
            if len(filtered_messages) > 12:
                filtered_messages = filtered_messages[-12:]
                print(f"Truncated to recent {len(filtered_messages)} messages")

        # Convert the messages to the format expected by OpenAI
        gpt_messages = [{"role": "system", "content": gpt_system_prompt}]
        for msg in filtered_messages:
            gpt_messages.append(msg)

        # Add a final message explaining the task clearly with version awareness
        final_instruction = f"""Based on this conversation history and the {'VERSION-SPECIFIC' if using_specific_version else ''} SAMPLE dataframe content (first 5 rows) provided above, create the optimal Claude prompt to generate Python code for rent roll analysis.

        {'CRITICAL: The user requested analysis of version ' + requested_version + '. ' if using_specific_version else ''}The prompt should emphasize that:
        - The dataframe already exists and is loaded as 'df' with ALL {len(df)} rows
        - {'This is version ' + requested_version + ', not the current/latest version' if using_specific_version else 'This is the current version of the data'}
        - ALL rows should be displayed when requested
        - Versions should be saved with save_dataframe_version() function
        - You have access to a representative sample of the data structure

{f'''
IMPORTANT: The user has selected a previous assistant message as context for this request. Make sure the Claude prompt acknowledges this context and builds upon the previous analysis mentioned in the selected message.

SELECTED CONTEXT TO REFERENCE: {selected_context[:500]}...
''' if selected_context else ''}

{f'''
VERSION CONTEXT TO INCLUDE: The user specifically requested analysis of version {requested_version}. Make sure Claude understands this and references it appropriately.
''' if using_specific_version else ''}"""

        gpt_messages.append({
            "role": "user",
            "content": final_instruction
        })

        # Estimate final token count before sending
        if 'conversation_manager' in globals() and conversation_manager is not None:
            final_token_estimate = conversation_manager.get_conversation_size(gpt_messages)
            print(f"Final GPT-4.1 request token estimate: {final_token_estimate}")

            if final_token_estimate > 28000:  # Safety margin
                print("⚠️ Still approaching token limit, applying emergency truncation...")
                # Emergency fallback
                gpt_messages = gpt_messages[:3] + gpt_messages[-5:]  # Keep system + recent messages

        # Get the optimized prompt from GPT-4
        gpt_response = openai_client.chat.completions.create(
            model="gpt-4.1",
            messages=gpt_messages,
            max_tokens=4000,
            temperature=0.3
        )

        claude_system_prompt = gpt_response.choices[0].message.content

        # Print the generated prompt for debugging
        print("\n==== GPT-4 GENERATED PROMPT FOR CLAUDE (WITH VERSION CONTEXT) ====")
        print(claude_system_prompt[:500] + "..." if len(claude_system_prompt) > 500 else claude_system_prompt)
        print("==== END OF PROMPT (TRUNCATED) ====\n")

        logger.info(f"Generated optimized prompt for Claude using GPT-4 with {'version-specific' if using_specific_version else 'current'} dataframe")

        # Now use the GPT-4 generated prompt to ask Claude for code
        print(f"\n==== STEP 2: SENDING TO CLAUDE FOR CODE GENERATION ({'VERSION-SPECIFIC' if using_specific_version else 'CURRENT'}) ====")
        logger.info(f"Sending optimized prompt to Claude for {'version-specific' if using_specific_version else 'current'} code generation")

        # Prepare messages for Claude with enhanced context
        claude_messages = filtered_messages.copy()

        # Enhanced sample data message with version and context awareness
        sample_data_content = f"""Here is a SAMPLE of the dataframe that's already loaded as 'df' (showing first 5 rows out of {len(df)} total rows):

{df_summary}

{'VERSION NOTICE: You are working with version ' + requested_version + ', not the current/latest version. ' if using_specific_version else ''}Please process my request using this FULL dataset of {len(df)} rows and remember to save versions with save_dataframe_version().

{f'IMPORTANT CONTEXT: Please reference and build upon this previous analysis: {selected_context[:300]}...' if selected_context else ''}"""

        sample_data_message = {
            "role": "user",
            "content": sample_data_content
        }
        claude_messages.append(sample_data_message)

        # Try to get code from Claude
        claude_response = anthropic_client.messages.create(
            model="claude-3-7-sonnet-20250219",
            system=claude_system_prompt,
            messages=claude_messages,
            max_tokens=4000,
            temperature=0.3
        )

        # Extract the response text from Claude
        response_text = claude_response.content[0].text

        # Print Claude's response for debugging
        print(f"\n==== CLAUDE'S RESPONSE ({'VERSION ' + requested_version if using_specific_version else 'CURRENT VERSION'}) ====")
        print(response_text[:500] + "..." if len(response_text) > 500 else response_text)
        print("==== END OF CLAUDE RESPONSE (TRUNCATED) ====\n")

        # Extract code blocks
        code_blocks = re.findall(r'```python\s*(.*?)\s*```', response_text, re.DOTALL)

        # Print extracted code blocks for debugging
        print(f"\n==== EXTRACTED {len(code_blocks)} CODE BLOCKS ====")
        for i, block in enumerate(code_blocks):
            print(f"\n-- Code Block {i+1} --")
            print(block[:200] + "..." if len(block) > 200 else block)

        # If no code blocks are found, add emergency code
        if len(code_blocks) == 0:
            emergency_code = f"""
# Emergency code to display the {'version-specific' if using_specific_version else 'current'} dataframe
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)
pd.set_option('display.width', 1000)

print("\\n=== RENT ROLL DATA {'(VERSION ' + requested_version + ')' if using_specific_version else '(CURRENT VERSION)'} ===\\n")
print(f"Displaying all {{len(df)}} rows and {{len(df.columns)}} columns\\n")

# Print the entire dataframe
print(df)

# Save a version of the dataframe
from datetime import datetime
import os

# Create versions directory if it doesn't exist
versions_dir = "rent_roll_versions"
os.makedirs(versions_dir, exist_ok=True)

# Generate version name with timestamp
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
version_name = f"v_{{timestamp}}"

# Create filename
filename = os.path.join(versions_dir, f"rent_roll_{{version_name}}.csv")

# Save dataframe
df.to_csv(filename, index=False)

print(f"✓ Saved dataframe version {{version_name}}: Emergency display of {'version ' + requested_version if using_specific_version else 'current'} data")
"""
            code_blocks.append(emergency_code)
            print("\n-- Added Emergency Code Block --")
            print(f"Emergency code added since Claude didn't generate code for {'version ' + requested_version if using_specific_version else 'current version'}")

        # Define helper functions
        def print_formatted_table(df, title=None):
            """
            Print an entire dataframe (every row and column) with an optional title.

            Args:
                df: Pandas DataFrame to print
                title: Optional heading printed above the table, underlined with '='

            The display options are applied through pd.option_context so that
            whatever settings were active before the call are restored afterwards,
            even if printing raises. (Calling pd.reset_option instead would snap
            the options back to pandas defaults and discard custom settings.)
            """
            if title:
                print(f"\n{title}")
                print("=" * 80)

            with pd.option_context(
                'display.max_rows', None,
                'display.max_columns', None,
                'display.width', 1000,
                'display.colheader_justify', 'left',
                'display.precision', 2,
            ):
                print(df)

        def print_bordered_table(df, title=None):
            """
            Print a dataframe as an ASCII table with '+---+' borders.

            Args:
                df: Pandas DataFrame to print
                title: Optional heading printed above the table, underlined with '='

            Prints "No data available" and returns when the frame has no rows.
            """
            if title:
                print(f"\n{title}")
                print("=" * 80)
            if len(df) == 0:
                print("No data available")
                return

            headers = [str(col) for col in df.columns]

            # Stringify each column once and reuse that text for both width
            # measurement and printing. Pulling values out of a row Series
            # (df.iloc[i]) instead would upcast mixed int/float rows to float,
            # so an int 1 would print as "1.0" and overflow the column width.
            # Columns are addressed by position so duplicate labels also work.
            text_columns = [
                [str(value) for value in df.iloc[:, pos].tolist()]
                for pos in range(df.shape[1])
            ]
            widths = [
                max(len(header), max(len(cell) for cell in cells)) + 2
                for header, cells in zip(headers, text_columns)
            ]

            separator = "+" + "+".join("-" * (width + 2) for width in widths) + "+"
            print(separator)
            print("| " + " | ".join(h.ljust(w) for h, w in zip(headers, widths)) + " |")
            print(separator)
            for row_idx in range(len(df)):
                cells = (column[row_idx].ljust(w) for column, w in zip(text_columns, widths))
                print("| " + " | ".join(cells) + " |")
            print(separator)
            print(f"Total rows: {len(df)}")

        # Add to globals_dict before executing code
        globals_dict = {
            "df": df,  # This is now the version-specific dataframe if requested
            "pd": pd,
            "np": np,
            "os": os,
            "datetime": datetime,
            "versions_dir": versions_dir,
            "print_formatted_table": print_formatted_table,
            "print_bordered_table": print_bordered_table,
            "save_dataframe_version": save_dataframe_version
        }

        print(f"\n==== STEP 3: EXECUTING CODE ({'VERSION ' + requested_version if using_specific_version else 'CURRENT VERSION'}) ====")

        # Execute initial attempt first
        execution_results, all_executed_successfully, failed_code, error_msg = execute_code_blocks(code_blocks, globals_dict)

        # Handle retries if needed
        max_retries = 3
        retry_count = 0

        while not all_executed_successfully and retry_count < max_retries:
            retry_count += 1
            print(f"\n==== RETRY ATTEMPT {retry_count}/{max_retries} ====")

            # Create a retry message with more details each time
            retry_message = {
                "role": "user",
                "content": f"""The code you provided failed with this error: {error_msg}

                Here is the code that failed:
                ```python
                {failed_code}
                ```

                This is retry attempt {retry_count} of {max_retries}.

                {'REMEMBER: You are working with version ' + requested_version + ', not the current version.' if using_specific_version else ''}

                {"After multiple attempts, please try a completely different approach." if retry_count >= 2 else "Please fix this specific error."}
                IMPORTANT: DO NOT use try-except blocks in your code. Allow any errors to propagate naturally so our system can detect them.
                Please fix this code to handle the specific error while maintaining the requirement to show ALL rows in the output and saving a version with save_dataframe_version().
                Return the corrected code wrapped in ```python and ``` blocks."""
            }

            # Add this feedback to the messages
            fix_messages = claude_messages.copy()
            fix_messages.append({"role": "assistant", "content": response_text})
            fix_messages.append(retry_message)

            # Get Claude's fixed code
            retry_response = anthropic_client.messages.create(
                model="claude-3-7-sonnet-20250219",
                system=claude_system_prompt,
                messages=fix_messages,
                max_tokens=3500,
                temperature=0.3
            )

            retry_text = retry_response.content[0].text
            print(f"\n==== CLAUDE'S FIX SUGGESTION (ATTEMPT {retry_count}) ====")
            print(retry_text[:500] + "..." if len(retry_text) > 500 else retry_text)
            print("==== END OF FIX SUGGESTION ====\n")

            # Extract new code blocks from the retry response
            new_code_blocks = re.findall(r'```python\s*(.*?)\s*```', retry_text, re.DOTALL)

            if new_code_blocks:
                code_blocks = new_code_blocks
                response_text = retry_text  # Update the response text
                print(f"Found {len(new_code_blocks)} new code blocks in retry response")
            else:
                print("No code blocks found in retry response, keeping original code")

            # Execute the new code blocks
            retry_results, all_executed_successfully, failed_code, error_msg = execute_code_blocks(code_blocks, globals_dict)
            execution_results += f"\n\n--- RETRY ATTEMPT {retry_count} RESULTS ---\n{retry_results}"

            if all_executed_successfully:
                print(f"\n✅ RETRY {retry_count} SUCCESSFUL!")
                break
            else:
                print(f"\n⚠️ Retry {retry_count} failed. {'Trying again...' if retry_count < max_retries else 'Maximum retries reached.'}")

        # Update the dataframe in state with any changes
        if "df" in globals_dict:
            # Important: Only update app_state if we're working with current version
            if not using_specific_version:
                state["df"] = globals_dict["df"]
            else:
                # For version-specific analysis, keep the changes in the state but don't update global app_state
                state["df"] = globals_dict["df"]

        # Enhanced validation with version awareness
        print(f"\n==== STEP 4: VALIDATING CHANGES ({'VERSION ' + requested_version if using_specific_version else 'CURRENT VERSION'}) ====")

        if execution_results and all_executed_successfully:
            # Get the new dataframe (after changes)
            new_df = globals_dict["df"]

            # Get the user's original message
            user_message = messages[-1]["content"] if messages[-1]["role"] == "user" else ""

            # Get the executed code for context
            executed_code = "\n\n# --- Next Code Block ---\n\n".join(code_blocks)

            # Perform validation
            print("🔍 Validating changes with GPT-4.1...")
            validation_result = validate_dataframe_changes_with_gpt4(
                original_df=original_df,
                new_df=new_df,
                user_query=user_message,
                executed_code=executed_code
            )

            # Format validation message with version awareness
            validation_message = format_validation_message(validation_result)

            if using_specific_version:
                validation_message += f"\n\n📋 **Version Analysis Note**: This validation was performed on version `{requested_version}`. Changes to this version do not affect the current/latest dataframe unless explicitly saved as a new version."

            # Add validation to the response
            final_response = response_text + execution_results + validation_message
            print(f"🔍 Validation: {validation_result.get('validation_result', 'UNKNOWN')}")

        else:
            # No validation if execution failed
            validation_message = f"\n\n🔍 **Validation Skipped** ❌\n*Code execution failed - no changes to validate*"
            if using_specific_version:
                validation_message += f"\n*Note: Analysis was attempted on version {requested_version}*"
            final_response = response_text + execution_results + validation_message
            validation_result = None

        # Add version and hybrid note
        version_note = f"\n\n📋 **Version Info**: Analysis performed on {'version ' + requested_version if using_specific_version else 'current/latest version'}"
        hybrid_note = f"\n\n---\n*This response combines GPT-4 for prompt optimization and Claude for code generation, with automated error handling ({retry_count} retries used).*"
        final_response += version_note + hybrid_note

        # Create complete state return
        new_state = dict(state)
        new_state.update({
            "messages": state["messages"] + [{"role": "assistant", "content": final_response}],
            "df": globals_dict["df"],  # Updated dataframe (version-specific or current)
            "final_response": final_response,
            "code_execution_results": execution_results,
            "validation_result": validation_result
        })

        logger.info(f"Code generation and execution complete with {'version-specific' if using_specific_version else 'current'} validation")
        print(f"\n==== CODE GENERATION COMPLETE ({'VERSION ' + requested_version if using_specific_version else 'CURRENT VERSION'}) ====")

        return new_state

    except Exception as e:
        error_msg = f"An unexpected error occurred: {str(e)}"
        if using_specific_version:
            error_msg += f" (while analyzing version {requested_version})"

        logger.error(f"Error in generate_code_and_execute: {error_msg}")
        logger.error(traceback.format_exc())

        # Return complete error state
        new_state = dict(state)
        new_state.update({
            "messages": state["messages"] + [{"role": "assistant", "content": error_msg}],
            "df": state["df"],
            "final_response": error_msg,
            "code_execution_results": "",
            "validation_result": None
        })

        return new_state

def save_dataframe_version(df, operation_description=""):
    """
    Save the dataframe as a CSV + Excel pair and record it in the version registry.

    Files are written to the "rent_roll_versions" directory (created if missing)
    as rent_roll_<version_name>.csv / .xlsx, and a metadata entry is appended to
    app_state["df_versions"].

    Args:
        df: The dataframe to save
        operation_description: A string describing what operation was performed

    Returns:
        version_name: The name of the version that was saved ("v_<timestamp>",
        with a numeric suffix such as "_2" when that name is already taken),
        or None if saving failed (the error is logged and printed).
    """
    global app_state
    # Imported locally so the function does not depend on which notebook cell
    # (if any) put `datetime` into the module namespace.
    from datetime import datetime

    # Create versions directory if it doesn't exist
    versions_dir = "rent_roll_versions"
    os.makedirs(versions_dir, exist_ok=True)

    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")

    try:
        # Make sure the registry exists before choosing a name.
        # globals().get() also covers the case where app_state was never
        # defined, which would otherwise raise NameError on first read.
        if globals().get("app_state") is None:
            logger.warning("app_state is None, initializing basic structure")
            app_state = {"df_versions": []}

        if "df_versions" not in app_state:
            app_state["df_versions"] = []
        registry = app_state["df_versions"]

        # The timestamp only has one-second resolution, so two saves within the
        # same second would share a name, overwrite each other's files and leave
        # duplicate registry entries. Disambiguate with a numeric suffix.
        taken_names = {entry.get('name') for entry in registry if isinstance(entry, dict)}
        version_name = f"v_{timestamp}"
        attempt = 1
        while (
            version_name in taken_names
            or os.path.exists(os.path.join(versions_dir, f"rent_roll_{version_name}.csv"))
        ):
            attempt += 1
            version_name = f"v_{timestamp}_{attempt}"

        # Create filenames for both CSV and Excel
        csv_filename = os.path.join(versions_dir, f"rent_roll_{version_name}.csv")
        excel_filename = os.path.join(versions_dir, f"rent_roll_{version_name}.xlsx")

        # Save as CSV
        df.to_csv(csv_filename, index=False)
        logger.info(f"Saved CSV: {csv_filename}")

        # Save as Excel
        df.to_excel(excel_filename, index=False, engine='openpyxl')
        logger.info(f"Saved Excel: {excel_filename}")

        # Add version metadata to the registry
        version_info = {
            'name': version_name,
            'description': operation_description,
            'timestamp': timestamp,
            'csv_filename': csv_filename,
            'excel_filename': excel_filename,
            'shape': list(df.shape),
            'columns': list(df.columns),
            'is_original': len(registry) == 0  # First one is original
        }

        registry.append(version_info)

        print(f"✓ Saved dataframe version {version_name}: {operation_description}")
        print(f"  - CSV: {csv_filename}")
        print(f"  - Excel: {excel_filename}")
        print(f"  - Shape: {df.shape}")
        print(f"  - Registry updated: {len(app_state['df_versions'])} total versions")

        logger.info(f"Version {version_name} saved successfully. Total versions: {len(app_state['df_versions'])}")

        return version_name

    except Exception as e:
        error_msg = f"Error saving dataframe version: {str(e)}"
        logger.error(error_msg)
        print(f"❌ {error_msg}")
        return None


def validate_simple_query_execution(original_df, result_df, user_query, execution_output):
    """
    Lightweight validation for queries that don't modify data.

    Args:
        original_df: DataFrame as it was before the query ran
        result_df: DataFrame after the query ran
        user_query: The user's request text (used only for the summary preview)
        execution_output: Captured output of the executed code (currently unused)

    Returns:
        A validation dict. When the two frames are equal this is a canned PASS
        result; otherwise the result of validate_dataframe_changes_with_gpt4.

    NOTE(review): a function with this same name is defined again in a later
    notebook cell; whichever definition runs last is the one callers get.
    """
    if not original_df.equals(result_df):
        # Data changed, use full GPT-4 validation. Keyword arguments match the
        # names used by the other call site in this notebook.
        return validate_dataframe_changes_with_gpt4(
            original_df=original_df,
            new_df=result_df,
            user_query=user_query,
        )

    # Only show an ellipsis when the query really was cut off, and tolerate a
    # missing query instead of failing on the slice.
    query_text = "" if user_query is None else str(user_query)
    query_preview = query_text[:50] + ("..." if len(query_text) > 50 else "")

    return {
        "validation_result": "PASS",
        "intent_fulfilled": True,
        "data_integrity_intact": True,
        "logical_consistency": True,
        "schema_appropriate": True,
        "issues_found": [],
        "positive_changes": ["Query executed successfully without data modifications"],
        "summary": f"Read-only analysis completed successfully. Query '{query_preview}' provided insights without altering the data.",
        "confidence": "high",
        "recommendation": "PROCEED"
    }


# Additional helper functions for enhanced validation

def get_versions_info_for_prompt():
    """
    Build the version-history text block that is embedded in the Claude prompt.

    Reads the registry at app_state["df_versions"]. Returns the fixed string
    "No versions available yet." when app_state or the registry is empty;
    otherwise a numbered list of versions (tagged ORIGINAL / LATEST) followed
    by the original name, the latest name and the total count.
    """
    global app_state

    registry = app_state.get("df_versions") if app_state else None
    if not registry:
        return "No versions available yet."

    # The original is the first entry flagged is_original, else simply the first entry
    original = next(
        (entry for entry in registry if entry.get('is_original')),
        registry[0],
    )
    latest = registry[-1]

    listing = []
    for position, entry in enumerate(registry, start=1):
        tags = [
            tag
            for tag, reference in (("ORIGINAL", original), ("LATEST", latest))
            if entry == reference
        ]
        tag_suffix = f" ({', '.join(tags)})" if tags else ""
        listing.append(f"{position}. {entry['name']}{tag_suffix}: {entry['description']}")

    versions_text = "\n".join(listing)

    return (
        "\nDATAFRAME VERSION HISTORY:\n"
        f"{versions_text}\n"
        "\n"
        f"Original version: {original['name']}\n"
        f"Latest version: {latest['name']}\n"
        f"Total versions: {len(registry)}\n"
    )


def trim_dataframe_output(output_text, max_rows=20, max_chars=None):
    """
    Trim dataframe output to prevent overwhelming responses.

    Args:
        output_text: The text to trim
        max_rows: Maximum number of lines to keep (default 20)
        max_chars: Optional maximum number of characters to keep after the
            line limit has been applied; None (default) disables the cap

    Returns:
        The text unchanged when it is within both limits; otherwise the kept
        portion followed by a one-line notice describing the truncation.
    """
    lines = output_text.split('\n')

    if len(lines) > max_rows:
        kept_lines = lines[:max_rows]
        kept_lines.append(f"... [output truncated, showing first {max_rows} lines only] ...")
        trimmed = '\n'.join(kept_lines)
    else:
        trimmed = output_text

    # Enforce the character cap; previously this parameter was accepted but ignored.
    if max_chars is not None and max_chars >= 0 and len(trimmed) > max_chars:
        trimmed = trimmed[:max_chars] + f"\n... [output truncated to {max_chars} characters] ..."

    return trimmed


# Enhanced logging and debugging functions

def log_validation_attempt(original_df, new_df, user_query, validation_result):
    """
    Write one validation attempt to a timestamped file under "validation_logs".

    The file records the user query, the shapes of both dataframes and the
    validation_result / intent_fulfilled / summary / issues_found /
    positive_changes fields of the validation dict. The path of the written
    file is printed afterwards.
    """
    import os
    from datetime import datetime

    logs_dir = "validation_logs"
    os.makedirs(logs_dir, exist_ok=True)

    stamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    log_file = os.path.join(logs_dir, f"validation_{stamp}.log")

    with open(log_file, 'w', encoding='utf-8') as handle:
        # print() appends the newline; end="\n\n" leaves a blank line after the entry
        print(f"VALIDATION LOG - {stamp}", file=handle)
        print("=" * 50, end="\n\n", file=handle)
        print(f"User Query: {user_query}", end="\n\n", file=handle)
        print(f"Original DF Shape: {original_df.shape}", file=handle)
        print(f"New DF Shape: {new_df.shape}", end="\n\n", file=handle)
        print(f"Validation Result: {validation_result.get('validation_result', 'UNKNOWN')}", file=handle)
        print(f"Intent Fulfilled: {validation_result.get('intent_fulfilled', 'Unknown')}", file=handle)
        print(f"Summary: {validation_result.get('summary', 'No summary')}", end="\n\n", file=handle)
        print(f"Issues Found: {validation_result.get('issues_found', [])}", file=handle)
        print(f"Positive Changes: {validation_result.get('positive_changes', [])}", file=handle)

    print(f"📝 Validation logged to: {log_file}")



In [None]:
def format_validation_message_for_chat(validation_result):
    """
    Render a validation result dict as a markdown section for the chat display.

    The section always contains: a header with a status emoji, a status /
    confidence / recommendation line, the summary, and the four validation
    checks (each shown as Passed, Failed or Unknown). Issues and positive
    changes are listed when present, and a closing sentence is added for the
    PROCEED, RETRY and MANUAL_REVIEW recommendations.
    """
    verdict = validation_result.get("validation_result", "UNKNOWN")
    confidence_label = validation_result.get("confidence", "unknown")
    summary_text = validation_result.get("summary", "No summary available")

    # (verdict, header emoji, status dot); unlisted verdicts use the for-else default
    for known_verdict, header_emoji, status_dot in (
        ("PASS", "✅", "🟢"),
        ("FAIL", "❌", "🔴"),
        ("WARNING", "⚠️", "🟡"),
    ):
        if verdict == known_verdict:
            emoji, status_color = header_emoji, status_dot
            break
    else:
        emoji, status_color = "❓", "⚪"

    # The message is assembled from fragments and joined once at the end
    fragments = [
        f"\n\n🔍 **AI VALIDATION RESULTS** {emoji}\n",
        "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\n\n",
        f"{status_color} **Status:** {verdict} | **Confidence:** {confidence_label.title()} | **Recommendation:** {validation_result.get('recommendation', 'MANUAL_REVIEW')}\n\n",
        f"📝 **Summary:** {summary_text}\n\n",
        "**Detailed Validation Checks:**",
    ]

    # Every check is always listed; anything other than a literal True/False shows as Unknown
    check_fields = (
        ("Intent Fulfilled", "intent_fulfilled"),
        ("Data Integrity", "data_integrity_intact"),
        ("Logical Consistency", "logical_consistency"),
        ("Schema Appropriate", "schema_appropriate"),
    )
    for check_name, field in check_fields:
        outcome = validation_result.get(field)
        if outcome is True:
            fragments.append(f"\n• ✅ **{check_name}:** Passed")
        elif outcome is False:
            fragments.append(f"\n• ❌ **{check_name}:** Failed")
        else:
            fragments.append(f"\n• ❓ **{check_name}:** Unknown")

    issue_items = validation_result.get("issues_found", [])
    if issue_items:
        fragments.append(f"\n\n**⚠️ Issues Identified ({len(issue_items)}):**")
        fragments.extend(f"\n{number}. {item}" for number, item in enumerate(issue_items, 1))

    positive_items = validation_result.get("positive_changes", [])
    if positive_items:
        fragments.append(f"\n\n**✅ Positive Changes ({len(positive_items)}):**")
        fragments.extend(f"\n{number}. {item}" for number, item in enumerate(positive_items, 1))

    # Closing sentence only for the three recognised recommendations
    advice = validation_result.get("recommendation", "MANUAL_REVIEW")
    for known_advice, sentence in (
        ("PROCEED", "\n\n💚 **Recommendation:** Safe to proceed with these changes."),
        ("RETRY", "\n\n🔄 **Recommendation:** Consider refining the approach and trying again."),
        ("MANUAL_REVIEW", "\n\n👀 **Recommendation:** Manual review recommended before proceeding."),
    ):
        if advice == known_advice:
            fragments.append(sentence)
            break

    fragments.append("\n━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━")

    return "".join(fragments)

In [None]:
def validate_simple_query_execution(original_df, result_df, user_query, execution_output):
    """
    Lightweight validation for queries that don't modify data.

    Args:
        original_df: DataFrame as it was before the query ran
        result_df: DataFrame after the query ran
        user_query: The user's request text (used only for the summary preview)
        execution_output: Captured output of the executed code (currently unused)

    Returns:
        A validation dict. When the two frames are equal this is a canned PASS
        result; otherwise the result of validate_dataframe_changes_with_gpt4.

    NOTE(review): a function with this same name is also defined in an earlier
    notebook cell; this later definition replaces it once this cell runs.
    """
    if not original_df.equals(result_df):
        # Data changed, use full GPT-4 validation. Keyword arguments match the
        # names used by the other call site in this notebook.
        return validate_dataframe_changes_with_gpt4(
            original_df=original_df,
            new_df=result_df,
            user_query=user_query,
        )

    # Only show an ellipsis when the query really was cut off, and tolerate a
    # missing query instead of failing on the slice.
    query_text = "" if user_query is None else str(user_query)
    query_preview = query_text[:50] + ("..." if len(query_text) > 50 else "")

    return {
        "validation_result": "PASS",
        "intent_fulfilled": True,
        "data_integrity_intact": True,
        "logical_consistency": True,
        "schema_appropriate": True,
        "issues_found": [],
        "positive_changes": ["Query executed successfully without data modifications"],
        "summary": f"Read-only analysis completed successfully. Query '{query_preview}' provided insights without altering the data.",
        "confidence": "high",
        "recommendation": "PROCEED"
    }

In [None]:
def add_validation_context_to_response(response_text, validation_result):
    """
    Append a fixed "Why AI Validation?" explainer to the response text.

    validation_result is accepted but not consulted; the appended note is the
    same for every call.
    """
    note_lines = (
        "",
        "",
        "💡 **Why AI Validation?**",
        "This validation helps ensure that:",
        "- The AI understood your request correctly",
        "- No unintended data changes occurred",
        "- Business logic remains sound",
        "- You can trust the results for decision-making",
        "",
        "*This validation runs automatically after every code execution for your protection and learning.*",
    )

    # The two leading empty entries yield the blank line separating the note from the response
    return response_text + "\n".join(note_lines)

In [None]:
# Build the LangGraph workflow
def create_agentic_rent_roll_analyzer():
    """
    Assemble and compile the LangGraph workflow for the rent roll analyzer.

    The graph starts at "determine_action" and then routes to exactly one of
    "ask_clarification", "generate_code_and_execute" or
    "generate_text_response", each of which leads straight to END.

    Returns:
        The compiled workflow.
    """
    graph = StateGraph(AgentState)

    # Register every node under the same name as its handler function
    node_handlers = (
        ("determine_action", determine_action),
        ("ask_clarification", ask_clarification),
        ("generate_text_response", generate_text_response),
        ("generate_code_and_execute", generate_code_and_execute),
    )
    for node_name, handler in node_handlers:
        graph.add_node(node_name, handler)

    graph.set_entry_point("determine_action")

    # Routing priority: clarification first, then code generation, else plain text
    graph.add_conditional_edges(
        "determine_action",
        lambda state: (
            "ask_clarification" if state.get("needs_clarification")
            else "generate_code_and_execute" if state.get("generate_code")
            else "generate_text_response"
        ),
    )

    # Each action node is terminal
    for terminal_node in ("ask_clarification", "generate_text_response", "generate_code_and_execute"):
        graph.add_edge(terminal_node, END)

    return graph.compile()


In [None]:
def upload_rent_roll(file, anthropic_api_key, openai_api_key, auto_analyze):
    """
    Process the uploaded rent roll file and initialize the chat with proper versioning.

    Steps, in order: initialize the version system, copy the upload to a
    temporary .xlsx file, load it into a DataFrame (read_rent_roll_simple,
    falling back to pd.read_excel), optionally run the GPT issue analysis,
    rebuild the global app_state, save the initial dataframe version, and
    build the system message and the HTML preview.

    Args:
        file: Uploaded file object; only its ``name`` attribute (a path) is read.
        anthropic_api_key: Anthropic key; DEFAULT_ANTHROPIC_API_KEY is used when empty.
        openai_api_key: OpenAI key; DEFAULT_OPENAI_API_KEY is used when empty.
        auto_analyze: When truthy, analyze_rent_roll_gpt is run on the file.

    Returns:
        tuple: (status message, preview HTML or None, gr.update controlling
        chat visibility, gr.update for the version dropdown). Failures are
        reported through the status message rather than raised.
    """
    global app_state

    import html  # stdlib; only needed to escape issue text placed in the preview HTML

    logger.info("Starting rent roll upload and processing with fixed versioning")

    # Initialize version system first
    ensure_version_system_initialized()

    # Use the default API keys if none are provided
    anthropic_key = anthropic_api_key if anthropic_api_key else DEFAULT_ANTHROPIC_API_KEY
    openai_key = openai_api_key if openai_api_key else DEFAULT_OPENAI_API_KEY
    logger.info("API keys configured")

    # Validate inputs
    if not file:
        logger.warning("No file uploaded")
        return "Please upload a rent roll Excel file.", None, gr.update(visible=False), gr.update(choices=[], value=None)

    # Stays None until the temp file exists, so `finally` knows whether there is anything to remove
    file_path = None
    try:
        # Save the uploaded file to a temporary location
        temp_file = tempfile.NamedTemporaryFile(delete=False, suffix='.xlsx')
        temp_file.close()
        file_path = temp_file.name
        logger.info(f"Created temporary file: {file_path}")

        # Copy the uploaded file to our temporary location
        with open(file.name, 'rb') as src_file, open(file_path, 'wb') as dst_file:
            dst_file.write(src_file.read())
        logger.info("File copied to temporary location")

        # Use our improved rent roll loader
        try:
            logger.info("Loading rent roll with specialized loader...")
            rent_roll_df = read_rent_roll_simple(file_path)
        except Exception as e:
            logger.warning(f"Error with specialized loader: {e}. Falling back to standard loading.")
            rent_roll_df = pd.read_excel(file_path)
            logger.info("Fallback: Loaded rent roll with default pandas settings")

        logger.info(f"Loaded rent roll data: {len(rent_roll_df)} rows, {len(rent_roll_df.columns)} columns")

        # Auto-analyze with GPT if selected
        issues_list = []
        if auto_analyze:
            logger.info("Auto-analyze option selected. Calling GPT for analysis...")
            issues_list = analyze_rent_roll_gpt(file_path, openai_key)
            logger.info(f"GPT analysis complete. Found {len(issues_list)} issues.")

        # Initialize the global app state with proper structure
        original_filename = os.path.basename(file.name)
        app_state = {
            "df": rent_roll_df,
            "issues": issues_list,
            "anthropic_client": Anthropic(api_key=anthropic_key),
            "openai_client": OpenAI(api_key=openai_key),
            "system_message": "",
            "df_versions": [],  # Always initialize as empty list
            "original_filename": original_filename  # Store original filename
        }

        # Save the initial version with proper description
        initial_version = save_dataframe_version(
            rent_roll_df,
            f"Initial upload - original dataset from {original_filename}"
        )

        if initial_version:
            logger.info(f"Created initial dataframe version: {initial_version}")
        else:
            logger.error("Failed to create initial version")

        # Create system message with data understanding
        column_info = []
        for col in rent_roll_df.columns:
            try:
                dtype_str = str(rent_roll_df[col].dtype)
                column_info.append(f"- {col}: {dtype_str}")
            except Exception as e:
                column_info.append(f"- {col}: [Error determining type: {str(e)}]")
        column_info_str = "\n".join(column_info)

        # Calculate basic stats about the data
        data_stats = []
        for col in rent_roll_df.columns:
            try:
                if pd.api.types.is_numeric_dtype(rent_roll_df[col]):
                    stat = f"- {col}: min={rent_roll_df[col].min()}, max={rent_roll_df[col].max()}, mean={rent_roll_df[col].mean():.2f}, null={rent_roll_df[col].isna().sum()}"
                else:
                    unique_vals = rent_roll_df[col].nunique()
                    stat = f"- {col}: unique values={unique_vals}, null={rent_roll_df[col].isna().sum()}"
                data_stats.append(stat)
            except Exception:
                # Best effort per column; `Exception` (not a bare except) so that
                # KeyboardInterrupt / SystemExit are not swallowed here.
                data_stats.append(f"- {col}: [error calculating stats]")
        data_stats_str = "\n".join(data_stats)

        # Format issues for display
        issues_text = "\n".join([f"- {issue}" for issue in issues_list])

        system_message = f"""
        You are a Commercial Real Estate rent roll assistant that has analyzed a rent roll and found the following issues:

        {issues_text}

        The rent roll data has {len(rent_roll_df)} rows and {len(rent_roll_df.columns)} columns.

        Column information:
        {column_info_str}

        Data statistics:
        {data_stats_str}

        When helping the user, follow these critical guidelines:
        1. DO NOT generate placeholder code with fake column names. Work ONLY with the actual columns from the dataframe.
        2. NEVER assume column names that don't exist in the actual data.
        3. Always start by examining the first few rows to understand the meaning of each column.
        4. If you can't identify which columns contain certain information, clearly state this limitation.
        5. DO NOT proceed with analysis using made-up column names that don't exist in the data.

        The entire dataframe is available as 'df' in the execution environment.

        Important instructions for code and calculations:
        1. ALWAYS share your chain of thought reasoning in your responses. For each analysis:
          - Begin with "**Thinking through this step by step:**" in bold
          - Clearly explain your understanding of the request
          - Describe your approach to solving the problem
          - Outline the data exploration steps you'll take
          - Explain why you're choosing specific columns and methods
          - Discuss any challenges you anticipate with the data structure
          This chain of thought should be visible to the user in your chat responses.
        """

        # Save the system message to the app state
        app_state["system_message"] = system_message

        # Generate a preview of the data and issues
        preview_html = f"""
        <h3>Rent Roll Preview</h3>
        <p>Successfully loaded rent roll with {len(rent_roll_df)} rows and {len(rent_roll_df.columns)} columns.</p>
        {rent_roll_df.head(5).fillna('').to_html(index=False)}

        <h3>Identified Issues</h3>
        <ol>
        """

        # Format each issue for the HTML preview
        for issue in issues_list:
            # If issue starts with a number (like "1. Issue"), strip the number
            if issue and issue[0].isdigit() and ". " in issue[:5]:
                issue = issue[issue.find(". ")+2:]
            # Issue text is model output; escape it so stray "<" / "&" cannot
            # break or inject markup (the table above is escaped by to_html).
            preview_html += f"<li>{html.escape(issue)}</li>"

        preview_html += """
        </ol>
        <p>You can now start asking questions in the chat below!</p>
        <p><strong>Note:</strong> This application uses GPT-4 for decision making and text responses,
        and Claude AI specifically for code generation and execution.</p>
        """

        # Get updated version choices
        version_choices = get_version_choices()

        # Make the chat interface visible
        logger.info("Setup complete with fixed versioning. Ready for chat interaction.")
        return (
            "Rent roll loaded successfully! You can now start chatting.",
            preview_html,
            gr.update(visible=True),  # chatbot visibility
            gr.update(choices=version_choices, value=version_choices[-1] if version_choices else None)  # version dropdown
        )

    except Exception as e:
        logger.error(f"Error during rent roll processing: {e}")
        logger.error(traceback.format_exc())
        return f"Error: {str(e)}", None, gr.update(visible=False), gr.update(choices=[], value=None)

    finally:
        # Single cleanup point for both the success and the error path
        if file_path and os.path.exists(file_path):
            try:
                os.unlink(file_path)
                logger.info("Temporary file removed")
            except OSError as cleanup_error:
                # A leftover temp file must not turn a finished upload into a failure
                logger.warning(f"Could not remove temporary file {file_path}: {cleanup_error}")


In [None]:
def load_latest_version_for_editing():
    """
    Hand the current dataframe to the data editor.

    Works on a copy of app_state["df"] with NaN replaced by empty strings.
    When a copiloting session is active, the load (or its failure) is also
    written to the session log.

    Returns:
        tuple: (DataFrame, status text) on success, (None, message) when no
        data is loaded or loading fails.
    """
    global app_state, session_recorder

    if app_state is None or app_state.get("df") is None:
        return None, "No data loaded. Please upload a rent roll first."

    try:
        # The current dataframe is expected to be the latest; NaN becomes '' for editing
        editable_df = app_state["df"].copy().fillna('')

        # Status line names the newest saved version when there is one
        saved_versions = app_state.get("df_versions")
        if saved_versions:
            newest = saved_versions[-1]
            version_info = f"Loaded version: {newest['name']} - {newest.get('description', 'No description')}"
        else:
            version_info = "Loaded current data (no versions saved yet)"

        # Record this action in session if active
        if session_recorder.current_session_file:
            loaded_label = newest['name'] if saved_versions else 'Current'
            session_entry = f"""
DATA EDITING SESSION STARTED
============================
Timestamp: {datetime.now().strftime('%H:%M:%S')}
Action: User loaded dataframe for manual editing
Version Loaded: {loaded_label}
DataFrame Shape: {editable_df.shape}
DataFrame Columns: {list(editable_df.columns)}
{'-' * 80}
"""

            # Free-form entry goes to the log first, then the structured turn
            with open(session_recorder.current_session_file, 'a', encoding='utf-8') as log_file:
                log_file.write(session_entry + "\n")

            session_recorder.record_conversation_turn(
                user_message="LOAD FOR EDITING: User opened data editor",
                ai_response="Dataframe loaded for manual editing",
                action_type="load_for_editing",
                code_executed=None,
                version_saved=None
            )

        logger.info(f"Loaded latest dataframe for editing: {editable_df.shape}")
        return editable_df, version_info

    except Exception as load_error:
        error_msg = f"Error loading data: {str(load_error)}"
        logger.error(error_msg)

        # Record error in session
        if session_recorder.current_session_file:
            session_recorder.record_conversation_turn(
                user_message="LOAD FOR EDITING FAILED",
                ai_response=error_msg,
                action_type="load_editing_error",
                code_executed=None,
                version_saved=None
            )

        return None, error_msg

def save_edited_dataframe(edited_df, description):
    """
    Save an edited dataframe as a new version, with optional GPT-4.1 change analysis.

    The edited data is coerced to a DataFrame, compared against the current
    app_state["df"] (through analyze_dataframe_changes_with_gpt4 when that
    function exists), saved with save_dataframe_version, and then installed as
    the new app_state["df"]. If a copiloting session is active the edit is
    also written to the session log; a failure while writing the log is only
    logged as a warning, because the version has already been saved by then.

    Args:
        edited_df: The edited data; a pandas DataFrame or anything
            pd.DataFrame() can convert.
        description: User description of changes (optional). When empty, the
            analysis summary or a generic label is used instead.

    Returns:
        tuple: (status message, gradio update). The update carries the saved
        dataframe on success and is an empty gr.update() otherwise.
    """
    global app_state, session_recorder

    if edited_df is None:
        return "No data to save", gr.update()

    try:
        # Coerce BEFORE reading `.empty`: a non-DataFrame payload has no such
        # attribute, so checking it first made this conversion unreachable.
        if not isinstance(edited_df, pd.DataFrame):
            edited_df = pd.DataFrame(edited_df)

        if edited_df.empty:
            return "No data to save", gr.update()

        # Nothing to compare against or update until a rent roll is loaded
        if app_state is None or app_state.get("df") is None:
            return "❌ No data loaded. Please upload a rent roll first.", gr.update()

        # Get the original dataframe for comparison
        original_df = app_state["df"].copy()

        # Analyze changes with GPT-4.1 if available
        change_analysis = None
        try:
            logger.info("Analyzing dataframe changes with GPT-4.1...")
            print("🤖 Analyzing changes with GPT-4.1...")

            # Use the enhanced analysis function if available
            if 'analyze_dataframe_changes_with_gpt4' in globals():
                change_analysis = analyze_dataframe_changes_with_gpt4(
                    original_df=original_df,
                    modified_df=edited_df,
                    user_description=description
                )
            else:
                logger.info("Enhanced GPT-4.1 analysis not available, using basic analysis")
        except Exception as e:
            # The analysis is optional; saving proceeds without it
            logger.warning(f"GPT-4.1 analysis failed: {e}")
            change_analysis = None

        # Generate a meaningful description if not provided
        if not description:
            if change_analysis and change_analysis.get("change_summary"):
                description = change_analysis["change_summary"]
            else:
                description = "Manual edits via data editor"

        # Save as new version
        version_name = save_dataframe_version(edited_df, description)

        if version_name is None:
            return "❌ Failed to save version", gr.update()

        # Update the app state with the edited dataframe
        app_state["df"] = edited_df

        # Record this in the copiloting session if active. Best effort: the
        # version is already saved, so a logging problem must not be reported
        # to the user as a failed save.
        session_recorded = False
        if session_recorder.current_session_file:
            try:
                session_description = change_analysis.get("session_description", f"Manual data edits: {description}") if change_analysis else f"Manual data edits: {description}"

                # Create detailed session entry
                session_entry = f"""
MANUAL DATA EDIT SESSION
========================
Timestamp: {datetime.now().strftime('%H:%M:%S')}
Edit Description: {description}
Version Saved: {version_name}

{'GPT-4.1 CHANGE ANALYSIS:' if change_analysis else 'BASIC CHANGE TRACKING:'}
{'-' * 40}
"""

                if change_analysis:
                    recommendations_text = "\n".join(
                        f"• {rec}" for rec in change_analysis.get('recommendations', [])
                    )
                    session_entry += f"""
Change Summary: {change_analysis.get('change_summary', 'N/A')}
Change Type: {change_analysis.get('change_type', 'N/A')}

Structural Changes:
{json.dumps(change_analysis.get('structural_changes', {}), indent=2)}

Data Modifications:
{json.dumps(change_analysis.get('data_modifications', {}), indent=2)}

Business Impact:
{json.dumps(change_analysis.get('business_impact', {}), indent=2)}

Recommendations:
{recommendations_text}
"""
                else:
                    # Shapes are appended once below for both branches, so
                    # they are not repeated here.
                    session_entry += """
Basic Analysis: User made manual edits to the dataframe
"""

                session_entry += f"""
Original DataFrame Shape: {original_df.shape}
Modified DataFrame Shape: {edited_df.shape}
{'-' * 80}
"""

                # Append to session file
                with open(session_recorder.current_session_file, 'a', encoding='utf-8') as f:
                    f.write(session_entry + "\n")

                # Record in session data structure
                session_recorder.record_conversation_turn(
                    user_message=f"MANUAL EDIT: {description}",
                    ai_response=session_description,
                    action_type="manual_data_edit_enhanced",
                    code_executed=None,
                    version_saved=version_name
                )

                # Record the dataframe version change
                session_recorder.record_dataframe_version(
                    version_name=version_name,
                    description=description,
                    shape=list(edited_df.shape),
                    columns=list(edited_df.columns)
                )

                # Record any issues found by GPT-4
                if change_analysis and change_analysis.get('data_modifications', {}).get('data_quality_impact') == 'degraded':
                    session_recorder.record_issue_found(
                        f"Data quality may have degraded due to manual edits: {description}",
                        severity="medium"
                    )

                session_recorded = True
                logger.info("Manual edit recorded in copiloting session")
            except Exception as record_error:
                logger.warning(f"Could not record manual edit in copiloting session: {record_error}")

        # Log the changes
        logger.info(f"Saved edited dataframe as version {version_name}")

        # Create detailed success message
        success_message = f"✅ Successfully saved as version {version_name}"

        if app_state.get("df_versions"):
            success_message += f"\n📊 Total versions: {len(app_state['df_versions'])}"

        # Add enhanced analysis if available
        if change_analysis:
            success_message += f"\n\n🤖 GPT-4.1 Analysis Summary:\n{change_analysis.get('change_summary', 'Changes analyzed')[:200]}..."

            success_message += f"\n\n📊 Change Details:"
            success_message += f"\n• Change Type: {change_analysis.get('change_type', 'Unknown')}"
            success_message += f"\n• Original Shape: {original_df.shape}"
            success_message += f"\n• New Shape: {edited_df.shape}"

            # Add recommendations if available
            if change_analysis.get('recommendations'):
                success_message += f"\n\n💡 Recommendations:\n"
                for rec in change_analysis['recommendations'][:3]:  # Show first 3
                    success_message += f"• {rec}\n"

        # Report what actually happened with the session log
        if session_recorded:
            recording_status = '✅ Recorded'
        elif session_recorder.current_session_file:
            recording_status = '⚠️ Recording failed (see log)'
        else:
            recording_status = '❌ No active session'
        success_message += f"\n\n📝 Session Recording: {recording_status}"

        return success_message, gr.update(value=edited_df)

    except Exception as e:
        error_msg = f"❌ Error saving: {str(e)}"
        logger.error(f"Error saving edited dataframe: {e}")
        logger.error(traceback.format_exc())

        # Still try to record the error in session
        if session_recorder.current_session_file:
            session_recorder.record_conversation_turn(
                user_message=f"MANUAL EDIT FAILED: {description}",
                ai_response=error_msg,
                action_type="manual_edit_error_enhanced",
                code_executed=None,
                version_saved=None
            )

        return error_msg, gr.update()

def load_specific_version(version_name):
    """
    Load one saved version from its CSV file for editing.

    The version is looked up by name in app_state["df_versions"]; its
    "csv_filename" is used when present, otherwise the path
    rent_roll_versions/rent_roll_<name>.csv is assumed. Loads and failures are
    written to the copiloting session when one is active.

    Args:
        version_name: Dropdown label; anything from the first " (" onward is
            dropped to obtain the version name.

    Returns:
        tuple: (DataFrame with NaN replaced by '', status text) on success,
        (None, message) otherwise.
    """
    global app_state, session_recorder

    if not version_name:
        return None, "Please select a version to load"

    # Strip status indicators such as "(LATEST)" from the label
    clean_version_name = version_name.split(" (")[0].strip()
    logger.info(f"Loading version: {clean_version_name}")

    def _record_turn(user_message, ai_response, action_type):
        # Forwards to the session recorder only while a session is active
        if session_recorder.current_session_file:
            session_recorder.record_conversation_turn(
                user_message=user_message,
                ai_response=ai_response,
                action_type=action_type,
                code_executed=None,
                version_saved=None
            )

    try:
        # Step 1: look the version up in the registry
        registry = app_state["df_versions"] if app_state and "df_versions" in app_state else []
        version_info = next(
            (entry for entry in registry if entry.get("name") == clean_version_name),
            None
        )

        # Step 2: resolve the CSV path, preferring the one stored in the registry
        if version_info and "csv_filename" in version_info:
            csv_filename = version_info["csv_filename"]
        else:
            csv_filename = os.path.join("rent_roll_versions", f"rent_roll_{clean_version_name}.csv")

        # Missing file: report and stop
        if not (csv_filename and os.path.exists(csv_filename)):
            error_msg = f"Version file not found: {csv_filename or 'Unknown path'}"
            logger.error(error_msg)
            _record_turn(f"LOAD VERSION FAILED: {clean_version_name}", error_msg, "load_version_error")
            return None, error_msg

        # Replace NaN with empty strings for editing
        loaded_df = pd.read_csv(csv_filename).fillna('')

        success_msg = f"Loaded version: {clean_version_name}"
        if version_info:
            success_msg += f" - {version_info.get('description', 'No description')}"

        # Record this action in session if active
        if session_recorder.current_session_file:
            described_as = version_info.get('description', 'No description') if version_info else 'Found in directory'
            session_entry = f"""
SPECIFIC VERSION LOADED FOR EDITING
===================================
Timestamp: {datetime.now().strftime('%H:%M:%S')}
Version Loaded: {clean_version_name}
DataFrame Shape: {loaded_df.shape}
DataFrame Columns: {list(loaded_df.columns)}
File Path: {csv_filename}
Version Description: {described_as}
{'-' * 80}
"""

            with open(session_recorder.current_session_file, 'a', encoding='utf-8') as log_file:
                log_file.write(session_entry + "\n")

            _record_turn(
                f"LOAD SPECIFIC VERSION: {clean_version_name}",
                f"Loaded version {clean_version_name} for editing",
                "load_specific_version"
            )

        logger.info(f"Successfully loaded version {clean_version_name} from {csv_filename}")
        return loaded_df, success_msg

    except Exception as load_error:
        error_msg = f"Error loading version {clean_version_name}: {str(load_error)}"
        logger.error(error_msg)

        # Note: this entry carries the raw label, not the cleaned name
        _record_turn(f"LOAD VERSION ERROR: {version_name}", error_msg, "load_version_error")

        return None, error_msg

def get_version_choices():
    """
    Build the dropdown labels for all saved dataframe versions.

    Each label is the version name, optionally followed by a parenthesised
    status list: "ORIGINAL" for the first entry (or any entry flagged
    is_original) and "LATEST" for the last entry.

    Returns:
        list[str]: One label per entry of app_state["df_versions"], in order;
        empty when there is no app state or no versions.
    """
    global app_state

    versions = app_state.get("df_versions") if app_state is not None else None
    if not versions:
        logger.info("No versions available in app_state")
        return []

    last_position = len(versions) - 1
    choices = []

    for position, version in enumerate(versions):
        tags = []
        if version.get('is_original', False) or position == 0:
            tags.append("ORIGINAL")
        if position == last_position:  # Last version is latest
            tags.append("LATEST")

        suffix = f" ({', '.join(tags)})" if tags else ""
        choices.append(f"{version['name']}{suffix}")

    logger.info(f"Generated {len(choices)} version choices")
    return choices

def refresh_version_dropdown():
    """
    Produce a gradio update that reloads the version dropdown.

    Returns:
        gr.update with the labels from get_version_choices() and the last
        label selected, or with no choices and no value when none exist.
    """
    choices = get_version_choices()

    if not choices:
        logger.info("No versions available for dropdown")
        return gr.update(choices=[], value=None)

    # The last label is the latest version and becomes the default selection
    default_value = choices[-1]
    logger.info(f"Refreshed dropdown with {len(choices)} choices, default: {default_value}")
    return gr.update(choices=choices, value=default_value)

In [None]:
class SessionRecorder:
    """
    Records a copiloting session to a text log and an in-memory dictionary.

    A session is active from start_session_recording() until
    finalize_session(). While no session is active, every record_* method is
    a silent no-op and finalize_session() returns None.
    """

    def __init__(self):
        # Directory that receives one "<session_id>.txt" log per session
        self.sessions_dir = "copiloting_sessions"
        os.makedirs(self.sessions_dir, exist_ok=True)
        # Path of the active session log; None means nothing is being recorded
        self.current_session_file = None
        # Structured mirror of the log; filled in by start_session_recording
        self.current_session_data = {}

    def start_session_recording(self, rent_roll_filename):
        """
        Start recording the entire copiloting session.

        Args:
            rent_roll_filename: Name of the rent roll file the session is about.

        Returns:
            str: The new session id ("session_<YYYYmmdd_HHMMSS>").
        """
        # One clock reading so the id, start_time and header always agree
        started_at = datetime.now()
        session_id = f"session_{started_at.strftime('%Y%m%d_%H%M%S')}"

        # The directory may have been removed since construction
        os.makedirs(self.sessions_dir, exist_ok=True)
        self.current_session_file = os.path.join(self.sessions_dir, f"{session_id}.txt")

        # Initialize session data
        self.current_session_data = {
            "session_id": session_id,
            "start_time": started_at.isoformat(),
            "rent_roll_file": rent_roll_filename,
            "conversation_history": [],
            "code_executions": [],
            "dataframe_versions": [],
            "issues_found": [],
            "user_goals": []
        }

        # Write session header to text file
        with open(self.current_session_file, 'w', encoding='utf-8') as f:
            f.write("=== RENT ROLL COPILOTING SESSION ===\n")
            f.write(f"Session ID: {session_id}\n")
            f.write(f"Started: {started_at.strftime('%Y-%m-%d %H:%M:%S')}\n")
            f.write(f"Rent Roll File: {rent_roll_filename}\n")
            f.write("=" * 50 + "\n\n")

        print(f"📝 Started session recording: {session_id}")
        return session_id

    def record_conversation_turn(self, user_message, ai_response, action_type, code_executed=None, version_saved=None):
        """
        Record one conversation turn in the session data and the log file.

        Args:
            user_message: What the user asked or did.
            ai_response: The response text stored for the turn.
            action_type: Label describing the kind of action.
            code_executed: Code run for this turn, if any; also tracked
                separately under "code_executions".
            version_saved: Name of the version saved by this turn, if any.
        """
        if not self.current_session_file:
            return

        timestamp = datetime.now().strftime('%H:%M:%S')
        turn_data = {
            "timestamp": timestamp,
            "user_message": user_message,
            "ai_response": ai_response,
            "action_type": action_type,
            "code_executed": code_executed,
            "version_saved": version_saved
        }

        # Add to session data
        self.current_session_data["conversation_history"].append(turn_data)

        # Append to text file immediately
        with open(self.current_session_file, 'a', encoding='utf-8') as f:
            f.write(f"[{timestamp}] USER: {user_message}\n")
            f.write(f"Action Type: {action_type}\n")

            if code_executed:
                f.write(f"CODE EXECUTED:\n```python\n{code_executed}\n```\n")

            f.write(f"AI RESPONSE: {ai_response}\n")

            if version_saved:
                f.write(f"VERSION SAVED: {version_saved}\n")

            f.write("-" * 80 + "\n\n")

        # Track code executions separately
        if code_executed:
            # Only string responses are truncated; None or other values are
            # stored unchanged instead of failing on len().
            result_preview = ai_response
            if isinstance(ai_response, str) and len(ai_response) > 200:
                result_preview = ai_response[:200] + "..."

            self.current_session_data["code_executions"].append({
                "timestamp": timestamp,
                "code": code_executed,
                "purpose": user_message,
                "result": result_preview
            })

    def record_dataframe_version(self, version_name, description, shape, columns):
        """
        Record a saved dataframe version in the session data and the log file.

        Args:
            version_name: Name of the saved version.
            description: Description stored with the version.
            shape: Shape of the saved dataframe.
            columns: Column names of the saved dataframe.
        """
        # Without an active session there is no "dataframe_versions" list to
        # append to; skip silently, like record_conversation_turn does.
        if not self.current_session_file:
            return

        version_info = {
            "timestamp": datetime.now().strftime('%H:%M:%S'),
            "version_name": version_name,
            "description": description,
            "shape": shape,
            "columns": columns
        }

        self.current_session_data["dataframe_versions"].append(version_info)

        # Append to text file
        with open(self.current_session_file, 'a', encoding='utf-8') as f:
            f.write(f"VERSION SAVED: {version_name}\n")
            f.write(f"Description: {description}\n")
            f.write(f"Shape: {shape}\n")
            f.write(f"Columns: {columns}\n")
            f.write("-" * 40 + "\n\n")

    def record_issue_found(self, issue_description, severity="medium"):
        """
        Record an issue found during analysis.

        Args:
            issue_description: Text describing the issue.
            severity: Severity label; written upper-cased to the log.
        """
        # Same no-session guard as the other record_* methods
        if not self.current_session_file:
            return

        issue_info = {
            "timestamp": datetime.now().strftime('%H:%M:%S'),
            "description": issue_description,
            "severity": severity
        }

        self.current_session_data["issues_found"].append(issue_info)

        with open(self.current_session_file, 'a', encoding='utf-8') as f:
            f.write(f"ISSUE FOUND [{severity.upper()}]: {issue_description}\n")
            f.write("-" * 40 + "\n\n")

    def finalize_session(self):
        """
        End session recording and return the collected session data.

        Appends a summary to the log, adds "end_time" and "duration_minutes"
        to the session data, then resets the recorder for the next session.

        Returns:
            dict | None: The session data, or None when no session is active.
        """
        if not self.current_session_file:
            return None

        end_time = datetime.now()
        duration = end_time - datetime.fromisoformat(self.current_session_data["start_time"])

        # Write session summary
        with open(self.current_session_file, 'a', encoding='utf-8') as f:
            f.write("\n" + "=" * 50 + "\n")
            f.write("SESSION SUMMARY\n")
            f.write("=" * 50 + "\n")
            f.write(f"Session Duration: {duration.total_seconds()/60:.1f} minutes\n")
            f.write(f"Total Conversations: {len(self.current_session_data['conversation_history'])}\n")
            f.write(f"Code Executions: {len(self.current_session_data['code_executions'])}\n")
            f.write(f"Versions Created: {len(self.current_session_data['dataframe_versions'])}\n")
            f.write(f"Issues Found: {len(self.current_session_data['issues_found'])}\n")
            f.write(f"Ended: {end_time.strftime('%Y-%m-%d %H:%M:%S')}\n")

        # Update session data
        self.current_session_data["end_time"] = end_time.isoformat()
        self.current_session_data["duration_minutes"] = duration.total_seconds() / 60

        session_data = self.current_session_data.copy()

        # Reset for next session
        self.current_session_file = None
        self.current_session_data = {}

        print(f"✅ Session recording finalized: {session_data['session_id']}")
        return session_data

# Global session recorder: the single module-level instance that the
# editing/versioning functions in this notebook access via
# `global session_recorder`. Constructing it creates the
# "copiloting_sessions" directory if it does not exist yet.
session_recorder = SessionRecorder()

In [None]:
class EnhancedTemplateManager:
    """Create, store, list, summarise and delete reusable rent-roll templates.

    A template is a group of files inside ``self.templates_dir``:
    ``<template_id>.json`` (metadata plus the model analysis) and three
    companion files holding the starting dataframe, the final dataframe and
    the rendered session context.  The dataframe files carry a ``.txt``
    extension but are written with ``DataFrame.to_csv`` and read back with
    ``pd.read_csv``.
    """

    def __init__(self):
        """Point the manager at ``rent_roll_templates`` and create that directory if needed."""
        self.templates_dir = "rent_roll_templates"
        os.makedirs(self.templates_dir, exist_ok=True)
        self.current_session = None  # not read by any method of this class

    @staticmethod
    def _clip(value, limit):
        """Return ``value`` as text cut to ``limit`` characters; ``None`` becomes ``'N/A'``."""
        text = 'N/A' if value is None else str(value)
        return text[:limit]

    def create_template_from_session(self, session_data, starting_df, final_df, template_name):
        """Generate comprehensive template from complete session data using GPT-4.1.

        Sends the rendered session context plus a two-row sample of both
        dataframes to the ``gpt-4.1`` chat model, then writes four files to
        ``self.templates_dir``: the two dataframes (CSV content), the session
        context text and the ``<template_id>.json`` metadata.

        Args:
            session_data: dict of recorded session data; read with ``.get``
                for ``session_id``, ``rent_roll_file``, ``duration_minutes``,
                ``conversation_history``, ``code_executions``,
                ``dataframe_versions`` and ``issues_found``.
            starting_df: DataFrame as it was at the start of the session.
            final_df: DataFrame as it is at the end of the session.
            template_name: human-readable name stored in the metadata.

        Returns:
            dict: the template metadata that was written to disk.  Its
            ``gpt4_analysis`` entry is the parsed JSON analysis, or
            ``{"analysis": <raw text>}`` when the reply held no parseable
            JSON, or ``{"error": ..., "fallback_analysis": ...}`` when the
            client could not be created or the model call failed.
        """

        print("🤖 Analyzing session with GPT-4.1 to generate instructions...")

        # Prepare comprehensive session context for GPT-4
        session_context = self._prepare_session_context(session_data)

        analysis_prompt = f"""
        You are an expert at analyzing data analysis workflows and creating reusable instruction templates.

        I will provide you with a complete copiloting session where a user worked on a rent roll analysis.
        Your task is to:
        1. Analyze the entire workflow
        2. Identify the key transformation patterns
        3. Create step-by-step instructions that can be applied to similar rent roll files
        4. Generate reusable code templates with placeholders
        5. Document the business logic and decision points

        SESSION DATA:
        {session_context}

        STARTING DATAFRAME INFO:
        - Shape: {starting_df.shape}
        - Columns: {list(starting_df.columns)}
        - Sample data: {starting_df.head(2).to_string()}

        FINAL DATAFRAME INFO:
        - Shape: {final_df.shape}
        - Columns: {list(final_df.columns)}
        - Sample data: {final_df.head(2).to_string()}

        Please generate a comprehensive analysis in the following JSON format:
        {{
            "workflow_summary": "Brief description of what was accomplished",
            "key_transformations": [
                {{
                    "step_name": "Clean Tenant Names",
                    "description": "Standardize tenant name formatting",
                    "business_rule": "All tenant names should be Title Case with no extra whitespace",
                    "code_template": "df['{{column_name}}'] = df['{{original_column}}'].str.strip().str.title()",
                    "parameters": ["column_name", "original_column"],
                    "conditions": "Apply when tenant names have inconsistent formatting"
                }}
            ],
            "data_quality_improvements": [
                "List of data quality issues that were resolved"
            ],
            "reusable_patterns": [
                "Pattern 1: Column standardization",
                "Pattern 2: Missing value handling"
            ],
            "business_insights": [
                "Key insights discovered during analysis"
            ],
            "prerequisites": [
                "What conditions must be met for this template to work"
            ],
            "instructions_for_reuse": [
                "Step 1: Upload new rent roll file",
                "Step 2: Map columns (if different names)",
                "Step 3: Apply transformations in order"
            ]
        }}
        """

        try:
            # Client creation lives inside the try so that a construction
            # failure (e.g. unusable API key) also ends in the fallback
            # analysis instead of aborting template creation.
            client = OpenAI(api_key=DEFAULT_OPENAI_API_KEY)

            response = client.chat.completions.create(
                model="gpt-4.1",  # Using latest GPT-4
                messages=[
                    {"role": "system", "content": "You are an expert data analyst who creates reusable workflow templates from analysis sessions. Provide detailed, actionable instructions."},
                    {"role": "user", "content": analysis_prompt}
                ],
                max_tokens=4000,
                temperature=0.3
            )

            # Extract the analysis
            gpt_analysis = response.choices[0].message.content

            # Try to extract JSON from the response (greedy: first "{" to last "}")
            json_match = re.search(r'{.*}', gpt_analysis, re.DOTALL)
            if json_match:
                try:
                    workflow_analysis = json.loads(json_match.group(0))
                except json.JSONDecodeError:
                    # Reply looked like JSON but does not parse: keep the raw text
                    workflow_analysis = {"analysis": gpt_analysis}
            else:
                workflow_analysis = {"analysis": gpt_analysis}

        except Exception as e:
            print(f"❌ Error with GPT-4 analysis: {e}")
            workflow_analysis = {"error": str(e), "fallback_analysis": "Manual analysis required"}

        # One timestamp for both the id and created_date so they always agree
        created_at = datetime.now()
        template_id = f"template_{created_at.strftime('%Y%m%d_%H%M%S')}"

        # Save dataframes as text files
        starting_df_file = f"{template_id}_starting_df.txt"
        final_df_file = f"{template_id}_final_df.txt"
        session_file = f"{template_id}_session.txt"

        starting_df_path = os.path.join(self.templates_dir, starting_df_file)
        final_df_path = os.path.join(self.templates_dir, final_df_file)
        session_path = os.path.join(self.templates_dir, session_file)

        # Save dataframes
        starting_df.to_csv(starting_df_path, index=False)
        final_df.to_csv(final_df_path, index=False)

        # Save raw session data
        with open(session_path, 'w', encoding='utf-8') as f:
            f.write(session_context)

        # Create comprehensive template
        template_data = {
            "template_id": template_id,
            "template_name": template_name,
            "created_date": created_at.isoformat(),
            "source_session_id": session_data.get("session_id", "unknown"),
            "source_file_name": session_data.get("rent_roll_file", "unknown"),

            "files": {
                "starting_dataframe": starting_df_file,
                "final_dataframe": final_df_file,
                "raw_session": session_file
            },

            "session_summary": {
                "duration_minutes": session_data.get("duration_minutes", 0),
                "total_conversations": len(session_data.get("conversation_history", [])),
                "code_executions": len(session_data.get("code_executions", [])),
                "versions_created": len(session_data.get("dataframe_versions", [])),
                "issues_found": len(session_data.get("issues_found", []))
            },

            "gpt4_analysis": workflow_analysis,

            "raw_workflow_steps": session_data.get("conversation_history", []),
            "code_executions": session_data.get("code_executions", []),
            "dataframe_changes": session_data.get("dataframe_versions", []),
            "issues_identified": session_data.get("issues_found", [])
        }

        # Save template metadata (default=str stringifies values json cannot encode)
        template_json_path = os.path.join(self.templates_dir, f"{template_id}.json")
        with open(template_json_path, 'w', encoding='utf-8') as f:
            json.dump(template_data, f, indent=2, default=str)

        print(f"✅ Comprehensive template created: {template_id}")
        print(f"📁 Starting DF: {starting_df_path}")
        print(f"📁 Final DF: {final_df_path}")
        print(f"📁 Session Data: {session_path}")
        print(f"📋 Template: {template_json_path}")

        return template_data

    def _prepare_session_context(self, session_data):
        """Prepare session data for GPT-4 analysis.

        Renders the session id, duration, file name, every conversation turn
        (AI responses cut to 300 characters), a code-execution summary (code
        cut to 100 characters), the issues found and the dataframe versions
        as one plain-text report.  Missing or ``None`` values are rendered as
        ``N/A`` (``0.0`` for the duration) instead of raising.

        Args:
            session_data: dict of recorded session data.

        Returns:
            str: the rendered report.
        """
        duration = session_data.get('duration_minutes') or 0
        parts = [f"""
COPILOTING SESSION ANALYSIS
============================

Session ID: {session_data.get('session_id', 'N/A')}
Duration: {duration:.1f} minutes
Rent Roll File: {session_data.get('rent_roll_file', 'N/A')}

CONVERSATION HISTORY:
"""]

        for i, conv in enumerate(session_data.get('conversation_history') or [], 1):
            parts.append(f"""
--- Conversation {i} [{conv.get('timestamp', 'N/A')}] ---
USER QUERY: {conv.get('user_message', 'N/A')}
ACTION TYPE: {conv.get('action_type', 'N/A')}
""")
            if conv.get('code_executed'):
                parts.append(f"CODE EXECUTED:\n{conv['code_executed']}\n")

            # The "..." suffix is always appended, truncated or not
            parts.append(f"AI RESPONSE: {self._clip(conv.get('ai_response'), 300)}...\n")

            if conv.get('version_saved'):
                parts.append(f"VERSION SAVED: {conv['version_saved']}\n")

            parts.append("\n")

        parts.append("\nCODE EXECUTIONS SUMMARY:\n")
        for code_exec in session_data.get('code_executions') or []:
            parts.append(f"- [{code_exec.get('timestamp')}] {code_exec.get('purpose', 'N/A')}\n")
            parts.append(f"  Code: {self._clip(code_exec.get('code'), 100)}...\n")

        parts.append("\nISSUES IDENTIFIED:\n")
        for issue in session_data.get('issues_found') or []:
            parts.append(f"- [{issue.get('timestamp')}] {issue.get('description', 'N/A')}\n")

        parts.append("\nDATAFRAME VERSIONS:\n")
        for version in session_data.get('dataframe_versions') or []:
            parts.append(f"- {version.get('version_name', 'N/A')}: {version.get('description', 'N/A')}\n")

        return "".join(parts)

    def load_template_dataframes(self, template_id):
        """Load both starting and final dataframes from a template.

        Args:
            template_id: id whose ``<template_id>.json`` lives in ``self.templates_dir``.

        Returns:
            tuple: ``(template_data, starting_df, final_df)``; on any error the
            error is printed and ``(None, None, None)`` is returned.
        """
        try:
            # Load template metadata
            template_json_path = os.path.join(self.templates_dir, f"{template_id}.json")
            with open(template_json_path, 'r', encoding='utf-8') as f:
                template_data = json.load(f)

            # Load starting dataframe
            starting_df_path = os.path.join(self.templates_dir, template_data["files"]["starting_dataframe"])
            starting_df = pd.read_csv(starting_df_path)

            # Load final dataframe
            final_df_path = os.path.join(self.templates_dir, template_data["files"]["final_dataframe"])
            final_df = pd.read_csv(final_df_path)

            return template_data, starting_df, final_df

        except Exception as e:
            print(f"❌ Error loading template: {e}")
            return None, None, None

    def list_templates(self):
        """List all available templates, newest first.

        Every ``*.json`` file in ``self.templates_dir`` is read.  A file that
        cannot be read, parsed, or that lacks a required key is skipped with
        a printed warning so one bad file does not hide the other templates.

        Returns:
            list[dict]: one entry per template with ``template_id``,
            ``template_name``, ``created_date``, ``source_file``,
            ``steps_count`` and ``gpt4_analysis_available``; ``[]`` when the
            directory cannot be listed or the entries cannot be sorted.
        """
        try:
            json_files = [f for f in os.listdir(self.templates_dir) if f.endswith('.json')]
            templates = []

            for json_file in json_files:
                template_path = os.path.join(self.templates_dir, json_file)
                try:
                    with open(template_path, 'r', encoding='utf-8') as f:
                        template_data = json.load(f)

                    templates.append({
                        "template_id": template_data["template_id"],
                        "template_name": template_data["template_name"],
                        "created_date": template_data["created_date"],
                        "source_file": template_data["source_file_name"],
                        "steps_count": len(template_data.get("raw_workflow_steps", [])),
                        "gpt4_analysis_available": "gpt4_analysis" in template_data
                    })
                except (OSError, ValueError, KeyError, TypeError, AttributeError) as e:
                    # ValueError covers json.JSONDecodeError and decoding errors
                    print(f"⚠️ Skipping unreadable template file {json_file}: {e}")

            return sorted(templates, key=lambda x: x["created_date"], reverse=True)

        except Exception as e:
            print(f"❌ Error listing templates: {e}")
            return []

    def get_template_summary(self, template_id):
        """Get a human-readable summary of a template.

        Args:
            template_id: id whose ``<template_id>.json`` lives in ``self.templates_dir``.

        Returns:
            str: multi-line summary (metadata, session statistics and, when
            the stored analysis is a dict, its workflow summary, the first
            three transformations and the first three prerequisites), or an
            error string starting with ``❌`` when the template cannot be read.
        """
        try:
            template_json_path = os.path.join(self.templates_dir, f"{template_id}.json")
            with open(template_json_path, 'r', encoding='utf-8') as f:
                template_data = json.load(f)

            stats = template_data.get('session_summary') or {}

            summary = f"""
📋 Template: {template_data.get('template_name', 'Unknown')}
🆔 ID: {template_data.get('template_id', 'Unknown')}
📅 Created: {template_data.get('created_date', 'Unknown')}
📁 Source File: {template_data.get('source_file_name', 'Unknown')}

📊 Session Summary:
• Duration: {(stats.get('duration_minutes') or 0):.1f} minutes
• Conversations: {stats.get('total_conversations', 0)}
• Code Executions: {stats.get('code_executions', 0)}
• Versions Created: {stats.get('versions_created', 0)}
• Issues Found: {stats.get('issues_found', 0)}

🤖 GPT-4 Analysis: {'✅ Available' if 'gpt4_analysis' in template_data else '❌ Not Available'}
"""

            # Add GPT-4 analysis summary if available
            if 'gpt4_analysis' in template_data and isinstance(template_data['gpt4_analysis'], dict):
                gpt_analysis = template_data['gpt4_analysis']

                if 'workflow_summary' in gpt_analysis:
                    summary += f"\n🔍 Workflow Summary:\n{gpt_analysis['workflow_summary']}\n"

                if 'key_transformations' in gpt_analysis:
                    transformations = gpt_analysis['key_transformations']
                    summary += f"\n🔧 Key Transformations ({len(transformations)}):\n"
                    for i, transform in enumerate(transformations[:3], 1):  # Show first 3
                        if isinstance(transform, dict):
                            summary += f"{i}. {transform.get('step_name', 'Unknown')}: {transform.get('description', 'No description')}\n"
                        else:
                            # The analysis is model-generated; tolerate plain-string entries
                            summary += f"{i}. {transform}\n"
                    if len(transformations) > 3:
                        summary += f"... and {len(transformations) - 3} more\n"

                if 'prerequisites' in gpt_analysis:
                    summary += f"\n📋 Prerequisites:\n"
                    for prereq in gpt_analysis['prerequisites'][:3]:  # Show first 3
                        summary += f"• {prereq}\n"

            return summary

        except Exception as e:
            return f"❌ Error getting template summary: {str(e)}"

    def delete_template(self, template_id):
        """Delete a template and all its associated files.

        Removes ``<template_id>.json`` plus every file named under its
        ``files`` mapping that exists in ``self.templates_dir``.  A file that
        cannot be removed is reported with a printed warning and not counted.

        Args:
            template_id: id of the template to remove.

        Returns:
            str: status message (success with the number of files removed,
            "not found", or an error description).
        """
        try:
            template_json_path = os.path.join(self.templates_dir, f"{template_id}.json")

            if not os.path.exists(template_json_path):
                return f"❌ Template {template_id} not found"

            # Load template to get file list
            with open(template_json_path, 'r', encoding='utf-8') as f:
                template_data = json.load(f)

            files_to_delete = []
            files_to_delete.append(template_json_path)  # The main template file

            # Add dataframe and session files
            if 'files' in template_data:
                for file_key, filename in template_data['files'].items():
                    file_path = os.path.join(self.templates_dir, filename)
                    if os.path.exists(file_path):
                        files_to_delete.append(file_path)

            # Delete all files
            deleted_count = 0
            for file_path in files_to_delete:
                try:
                    os.remove(file_path)
                    deleted_count += 1
                except Exception as e:
                    print(f"Warning: Could not delete {file_path}: {e}")

            return f"✅ Template {template_id} deleted successfully. Removed {deleted_count} files."

        except Exception as e:
            return f"❌ Error deleting template: {str(e)}"

# Global enhanced template manager
# Module-level instance used by create_template_from_current_session();
# constructing it creates the "rent_roll_templates" directory if it is missing.
enhanced_template_manager = EnhancedTemplateManager()

In [None]:
def create_template_from_current_session():
    """Create template from current copiloting session.

    NOTE: a later cell of this notebook defines another function with the
    same name (taking ``template_name_input``); once that cell has run, it
    replaces this definition.

    The starting dataframe is read from the file recorded for the first entry
    of ``app_state["df_versions"]``.  When that entry has no usable path, or
    the file no longer exists, the current dataframe is used as the starting
    point instead of failing.  The active session is finalized as part of
    this call.

    Returns:
        str: a success message with the template id and number of captured
        conversation turns, or a message starting with ``❌`` on failure.
    """
    global app_state, session_recorder, enhanced_template_manager

    if not session_recorder.current_session_file:
        return "❌ No active session to create template from"

    if app_state is None or app_state["df"] is None:
        return "❌ No dataframe loaded"

    try:
        # Get starting dataframe (first version)
        starting_df = None
        versions = app_state.get("df_versions")
        if versions:
            first_version = versions[0]
            starting_df_path = first_version.get("filename") or first_version.get("csv_filename")
            # Only read when a path was recorded and the file is still there;
            # pd.read_csv(None) / a missing file would abort template creation.
            if starting_df_path and os.path.exists(starting_df_path):
                starting_df = pd.read_csv(starting_df_path)

        if starting_df is None:
            starting_df = app_state["df"]  # Fallback if no usable version file

        # Current dataframe is the final version
        final_df = app_state["df"]

        # Finalize current session
        session_data = session_recorder.finalize_session()

        if session_data is None:
            return "❌ Error finalizing session"

        # Generate template name suggestion
        template_name = f"Rent Roll Process {datetime.now().strftime('%Y-%m-%d %H:%M')}"

        # Create comprehensive template
        template_data = enhanced_template_manager.create_template_from_session(
            session_data=session_data,
            starting_df=starting_df,
            final_df=final_df,
            template_name=template_name
        )

        return f"✅ Template created successfully!\nTemplate ID: {template_data['template_id']}\nSteps captured: {len(session_data.get('conversation_history', []))}"

    except Exception as e:
        return f"❌ Error creating template: {str(e)}"

In [None]:
# Global state for the application (Not part of graph state)
# Shared, mutable module-level dict read by chat() and the template helpers.
# Those functions also read "df_versions" via .get(), which is not
# initialised here.
app_state = {
    "df": None,  # current dataframe; chat() refuses to run while this is None
    "anthropic_client": None,  # passed through to the workflow state by chat()
    "openai_client": None,  # Added for GPT-4; chat() builds a default client when this is falsy
    "issues": [],  # copied into the workflow state by chat()
    "system_message": ""  # copied into the workflow state by chat()
}
# Enhanced Chat Function with Complete Session Recording and Template Generation

def chat(message, history):
    """
    Enhanced chat function with comprehensive session recording and template generation.
    Records every interaction, code execution, and dataframe change for template creation.

    Flow of one turn:
      1. Start a session recording if none is active (recording the uploaded
         file name from ``app_state["original_filename"]`` when present).
      2. Rebuild the message list from ``history`` plus ``message`` and let
         ``conversation_manager`` compress it if needed.
      3. Invoke the agentic workflow (created once and cached on ``chat.workflow``).
      4. Classify the turn, extract code blocks / saved version names from the
         response text and record everything through ``session_recorder``.

    Args:
        message: the user's message text.
        history: list of ``(user_message, assistant_message)`` pairs, or
            ``None``/empty for a new conversation.

    Returns:
        list: a new history list with ``(message, reply)`` appended.  When the
        workflow raises, the reply is an ``"Error getting response: ..."``
        string and the error is recorded in the session.
    """
    global app_state, session_recorder, enhanced_template_manager, conversation_manager

    logger.info(f"Received chat message: {message[:50]}...")

    # Normalise once so a None history never reaches list concatenation
    history_list = list(history) if history else []

    # Check if system is ready
    if app_state is None or app_state["df"] is None:
        logger.warning("Chat attempted before setup is complete")
        return history_list + [(message, "Please upload a rent roll file and set up your API keys first.")]

    # Start session recording if not already started
    if not session_recorder.current_session_file:
        # app_state is a dict: look the name up as a key (getattr on a dict
        # would always fall through to the default).
        rent_roll_filename = app_state.get('original_filename') or 'uploaded_rent_roll.xlsx'
        session_id = session_recorder.start_session_recording(rent_roll_filename)
        logger.info(f"Started new session recording: {session_id}")

        # Record initial dataframe state
        if app_state.get("df_versions") and len(app_state["df_versions"]) > 0:
            first_version = app_state["df_versions"][0]
            session_recorder.record_dataframe_version(
                version_name=first_version["name"],
                description=first_version["description"],
                shape=list(app_state["df"].shape),
                columns=list(app_state["df"].columns)
            )

    # Create message list without system message: previous turns, then the new one
    all_messages = []
    for user_msg, assistant_msg in history_list:
        all_messages.append({"role": "user", "content": user_msg})
        all_messages.append({"role": "assistant", "content": assistant_msg})
    all_messages.append({"role": "user", "content": message})

    # *** COMPRESSION STEP ***
    openai_client = app_state.get("openai_client") or OpenAI(api_key=DEFAULT_OPENAI_API_KEY)
    optimized_messages = conversation_manager.compress_history_if_needed(all_messages, openai_client)

    # Log compression if it happened
    original_size = conversation_manager.get_conversation_size(all_messages)
    optimized_size = conversation_manager.get_conversation_size(optimized_messages)
    if optimized_size < original_size:
        logger.info(f"Compressed conversation: {original_size} -> {optimized_size} tokens")
        # Log to session file
        if session_recorder.current_session_file:
            with open(session_recorder.current_session_file, 'a', encoding='utf-8') as f:
                f.write(f"\n[COMPRESSION] Reduced conversation from {original_size} to {optimized_size} tokens ({len(all_messages)} -> {len(optimized_messages)} messages)\n")

    # Create a state dictionary for the graph
    state = {
        "messages": optimized_messages,
        "system_message": app_state["system_message"],
        "df": app_state["df"],
        "issues": app_state["issues"],
        "needs_clarification": False,
        "generate_code": False,
        "execution_plan": None,
        "clarification_question": None,
        "code_execution_results": None,
        "final_response": None,
        "anthropic_client": app_state["anthropic_client"],
        "openai_client": openai_client
    }

    try:
        # Create the workflow if not already created
        if not hasattr(chat, "workflow"):
            chat.workflow = create_agentic_rent_roll_analyzer()
            logger.info("Created agentic workflow")

        # Run the workflow with the current state
        logger.info("Running agentic workflow")
        result = chat.workflow.invoke(state)

        # Get the final response from the result state.  The key is seeded
        # with None in `state`, so a present-but-None value must also fall
        # back to the apology text.
        final_response = result.get("final_response") or "I'm sorry, I couldn't process your request."
        logger.info(f"Received final response from workflow: {final_response[:50]}...")

        # Lower-cased once; reused by every keyword heuristic below
        message_lower = message.lower()
        response_lower = final_response.lower()

        # === ENHANCED SESSION RECORDING ===

        # 1. Determine action type based on response content and workflow state
        action_type = "analysis"  # default

        if result.get("needs_clarification"):
            action_type = "clarification"
        elif result.get("generate_code"):
            action_type = "data_processing"
        elif "error" in response_lower or "sorry" in response_lower:
            action_type = "error_handling"
        elif "```python" in final_response:
            action_type = "code_execution"
        elif any(keyword in message_lower for keyword in ["clean", "fix", "correct", "standardize"]):
            action_type = "data_cleaning"
        elif any(keyword in message_lower for keyword in ["calculate", "compute", "sum", "average"]):
            action_type = "calculation"
        elif any(keyword in message_lower for keyword in ["find", "show", "display", "list"]):
            action_type = "data_exploration"
        elif any(keyword in message_lower for keyword in ["chart", "graph", "plot", "visualize"]):
            action_type = "visualization"

        # 2. Extract executed code from response
        code_executed = None
        code_blocks = re.findall(r'```python\s*(.*?)\s*```', final_response, re.DOTALL)
        if code_blocks:
            # Combine all code blocks if multiple
            code_executed = "\n\n# --- Next Code Block ---\n\n".join(code_blocks)

        # 3. Check if a new dataframe version was saved
        version_saved = None
        if "✓ Saved dataframe version" in final_response:
            version_match = re.search(r'version (v_\w+)', final_response)
            if version_match:
                version_saved = version_match.group(1)
                logger.info(f"Detected new version saved: {version_saved}")

        # 4. Detect if issues were found or resolved
        if any(keyword in response_lower for keyword in ["issue", "problem", "error", "missing", "duplicate"]):
            issue_description = message + " - " + final_response[:100] + "..."
            severity = "high" if any(word in response_lower for word in ["critical", "error", "failed"]) else "medium"
            session_recorder.record_issue_found(issue_description, severity)

        # 5. Record the complete conversation turn with enhanced metadata
        session_recorder.record_conversation_turn(
            user_message=message,
            ai_response=final_response,
            action_type=action_type,
            code_executed=code_executed,
            version_saved=version_saved
        )

        # 6. Record dataframe version details if saved
        if version_saved:
            # Find the matching entry in the version registry, if any
            latest_version = None
            for version in app_state.get("df_versions") or []:
                if version["name"] == version_saved:
                    latest_version = version
                    break

            if latest_version:
                description = latest_version.get("description", "Auto-saved during copiloting")
            else:
                # Fallback if version not found in registry
                description = "Auto-saved during copiloting session"

            session_recorder.record_dataframe_version(
                version_name=version_saved,
                description=description,
                shape=list(app_state["df"].shape),
                columns=list(app_state["df"].columns)
            )

        # 7. Track user goals and patterns
        user_goals = []
        if any(keyword in message_lower for keyword in ["clean", "standardize", "fix"]):
            user_goals.append("Data cleaning and standardization")
        if any(keyword in message_lower for keyword in ["analyze", "find", "calculate"]):
            user_goals.append("Data analysis and insights")
        if any(keyword in message_lower for keyword in ["chart", "graph", "visualize"]):
            user_goals.append("Data visualization")

        if user_goals:
            session_recorder.current_session_data.setdefault("user_goals", []).extend(user_goals)

        # 8. Log session statistics
        if session_recorder.current_session_data:
            total_turns = len(session_recorder.current_session_data.get("conversation_history", []))
            total_code = len(session_recorder.current_session_data.get("code_executions", []))
            logger.info(f"Session stats - Turns: {total_turns}, Code executions: {total_code}")

        logger.info("Chat response processing complete with session recording")
        # Use the correct format for Gradio chatbot: list of (user, assistant) pairs
        return history_list + [(message, final_response)]

    except Exception as e:
        logger.error(f"Error processing chat: {e}")
        logger.error(traceback.format_exc())

        # Record the error in session
        error_message = f"Error getting response: {str(e)}"

        if session_recorder.current_session_file:
            session_recorder.record_conversation_turn(
                user_message=message,
                ai_response=error_message,
                action_type="system_error",
                code_executed=None,
                version_saved=None
            )

            # Record as a system issue
            session_recorder.record_issue_found(
                f"System error during processing: {str(e)}",
                severity="high"
            )

        # Handle errors properly in the chat history format
        return history_list + [(message, error_message)]



def create_template_from_current_session(template_name_input=""):
    """
    Create a comprehensive template from the current copiloting session.
    This includes GPT-4.1 analysis of the entire workflow.

    The starting dataframe is loaded from the first entry of
    ``app_state["df_versions"]`` (its recorded path, or
    ``rent_roll_versions/rent_roll_<name>.csv``); when neither file exists the
    current dataframe is used.  The active session is finalized as part of
    this call.

    Args:
        template_name_input: optional template name.  When empty, blank or
            ``None`` a name is generated from the rent roll file name and
            today's date.

    Returns:
        str: a multi-line success report, or a message starting with ``❌``.
    """
    global app_state, session_recorder, enhanced_template_manager

    if not session_recorder.current_session_file:
        return "❌ No active copiloting session found. Please start chatting with the system first."

    if app_state is None or app_state["df"] is None:
        return "❌ No dataframe loaded. Cannot create template."

    try:
        logger.info("Starting template creation from current session...")

        # 1. Get starting dataframe (first version saved)
        starting_df = None
        if app_state.get("df_versions") and len(app_state["df_versions"]) > 0:
            # Load the original/first version
            first_version = app_state["df_versions"][0]
            starting_df_path = first_version.get("filename") or first_version.get("csv_filename")
            if starting_df_path and os.path.exists(starting_df_path):
                starting_df = pd.read_csv(starting_df_path)
                logger.info(f"Loaded starting dataframe from: {starting_df_path}")
            else:
                # Try to construct the path
                versions_dir = "rent_roll_versions"
                csv_filename = os.path.join(versions_dir, f"rent_roll_{first_version['name']}.csv")
                if os.path.exists(csv_filename):
                    starting_df = pd.read_csv(csv_filename)
                    logger.info(f"Loaded starting dataframe from: {csv_filename}")

        # Fallback: use current dataframe if no versions found
        if starting_df is None:
            starting_df = app_state["df"].copy()
            logger.warning("Using current dataframe as starting point (no version history found)")

        # 2. Current dataframe is the final version
        final_df = app_state["df"].copy()

        # 3. Finalize current session to get complete session data
        logger.info("Finalizing current session...")
        session_data = session_recorder.finalize_session()

        if session_data is None:
            return "❌ Error finalizing session data."

        # 4. Generate template name if not provided (None is treated as empty)
        requested_name = (template_name_input or "").strip()
        if not requested_name:
            rent_roll_file = session_data.get('rent_roll_file', 'Unknown')
            timestamp = datetime.now().strftime('%Y-%m-%d')
            template_name = f"Rent Roll Process - {rent_roll_file} - {timestamp}"
        else:
            template_name = requested_name

        # 5. Create comprehensive template using GPT-4.1 analysis
        logger.info("Creating template with GPT-4.1 analysis...")
        template_data = enhanced_template_manager.create_template_from_session(
            session_data=session_data,
            starting_df=starting_df,
            final_df=final_df,
            template_name=template_name
        )

        # 6. Prepare success message with details
        # The per-session counts are stored on the template metadata under
        # "session_summary"; the recorder's session data has no such key.
        session_stats = template_data.get('session_summary') or {}
        # The template always carries a "gpt4_analysis" entry; a failed model
        # call is marked by an "error" key inside it.
        analysis = template_data.get('gpt4_analysis')
        analysis_ok = isinstance(analysis, dict) and 'error' not in analysis
        success_message = f"""✅ Template Created Successfully!

📋 Template Details:
• Template ID: {template_data['template_id']}
• Template Name: {template_name}
• Source File: {session_data.get('rent_roll_file', 'Unknown')}

📊 Session Summary:
• Duration: {(session_stats.get('duration_minutes') or 0):.1f} minutes
• Conversations: {session_stats.get('total_conversations', 0)}
• Code Executions: {session_stats.get('code_executions', 0)}
• Versions Created: {session_stats.get('versions_created', 0)}
• Issues Found: {session_stats.get('issues_found', 0)}

📁 Files Created:
• Starting Dataframe: {template_data['files']['starting_dataframe']}
• Final Dataframe: {template_data['files']['final_dataframe']}
• Session Recording: {template_data['files']['raw_session']}
• Template Metadata: {template_data['template_id']}.json

🤖 GPT-4.1 Analysis: {'✅ Completed' if analysis_ok else '❌ Failed'}

This template can now be applied to similar rent roll files using the Template Manager."""

        logger.info(f"Template creation completed: {template_data['template_id']}")
        return success_message

    except Exception as e:
        error_msg = f"❌ Error creating template: {str(e)}"
        logger.error(f"Template creation failed: {e}")
        logger.error(traceback.format_exc())
        return error_msg


def end_current_session():
    """
    Manually end the current copiloting session without creating a template.

    Useful for starting fresh or when a session gets too long.

    Returns:
        str: A user-facing status message -- a notice when no session file is
        active, a statistics summary when ``finalize_session()`` returns data,
        a warning when it returns nothing, or an error message when finalizing
        raised an exception.
    """
    global session_recorder

    if not session_recorder.current_session_file:
        return "ℹ️ No active session to end."

    try:
        session_data = session_recorder.finalize_session()

        if not session_data:
            return "⚠️ Session ended but no data was saved."

        # The template-creation flow in this file reads the duration from the
        # nested 'session_summary' dict. Fall back to it when the top-level key
        # is absent so the summary does not silently report 0.0 minutes.
        duration = session_data.get('duration_minutes')
        if duration is None:
            duration = (session_data.get('session_summary') or {}).get('duration_minutes') or 0

        session_stats = {
            'duration': duration,
            'conversations': len(session_data.get('conversation_history', [])),
            'code_executions': len(session_data.get('code_executions', [])),
            'versions': len(session_data.get('dataframe_versions', []))
        }

        return f"""✅ Session Ended Successfully

            📊 Final Session Statistics:
            • Session ID: {session_data.get('session_id', 'Unknown')}
            • Duration: {session_stats['duration']:.1f} minutes
            • Total Conversations: {session_stats['conversations']}
            • Code Executions: {session_stats['code_executions']}
            • Dataframe Versions: {session_stats['versions']}

            💾 Session data saved to: {session_data.get('session_id', 'unknown')}.txt

            You can now start a new session or create a template from this completed session."""

    except Exception as e:
        # Log the failure (with traceback) like the template-creation handler
        # does; previously the error was only visible in the returned string.
        logger.error(f"Ending session failed: {e}")
        logger.error(traceback.format_exc())
        return f"❌ Error ending session: {str(e)}"


# Additional helper function to get session status
def get_current_session_status():
    """
    Describe the state of session recording as a user-facing string.

    Returns:
        str: A "no active session" notice when the recorder has no current
        session file; a "no data collected yet" notice when a file exists but
        the recorder holds no session data; otherwise a statistics summary.
        Any exception raised while building the summary is reported as an
        error string instead of propagating.
    """
    global session_recorder

    # Guard: nothing is being recorded at all.
    if not session_recorder.current_session_file:
        return "📴 No active session recording"

    try:
        snapshot = session_recorder.current_session_data
        if not snapshot:
            return "📹 Session recording active but no data collected yet"

        # Elapsed time is measured from the stored ISO start time; when the
        # key is missing, "now" is used and the duration comes out near zero.
        started_at = datetime.fromisoformat(snapshot.get('start_time', datetime.now().isoformat()))
        elapsed_minutes = (datetime.now() - started_at).total_seconds() / 60

        return f"""📹 Session Recording Active

            📊 Current Statistics:
            • Session ID: {snapshot.get('session_id', 'Unknown')}
            • Duration: {elapsed_minutes:.1f} minutes
            • Conversations: {len(snapshot.get('conversation_history', []))}
            • Code Executions: {len(snapshot.get('code_executions', []))}
            • Versions Created: {len(snapshot.get('dataframe_versions', []))}
            • Issues Found: {len(snapshot.get('issues_found', []))}

            📁 Recording File: {session_recorder.current_session_file}

            All interactions are being automatically recorded for template creation."""

    except Exception as failure:
        return f"❌ Error getting session status: {str(failure)}"

In [None]:
def view_data():
    """
    Return an HTML preview of the loaded rent roll data.

    Returns:
        str: ``"No rent roll data loaded yet."`` when ``app_state`` is ``None``
        or holds no dataframe under ``"df"``; otherwise an HTML fragment with a
        heading, the row/column counts, and the first 10 rows rendered as a
        table (missing values shown as empty cells).
    """
    global app_state  # Use app_state instead of agent_state

    logger.info("View data requested")

    # .get() rather than ["df"]: ensure_version_system_initialized() can leave
    # app_state as a dict without a "df" key, which used to raise KeyError here.
    df = app_state.get("df") if app_state is not None else None
    if df is None:
        logger.warning("View data requested but no data is loaded")
        return "No rent roll data loaded yet."

    # Generate HTML representation of the dataframe
    logger.info(f"Generating HTML preview of data with {len(df)} rows")
    html = f"""
    <h3>Rent Roll Data</h3>
    <p>{len(df)} rows × {len(df.columns)} columns</p>
    {df.head(10).fillna('').to_html(index=False)}
    """

    return html

In [None]:

def clear_chat():
    """Wipe the conversation by handing Gradio a brand-new, empty history list."""
    logger.info("Clearing chat history")
    # A fresh list on every call, so callers never share one history object.
    empty_history = list()
    return empty_history

In [None]:
def view_dataframe_versions():
    """
    Return HTML showing all known versions of the rent roll dataframe.

    Versions are collected from two places and merged by version name:
      * the in-memory registry ``app_state["df_versions"]`` (entries are dicts
        read via the keys ``name``, ``description``, ``timestamp``, ``shape``,
        ``is_original`` and ``csv_filename``);
      * ``rent_roll_v_*.csv`` files in the ``rent_roll_versions`` directory
        (relative to the current working directory).

    Returns:
        str: An HTML table with one row per version, sorted by timestamp, or a
        short "no versions found" notice when neither source has any entry.
    """
    global app_state
    # Function-scope import; only `escape` is bound, so the local `html` string
    # built below does not clash with the module name.
    from html import escape

    logger.info("Generating dataframe versions view")

    # First check app_state registry
    registry_versions = []
    if app_state and "df_versions" in app_state and app_state["df_versions"]:
        registry_versions = app_state["df_versions"]
        logger.info(f"Found {len(registry_versions)} versions in registry")

    # Also check physical files
    versions_dir = "rent_roll_versions"
    file_prefix = 'rent_roll_'
    file_suffix = '.csv'
    file_versions = []

    if os.path.exists(versions_dir):
        try:
            for csv_file in os.listdir(versions_dir):
                if not (csv_file.startswith('rent_roll_v_') and csv_file.endswith(file_suffix)):
                    continue

                # Strip only the leading prefix and trailing extension; a
                # str.replace() would also remove occurrences inside the name.
                version_name = csv_file[len(file_prefix):-len(file_suffix)]

                file_path = os.path.join(versions_dir, csv_file)
                file_stats = os.stat(file_path)

                # Try to get dataframe info; an unreadable file still gets a row
                try:
                    temp_df = pd.read_csv(file_path)
                    df_info = f"{len(temp_df)} rows × {len(temp_df.columns)} columns"
                except Exception as read_error:
                    # Was a bare `except:`, which also trapped KeyboardInterrupt
                    logger.warning(f"Unable to read version file {file_path}: {read_error}")
                    df_info = "Unable to read file"

                file_versions.append({
                    'version_name': version_name,
                    'file_path': file_path,
                    'file_size': file_stats.st_size,
                    'modified_time': datetime.fromtimestamp(file_stats.st_mtime),
                    'df_info': df_info,
                    'source': 'file'
                })

            logger.info(f"Found {len(file_versions)} version files on disk")

        except Exception as e:
            logger.error(f"Error reading versions directory: {e}")

    # Combine and deduplicate versions
    all_versions = {}

    # Add registry versions
    for version in registry_versions:
        # A malformed timestamp on one entry must not take down the whole view;
        # such an entry is listed with an unknown creation time instead.
        created_at = None
        raw_timestamp = version.get('timestamp')
        if raw_timestamp is not None:
            try:
                created_at = datetime.strptime(raw_timestamp, "%Y%m%d_%H%M%S")
            except (TypeError, ValueError):
                logger.warning(f"Version {version.get('name')} has an unparseable timestamp: {raw_timestamp!r}")

        all_versions[version['name']] = {
            'version_name': version['name'],
            'description': version.get('description', ''),
            'timestamp': created_at,
            'shape': version.get('shape', 'Unknown'),
            'is_original': version.get('is_original', False),
            'csv_filename': version.get('csv_filename', ''),
            'source': 'registry'
        }

    # Add file versions (update existing or add new)
    for version in file_versions:
        name = version['version_name']
        if name in all_versions:
            # Update with file info
            all_versions[name].update({
                'file_path': version['file_path'],
                'file_size': version['file_size'],
                'modified_time': version['modified_time'],
                'df_info': version['df_info'],
                'file_exists': True
            })
        else:
            # Add as file-only version
            all_versions[name] = {
                'version_name': name,
                'description': 'Found in directory',
                'timestamp': version['modified_time'],
                'df_info': version['df_info'],
                'file_path': version['file_path'],
                'file_size': version['file_size'],
                'modified_time': version['modified_time'],
                'is_original': False,
                'source': 'file_only',
                'file_exists': True
            }

    if not all_versions:
        return """
        <h3 style="color: white;">No Rent Roll Versions Found</h3>
        <p style="color: white;">No versions have been saved yet. Save a version first using the 'Save Version' functionality.</p>
        """

    # Sort versions by timestamp; entries without any time sort first
    sorted_versions = sorted(all_versions.values(), key=lambda x: x.get('timestamp') or x.get('modified_time') or datetime.min)

    # Create enhanced HTML table
    html = f"""
    <h3 style="color: white;">Rent Roll Dataframe Version History</h3>
    <p style="color: white;">Found {len(sorted_versions)} version(s) - Registry: {len(registry_versions)}, Files: {len(file_versions)}</p>
    <table border="1" cellpadding="5" cellspacing="0" style="width: 100%; border-collapse: collapse; color: white;">
        <thead style="background-color: #009879;">
            <tr>
                <th style="text-align: left; padding: 10px;">Version Name</th>
                <th style="text-align: left; padding: 10px;">Status</th>
                <th style="text-align: left; padding: 10px;">Created</th>
                <th style="text-align: left; padding: 10px;">Size</th>
                <th style="text-align: left; padding: 10px;">Data</th>
                <th style="text-align: left; padding: 10px;">Description</th>
                <th style="text-align: left; padding: 10px;">Source</th>
            </tr>
        </thead>
        <tbody>
    """

    for i, version in enumerate(sorted_versions):
        # Determine status badges
        status_badges = []

        # The earliest version is labelled ORIGINAL even without the flag
        if version.get('is_original') or i == 0:
            status_badges.append('<span style="background-color: #3949ab; color: white; padding: 3px 6px; border-radius: 3px; display: inline-block;">ORIGINAL</span>')

        if i == len(sorted_versions) - 1:
            status_badges.append('<span style="background-color: #43a047; color: white; padding: 3px 6px; border-radius: 3px; display: inline-block;">LATEST</span>')

        if version.get('source') == 'registry':
            status_badges.append('<span style="background-color: #607d8b; color: white; padding: 3px 6px; border-radius: 3px; display: inline-block;">TRACKED</span>')

        if version.get('file_exists'):
            status_badges.append('<span style="background-color: #4caf50; color: white; padding: 3px 6px; border-radius: 3px; display: inline-block;">FILE OK</span>')

        status_html = ' '.join(status_badges) if status_badges else '<span style="color: #999;">-</span>'

        # Format timestamp
        timestamp = version.get('timestamp') or version.get('modified_time')
        time_str = timestamp.strftime('%Y-%m-%d %H:%M:%S') if timestamp else 'Unknown'

        # Format file size
        file_size = version.get('file_size', 0)
        size_str = f"{file_size:,} bytes ({file_size/1024:.1f} KB)" if file_size > 0 else 'Unknown'

        # Data info: accept tuples as well as lists, since DataFrame.shape is a tuple
        data_info = version.get('df_info') or version.get('shape', 'Unknown')
        if isinstance(data_info, (list, tuple)) and len(data_info) == 2:
            data_info = f"{data_info[0]} rows × {data_info[1]} columns"

        # Names and descriptions are free text (user input / file names), so
        # escape them before interpolating into markup.
        name_html = escape(str(version['version_name']))
        data_html = escape(str(data_info))
        description_html = escape(str(version.get('description', 'No description')))

        html += f"""
        <tr style="background-color: #25292e; color: white; border-bottom: 1px solid #333;">
            <td style="padding: 10px;"><code style="font-family: monospace; font-weight: bold;">{name_html}</code></td>
            <td style="padding: 10px;">{status_html}</td>
            <td style="padding: 10px;">{time_str}</td>
            <td style="padding: 10px;">{size_str}</td>
            <td style="padding: 10px;">{data_html}</td>
            <td style="padding: 10px;">{description_html}</td>
            <td style="padding: 10px;">{version.get('source', 'unknown').title()}</td>
        </tr>
        """

    html += """
        </tbody>
    </table>
    <p style="color: #999; font-size: 12px; margin-top: 10px;">
        Registry: Versions tracked in memory | Files: Versions found on disk | Tracked: In both registry and disk
    </p>
    """

    logger.info(f"Generated version history HTML with {len(sorted_versions)} versions")
    return html

In [None]:
def ensure_version_system_initialized():
    """
    Make sure the prerequisites of the versioning feature exist.

    Replaces a ``None`` ``app_state`` with an empty dict, adds an empty
    ``"df_versions"`` list when that key is missing, and creates the
    ``rent_roll_versions`` directory (relative to the working directory) if it
    is not there yet. Existing state and files are left untouched.
    """
    global app_state

    state_is_missing = app_state is None
    if state_is_missing:
        logger.warning("app_state is None, initializing...")
        app_state = {}

    registry_is_missing = "df_versions" not in app_state
    if registry_is_missing:
        logger.info("Initializing df_versions list")
        app_state["df_versions"] = []

    # Create versions directory (no-op when it already exists)
    os.makedirs("rent_roll_versions", exist_ok=True)

    tracked_count = len(app_state['df_versions'])
    logger.info(f"Version system initialized. Registry has {tracked_count} versions")


In [None]:
def enhance_user_prompt_with_context(user_prompt, history):
    """
    Rewrite a user's request into one detailed, professional instruction
    paragraph, using rent roll context and an Anthropic Claude model.

    The context sent to the model is built from the current dataframe in
    ``app_state["df"]``. If the prompt mentions a version identifier matching
    ``v_YYYYMMDD_HHMMSS`` and ``rent_roll_versions/rent_roll_<version>.csv``
    exists, that file is loaded and used for the context instead.

    Args:
        user_prompt: The raw text typed by the user.
        history: Chat history as a sequence of ``(user_message, ai_message)``
            pairs; only the last three are summarised. Either element may be
            ``None``; entries that are not 2-item lists/tuples are ignored.

    Returns:
        str: The enhanced prompt. The original prompt is returned unchanged
        when it is blank, with an "upload a file" note when no dataframe is
        loaded, and with an "enhancement temporarily unavailable" note when
        anything in the enhancement step raises.
    """
    global app_state

    if not user_prompt.strip():
        return user_prompt

    # .get() rather than ["df"]: a state dict that has no "df" key yet means
    # "nothing loaded" and must not raise KeyError.
    if app_state is None or app_state.get("df") is None:
        return user_prompt + " (Note: Please upload a rent roll file first for better analysis)"

    # Bound before the try so the fallback handler below can always read it.
    version_match = None

    try:
        # Step 1: Detect version reference in user prompt
        version_pattern = r'v_\d{8}_\d{6}'
        version_match = re.search(version_pattern, user_prompt)
        requested_version = version_match.group(0) if version_match else None

        version_context = ""
        df = app_state["df"]  # Default to current dataframe
        version_info_text = ""

        if version_match:
            logger.info(f"User requested specific version in prompt enhancement: {requested_version}")

            # Try to load that specific version for context
            versions_dir = "rent_roll_versions"
            version_file = os.path.join(versions_dir, f"rent_roll_{requested_version}.csv")

            if os.path.exists(version_file):
                # Load the requested version for context
                version_df = pd.read_csv(version_file)
                logger.info(f"Successfully loaded version {requested_version} for enhancement with shape {version_df.shape}")

                # Find version metadata
                version_info = None
                for v in app_state.get("df_versions", []):
                    if v.get("name") == requested_version:
                        version_info = v
                        break

                # Use this version for context
                df = version_df

                # Create detailed version context
                version_context = f"""
        VERSION-SPECIFIC ENHANCEMENT CONTEXT:
        ===================================
        User requested version: {requested_version}
        Version Description: {version_info.get('description', 'No description available') if version_info else 'Version not found in registry'}
        Version Created: {version_info.get('timestamp', 'Unknown') if version_info else 'Unknown'}
        Version Shape: {version_df.shape}
        Current/Latest Version Shape: {app_state["df"].shape}

        Version Differences:
        - Row count difference: {version_df.shape[0] - app_state["df"].shape[0]} rows
        - Column count difference: {version_df.shape[1] - app_state["df"].shape[1]} columns
        - Version is {'older' if version_info and app_state.get('df_versions', [])[-1]['name'] != requested_version else 'the latest'} version

        CRITICAL: The enhanced prompt must specify that analysis should be performed on version {requested_version}
        """

                version_info_text = f"version {requested_version}"

                # Check if columns differ
                current_cols = set(app_state["df"].columns)
                version_cols = set(version_df.columns)

                if current_cols != version_cols:
                    added_cols = current_cols - version_cols
                    removed_cols = version_cols - current_cols

                    if added_cols:
                        version_context += f"\n        Columns added since this version: {list(added_cols)}"
                    if removed_cols:
                        version_context += f"\n        Columns removed since this version: {list(removed_cols)}"

            else:
                # Version file not found
                logger.warning(f"Requested version {requested_version} not found at {version_file} during enhancement")
                version_context = f"""
        VERSION REQUEST NOTE FOR ENHANCEMENT:
        ===================================
        ⚠️ User requested version '{requested_version}' but the file was not found.
        Enhancement will use current dataframe context but note the version request.
        Available versions: {[v['name'] for v in app_state.get('df_versions', [])][:5]}...

        CRITICAL: The enhanced prompt must include a note about the version request failure
        """
                version_info_text = f"requested version {requested_version} (file not found, using current)"
        else:
            # No version specified, note that we're using current
            latest_version_name = app_state.get('df_versions', [])[-1]['name'] if app_state.get('df_versions') else 'current'
            version_context = f"""
        USING CURRENT/LATEST VERSION FOR ENHANCEMENT: {latest_version_name}
        =============================================================
        No specific version requested - using the latest dataframe for context
        """
            version_info_text = "current/latest version"

        # Prepare comprehensive context with the selected dataframe
        df_context = f"""
        RENT ROLL CONTEXT FOR PROMPT ENHANCEMENT:
        ========================================

        {version_context}

        DataFrame Info ({version_info_text}):
        - Shape: {df.shape} (rows: {df.shape[0]}, columns: {df.shape[1]})
        - Columns: {list(df.columns)}
        - Data Types: {dict(df.dtypes.astype(str))}

        Sample Data from {version_info_text.upper()} (first 10 rows):
        {df.head(10).to_string()}

        Data Quality Overview:
        - Null values per column: {dict(df.isnull().sum())}
        - Memory usage: {df.memory_usage(deep=True).sum()} bytes

        Available Versions: {len(app_state.get('df_versions', []))} saved versions
        Version Names: {[v['name'] for v in app_state.get('df_versions', [])][:5]}{'...' if len(app_state.get('df_versions', [])) > 5 else ''}
        """

        # Get recent chat context (last 3 exchanges)
        recent_context = ""
        if history and len(history) > 0:
            recent_exchanges = history[-3:]  # Last 3 exchanges
            for i, exchange in enumerate(recent_exchanges, 1):
                # Only (user, ai) pairs can be summarised; anything else
                # (e.g. a role/content dict) is skipped rather than mis-read.
                if not (isinstance(exchange, (list, tuple)) and len(exchange) == 2):
                    continue
                user_msg, ai_msg = exchange
                # Either side may be None (e.g. a reply that has not arrived
                # yet) or a non-string object; slicing it directly raised
                # TypeError and aborted the whole enhancement.
                recent_context += f"Exchange {i}:\n"
                recent_context += f"User: {str(user_msg or '')[:100]}...\n"
                recent_context += f"AI: {str(ai_msg or '')[:100]}...\n\n"

        # Get issues context if available (str() so non-string issues don't break slicing)
        issues_context = ""
        if app_state.get("issues"):
            issues_context = f"\nKnown Issues in Data:\n" + "\n".join([f"- {str(issue)[:100]}..." for issue in app_state["issues"][:5]])

        # MODIFIED: Create professional, instructional, single-paragraph enhancement prompt
        enhancement_prompt = f"""
        You are an expert commercial real estate data analyst. Transform the user's request into a single, comprehensive, professional paragraph that provides detailed instructions for rent roll analysis.

        CURRENT USER PROMPT: "{user_prompt}"

        {df_context}

        RECENT CONVERSATION CONTEXT:
        {recent_context}

        {issues_context}

        VERSION AWARENESS:
        - The user {'HAS SPECIFICALLY REQUESTED' if version_match else 'has NOT specified'} a particular version to work with
        {'- Working with version: ' + requested_version + ' (MUST be included in enhanced prompt)' if version_match else '- Working with the current/latest version'}

        ENHANCEMENT REQUIREMENTS:
        Transform the user's request into a single, professional paragraph that:
        • {'Clearly specifies analysis should be performed on version ' + requested_version if version_match else 'Notes that analysis should use the current/latest version'}
        • References specific column names from the dataset: {list(df.columns)[:8]}
        • Uses authoritative, instructional language with technical precision
        • Includes specific CRE terminology and business context
        • Specifies expected outputs, validation steps, and quality checks
        • Provides clear methodology and success criteria
        • Maintains professional tone throughout

        EXAMPLE TRANSFORMATIONS:

        "show me the data" →
        {"Perform a comprehensive analysis of rent roll dataset version " + requested_version if version_match else "Perform a comprehensive analysis of the current rent roll dataset"} by generating summary statistics for all key columns including tenant names, unit identifiers, rent amounts, and lease dates, conducting a thorough data quality assessment to identify missing values, inconsistencies, and outliers, calculating portfolio-level metrics such as total occupied units, average rent per unit, occupancy rates, and rent distribution patterns, and producing a detailed report that highlights any data anomalies requiring immediate attention while ensuring all findings are validated against industry standards for commercial real estate analysis.

        "fix the unit column" →
        {"Execute data standardization procedures on the Unit column within " + ("version " + requested_version if version_match else "the current rent roll dataset")} by systematically analyzing existing data patterns to identify formatting inconsistencies, implementing a standardized unit identifier format that aligns with commercial real estate best practices, validating data integrity through cross-referencing with other unit-related columns, correcting all identified discrepancies while maintaining audit trail documentation, and generating a comprehensive before-and-after comparison report that demonstrates the improvements made and confirms all unit identifiers are properly formatted for subsequent portfolio analysis and reporting requirements.

        Return ONLY a single professional paragraph that transforms the user's request into comprehensive, authoritative instructions. Use technical precision, industry terminology, and maintain an instructional tone throughout.
        """

        # Call the Claude model for enhancement. The name is kept in one place
        # so the log line below cannot drift from the model actually used.
        model_name = "claude-3-7-sonnet-20250219"
        client = Anthropic(api_key=DEFAULT_ANTHROPIC_API_KEY)

        response = client.messages.create(
            model=model_name,
            max_tokens=200,
            temperature=0.2,
            system="You are an expert at creating professional, authoritative, single-paragraph instructions for commercial real estate rent roll analysis. Transform user requests into comprehensive, technical instructions that specify methodology, expected outputs, and validation steps. Return only one professional paragraph.",
            messages=[
                {"role": "user", "content": enhancement_prompt}
            ]
        )

        # The token budget is small for a "comprehensive" paragraph; surface
        # truncation instead of silently returning a cut-off sentence.
        if getattr(response, "stop_reason", None) == "max_tokens":
            logger.warning("Enhanced prompt reached the max_tokens limit and may be truncated")

        enhanced_prompt = response.content[0].text.strip()

        # Remove any quotes if Claude wrapped the response
        if enhanced_prompt.startswith('"') and enhanced_prompt.endswith('"'):
            enhanced_prompt = enhanced_prompt[1:-1]

        logger.info(f"Enhanced prompt with {model_name}: {enhanced_prompt[:100]}...")
        return enhanced_prompt

    except Exception as e:
        logger.error(f"Error enhancing prompt with version awareness: {e}")
        # Return original prompt with a note if enhancement fails
        version_note = f" (Note: Working with {version_match.group(0) if version_match else 'current version'} - enhancement temporarily unavailable)"
        return f"{user_prompt}{version_note}"

In [None]:
import re
import os
import pandas as pd

# def enhance_user_prompt_with_context(user_prompt, history):
#     """
#     Enhanced version: Enhance user prompt with rent roll context and CRE best practices using GPT-4
#     Now properly integrates with version-specific dataframe loading
#     """
#     global app_state

#     if not user_prompt.strip():
#         return user_prompt

#     if app_state is None or app_state["df"] is None:
#         return user_prompt + " (Note: Please upload a rent roll file first for better analysis)"

#     try:
#         # Step 1: Detect version reference in user prompt
#         version_pattern = r'v_\d{8}_\d{6}'
#         version_match = re.search(version_pattern, user_prompt)

#         version_context = ""
#         df = app_state["df"]  # Default to current dataframe
#         version_info_text = ""

#         if version_match:
#             requested_version = version_match.group(0)
#             logger.info(f"User requested specific version in prompt enhancement: {requested_version}")

#             # Try to load that specific version for context
#             versions_dir = "rent_roll_versions"
#             version_file = os.path.join(versions_dir, f"rent_roll_{requested_version}.csv")

#             if os.path.exists(version_file):
#                 # Load the requested version for context
#                 version_df = pd.read_csv(version_file)
#                 logger.info(f"Successfully loaded version {requested_version} for enhancement with shape {version_df.shape}")

#                 # Find version metadata
#                 version_info = None
#                 for v in app_state.get("df_versions", []):
#                     if v.get("name") == requested_version:
#                         version_info = v
#                         break

#                 # Use this version for context
#                 df = version_df

#                 # Create detailed version context
#                 version_context = f"""
#         VERSION-SPECIFIC ENHANCEMENT CONTEXT:
#         ===================================
#         User requested version: {requested_version}
#         Version Description: {version_info.get('description', 'No description available') if version_info else 'Version not found in registry'}
#         Version Created: {version_info.get('timestamp', 'Unknown') if version_info else 'Unknown'}
#         Version Shape: {version_df.shape}
#         Current/Latest Version Shape: {app_state["df"].shape}

#         Version Differences:
#         - Row count difference: {version_df.shape[0] - app_state["df"].shape[0]} rows
#         - Column count difference: {version_df.shape[1] - app_state["df"].shape[1]} columns
#         - Version is {'older' if version_info and app_state.get('df_versions', [])[-1]['name'] != requested_version else 'the latest'} version

#         CRITICAL: The enhanced prompt must specify that analysis should be performed on version {requested_version}
#         """

#                 version_info_text = f"version {requested_version}"

#                 # Check if columns differ
#                 current_cols = set(app_state["df"].columns)
#                 version_cols = set(version_df.columns)

#                 if current_cols != version_cols:
#                     added_cols = current_cols - version_cols
#                     removed_cols = version_cols - current_cols

#                     if added_cols:
#                         version_context += f"\n        Columns added since this version: {list(added_cols)}"
#                     if removed_cols:
#                         version_context += f"\n        Columns removed since this version: {list(removed_cols)}"

#             else:
#                 # Version file not found
#                 logger.warning(f"Requested version {requested_version} not found at {version_file} during enhancement")
#                 version_context = f"""
#         VERSION REQUEST NOTE FOR ENHANCEMENT:
#         ===================================
#         ⚠️ User requested version '{requested_version}' but the file was not found.
#         Enhancement will use current dataframe context but note the version request.
#         Available versions: {[v['name'] for v in app_state.get('df_versions', [])][:5]}...

#         CRITICAL: The enhanced prompt must include a note about the version request failure
#         """
#                 version_info_text = f"requested version {requested_version} (file not found, using current)"
#         else:
#             # No version specified, note that we're using current
#             latest_version_name = app_state.get('df_versions', [])[-1]['name'] if app_state.get('df_versions') else 'current'
#             version_context = f"""
#         USING CURRENT/LATEST VERSION FOR ENHANCEMENT: {latest_version_name}
#         =============================================================
#         No specific version requested - using the latest dataframe for context
#         """
#             version_info_text = "current/latest version"

#         # Prepare comprehensive context with the selected dataframe
#         df_context = f"""
#         RENT ROLL CONTEXT FOR PROMPT ENHANCEMENT:
#         ========================================

#         {version_context}

#         DataFrame Info ({version_info_text}):
#         - Shape: {df.shape} (rows: {df.shape[0]}, columns: {df.shape[1]})
#         - Columns: {list(df.columns)}
#         - Data Types: {dict(df.dtypes.astype(str))}

#         Sample Data from {version_info_text.upper()} (first 10 rows):
#         {df.head(10).to_string()}

#         Data Quality Overview:
#         - Null values per column: {dict(df.isnull().sum())}
#         - Memory usage: {df.memory_usage(deep=True).sum()} bytes

#         Available Versions: {len(app_state.get('df_versions', []))} saved versions
#         Version Names: {[v['name'] for v in app_state.get('df_versions', [])][:5]}{'...' if len(app_state.get('df_versions', [])) > 5 else ''}
#         """

#         # Get recent chat context (last 3 exchanges)
#         recent_context = ""
#         if history and len(history) > 0:
#             recent_exchanges = history[-3:]  # Last 3 exchanges
#             for i, (user_msg, ai_msg) in enumerate(recent_exchanges, 1):
#                 recent_context += f"Exchange {i}:\n"
#                 recent_context += f"User: {user_msg[:100]}...\n"
#                 recent_context += f"AI: {ai_msg[:100]}...\n\n"

#         # Get issues context if available
#         issues_context = ""
#         if app_state.get("issues"):
#             issues_context = f"\nKnown Issues in Data:\n" + "\n".join([f"- {issue[:100]}..." for issue in app_state["issues"][:5]])

#         # Create enhancement prompt for GPT-4 with enhanced version awareness
#         enhancement_prompt = f"""
#         You are an expert commercial real estate analyst assistant. Your job is to enhance user prompts to be more specific, actionable, and contextually aware for rent roll analysis.

#         CURRENT USER PROMPT: "{user_prompt}"

#         {df_context}

#         RECENT CONVERSATION CONTEXT:
#         {recent_context}

#         {issues_context}

#         VERSION AWARENESS CRITICAL INSTRUCTIONS:
#         =======================================
#         - The user {'HAS SPECIFICALLY REQUESTED' if version_match else 'has NOT specified'} a particular version to work with
#         {'- Working with version: ' + requested_version + ' (MUST be included in enhanced prompt)' if version_match else '- Working with the current/latest version'}
#         - Ensure the enhanced prompt clearly specifies which version should be used for analysis
#         - If a specific version was requested, the enhanced prompt MUST include the version identifier
#         - Make it clear that version-specific analysis is required when applicable

#         Please enhance the user's prompt by:

#         1. **Version Specification**: {'Clearly state that analysis should be performed on version ' + requested_version if version_match else 'Note that analysis should use the current/latest version'}
#         2. **Add Specific Context**: Reference actual column names from the {version_info_text} dataframe
#         3. **CRE Terminology**: Use proper commercial real estate terminology
#         4. **Technical Precision**: Specify data types, calculations, or analysis methods needed
#         5. **Business Goals**: Frame the request in terms of CRE business objectives
#         6. **Actionable Instructions**: Make the request more specific and executable
#         7. **Data Validation**: Include any data quality checks that should be performed

#         ENHANCEMENT GUIDELINES WITH VERSION AWARENESS:
#         =============================================
#         - If a version is specified, ALWAYS include it prominently in the enhanced prompt
#         - If asking about rent, specify which rent columns (base rent, total rent, etc.) from the {version_info_text}
#         - If asking about tenants, reference actual tenant-related columns from the {version_info_text}
#         - If asking about dates, specify lease start/end date columns from the {version_info_text}
#         - If asking about calculations, be specific about formulas needed for the {version_info_text}
#         - If asking about analysis, suggest specific CRE metrics or insights for the {version_info_text}
#         - If asking about cleaning, reference specific data quality issues in the {version_info_text}
#         - Keep the user's original intent but make it more professional and version-aware

#         EXAMPLE TRANSFORMATIONS WITH VERSION AWARENESS:
#         =============================================

#         {"show me the data from " + requested_version if version_match else "show me the data"} →
#         {"Display a comprehensive overview of the rent roll data from version " + requested_version if version_match else "Display a comprehensive overview of the current rent roll data"}, including tenant information from [Tenant] column, rent amounts from [Base Rent] and [Total Rent] columns, lease dates from [Lease Start Date] and [Lease End Date] columns, and occupancy status. {"Compare key metrics with the current version and highlight any significant changes." if version_match else "Highlight any data quality issues or notable patterns."}

#         {"fix the rent in " + ("version " + requested_version if version_match else "the table")} →
#         {"Using " + ("version " + requested_version if version_match else "the current rent roll data")}, analyze and standardize the rent-related columns ([Base Rent], [Total Rent], [CAM Charges] if available) by: 1) Identifying and correcting formatting inconsistencies, 2) Validating that total rent calculations are accurate, 3) Flagging any unusual rent amounts that may need review, and 4) Ensuring all currency values are properly formatted for CRE reporting standards. Save the cleaned data as a new version with a descriptive name.

#         Return ONLY the enhanced prompt, nothing else. The enhanced prompt should be professional, version-aware, specific, and ready for immediate execution by a CRE analysis AI system.
#         """

#         # Call GPT-4 for enhancement
#         client = OpenAI(api_key=DEFAULT_OPENAI_API_KEY)

#         response = client.chat.completions.create(
#             model="gpt-4o",
#             messages=[
#                 {"role": "system", "content": "You are an expert at enhancing user prompts for commercial real estate rent roll analysis with precise version awareness. Return only the enhanced prompt, nothing else."},
#                 {"role": "user", "content": enhancement_prompt}
#             ],
#             max_tokens=800,
#             temperature=0.3
#         )

#         enhanced_prompt = response.choices[0].message.content.strip()

#         # Remove any quotes if GPT-4 wrapped the response
#         if enhanced_prompt.startswith('"') and enhanced_prompt.endswith('"'):
#             enhanced_prompt = enhanced_prompt[1:-1]

#         logger.info(f"Enhanced prompt with version awareness: {enhanced_prompt[:100]}...")
#         return enhanced_prompt

#     except Exception as e:
#         logger.error(f"Error enhancing prompt with version awareness: {e}")
#         # Return original prompt with a note if enhancement fails
#         version_note = f" (Note: Working with {version_match.group(0) if version_match else 'current version'} - enhancement temporarily unavailable)"
#         return f"{user_prompt}{version_note}"



def enhance_prompt_interface(user_prompt, history):
    """
    Interface function for the enhance button.

    Args:
        user_prompt: Prompt text typed by the user. ``None`` is treated like an
            empty prompt.
        history: Conversation history, forwarded unchanged to
            ``enhance_user_prompt_with_context``.

    Returns:
        tuple: ``(status_message, prompt_text)``. ``prompt_text`` is the enhanced
        prompt when enhancement ran, otherwise the prompt that was passed in
        (or ``""`` when it was ``None``).
    """
    # Guard against None before calling .strip(); this check runs outside the
    # try block, so an AttributeError here would otherwise escape to the caller.
    if user_prompt is None or not user_prompt.strip():
        return "Please enter a prompt first", user_prompt or ""

    try:
        # Check if version is mentioned (identifiers look like v_YYYYMMDD_HHMMSS)
        version_pattern = r'v_\d{8}_\d{6}'
        has_version = bool(re.search(version_pattern, user_prompt))

        enhanced = enhance_user_prompt_with_context(user_prompt, history)

        if enhanced != user_prompt:
            if has_version:
                status_msg = "✨ Prompt enhanced with specific version context and CRE best practices!"
            else:
                status_msg = "✨ Prompt enhanced with rent roll context and CRE best practices!"
        else:
            status_msg = "ℹ️ Prompt is already well-structured"

        return status_msg, enhanced

    except Exception as e:
        # Best effort: report the failure in the status slot and keep the user's text
        return f"❌ Enhancement failed: {str(e)}", user_prompt

In [None]:
def analyze_dataframe_changes_with_gpt4(original_df, modified_df, user_description=""):
    """
    Compare an original and a manually edited dataframe, log every step to a
    session log file, and run the prompt-generation / replication pipeline.

    Workflow (all steps are appended to a text log under
    ``manual_edit_analysis_logs/``):
    1. Log both dataframes in full (text, CSV, schema analysis).
    2. Compare schemas via ``_compare_dataframe_schemas``.
    3. When shapes and column order match, compare cell by cell via
       ``_enhanced_cell_comparison``.
    4. Generate a prompt, test its replication and build the final analysis via
       the ``_generate_...``, ``_test_...`` and ``_get_enhanced_final_analysis_...``
       helpers.

    Args:
        original_df: DataFrame before the manual edit.
        modified_df: DataFrame after the manual edit.
        user_description: Optional free-text description of the edit; it is
            written to the log and passed to the helper functions.

    Returns:
        dict: The final analysis returned by
        ``_get_enhanced_final_analysis_with_prompts_logged`` extended with
        ``raw_gpt_response``, ``complete_comparison_performed``,
        ``enhanced_features_applied``, ``log_file_path`` and
        ``full_change_statistics``. If any step raises, a fallback dict with
        ``error``, ``error_logged`` and ``log_file_path`` is returned instead.
    """
    import os
    import json
    import traceback
    from datetime import datetime

    # Create detailed logging directory
    logs_dir = "manual_edit_analysis_logs"
    os.makedirs(logs_dir, exist_ok=True)

    # Generate unique log session ID (timestamp truncated to milliseconds)
    log_session_id = f"manual_edit_{datetime.now().strftime('%Y%m%d_%H%M%S_%f')[:-3]}"
    log_file_path = os.path.join(logs_dir, f"{log_session_id}_detailed_analysis.txt")

    def log_to_file(content, section_title=""):
        """Append content (with an optional banner title) to the session log; never raises."""
        try:
            with open(log_file_path, 'a', encoding='utf-8') as f:
                if section_title:
                    f.write(f"\n{'='*80}\n")
                    f.write(f"{section_title}\n")
                    f.write(f"{'='*80}\n")
                f.write(f"{content}\n")
                f.flush()
        except Exception as e:
            print(f"ERROR writing to log file: {e}")

    def _percent(part, whole):
        """Return part/whole as a percentage, or 0.0 when whole is zero (empty dataframe)."""
        return (part / whole * 100) if whole else 0.0

    # Initialize comprehensive log
    try:
        log_to_file(f"""COMPREHENSIVE MANUAL EDIT ANALYSIS LOG - ENHANCED VERSION
Session ID: {log_session_id}
Timestamp: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}
User Description: "{user_description}"
Log File Path: {log_file_path}

ENHANCEMENTS APPLIED:
1. ✅ Fixed logging string formatting bug
2. ✅ Enhanced data type/formatting precision matching
3. ✅ Implemented attempt-to-attempt learning system
4. ✅ Added comprehensive data type validation

ANALYSIS OVERVIEW:
This log contains the complete workflow of analyzing manual dataframe edits:
1. Original vs Modified dataframe comparison with data type analysis
2. GPT-4.1 prompt generation with schema validation
3. Claude API calls with learning feedback loops
4. Code execution with precision validation
5. Replication success/failure analysis with detailed diagnostics

FUNCTION EXECUTION STATUS: STARTING...
""", "SESSION INITIALIZATION")

        print(f"📝 Enhanced manual edit analysis log initialized: {log_file_path}")

    except Exception as init_error:
        print(f"CRITICAL: Could not initialize log file: {init_error}")

    try:
        # ENHANCED DATAFRAME ANALYSIS with data type focus
        log_to_file(f"""ORIGINAL DATAFRAME COMPLETE ANALYSIS:
Shape: {original_df.shape}
Columns: {list(original_df.columns)}
Data Types: {dict(original_df.dtypes.astype(str))}
Data Type Details: {json.dumps({col: str(dtype) for col, dtype in original_df.dtypes.items()}, indent=2)}
Memory Usage: {original_df.memory_usage(deep=True).sum()} bytes
Null Counts: {dict(original_df.isnull().sum())}
Index Type: {type(original_df.index).__name__}
Column Order: {list(original_df.columns)}

FIRST 10 ROWS PREVIEW:
{original_df.head(10).to_string()}

COMPLETE ORIGINAL DATAFRAME (ALL ROWS):
{original_df.to_string(max_rows=None, max_cols=None)}

ORIGINAL DATAFRAME AS CSV:
{original_df.to_csv(index=False)}

DATA TYPE ANALYSIS:
{_analyze_dataframe_schema(original_df, "ORIGINAL")}
""", "ORIGINAL DATAFRAME ANALYSIS")

        log_to_file(f"""MODIFIED DATAFRAME COMPLETE ANALYSIS:
Shape: {modified_df.shape}
Columns: {list(modified_df.columns)}
Data Types: {dict(modified_df.dtypes.astype(str))}
Data Type Details: {json.dumps({col: str(dtype) for col, dtype in modified_df.dtypes.items()}, indent=2)}
Memory Usage: {modified_df.memory_usage(deep=True).sum()} bytes
Null Counts: {dict(modified_df.isnull().sum())}
Index Type: {type(modified_df.index).__name__}
Column Order: {list(modified_df.columns)}

FIRST 10 ROWS PREVIEW:
{modified_df.head(10).to_string()}

COMPLETE MODIFIED DATAFRAME (ALL ROWS):
{modified_df.to_string(max_rows=None, max_cols=None)}

MODIFIED DATAFRAME AS CSV:
{modified_df.to_csv(index=False)}

DATA TYPE ANALYSIS:
{_analyze_dataframe_schema(modified_df, "MODIFIED")}
""", "MODIFIED DATAFRAME ANALYSIS")

        # Initialize OpenAI client
        try:
            client = OpenAI(api_key=DEFAULT_OPENAI_API_KEY)
            log_to_file("✅ OpenAI client initialized successfully", "API CLIENT SETUP")
        except Exception as client_error:
            log_to_file(f"❌ Failed to initialize OpenAI client: {client_error}", "API CLIENT SETUP ERROR")
            raise client_error

        # ENHANCED SCHEMA COMPARISON
        schema_comparison = _compare_dataframe_schemas(original_df, modified_df)
        log_to_file(f"""ENHANCED SCHEMA COMPARISON:
{json.dumps(schema_comparison, indent=2)}
""", "SCHEMA COMPARISON ANALYSIS")

        # Prepare enhanced comparison data for GPT-4
        original_info = {
            "shape": original_df.shape,
            "columns": list(original_df.columns),
            "dtypes": dict(original_df.dtypes.astype(str)),
            "dtype_details": {col: str(dtype) for col, dtype in original_df.dtypes.items()},
            "full_data": original_df.to_string(max_rows=None, max_cols=None),
            "full_csv": original_df.to_csv(index=False),
            "null_counts": dict(original_df.isnull().sum()),
            "memory_usage": original_df.memory_usage(deep=True).sum(),
            "schema_analysis": _analyze_dataframe_schema(original_df, "ORIGINAL"),
            "summary_stats": original_df.describe(include='all').to_string() if len(original_df) > 0 else "No data"
        }

        modified_info = {
            "shape": modified_df.shape,
            "columns": list(modified_df.columns),
            "dtypes": dict(modified_df.dtypes.astype(str)),
            "dtype_details": {col: str(dtype) for col, dtype in modified_df.dtypes.items()},
            "full_data": modified_df.to_string(max_rows=None, max_cols=None),
            "full_csv": modified_df.to_csv(index=False),
            "null_counts": dict(modified_df.isnull().sum()),
            "memory_usage": modified_df.memory_usage(deep=True).sum(),
            "schema_analysis": _analyze_dataframe_schema(modified_df, "MODIFIED"),
            "summary_stats": modified_df.describe(include='all').to_string() if len(modified_df) > 0 else "No data"
        }

        # Enhanced structural change detection
        shape_changed = original_df.shape != modified_df.shape
        columns_changed = set(original_df.columns) != set(modified_df.columns)
        schema_changes = schema_comparison

        log_to_file(f"""ENHANCED STRUCTURAL CHANGE ANALYSIS:
Shape Changed: {shape_changed}
- Original Shape: {original_df.shape}
- Modified Shape: {modified_df.shape}
- Rows Added: {max(0, modified_df.shape[0] - original_df.shape[0])}
- Rows Removed: {max(0, original_df.shape[0] - modified_df.shape[0])}
- Columns Added: {max(0, modified_df.shape[1] - original_df.shape[1])}
- Columns Removed: {max(0, original_df.shape[1] - modified_df.shape[1])}

Columns Changed: {columns_changed}
- Original Columns: {list(original_df.columns)}
- Modified Columns: {list(modified_df.columns)}
- Added Columns: {list(set(modified_df.columns) - set(original_df.columns))}
- Removed Columns: {list(set(original_df.columns) - set(modified_df.columns))}

SCHEMA CHANGES DETECTED:
{json.dumps(schema_changes, indent=2)}
""", "ENHANCED STRUCTURAL CHANGE ANALYSIS")

        # Enhanced cell-by-cell comparison with data type focus
        data_changes_detected = False
        changed_cells = []
        total_changes = 0
        changed_rows = set()
        changed_columns = set()
        data_type_mismatches = []

        # Cell-level comparison is only meaningful when both frames line up exactly
        if original_df.shape == modified_df.shape and list(original_df.columns) == list(modified_df.columns):
            print(f"🔍 Enhanced comparison: {len(original_df)} rows × {len(original_df.columns)} columns...")
            log_to_file(f"""STARTING ENHANCED CELL-BY-CELL COMPARISON:
Total cells to compare: {original_df.shape[0] * original_df.shape[1]}
Comparing {len(original_df)} rows × {len(original_df.columns)} columns
Enhanced features: Data type validation, formatting precision, null handling
This may take time for large dataframes...
""", "ENHANCED CELL-BY-CELL COMPARISON START")

            # Enhanced cell comparison with data type awareness
            for i in range(len(original_df)):
                row_has_changes = False
                row_changes = []

                for col in original_df.columns:
                    try:
                        orig_val = original_df.iloc[i][col]
                        mod_val = modified_df.iloc[i][col]

                        # Enhanced comparison with data type awareness
                        change_detected, change_detail = _enhanced_cell_comparison(
                            orig_val, mod_val, i, col,
                            original_df.dtypes[col], modified_df.dtypes[col]
                        )

                        if change_detected:
                            data_changes_detected = True
                            total_changes += 1
                            row_has_changes = True
                            changed_columns.add(col)
                            row_changes.append(change_detail)
                            changed_cells.append(change_detail)

                            # Track data type mismatches
                            if change_detail.get('data_type_changed'):
                                data_type_mismatches.append(change_detail)

                    except Exception as e:
                        # A single unreadable cell must not abort the whole comparison
                        log_to_file(f"ERROR comparing cell at row {i}, column '{col}': {str(e)}")
                        continue

                if row_has_changes:
                    changed_rows.add(i)

                # Log progress every 100 rows
                if (i + 1) % 100 == 0:
                    log_to_file(f"Progress: Processed {i + 1}/{len(original_df)} rows, found {total_changes} changes so far")

            print(f"✅ Enhanced comparison finished: {total_changes} changes, {len(data_type_mismatches)} data type mismatches")

            # Log comprehensive enhanced change analysis
            # (_percent guards the ratios so a zero-row / zero-column frame cannot divide by zero)
            log_to_file(f"""ENHANCED CELL-BY-CELL COMPARISON RESULTS:
=====================================
Total Changes Detected: {total_changes}
Data Type Mismatches: {len(data_type_mismatches)}
Affected Rows: {len(changed_rows)} out of {original_df.shape[0]} ({_percent(len(changed_rows), original_df.shape[0]):.1f}%)
Affected Columns: {len(changed_columns)} out of {len(original_df.columns)} ({_percent(len(changed_columns), len(original_df.columns)):.1f}%)

AFFECTED COLUMNS LIST:
{list(changed_columns)}

AFFECTED ROWS LIST:
{sorted(list(changed_rows))[:50]}  # Show first 50 rows

DATA TYPE MISMATCHES:
{json.dumps(data_type_mismatches, indent=2) if data_type_mismatches else "None detected"}

ALL DETECTED CHANGES ({len(changed_cells)} total):
""", "ENHANCED COMPREHENSIVE CHANGE DETECTION RESULTS")

            # Log individual changes with enhanced details (capped at the first 100)
            for i, change in enumerate(changed_cells[:100]):  # Show first 100 changes
                change_type = change.get('change_type', 'unknown')
                data_type_info = f" [DType: {change.get('original_dtype', 'unknown')} → {change.get('modified_dtype', 'unknown')}]" if change.get('data_type_changed') else ""
                log_to_file(f"Change {i+1}: Row {change['row']}, Column '{change['column']}' ({change_type}){data_type_info}: '{change['original']}' → '{change['modified']}'")

        else:
            log_to_file(f"""CANNOT PERFORM CELL-BY-CELL COMPARISON:
Reason: Shape or column structure differs
Original shape: {original_df.shape}
Modified shape: {modified_df.shape}
Original columns: {list(original_df.columns)}
Modified columns: {list(modified_df.columns)}
""", "CELL-BY-CELL COMPARISON SKIPPED")

        # Enhanced change statistics (total_cells falls back to 1 to keep the density defined)
        total_cells = original_df.shape[0] * original_df.shape[1] if original_df.size > 0 else 1
        change_density = total_changes / total_cells

        log_to_file(f"""ENHANCED COMPREHENSIVE STATISTICAL ANALYSIS:
===============================
Total Cells in Original: {total_cells}
Total Cells Changed: {total_changes}
Change Density: {change_density*100:.4f}%
Data Type Mismatches: {len(data_type_mismatches)}
Percentage of Rows Affected: {_percent(len(changed_rows), original_df.shape[0]):.2f}% ({len(changed_rows)}/{original_df.shape[0]})
Percentage of Columns Affected: {_percent(len(changed_columns), len(original_df.columns)):.2f}% ({len(changed_columns)}/{len(original_df.columns)})

ENHANCED CHANGE PATTERN ANALYSIS:
- NaN Changes: {len([c for c in changed_cells if c.get('change_type') == 'nan_change'])}
- Value Changes: {len([c for c in changed_cells if c.get('change_type') == 'value_change'])}
- Data Type Changes: {len([c for c in changed_cells if c.get('data_type_changed')])}
- Formatting Changes: {len([c for c in changed_cells if c.get('change_type') == 'formatting_change'])}
- Most Affected Columns: {sorted(changed_columns)[:10]}
- Row Change Distribution: Every {original_df.shape[0]//max(1,len(changed_rows)):.0f} rows on average

SCHEMA COMPATIBILITY ANALYSIS:
{json.dumps(schema_comparison, indent=2)}
""", "ENHANCED COMPREHENSIVE STATISTICAL ANALYSIS")

        # STEP 1: Enhanced GPT-4.1 prompt generation with schema awareness
        print("🧠 Enhanced GPT-4.1: Analyzing changes with schema validation...")
        log_to_file("🧠 STARTING ENHANCED GPT-4.1 ANALYSIS WITH SCHEMA VALIDATION...", "ENHANCED GPT-4.1 PROMPT GENERATION START")

        claude_prompt = _generate_enhanced_claude_prompt_with_gpt4_logged(
            client, original_info, modified_info, user_description,
            total_changes, changed_rows, changed_columns, changed_cells,
            schema_comparison, data_type_mismatches, log_to_file
        )

        # STEP 2: Enhanced Claude prompt testing with learning system
        print("🤖 Enhanced Claude testing with learning system...")
        log_to_file("🤖 STARTING ENHANCED CLAUDE REPLICATION WITH LEARNING SYSTEM...", "ENHANCED CLAUDE REPLICATION TESTING START")

        replication_results = _test_claude_prompt_replication_with_learning_logged(
            original_df, modified_df, claude_prompt, schema_comparison, max_attempts=3, log_to_file=log_to_file
        )

        # STEP 3: Enhanced final analysis
        log_to_file("📊 GENERATING ENHANCED FINAL ANALYSIS...", "ENHANCED FINAL ANALYSIS GENERATION START")

        final_analysis = _get_enhanced_final_analysis_with_prompts_logged(
            client, original_info, modified_info, user_description,
            total_changes, changed_rows, changed_columns, changed_cells,
            schema_comparison, data_type_mismatches, claude_prompt, replication_results, log_to_file
        )

        # Enhanced metadata
        final_analysis["raw_gpt_response"] = final_analysis.get("raw_gpt_response", "")
        final_analysis["complete_comparison_performed"] = True
        final_analysis["enhanced_features_applied"] = True
        final_analysis["log_file_path"] = log_file_path
        final_analysis["full_change_statistics"] = {
            "total_cells": total_cells,
            "total_changes": total_changes,
            "change_density": change_density,
            "data_type_mismatches": len(data_type_mismatches),
            "affected_rows": list(changed_rows),
            "affected_columns": list(changed_columns),
            "schema_changes": schema_comparison,
            "replication_tested": True,
            "replication_success": replication_results["final_success"],
            "replication_attempts": len(replication_results["attempts"]),
            "learning_applied": True,
            "all_changes": changed_cells
        }

        # "best_attempt" (or its score) may be present but None; fall back to an empty
        # mapping / zero so the summary still renders.
        best_attempt_score = (replication_results.get("best_attempt") or {}).get("match_score", 0) or 0
        best_score_formatted = f"{best_attempt_score:.2%}" if best_attempt_score > 0 else "N/A"

        # NOTE: the "Total Execution Time" line below records the completion
        # timestamp, not an elapsed duration.
        log_to_file(f"""ENHANCED ANALYSIS EXECUTION COMPLETED SUCCESSFULLY
==========================================
Total Execution Time: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}
Log File Saved: {log_file_path}
Log File Size: {os.path.getsize(log_file_path)} bytes

ENHANCED FINAL RESULTS SUMMARY:
- Replication Success: {replication_results["final_success"]}
- Best Match Score: {best_score_formatted}
- Total Replication Attempts: {len(replication_results["attempts"])}
- Changes Detected: {total_changes}
- Data Type Mismatches: {len(data_type_mismatches)}
- Change Density: {change_density*100:.4f}%
- Learning System Applied: ✅
- Schema Validation Applied: ✅

ENHANCEMENTS SUCCESSFULLY APPLIED:
1. ✅ Fixed logging string formatting bug
2. ✅ Enhanced data type/formatting precision matching
3. ✅ Implemented attempt-to-attempt learning system
4. ✅ Added comprehensive data type validation

COMPLETE ENHANCED FINAL ANALYSIS OBJECT:
{json.dumps(final_analysis, indent=2, default=str)}

✅ ENHANCED ANALYSIS COMPLETE - ALL LOGS SAVED TO: {log_file_path}
""", "ENHANCED FINAL EXECUTION RESULTS")

        print(f"📝 Enhanced analysis with all fixes applied - logs saved to: {log_file_path}")
        return final_analysis

    except Exception as e:
        # Collect the error context up front so the log entry itself cannot fail on formatting
        error_time = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
        error_type = type(e).__name__
        error_message = str(e)
        stack_trace = traceback.format_exc()

        # The arguments are always bound, but may be None or a non-DataFrame object
        original_shape = getattr(original_df, 'shape', 'Unknown')
        modified_shape = getattr(modified_df, 'shape', 'Unknown')

        error_details = f"""CRITICAL ERROR DURING ENHANCED ANALYSIS EXECUTION
========================================
Error Time: {error_time}
Error Type: {error_type}
Error Message: {error_message}

FULL STACK TRACE:
{stack_trace}

EXECUTION CONTEXT:
- User Description: "{user_description}"
- Original DF Shape: {original_shape}
- Modified DF Shape: {modified_shape}
- Log File: {log_file_path}

❌ ENHANCED ANALYSIS FAILED - ERROR LOGGED TO: {log_file_path}
"""

        log_to_file(error_details, "CRITICAL ERROR")

        error_msg = f"Error in enhanced GPT-4 dataframe analysis: {e}"
        logger.error(error_msg)
        print(f"❌ Enhanced analysis failed but logs saved to: {log_file_path}")

        return {
            "change_summary": f"Enhanced dataframe analysis failed: {user_description}",
            "change_type": "data_edit",
            "session_description": f"User made changes to entire dataframe. Description: {user_description}. Error in enhanced analysis: {error_message}",
            "error": error_message,
            "enhanced_features_applied": False,
            "complete_comparison_performed": False,
            "log_file_path": log_file_path,
            "error_logged": True
        }


# ENHANCEMENT 1: Data Type Schema Analysis Functions
def _analyze_dataframe_schema(df, label=""):
    """
    Analyze dataframe schema with detailed data type information
    """
    schema_analysis = {
        "label": label,
        "shape": df.shape,
        "column_count": len(df.columns),
        "row_count": len(df),
        "columns": {}
    }

    for col in df.columns:
        col_data = df[col]
        schema_analysis["columns"][col] = {
            "dtype": str(col_data.dtype),
            "python_type": str(type(col_data.dtype)),
            "null_count": int(col_data.isnull().sum()),
            "null_percentage": float(col_data.isnull().sum() / len(df) * 100) if len(df) > 0 else 0,
            "unique_count": int(col_data.nunique()),
            "memory_usage": int(col_data.memory_usage(deep=True)),
            "sample_values": [str(val) for val in col_data.dropna().head(3).tolist()],
            "is_numeric": pd.api.types.is_numeric_dtype(col_data),
            "is_datetime": pd.api.types.is_datetime64_any_dtype(col_data),
            "is_categorical": pd.api.types.is_categorical_dtype(col_data),
            "is_object": pd.api.types.is_object_dtype(col_data)
        }

    return schema_analysis


def _compare_dataframe_schemas(original_df, modified_df):
    """
    Compare schemas between original and modified dataframes.

    Args:
        original_df: DataFrame before the edit.
        modified_df: DataFrame after the edit.

    Returns:
        dict with shape/row/column-count change flags, a ``column_changes``
        mapping (one entry per column seen in either frame), a
        ``schema_compatibility_score`` (compatible columns / all columns, 1.0
        when there are no columns) and a ``critical_issues`` list of messages
        for incompatible, removed and added columns.
    """
    orig_schema = _analyze_dataframe_schema(original_df, "ORIGINAL")
    mod_schema = _analyze_dataframe_schema(modified_df, "MODIFIED")

    comparison = {
        "shape_changed": orig_schema["shape"] != mod_schema["shape"],
        "column_count_changed": orig_schema["column_count"] != mod_schema["column_count"],
        "row_count_changed": orig_schema["row_count"] != mod_schema["row_count"],
        "column_changes": {},
        "schema_compatibility_score": 0.0,
        "critical_issues": []
    }

    # Ordered union (original columns first, then columns only in modified_df).
    # A plain set would make the order of column_changes / critical_issues vary
    # between runs, which makes the serialized comparison non-reproducible.
    all_columns = list(dict.fromkeys(list(original_df.columns) + list(modified_df.columns)))
    compatible_columns = 0

    # Analyze column-by-column changes
    for col in all_columns:
        if col in original_df.columns and col in modified_df.columns:
            orig_col = orig_schema["columns"][col]
            mod_col = mod_schema["columns"][col]

            column_change = {
                "exists_in_both": True,
                "dtype_changed": orig_col["dtype"] != mod_col["dtype"],
                "original_dtype": orig_col["dtype"],
                "modified_dtype": mod_col["dtype"],
                "null_count_changed": orig_col["null_count"] != mod_col["null_count"],
                "type_compatibility": _check_type_compatibility(orig_col["dtype"], mod_col["dtype"])
            }

            if column_change["type_compatibility"]:
                compatible_columns += 1
            else:
                comparison["critical_issues"].append(f"Column '{col}': Incompatible type change {orig_col['dtype']} → {mod_col['dtype']}")

        elif col in original_df.columns:
            column_change = {
                "exists_in_both": False,
                "removed": True,
                "original_dtype": orig_schema["columns"][col]["dtype"]
            }
            comparison["critical_issues"].append(f"Column '{col}' was removed")

        else:
            column_change = {
                "exists_in_both": False,
                "added": True,
                "modified_dtype": mod_schema["columns"][col]["dtype"]
            }
            comparison["critical_issues"].append(f"Column '{col}' was added")

        comparison["column_changes"][col] = column_change

    # Calculate compatibility score (added/removed columns count as incompatible)
    total_columns = len(all_columns)
    comparison["schema_compatibility_score"] = compatible_columns / total_columns if total_columns > 0 else 1.0

    return comparison


def _check_type_compatibility(orig_dtype, mod_dtype):
    """
    Check if two data types are compatible for replication
    """
    # Exact match
    if str(orig_dtype) == str(mod_dtype):
        return True

    # Compatible numeric types
    numeric_types = ['int64', 'int32', 'float64', 'float32', 'number']
    if any(t in str(orig_dtype).lower() for t in numeric_types) and any(t in str(mod_dtype).lower() for t in numeric_types):
        return True

    # Compatible string/object types
    string_types = ['object', 'string', 'str']
    if any(t in str(orig_dtype).lower() for t in string_types) and any(t in str(mod_dtype).lower() for t in string_types):
        return True

    # Compatible datetime types
    datetime_types = ['datetime', 'timestamp']
    if any(t in str(orig_dtype).lower() for t in datetime_types) and any(t in str(mod_dtype).lower() for t in datetime_types):
        return True

    return False


# ENHANCEMENT 2: Enhanced Cell Comparison with Data Type Awareness
def _enhanced_cell_comparison(orig_val, mod_val, row_idx, col_name, orig_dtype, mod_dtype):
    """
    Enhanced cell comparison with data type awareness and precision handling
    """
    change_detail = {
        "row": row_idx,
        "column": col_name,
        "original": str(orig_val),
        "modified": str(mod_val),
        "original_dtype": str(orig_dtype),
        "modified_dtype": str(mod_dtype),
        "data_type_changed": str(orig_dtype) != str(mod_dtype),
        "change_type": "no_change"
    }

    # Handle NaN comparisons
    if pd.isna(orig_val) and pd.isna(mod_val):
        return False, change_detail
    elif pd.isna(orig_val) or pd.isna(mod_val):
        change_detail["change_type"] = "nan_change"
        return True, change_detail

    # Data type change detection
    if str(orig_dtype) != str(mod_dtype):
        change_detail["change_type"] = "data_type_change"
        # Still check if values are equivalent despite type change
        if _are_values_equivalent(orig_val, mod_val):
            change_detail["change_type"] = "data_type_change_equivalent_value"
            return True, change_detail
        else:
            return True, change_detail

    # Enhanced value comparison with precision handling
    if _are_values_equivalent(orig_val, mod_val):
        return False, change_detail

    # Check for formatting differences
    if _are_formatting_differences_only(orig_val, mod_val):
        change_detail["change_type"] = "formatting_change"
        return True, change_detail

    # Significant value change
    change_detail["change_type"] = "value_change"
    return True, change_detail


def _are_values_equivalent(val1, val2):
    """
    Check if two values are equivalent despite potential formatting differences
    """
    # Exact string match after stripping
    if str(val1).strip() == str(val2).strip():
        return True

    # Numeric equivalence check
    try:
        # Handle currency and comma formatting
        clean_val1 = str(val1).replace(',', '').replace('$', '').strip()
        clean_val2 = str(val2).replace(',', '').replace('$', '').strip()

        num1 = float(clean_val1)
        num2 = float(clean_val2)

        # Allow small floating point differences
        return abs(num1 - num2) < 1e-10

    except (ValueError, TypeError):
        pass

    # Date equivalence check
    try:
        import pandas as pd
        date1 = pd.to_datetime(val1)
        date2 = pd.to_datetime(val2)
        return date1 == date2
    except:
        pass

    return False


def _are_formatting_differences_only(val1, val2):
    """
    Check if differences are only formatting (whitespace, case, etc.)
    """
    try:
        # Case-insensitive comparison
        if str(val1).strip().lower() == str(val2).strip().lower():
            return True

        # Numeric formatting differences
        clean_val1 = str(val1).replace(',', '').replace('$', '').replace(' ', '').strip()
        clean_val2 = str(val2).replace(',', '').replace('$', '').replace(' ', '').strip()

        if clean_val1 == clean_val2:
            return True

    except:
        pass

    return False


# ENHANCEMENT 3: Enhanced GPT-4.1 Prompt Generation with Schema Awareness
def _generate_enhanced_claude_prompt_with_gpt4_logged(client, original_info, modified_info, user_description,
                                                     total_changes, changed_rows, changed_columns, changed_cells,
                                                     schema_comparison, data_type_mismatches, log_to_file):
    """
    Enhanced GPT-4.1 prompt generation with comprehensive schema awareness and data type focus
    """
    # Prepare detailed schema information for GPT-4.1
    schema_issues = schema_comparison.get("critical_issues", [])
    compatibility_score = schema_comparison.get("schema_compatibility_score", 0)

    sample_changes = changed_cells[:15] if changed_cells else []
    sample_type_mismatches = data_type_mismatches[:5] if data_type_mismatches else []

    enhanced_prompt_request = f"""
    You are an expert at analyzing dataframe changes and generating PRECISE prompts for Claude 3.7 to replicate manual edits with EXACT data type and formatting precision.

    CRITICAL REQUIREMENTS:
    1. Focus on DATA TYPE PRECISION - ensure exact dtype matching
    2. Handle FORMATTING with precision (decimals, currency, dates)
    3. Provide SPECIFIC transformation logic, not general filtering
    4. Include EXPLICIT data type conversion instructions
    5. Address schema compatibility issues

    ORIGINAL DATAFRAME:
    Shape: {original_info['shape']}
    Columns: {original_info['columns']}
    Data Types: {original_info['dtype_details']}

    Data Sample:
    {original_info['full_data'][:3000]}

    MODIFIED DATAFRAME:
    Shape: {modified_info['shape']}
    Columns: {modified_info['columns']}
    Data Types: {modified_info['dtype_details']}

    Data Sample:
    {modified_info['full_data'][:3000]}

    ENHANCED CHANGE ANALYSIS:
    - Total changes: {total_changes}
    - Schema compatibility score: {compatibility_score:.2f}
    - Data type mismatches: {len(data_type_mismatches)}
    - Critical schema issues: {schema_issues}
    - Changed rows: {list(changed_rows)[:20] if changed_rows else []}
    - Changed columns: {list(changed_columns)}

    SAMPLE DETAILED CHANGES:
    {json.dumps(sample_changes, indent=2)}

    DATA TYPE MISMATCHES DETECTED:
    {json.dumps(sample_type_mismatches, indent=2)}

    USER DESCRIPTION: "{user_description}"

    Generate a PRECISE prompt for Claude 3.7 that will replicate these exact changes with:

    1. EXACT DATA TYPE SPECIFICATIONS:
       - Include explicit dtype conversion commands
       - Specify decimal precision for floats
       - Handle string formatting requirements

    2. SPECIFIC TRANSFORMATION LOGIC:
       - Exact cell selection criteria
       - Precise value transformation formulas
       - Row-by-row or column-by-column instructions if needed

    3. SCHEMA VALIDATION:
       - Ensure output matches target schema exactly
       - Include data type verification steps

    4. BUSINESS CONTEXT for rent roll data:
       - Consider tenant information updates
       - Handle rent calculations precisely
       - Manage occupancy status changes

    The prompt should be executable Python pandas code that produces the EXACT modified dataframe.

    Return ONLY the Claude prompt text that focuses on precision and data type accuracy.
    """

    # Log the enhanced GPT-4.1 prompt
    log_to_file(f"""ENHANCED GPT-4.1 PROMPT TO GENERATE CLAUDE INSTRUCTIONS:
Model: gpt-4.1
Temperature: 0.05 (Lower for precision)
Max Tokens: 4000

ENHANCED PROMPT FEATURES:
- Schema compatibility analysis included
- Data type mismatch focus
- Formatting precision requirements
- Sample change details provided

COMPLETE ENHANCED PROMPT SENT TO GPT-4.1:
{'-'*60}
{enhanced_prompt_request}
{'-'*60}
""", "ENHANCED GPT-4.1 REQUEST")

    try:
        response = client.chat.completions.create(
            model="gpt-4.1",
            messages=[
                {"role": "system", "content": "You are an expert at generating precise data transformation prompts with exact data type and formatting specifications. Focus on precision and schema accuracy."},
                {"role": "user", "content": enhanced_prompt_request}
            ],
            max_tokens=4000,
            temperature=0.05  # Lower temperature for more precision
        )

        enhanced_gpt_response = response.choices[0].message.content.strip()

        # Log enhanced GPT-4.1 response
        log_to_file(f"""ENHANCED GPT-4.1 RESPONSE (CLAUDE PROMPT):
Response Length: {len(enhanced_gpt_response)} characters
Tokens Used: Approximately {len(enhanced_gpt_response.split())} words
Schema Focus: ✅ Applied
Data Type Precision: ✅ Applied

GENERATED ENHANCED CLAUDE PROMPT:
{'-'*60}
{enhanced_gpt_response}
{'-'*60}
""", "ENHANCED GPT-4.1 RESPONSE")

        return enhanced_gpt_response

    except Exception as e:
        error_msg = f"Enhanced GPT-4.1 API Error: {str(e)}"
        log_to_file(f"""ENHANCED GPT-4.1 API CALL FAILED:
Error: {error_msg}
Fallback: Using enhanced basic prompt with schema awareness
""")

        # Enhanced fallback prompt with schema awareness
        fallback_prompt = f"""Replicate the manual edits described as: {user_description}

CRITICAL REQUIREMENTS:
- Maintain exact data types: {modified_info['dtype_details']}
- Total changes to make: {total_changes}
- Focus on columns: {list(changed_columns)}
- Ensure precise formatting and data type conversion
- Output must match target schema exactly

Sample changes for reference:
{json.dumps(sample_changes[:5], indent=2)}
"""
        return fallback_prompt


# ENHANCEMENT 4: Claude Testing with Learning System
def _test_claude_prompt_replication_with_learning_logged(original_df, target_df, claude_prompt, schema_comparison, max_attempts=3, log_to_file=None):
    """
    Enhanced Claude testing with attempt-to-attempt learning system
    """
    attempts = []
    current_prompt = claude_prompt
    learning_history = []

    log_to_file(f"""ENHANCED CLAUDE REPLICATION TESTING WITH LEARNING STARTED:
Maximum Attempts: {max_attempts}
Target DataFrame Shape: {target_df.shape}
Original DataFrame Shape: {original_df.shape}
Schema Compatibility Score: {schema_comparison.get('schema_compatibility_score', 0):.2f}
Critical Schema Issues: {len(schema_comparison.get('critical_issues', []))}

LEARNING SYSTEM FEATURES:
✅ Attempt-to-attempt feedback loop
✅ Schema validation focus
✅ Data type precision tracking
✅ Progressive prompt refinement

INITIAL ENHANCED CLAUDE PROMPT TO TEST:
{'-'*60}
{claude_prompt}
{'-'*60}
""", "ENHANCED REPLICATION TESTING INITIALIZATION")

    for attempt in range(max_attempts):
        print(f"🔄 Enhanced Attempt {attempt + 1}/{max_attempts} with learning system")
        log_to_file(f"Starting enhanced attempt {attempt + 1}/{max_attempts} with accumulated learning...", f"ENHANCED ATTEMPT {attempt + 1}")

        try:
            # Apply learning from previous attempts
            if attempt > 0:
                current_prompt = _apply_learning_to_prompt(current_prompt, learning_history, target_df, log_to_file)

            # Call Claude with enhanced logging
            claude_response = _call_enhanced_claude_logged(current_prompt, original_df, target_df, schema_comparison, log_to_file, attempt + 1)

            # Enhanced code extraction
            code_blocks = _extract_enhanced_code_blocks(claude_response)

            log_to_file(f"""ENHANCED CODE EXTRACTION RESULTS:
Found {len(code_blocks)} code blocks
Enhanced Features: Schema validation, data type checking

Code Blocks Extracted:
""")

            for i, code in enumerate(code_blocks):
                log_to_file(f"Enhanced Code Block {i+1}:\n```python\n{code}\n```\n")

            if not code_blocks:
                attempt_result = {
                    "attempt": attempt + 1,
                    "success": False,
                    "error": "No code found in Claude response",
                    "prompt_used": current_prompt,
                    "learning_applied": len(learning_history) > 0
                }
                attempts.append(attempt_result)
                learning_history.append({
                    "attempt": attempt + 1,
                    "issue": "no_code_generated",
                    "resolution": "request_explicit_code_blocks"
                })
                log_to_file("No code blocks found - adding to learning history")
                continue

            # Enhanced code execution with schema validation
            test_df = original_df.copy()
            exec_globals = {
                "df": test_df,
                "pd": pd,
                "np": np,
                "os": os,
                "datetime": datetime
            }

            log_to_file("Starting enhanced code execution with schema validation...")
            execution_output = ""
            execution_success = True

            for i, code in enumerate(code_blocks):
                try:
                    log_to_file(f"Executing enhanced code block {i+1}...")
                    exec(code, exec_globals)
                    execution_output += f"Enhanced code block {i+1} executed successfully\n"
                except Exception as exec_error:
                    execution_output += f"Enhanced code block {i+1} failed: {str(exec_error)}\n"
                    log_to_file(f"Enhanced code block {i+1} execution error: {str(exec_error)}")
                    execution_success = False
                    learning_history.append({
                        "attempt": attempt + 1,
                        "issue": f"execution_error_{i+1}",
                        "error": str(exec_error),
                        "resolution": "fix_syntax_and_logic"
                    })

            result_df = exec_globals["df"]
            log_to_file(f"Enhanced execution completed. Result DataFrame shape: {result_df.shape}")

            # Enhanced validation with schema checking
            enhanced_validation = _enhanced_result_validation(target_df, result_df, schema_comparison)
            match_score = enhanced_validation["overall_score"]
            schema_match = enhanced_validation["schema_match"]
            data_type_match = enhanced_validation["data_type_match"]

            attempt_result = {
                "attempt": attempt + 1,
                "success": match_score >= 0.95 and schema_match,
                "match_score": match_score,
                "schema_match": schema_match,
                "data_type_match": data_type_match,
                "enhanced_validation": enhanced_validation,
                "generated_code": code_blocks,
                "prompt_used": current_prompt,
                "result_shape": result_df.shape,
                "target_shape": target_df.shape,
                "execution_output": execution_output,
                "execution_success": execution_success,
                "learning_applied": len(learning_history) > 0
            }

            attempts.append(attempt_result)

            log_to_file(f"""ENHANCED ATTEMPT {attempt + 1} RESULTS:
Overall Match Score: {match_score:.2%}
Schema Match: {'✅ PASSED' if schema_match else '❌ FAILED'}
Data Type Match: {'✅ PASSED' if data_type_match else '❌ FAILED'}
Success Threshold (95% + Schema): {'✅ PASSED' if (match_score >= 0.95 and schema_match) else '❌ FAILED'}
Result Shape: {result_df.shape}
Target Shape: {target_df.shape}
Execution Success: {'✅ YES' if execution_success else '❌ NO'}
Learning Applied: {'✅ YES' if len(learning_history) > 0 else '❌ NO'}

ENHANCED VALIDATION DETAILS:
{json.dumps(enhanced_validation, indent=2)}

Execution Output:
{execution_output}
""")

            print(f"📊 Enhanced Match Score: {match_score:.2%}, Schema: {'✅' if schema_match else '❌'}")

            # Add learning insights for next attempt
            if match_score < 0.95 or not schema_match:
                learning_insights = _analyze_failure_for_learning(target_df, result_df, enhanced_validation)
                learning_history.extend(learning_insights)
                log_to_file(f"Learning insights added: {json.dumps(learning_insights, indent=2)}")

            if match_score >= 0.95 and schema_match:
                print("✅ Enhanced replication successful!")
                log_to_file("🎉 ENHANCED REPLICATION SUCCESSFUL! Stopping attempts.")
                break

        except Exception as e:
            error_msg = str(e)
            attempt_result = {
                "attempt": attempt + 1,
                "success": False,
                "error": error_msg,
                "prompt_used": current_prompt,
                "learning_applied": len(learning_history) > 0
            }
            attempts.append(attempt_result)
            learning_history.append({
                "attempt": attempt + 1,
                "issue": "execution_exception",
                "error": error_msg,
                "resolution": "improve_error_handling"
            })
            log_to_file(f"ENHANCED ATTEMPT {attempt + 1} FAILED with error: {error_msg}")
            print(f"❌ Enhanced Error: {error_msg}")

    final_success = any(attempt["success"] for attempt in attempts)
    best_attempt = max(attempts, key=lambda x: x.get("match_score", 0)) if attempts else None

    log_to_file(f"""ENHANCED REPLICATION TESTING COMPLETE:
Final Success: {final_success}
Best Match Score: {best_attempt.get('match_score', 0):.2% if best_attempt else 'N/A'}
Best Schema Match: {best_attempt.get('schema_match', False) if best_attempt else False}
Total Attempts: {len(attempts)}
Total Learning Insights: {len(learning_history)}

LEARNING HISTORY SUMMARY:
{json.dumps(learning_history, indent=2)}

ALL ENHANCED ATTEMPTS SUMMARY:
""", "ENHANCED REPLICATION TESTING RESULTS")

    for i, attempt in enumerate(attempts, 1):
        success = attempt.get('success', False)
        score = attempt.get('match_score', 0)
        schema_match = attempt.get('schema_match', False)
        learning = attempt.get('learning_applied', False)
        error = attempt.get('error', 'None')

        log_to_file(f"Enhanced Attempt {i}: Success={success}, Score={score:.2%}, Schema={schema_match}, Learning={learning}, Error={error}")

    return {
        "attempts": attempts,
        "final_success": final_success,
        "best_attempt": best_attempt,
        "final_prompt": best_attempt["prompt_used"] if best_attempt else claude_prompt,
        "learning_history": learning_history,
        "learning_applied": len(learning_history) > 0
    }


# Learning System Helper Functions
def _apply_learning_to_prompt(base_prompt, learning_history, target_df, log_to_file):
    """
    Apply accumulated learning to improve the prompt
    """
    if not learning_history:
        return base_prompt

    # Analyze learning patterns
    common_issues = {}
    for learning in learning_history:
        issue = learning.get("issue", "unknown")
        if issue in common_issues:
            common_issues[issue] += 1
        else:
            common_issues[issue] = 1

    # Generate improvement suggestions based on learning
    improvements = []

    if "execution_error" in [l.get("issue", "") for l in learning_history]:
        improvements.append("Focus on syntactically correct pandas operations")

    if "schema_mismatch" in [l.get("issue", "") for l in learning_history]:
        improvements.append(f"Ensure exact data types: {dict(target_df.dtypes)}")

    if "data_type_mismatch" in [l.get("issue", "") for l in learning_history]:
        improvements.append("Include explicit dtype conversions using .astype()")

    if "no_code_generated" in [l.get("issue", "") for l in learning_history]:
        improvements.append("Generate code in ```python ``` blocks")

    # Apply improvements to prompt
    if improvements:
        learning_section = f"""

LEARNING FROM PREVIOUS ATTEMPTS:
Based on previous attempts, please pay special attention to:
{chr(10).join([f"• {improvement}" for improvement in improvements])}

COMMON ISSUES TO AVOID:
{chr(10).join([f"• {issue}: occurred {count} time(s)" for issue, count in common_issues.items()])}
"""
        improved_prompt = base_prompt + learning_section

        log_to_file(f"""LEARNING APPLIED TO PROMPT:
Improvements Added: {len(improvements)}
Common Issues Addressed: {len(common_issues)}

LEARNING SECTION ADDED:
{learning_section}
""")

        return improved_prompt

    return base_prompt


def _analyze_failure_for_learning(target_df, result_df, enhanced_validation):
    """
    Analyze failure to extract learning insights for next attempt
    """
    insights = []

    # Schema issues
    if not enhanced_validation.get("schema_match", True):
        if enhanced_validation.get("shape_mismatch", False):
            insights.append({
                "issue": "shape_mismatch",
                "details": f"Target: {target_df.shape}, Got: {result_df.shape}",
                "resolution": "check_row_filtering_and_column_selection"
            })

        if enhanced_validation.get("column_mismatch", False):
            insights.append({
                "issue": "column_mismatch",
                "details": f"Target cols: {list(target_df.columns)}, Got cols: {list(result_df.columns)}",
                "resolution": "ensure_exact_column_names_and_order"
            })

    # Data type issues
    if not enhanced_validation.get("data_type_match", True):
        type_mismatches = enhanced_validation.get("data_type_details", {})
        for col, details in type_mismatches.items():
            if details.get("mismatch", False):
                insights.append({
                    "issue": "data_type_mismatch",
                    "column": col,
                    "expected": details.get("target_dtype"),
                    "got": details.get("result_dtype"),
                    "resolution": f"add_explicit_conversion: df['{col}'] = df['{col}'].astype('{details.get('target_dtype')}')"
                })

    # Content issues
    content_score = enhanced_validation.get("content_score", 1.0)
    if content_score < 0.9:
        insights.append({
            "issue": "content_mismatch",
            "score": content_score,
            "resolution": "review_transformation_logic_for_cell_values"
        })

    return insights


# Enhanced Result Validation
def _enhanced_result_validation(target_df, result_df, schema_comparison):
    """
    Enhanced validation with comprehensive schema and data type checking
    """
    validation = {
        "overall_score": 0.0,
        "schema_match": False,
        "data_type_match": False,
        "content_score": 0.0,
        "shape_mismatch": False,
        "column_mismatch": False,
        "data_type_details": {}
    }

    # Shape validation
    if target_df.shape != result_df.shape:
        validation["shape_mismatch"] = True
        validation["overall_score"] = 0.0
        return validation

    # Column validation
    if list(target_df.columns) != list(result_df.columns):
        validation["column_mismatch"] = True
        validation["overall_score"] = 0.1
        return validation

    validation["schema_match"] = True

    # Data type validation
    type_match_count = 0
    total_columns = len(target_df.columns)

    for col in target_df.columns:
        target_dtype = str(target_df[col].dtype)
        result_dtype = str(result_df[col].dtype)

        type_compatible = _check_type_compatibility(target_dtype, result_dtype)

        validation["data_type_details"][col] = {
            "target_dtype": target_dtype,
            "result_dtype": result_dtype,
            "exact_match": target_dtype == result_dtype,
            "compatible": type_compatible,
            "mismatch": not type_compatible
        }

        if type_compatible:
            type_match_count += 1

    validation["data_type_match"] = type_match_count == total_columns

    # Content validation using the enhanced comparison
    if validation["schema_match"] and len(target_df) > 0:
        validation["content_score"] = _calculate_enhanced_match_score(target_df, result_df)
    else:
        validation["content_score"] = 0.0

    # Overall score calculation
    schema_weight = 0.3
    dtype_weight = 0.3
    content_weight = 0.4

    schema_score = 1.0 if validation["schema_match"] else 0.0
    dtype_score = type_match_count / total_columns if total_columns > 0 else 1.0

    validation["overall_score"] = (
        schema_weight * schema_score +
        dtype_weight * dtype_score +
        content_weight * validation["content_score"]
    )

    return validation


def _calculate_enhanced_match_score(target_df, result_df):
    """
    Enhanced match score calculation with improved precision
    """
    if target_df.empty and result_df.empty:
        return 1.0
    if target_df.empty or result_df.empty:
        return 0.0
    if target_df.shape != result_df.shape:
        return 0.0
    if list(target_df.columns) != list(result_df.columns):
        return 0.0

    total_cells = 0
    matching_cells = 0

    for i in range(len(target_df)):
        for col in target_df.columns:
            total_cells += 1

            try:
                target_val = target_df.iloc[i][col]
                result_val = result_df.iloc[i][col]

                if _are_values_equivalent(target_val, result_val):
                    matching_cells += 1

            except (IndexError, KeyError):
                continue

    return matching_cells / total_cells if total_cells > 0 else 0.0


# Enhanced Claude API Call
def _call_enhanced_claude_logged(prompt, original_df, target_df, schema_comparison, log_to_file, attempt_number):
    """
    Enhanced Claude API call with schema context and improved prompting
    """
    target_schema_info = {
        "shape": target_df.shape,
        "dtypes": dict(target_df.dtypes),
        "columns": list(target_df.columns),
        "sample_data": target_df.head(3).to_string()
    }

    log_to_file(f"""CALLING ENHANCED CLAUDE API - ATTEMPT {attempt_number}:
Model: claude-sonnet-4-20250514
Temperature: 0.1 (Optimized for precision)
Max Tokens: 3000

ENHANCED FEATURES:
✅ Target schema context provided
✅ Data type precision focus
✅ Schema compatibility warnings included

PROMPT BEING SENT TO CLAUDE:
{'-'*60}
{prompt}
{'-'*60}

ENHANCED CONTEXT PROVIDED:
Original DataFrame Shape: {original_df.shape}
Target DataFrame Shape: {target_df.shape}
Schema Compatibility Score: {schema_comparison.get('schema_compatibility_score', 0):.2f}
Critical Issues: {len(schema_comparison.get('critical_issues', []))}

TARGET SCHEMA REQUIREMENTS:
{json.dumps(target_schema_info, indent=2, default=str)}
""", f"ENHANCED CLAUDE API CALL {attempt_number}")

    try:
        # Get Anthropic client
        anthropic_client = Anthropic(api_key=DEFAULT_ANTHROPIC_API_KEY)

        # Enhanced dataframe context with target schema
        enhanced_df_summary = f"""
        CURRENT DATAFRAME CONTENT:
        {original_df.to_string(max_rows=50)}

        CURRENT DATAFRAME STATISTICS:
        - Shape: {original_df.shape}
        - Columns: {list(original_df.columns)}
        - Data types: {dict(original_df.dtypes)}
        - Null values per column: {dict(original_df.isnull().sum())}

        TARGET SCHEMA REQUIREMENTS (CRITICAL):
        - Target Shape: {target_df.shape}
        - Target Columns: {list(target_df.columns)}
        - Target Data Types: {dict(target_df.dtypes)}
        - Schema Compatibility Issues: {schema_comparison.get('critical_issues', [])}

        SAMPLE TARGET DATA:
        {target_df.head(3).to_string()}
        """

        # Enhanced system prompt
        enhanced_claude_system_prompt = """You are an expert Python data analyst specializing in applying existing code to new dataframes with minimal modifications.

        CORE PRINCIPLES:
        1. Preserve original business logic at all costs
        2. Make only minimal changes necessary for compatibility
        3. If column names differ, update references but keep all calculations identical
        4. Maintain the same data transformation goals
        5. Use the original code structure as much as possible

        Your job is to take working code and adapt it minimally to work with a new dataframe structure."""

        # Enhanced messages for Claude
        # enhanced_claude_messages = [
        #     {
        #         "role": "user",
        #         "content": f"Here is the enhanced context:\n\n{enhanced_df_summary}\n\nTASK WITH PRECISION REQUIREMENTS: {prompt}\n\nGenerate precise Python code to accomplish this exact transformation with schema validation."
        #     }
        # ]

        # Call Claude
        claude_response = anthropic_client.messages.create(
            model="claude-3-7-sonnet-20250219",
            system=enhanced_claude_system_prompt,
            messages=[{"role": "user", "content": full_claude_prompt}],
            max_tokens=3000,
            temperature=0.1  # Lower temperature for more precision
        )

        # Extract response text
        response_text = claude_response.content[0].text

        log_to_file(f"""ENHANCED CLAUDE API RESPONSE - ATTEMPT {attempt_number}:
Response Length: {len(response_text)} characters
Enhanced Features Applied: ✅
Response Received Successfully: ✅

FULL ENHANCED CLAUDE RESPONSE:
{'-'*60}
{response_text}
{'-'*60}
""")

        return response_text

    except Exception as e:
        error_msg = f"Enhanced Claude API Error: {str(e)}"
        log_to_file(f"""ENHANCED CLAUDE API CALL FAILED - ATTEMPT {attempt_number}:
Error: {error_msg}
Using enhanced fallback response
""")

        # Enhanced fallback response
        enhanced_fallback_response = f"""
        I'll help you with this enhanced transformation task. Here's the precise code:

        ```python
        # Enhanced error calling Claude API: {str(e)}
        # Enhanced fallback code with schema awareness
        print("Enhanced Claude API error, using schema-aware fallback")
        print(f"Current dataframe shape: {{df.shape}}")
        print(f"Target shape should be: {target_df.shape}")
        print(f"Target dtypes: {dict(target_df.dtypes)}")
        print("Please review transformation logic manually")

        # Basic schema validation
        if df.shape != {target_df.shape}:
            print("Warning: Shape mismatch detected")

        # Show current vs target schema
        print("\\nCurrent dtypes:", dict(df.dtypes))
        print("Target dtypes:", {dict(target_df.dtypes)})
        ```
        """

        log_to_file(f"Enhanced fallback response generated:\n{enhanced_fallback_response}")
        return enhanced_fallback_response


def _extract_enhanced_code_blocks(response_text):
    """
    Enhanced code block extraction with validation
    """
    import re

    # Enhanced pattern matching for code blocks
    patterns = [
        r'```python\s*(.*?)```',
        r'```\s*python\s*(.*?)```',
        r'```\s*(.*?)```',
        r'`([^`]+)`'  # Single backticks as fallback
    ]

    code_blocks = []

    for pattern in patterns:
        matches = re.findall(pattern, response_text, re.DOTALL | re.IGNORECASE)
        for match in matches:
            cleaned_code = match.strip()
            if cleaned_code and len(cleaned_code) > 10:  # Filter out very short matches
                code_blocks.append(cleaned_code)

    # Remove duplicates while preserving order
    seen = set()
    unique_blocks = []
    for block in code_blocks:
        if block not in seen:
            seen.add(block)
            unique_blocks.append(block)

    return unique_blocks


# Enhanced Final Analysis Generation
def _get_enhanced_final_analysis_with_prompts_logged(client, original_info, modified_info, user_description,
                                                   total_changes, changed_rows, changed_columns, changed_cells,
                                                   schema_comparison, data_type_mismatches, claude_prompt,
                                                   replication_results, log_to_file):
    """
    Enhanced final analysis with comprehensive learning and schema insights
    """
    best_attempt = replication_results.get("best_attempt", {})
    final_claude_prompt = replication_results.get("final_prompt", claude_prompt)
    learning_applied = replication_results.get("learning_applied", False)
    learning_history = replication_results.get("learning_history", [])

    success_status = "SUCCESS" if replication_results["final_success"] else "PARTIAL"

    # Enhanced statistics
    total_cells = original_info["shape"][0] * original_info["shape"][1] if original_info["shape"][0] > 0 else 1
    change_density = total_changes / total_cells

    enhanced_analysis_prompt = f"""
    Analyze this enhanced dataframe change with comprehensive schema and learning insights.

    ORIGINAL DATAFRAME:
    Shape: {original_info['shape']}
    Schema: {original_info['dtype_details']}
    Data Sample: {original_info['full_data'][:2000]}...

    MODIFIED DATAFRAME:
    Shape: {modified_info['shape']}
    Schema: {modified_info['dtype_details']}
    Data Sample: {modified_info['full_data'][:2000]}...

    ENHANCED CHANGE ANALYSIS:
    - Total changes: {total_changes} cells ({change_density*100:.1f}%)
    - Data type mismatches: {len(data_type_mismatches)}
    - Schema compatibility: {schema_comparison.get('schema_compatibility_score', 0):.1%}
    - Critical schema issues: {len(schema_comparison.get('critical_issues', []))}
    - User description: "{user_description}"

    REPLICATION TESTING RESULTS:
    - Status: {success_status}
    - Attempts: {len(replication_results['attempts'])}
    - Best match: {best_attempt.get('match_score', 0):.1%}
    - Schema match: {best_attempt.get('schema_match', False)}
    - Learning applied: {learning_applied}
    - Learning insights: {len(learning_history)}

    FINAL WORKING PROMPT: {final_claude_prompt}

    Provide comprehensive analysis in this exact JSON format:
    {{
        "change_summary": "ENHANCED ANALYSIS: [comprehensive summary including WORKING_CLAUDE_PROMPT: {final_claude_prompt}] and detailed technical findings with schema validation results",
        "change_type": "data_edit|structure_change|mixed",
        "structural_changes": {{
            "rows_added": {max(0, modified_info['shape'][0] - original_info['shape'][0])},
            "rows_removed": {max(0, original_info['shape'][0] - modified_info['shape'][0])},
            "columns_added": {list(set(modified_info['columns']) - set(original_info['columns']))},
            "columns_removed": {list(set(original_info['columns']) - set(modified_info['columns']))}
        }},
        "enhanced_data_modifications": {{
            "cells_changed": {total_changes},
            "total_cells": {total_cells},
            "change_percentage": {change_density*100:.2f},
            "data_type_mismatches": {len(data_type_mismatches)},
            "rows_affected": {len(changed_rows)},
            "columns_affected": {list(changed_columns)},
            "schema_compatibility_score": {schema_comparison.get('schema_compatibility_score', 0):.2f},
            "critical_schema_issues": {len(schema_comparison.get('critical_issues', []))},
            "precision_patterns": ["Enhanced patterns identified"],
            "data_quality_impact": "improved|degraded|neutral"
        }},
        "replication_analysis": {{
            "final_success": {replication_results['final_success']},
            "best_match_score": {best_attempt.get('match_score', 0):.2f},
            "schema_validation_passed": {best_attempt.get('schema_match', False)},
            "learning_system_applied": {learning_applied},
            "total_learning_insights": {len(learning_history)},
            "common_issues_resolved": ["Issues identified and resolved"]
        }},
        "business_impact": {{
            "rent_calculations_affected": "Enhanced analysis of rent impact with precision focus",
            "tenant_information_updated": "Enhanced analysis of tenant data changes",
            "occupancy_status_changed": "Enhanced analysis of occupancy changes",
            "data_integrity_maintained": "Assessment of data integrity after changes"
        }},
        "enhanced_recommendations": [
            "Enhanced recommendations based on learning insights",
            "Schema validation recommendations",
            "Data type precision improvements"
        ],
        "session_description": "ENHANCED_REPLICATION_STATUS: {success_status} | FINAL_CLAUDE_PROMPT: {final_claude_prompt} | LEARNING_APPLIED: {learning_applied} | REPLICATION_ATTEMPTS: {len(replication_results['attempts'])} | USER_EDIT: {user_description} | Changes: {total_changes} cells ({change_density*100:.1f}%) | Best match: {best_attempt.get('match_score', 0):.1%} | Schema: {best_attempt.get('schema_match', False)}"
    }}
    """

    log_to_file(f"""ENHANCED FINAL ANALYSIS GENERATION:
Sending enhanced final analysis request to GPT-4.1...
Features: Schema insights, learning history, precision focus

ENHANCED PROMPT FOR FINAL ANALYSIS:
{'-'*60}
{enhanced_analysis_prompt}
{'-'*60}
""", "ENHANCED FINAL ANALYSIS GENERATION")

    try:
        response = client.chat.completions.create(
            model="gpt-4.1",
            messages=[
                {"role": "system", "content": "You are an enhanced data analyst with schema validation expertise. Return only valid JSON with comprehensive insights."},
                {"role": "user", "content": enhanced_analysis_prompt}
            ],
            max_tokens=4000,
            temperature=0.1
        )

        enhanced_gpt_final_response = response.choices[0].message.content
        log_to_file(f"""ENHANCED FINAL ANALYSIS GPT-4.1 RESPONSE:
{'-'*60}
{enhanced_gpt_final_response}
{'-'*60}
""")

        try:
            import json
            import re
            json_match = re.search(r'{.*}', enhanced_gpt_final_response, re.DOTALL)
            if json_match:
                enhanced_parsed_analysis = json.loads(json_match.group(0))
                log_to_file("✅ Successfully parsed JSON from enhanced final analysis response")
                return enhanced_parsed_analysis
            else:
                log_to_file("❌ No JSON found in enhanced final analysis response")
        except Exception as json_error:
            log_to_file(f"❌ Enhanced JSON parsing failed: {str(json_error)}")

    except Exception as api_error:
        log_to_file(f"❌ Enhanced final analysis API call failed: {str(api_error)}")

    # Enhanced fallback analysis
    enhanced_fallback_analysis = {
        "change_summary": f"ENHANCED_WORKING_CLAUDE_PROMPT: {final_claude_prompt} | ENHANCED_GPT4_ANALYSIS: [embedded] | {user_description} - {total_changes} changes with {replication_results['final_success']} replication and {learning_applied} learning",
        "change_type": "data_edit",
        "enhanced_data_modifications": {
            "cells_changed": total_changes,
            "total_cells": total_cells,
            "change_percentage": change_density*100,
            "data_type_mismatches": len(data_type_mismatches),
            "schema_compatibility_score": schema_comparison.get('schema_compatibility_score', 0)
        },
        "replication_analysis": {
            "final_success": replication_results['final_success'],
            "best_match_score": best_attempt.get('match_score', 0),
            "learning_system_applied": learning_applied,
            "total_learning_insights": len(learning_history)
        },
        "session_description": f"ENHANCED_REPLICATION_STATUS: {success_status} | FINAL_CLAUDE_PROMPT: {final_claude_prompt} | LEARNING_APPLIED: {learning_applied} | USER_EDIT: {user_description} | Changes: {total_changes} cells"
    }

    log_to_file(f"Using enhanced fallback analysis:\n{json.dumps(enhanced_fallback_analysis, indent=2)}")
    return enhanced_fallback_analysis





# Enhanced Log Summary with All Fixes
def get_enhanced_manual_edit_logs_summary_with_all_fixes():
    """
    Summarize the ``*_detailed_analysis.txt`` files in ``manual_edit_analysis_logs``.

    Files are ordered newest-first by ``os.path.getctime``. Only the first 15
    get a detailed entry, but the aggregate statistics (total size and the
    feature / learning counters) are computed over every log file, so the
    reported totals and percentages agree with the reported file count.

    Each file is read in full to look for the marker strings
    "ENHANCED FEATURES APPLIED", "LEARNING SYSTEM" and "SCHEMA VALIDATION";
    a file that cannot be read counts as having none of them.

    Returns:
        str: The formatted summary, a short notice when the directory or
        matching files are missing, or an error message if listing fails.
    """
    logs_dir = "manual_edit_analysis_logs"
    max_listed = 15  # number of files that get a detailed entry

    if not os.path.exists(logs_dir):
        return "No enhanced manual edit analysis logs found yet."

    try:
        log_files = [f for f in os.listdir(logs_dir) if f.endswith('_detailed_analysis.txt')]

        if not log_files:
            return "No enhanced detailed analysis log files found."

        # Sort by creation time (newest first)
        log_files.sort(key=lambda x: os.path.getctime(os.path.join(logs_dir, x)), reverse=True)

        enhanced_summary = f"""📁 ULTIMATE ENHANCED Manual Edit Analysis Logs Summary
Found {len(log_files)} comprehensive analysis log files with ALL FIXES APPLIED:

🔧 CRITICAL FIXES INCLUDED IN LOGS:
✅ 1. Fixed logging string formatting bug
✅ 2. Enhanced data type/formatting precision matching
✅ 3. Implemented attempt-to-attempt learning system
✅ 4. Added comprehensive data type validation

"""

        total_size = 0
        enhanced_features_count = 0
        learning_applied_count = 0

        for i, log_file in enumerate(log_files, 1):
            file_path = os.path.join(logs_dir, log_file)
            file_size = os.path.getsize(file_path)
            total_size += file_size

            # Best-effort marker scan: an unreadable file is reported as
            # having no enhanced content rather than aborting the summary.
            try:
                with open(file_path, 'r', encoding='utf-8') as f:
                    log_content = f.read()

                has_enhanced_features = "ENHANCED FEATURES APPLIED" in log_content
                has_learning = "LEARNING SYSTEM" in log_content
                has_schema_validation = "SCHEMA VALIDATION" in log_content
            except Exception:
                has_enhanced_features = False
                has_learning = False
                has_schema_validation = False

            if has_enhanced_features:
                enhanced_features_count += 1
            if has_learning:
                learning_applied_count += 1

            # Statistics above cover every file; details only the newest ones.
            if i > max_listed:
                continue

            created_time = datetime.fromtimestamp(os.path.getctime(file_path))

            # Extract session info from filename
            session_id = log_file.replace('_detailed_analysis.txt', '')

            enhanced_summary += f"""{i}. {session_id}
   📄 File: {log_file}
   📅 Created: {created_time.strftime('%Y-%m-%d %H:%M:%S')}
   💾 Size: {file_size:,} bytes
   🔧 Enhanced Features: {'✅ Yes' if has_enhanced_features else '❌ No'}
   🧠 Learning Applied: {'✅ Yes' if has_learning else '❌ No'}
   📋 Schema Validation: {'✅ Yes' if has_schema_validation else '❌ No'}
   📁 Path: {file_path}

"""

        if len(log_files) > max_listed:
            enhanced_summary += f"... and {len(log_files) - max_listed} more enhanced log files\n"

        enhanced_summary += f"""
📈 ENHANCED LOGS STATISTICS:
• Total Log Files: {len(log_files)}
• Total Storage Used: {total_size:,} bytes ({total_size/1024/1024:.1f} MB)
• Files with Enhanced Features: {enhanced_features_count}/{len(log_files)} ({enhanced_features_count/len(log_files)*100:.1f}%)
• Files with Learning System: {learning_applied_count}/{len(log_files)} ({learning_applied_count/len(log_files)*100:.1f}%)

📋 ULTIMATE ENHANCED LOG CONTENTS INCLUDE:
• Complete GPT-4.1 prompts and responses with schema awareness
• All Claude API calls with learning system feedback loops
• Step-by-step replication testing with precision validation
• Cell-by-cell dataframe comparison with data type analysis
• Schema compatibility analysis and validation results
• Business impact assessment with enhanced insights
• Learning system evolution and improvement tracking
• Data type mismatch detection and resolution strategies
• Comprehensive error handling and debugging information
• Attempt-to-attempt learning and prompt refinement details

🔧 CRITICAL FIXES VERIFICATION:
1. ✅ Logging String Formatting: All logs use proper string formatting
2. ✅ Data Type Precision: Enhanced schema validation throughout
3. ✅ Learning System: Progressive improvement across attempts
4. ✅ Type Validation: Comprehensive data type compatibility checking

📂 All enhanced logs saved in: {logs_dir}/
"""

        return enhanced_summary

    except Exception as e:
        return f"Error reading enhanced log files: {str(e)}"


# Enhanced Log Reader
def read_enhanced_manual_edit_log(session_id):
    """
    Load one session's detailed analysis log and wrap it in a status report.

    The log is looked up at
    ``manual_edit_analysis_logs/<session_id>_detailed_analysis.txt``. The
    report shows which marker strings occur in the log, its character and
    newline counts, and then the full log text.

    Args:
        session_id: Identifier used as the log file name prefix.

    Returns:
        str: The formatted report, a "not found" message when the file does
        not exist, or an error message when reading fails.
    """
    log_file_path = os.path.join("manual_edit_analysis_logs", f"{session_id}_detailed_analysis.txt")

    if not os.path.exists(log_file_path):
        return f"Enhanced log file not found: {log_file_path}"

    def tick(flag):
        # Check mark / cross used by the bullet list at the end of the report.
        return '✅' if flag else '❌'

    try:
        with open(log_file_path, 'r', encoding='utf-8') as handle:
            content = handle.read()

        # Marker strings that signal which analysis stages were logged.
        has_features = "ENHANCED FEATURES APPLIED" in content
        has_learning = "LEARNING SYSTEM" in content
        has_schema = "SCHEMA VALIDATION" in content
        replicated = "REPLICATION SUCCESSFUL" in content

        char_count = len(content)
        newline_count = content.count('\n')

        rule = '=' * 80
        completeness = '✅ Complete' if has_features else '❌ Basic'
        analysis_level = 'comprehensive enhanced analysis' if has_features else 'basic analysis'

        return f"""📄 ULTIMATE ENHANCED Manual Edit Analysis Log: {session_id}
{rule}

🔧 CRITICAL FIXES STATUS:
✅ Enhanced Features Applied: {has_features}
✅ Learning System Active: {has_learning}
✅ Schema Validation Included: {has_schema}
✅ Replication Successful: {replicated}

📊 LOG STATISTICS:
• File Size: {char_count:,} characters ({char_count/1024:.1f} KB)
• Total Lines: {newline_count:,}
• Enhanced Analysis: {completeness}

{rule}

{content}

{rule}
End of ultimate enhanced log file: {log_file_path}

🔍 ANALYSIS SUMMARY:
This log contains {analysis_level} with:
• {tick('GPT-4.1' in content)} GPT-4.1 API interactions
• {tick('CLAUDE API' in content)} Claude API calls and responses
• {tick(has_learning)} Learning system feedback loops
• {tick(has_schema)} Schema compatibility validation
• {tick('DATA TYPE' in content)} Data type precision analysis
• {tick(replicated)} Successful replication testing
"""

    except Exception as e:
        return f"Error reading enhanced log file: {str(e)}"

In [None]:

# Update the Gradio event handlers to use enhanced functions
def setup_enhanced_edit_data_handlers():
    """
    Wire the Edit Data tab's buttons to their callbacks.

    Call this from the Gradio interface setup. The buttons, the dropdown, the
    editable grid, the status outputs and the callback functions are all
    looked up by name from the enclosing scope, so they must be defined before
    this function runs.
    """
    # Re-populate the version dropdown on demand.
    refresh_versions_btn.click(refresh_version_dropdown, outputs=[version_dropdown])

    # Load the version picked in the dropdown into the editable grid.
    load_version_btn.click(
        load_specific_version,
        inputs=[version_dropdown],
        outputs=[editable_df, edit_status],
    )

    # Save the edited grid, then refresh the dropdown once the save finished.
    save_event = save_changes_btn.click(
        save_edited_dataframe,
        inputs=[editable_df, save_description],
        outputs=[save_status, editable_df],
    )
    save_event.then(refresh_version_dropdown, outputs=[version_dropdown])

    # Optional: a "Load Latest" button can be wired the same way, e.g.
    # load_latest_btn.click(
    #     load_latest_version_for_editing_enhanced,
    #     outputs=[editable_df, edit_status]
    # )

In [None]:
class EnhancedTemplateApplicationEngine:
    def __init__(self):
        self.templates_dir = "rent_roll_templates"
        self.application_sessions_dir = "template_applications"
        os.makedirs(self.application_sessions_dir, exist_ok=True)
        self.current_application = None

    def start_template_application(self, template_id, new_rent_roll_file, new_rent_roll_df):
        """
        Open a new application session that replays a template on a new rent roll.

        Loads the template through ``enhanced_template_manager``, builds the
        session record in ``self.current_application``, creates the session's
        dataframes directory, saves the uploaded dataframe as version 0 with a
        matching step-0 state, and writes the header of the session log.

        Args:
            template_id: Identifier handed to ``load_template_dataframes``.
            new_rent_roll_file: Name/path of the uploaded file (recorded and logged).
            new_rent_roll_df: Dataframe of the uploaded rent roll.

        Returns:
            tuple: ``(session_id, message)`` on success, or ``(None, message)``
            when the template data could not be loaded.
        """
        template_data, starting_template_df, final_template_df = (
            enhanced_template_manager.load_template_dataframes(template_id)
        )
        if template_data is None:
            return None, "❌ Failed to load template data"

        # Session ids are timestamp based (second resolution).
        app_session_id = f"app_{datetime.now().strftime('%Y%m%d_%H%M%S')}"

        session = {
            "session_id": app_session_id,
            "template_id": template_id,
            "template_data": template_data,
            "starting_template_df": starting_template_df,
            "final_template_df": final_template_df,
            "new_rent_roll_file": new_rent_roll_file,
            "new_rent_roll_df": new_rent_roll_df.copy(),
            "current_df": new_rent_roll_df.copy(),
            "step_results": [],
            "current_step": 0,
            "total_steps": len(template_data.get("raw_workflow_steps", [])),
            "completed_steps": [],
            "failed_steps": [],
            "log_file": None,
            "latest_version_file": None,
            "step_version_files": {},
            "step_states": {},  # complete state per step, used for back navigation
            "user_inputs_history": {}  # user inputs recorded per step
        }
        self.current_application = session

        # The dataframes directory is created before the first version is saved.
        os.makedirs(
            os.path.join(self.application_sessions_dir, f"{app_session_id}_dataframes"),
            exist_ok=True,
        )

        self.current_application["log_file"] = os.path.join(
            self.application_sessions_dir, f"{app_session_id}_application_log.txt"
        )

        # Step 0 is the untouched upload.
        initial_version_file = self._save_dataframe_version(
            new_rent_roll_df,
            "Initial uploaded rent roll - starting point",
            0
        )
        self.current_application["latest_version_file"] = initial_version_file
        self.current_application["step_version_files"][0] = initial_version_file

        self._save_step_state(0, {
            "dataframe": new_rent_roll_df.copy(),
            "version_file": initial_version_file,
            "status": "initial",
            "description": "Starting point"
        })

        # Opening with 'w' starts the session log from scratch.
        with open(self.current_application["log_file"], 'w', encoding='utf-8') as log:
            header_lines = [
                "=== ENHANCED TEMPLATE APPLICATION SESSION ===\n",
                f"Session ID: {app_session_id}\n",
                f"Template ID: {template_id}\n",
                f"Template Name: {template_data.get('template_name', 'Unknown')}\n",
                f"New Rent Roll File: {new_rent_roll_file}\n",
                f"Started: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n",
                f"Total Steps to Execute: {self.current_application['total_steps']}\n",
                f"💾 Initial Version File: {initial_version_file}\n",
                "🆕 Enhanced Features: User input per step, Back navigation\n",
                "=" * 60 + "\n\n",
            ]
            log.writelines(header_lines)

        return app_session_id, "✅ Enhanced template application session started successfully"

    def _save_step_state(self, step_number, state_data):
        """🔄 NEW: Save complete state for a step to enable back navigation (FIXED VERSION)"""
        try:
            # ✅ FIX: Ensure we have a dataframe
            if "dataframe" not in state_data:
                print(f"⚠️ Warning: No dataframe in state_data for step {step_number}")
                # Try to use current_df as fallback
                if "current_df" in self.current_application:
                    state_data["dataframe"] = self.current_application["current_df"].copy()
                else:
                    print(f"❌ No current_df available for step {step_number}")
                    return

            # Save the state
            self.current_application["step_states"][step_number] = {
                "dataframe": state_data["dataframe"].copy(),
                "version_file": state_data.get("version_file", ""),
                "status": state_data.get("status", "unknown"),
                "description": state_data.get("description", ""),
                "timestamp": datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
                "shape": state_data["dataframe"].shape
            }

            print(f"💾 Saved state for step {step_number}: {state_data['dataframe'].shape}")

        except Exception as e:
            print(f"❌ Error saving step state {step_number}: {e}")
            import traceback
            traceback.print_exc()

    def _restore_step_state(self, step_number):
        """🔄 NEW: Restore state from a specific step (FIXED VERSION)"""
        if step_number not in self.current_application["step_states"]:
            return False, f"No saved state for step {step_number}"

        try:
            state = self.current_application["step_states"][step_number]

            # Debug logging
            print(f"🔍 Restoring step {step_number}")
            print(f"Available keys in state: {list(state.keys())}")

            # ✅ FIX: Check if dataframe exists in state
            if "dataframe" not in state:
                # Try to load from version file instead
                version_file = state.get("version_file")
                if version_file and os.path.exists(version_file):
                    print(f"📁 Loading dataframe from version file: {version_file}")
                    restored_df = pd.read_csv(version_file)
                else:
                    print(f"❌ No dataframe in state and no valid version file")
                    return False, f"Cannot restore step {step_number}: no dataframe available"
            else:
                # Use dataframe from state
                restored_df = state["dataframe"].copy()
                print(f"✅ Loaded dataframe from state: {restored_df.shape}")

            # Restore the dataframe and version file
            self.current_application["current_df"] = restored_df
            self.current_application["latest_version_file"] = state.get("version_file")

            # Update current step
            self.current_application["current_step"] = step_number

            # Clear any completed/failed steps after this point
            self.current_application["completed_steps"] = [s for s in self.current_application["completed_steps"] if s <= step_number]
            self.current_application["failed_steps"] = [s for s in self.current_application["failed_steps"] if s <= step_number]

            # Clear future states
            future_steps = [s for s in self.current_application["step_states"].keys() if s > step_number]
            for future_step in future_steps:
                del self.current_application["step_states"][future_step]

            # Clear future user inputs
            future_inputs = [s for s in self.current_application["user_inputs_history"].keys() if s > step_number]
            for future_step in future_inputs:
                del self.current_application["user_inputs_history"][future_step]

            # Log the restoration
            with open(self.current_application["log_file"], 'a', encoding='utf-8') as f:
                f.write(f"🔄 RESTORED TO STEP {step_number}\n")
                f.write(f"Restored DataFrame Shape: {restored_df.shape}\n")
                f.write(f"Restored Version File: {os.path.basename(state.get('version_file', 'None'))}\n")
                f.write(f"Restored At: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n\n")

            return True, f"✅ Successfully restored to step {step_number} (Shape: {restored_df.shape})"

        except Exception as e:
            print(f"❌ Exception in _restore_step_state: {e}")
            import traceback
            traceback.print_exc()
            return False, f"❌ Error restoring step {step_number}: {str(e)}"

    def get_available_back_steps(self):
        """🔄 NEW: Get list of steps available for back navigation"""
        if not self.current_application:
            return []

        current_step = self.current_application["current_step"]
        available_steps = []

        for step_num in sorted(self.current_application["step_states"].keys()):
            if step_num < current_step:
                state = self.current_application["step_states"][step_num]
                available_steps.append({
                    "step_number": step_num,
                    "description": state["description"],
                    "timestamp": state["timestamp"],
                    "shape": state["shape"]
                })

        return available_steps

    def go_back_to_step(self, target_step):
        """🔄 NEW: Navigate back to a specific step"""
        if not self.current_application:
            return "❌ No active application session"

        current_step = self.current_application["current_step"]

        if target_step >= current_step:
            return f"❌ Cannot go back to step {target_step} (current step is {current_step})"

        if target_step not in self.current_application["step_states"]:
            return f"❌ No saved state for step {target_step}"

        success, message = self._restore_step_state(target_step)

        if success:
            return f"🔄 Successfully went back to step {target_step}. You can now continue from here."
        else:
            return message

    def get_next_step_info(self):
        """Get detailed information about the next step to execute"""
        if not self.current_application:
            return None, "No active application session"

        current_step = self.current_application["current_step"]
        workflow_steps = self.current_application["template_data"].get("raw_workflow_steps", [])

        if current_step >= len(workflow_steps):
            return None, "All steps completed"

        next_step = workflow_steps[current_step]
        step_number = current_step + 1

        # Get current dataframe info
        current_df = self._get_latest_dataframe_from_versions()

        step_info = {
            "step_number": step_number,
            "total_steps": len(workflow_steps),
            "original_message": next_step.get('user_message', 'No message'),
            "original_code": next_step.get('code_executed', ''),
            "ai_response": next_step.get('ai_response', ''),
            "current_shape": current_df.shape,
            "current_columns": list(current_df.columns),
            "can_go_back": len(self.get_available_back_steps()) > 0
        }

        return step_info, f"Step {step_number} of {len(workflow_steps)}"

    def execute_step_with_user_input(self, user_additional_input="", override_code=""):
        """📝 NEW: Execute next step with optional user input and code override"""
        if not self.current_application:
            return {"success": False, "error": "No active application session"}

        # Get next step info
        step_info, step_description = self.get_next_step_info()
        if step_info is None:
            return {"success": False, "error": "All steps completed"}

        current_step_num = self.current_application["current_step"] + 1

        # Store user input for this step
        self.current_application["user_inputs_history"][current_step_num] = {
            "additional_input": user_additional_input,
            "override_code": override_code,
            "timestamp": datetime.now().strftime('%Y-%m-%d %H:%M:%S')
        }

        try:
            print(f"\n🚀 EXECUTING STEP {current_step_num} WITH USER INPUT")
            print(f"📝 Original step: {step_info['original_message'][:100]}...")
            if user_additional_input:
                print(f"👤 User input: {user_additional_input[:100]}...")
            if override_code:
                print(f"🔧 Code override provided")

            # Get current dataframe from latest version file
            current_df = self._get_latest_dataframe_from_versions()
            self.current_application["current_df"] = current_df.copy()

            # Execute step with user input
            execution_result = self._execute_step_with_claude_and_user_input(
                step_info, current_step_num, user_additional_input, override_code
            )

            if execution_result.get("success", False):
                # ✅ SUCCESS
                print(f"✅ Step {current_step_num} succeeded!")

                # Update latest version file pointer
                new_version_file = execution_result.get("new_version_file")
                if new_version_file:
                    self._update_latest_version_file(new_version_file, current_step_num)

                # Save state for this completed step
                final_df = execution_result.get("updated_df", current_df)
                self._save_step_state(current_step_num, {
                    "dataframe": final_df,
                    "version_file": new_version_file,
                    "status": "completed",
                    "description": f"Step {current_step_num} completed: {step_info['original_message'][:50]}..."
                })

                # Advance to next step
                self.current_application["current_step"] += 1
                self.current_application["completed_steps"].append(current_step_num)

                # Save application state
                self._save_application_state()

                return {
                    "success": True,
                    "step_number": current_step_num,
                    "message": f"✅ Step {current_step_num} completed successfully",
                    "execution_result": execution_result,
                    "can_go_back": True,
                    "next_step_available": self.current_application["current_step"] < self.current_application["total_steps"]
                }

            else:
                # ❌ FAILURE
                error_msg = execution_result.get("error", "Unknown error")
                print(f"❌ Step {current_step_num} failed: {error_msg}")

                # Save failed state (don't advance step)
                self._save_step_state(current_step_num, {
                    "dataframe": current_df,
                    "version_file": self.current_application["latest_version_file"],
                    "status": "failed",
                    "description": f"Step {current_step_num} failed: {error_msg[:50]}..."
                })

                return {
                    "success": False,
                    "step_number": current_step_num,
                    "error": error_msg,
                    "execution_result": execution_result,
                    "can_retry": True,
                    "can_go_back": len(self.get_available_back_steps()) > 0
                }

        except Exception as e:
            error_msg = f"Execution crashed: {str(e)}"
            print(f"💥 Step {current_step_num} crashed: {error_msg}")

            return {
                "success": False,
                "step_number": current_step_num,
                "error": error_msg,
                "can_retry": True,
                "can_go_back": len(self.get_available_back_steps()) > 0
            }

    def _execute_step_with_claude_and_user_input(self, step_info, step_number, user_input, code_override):
        """📝 NEW: Execute step with Claude, incorporating user input"""
        from anthropic import Anthropic
        claude_client = Anthropic(api_key=DEFAULT_ANTHROPIC_API_KEY)

        # Load latest dataframe
        latest_df = self._get_latest_dataframe_from_versions()

        print(f"🔗 STEP {step_number} LOADING FROM VERSION FILE: {os.path.basename(self.current_application.get('latest_version_file', 'None'))}")
        print(f"📊 Loaded DataFrame Shape: {latest_df.shape}")

        # Prepare enhanced Claude prompt with user input
        df_context = f"""
CURRENT DATAFRAME STATUS:
========================
Shape: {latest_df.shape}
Columns: {list(latest_df.columns)}
Data Types: {dict(latest_df.dtypes.astype(str))}

Sample Data (first 3 rows):
{latest_df.head(3).to_string()}

The dataframe is already loaded as 'df' variable.
"""

        # Build the main prompt
        main_prompt = f"""
STEP {step_number} EXECUTION WITH USER INPUT
==========================================

ORIGINAL TEMPLATE STEP:
User Intent: "{step_info['original_message']}"

ORIGINAL AI RESPONSE CONTEXT:
{step_info['ai_response'][:500] + "..." if len(step_info['ai_response']) > 500 else step_info['ai_response']}

ORIGINAL CODE FROM TEMPLATE:
```python
{step_info['original_code']}
```
"""

        # Add user input section if provided
        user_input_section = ""
        if user_input.strip():
            user_input_section = f"""
🧑 USER ADDITIONAL INPUT FOR THIS STEP:
======================================
{user_input}

IMPORTANT: Incorporate the user's input into your solution. The user may be:
- Requesting modifications to the original approach
- Providing specific requirements or constraints
- Asking for different analysis or calculations
- Suggesting improvements or alternatives
"""

        # Add code override section if provided
        code_override_section = ""
        if code_override.strip():
            code_override_section = f"""
🔧 USER CODE OVERRIDE:
=====================
The user has provided custom code to use instead of or in addition to the original:

```python
{code_override}
```

INSTRUCTIONS: Use this code as the primary approach, but ensure it still achieves the original step's business objective.
"""

        # Final instructions
        final_instructions = f"""
{df_context}

EXECUTION INSTRUCTIONS:
1. The dataframe 'df' is already loaded and available
2. Consider the original template step's business objective
3. {"Incorporate the user's additional input/requirements" if user_input else ""}
4. {"Use the user's code override as the primary approach" if code_override else "Either use original code or adapt it as needed"}
5. 🔗 CRITICAL: You MUST call save_dataframe_version(df, "description") at the end
6. Provide clear output about what was accomplished

Please provide your code in ```python``` blocks.
"""

        full_prompt = main_prompt + user_input_section + code_override_section + final_instructions

        # Log the enhanced prompt
        with open(self.current_application["log_file"], 'a', encoding='utf-8') as f:
            f.write(f"📤 SENDING TO CLAUDE 3.7 - STEP {step_number} WITH USER INPUT\n")
            f.write(f"Input DataFrame Shape: {latest_df.shape}\n")
            f.write(f"User Input Length: {len(user_input)} characters\n")
            f.write(f"Code Override Length: {len(code_override)} characters\n")
            f.write(f"Full Prompt Length: {len(full_prompt)} characters\n\n")

        try:
            # Execute with Claude
            claude_response = claude_client.messages.create(
                model="claude-3-7-sonnet-20250219",
                messages=[{"role": "user", "content": full_prompt}],
                max_tokens=4000,
                temperature=0.3
            )

            response_text = claude_response.content[0].text

            # Log Claude's response
            with open(self.current_application["log_file"], 'a', encoding='utf-8') as f:
                f.write(f"📥 RECEIVED FROM CLAUDE 3.7 - STEP {step_number}\n")
                f.write(f"Response Length: {len(response_text)} characters\n\n")

            # Extract and execute code blocks
            import re
            code_blocks = re.findall(r'```python\s*(.*?)\s*```', response_text, re.DOTALL)

            if not code_blocks:
                return {
                    "success": False,
                    "summary": f"Step {step_number}: No executable code generated by Claude",
                    "claude_response": response_text,
                    "error": "No code blocks found in Claude's response"
                }

            # Execute the code with enhanced version saving
            version_file_created = None

            def enhanced_save_dataframe_version(df, operation_description=""):
                nonlocal version_file_created
                version_file_created = self._save_dataframe_version(df, operation_description, step_number)
                return version_file_created

            exec_globals = {
                "df": latest_df.copy(),
                "pd": pd,
                "np": np,
                "datetime": datetime,
                "os": os,
                "print_formatted_table": lambda df, title="DataFrame", max_rows=10: self.print_formatted_table(df, title, max_rows),
                "save_dataframe_version": enhanced_save_dataframe_version,
                "print": print,
                "len": len,
                "str": str,
                "int": int,
                "float": float
            }

            execution_success = False
            execution_output = ""

            import io
            from contextlib import redirect_stdout
            output_buffer = io.StringIO()

            try:
                with redirect_stdout(output_buffer):
                    for i, code_block in enumerate(code_blocks, 1):
                        print(f"--- Executing Code Block {i} ---")
                        exec(code_block, exec_globals)
                        print(f"--- Code Block {i} Completed ---\n")

                execution_output = output_buffer.getvalue()
                updated_df = exec_globals["df"]
                execution_success = True

            except Exception as e:
                import traceback
                execution_output = f"Execution error: {str(e)}\n{traceback.format_exc()}"
                execution_success = False
                updated_df = latest_df

            # Create version file if none was created during execution
            if execution_success and not version_file_created:
                print("⚠️ No version file created during execution, creating one now...")
                version_file_created = self._save_dataframe_version(
                    updated_df,
                    f"Step {step_number} completed with user input",
                    step_number
                )

            # Update memory reference
            self.current_application["current_df"] = updated_df.copy()

            # Log results
            with open(self.current_application["log_file"], 'a', encoding='utf-8') as f:
                result_status = "✅ SUCCESS" if execution_success else "❌ FAILED"
                f.write(f"{result_status} - STEP {step_number} WITH USER INPUT\n")
                f.write(f"Input Shape: {latest_df.shape}\n")
                f.write(f"Output Shape: {updated_df.shape}\n")
                f.write(f"User Input Applied: {bool(user_input)}\n")
                f.write(f"Code Override Used: {bool(code_override)}\n")
                if version_file_created:
                    f.write(f"🔗 Version File Created: {os.path.basename(version_file_created)}\n")
                f.write(f"Execution Output:\n{execution_output}\n\n")

            return {
                "success": execution_success,
                "summary": f"Step {step_number}: {'✅ Successfully executed with user input' if execution_success else '❌ Execution failed'}",
                "claude_response": response_text,
                "executed_code": "\n\n# --- Next Code Block ---\n\n".join(code_blocks),
                "execution_output": execution_output,
                "updated_df": updated_df,
                "new_version_file": version_file_created,
                "user_input_applied": bool(user_input),
                "code_override_used": bool(code_override)
            }

        except Exception as e:
            with open(self.current_application["log_file"], 'a', encoding='utf-8') as f:
                f.write(f"❌ CLAUDE API FAILED - STEP {step_number}\n")
                f.write(f"Error: {str(e)}\n\n")

            return {
                "success": False,
                "summary": f"Step {step_number}: Claude execution failed - {str(e)}",
                "error": str(e),
                "claude_response": "Failed to get response from Claude"
            }

    # Include all the existing helper methods from the original class
    def _get_latest_dataframe_from_versions(self):
        """Load dataframe from the latest version file in the chain"""
        try:
            if not self.current_application:
                raise Exception("No active application session")

            latest_version_file = self.current_application.get("latest_version_file")

            if not latest_version_file or not os.path.exists(latest_version_file):
                print("⚠️ No valid latest version file, using current_df fallback")
                return self.current_application["current_df"].copy()

            df = pd.read_csv(latest_version_file)

            with open(self.current_application["log_file"], 'a', encoding='utf-8') as f:
                f.write(f"🔗 LOADED FROM VERSION FILE: {os.path.basename(latest_version_file)}\n")
                f.write(f"Shape: {df.shape}\n")
                f.write(f"Columns: {list(df.columns)}\n\n")

            print(f"🔗 Loaded latest dataframe from: {os.path.basename(latest_version_file)}")

            return df

        except Exception as e:
            print(f"❌ Error loading from version file: {e}")
            return self.current_application["current_df"].copy()

    def _update_latest_version_file(self, new_version_file, step_number):
        """Update the latest version file pointer after saving"""
        if new_version_file and os.path.exists(new_version_file):
            self.current_application["latest_version_file"] = new_version_file
            self.current_application["step_version_files"][step_number] = new_version_file

            with open(self.current_application["log_file"], 'a', encoding='utf-8') as f:
                f.write(f"🔗 UPDATED LATEST VERSION: Step {step_number} -> {os.path.basename(new_version_file)}\n")

    def _save_dataframe_version(self, df, description, step_number):
        """Save dataframe version and return file path for chaining"""
        try:
            session_id = self.current_application["session_id"]
            dataframes_dir = os.path.join(self.application_sessions_dir, f"{session_id}_dataframes")
            os.makedirs(dataframes_dir, exist_ok=True)

            timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
            version_filename = f"step_{step_number}_version_{timestamp}.csv"
            version_filepath = os.path.join(dataframes_dir, version_filename)

            df.to_csv(version_filepath, index=False)

            with open(self.current_application["log_file"], 'a', encoding='utf-8') as f:
                f.write(f"💾 Saved dataframe version: {version_filename}\n")
                f.write(f"📝 Description: {description}\n")
                f.write(f"📊 Shape: {df.shape}\n")
                f.write(f"🔗 File Path: {version_filepath}\n\n")

            print(f"💾 Saved version: {version_filename} (Shape: {df.shape})")
            return version_filepath

        except Exception as e:
            print(f"❌ Version save failed: {str(e)}")
            return None

    def _save_application_state(self):
        """Save current application state to file"""
        try:
            if not self.current_application:
                return False

            session_id = self.current_application["session_id"]
            session_file = os.path.join(self.application_sessions_dir, f"{session_id}_state.json")

            save_data = self.current_application.copy()

            # Remove large dataframes from save data
            dataframes_to_remove = ["current_df", "starting_template_df", "final_template_df", "new_rent_roll_df"]
            for df_key in dataframes_to_remove:
                if df_key in save_data:
                    if hasattr(save_data[df_key], 'shape'):
                        save_data[f"{df_key}_shape"] = save_data[df_key].shape
                    del save_data[df_key]

            # Also remove dataframes from step states
            if "step_states" in save_data:
                for step_num in save_data["step_states"]:
                    if "dataframe" in save_data["step_states"][step_num]:
                        save_data["step_states"][step_num]["dataframe_shape"] = save_data["step_states"][step_num]["dataframe"].shape
                        del save_data["step_states"][step_num]["dataframe"]

            with open(session_file, 'w') as f:
                json.dump(save_data, f, indent=2, default=str)

            print(f"💾 Application state saved: {session_file}")
            return True

        except Exception as e:
            print(f"❌ Failed to save application state: {e}")
            return False

    def print_formatted_table(self, df, title="DataFrame", max_rows=10):
        """Print a framed, human-readable dump of a dataframe.

        Shows the title between two 60-character rules, then the shape and
        column list, then the data. At most `max_rows` rows are printed; when
        the frame is longer, a line reports how many rows were left out.
        """
        rule = '=' * 60
        print(f"\n{rule}")
        print(f"{title}")
        print(rule)
        print(f"Shape: {df.shape}")
        print(f"Columns: {list(df.columns)}")
        print("\nData:")

        hidden_rows = len(df) - max_rows
        if hidden_rows > 0:
            print(df.head(max_rows).to_string())
            print(f"\n... ({hidden_rows} more rows)")
        else:
            print(df.to_string())

        print(f"{rule}\n")

    def get_application_status(self):
        """Get current application status with enhanced info"""
        if not self.current_application:
            return "📴 No active template application session"

        total = self.current_application["total_steps"]
        current = self.current_application["current_step"]
        completed = len(self.current_application["completed_steps"])
        failed = len(self.current_application["failed_steps"])

        # Show latest version file info
        latest_version = self.current_application.get("latest_version_file")
        version_info = f"🔗 Latest Version: {os.path.basename(latest_version)}" if latest_version else "🔗 No version file yet"

        # Show back navigation info
        back_steps = self.get_available_back_steps()
        back_info = f"🔄 Can go back to: {len(back_steps)} previous steps" if back_steps else "🔄 No back navigation available yet"

        return f"""📋 Enhanced Template Application Status

🎯 Template: {self.current_application['template_data'].get('template_name', 'Unknown')}
📁 Processing: {self.current_application['new_rent_roll_file']}

📊 Progress:
• Current Step: {current}/{total}
• Completed: {completed}
• Failed: {failed}
• Remaining: {total - current}

{version_info}
{back_info}

🔄 Status: {'🎉 Completed' if current >= total else '⏳ Ready for next step with user input'}
🆕 Enhanced Features: ✅ User input per step, ✅ Back navigation"""

    def _finalize_application(self):
        """Finalize using the latest version file"""
        if not self.current_application:
            return "No active session to finalize"

        session_id = self.current_application["session_id"]
        total_steps = self.current_application["total_steps"]
        completed = len(self.current_application["completed_steps"])
        failed = len(self.current_application["failed_steps"])

        # Load the final dataframe from the latest version file
        final_df = self._get_latest_dataframe_from_versions()

        # Save final result
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        final_csv = os.path.join(self.application_sessions_dir, f"{session_id}_FINAL_RESULT_{timestamp}.csv")
        final_excel = os.path.join(self.application_sessions_dir, f"{session_id}_FINAL_RESULT_{timestamp}.xlsx")

        final_df.to_csv(final_csv, index=False)
        final_df.to_excel(final_excel, index=False, engine='openpyxl')

        # Log final summary
        with open(self.current_application["log_file"], 'a', encoding='utf-8') as f:
            f.write(f"\n{'='*60}\n")
            f.write("🎉 ENHANCED TEMPLATE APPLICATION COMPLETED\n")
            f.write(f"{'='*60}\n")
            f.write(f"Total Steps: {total_steps}\n")
            f.write(f"Completed Successfully: {completed}\n")
            f.write(f"Failed: {failed}\n")
            f.write(f"Success Rate: {(completed/total_steps)*100:.1f}%\n")
            f.write(f"Final DataFrame Shape: {final_df.shape}\n")
            f.write(f"🔗 Latest Version File: {self.current_application.get('latest_version_file', 'None')}\n")
            f.write(f"🆕 Enhanced Features Used: User input per step, Back navigation\n")
            f.write(f"Final Result CSV: {final_csv}\n")
            f.write(f"Final Result Excel: {final_excel}\n")
            f.write(f"Completed: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n")
            f.write(f"{'='*60}\n")

        self.current_application = None

        return f"""🎉 Enhanced Template Application Completed!
📊 Results:
• Total Steps: {total_steps}
• Successfully Completed: {completed}
• Failed: {failed}
• Success Rate: {(completed/total_steps)*100:.1f}%

📁 Final Output:
• CSV: {final_csv}
• Excel: {final_excel}
• Shape: {final_df.shape}

🆕 Enhanced features used: User input per step, Back navigation
✅ Version file workflow ensures proper step chaining!"""


# Global enhanced template application engine.
# Module-level instance that the wrapper functions below all read, so they
# operate on the same engine object and its `current_application` session.
enhanced_template_app_engine = EnhancedTemplateApplicationEngine()

# Enhanced functions for the Template Application tab
def load_template_for_enhanced_application(template_id):
    """Fetch a template's summary and a short preview of its steps.

    Returns a 3-tuple `(summary, steps_preview, status_message)`. The preview
    lists the first five workflow steps (each message cut to 80 characters)
    read from `rent_roll_templates/<template_id>.json`, plus a count of any
    further steps. Errors are reported in the tuple rather than raised.
    """
    if not template_id:
        return "Please enter a template ID", "", ""

    try:
        template_summary = enhanced_template_manager.get_template_summary(template_id)

        # Get template steps for preview
        template_json_path = os.path.join("rent_roll_templates", f"{template_id}.json")
        if not os.path.exists(template_json_path):
            return template_summary, "", "❌ Template file not found"

        with open(template_json_path, 'r') as handle:
            template_data = json.load(handle)

        workflow_steps = template_data.get("raw_workflow_steps", [])
        preview_parts = [
            f"{position}. {step.get('user_message', 'N/A')[:80]}...\n"
            for position, step in enumerate(workflow_steps[:5], 1)
        ]

        hidden_steps = len(workflow_steps) - 5
        if hidden_steps > 0:
            preview_parts.append(f"... and {hidden_steps} more steps\n")

        return template_summary, "".join(preview_parts), f"✅ Template loaded: {len(workflow_steps)} steps found"

    except Exception as e:
        return f"❌ Error loading template: {str(e)}", "", "Failed to load"

def start_enhanced_template_application_session(template_id, new_rent_roll_file):
    """Start applying a template to a newly uploaded rent roll.

    The upload is copied to a temporary .xlsx file, loaded with the
    specialised rent roll loader (falling back to `pd.read_excel`), and a new
    session is started on the global engine.

    Args:
        template_id: ID of the template to apply.
        new_rent_roll_file: The uploaded file, either an object exposing the
            path as `.name` or a plain path string.

    Returns:
        A status message for display: a success summary, the engine's own
        status when no session was created, or an error description.
    """
    if not template_id:
        return "❌ Please select a template first"

    if not new_rent_roll_file:
        return "❌ Please upload a new rent roll file"

    try:
        import shutil
        import tempfile

        # Accept both upload objects with a `.name` path and bare path strings.
        source_path = getattr(new_rent_roll_file, "name", new_rent_roll_file)

        # Use the specialized loader
        temp_file = tempfile.NamedTemporaryFile(delete=False, suffix='.xlsx')
        temp_file.close()
        file_path = temp_file.name

        try:
            # Copy the uploaded file to temporary location
            shutil.copyfile(source_path, file_path)

            # Use the specialized rent roll loader
            try:
                print("Loading rent roll with specialized loader for enhanced template application...")
                new_df = read_rent_roll_simple(file_path)
            except Exception as e:
                print(f"Error with specialized loader: {e}. Falling back to standard loading.")
                new_df = pd.read_excel(file_path)
        finally:
            # Clean up the temp file even when copying or loading fails;
            # delete=False means nothing else will remove it.
            if os.path.exists(file_path):
                os.unlink(file_path)

        # Start enhanced application session
        session_id, status = enhanced_template_app_engine.start_template_application(
            template_id=template_id,
            new_rent_roll_file=source_path,
            new_rent_roll_df=new_df
        )

        if session_id:
            return f"""✅ Enhanced Session Started: {session_id}

{status}

📊 New Rent Roll Info:
• Shape: {new_df.shape}
• Columns: {list(new_df.columns)}

🎯 Ready to execute {enhanced_template_app_engine.current_application['total_steps']} template steps!

🆕 Enhanced Features:
• 👤 Provide input for each step
• 🔧 Override code if needed
• 🔄 Go back to previous steps
• 📝 Step-by-step execution control"""
        else:
            return status

    except Exception as e:
        return f"❌ Error starting enhanced application: {str(e)}"

def get_next_step_details():
    """Describe the next step to run, formatted for the UI.

    Returns a 5-tuple: the step description, the template's original code,
    the original AI response (cut to 500 characters plus "..." when longer),
    and two empty strings that reset the user-input and code-override fields.
    When no step is left, or on error, the first element carries the message
    and the rest are empty.
    """
    try:
        step_info, _unused_description = enhanced_template_app_engine.get_next_step_info()

        if step_info is None:
            return "🎉 All steps completed!", "", "", "", ""

        # Format the step information for display
        step_display = f"""📋 Step {step_info['step_number']} of {step_info['total_steps']}

🎯 Original Intent:
{step_info['original_message']}

📊 Current Data:
• Shape: {step_info['current_shape']}
• Columns: {len(step_info['current_columns'])} columns

🔄 Navigation:
• Can go back: {'✅ Yes' if step_info['can_go_back'] else '❌ No previous steps'}"""

        template_code = step_info['original_code']

        ai_context = step_info['ai_response']
        if len(ai_context) > 500:
            ai_context = ai_context[:500] + "..."

        # The last two entries clear the user input and code override fields
        return step_display, template_code, ai_context, "", ""

    except Exception as e:
        return f"❌ Error getting step details: {str(e)}", "", "", "", ""

def execute_step_with_user_input(user_input, code_override):
    """Run the current step on the global engine and format the outcome.

    Empty or missing inputs are passed to the engine as empty strings. The
    returned text is either a success report (including the first 500
    characters of the execution output) or a failure report with retry and
    navigation hints. Any exception is turned into an error message.
    """
    try:
        outcome = enhanced_template_app_engine.execute_step_with_user_input(
            user_additional_input=user_input if user_input else "",
            override_code=code_override if code_override else ""
        )

        if not outcome["success"]:
            return f"""❌ Step {outcome['step_number']} Failed

🚨 Error: {outcome['error']}

🔄 Options:
• Can retry: {'✅ Yes' if outcome.get('can_retry') else '❌ No'}
• Can go back: {'✅ Yes' if outcome.get('can_go_back') else '❌ No'}

💡 Suggestions:
• Modify your input and try again
• Use code override to fix the issue
• Go back to a previous step"""

        return f"""✅ {outcome['message']}

📊 Execution Details:
• User input applied: {'Yes' if outcome['execution_result'].get('user_input_applied') else 'No'}
• Code override used: {'Yes' if outcome['execution_result'].get('code_override_used') else 'No'}

🔄 Navigation Options:
• Next step available: {'✅ Yes' if outcome.get('next_step_available') else '❌ All complete'}
• Can go back: {'✅ Yes' if outcome.get('can_go_back') else '❌ No'}

📈 Output:
{outcome['execution_result']['execution_output'][:500]}..."""

    except Exception as e:
        return f"❌ Error executing step: {str(e)}"

def get_back_navigation_options():
    """List the steps the user can navigate back to, as display text.

    Each available step is shown with its number, description, timestamp and
    dataframe shape. A short notice is returned when there are none, and an
    error message when the lookup fails.
    """
    try:
        available = enhanced_template_app_engine.get_available_back_steps()

        if not available:
            return "🔄 No previous steps available for back navigation"

        entries = [
            f"Step {entry['step_number']}: {entry['description']}\n"
            f"   📅 {entry['timestamp']} | 📊 Shape: {entry['shape']}\n\n"
            for entry in available
        ]
        return "🔄 Available Steps for Back Navigation:\n\n" + "".join(entries)

    except Exception as e:
        return f"❌ Error getting back navigation options: {str(e)}"

def go_back_to_step(step_number):
    """Navigate the global engine back to a specific step.

    Args:
        step_number: Target step from the UI number field. 0 is a valid
            target (the UI offers it as "go back to the beginning"), so only
            a missing value (None or an empty string) is rejected.

    Returns:
        The engine's result message, a prompt to enter a step number, or an
        error message if the value is not an integer or the engine raises.
    """
    try:
        # Explicit emptiness check: `not step_number` would also reject step 0.
        if step_number is None or step_number == "":
            return "❌ Please enter a step number"

        return enhanced_template_app_engine.go_back_to_step(int(step_number))

    except Exception as e:
        return f"❌ Error going back to step: {str(e)}"

def get_enhanced_application_status():
    """Return the global engine's current status report as text."""
    status_text = enhanced_template_app_engine.get_application_status()
    return status_text


# Enhanced Gradio Interface
def create_enhanced_template_application_tab():
    """Create the enhanced Template Application tab with user input and back navigation.

    Builds a `gr.Tab` containing, top to bottom: template setup (ID, file
    upload, start button) next to a template details panel; the current step
    view with the user's per-step input fields; back-navigation controls next
    to the execution results; and a status box. It then wires every button to
    the module-level wrapper functions. Nothing is returned.

    NOTE(review): expected to be called inside an active `gr.Blocks`/`gr.Tabs`
    context so the tab attaches to the app — confirm against the caller.
    """

    with gr.Tab("🆕 Enhanced Apply Template"):
        gr.Markdown("""
        ### 🎯 Enhanced Template Application with User Input & Back Navigation

        **New Features:**
        - 👤 **User Input per Step**: Provide additional instructions for each step
        - 🔧 **Code Override**: Replace template code with your own
        - 🔄 **Back Navigation**: Go back to any previous step and continue from there
        - 📝 **Step-by-Step Control**: Execute one step at a time with full control
        """)

        # --- Section 1: template selection and rent roll upload ---
        with gr.Row():
            with gr.Column(scale=1):
                gr.Markdown("#### 1. Setup Template Application")

                template_id_enhanced = gr.Textbox(
                    label="Template ID",
                    placeholder="e.g., template_20250526_143022",
                    lines=1
                )

                load_template_enhanced_btn = gr.Button("📂 Load Template", variant="secondary")

                new_rent_roll_enhanced = gr.File(
                    label="New Rent Roll File (.xlsx, .xls)",
                    file_types=[".xlsx", ".xls"]
                )

                start_enhanced_btn = gr.Button("🚀 Start Enhanced Session", variant="primary", size="lg")

            with gr.Column(scale=2):
                gr.Markdown("#### Template Details")
                template_details_enhanced = gr.HTML(label="Template Information")

                template_steps_enhanced = gr.Textbox(
                    label="Steps Preview",
                    lines=6,
                    interactive=False
                )

        gr.Markdown("---")
        gr.Markdown("### 📝 Step-by-Step Execution with User Input")

        # --- Section 2: current step details (left) and the user's input for it (right) ---
        with gr.Row():
            with gr.Column(scale=2):
                gr.Markdown("#### Current Step Information")

                get_step_btn = gr.Button("📋 Get Next Step Details", variant="secondary")

                current_step_info = gr.Textbox(
                    label="Step Information",
                    lines=8,
                    interactive=False
                )

                with gr.Row():
                    original_code_display = gr.Textbox(
                        label="Original Template Code",
                        lines=8,
                        interactive=False
                    )

                    ai_response_display = gr.Textbox(
                        label="Original AI Response Context",
                        lines=8,
                        interactive=False
                    )

            with gr.Column(scale=1):
                gr.Markdown("#### 👤 Your Input for This Step")

                user_step_input = gr.Textbox(
                    label="Additional Instructions",
                    placeholder="Optional: Add specific requirements, modifications, or instructions for this step...",
                    lines=4
                )

                user_code_override = gr.Textbox(
                    label="Code Override (Optional)",
                    placeholder="Optional: Provide your own Python code to use instead of the template code...",
                    lines=6
                )

                execute_step_btn = gr.Button("▶️ Execute Step", variant="primary", size="lg")

        # --- Section 3: back navigation (left) and execution results (right) ---
        with gr.Row():
            with gr.Column(scale=1):
                gr.Markdown("#### 🔄 Back Navigation")

                get_back_options_btn = gr.Button("🔄 Show Back Options", variant="secondary")

                back_options_display = gr.Textbox(
                    label="Available Previous Steps",
                    lines=6,
                    interactive=False
                )

                back_step_number = gr.Number(
                    label="Go Back to Step #",
                    precision=0,      # Integer only
                    minimum=0,        # ✅ Allow step 0
                    maximum=100,      # Reasonable maximum
                    value=None,       # Start with no value
                    info="Enter 0 to go back to the beginning"
                )

                go_back_btn = gr.Button("⏪ Go Back to Step", variant="secondary")

            with gr.Column(scale=2):
                gr.Markdown("#### 📊 Execution Results & Status")

                execution_results = gr.Textbox(
                    label="Step Execution Results",
                    lines=12,
                    interactive=False
                )

                status_enhanced_btn = gr.Button("📊 Check Status", variant="secondary")

        with gr.Row():
            enhanced_status_display = gr.Textbox(
                label="Enhanced Application Status",
                lines=8,
                interactive=False
            )

        # Event handlers for Enhanced Template Application
        # The load handler returns three values: summary, steps preview, status message.
        load_template_enhanced_btn.click(
            load_template_for_enhanced_application,
            inputs=[template_id_enhanced],
            outputs=[template_details_enhanced, template_steps_enhanced, execution_results]
        )

        # Actions that change session state chain a status refresh via .then()
        start_enhanced_btn.click(
            start_enhanced_template_application_session,
            inputs=[template_id_enhanced, new_rent_roll_enhanced],
            outputs=[execution_results]
        ).then(
            get_enhanced_application_status,
            outputs=[enhanced_status_display]
        )

        # The last two outputs are the input fields, which the handler resets to ""
        get_step_btn.click(
            get_next_step_details,
            outputs=[current_step_info, original_code_display, ai_response_display, user_step_input, user_code_override]
        )

        execute_step_btn.click(
            execute_step_with_user_input,
            inputs=[user_step_input, user_code_override],
            outputs=[execution_results]
        ).then(
            get_enhanced_application_status,
            outputs=[enhanced_status_display]
        )

        get_back_options_btn.click(
            get_back_navigation_options,
            outputs=[back_options_display]
        )

        go_back_btn.click(
            go_back_to_step,
            inputs=[back_step_number],
            outputs=[execution_results]
        ).then(
            get_enhanced_application_status,
            outputs=[enhanced_status_display]
        )

        status_enhanced_btn.click(
            get_enhanced_application_status,
            outputs=[enhanced_status_display]
        )

        gr.Markdown("""
        ---
        ### 💡 Enhanced Usage Guide:

        1. **Setup**: Load template and upload new rent roll file
        2. **Start Session**: Begin enhanced template application
        3. **Step Details**: Click "Get Next Step Details" to see what the step will do
        4. **Add Input**: Optionally provide additional instructions or code overrides
        5. **Execute**: Run the step with your customizations
        6. **Navigate**: Use back navigation to return to previous steps if needed
        7. **Repeat**: Continue step-by-step until completion

        **🆕 Key Features:**
        - **User Input**: Customize each step with additional requirements
        - **Code Override**: Replace template code with your own
        - **Back Navigation**: Return to any previous step and continue from there
        - **Full Control**: Execute steps one at a time with complete oversight
        """)

# Replace the old tab with this enhanced version
# Use: create_enhanced_template_application_tab() instead of add_template_application_tab()

In [None]:
def chat_with_compression_status(message, history, selected_message_content=None, selected_message_index=None):
    """Run one chat turn and return it together with a conversation-size status line.

    Args:
        message: The user's new chat message.
        history: Chat history as a sequence of (user_message, assistant_message) pairs,
            as it was *before* this turn.
        selected_message_content: Optional earlier assistant message forwarded as context.
        selected_message_index: Index of that message; forwarded unchanged.

    Returns:
        tuple: ``(result, status)`` where ``result`` is whatever
        ``chat_with_selected_message`` returned (the updated history) and ``status``
        is a short human-readable string for the compression status textbox.
    """
    result = chat_with_selected_message(message, history, selected_message_content, selected_message_index)

    # No prior turns: nothing to measure yet.
    if not history:
        return result, "✅ New conversation"

    # Two messages per recorded pair, plus the message that was just sent.
    total_messages = len(history) * 2 + 1

    # Either side of a pair can be None (e.g. a turn without a reply). Count that as
    # empty text instead of raising after the chat turn has already been processed.
    total_chars = sum(len(part or "") for pair in history for part in pair[:2]) + len(message or "")

    # A tag such as v_20250101_120000 means the turn targeted a saved dataframe version.
    version_pattern = r'v_\d{8}_\d{6}'
    version_mentioned = bool(re.search(version_pattern, message or ""))

    if total_messages > 40:
        status = f"🔄 Compressed - Managing {total_messages} messages (~{total_chars:,} chars)"
    else:
        status = f"✅ Normal - {total_messages} messages (~{total_chars:,} chars)"

    if version_mentioned:
        status += " | 📋 Version-specific analysis"

    return result, status

In [None]:
# Initialize the global agent state
# NOTE(review): `agent_state` is not read anywhere in the code visible below; the chat
# functions use a separate global named `app_state`. Confirm whether this is still needed.
agent_state = None

# Extra CSS handed to gr.Blocks(css=...): keeps code inside bot chat messages on its
# original lines (no wrapping) and adds a horizontal scrollbar to <pre> blocks instead.
custom_css = """
.chatbot-container .message-wrap .message.bot pre {
    white-space: pre !important;
    overflow-x: auto !important;
    max-width: 100% !important;
}
.chatbot-container .message-wrap .message.bot code {
    white-space: pre !important;
}
"""

def chat_with_selected_message(message, history, selected_message_content=None, selected_message_index=None):
    """
    Enhanced chat function that includes a selected previous message as context
    and loads specific dataframe versions when mentioned in the prompt.

    Steps, in order:
      1. Refuse to run until ``app_state`` holds a dataframe.
      2. Look for a version tag (``v_`` + 8 digits + ``_`` + 6 digits) in ``message`` and,
         if found, try to read ``rent_roll_versions/rent_roll_<tag>.csv``.
      3. Start session recording if no session file is open.
      4. Build the message list from ``history`` plus the (possibly enriched) new message
         and pass it through ``conversation_manager.compress_history_if_needed``.
      5. Invoke the agentic workflow (created once and cached on this function).
      6. Record the turn and return the updated history.

    Args:
        message: The user's new chat message.
        history: Chat history as a sequence of (user_message, assistant_message) pairs.
        selected_message_content: Optional text of an earlier assistant message; when
            non-blank it is prepended to the prompt as context.
        selected_message_index: Index of the selected message. Accepted for the UI
            wiring but not used inside this function.

    Returns:
        list: A new history list with ``(message, response_text)`` appended. The
        original ``message`` is stored, not the enriched prompt, so the chat display
        stays clean. Workflow errors are caught and returned as the response text.
    """
    global app_state, session_recorder, conversation_manager
    # Function-scope import so the timestamped session notes below do not depend on a
    # module-level `datetime` name being defined by some other cell.
    from datetime import datetime

    def _append_session_note(lines):
        """Append raw text lines to the open session file; no-op when none is open."""
        if session_recorder.current_session_file:
            with open(session_recorder.current_session_file, 'a', encoding='utf-8') as f:
                f.writelines(lines)

    logger.info(f"Received chat message with selected context: {message[:50]}...")

    if selected_message_content:
        logger.info(f"Using selected message as context: {selected_message_content[:100]}...")

    # Check if system is ready
    if app_state is None or app_state["df"] is None:
        logger.warning("Chat attempted before setup is complete")
        # Tolerate a None history instead of failing on `None + [...]`.
        return (list(history) if history else []) + [
            (message, "Please upload a rent roll file and set up your API keys first.")
        ]

    # Detect and load a specific dataframe version if the message names one
    version_specific_df = None
    version_context = ""
    version_loaded = False
    requested_version = None

    version_pattern = r'v_\d{8}_\d{6}'
    version_match = re.search(version_pattern, message)

    if version_match:
        requested_version = version_match.group(0)
        logger.info(f"User requested specific version: {requested_version}")

        # Load the specific version
        versions_dir = "rent_roll_versions"
        version_file = os.path.join(versions_dir, f"rent_roll_{requested_version}.csv")

        if os.path.exists(version_file):
            try:
                version_specific_df = pd.read_csv(version_file)
                version_loaded = True
                logger.info(f"Successfully loaded version {requested_version} with shape {version_specific_df.shape}")

                # Find version metadata in the registry (None when it is not registered)
                version_info = next(
                    (v for v in app_state.get("df_versions", []) if v.get("name") == requested_version),
                    None,
                )

                # Create detailed version context
                version_context = f"""
USING SPECIFIC VERSION: {requested_version}
==================================
Version Description: {version_info.get('description', 'No description available') if version_info else 'Version not found in registry'}
Version Created: {version_info.get('timestamp', 'Unknown') if version_info else 'Unknown'}
Version Shape: {version_specific_df.shape}
Current/Latest Version Shape: {app_state["df"].shape}

IMPORTANT: All analysis will be performed on version {requested_version}, NOT the current/latest version.

Version Differences:
- Row count difference: {version_specific_df.shape[0] - app_state["df"].shape[0]} rows
- Column count difference: {version_specific_df.shape[1] - app_state["df"].shape[1]} columns
- Version is {'older' if version_info and app_state.get('df_versions', [])[-1]['name'] != requested_version else 'the latest'} version

Data from Version {requested_version}:
{version_specific_df.head(3).to_string()}
"""

                # Check if columns differ
                current_cols = set(app_state["df"].columns)
                version_cols = set(version_specific_df.columns)

                if current_cols != version_cols:
                    added_cols = current_cols - version_cols
                    removed_cols = version_cols - current_cols

                    if added_cols:
                        version_context += f"\nColumns added since this version: {list(added_cols)}"
                    if removed_cols:
                        version_context += f"\nColumns removed since this version: {list(removed_cols)}"

            except Exception as e:
                # Best effort: fall back to the current dataframe and tell the model why.
                logger.error(f"Failed to load version {requested_version}: {e}")
                version_specific_df = None
                version_loaded = False
                version_context = f"""
VERSION LOADING ERROR:
=====================
Requested version '{requested_version}' could not be loaded.
Error: {str(e)}
Using current dataframe instead.
"""
        else:
            logger.warning(f"Version file not found: {version_file}")
            available_versions = [v['name'] for v in app_state.get('df_versions', [])][:5]
            version_context = f"""
VERSION NOT FOUND:
==================
Requested version '{requested_version}' file not found at: {version_file}
Using current dataframe instead.
Available versions: {available_versions}...
"""

    # Start session recording if not already started
    if not session_recorder.current_session_file:
        # app_state is used as a mapping everywhere else in this function, so look the
        # filename up as a key first; getattr() on a plain dict never finds it. The
        # attribute lookup is kept as a fallback for attribute-style state objects.
        rent_roll_filename = (
            app_state.get('original_filename')
            or getattr(app_state, 'original_filename', None)
            or 'uploaded_rent_roll.xlsx'
        )
        session_id = session_recorder.start_session_recording(rent_roll_filename)
        logger.info(f"Started new session recording: {session_id}")

        # Record initial dataframe state
        if app_state.get("df_versions") and len(app_state["df_versions"]) > 0:
            first_version = app_state["df_versions"][0]
            session_recorder.record_dataframe_version(
                version_name=first_version["name"],
                description=first_version["description"],
                shape=list(app_state["df"].shape),
                columns=list(app_state["df"].columns)
            )

    # Convert (user, assistant) pairs into role-tagged messages
    all_messages = []
    if history:
        for user_msg, assistant_msg in history:
            all_messages.append({"role": "user", "content": user_msg})
            all_messages.append({"role": "assistant", "content": assistant_msg})

    # Enhanced message with selected context if provided
    if selected_message_content and selected_message_content.strip():
        enhanced_message = f"""CONTEXT FROM PREVIOUS ASSISTANT MESSAGE:
{selected_message_content}

USER'S FOLLOW-UP QUERY:
{message}

Please continue the analysis building on the context provided above."""

        # Record the context usage in session
        _append_session_note([
            f"\n[{datetime.now().strftime('%H:%M:%S')}] USER SELECTED PREVIOUS MESSAGE FOR CONTEXT\n",
            f"Selected Message (first 200 chars): {selected_message_content[:200]}...\n",
            f"User's follow-up query: {message}\n",
            "-" * 60 + "\n",
        ])
    else:
        enhanced_message = message

    # Add version context to the message if a specific version was requested
    if version_context:
        enhanced_message = f"""{version_context}

{enhanced_message}

{'NOTE: Analysis will be performed on the specific version loaded above.' if version_loaded else 'NOTE: Requested version could not be loaded, using current data.'}"""

        # Record version usage in session
        _append_session_note([
            f"\n[{datetime.now().strftime('%H:%M:%S')}] USER REQUESTED SPECIFIC VERSION\n",
            f"Requested Version: {requested_version if requested_version else 'Unknown'}\n",
            f"Version Loaded Successfully: {version_loaded}\n",
            f"Version Shape: {version_specific_df.shape if version_specific_df is not None else 'N/A'}\n",
            "-" * 60 + "\n",
        ])

    # Add the current user message (enhanced with context if provided)
    all_messages.append({"role": "user", "content": enhanced_message})

    # *** COMPRESSION STEP ***
    openai_client = app_state.get("openai_client") or OpenAI(api_key=DEFAULT_OPENAI_API_KEY)
    optimized_messages = conversation_manager.compress_history_if_needed(all_messages, openai_client)

    # Log compression if it happened
    original_size = conversation_manager.get_conversation_size(all_messages)
    optimized_size = conversation_manager.get_conversation_size(optimized_messages)
    if optimized_size < original_size:
        logger.info(f"Compressed conversation: {original_size} -> {optimized_size} tokens")
        _append_session_note([
            f"\n[COMPRESSION] Reduced conversation from {original_size} to {optimized_size} tokens ({len(all_messages)} -> {len(optimized_messages)} messages)\n",
        ])

    # Create a state dictionary for the graph
    state = {
        "messages": optimized_messages,
        "system_message": app_state["system_message"],
        # Analyse the requested version when it loaded, otherwise the current dataframe
        "df": version_specific_df if version_specific_df is not None else app_state["df"],
        "issues": app_state["issues"],
        "needs_clarification": False,
        "generate_code": False,
        "execution_plan": None,
        "clarification_question": None,
        "code_execution_results": None,
        "final_response": None,
        "anthropic_client": app_state["anthropic_client"],
        "openai_client": openai_client,
        "selected_context": selected_message_content,
        "version_context": version_context,  # Pass version context
        "using_specific_version": version_loaded,  # Flag for workflow
        "requested_version": requested_version  # Store requested version name
    }

    try:
        # Create the workflow once and cache it as an attribute of this function
        if not hasattr(chat_with_selected_message, "workflow"):
            chat_with_selected_message.workflow = create_agentic_rent_roll_analyzer()
            logger.info("Created agentic workflow")

        # Run the workflow with the current state
        logger.info(f"Running agentic workflow with {'version-specific dataframe' if version_loaded else 'current dataframe'}")
        result = chat_with_selected_message.workflow.invoke(state)

        # The state is seeded with final_response=None, so the key can be present but
        # empty; `or` (unlike a .get default) also covers that case.
        final_response = result.get("final_response") or "I'm sorry, I couldn't process your request."
        logger.info(f"Received final response from workflow: {final_response[:50]}...")

        # Enhanced session recording
        action_type = "version_specific_analysis" if version_loaded else ("contextual_analysis" if selected_message_content else "analysis")

        if result.get("needs_clarification"):
            action_type = "clarification"
        elif result.get("generate_code"):
            action_type = "version_specific_data_processing" if version_loaded else ("contextual_data_processing" if selected_message_content else "data_processing")

        # Extract executed code from response
        code_executed = None
        code_blocks = re.findall(r'```python\s*(.*?)\s*```', final_response, re.DOTALL)
        if code_blocks:
            code_executed = "\n\n# --- Next Code Block ---\n\n".join(code_blocks)

        # Check if a new dataframe version was saved
        version_saved = None
        if "✓ Saved dataframe version" in final_response:
            version_match_response = re.search(r'version (v_\w+)', final_response)
            if version_match_response:
                version_saved = version_match_response.group(1)

        # Record the conversation turn; long prompts are truncated to 500 characters
        session_description = enhanced_message if len(enhanced_message) < 500 else enhanced_message[:500] + "..."
        session_recorder.record_conversation_turn(
            user_message=session_description,
            ai_response=final_response,
            action_type=action_type,
            code_executed=code_executed,
            version_saved=version_saved
        )

        # Add version-specific note to response if a specific version was used
        if version_loaded:
            version_note = f"\n\n📋 **Version Note**: This analysis was performed on version `{state['requested_version']}` as requested, not the current/latest dataframe."
            final_response += version_note

        # Use the correct format for Gradio chatbot
        history_list = list(history) if history else []
        # Add the original message (not the enhanced one) to maintain clean chat display
        history_list.append((message, final_response))

        logger.info("Chat response processing complete with version-specific handling")
        return history_list

    except Exception as e:
        logger.error(f"Error processing chat with version handling: {e}")
        logger.error(traceback.format_exc())

        error_message = f"Error getting response: {str(e)}"

        if session_recorder.current_session_file:
            session_recorder.record_conversation_turn(
                user_message=enhanced_message,
                ai_response=error_message,
                action_type="system_error_with_version" if version_loaded else ("system_error_with_context" if selected_message_content else "system_error"),
                code_executed=None,
                version_saved=None
            )

        history_list = list(history) if history else []
        history_list.append((message, error_message))
        return history_list

# Helper functions for message selection
def extract_messages_for_selection(history):
    """Build the choices for the "Select Assistant Message" dropdown.

    Args:
        history: Chat history as a sequence of (user_message, assistant_message) pairs.

    Returns:
        A ``gr.update`` that replaces the dropdown choices with one
        ``(label, index)`` entry per turn and clears the current selection. Each
        label is ``#<turn number>: <first 100 characters of the reply>`` on one line.
    """
    if not history:
        return gr.update(choices=[], value=None)

    choices = []
    for i, (_user_msg, assistant_msg) in enumerate(history):
        # A turn may have no reply yet (None); show an empty preview instead of crashing.
        reply_text = "" if assistant_msg is None else str(assistant_msg)
        # Create a preview of the assistant message
        preview = reply_text[:100] + "..." if len(reply_text) > 100 else reply_text
        # Remove newlines for cleaner display
        preview = preview.replace('\n', ' ').replace('\r', '')
        choices.append((f"#{i+1}: {preview}", i))

    return gr.update(choices=choices, value=None)

def get_selected_message_preview(history, selected_index):
    """Return the full assistant reply for the chosen turn, with a heading.

    Args:
        history: Chat history as a sequence of (user_message, assistant_message) pairs.
        selected_index: Zero-based index of the chosen turn, or None.

    Returns:
        str: ``"Selected Message #<n>:"`` followed by a blank line and the reply, or
        an empty string when nothing is selected, the history is empty, or the
        index is past the end of the history.
    """
    # Guard clauses: nothing to preview.
    if selected_index is None:
        return ""
    if not history:
        return ""
    if not selected_index < len(history):
        return ""

    chosen_turn = history[selected_index]
    assistant_msg = chosen_turn[1]
    return f"Selected Message #{selected_index + 1}:\n\n{assistant_msg}"

def clear_message_selection():
    """Produce the reset values for the message-selection widgets.

    Returns:
        tuple: ``(None, "", gr.update(value=None))`` — an empty selected-content
        value, a blank context indicator, and a dropdown update that clears its value.
    """
    cleared_content = None
    blank_indicator = ""
    dropdown_reset = gr.update(value=None)
    return cleared_content, blank_indicator, dropdown_reset

# Top-level Gradio app. Everything created inside this `with` block becomes part of
# `demo`, which is launched at the bottom of the file.
with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue"), css=custom_css) as demo:
    gr.Markdown("# Agentic Commercial Real Estate Rent Roll Analyzer")
    gr.Markdown("## Hybrid AI System: GPT-4 for Decision Making & Claude for Code Generation")

    # --- Setup tab: file upload, optional API keys, and the load button ---
    with gr.Tab("Setup"):
        with gr.Row():
            # Left column: inputs
            with gr.Column():
                file_input = gr.File(label="Upload Rent Roll Excel File (.xlsx, .xls)")

                # Add separate API key inputs for OpenAI and Anthropic
                # (type="password" masks the typed key in the browser)
                anthropic_api_key = gr.Textbox(
                    label="Anthropic API Key (Optional - for code generation)",
                    placeholder="Leave blank to use the default API key",
                    type="password"
                )

                openai_api_key = gr.Textbox(
                    label="OpenAI API Key (Optional - for decision making and text responses)",
                    placeholder="Leave blank to use the default API key",
                    type="password"
                )

                # Updated auto-analyze checkbox (checked by default)
                auto_analyze = gr.Checkbox(
                    label="Automatically analyze for issues using GPT-4",
                    value=True,
                    info="When checked, GPT-4 will automatically identify issues in your rent roll"
                )

                # Wired to upload_rent_roll near the end of this Blocks context
                upload_button = gr.Button("Load Rent Roll & Start Chat", variant="primary")

            # Right column: outputs filled in by the upload handler
            with gr.Column():
                result = gr.Textbox(label="Status")
                preview = gr.HTML(label="Data Preview")

    # --- Chat tab: widget construction (event wiring follows further down) ---
    with gr.Tab("Chat"):
        # Session management buttons
        with gr.Row():
            view_versions_btn = gr.Button("View Version History")
            create_template_btn = gr.Button("🎯 Create Template from Session", variant="primary")
            end_session_btn = gr.Button("🔚 End Current Session")
            session_status_btn = gr.Button("📊 Session Status")

        # Output area for the version history view
        data_view = gr.HTML()
        chatbot = gr.Chatbot(label="Agentic Rent Roll Analysis Chat", height=500)

        # Message selection system: pick an earlier assistant reply to use as context
        with gr.Accordion("💬 Reply to Specific Message", open=False):
            gr.Markdown("""
            **Select a previous assistant message to use as context for your next question.**
            This helps maintain conversation flow and build on previous analysis.
            """)

            with gr.Row():
                # Choices are (label, history index) pairs filled in by
                # extract_messages_for_selection
                message_selector = gr.Dropdown(
                    label="Select Assistant Message",
                    choices=[],
                    value=None,
                    interactive=True,
                    scale=3
                )
                refresh_messages_btn = gr.Button("🔄 Refresh", size="sm", scale=1)

            # Hidden until a message is chosen in the dropdown
            selected_message_preview = gr.Textbox(
                label="Selected Message Preview",
                lines=4,
                interactive=False,
                visible=False
            )

            # Both buttons start hidden and are revealed together with the preview
            with gr.Row():
                use_selection_btn = gr.Button("✅ Use Selected Message as Context", variant="primary", visible=False)
                clear_selection_btn = gr.Button("❌ Clear Selection", size="sm", visible=False)

            # Hidden state to store selected message content
            selected_message_content = gr.State(value=None)
            selected_message_index = gr.State(value=None)

        # Context indicator (banner shown while a selected message is active)
        context_indicator = gr.HTML(value="", visible=False)

        # Enhanced message input area with prompt enhancement
        with gr.Row():
            with gr.Column(scale=4):
                msg = gr.Textbox(
                    label="Your question",
                    placeholder="Ask about the rent roll... (Use ✨ Enhance for AI-improved prompts)",
                    lines=2
                )
            with gr.Column(scale=1):
                with gr.Row():
                    enhance_btn = gr.Button("✨ Enhance", variant="secondary", size="sm")
                    send_btn = gr.Button("Send", variant="primary", size="sm")

        # Status line for enhancement feedback
        enhancement_status = gr.Textbox(
            label="Enhancement Status",
            interactive=False,
            visible=True,
            lines=1,
            value="Ready to enhance prompts with AI context"
        )
        # Status line updated after every chat turn by chat_with_compression_status
        compression_status = gr.Textbox(
            label="Conversation Compression Status",
            interactive=False,
            visible=True,
            lines=1,
            value="Conversation size: Normal"
        )
        clear_btn = gr.Button("Clear Chat History")

        # Template creation input
        with gr.Accordion("Template Creation", open=False):
            template_name_input = gr.Textbox(
                label="Template Name (Optional)",
                placeholder="e.g., 'Monthly Rent Roll Cleanup Process'",
                lines=1
            )
            # Shared status box for the template and session buttons above
            template_status = gr.Textbox(label="Template Creation Status", interactive=False, lines=5)

        # Message selection handlers

        # "Refresh" rebuilds the dropdown choices from the current chat history.
        refresh_messages_btn.click(
            extract_messages_for_selection,
            inputs=[chatbot],
            outputs=[message_selector]
        )

        # Choosing an entry fills the preview, then shows (or hides, when the value is
        # None) the preview box and the two selection buttons together.
        message_selector.change(
            get_selected_message_preview,
            inputs=[chatbot, message_selector],
            outputs=[selected_message_preview]
        ).then(
            lambda selected: (
                gr.update(visible=selected is not None),
                gr.update(visible=selected is not None),
                gr.update(visible=selected is not None)
            ),
            inputs=[message_selector],
            outputs=[selected_message_preview, use_selection_btn, clear_selection_btn]
        )

        # "Use Selected Message as Context" stores the chosen reply and its index in the
        # two State holders and fills the context banner.
        # NOTE(review): `idx < len(history)` raises if the chatbot value is None while an
        # index is selected, and an out-of-range index stores None as content while the
        # banner still reads "Context Active" — confirm whether either can occur.
        use_selection_btn.click(
            lambda history, idx: (
                history[idx][1] if idx is not None and idx < len(history) else None,
                idx,
                f"<div style='background-color: #e3f2fd; padding: 10px; border-radius: 5px; margin: 10px 0;'><strong>🔗 Context Active:</strong> Using message #{idx + 1} as context</div>" if idx is not None else ""
            ),
            inputs=[chatbot, message_selector],
            outputs=[selected_message_content, selected_message_index, context_indicator]
        ).then(
            # The banner is made visible unconditionally, even when its HTML is empty.
            lambda: gr.update(visible=True),
            outputs=[context_indicator]
        )

        # "Clear Selection" resets the stored content, the banner text and the dropdown,
        # then hides the banner.
        # NOTE(review): selected_message_index is not among the outputs, so it keeps its
        # previous value until the next chat turn clears it — confirm this is intended.
        clear_selection_btn.click(
            clear_message_selection,
            outputs=[selected_message_content, context_indicator, message_selector]
        ).then(
            lambda: gr.update(visible=False),
            outputs=[context_indicator]
        )

        # Prompt enhancement: rewrites the draft in the textbox and reports the outcome
        enhance_btn.click(
            enhance_prompt_interface,
            inputs=[msg, chatbot],
            outputs=[enhancement_status, msg],
        )

        # Pressing Enter in the textbox and clicking "Send" run the same pipeline:
        #   1. run the chat turn and update the compression status
        #   2. clear the textbox, the stored context and the banner text
        #   3. hide the context banner
        #   4. refresh the message selector from the new history
        for send_event in (msg.submit, send_btn.click):
            turn = send_event(
                chat_with_compression_status,
                inputs=[msg, chatbot, selected_message_content, selected_message_index],
                outputs=[chatbot, compression_status],
            )
            turn = turn.then(
                lambda: ("", None, None, ""),
                outputs=[msg, selected_message_content, selected_message_index, context_indicator],
            )
            turn = turn.then(
                lambda: gr.update(visible=False),
                outputs=[context_indicator],
            )
            turn.then(
                extract_messages_for_selection,
                inputs=[chatbot],
                outputs=[message_selector],
            )

        # Clearing the chat also empties the selector and resets the compression status
        cleared = clear_btn.click(clear_chat, outputs=[chatbot])
        cleared = cleared.then(
            lambda: gr.update(choices=[], value=None),
            outputs=[message_selector],
        )
        cleared.then(
            lambda: "✅ New conversation",
            outputs=[compression_status],
        )

        # Session management buttons
        view_versions_btn.click(view_dataframe_versions, None, data_view)
        create_template_btn.click(
            create_template_from_current_session,
            inputs=[template_name_input],
            outputs=[template_status],
        )
        end_session_btn.click(end_current_session, outputs=[template_status])
        session_status_btn.click(get_current_session_status, outputs=[template_status])

    # --- Edit Data tab: load a dataframe version into an editable grid and save edits ---
    with gr.Tab("Edit Data"):
        gr.Markdown("""
        ### 📝 Edit Rent Roll Data

        You can directly edit cells in the table below, just like in Excel.
        - Click on any cell to edit it
        - Use Tab or arrow keys to navigate
        - Changes are analyzed by GPT-4.1 and recorded in your session
        - All changes are automatically saved to session history
        """)

        with gr.Row():
            with gr.Column(scale=3):
                # Version selector
                # get_version_choices() is evaluated once, while the UI is being built;
                # later updates come from refresh_version_dropdown and the upload handler.
                version_dropdown = gr.Dropdown(
                    label="Select Version to Edit",
                    choices=get_version_choices(),
                    value=None,
                    interactive=True
                )

            with gr.Column(scale=1):
                refresh_versions_btn = gr.Button("🔄 Refresh Versions", size="sm")
                with gr.Row():
                    load_latest_btn = gr.Button("📂 Load Latest", variant="secondary", size="sm")
                    load_version_btn = gr.Button("📂 Load Selected", variant="primary", size="sm")

        # Status display
        edit_status = gr.Textbox(label="Status", interactive=False)

        # The editable dataframe
        # column_widths supplies a 100px width for up to 20 columns.
        editable_df = gr.Dataframe(
            label="Editable Data (Click any cell to edit) - Changes tracked by AI",
            interactive=True,
            wrap=True,
            max_height=500,
            column_widths=["100px"] * 20,
        )

        # Save controls
        with gr.Row():
            with gr.Column(scale=3):
                save_description = gr.Textbox(
                    label="Description of Changes (GPT-4.1 will analyze if left blank)",
                    placeholder="e.g., 'Updated rent for units 101-105' or leave blank for AI analysis",
                    lines=2
                )

            with gr.Column(scale=1):
                save_changes_btn = gr.Button("💾 Save & Analyze Changes", variant="primary", size="lg")

        save_status = gr.Textbox(label="Save Status & AI Analysis", interactive=False, lines=8)

        # Quick actions section
        with gr.Accordion("Quick Actions", open=False):
            gr.Markdown("""
            ### Bulk Operations
            Use these buttons for common bulk edits:
            """)

            with gr.Row():
                # Placeholders: these buttons are disabled (interactive=False) and have
                # no handlers attached yet.
                gr.Button("🧹 Clean Empty Rows", size="sm", interactive=False)
                gr.Button("💵 Round All Currency", size="sm", interactive=False)
                gr.Button("📅 Fix Date Formats", size="sm", interactive=False)
                gr.Button("🔢 Recalculate Totals", size="sm", interactive=False)

        # Enhanced session tracking notice
        gr.Markdown("""
        ### 🤖 AI-Powered Change Tracking
        - **GPT-4.1 Analysis**: Every edit is analyzed for business impact
        - **Session Recording**: All changes saved to copiloting session
        - **Template Ready**: Manual edits become part of reusable workflows
        - **Quality Assurance**: AI detects data quality improvements/issues
        """)

        # Event handlers for Edit Data tab with enhanced functions
        refresh_versions_btn.click(
            refresh_version_dropdown,
            outputs=[version_dropdown]
        )

        # Load the most recent version into the grid
        load_latest_btn.click(
            load_latest_version_for_editing,
            outputs=[editable_df, edit_status]
        )

        # Load the version chosen in the dropdown into the grid
        load_version_btn.click(
            load_specific_version,
            inputs=[version_dropdown],
            outputs=[editable_df, edit_status]
        )

        # Save the edited grid, then refresh the dropdown afterwards
        save_changes_btn.click(
            save_edited_dataframe,
            inputs=[editable_df, save_description],
            outputs=[save_status, editable_df]
        ).then(
            refresh_version_dropdown,  # Refresh the dropdown after saving
            outputs=[version_dropdown]
        )

        # Instructions
        gr.Markdown("""
        ---
        ### 💡 How to Use Enhanced Edit Data:
        1. **Load Data**: Click "Load Latest" or select a specific version
        2. **Edit Cells**: Click on any cell and type to edit (just like Excel!)
        3. **Navigate**: Use Tab, Enter, or arrow keys to move between cells
        4. **Save Changes**: Enter a description (optional) and click "Save & Analyze Changes"
        5. **AI Analysis**: GPT-4.1 will analyze your changes and provide insights

        ### ⚠️ Enhanced Features:
        - **Automatic Analysis**: AI understands what you changed and why
        - **Business Impact**: Get insights on how changes affect rent calculations
        - **Session Integration**: All edits become part of your copiloting history
        - **Template Building**: Manual edits are included in reusable templates
        - **Quality Checks**: AI warns if changes might impact data quality
        """)

    # Builds the template application tab; called inside the Blocks context so its
    # components attach to `demo`.
    create_enhanced_template_application_tab()

    # --- Template Manager tab: list, inspect and delete saved templates ---
    with gr.Tab("Template Manager"):
        gr.Markdown("""
        ### 📋 Template Management System

        Create, view, and apply reusable rent roll processing templates.
        Templates capture your complete workflow including conversations, code, and manual edits.
        """)

        with gr.Row():
            with gr.Column(scale=2):
                gr.Markdown("#### Available Templates")
                template_list = gr.HTML(label="Template List")
                refresh_templates_btn = gr.Button("🔄 Refresh Template List")

            with gr.Column(scale=2):
                gr.Markdown("#### Template Details")
                template_details = gr.HTML(label="Template Summary")

        with gr.Row():
            template_id_input = gr.Textbox(
                label="Template ID",
                placeholder="e.g., template_20250526_143022"
            )
            with gr.Column():
                view_template_btn = gr.Button("👁️ View Template", variant="secondary")
                delete_template_btn = gr.Button("🗑️ Delete Template", variant="stop")

        template_action_status = gr.Textbox(label="Status", interactive=False, lines=3)

        # Template management event handlers
        # The lambda wrappers are required: list_available_templates is defined after
        # this Blocks context, so the name must be resolved at click time, not here.
        refresh_templates_btn.click(
            lambda: list_available_templates(),
            outputs=[template_list]
        )

        # An empty Template ID short-circuits to a prompt instead of calling the manager.
        view_template_btn.click(
            lambda template_id: enhanced_template_manager.get_template_summary(template_id) if template_id else "Please enter a template ID",
            inputs=[template_id_input],
            outputs=[template_details]
        )

        # NOTE(review): deletion runs immediately on click, with no confirmation step.
        delete_template_btn.click(
            lambda template_id: enhanced_template_manager.delete_template(template_id) if template_id else "Please enter a template ID",
            inputs=[template_id_input],
            outputs=[template_action_status]
        ).then(
            lambda: list_available_templates(),  # Refresh list after deletion
            outputs=[template_list]
        )

    # Initially hide the chat interface
    # NOTE(review): this sets the attribute after the component was constructed; whether
    # Gradio honours that (versus passing visible=False to gr.Chatbot) should be confirmed.
    chatbot.visible = False

    # Updated upload button event with both API keys and version dropdown.
    # Wired here, outside the tabs, because it reads Setup-tab inputs and writes to the
    # chatbot (Chat tab) and the version dropdown (Edit Data tab).
    upload_button.click(
        upload_rent_roll,
        inputs=[file_input, anthropic_api_key, openai_api_key, auto_analyze],
        outputs=[result, preview, chatbot, version_dropdown]
    )

    # Static usage guide rendered below all tabs (created outside any gr.Tab context).
    # NOTE(review): the tab and button names quoted in the text are hand-written and
    # should be checked against the actual labels whenever the tabs change.
    gr.Markdown("""
    ## How to use this Enhanced Agentic Rent Roll Analyzer:

    ### ✨ **NEW: AI Prompt Enhancement**

    **In the Chat tab, use the "✨ Enhance" button to:**
    - Transform simple requests into professional CRE analysis prompts
    - Add context from your actual rent roll data (column names, data types)
    - Include relevant business terminology and best practices
    - Make prompts more specific and actionable

    **Example Enhancement:**
    - **Before:** "show me the data"
    - **After:** "Display a comprehensive overview of the rent roll data including tenant information from [TenantName] column, rent amounts from [BaseRent] and [TotalRent] columns, lease dates from [LeaseStart] and [LeaseEnd] columns, and occupancy status. Also highlight any data quality issues such as missing values or inconsistent formatting."

    ### 🚀 **Template Application System**

    #### **Workflow Overview:**
    1. **Create Templates** (Chat Tab): Work through your rent roll analysis normally
    2. **Save Templates**: Click "Create Template from Session" to save your workflow
    3. **Apply Templates** (Apply Template Tab): Use saved templates on new rent roll files
    4. **Automated Processing**: GPT-4.1 + Claude 3.7 adapt and execute each step

    ### 📋 **Step-by-Step Guide:**

    #### **Phase 1: Create Your First Template**
    1. **Setup Tab**: Upload your rent roll Excel file
    2. **Chat Tab**: Interact normally - ask questions, get analysis, make changes
    3. **Use ✨ Enhance**: Make your prompts more professional and context-aware
    4. **Edit Data Tab**: Make any manual edits (tracked by AI)
    5. **Create Template**: Click "🎯 Create Template from Session"

    #### **Phase 2: Apply Template to New Files**
    1. **Apply Template Tab**: Select your saved template ID
    2. **Upload New File**: Choose a similar rent roll file
    3. **Start Application**: Click "🚀 Start Application"
    4. **Execute Steps**: Run "▶️ Execute Next Step" or "⏭️ Execute All Steps"

    ### 🤖 **AI Workflow in Template Application:**

    **For Each Template Step:**
    1. **GPT-4.1 Analysis**:
       - Analyzes original template step
       - Maps columns from template to new file
       - Adapts parameters and business rules
       - Creates optimized prompt for Claude

    2. **Claude 3.7 Execution**:
       - Receives adapted instructions
       - Generates appropriate Python code
       - Executes data processing
       - Returns results and updates dataframe

    3. **Validation & Progress**:
       - Validates step completion
       - Records success/failure
       - Logs detailed results
       - Moves to next step

    ### 🔧 **Key Features:**

    - **✨ AI Prompt Enhancement**: Smart context-aware prompt improvement
    - **Intelligent Adaptation**: Automatically maps different column names
    - **Business Logic Preservation**: Maintains the intent of original analysis
    - **Error Recovery**: Handles failures gracefully and continues
    - **Progress Tracking**: Real-time status of template application
    - **Complete Logging**: Detailed logs of every step and decision

    ### 💡 **Use Cases:**

    - **Monthly Processing**: Apply same cleanup to each month's rent roll
    - **Property Portfolios**: Use one template across multiple properties
    - **Team Workflows**: Share proven analysis methods
    - **Quality Assurance**: Ensure consistent processing standards
    - **Time Savings**: Automate repetitive analysis tasks

    ### ⚡ **Quick Start:**

    1. Upload rent roll → Chat about analysis (use ✨ Enhance!) → Create template
    2. Get template ID from Template Manager
    3. Go to Apply Template → Enter template ID → Upload new file → Execute!

    The system transforms your one-time analysis into reusable, intelligent automation!
    """)

# Additional helper functions for Template Manager tab
def list_available_templates():
    """Generate an HTML listing of the available templates.

    Asks ``enhanced_template_manager.list_templates()`` for the template
    records and renders one card per record showing its name, ID, creation
    date (first 10 characters), source file, step count and whether a GPT-4
    analysis is available.

    All record values are HTML-escaped before being interpolated, because
    names and source file names are free text and would otherwise be
    rendered as markup.

    Returns:
        str: An HTML fragment. This is a placeholder paragraph when there
        are no templates, or an error paragraph if anything raises while
        the listing is being built.
    """
    # Imported locally so the block stays self-contained; the accumulator is
    # deliberately not called ``html`` to avoid shadowing the module name.
    from html import escape

    try:
        templates = enhanced_template_manager.list_templates()

        if not templates:
            return "<p>No templates available yet. Create your first template by using the 'Create Template from Session' button in the Chat tab.</p>"

        parts = ["<div style='max-height: 400px; overflow-y: auto;'>"]

        for template in templates:
            gpt_status = "🤖 GPT-4 Analysis" if template.get('gpt4_analysis_available') else "📝 Basic Info"

            # Coerce to str before escaping/slicing so non-string values
            # (e.g. an integer step count) render instead of raising.
            name = escape(str(template['template_name']))
            template_id = escape(str(template['template_id']))
            created = escape(str(template['created_date'])[:10])
            source = escape(str(template['source_file']))
            steps = escape(str(template['steps_count']))

            parts.append(f"""
            <div style='border: 1px solid #ddd; margin: 10px 0; padding: 15px; border-radius: 8px; background-color: #f9f9f9;'>
                <h4 style='margin: 0 0 10px 0; color: #333;'>{name}</h4>
                <p style='margin: 5px 0; color: #666;'><strong>ID:</strong> <code>{template_id}</code></p>
                <p style='margin: 5px 0; color: #666;'><strong>Created:</strong> {created}</p>
                <p style='margin: 5px 0; color: #666;'><strong>Source:</strong> {source}</p>
                <p style='margin: 5px 0; color: #666;'><strong>Steps:</strong> {steps} workflow steps</p>
                <p style='margin: 5px 0;'><span style='background-color: #e3f2fd; padding: 2px 6px; border-radius: 4px; font-size: 12px;'>{gpt_status}</span></p>
            </div>
            """)

        parts.append("</div>")
        return "".join(parts)

    except Exception as e:
        # Best-effort UI helper: report the failure in the panel instead of raising.
        return f"<p>Error loading templates: {escape(str(e))}</p>"


# Run the application
# Entry point guard: the launch below only happens when this code runs as the
# top-level module, not when it is imported.
if __name__ == "__main__":
    logger.info("Starting Agentic Rent Roll Analyzer application with prompt enhancement")
    # debug=True: the Colab output captured for this cell states that the cell
    # then keeps running so errors and logs stay visible (set debug=False to
    # turn that off).
    demo.launch(debug=True)
    # NOTE(review): presumably only reached once launch() returns — confirm.
    logger.info("Application shutdown")

  chatbot = gr.Chatbot(label="Agentic Rent Roll Analysis Chat", height=500)


It looks like you are running Gradio on a hosted a Jupyter notebook. For the Gradio app to work, sharing must be enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. This cell will run indefinitely so that you can see errors and logs. To turn off, set debug=False in launch().
* Running on public URL: https://3f858d945a3ae533d6.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)




✓ Saved dataframe version v_20250617_014250: Initial upload - original dataset from Rent Roll 9-1-24 Excel 1 1.xlsx
  - CSV: rent_roll_versions/rent_roll_v_20250617_014250.csv
  - Excel: rent_roll_versions/rent_roll_v_20250617_014250.xlsx
  - Shape: (30, 9)
  - Registry updated: 1 total versions


  {rent_roll_df.head(5).fillna('').to_html(index=False)}
  state[block._id] = block.__class__(**kwargs)


📝 Started session recording: session_20250617_014343
🔧 Conversation size: 350 tokens
Starting code generation with version support...

==== STARTING CODE GENERATION WITH VERSION SUPPORT ====
User query: 
USING SPECIFIC VERSION: v_20250617_014250
Version Description: Initial upload - original dataset from Rent Roll 9-1-24 Excel 1 1.xlsx
Version Created: 20250617_014250
Version Shape: (30, 9)
Current/Latest Version Shape: (30, 9)

IMPORTANT: All analysis will be performed on version v_20250617_014250, NOT the current/latest version.

Version Differences:
- Row count difference: 0 rows
- Column count difference: 0 columns
- Version is the latest version

Data from Version v_20250617_014250:
                Tenant Floor Lease Start Date Lease Expiration     RSF    RSF%  Annual Rent  Monthly Rent Rent PSF
0                 AT&T    LL       2024-09-01       2029-08-31   295.0  0.0007      10110.0        842.52    34.27
1  Care Tech Solutions    LL       2023-06-01       2028-05-31  8472.0  0



Loading rent roll with specialized loader for enhanced template application...
💾 Saved version: step_0_version_20250617_015856.csv (Shape: (32, 9))
💾 Saved state for step 0: (32, 9)
🔗 Loaded latest dataframe from: step_0_version_20250617_015856.csv
🔗 Loaded latest dataframe from: step_0_version_20250617_015856.csv

🚀 EXECUTING STEP 1 WITH USER INPUT
📝 Original step: 
USING SPECIFIC VERSION: v_20250617_014250
Version Description: I...
🔗 Loaded latest dataframe from: step_0_version_20250617_015856.csv
🔗 Loaded latest dataframe from: step_0_version_20250617_015856.csv
🔗 STEP 1 LOADING FROM VERSION FILE: step_0_version_20250617_015856.csv
📊 Loaded DataFrame Shape: (32, 9)
✅ Step 1 succeeded!
💾 Saved state for step 1: (32, 9)
💾 Application state saved: template_applications/app_20250617_015856_state.json
🔗 Loaded latest dataframe from: step_1_version_20250617_020016.csv
🔗 Loaded latest dataframe from: step_1_version_20250617_020016.csv

🚀 EXECUTING STEP 2 WITH USER INPUT
📝 Original step: LO

Summarize the key user-guided instructions and solutions from this rent roll copiloting session.

For each step, briefly describe:

What the user wanted to accomplish (without quoting them directly).

How the request was addressed or solved by the copilot.

Avoid excessive detail, don’t repeat the user’s exact instructions, and keep each summary concise and clear.

In [None]:
# Run this in your Colab cell to diagnose the version system

def run_version_diagnostic():
    """Print a five-part diagnostic report about the dataframe version system.

    The report covers, in order:

    1. whether the global ``app_state`` exists and what its ``df_versions``
       registry contains,
    2. which files are present in the ``rent_roll_versions`` directory,
    3. what ``get_version_choices()`` returns,
    4. whether the first choice can be loaded via ``load_specific_version()``,
    5. a recommendation derived from the findings above.

    Everything is written to stdout; the function returns ``None``. Failures
    in steps 3 and 4 are reported in the output rather than raised, so the
    report always runs through to the recommendations.
    """

    print("🔍 COMPREHENSIVE VERSION SYSTEM DIAGNOSTIC")
    print("=" * 60)

    # Look the global up once. ``state`` is None both when the name is missing
    # and when it is explicitly None; ``has_app_state`` tells the two apart
    # for the report.
    has_app_state = 'app_state' in globals()
    state = globals().get('app_state')

    # 1. Check global app_state
    print("1. APP_STATE CHECK:")
    if has_app_state:
        print("   ✅ app_state exists globally")
        if state is None:
            print("   ❌ app_state is None")
        else:
            print("   ✅ app_state is not None")
            print(f"   📊 Keys in app_state: {list(state.keys())}")

            if "df_versions" in state:
                # Treat a None registry like an empty one, matching step 5.
                versions = state["df_versions"] or []
                print(f"   📋 Versions in registry: {len(versions)}")
                for i, v in enumerate(versions, start=1):
                    name = v.get('name', 'UNKNOWN')
                    # ``or`` also covers a description stored as None, which
                    # would otherwise fail on slicing.
                    desc = str(v.get('description') or 'No description')[:50]
                    print(f"      {i}. {name} - {desc}")
            else:
                print("   ❌ No df_versions key in app_state")
    else:
        print("   ❌ app_state not found in globals")

    # 2. Check file system
    print("\n2. FILE SYSTEM CHECK:")
    versions_dir = "rent_roll_versions"
    csv_files = []  # stays empty when the directory is missing; reused in step 5
    if os.path.exists(versions_dir):
        print(f"   ✅ Directory exists: {versions_dir}")
        files = os.listdir(versions_dir)
        csv_files = [f for f in files if f.endswith('.csv')]
        excel_files = [f for f in files if f.endswith('.xlsx')]

        print(f"   📄 Total files: {len(files)}")
        print(f"   📄 CSV files: {len(csv_files)}")
        print(f"   📄 Excel files: {len(excel_files)}")

        print("   📋 CSV Files found:")
        for csv_file in csv_files[:10]:  # Show first 10
            file_path = os.path.join(versions_dir, csv_file)
            file_size = os.path.getsize(file_path)
            print(f"      • {csv_file} ({file_size:,} bytes)")

        if len(csv_files) > 10:
            print(f"      ... and {len(csv_files) - 10} more")
    else:
        print(f"   ❌ Directory not found: {versions_dir}")

    # 3. Test version choices function
    print("\n3. VERSION CHOICES TEST:")
    choices = []  # explicit default so step 4 need not probe locals()
    try:
        choices = get_version_choices()
        print(f"   📋 Generated choices: {len(choices)}")
        for choice in choices[:5]:  # Show first 5
            print(f"      • {choice}")
        if len(choices) > 5:
            print(f"      ... and {len(choices) - 5} more")
    except Exception as e:
        print(f"   ❌ Error getting version choices: {e}")

    # 4. Test specific version loading
    print("\n4. VERSION LOADING TEST:")
    if choices:
        try:
            # Parsing sits inside the try so that an unexpected choice format
            # is reported instead of aborting the whole diagnostic.
            test_version = choices[0].split(" (")[0].strip()  # Get clean name
            print(f"   🧪 Testing load of version: {test_version}")
            df, status = load_specific_version(test_version)
            if df is not None:
                print(f"   ✅ Successfully loaded: {df.shape}")
                print(f"   📄 Status: {status[:100]}...")
            else:
                print(f"   ❌ Failed to load: {status}")
        except Exception as e:
            print(f"   ❌ Error loading version: {e}")
    else:
        print("   ⚠️  No versions available to test")

    print("\n5. RECOMMENDATIONS:")

    # Check for common issues
    if state is None:
        print("   🔧 Run: Restart and upload a rent roll file first")
    elif "df_versions" not in state or not state["df_versions"]:
        # Registry is empty: if CSVs exist on disk (found in step 2), a sync
        # can rebuild it.
        if csv_files:
            print("   🔧 Run: force_sync_versions() to sync files with registry")
        else:
            print("   🔧 Create some versions first by using the chat or editing data")
    else:
        print("   ✅ System looks healthy!")

    print("=" * 60)

# Produce the full diagnostic report first
run_version_diagnostic()

# Follow-up for suspected sync problems: call force_sync_versions() and then
# list the version choices again so the effect of the sync is visible.
print("\n" + "=" * 40, "RUNNING FORCE SYNC...", "=" * 40, sep="\n")
try:
    result = force_sync_versions()
    print(result)

    # Query the choices again now that the sync has run
    print("\nTesting version choices after sync:")
    choices = get_version_choices()
    print("Available choices:", len(choices))
    for choice in choices[:3]:
        print("  •", choice)

except Exception as sync_error:
    # Any failure above (the sync itself or the follow-up listing) lands here
    print("Sync failed:", sync_error)