In [None]:
# # LangChain Looker Agent - Conversational Test Notebook
# 
# This notebook adapts an existing BigQuery conversational SQL agent to use the
# custom LangChain Looker SQL Agent. It allows for natural language queries
# against your Looker instance via its SQL Interface.
# 
# **Prerequisites (within Vertex AI Notebook VM - or your chosen environment):**
# 1.  **Java (JRE/JDK):** Installed (e.g., OpenJDK 11).
# 2.  **Looker JDBC Driver:** Uploaded to the VM (e.g., to `/home/jupyter/drivers/looker-jdbc.jar`).
# 3.  **Python Libraries:** `langchain`, `langchain-core`, `langchain-openai` (or other LLM), 
#     `langchain-community`, `jaydebeapi`, `python-dotenv`, `ipywidgets`, `jpype1`, `tabulate`.
#     *(SQLAlchemy and google-cloud-bigquery are NOT needed for the Looker agent part)*
# 4.  **`langchain_looker_sql_agent.py`:** The Python module file saved in the project root (the parent directory of the notebook's working directory).
# 5.  **`.env` file:** Created in the project root (the parent directory of the notebook's working directory) with Looker and LLM credentials, 
#     JDBC driver path, and LookML model name.

# ## Phase 1: Setup and Configuration

# ### Cell 1.1: Install/Upgrade Dependencies (if not already done)
# *Run this cell once if you haven't already installed/updated these.*
# *Restart the kernel after running this cell if it's the first time or after major upgrades.*

# %%
# # Ensure Java is installed (example for Debian/Ubuntu based systems)
# !sudo apt-get update && sudo apt-get install -y openjdk-11-jdk --no-install-recommends
# !java -version

# # Install/Upgrade Python packages
# !pip install -U langchain langchain-core langchain-openai langchain-community jaydebeapi python-dotenv ipywidgets JPype1 tabulate

# print("Dependencies check/installation step complete. If you installed Java or made major pip changes, please RESTART KERNEL now.")

# ### Cell 1.2: Imports and Environment Loading

# %%
import logging
import os
import sys
from typing import Any

from dotenv import load_dotenv

# --- 1. Configure basic logging FIRST ---
# The root logger is set to ERROR, so the INFO/WARNING messages emitted below
# stay hidden unless a level is lowered as shown in the commented lines.
logging.basicConfig(
    level=logging.ERROR,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
)
logger = logging.getLogger(__name__)  # Logger for this notebook
# To see INFO messages from THIS NOTEBOOK'S logger, uncomment the next line:
# logger.setLevel(logging.INFO)

# To see INFO messages from the custom agent module:
# logging.getLogger("langchain_looker_sql_agent").setLevel(logging.INFO)
# Or DEBUG for even more detail from the custom module:
# logging.getLogger("langchain_looker_sql_agent").setLevel(logging.DEBUG)

# The project root is taken to be the parent of the current working directory
# (e.g. cwd = /home/jupyter/project_root/notebooks). It is put at the front of
# sys.path so modules stored there can be imported.
project_root = ""
current_notebook_actual_dir = os.getcwd()

try:
    project_root = os.path.abspath(
        os.path.join(current_notebook_actual_dir, os.pardir)
    )

    if project_root in sys.path:
        logger.info(f"Project root {project_root} already in sys.path.")
    else:
        sys.path.insert(0, project_root)
        logger.info(f"Added project root to sys.path: {project_root}")
except Exception as e:
    logger.error(f"Error determining project root or modifying sys.path: {e}")
    # Fallback: treat the working directory itself as the project root.
    project_root = current_notebook_actual_dir
    logger.warning(f"Falling back to using notebook's current directory as project_root: {project_root}")
    if project_root not in sys.path:
        sys.path.insert(0, project_root)

# The .env file is expected directly inside the project root; a missing file
# is fatal because every credential below is read from it.
dotenv_filepath = os.path.join(project_root, '.env')

if not os.path.exists(dotenv_filepath):
    logger.error(f".env file not found at {dotenv_filepath}. Please ensure it is in the project root directory: {project_root}")
    raise FileNotFoundError(f".env file not found at {dotenv_filepath}")

load_dotenv(dotenv_path=dotenv_filepath)
logger.info(f".env file loaded successfully from: {dotenv_filepath}")

# --- 4. Custom Looker agent components ---
# The module is located through sys.path, which had project_root placed at its
# front earlier in this cell.
try:
    from langchain_looker_sql_agent import LookerSQLDatabase, LookerSQLToolkit, create_looker_sql_agent 
    logger.info("Custom Looker agent classes imported successfully from langchain_looker_sql_agent.py.")
except ImportError as e:
    # Log where the module was expected, then re-raise: the components
    # initialized later in this notebook depend on these three names.
    logger.error(f"Error importing from 'langchain_looker_sql_agent.py': {e}. Ensure the file is in {project_root} and named correctly (with underscores).", exc_info=True)
    raise 

# --- 5. LangChain specific imports ---
# Not guarded: a missing LangChain package surfaces as a plain ImportError.
from langchain_openai import ChatOpenAI
from langchain.memory import ConversationBufferMemory
from langchain.agents import AgentExecutor 
logger.info("Core LangChain components (ChatOpenAI, ConversationBufferMemory, AgentExecutor) imported.")

# --- 6. Fetch and Validate ALL Environment Variables needed ---
# Looker Configuration
LOOKER_INSTANCE_URL = os.getenv("LOOKER_INSTANCE_URL")
LOOKML_MODEL_NAME = os.getenv("LOOKML_MODEL_NAME")
LOOKER_CLIENT_ID = os.getenv("LOOKER_CLIENT_ID")
LOOKER_CLIENT_SECRET = os.getenv("LOOKER_CLIENT_SECRET")
LOOKER_JDBC_DRIVER_PATH = os.getenv("LOOKER_JDBC_DRIVER_PATH")
# For display/logging purposes only
LOOKER_HOST_DISPLAY = os.getenv("LOOKER_HOST", LOOKER_INSTANCE_URL)
# LLM credentials (required; validated below)
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")

# Java Home Verification
# JAVA_HOME is considered usable only when it names an existing directory and
# does not point at the java binary itself (a value containing "bin/java").
_java_home = os.environ.get("JAVA_HOME", "")
if os.path.isdir(_java_home) and "bin/java" not in _java_home:
    logger.info(f"JAVA_HOME is correctly set to: {os.environ['JAVA_HOME']}")
else:
    java_home_correct_path = "/usr/lib/jvm/java-11-openjdk-amd64"  # Common default for Linux VMs
    if os.path.isdir(java_home_correct_path):
        os.environ["JAVA_HOME"] = java_home_correct_path
        logger.info(f"JAVA_HOME set to: {os.environ['JAVA_HOME']}")
    else:
        logger.warning(f"JAVA_HOME incorrect/not set, default path '{java_home_correct_path}' not found.")

# Validate essential configurations: every entry must be present and non-empty.
_required_settings = {
    "LOOKER_INSTANCE_URL": LOOKER_INSTANCE_URL,
    "LOOKML_MODEL_NAME": LOOKML_MODEL_NAME,
    "LOOKER_CLIENT_ID": LOOKER_CLIENT_ID,
    "LOOKER_CLIENT_SECRET": LOOKER_CLIENT_SECRET,
    "LOOKER_JDBC_DRIVER_PATH": LOOKER_JDBC_DRIVER_PATH,
    "OPENAI_API_KEY": OPENAI_API_KEY,
}
missing_vars = [name for name, value in _required_settings.items() if not value]

if missing_vars:
    err_msg = f"Required environment variables from .env are missing: {', '.join(missing_vars)}"
    logger.critical(err_msg)
    raise ValueError(err_msg)

# A relative driver path is documented as relative to the project root, but a
# bare os.path.exists() would resolve it against the working directory (the
# notebook folder). Prefer the project-root location when it exists; a path
# that is only valid relative to the working directory is still accepted by
# the existence check below.
if not os.path.isabs(LOOKER_JDBC_DRIVER_PATH):
    _root_relative_driver = os.path.abspath(
        os.path.join(project_root, LOOKER_JDBC_DRIVER_PATH)
    )
    if os.path.exists(_root_relative_driver):
        LOOKER_JDBC_DRIVER_PATH = _root_relative_driver

if not os.path.exists(LOOKER_JDBC_DRIVER_PATH):
    err_msg = f"Looker JDBC Driver not found at path: {LOOKER_JDBC_DRIVER_PATH}. Check .env and that the path is relative to project root if not absolute."
    logger.critical(err_msg)
    raise FileNotFoundError(err_msg)

logger.info("All configurations loaded and validated.")
logger.info(f"  Using Looker Instance URL: {LOOKER_INSTANCE_URL}")
logger.info(f"  Using LookML Model Name: {LOOKML_MODEL_NAME}")
logger.info(f"  JDBC Driver Path (from project root): {LOOKER_JDBC_DRIVER_PATH}")
logger.info(f"  JAVA_HOME: {os.getenv('JAVA_HOME', 'Not Set - Check Warnings!')}")


# ## Phase 2: Initialize Components (Database, LLM, Memory, Agent)

# Initialize LookerSQLDatabase
# Connection settings are gathered first, then handed to the constructor in
# one call. Any failure is logged and leaves `db` as None so that the later
# steps can detect it and skip themselves.
_looker_db_kwargs = {
    "looker_instance_url": LOOKER_INSTANCE_URL,
    "lookml_model_name": LOOKML_MODEL_NAME,
    "client_id": LOOKER_CLIENT_ID,
    "client_secret": LOOKER_CLIENT_SECRET,
    "jdbc_driver_path": LOOKER_JDBC_DRIVER_PATH,
    # Adjust as needed, 0 to disable if problematic
    "sample_rows_in_table_info": 2,
}
try:
    db = LookerSQLDatabase(**_looker_db_kwargs)
    logger.info("LookerSQLDatabase initialized successfully.")
except Exception as e:
    logger.error(f"Failed to initialize LookerSQLDatabase: {e}", exc_info=True)
    db = None  # Marks the database as unavailable for later steps

# Initialize LLM
# Only attempted when the database object exists; otherwise `llm` is None.
if not db:
    llm = None
    logger.warning("Skipping LLM initialization as database connection failed.")
else:
    try:
        # api_key is not passed explicitly here; it is expected to be picked
        # up from the environment (see the original note: "Already set via
        # env var by LangChain usually").
        llm = ChatOpenAI(
            model="gpt-4o",      # Or another preferred model, e.g. "gpt-3.5-turbo"
            temperature=0,
            max_tokens=None,     # No explicit completion-length limit
            timeout=None,
            max_retries=2,
        )
        logger.info(f"ChatOpenAI LLM initialized with model: {llm.model_name}")
    except Exception as e:
        logger.error(f"Failed to initialize LLM: {e}", exc_info=True)
        llm = None


# Initialize Conversational Memory
# The memory object is stored under the "chat_history" key and returns message
# objects (return_messages=True, intended for chat models). It is handed to
# the AgentExecutor further down so that the executor manages history; the
# alternative would be passing `chat_history` explicitly on every call.
# NOTE(review): the original notes state that the prompt built by
# `create_looker_sql_agent` contains a `{chat_history}` placeholder - confirm
# against that module.
memory = ConversationBufferMemory(return_messages=True, memory_key="chat_history")
logger.info("ConversationBufferMemory initialized.")


# Create Looker SQL Agent Toolkit
# Requires both the database and the LLM to be available, although only the
# database is passed to the toolkit itself.
looker_toolkit = None
if db and llm:
    try:
        looker_toolkit = LookerSQLToolkit(db=db)
        logger.info("LookerSQLToolkit initialized.")
    except Exception as e:
        logger.error(f"Failed to initialize LookerSQLToolkit: {e}", exc_info=True)
        looker_toolkit = None
else:
    logger.warning("Skipping Toolkit initialization.")


# Create the Looker SQL Agent Executor
# `agent_executor` stays None unless both the LLM and the toolkit exist and
# the factory call succeeds.
agent_executor = None
if not (llm and looker_toolkit):
    logger.warning("Skipping Agent Executor creation.")
else:
    # Arguments forwarded to the AgentExecutor: the shared memory object (so
    # the executor manages chat history) and parsing-error handling (so the
    # agent can retry after a malformed step).
    _executor_options = {
        "memory": memory,
        "handle_parsing_errors": True,
    }
    try:
        agent_executor = create_looker_sql_agent(
            llm=llm,
            toolkit=looker_toolkit,
            verbose=True,  # Set to False for less console output during agent runs
            top_k=100,     # Max rows for agent to consider for LIMIT, can be overridden by LLM
            agent_executor_kwargs=_executor_options,
        )
        logger.info("Looker SQL Agent Executor created successfully.")
    except Exception as e:
        logger.error(f"Failed to create Looker SQL Agent Executor: {e}", exc_info=True)


# ## Phase 3: Define Query Function and Start Interaction

# %%
def ask_question_looker(question_input: str, agent_exec: Any) -> str:
    """Send a question to the Looker agent and return its answer.

    Args:
        question_input: Text submitted to the agent under the ``"input"`` key.
        agent_exec: Object exposing ``invoke(dict)`` that returns a mapping
            (in this notebook, the executor built by
            ``create_looker_sql_agent``). A falsy value means setup failed.

    Returns:
        The value stored under ``"output"`` in the agent response; a fallback
        message when that key is absent; a "not initialized" message when
        ``agent_exec`` is falsy; or an ``"An error occurred: ..."`` message
        when the invocation raises. Exceptions are logged, never propagated.
    """
    if not agent_exec:
        return "Agent Executor is not initialized. Please check previous setup steps."
    try:
        # Only ``input`` is supplied: chat history is expected to be injected
        # by the memory object attached to the executor. Without attached
        # memory the call would instead need to be:
        #   response = agent_exec.invoke({"input": question_input,
        #                                 "chat_history": memory.chat_memory.messages or []})
        #   memory.save_context({"input": question_input}, {"output": response.get("output")})
        response = agent_exec.invoke({"input": question_input})
        return response.get("output", "No output found in agent response.")
    except Exception as e:
        logger.error(f"Error during agent invocation: {e}", exc_info=True)
        return f"An error occurred: {e}"

# %%
# Persona/task preamble that is prepended to every user question before it is
# sent to the agent. It shapes tone and analytical depth only; the SQL syntax
# rules are stated to live in the agent's own system prompt.
# The text ends with "Question: " so the user's question follows directly.
looker_instruction_prefix = (
    "You are a helpful data analyst for Rittman Analytics, querying data through Looker.\n"
    "When asked for a specific metric or value, provide it clearly.\n"
    "\n"
    "Question: "
)

# %%
# Interactive chat loop. Runs only when the agent executor was created.
# Each turn: read a question, prepend the persona prefix, ask the agent, print
# the answer. Typing QUIT (any case) or pressing Ctrl-C ends the session.
if agent_executor:
    print("Hi! I'm your Looker Data Agent. Ask me a question about data accessible via our Looker instance.")
    print("Type QUIT to exit.")

    while True:
        # Reading input is handled separately from answering: if input()
        # itself fails (e.g. EOFError because stdin is closed) it would fail
        # again on every retry, so the loop must stop instead of spinning.
        try:
            question = input("\nYour question: ")
        except KeyboardInterrupt:
            print("\nExiting chat due to interrupt. Goodbye!")
            break
        except Exception as e:
            logger.error(f"Unable to read user input: {e}", exc_info=True)
            print("\nInput is unavailable. Exiting chat.")
            break

        command = question.strip()
        if command.upper() == 'QUIT':
            print("Exiting chat. Goodbye!")
            break
        if not command:
            # Nothing was typed; do not spend an LLM call on the bare prefix.
            continue

        try:
            # Prepend the persona/task instruction
            full_question = looker_instruction_prefix + question

            print("Thinking...")
            response = ask_question_looker(full_question, agent_executor)
            print(f"\nAgent: {response}")
            print("\nIs there anything else I can answer for you? Or type QUIT to exit.")
        except KeyboardInterrupt:
            print("\nExiting chat due to interrupt. Goodbye!")
            break
        except Exception as e:
            # A failure while answering one question should not end the chat.
            logger.error(f"An error occurred in the chat loop: {e}", exc_info=True)
            print("Sorry, an unexpected error occurred. Please try again or type QUIT.")
else:
    logger.error("Agent Executor not initialized. Cannot start chat loop. Please check setup cells for errors.")
    print("ERROR: Agent Executor could not be initialized. Please review the notebook setup and logs.")

# %% [markdown]
# ## Phase 4: Cleanup (Optional)
# 
# *Close the database connection when done.*

# %%
# Release the Looker connection if one was created. Errors raised while
# closing are logged and otherwise ignored.
if not db:
    logger.info("No database connection (db object) to close or it was not initialized.")
else:
    try:
        logger.info("Attempting to close Looker database connection...")
        db.close()
    except Exception as e:
        logger.error(f"Error closing Looker connection: {e}", exc_info=True)

logger.info("--- End of Looker Agent Conversational Test Notebook ---")