In [1]:
import time
import pandas as pd
import logging
import sys
from pathlib import Path

# Configure logging
# logging.basicConfig(stream=sys.stdout, level=logging.DEBUG) # Uncomment for detailed LlamaIndex logs
# logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout))

# --- LlamaIndex Imports ---
try:
    from llama_index.core import Settings, SimpleDirectoryReader, VectorStoreIndex
    from llama_index.core.agent import ReActAgent
    from llama_index.llms.llama_cpp import LlamaCPP
    from llama_index.core.tools import QueryEngineTool, ToolMetadata
    from llama_index.readers.file import PandasCSVReader # Specific reader for pandas
    from llama_index.core.query_engine import PandasQueryEngine # Engine for querying pandas dataframes

except ImportError as e:
    print(f"Import Error: {e}")
    print("Please ensure all required llama-index packages are installed:")
    print("pip install llama-index llama-index-llms-llama-cpp llama-index-readers-file llama-index-agent-react pandas")
    sys.exit(1)

# --- Configuration ---
MODEL_PATH = "/Users/rushilsrivastava/models/DeepSeek-R1-Distill-Llama-8B-Q4_K_M.gguf" # Path to your downloaded GGUF model
CSV_DIR = "/Users/rushilsrivastava/Desktop/uni/2nd_sem/AI in Business/FactoryTwin/ft_datasets"  # Directory where your CSV files are stored

# LLM Configuration (Adjust based on your system's RAM and CPU)
CONTEXT_WINDOW = 4096 # Max context window size for the model
MAX_NEW_TOKENS = 512 # Max tokens the LLM should generate in one response
N_GPU_LAYERS = 0     # Ensure CPU-only execution
N_THREADS = None     # Use None for llama.cpp to auto-detect CPU threads, or set a specific number

# --- 1. Load LLM using LlamaCPP Integration ---
print("Loading LLM...")
try:
    llm = LlamaCPP(
        model_path=MODEL_PATH,
        temperature=0.1, # Lower temperature for more factual answers
        max_new_tokens=MAX_NEW_TOKENS,
        context_window=CONTEXT_WINDOW,
        # Model specific kwargs passed to llama_cpp.Llama
        model_kwargs={"n_gpu_layers": N_GPU_LAYERS, "verbose": False},
        # LlamaCPP specific kwargs
        generate_kwargs={},
        # Set True for more verbose LlamaCPP logging
        verbose=False,
    )
    # Update global settings
    Settings.llm = llm
    Settings.context_window = CONTEXT_WINDOW # Also set globally for context awareness in LlamaIndex
    print("LLM loaded successfully.")
except Exception as e:
    print(f"Error loading LLM: {e}")
    print(f"Ensure the model path is correct: {MODEL_PATH}")
    sys.exit(1)


# --- 2. Load Data using Pandas CSV Reader ---
print(f"Loading CSV data from: {CSV_DIR}")
csv_files = list(Path(CSV_DIR).glob("*.csv"))
if not csv_files:
    print(f"Error: No CSV files found in directory '{CSV_DIR}'. Please add your CSV files.")
    sys.exit(1)

Loading LLM...


llama_init_from_model: n_ctx_per_seq (4096) < n_ctx_train (131072) -- the full capacity of the model will not be utilized
ggml_metal_init: skipping kernel_get_rows_bf16                     (not supported)
ggml_metal_init: skipping kernel_mul_mv_bf16_f32                   (not supported)
ggml_metal_init: skipping kernel_mul_mv_bf16_f32_1row              (not supported)
ggml_metal_init: skipping kernel_mul_mv_bf16_f32_l4                (not supported)
ggml_metal_init: skipping kernel_mul_mv_bf16_bf16                  (not supported)
ggml_metal_init: skipping kernel_mul_mv_id_bf16_f32                (not supported)
ggml_metal_init: skipping kernel_mul_mm_bf16_f32                   (not supported)
ggml_metal_init: skipping kernel_mul_mm_id_bf16_f32                (not supported)
ggml_metal_init: skipping kernel_flash_attn_ext_bf16_h64           (not supported)
ggml_metal_init: skipping kernel_flash_attn_ext_bf16_h80           (not supported)
ggml_metal_init: skipping kernel_flash_attn_ext_

LLM loaded successfully.
Loading CSV data from: /Users/rushilsrivastava/Desktop/uni/2nd_sem/AI in Business/FactoryTwin/ft_datasets


In [2]:
# Load each CSV into a pandas DataFrame
dataframes = {}
try:
    for csv_file in csv_files:
        df_name = csv_file.stem # Use filename without extension as key
        print(f"  - Loading {csv_file.name}...")
        # PandasCSVReader returns a list of LlamaIndex Documents, we want the raw df
        # A simpler approach is direct pandas loading
        dataframes[df_name] = pd.read_csv(csv_file)
        print(f"    Loaded DataFrame '{df_name}' with shape {dataframes[df_name].shape}")
except Exception as e:
    print(f"Error loading CSV files: {e}")
    sys.exit(1)

  - Loading job_mst.csv...
    Loaded DataFrame 'job_mst' with shape (300, 6)
  - Loading purchase_line.csv...
    Loaded DataFrame 'purchase_line' with shape (980, 9)
  - Loading sales_mst.csv...
    Loaded DataFrame 'sales_mst' with shape (500, 3)
  - Loading sales_line.csv...
    Loaded DataFrame 'sales_line' with shape (1770, 8)
  - Loading customer.csv...
    Loaded DataFrame 'customer' with shape (1000, 7)
  - Loading vend_part.csv...
    Loaded DataFrame 'vend_part' with shape (400, 4)
  - Loading purchase_mst.csv...
    Loaded DataFrame 'purchase_mst' with shape (400, 3)
  - Loading vendor.csv...
    Loaded DataFrame 'vendor' with shape (500, 7)
  - Loading bill_of_m.csv...
    Loaded DataFrame 'bill_of_m' with shape (5000, 3)
  - Loading nonconform_mst.csv...
    Loaded DataFrame 'nonconform_mst' with shape (50, 4)
  - Loading part_mst.csv...
    Loaded DataFrame 'part_mst' with shape (10000, 8)


In [10]:
# --- 3. Create Pandas Query Engines for each DataFrame ---
from llama_index.experimental.query_engine import PandasQueryEngine
print("Creating Pandas Query Engines...")
query_engines = {}
for name, df in dataframes.items():
    try:
        # The PandasQueryEngine allows querying the DataFrame using the LLM
        query_engine = PandasQueryEngine(df=df, llm=llm, verbose=False) # Set verbose=True to see generated pandas queries
        query_engines[name] = query_engine
        print(f"  - Created query engine for '{name}'")
    except Exception as e:
        print(f"Error creating query engine for '{name}': {e}")
        # Continue trying to create other engines

if not query_engines:    
    print("Error: Failed to create any query engines. Cannot proceed.")
    sys.exit(1)

Creating Pandas Query Engines...
  - Created query engine for 'job_mst'
  - Created query engine for 'purchase_line'
  - Created query engine for 'sales_mst'
  - Created query engine for 'sales_line'
  - Created query engine for 'customer'
  - Created query engine for 'vend_part'
  - Created query engine for 'purchase_mst'
  - Created query engine for 'vendor'
  - Created query engine for 'bill_of_m'
  - Created query engine for 'nonconform_mst'
  - Created query engine for 'part_mst'


In [11]:
query_engines

{'job_mst': <llama_index.experimental.query_engine.pandas.pandas_query_engine.PandasQueryEngine at 0x4775f7910>,
 'purchase_line': <llama_index.experimental.query_engine.pandas.pandas_query_engine.PandasQueryEngine at 0x48e087090>,
 'sales_mst': <llama_index.experimental.query_engine.pandas.pandas_query_engine.PandasQueryEngine at 0x17c5232d0>,
 'sales_line': <llama_index.experimental.query_engine.pandas.pandas_query_engine.PandasQueryEngine at 0x48df9b190>,
 'customer': <llama_index.experimental.query_engine.pandas.pandas_query_engine.PandasQueryEngine at 0x48e327f50>,
 'vend_part': <llama_index.experimental.query_engine.pandas.pandas_query_engine.PandasQueryEngine at 0x48df9bb50>,
 'purchase_mst': <llama_index.experimental.query_engine.pandas.pandas_query_engine.PandasQueryEngine at 0x48d363d10>,
 'vendor': <llama_index.experimental.query_engine.pandas.pandas_query_engine.PandasQueryEngine at 0x48e3353d0>,
 'bill_of_m': <llama_index.experimental.query_engine.pandas.pandas_query_engin

In [None]:
# --- 4. Define Tools for the Agent ---
print("Defining tools for the agent...")
all_tools = []

# Example: Create tools based on assumed CSV filenames.
# **IMPORTANT**: Customize the name and description based on your ACTUAL CSV files and their content.
# The description is CRITICAL for the agent to know when to use the tool.

if 'orders' in query_engines:
    orders_tool = QueryEngineTool(
        query_engine=query_engines['orders'],
        metadata=ToolMetadata(
            name="orders_data_query",
            description=(
                "Provides information about customer orders, including projected revenue, "
                "order dates, due dates, customer names, and ordered part numbers/families. "
                "Use this tool for questions about revenue forecasts, order status, and customer-specific details."
            ),
        ),
    )
    all_tools.append(orders_tool)
    print("  - Added tool for 'orders' data")

if 'deliveries' in query_engines:
    deliveries_tool = QueryEngineTool(
        query_engine=query_engines['deliveries'],
        metadata=ToolMetadata(
            name="delivery_data_query",
            description=(
                "Provides information about order deliveries, including actual delivery dates, "
                "on-time status (compared to due dates), reasons/root causes for delays, and line item details. "
                "Use this tool for questions about on-time delivery performance, late deliveries, and root cause analysis."
            ),
        ),
    )
    all_tools.append(deliveries_tool)
    print("  - Added tool for 'deliveries' data")

if 'parts' in query_engines:
    parts_tool = QueryEngineTool(
        query_engine=query_engines['parts'],
        metadata=ToolMetadata(
            name="parts_data_query",
            description=(
                "Provides information about manufactured parts and purchased materials, "
                "including part families, contribution margins, system lead times, supplier information, "
                "and demonstrated lead times. Use this tool for questions about part profitability, "
                "lead times, and supplier performance."
            ),
        ),
    )
    all_tools.append(parts_tool)
    print("  - Added tool for 'parts' data")

# Add more tools for other relevant CSVs as needed...

if not all_tools:
    print("Error: No tools were created. Check CSV filenames and tool definitions.")
    sys.exit(1)

In [3]:
import time
import os
import sys
import warnings
import json
import re
import traceback # For detailed error printing
from typing import List, Dict, Any, TypedDict, Annotated, Sequence
import operator

# --- Environment Setup ---
os.environ["TOKENIZERS_PARALLELISM"] = "false"

# --- Package Imports and Checks ---
try:
    import vanna
    from vanna.base import VannaBase
    from vanna.chromadb.chromadb_vector import ChromaDB_VectorStore
    import openai
    import mysql.connector
    import pandas as pd
    # --- LangChain / LangGraph Imports ---
    from langchain_openai import ChatOpenAI
    from langchain_core.messages import BaseMessage, HumanMessage, ToolMessage, AIMessage
    from langchain_core.tools import tool
    from langgraph.graph import StateGraph, END
    from langgraph.prebuilt import ToolNode # Use the prebuilt ToolNode

    print("Required packages imported successfully.")
except ImportError as e:
    print(f"Import Error: {e}")
    print("Ensure 'vanna[chromadb]', 'openai', 'mysql-connector-python', 'cryptography', 'pandas', 'llama-cpp-python', 'langchain', 'langchain_openai', 'langgraph', 'langchain_community' are installed.")
    sys.exit(1)

# --- Configuration ---
db_type = "mysql"
mysql_host = "127.0.0.1"
mysql_port = 3306
mysql_user = "root" # !!! REPLACE !!!
mysql_password = "Raekwon_wtc$36" # !!! REPLACE !!!
mysql_dbname = "ft_database" # !!! REPLACE !!!

llm_model_name = 'DeepSeek-R1-Distill-Llama-8B-Q4_K_M.gguf'
local_server_url = 'http://127.0.0.1:8000/v1'
placeholder_api_key = "sk-no-key-required"

# --- Initialize Vanna Instance ---
vn = None
try:
    # Define Vanna's LLM Interface (basic placeholder)
    class VannaLlmInterface(VannaBase):
        def __init__(self, config=None):
            VannaBase.__init__(self, config=config)
            self.model = llm_model_name
        def system_message(self, message: str) -> dict: return {"role": "system", "content": message}
        def user_message(self, message: str) -> dict: return {"role": "user", "content": message}
        def assistant_message(self, message: str) -> dict: return {"role": "assistant", "content": message}
        def submit_prompt(self, prompt, **kwargs) -> str:
            print("[VannaLlmInterface.submit_prompt] WARNING: This basic method called.")
            # This is less critical now as the agent uses the LangChain LLM
            if isinstance(prompt, list):
                 return str(prompt[-1]['content']) # Simplistic return
            return str(prompt)

    # Define Combined Vanna Class
    class MyVanna(ChromaDB_VectorStore, VannaLlmInterface):
        def __init__(self, config=None):
            print("[MyVanna Init] Initializing combined Vanna class for Tool...")
            ChromaDB_VectorStore.__init__(self, config=config)
            VannaLlmInterface.__init__(self, config=config)

    print("Instantiating Vanna for Tool...")
    vn = MyVanna(config=None) # Instantiate globally


SyntaxError: incomplete input (3574047409.py, line 73)

In [None]:

    print(f"Connecting Vanna to {db_type} database '{mysql_dbname}'...")
    if db_type == "mysql":
        vn.connect_to_mysql(host=mysql_host, port=mysql_port, user=mysql_user, password=mysql_password, dbname=mysql_dbname)
    else:
        raise ValueError(f"Database type '{db_type}' not configured.")     
    print("Vanna database connection successful.")

    # --- Vanna Training ---
    print("\n--- Vanna Training ---")
    # Set train_vanna = True ONLY when you need to update Vanna's knowledge
    train_vanna = True # Default to False for normal operation
    if train_vanna:
        print("Running Vanna training...")
        try:
            # Fetch schema info
            print(f"Fetching schema for database: '{mysql_dbname}'")
            # --- FIX: Use explicit DB name with quotes ---
            sql_query = f"SELECT TABLE_SCHEMA, TABLE_NAME, COLUMN_NAME, DATA_TYPE FROM INFORMATION_SCHEMA.COLUMNS WHERE TABLE_SCHEMA = '{mysql_dbname}'"
            # --- End Fix ---
            print(f"Executing SQL: {sql_query}")
            df_information_schema = vn.run_sql(sql_query)

            # --- START DEBUG PRINTS ---
            print("\n--- Debug: Information Schema DataFrame ---")
            if isinstance(df_information_schema, pd.DataFrame):
                 print(df_information_schema.head())
                 print(f"Shape: {df_information_schema.shape}")
                 if df_information_schema.empty:
                     print("ERROR: DataFrame is EMPTY. Check DB name, table existence, and permissions.")
            # --- FIX: Corrected indentation for else ---
            else:
                 print(f"ERROR: vn.run_sql did not return a DataFrame. Type: {type(df_information_schema)}")
                 print(f"Value: {df_information_schema}")
            # --- END FIX ---
            # --- END DEBUG PRINTS ---

            # Proceed only if DataFrame is not empty
            if isinstance(df_information_schema, pd.DataFrame) and not df_information_schema.empty:
                print("\nGenerating training plan...")
                plan = vn.get_training_plan_generic(df_information_schema)

                # --- START DEBUG PRINTS ---
                print("\n--- Debug: Generated Training Plan ---")
                print(f"Type: {type(plan)}")
                if isinstance(plan, list):
                     print(f"Length: {len(plan)}")
                     # print(f"First 5 elements: {plan[:5]}") # Optional verbose
                     if not plan: print("ERROR: Training plan is an EMPTY list.")
                else:
                     print(f"Value: {plan}")
                     if not plan: print("ERROR: Training plan is None or empty.")
                # --- END DEBUG PRINTS ---

                # Proceed only if plan seems valid
                if plan:
                    print("\nExecuting vn.train(plan=plan)...")
                    vn.train(plan=plan)
                    print("Schema training portion completed.")
                    # === ADD DOCUMENTATION AND SQL EXAMPLES HERE ===
                    # print("Add documentation and SQL examples for better results!")
                    # ===============================================
                else: print("Skipping vn.train because the plan is empty or invalid.")
            else: print("Skipping plan generation and training because schema DataFrame is empty or invalid.")

            print("\nVanna training process finished (or skipped relevant parts).")

        except Exception as train_e:
            print(f"\nError during Vanna training: {type(train_e).__name__} - {train_e}")
            traceback.print_exc()

    else: print("Skipping Vanna training (train_vanna set to False).")

except Exception as setup_e:
    print(f"\n--- Error during Vanna Setup ---")
    print(f"{type(setup_e).__name__}: {setup_e}")
    print("Cannot proceed without Vanna instance. Exiting.")
    traceback.print_exc()
    sys.exit(1)

In [4]:
vn.get_training_plan_generic(df_information_schema)

IndexError: list index out of range

In [7]:
df_information_schema = vn.run_sql("SELECT * FROM INFORMATION_SCHEMA.COLUMNS where TABLE_SCHEMA = 'ft_database'")
vn.get_training_plan_generic(df_information_schema)

Train on Information Schema: def.ft_database bill_of_m
Train on Information Schema: def.ft_database customer
Train on Information Schema: def.ft_database job_mst
Train on Information Schema: def.ft_database nonconform_mst
Train on Information Schema: def.ft_database part_mst
Train on Information Schema: def.ft_database purchase_line
Train on Information Schema: def.ft_database purchase_mst
Train on Information Schema: def.ft_database sales_line
Train on Information Schema: def.ft_database sales_mst
Train on Information Schema: def.ft_database vend_part
Train on Information Schema: def.ft_database vendor

In [None]:
# --- Initialize LangChain LLM ---
try:
    llm = ChatOpenAI(
        model=llm_model_name,
        openai_api_base=local_server_url,
        openai_api_key=placeholder_api_key,
        temperature=0.1,
        max_tokens=2048)

    # --- End Fix ---
    print("\n[LLM Test] Testing LangChain LLM connection...")
    llm.invoke("Hello!") # Simple test
    print("[LLM Test] LangChain LLM connection successful.")
except Exception as llm_e:
    print(f"\n--- Error initializing or testing LangChain LLM ---")
    print(f"{type(llm_e).__name__}: {llm_e}")
    print("Check if the llama-cpp-python server is running and accessible.")
    sys.exit(1)

# --- Define Vanna Tool for LangGraph ---
@tool
def vanna_query_tool(question: str) -> Dict[str, Any]:
    """
    Uses Vanna.ai to generate and execute a SQL query based on a natural language question.
    Returns both the generated SQL (if any) and the execution results (as a string DataFrame or error message).
    Use this tool when you need data from the database to answer a question or perform analysis.
    Input must be the natural language question for Vanna.
    """
    global vn
    print(f"\n--- Calling Vanna Tool ---")
    print(f"Input Question: {question}")
    if vn is None: return {"sql_query": None, "results": "Error: Vanna instance not available."}

    sql_query = None
    results_str = "Error: Processing failed."
    df_result = None

    try:
        sql_query_raw = vn.generate_sql(question=question)
        if not sql_query_raw or (isinstance(sql_query_raw, str) and sql_query_raw.startswith("Error:")):
            print(f"Vanna failed to generate SQL: {sql_query_raw}")
            results_str = sql_query_raw or "Error: Vanna did not generate SQL."
            sql_query = None
        else:
            sql_query = sql_query_raw.strip().removeprefix("``````").strip()
            print(f"Generated SQL: {sql_query}")
            try:
                print("Executing SQL...")
                df_result = vn.run_sql(sql=sql_query)
                # --- FIX: Removed dangling '...' ---
                print(f"SQL Execution Successful. Result type: {type(df_result)}, Shape: {df_result.shape if isinstance(df_result, pd.DataFrame) else 'N/A'}")
                # --- End Fix ---
                if isinstance(df_result, pd.DataFrame):
                     if df_result.empty: results_str = "Query executed successfully, but returned no results."
                     elif len(df_result) > 30: results_str = f"Query executed successfully. Showing first 30 rows:\n{df_result.head(30).to_markdown(index=False)}\n... (truncated, {len(df_result)} rows total)"
                     else: results_str = f"Query executed successfully. Results:\n{df_result.to_markdown(index=False)}"
                elif df_result is not None: results_str = f"Query executed successfully. Result: {str(df_result)}"
                else: results_str = "Query executed successfully, but result was None."
            except Exception as exec_e:
                print(f"Error executing SQL: {type(exec_e).__name__} - {exec_e}")
                results_str = f"Error executing SQL: {type(exec_e).__name__} - {exec_e}\nFailed SQL: {sql_query}"
    except Exception as gen_e:
        print(f"Error during Vanna SQL generation: {type(gen_e).__name__} - {gen_e}")
        results_str = f"Error during Vanna SQL generation: {type(gen_e).__name__} - {gen_e}"

    return {"sql_query": sql_query, "results": results_str}

# --- Prepare Tools for LangGraph ---
tools = [vanna_query_tool]
llm_with_tools = llm.bind_tools(tools)
tool_node = ToolNode(tools)

# --- LangGraph State Definition ---
class AgentState(TypedDict):
    messages: Annotated[Sequence[BaseMessage], operator.add]

# --- LangGraph Nodes ---
def agent_node(state: AgentState):
    """ Invokes the LLM to decide the next action or respond. """
    print("\n--- Agent Node ---")
    messages = state['messages']
    if isinstance(messages[-1], ToolMessage):
         print("Last message is ToolMessage, synthesizing final response.")
         response = llm.invoke(messages)
         return {"messages": [response]}
    print("Invoking LLM with tools to decide action...")
    response = llm_with_tools.invoke(messages)
    if not response.tool_calls:
         print("LLM decided no tool needed, responding directly.")
         return {"messages": [response]}
    else:
         print(f"LLM requested tool call(s): {response.tool_calls}")
         return {"messages": [response]}

# --- LangGraph Definition ---
print("\n--- Defining LangGraph Workflow ---")
workflow = StateGraph(AgentState)
workflow.add_node("agent", agent_node)
workflow.add_node("tools", tool_node)
workflow.set_entry_point("agent")
workflow.add_conditional_edges(
    "agent",
    lambda state: "tools" if isinstance(state["messages"][-1], AIMessage) and state["messages"][-1].tool_calls else END,
    {"tools": "tools", END: END}
)
workflow.add_edge("tools", "agent")
app = workflow.compile()
print("LangGraph app compiled.")

# --- Main Interaction Loop (Using LangGraph with Streaming) ---
print("\n--- FactoryTwin AI Assistant (Agentic RAG via LangGraph) ---")
print(f"Connected to DB: {mysql_dbname}. Using LLM via: {local_server_url}")
print("Ask questions about your database. Type 'exit' to quit.")

while True:
    try:
        question = input("\nUser: ")
        if question.lower() == 'exit': break
        if not question.strip(): continue

        print("\nAgent working...")
        start_time = time.time()
        initial_state = {"messages": [HumanMessage(content=question)]}

        final_state = None
        print("--- Agent Steps ---")
        for event in app.stream(initial_state):
            for node_name, output in event.items():
                print(f"Node: {node_name}")
                if isinstance(output, dict) and "messages" in output:
                    last_msg = output["messages"][-1]
                    if hasattr(last_msg, "content"): print(f"  Message Content (Partial): {str(last_msg.content)[:300]}...")
                    if hasattr(last_msg, "tool_calls"): print(f"  Tool Calls: {last_msg.tool_calls}")
                final_state = output
        end_time = time.time()
        print("--- End Agent Steps ---")

        final_answer = "Agent Error: Could not determine final answer."
        if final_state and "messages" in final_state:
             final_response_message = final_state['messages'][-1]
             if isinstance(final_response_message, AIMessage): final_answer = final_response_message.content
             elif isinstance(final_response_message, HumanMessage): final_answer = final_response_message.content
             elif isinstance(final_response_message, ToolMessage): final_answer = f"Agent ended after tool use. Tool result: {final_response_message.content}"
        else: print("Warning: Could not determine final state or messages after stream.")

        print(f"\n--- Agent Response ({end_time - start_time:.2f}s) ---")
        print(final_answer)
        print("--- End of Agent Response ---")

    except KeyboardInterrupt:
        print("\nExiting...")
        break
    except Exception as loop_e:
        print(f"\nAn error occurred in the main loop: {type(loop_e).__name__} - {loop_e}")
        print("Check server terminals for related errors.")
        traceback.print_exc()

print("\n--- End of Session ---")


[LLM Test] Testing LangChain LLM connection...
[LLM Test] LangChain LLM connection successful.

--- Defining LangGraph Workflow ---
LangGraph app compiled.

--- FactoryTwin AI Assistant (Agentic RAG via LangGraph) ---
Connected to DB: ft_database. Using LLM via: http://127.0.0.1:8000/v1
Ask questions about your database. Type 'exit' to quit.


In [2]:
import time
import os
import sys
import warnings
import json
import re
from typing import Dict, List
from collections import defaultdict

# --- Environment Setup ---
os.environ["TOKENIZERS_PARALLELISM"] = "false"

# --- Package Imports ---
try:
    import vanna
    from vanna.base import VannaBase
    from vanna.chromadb.chromadb_vector import ChromaDB_VectorStore
    import openai
    import mysql.connector
    import pandas as pd
except ImportError as e:
    print(f"Import Error: {e}")
    sys.exit(1)

# --- Configuration ---
db_type = "mysql"
mysql_host = "127.0.0.1"
mysql_port = 3306
mysql_user = "root"
mysql_password = "Raekwon_wtc$36" 
mysql_dbname = "ft_database"
llm_model_name = 'DeepSeek-R1-Distill-Llama-8B-Q4_K_M.gguf'
local_server_url = 'http://127.0.0.1:8000/v1'
placeholder_api_key = "sk-no-key-required"

# --- Question Classification Constants ---
QUESTION_TYPES = {
    "descriptive": "Direct factual retrieval from database",
    "judgement": "Requires analysis of multiple data points", 
    "suggestion": "Needs recommendations with external context"
}

CONTEXT_PROMPTS = {
    "descriptive": "Return ONLY SQL without explanation",
    "judgement": "Include trend analysis and correlations",
    "suggestion": "Reference vendor lead time APIs"
}

CLASSIFICATION_METRICS = defaultdict(int)

# --- Custom LLM Class with MCP Features ---
class MyLocalLlm(VannaBase):
    def __init__(self, config=None):
        super().__init__(config=config)
        self.model = llm_model_name

    # Implement required abstract methods
    def assistant_message(self, content: str) -> Dict:
        return {"role": "assistant", "content": content}

    def system_message(self, content: str) -> Dict:
        return {"role": "system", "content": content}

    def user_message(self, content: str) -> Dict:
        return {"role": "user", "content": content}
            
    def classify_question(self, question: str) -> str:
        classification_prompt = f"""Classify this manufacturing query:
        {json.dumps(QUESTION_TYPES, indent=2)}
        Return ONLY the category name. Query: {question}"""
        
        response = self.submit_prompt(classification_prompt)
        category = response.strip().lower()
        return category if category in QUESTION_TYPES else "descriptive"

    def submit_prompt(self, prompt, **kwargs) -> str:
        try:
            local_client = openai.OpenAI(
                base_url=local_server_url,
                api_key=placeholder_api_key,
                timeout=kwargs.get('request_timeout', 180.0)
            )
            
            messages = prompt if isinstance(prompt, list) else [{"role": "user", "content": str(prompt)}]
            
            response = local_client.chat.completions.create(
                model=self.model,
                messages=messages,
                temperature=kwargs.get('temperature', 0.1),
                max_tokens=kwargs.get('max_tokens', 1536),
                stop=kwargs.get('stop', ["``````"])
            )
            
            if response.choices and response.choices[0].message.content:
                return response.choices[0].message.content.strip()
            return "Error: No response generated"

        except Exception as e:
            return f"Error: {str(e)}"

# --- Enhanced Vanna Class ---
class MyVanna(ChromaDB_VectorStore, MyLocalLlm):
    def __init__(self, config=None):
        ChromaDB_VectorStore.__init__(self, config=config)
        MyLocalLlm.__init__(self, config=config)
        
    def generate_contextual_response(self, question: str) -> str:
        category = self.classify_question(question)
        CLASSIFICATION_METRICS[category] += 1
        
        system_msg = f"""You are a manufacturing analyst. Follow these rules:
        - DESCRIPTIVE: {CONTEXT_PROMPTS['descriptive']}
        - JUDGEMENT: {CONTEXT_PROMPTS['judgement']}
        - SUGGESTION: {CONTEXT_PROMPTS['suggestion']}"""
        
        return self.generate_sql(
            question=question,
            chat_history=[self.system_message(system_msg)]
        )

# --- Core Execution Flow --- 
vn = MyVanna(config=None)

# Database connection setup remains unchanged
# --- Connect to Database ---
print(f"Connecting to {db_type} database '{mysql_dbname}' on {mysql_host}...")
try:
    if db_type == "mysql":
        vn.connect_to_mysql(
            host=mysql_host,
            port=mysql_port,
            user=mysql_user,
            password=mysql_password,
            dbname=mysql_dbname
        )
    else:
        print(f"Database type '{db_type}' not configured.")
        sys.exit(1)
    print("Database connection successful.")
except Exception as e:
    print(f"Error connecting to database: {e}")
    print("Please check MySQL server status, credentials, permissions, and required packages.")
    sys.exit(1)

# --- Training Vanna (Run deliberately when needed) ---
print("\n--- Vanna Training ---")
print("Loads metadata (DDL, docs, SQL examples) into Vanna's local ChromaDB vector store.")
train_vanna = True # Set to True to run training, False to skip for normal use
if train_vanna:
    df_information_schema = vn.run_sql("SELECT * FROM INFORMATION_SCHEMA.COLUMNS")
    Plan = vn.get_training_plan_generic(df_information_schema)
    vn.train(plan=Plan)
    print("Running Vanna training...")

    # === Add your specific training calls here ===
    print("  Example: Fetching DDL (Customize for your setup)...")
    # df_ddl = vn.run_sql(f"SHOW CREATE TABLE your_table_name;")
    # if not df_ddl.empty: vn.train(ddl=df_ddl.iloc['Create Table'])

    print("  Example: Adding Documentation (Add yours)...")
    # --- Customer Table Documentation ---
    vn.train(documentation="Table CUSTOMER contains information about business customers.")
    vn.train(documentation="CUSTOMER.id is the unique identifier for each customer.")
    vn.train(documentation="CUSTOMER.name is the company name of the customer.")
    vn.train(documentation="CUSTOMER.addr_str contains the street address for the customer.")
    vn.train(documentation="CUSTOMER.addr_city contains the city for the customer.")
    vn.train(documentation="CUSTOMER.addr_ste contains the state for the customer.")
    vn.train(documentation="CUSTOMER.addr_ctry contains the country for the customer.")
    vn.train(documentation="CUSTOMER.addr_zip contains the zip-code for the customer.")

    # --- Vendor Table Documentation ---
    vn.train(documentation="Table VENDOR lists suppliers of parts.")
    vn.train(documentation="VENDOR.id is the unique identifier for each vendor.")
    vn.train(documentation="VENDOR.name is the name of the vendor/supplier.")
    vn.train(documentation="CUSTOMER.addr_str contains the street address for the vendor.")
    vn.train(documentation="CUSTOMER.addr_city contains the city for the vendor.")
    vn.train(documentation="CUSTOMER.addr_ste contains the state for the vendor.")
    vn.train(documentation="CUSTOMER.addr_ctry contains the country for the vendor.")
    vn.train(documentation="CUSTOMER.addr_zip contains the zip-code for the vendor.")

    # --- Part Master Table Documentation ---
    vn.train(documentation="Table PART_MST is the master list of all parts.")
    vn.train(documentation="PART_MST.id is the unique identifier which indicates the unique id for the part.")
    vn.train(documentation="PART_MST.p_code is the unique product code for the part.")
    vn.train(documentation="PART_MST.procurement indicates if a part is manufactured internally ('M') or purchased ('B'). It must be either 'M' or 'B'.")
    vn.train(documentation="PART_MST.unit_price represents the standard price per unit of the part.")
    vn.train(documentation="PART_MST.pref_order_qty specifies The preferred quantity to order or manufacture at once, depending on if the part is M or B.")
    vn.train(documentation="PART_MST.unit_meas the unit of measure for this part; usually 'EA' for 'eaches'.")
    vn.train(documentation="PART_MST.mfg_lt is the lead time to manufacture this part, if it is manufactured. This must an 'M' part and positive.")
    vn.train(documentation="PART_MST.curr_stock the current amount of this part in stock in inventory")

    # --- Add Documentation for BOM_MST, RTG_MST, JOB_OPER, JOB_MATL, RECV_MST, RECV_LINE ---
    # Example:
    vn.train(documentation="Table bill_of_m defines the Bill of Materials, showing parent-child part relationships.")
    vn.train(documentation="bill_of_m.m_part links to the parent part code in PART_MST. The manufactured part which consumes c_part.")
    vn.train(documentation="BOM_MST.c_part links to the child component part code in PART_MST. The component part consumed by m_part.")
    vn.train(documentation="BOM_MST.qty_req is The quantity of c_part required to make one unit of m_part.")

    # --- Job Master Table Documentation ---
    vn.train(documentation="Table job_mst represents the master data for all the jobs.")
    vn.train(documentation="job_mst.id is the unique identifier for a job.")
    vn.train(documentation="job_mst.job_stat signifies the current status of the job as follows: 'Q' (in queue), 'C' (completed), 'O' (opened), 'X' (cancelled), or 'H' (on hold).")
    vn.train(documentation="job_mst.part is the part that this job is manufacturing. This can be found in the PART_MST table.")
    vn.train(documentation="job_mst.qty is the quantity of the part that this job is manufacturing.")
    vn.train(documentation="job_mst.job_rls is the time at which this job began work.")
    vn.train(documentation="job_mst.job_cls is the time at which this job was completed.")

    # --- Nonconform master table documentation ---
    vn.train(documentation="Table NONCONFORM_MST defines the Nonconformance report master data.")
    vn.train(documentation="NONCONFORM_MST.id is a unique id for this nonconformance report.")
    vn.train(documentation="NONCONFORM_MST.job signifies if this nonconformance report was issued for a job, the job it was issued for.")
    vn.train(documentation="NONCONFORM_MST.po_no signifies if this nonconformance report was issued for a purchase order, the purchase order it was issued for.")
    vn.train(documentation="NONCONFORM_MST.qty signifies the quantity of parts on this job/purchase order which were flagged for nonconformance issues.")




    # --- Sales Order Header Table Documentation ---
    vn.train(documentation="Table SALES_MST represents the header information for a customer sales order.")
    vn.train(documentation="SALES_MST.order_no is the unique identifier for a sales order.")
    vn.train(documentation="SALES_MST.order_date is the date the sales order was placed.")
    vn.train(documentation="SALES_MST.cust links to the CUSTOMER table for the customer who placed the order.")

    # --- Sales Order Line Table Documentation ---
    vn.train(documentation="Table SALES_LINE contains individual line items for each sales order in SALES_MST.")
    vn.train(documentation="SALES_LINE.order_no links back to the SALES_MST table.")
    vn.train(documentation="SALES_LINE.line_no is the line number of the line item under the given order_no.")
    vn.train(documentation="SALES_LINE.order_stat is the current status of the line item: 'C' (completed), 'O' (opened), or 'X' (cancelled).")
    vn.train(documentation="SALES_LINE.part links to the PART_MST table for the specific part ordered.")
    vn.train(documentation="SALES_LINE.due is the requested delivery date for this line item.")
    vn.train(documentation="SALES_LINE.unit_price is the agreed price per unit for the part on this order line.")
    vn.train(documentation="SALES_LINE.qty is the number of units ordered for this part on this line item.")
    vn.train(documentation="SALES_LINE.line_cls is the date on which this line item was closed.")

    # --- Purchase Order Header Table Documentation ---
    vn.train(documentation="Table PURCHASE_MST represents the header information for a purchase order sent to a vendor.")
    vn.train(documentation="PURCHASE_MST.order_no is the unique identifier for a purchase order.")
    vn.train(documentation="PURCHASE_MST.order_date is the date the purchase order was created.")
    vn.train(documentation="PURCHASE_MST.vendor links to the VENDOR table for the supplier receiving the order.")

    # --- Purchase Order Line Table Documentation ---
    vn.train(documentation="Table PURCHASE_LINE contains individual line items for each purchase order in PURCHASE_MST.")
    vn.train(documentation="PURCHASE_LINE.order_no links back to the PURCHASE_MST table.")
    vn.train(documentation="PURCHASE_LINE.part links to the PART_MST table for the specific part being purchased.")
    vn.train(documentation="PURCHASE_LINE.due is the expected delivery date for this purchased part.")
    vn.train(documentation="PURCHASE_LINE.unit_cost is the cost per unit for the part on this purchase order line.")
    vn.train(documentation="PURCHASE_LINE.qty is the number of units purchased. It must be a positive number.")
    vn.train(documentation="PURCHASE_LINE.order_stat is the current status of the purchase order: 'C' (completed), 'O' (opened), or 'X' (cancelled).")
    vn.train(documentation="PURCHASE_LINE.order_type is the type of purchase order: either an 'S' for service order or 'M' for material order.")
    vn.train(documentation="PURCHASE_LINE.line_no is the line number of this purchase order line under the given order_no.")
    vn.train(documentation="PURCHASE_LINE.line_cls is the date on which this purchase order was closed.")


    # --- Vendor Part Table Documentation ---
    vn.train(documentation="Table VEND_PART represents vendor and part relationship.")
    vn.train(documentation="VEND_PART.vendor is the unique identifier for each vendor.")
    vn.train(documentation="VEND_PART.part is the part that can be procured from the vendor.")
    vn.train(documentation="VEND_PART.unit_cost is typically the creation or start date of the job.")
    vn.train(documentation="VEND_PART.part_lt is The standard lead time for which to place orders for part to vendor")
    

    # --- General Business Rules/Concepts ---
    # vn.train(documentation="'Lead Time' generally refers to the time required to procure or produce a part (PART_MST.sys_lt is a default).")
    # vn.train(documentation="'On-time Delivery' can be assessed by comparing SALES_LINE.due_date with actual delivery dates (potentially in another table like deliveries if it exists).")



# --- Enhanced Training Section ---

    # Add classification-aware training examples
    # classification_examples = [
    #     ("What's current revenue?", "descriptive", "SELECT SUM(unit_price*qty) FROM sales_line"),
    #     ("Why was order PO-123 late?", "judgement", "Analyze purchase_line JOIN vendor ON..."),
    #     ("Improve delivery times?", "suggestion", "SELECT sys_lt FROM part_mst WHERE...")
    # ]

    # for q, t, s in classification_examples:
    #     vn.train(
    #         documentation=f"{t.upper()}: {q}",
    #         question=q,
    #         sql=s
    #     )
        
    # # Add type-specific documentation
    # vn.train(documentation="DESCRIPTIVE: Revenue projection query")
    # vn.train(documentation="JUDGEMENT: Lead time accuracy analysis")
    # vn.train(documentation="SUGGESTION: Vendor performance improvement")

def extract_sql_from_response(response: str) -> str:
    """Extract SQL code from LLM response using regex patterns"""
    # Match multi-line SQL between `````` with flexible whitespace
    response.split(":\n\n")[1].replace("\n"," ")

# --- Main Interaction Loop ---
# --- Modified Main Interaction Loop ---
while True:
    try:
        question = input("calculate the total sales value till now")
        if question.lower() == 'exit':
            break
            
        print("\nAgent thinking...")
        start_time = time.time()
        
        # Get classified response
        llm_response = vn.generate_contextual_response(question)
        category = vn.classify_question(question)
        
        # Extract SQL using implemented function
        extracted_sql = extract_sql_from_response(llm_response)
        
        if not extracted_sql:
            print("\nCould not extract executable SQL from response")
            continue
            
        print(f"\n--- Extracted SQL ({category.upper()}) ---")
        print(extracted_sql)
        print("--- End of Extracted SQL ---")
        
        # Unified execution using Vanna's method
        execute = input(f"\nExecute {category.upper()} SQL query? (y/n): ").lower().strip()
        
        if execute == 'y':
            try:
                print("Executing query...")
                exec_start_time = time.time()
                
                df_result = vn.run_sql(sql=extracted_sql)
                
                # Category-specific post-processing
                if category == "judgement":
                    print("\n** Analytical Insights **")
                    print(df_result.describe())
                elif category == "suggestion":
                    print("\n** Recommended Actions **")
                    print("Consider these vendor lead times:")
                    print(df_result.head())
                else:  # descriptive
                    print("\n** Query Results **")
                
                # Display results
                with pd.option_context('display.max_rows', 20, 
                                     'display.max_columns', None,
                                     'display.width', 1000):
                    if len(df_result) > 20:
                        print(df_result.head(20))
                        print(f"... (truncated, {len(df_result)} rows total)")
                    else:
                        print(df_result)
                        
            except Exception as exec_e:
                print(f"\nError executing SQL: {type(exec_e).__name__} - {exec_e}")
                print(f"Failed SQL:\n{extracted_sql}")

    except KeyboardInterrupt:
        print("\nExiting...")
        break

Connecting to mysql database 'ft_database' on 127.0.0.1...
Database connection successful.

--- Vanna Training ---
Loads metadata (DDL, docs, SQL examples) into Vanna's local ChromaDB vector store.


Add of existing embedding ID: e3603016-4c1f-50c4-820f-8e0d95c214fc-doc
Add of existing embedding ID: a74cf111-4b61-54b7-8e1f-90fb0f8a918a-doc
Add of existing embedding ID: 77ed2433-88db-5fba-adf6-9471a7f01c52-doc
Add of existing embedding ID: 738837f8-c922-5d09-bc30-aaf3527857ed-doc
Add of existing embedding ID: e95b9132-9489-5658-8119-3031ab2a95c7-doc
Add of existing embedding ID: 2794d095-c404-5c33-b17d-5c3fb1d47b65-doc
Add of existing embedding ID: de651ab9-5fb6-507d-bee2-b292a33e1bf9-doc
Add of existing embedding ID: 44bcdca4-c489-5e11-9d57-c1846f11ecae-doc
Add of existing embedding ID: a774c4f8-c438-5ed1-9c33-e165731d67ad-doc
Add of existing embedding ID: 57ac5cd8-eaa4-5d85-ad5c-99afb1c68f9f-doc
Add of existing embedding ID: 516ccf63-2414-5bcb-b38d-40eb6e379d89-doc
Add of existing embedding ID: 3b03ebbf-278b-5016-9fa0-ac44013530a1-doc
Add of existing embedding ID: 3fee41a8-11a5-5348-b781-eb11890b596c-doc
Add of existing embedding ID: 05ac8afe-e45f-5b51-a4c6-2da5bfdf712f-doc
Add of

Running Vanna training...
  Example: Fetching DDL (Customize for your setup)...
  Example: Adding Documentation (Add yours)...
Adding documentation....
Adding documentation....
Adding documentation....


Add of existing embedding ID: c7737722-5406-50ec-a2e3-457631201e43-doc
Insert of existing embedding ID: c7737722-5406-50ec-a2e3-457631201e43-doc
Add of existing embedding ID: 5710f1b2-2513-59b1-a22a-06ca846f3335-doc
Insert of existing embedding ID: 5710f1b2-2513-59b1-a22a-06ca846f3335-doc
Add of existing embedding ID: f62258fc-5052-5064-8778-17d64b48fb75-doc
Insert of existing embedding ID: f62258fc-5052-5064-8778-17d64b48fb75-doc


Adding documentation....
Adding documentation....
Adding documentation....


Add of existing embedding ID: 3a1ce7ab-9179-5232-8d27-603f1c7fc185-doc
Insert of existing embedding ID: 3a1ce7ab-9179-5232-8d27-603f1c7fc185-doc
Add of existing embedding ID: 118f8eeb-9139-5bb3-aa51-ffde0d892907-doc
Insert of existing embedding ID: 118f8eeb-9139-5bb3-aa51-ffde0d892907-doc
Add of existing embedding ID: 8c8564bf-5f18-5fca-b590-917e64a7c696-doc
Insert of existing embedding ID: 8c8564bf-5f18-5fca-b590-917e64a7c696-doc


Adding documentation....
Adding documentation....
Adding documentation....


Add of existing embedding ID: 1e2fad2c-8b40-5101-b01a-eb7cb27e9093-doc
Insert of existing embedding ID: 1e2fad2c-8b40-5101-b01a-eb7cb27e9093-doc
Add of existing embedding ID: adeabba7-8bbd-5156-97a3-d1e27d0e8f47-doc
Insert of existing embedding ID: adeabba7-8bbd-5156-97a3-d1e27d0e8f47-doc
Add of existing embedding ID: 380c2a34-55c2-5fe6-aaf9-5b98fc734a1d-doc
Insert of existing embedding ID: 380c2a34-55c2-5fe6-aaf9-5b98fc734a1d-doc
Add of existing embedding ID: 2e795c42-c6f8-5c44-aa1c-eb628fdee4fc-doc
Insert of existing embedding ID: 2e795c42-c6f8-5c44-aa1c-eb628fdee4fc-doc


Adding documentation....
Adding documentation....
Adding documentation....
Adding documentation....


Add of existing embedding ID: 3a25bf9f-42c8-5156-823b-ce6a42754b95-doc
Insert of existing embedding ID: 3a25bf9f-42c8-5156-823b-ce6a42754b95-doc
Add of existing embedding ID: fa75db61-9760-51e8-b436-32ed46777d85-doc
Insert of existing embedding ID: fa75db61-9760-51e8-b436-32ed46777d85-doc
Add of existing embedding ID: 808d1279-ae6d-5a3f-b2a9-6a42245a4f28-doc
Insert of existing embedding ID: 808d1279-ae6d-5a3f-b2a9-6a42245a4f28-doc
Add of existing embedding ID: deab258b-8c49-5858-8f76-a52a6269500e-doc
Insert of existing embedding ID: deab258b-8c49-5858-8f76-a52a6269500e-doc


Adding documentation....
Adding documentation....
Adding documentation....
Adding documentation....


Add of existing embedding ID: 2db3b251-253c-55b3-99aa-12540571472c-doc
Insert of existing embedding ID: 2db3b251-253c-55b3-99aa-12540571472c-doc
Add of existing embedding ID: 417933a5-e35b-5dc9-a0d1-bf5bf8a4d2ce-doc
Insert of existing embedding ID: 417933a5-e35b-5dc9-a0d1-bf5bf8a4d2ce-doc
Add of existing embedding ID: 628de61e-0ed2-5b90-898f-38e558ac180f-doc
Insert of existing embedding ID: 628de61e-0ed2-5b90-898f-38e558ac180f-doc
Add of existing embedding ID: 80065a35-16cd-5fd7-a372-512bd06427e5-doc
Insert of existing embedding ID: 80065a35-16cd-5fd7-a372-512bd06427e5-doc


Adding documentation....
Adding documentation....
Adding documentation....
Adding documentation....


Add of existing embedding ID: aae9e2ec-a925-599b-a250-2f0b2410a291-doc
Insert of existing embedding ID: aae9e2ec-a925-599b-a250-2f0b2410a291-doc
Add of existing embedding ID: e9263c31-5c79-5360-816b-5f7f98684880-doc
Insert of existing embedding ID: e9263c31-5c79-5360-816b-5f7f98684880-doc
Add of existing embedding ID: 5f751b55-e510-53da-a888-5ecefa26fee9-doc
Insert of existing embedding ID: 5f751b55-e510-53da-a888-5ecefa26fee9-doc
Add of existing embedding ID: 7ed5330d-ac46-5eb4-89b5-9971d2414c73-doc
Insert of existing embedding ID: 7ed5330d-ac46-5eb4-89b5-9971d2414c73-doc


Adding documentation....
Adding documentation....
Adding documentation....
Adding documentation....


Add of existing embedding ID: 23909f8f-6287-589a-9018-c962c6900982-doc
Insert of existing embedding ID: 23909f8f-6287-589a-9018-c962c6900982-doc
Add of existing embedding ID: a3dff5a7-f7ae-5291-9b03-a0b09282c724-doc
Insert of existing embedding ID: a3dff5a7-f7ae-5291-9b03-a0b09282c724-doc
Add of existing embedding ID: 595aefd0-fcf8-5f8f-8068-5e83d009e593-doc
Insert of existing embedding ID: 595aefd0-fcf8-5f8f-8068-5e83d009e593-doc


Adding documentation....
Adding documentation....
Adding documentation....


Add of existing embedding ID: bbeecc4f-d710-575a-8f54-5c94c81562f7-doc
Insert of existing embedding ID: bbeecc4f-d710-575a-8f54-5c94c81562f7-doc
Add of existing embedding ID: 8386d710-25b6-51b9-91d8-30785a276e62-doc
Insert of existing embedding ID: 8386d710-25b6-51b9-91d8-30785a276e62-doc
Add of existing embedding ID: 6910f4a8-91ea-598d-9b25-57242d313d65-doc
Insert of existing embedding ID: 6910f4a8-91ea-598d-9b25-57242d313d65-doc


Adding documentation....
Adding documentation....
Adding documentation....


Add of existing embedding ID: ab40f9fb-5a08-5f54-8d12-0e9d43477249-doc
Insert of existing embedding ID: ab40f9fb-5a08-5f54-8d12-0e9d43477249-doc
Add of existing embedding ID: dea8e4e4-e47c-5c8e-9a6a-7166bc5e54dd-doc
Insert of existing embedding ID: dea8e4e4-e47c-5c8e-9a6a-7166bc5e54dd-doc
Add of existing embedding ID: 49ed9ddb-37cb-5687-ab29-5364cc05e755-doc
Insert of existing embedding ID: 49ed9ddb-37cb-5687-ab29-5364cc05e755-doc
Add of existing embedding ID: 76378bf1-3f39-52d7-8589-31832805c77b-doc
Insert of existing embedding ID: 76378bf1-3f39-52d7-8589-31832805c77b-doc


Adding documentation....
Adding documentation....
Adding documentation....


Add of existing embedding ID: 77937f1f-1c06-5c8e-9b2a-dbaecbb40383-doc
Insert of existing embedding ID: 77937f1f-1c06-5c8e-9b2a-dbaecbb40383-doc
Add of existing embedding ID: 2bae7fcb-36a7-5286-b8c6-71208a314226-doc
Insert of existing embedding ID: 2bae7fcb-36a7-5286-b8c6-71208a314226-doc
Add of existing embedding ID: 49333a1c-ca97-5b18-a978-eb9f9f22d43c-doc
Insert of existing embedding ID: 49333a1c-ca97-5b18-a978-eb9f9f22d43c-doc


Adding documentation....
Adding documentation....
Adding documentation....
Adding documentation....


Add of existing embedding ID: 24e2eb51-a625-5ba2-bd1e-ceb1b1df0182-doc
Insert of existing embedding ID: 24e2eb51-a625-5ba2-bd1e-ceb1b1df0182-doc
Add of existing embedding ID: c1d11163-ce12-54e8-a638-426f870aa7f4-doc
Insert of existing embedding ID: c1d11163-ce12-54e8-a638-426f870aa7f4-doc
Add of existing embedding ID: 0551b27b-7308-52ed-9abd-4415dd77ec81-doc
Insert of existing embedding ID: 0551b27b-7308-52ed-9abd-4415dd77ec81-doc
Add of existing embedding ID: fb6f1cbe-b884-51e1-a624-7ef41fea05e1-doc
Insert of existing embedding ID: fb6f1cbe-b884-51e1-a624-7ef41fea05e1-doc


Adding documentation....
Adding documentation....
Adding documentation....
Adding documentation....


Add of existing embedding ID: 5257415b-4130-5a26-946a-d31f4069503e-doc
Insert of existing embedding ID: 5257415b-4130-5a26-946a-d31f4069503e-doc
Add of existing embedding ID: 96bde749-8d97-5d6e-88d2-dac226a05810-doc
Insert of existing embedding ID: 96bde749-8d97-5d6e-88d2-dac226a05810-doc
Add of existing embedding ID: c02733e3-c35a-53a8-b5ce-996106d1b55d-doc
Insert of existing embedding ID: c02733e3-c35a-53a8-b5ce-996106d1b55d-doc
Add of existing embedding ID: af9febcb-31be-5b14-8ce8-0fac9252681a-doc
Insert of existing embedding ID: af9febcb-31be-5b14-8ce8-0fac9252681a-doc


Adding documentation....
Adding documentation....
Adding documentation....
Adding documentation....


Add of existing embedding ID: b03e0de0-ae76-5be7-b66c-ba00c99ac122-doc
Insert of existing embedding ID: b03e0de0-ae76-5be7-b66c-ba00c99ac122-doc
Add of existing embedding ID: 26430856-5ae1-5ecf-867b-b933387d0461-doc
Insert of existing embedding ID: 26430856-5ae1-5ecf-867b-b933387d0461-doc
Add of existing embedding ID: 242db09b-2597-5c29-b559-99eb35e6645d-doc
Insert of existing embedding ID: 242db09b-2597-5c29-b559-99eb35e6645d-doc
Add of existing embedding ID: 53f4e256-e5f4-5793-b2fc-d15fe96aed80-doc
Insert of existing embedding ID: 53f4e256-e5f4-5793-b2fc-d15fe96aed80-doc


Adding documentation....
Adding documentation....
Adding documentation....
Adding documentation....


Add of existing embedding ID: e3a1bdc3-429f-5a7f-9e80-a667d434142c-doc
Insert of existing embedding ID: e3a1bdc3-429f-5a7f-9e80-a667d434142c-doc
Add of existing embedding ID: 128533d2-05c4-5167-8867-de34e7743520-doc
Insert of existing embedding ID: 128533d2-05c4-5167-8867-de34e7743520-doc
Add of existing embedding ID: 37a60c7d-71e5-5999-a7a9-d5062627d6db-doc
Insert of existing embedding ID: 37a60c7d-71e5-5999-a7a9-d5062627d6db-doc
Add of existing embedding ID: 0300c6b3-f8f8-52bb-9d2e-08d83a1e973c-doc
Insert of existing embedding ID: 0300c6b3-f8f8-52bb-9d2e-08d83a1e973c-doc


Adding documentation....
Adding documentation....
Adding documentation....
Adding documentation....


Add of existing embedding ID: 5b4e09c7-3aa4-5d97-918a-570f5b07fc5b-doc
Insert of existing embedding ID: 5b4e09c7-3aa4-5d97-918a-570f5b07fc5b-doc
Add of existing embedding ID: 5ee3dc07-0388-522f-8277-f9abf7399e4b-doc
Insert of existing embedding ID: 5ee3dc07-0388-522f-8277-f9abf7399e4b-doc
Add of existing embedding ID: 7a1d58d5-2171-565f-9edf-3cb8a125ef66-doc
Insert of existing embedding ID: 7a1d58d5-2171-565f-9edf-3cb8a125ef66-doc
Add of existing embedding ID: 7d9e084d-20aa-5ab5-b619-4ce79a69131f-doc
Insert of existing embedding ID: 7d9e084d-20aa-5ab5-b619-4ce79a69131f-doc


Adding documentation....
Adding documentation....
Adding documentation....
Adding documentation....


Add of existing embedding ID: 0dc60cc5-f32b-578b-9708-dfdadc907edf-doc
Insert of existing embedding ID: 0dc60cc5-f32b-578b-9708-dfdadc907edf-doc
Add of existing embedding ID: 1f322c62-5226-5ee3-86f8-695e4ab976b0-doc
Insert of existing embedding ID: 1f322c62-5226-5ee3-86f8-695e4ab976b0-doc
Add of existing embedding ID: bc909e47-ed1f-5b15-be2c-0c1789202c68-doc
Insert of existing embedding ID: bc909e47-ed1f-5b15-be2c-0c1789202c68-doc
Add of existing embedding ID: 04b42da7-2f59-5af1-9379-f9c8854b943c-doc
Insert of existing embedding ID: 04b42da7-2f59-5af1-9379-f9c8854b943c-doc


Adding documentation....
Adding documentation....
Adding documentation....
Adding documentation....


Add of existing embedding ID: 87135ea3-a863-589b-8d9e-207c0bf9e368-doc
Insert of existing embedding ID: 87135ea3-a863-589b-8d9e-207c0bf9e368-doc
Add of existing embedding ID: 6dafd24f-2d1b-5b34-a48a-7c52345f8fd3-doc
Insert of existing embedding ID: 6dafd24f-2d1b-5b34-a48a-7c52345f8fd3-doc
Add of existing embedding ID: b7c247a6-3fc3-599f-9eb4-6d2ab1920097-doc
Insert of existing embedding ID: b7c247a6-3fc3-599f-9eb4-6d2ab1920097-doc
Add of existing embedding ID: e200dc7d-15c2-55aa-9452-92526bca6476-doc
Insert of existing embedding ID: e200dc7d-15c2-55aa-9452-92526bca6476-doc


Adding documentation....
Adding documentation....
Adding documentation....
Adding documentation....


Add of existing embedding ID: 3a2b3b3c-d62e-57c9-9017-e00b1dfacf1e-doc
Insert of existing embedding ID: 3a2b3b3c-d62e-57c9-9017-e00b1dfacf1e-doc
Add of existing embedding ID: 0a712900-a404-5806-ac70-6554fa3d6e27-doc
Insert of existing embedding ID: 0a712900-a404-5806-ac70-6554fa3d6e27-doc
Add of existing embedding ID: eaf17db6-7e22-5ab6-b635-0eca04ab8db1-doc
Insert of existing embedding ID: eaf17db6-7e22-5ab6-b635-0eca04ab8db1-doc
Add of existing embedding ID: d83ce6c1-5e5e-5b58-bde5-219d2811b351-doc
Insert of existing embedding ID: d83ce6c1-5e5e-5b58-bde5-219d2811b351-doc


Adding documentation....
Adding documentation....
Adding documentation....
Adding documentation....


Add of existing embedding ID: 9d2cfeb6-376b-52b4-911e-ff90b1d01409-doc
Insert of existing embedding ID: 9d2cfeb6-376b-52b4-911e-ff90b1d01409-doc
Add of existing embedding ID: d47f3274-ac0c-5782-b821-3f086922139f-doc
Insert of existing embedding ID: d47f3274-ac0c-5782-b821-3f086922139f-doc
Add of existing embedding ID: 49f130b8-13f2-5c3c-8e60-20acda411259-doc
Insert of existing embedding ID: 49f130b8-13f2-5c3c-8e60-20acda411259-doc
Add of existing embedding ID: 82f36a27-c95f-598f-ba45-7c4b36d5fce6-doc
Insert of existing embedding ID: 82f36a27-c95f-598f-ba45-7c4b36d5fce6-doc


Adding documentation....
Adding documentation....
Adding documentation....

Agent thinking...


Add of existing embedding ID: 9739146c-f94b-59bb-9b26-d76f778e77eb-sql
Add of existing embedding ID: 86b5b45f-c2a2-57b5-8a18-ffbfe18b9739-sql
Add of existing embedding ID: 1fe23c85-50b3-54ed-964c-154ff04cdea7-sql
Add of existing embedding ID: 62081552-c963-51ca-9bb6-7f9013ea7097-sql
Add of existing embedding ID: 9739146c-f94b-59bb-9b26-d76f778e77eb-sql
Add of existing embedding ID: 86b5b45f-c2a2-57b5-8a18-ffbfe18b9739-sql
Add of existing embedding ID: 1fe23c85-50b3-54ed-964c-154ff04cdea7-sql
Add of existing embedding ID: 62081552-c963-51ca-9bb6-7f9013ea7097-sql
Add of existing embedding ID: 9739146c-f94b-59bb-9b26-d76f778e77eb-sql
Add of existing embedding ID: 86b5b45f-c2a2-57b5-8a18-ffbfe18b9739-sql
Add of existing embedding ID: 1fe23c85-50b3-54ed-964c-154ff04cdea7-sql
Add of existing embedding ID: 62081552-c963-51ca-9bb6-7f9013ea7097-sql
Add of existing embedding ID: 9739146c-f94b-59bb-9b26-d76f778e77eb-sql
Add of existing embedding ID: 86b5b45f-c2a2-57b5-8a18-ffbfe18b9739-sql
Add of

SQL Prompt: [{'role': 'system', 'content': "You are a SQL expert. Please help to generate a SQL query to answer the question. Your response should ONLY be based on the given context and follow the response guidelines and format instructions. \n===Additional Context \n\nCUSTOMER.name is the company name of the customer.\n\nDESCRIPTIVE: What's current revenue?\n\nCUSTOMER.id is the unique identifier for each customer.\n\nPURCHASE_LINE.due is the expected delivery date for this purchased part.\n\nSUGGESTION: Vendor performance improvement\n\nJUDGEMENT: Lead time accuracy analysis\n\nSALES_LINE.due is the requested delivery date for this line item.\n\nSUGGESTION: Improve delivery times?\n\nVENDOR.name is the name of the vendor/supplier.\n\nPURCHASE_LINE.qty is the number of units purchased. It must be a positive number.\n\n===Response Guidelines \n1. If the provided context is sufficient, please generate a valid SQL query without any explanations for the question. \n2. If the provided cont

In [None]:
llm_response

NameError: name 'llm_response' is not defined

In [7]:
def extract_sql_from_response(response: str) -> str:
    """Extract SQL code from LLM response using regex patterns"""
    # Match multi-line SQL between `````` with flexible whitespace
    sql_match = re.search(r'``````', response, re.DOTALL)
    if sql_match:
        return sql_match.group(1).strip()
    
    # Match SQL without code blocks (case-insensitive, multi-line)
    sql_match = re.search(
        r'\b(SELECT|WITH|UPDATE|INSERT|DELETE)\b.*?;',
        response,
        re.IGNORECASE | re.DOTALL
    )
    return sql_match.group(0).strip() if sql_match else None


In [None]:
extract_sql_from_response(llm_response)

'with an alias, from sales_line, where order_date is less than or equal to current date.\n</think>\n\nTo calculate the total sales values till now, you can use the following SQL query:\n\n```sql\nSELECT \n    SUM(unit_price * qty) AS total_sales_till_now \nFROM \n    SALES_LINE \nWHERE \n    order_date >= DATE_SUB(CURDATE(), INTERVAL 1 DAY);'

In [10]:
def extract_sql_from_response(response: str) -> str:
    sql_match = re.search(
        r'\b(SELECT|WITH|UPDATE|INSERT|DELETE)\b.*?;',
        response,
        re.IGNORECASE | re.DOTALL
    )
    return sql_match.group(0).strip() if sql_match else None

In [11]:
extract_sql_from_response(llm_response)

'with an alias, from sales_line, where order_date is less than or equal to current date.\n</think>\n\nTo calculate the total sales values till now, you can use the following SQL query:\n\n```sql\nSELECT \n    SUM(unit_price * qty) AS total_sales_till_now \nFROM \n    SALES_LINE \nWHERE \n    order_date >= DATE_SUB(CURDATE(), INTERVAL 1 DAY);'

In [12]:
llm_response

'with an alias, from sales_line, where order_date is less than or equal to current date.\n</think>\n\nTo calculate the total sales values till now, you can use the following SQL query:\n\n```sql\nSELECT \n    SUM(unit_price * qty) AS total_sales_till_now \nFROM \n    SALES_LINE \nWHERE \n    order_date >= DATE_SUB(CURDATE(), INTERVAL 1 DAY);'

In [13]:
def extract_sql_from_response(response: str) -> str:
    """Extract SQL code from LLM response using regex patterns"""
    # Match SQL after colon with code block
    sql_match = re.search(
        r':.*?``````',  # Matches colon followed by SQL code block
        response, 
        re.IGNORECASE | re.DOTALL
    )
    if sql_match:
        return sql_match.group(1).strip()
    
    # Match SQL without colon but with code block
    sql_match = re.search(
        r'``````', 
        response, 
        re.DOTALL
    )
    if sql_match:
        return sql_match.group(1).strip()
    
    # Match SQL without code blocks
    sql_match = re.search(
        r'\b(SELECT|WITH|UPDATE|INSERT|DELETE)\b.*?;', 
        response, 
        re.IGNORECASE | re.DOTALL
    )
    return sql_match.group(0).strip() if sql_match else None


In [15]:
extract_sql_from_response(llm_response)

'with an alias, from sales_line, where order_date is less than or equal to current date.\n</think>\n\nTo calculate the total sales values till now, you can use the following SQL query:\n\n```sql\nSELECT \n    SUM(unit_price * qty) AS total_sales_till_now \nFROM \n    SALES_LINE \nWHERE \n    order_date >= DATE_SUB(CURDATE(), INTERVAL 1 DAY);'

In [None]:
# sql_statement = 
vn.run_sql(llm_response.split(":\n\n")[1].replace("\n"," "))


Unnamed: 0,total_sales
0,32798837.3


In [27]:
llm_response.split(":\n\n")[1].replace("\n"," ")

'SELECT SUM(unit_price * qty) AS total_sales FROM sales_line;'

In [None]:
import time
import os
import sys
import warnings
import json
import re
from typing import Dict, List
from collections import defaultdict

# --- Environment Setup ---
os.environ["TOKENIZERS_PARALLELISM"] = "false"

# --- Package Imports ---
try:
    import vanna
    from vanna.base import VannaBase
    from vanna.chromadb.chromadb_vector import ChromaDB_VectorStore
    import openai
    import mysql.connector
    import pandas as pd
except ImportError as e:
    print(f"Import Error: {e}")
    sys.exit(1)

# --- Configuration ---
db_type = "mysql"
mysql_host = "127.0.0.1"
mysql_port = 3306
mysql_user = "root"
mysql_password = "Raekwon_wtc$36" 
mysql_dbname = "ft_database"
llm_model_name = 'DeepSeek-R1-Distill-Llama-8B-Q4_K_M.gguf'
local_server_url = 'http://127.0.0.1:8000/v1'
placeholder_api_key = "sk-no-key-required"

# --- Question Classification Constants ---
QUESTION_TYPES = {
    "descriptive": "Direct factual retrieval from database",
    "judgement": "Requires analysis of multiple data points", 
    "suggestion": "Needs recommendations with external context"
}

CONTEXT_PROMPTS = {
    "descriptive": "Return ONLY SQL without explanation",
    "judgement": "Include trend analysis and correlations",
    "suggestion": "Reference vendor lead time APIs"
}

CLASSIFICATION_METRICS = defaultdict(int)

# --- Custom LLM Class with MCP Features ---
class MyLocalLlm(VannaBase):
    def __init__(self, config=None):
        super().__init__(config=config)
        self.model = llm_model_name

    # Implement required abstract methods
    def assistant_message(self, content: str) -> Dict:
        return {"role": "assistant", "content": content}

    def system_message(self, content: str) -> Dict:
        return {"role": "system", "content": content}

    def user_message(self, content: str) -> Dict:
        return {"role": "user", "content": content}
            
    def classify_question(self, question: str) -> str:
        classification_prompt = f"""Classify this manufacturing query:
        {json.dumps(QUESTION_TYPES, indent=2)}
        Return ONLY the category name. Query: {question}"""
        
        response = self.submit_prompt(classification_prompt)
        category = response.strip().lower()
        return category if category in QUESTION_TYPES else "descriptive"

    def submit_prompt(self, prompt, **kwargs) -> str:
        try:
            local_client = openai.OpenAI(
                base_url=local_server_url,
                api_key=placeholder_api_key,
                timeout=kwargs.get('request_timeout', 180.0)
            )
            
            messages = prompt if isinstance(prompt, list) else [{"role": "user", "content": str(prompt)}]
            
            response = local_client.chat.completions.create(
                model=self.model,
                messages=messages,
                temperature=kwargs.get('temperature', 0.1),
                max_tokens=kwargs.get('max_tokens', 1536),
                stop=kwargs.get('stop', ["``````"])
            )
            
            if response.choices and response.choices[0].message.content:
                return response.choices[0].message.content.strip()
            return "Error: No response generated"

        except Exception as e:
            return f"Error: {str(e)}"

# --- Enhanced Vanna Class ---
class MyVanna(ChromaDB_VectorStore, MyLocalLlm):
    def __init__(self, config=None):
        ChromaDB_VectorStore.__init__(self, config=config)
        MyLocalLlm.__init__(self, config=config)
        
    def generate_contextual_response(self, question: str) -> str:
        category = self.classify_question(question)
        CLASSIFICATION_METRICS[category] += 1
        
        system_msg = f"""You are a manufacturing analyst. Follow these rules:
        - DESCRIPTIVE: {CONTEXT_PROMPTS['descriptive']}
        - JUDGEMENT: {CONTEXT_PROMPTS['judgement']}
        - SUGGESTION: {CONTEXT_PROMPTS['suggestion']}"""
        
        return self.generate_sql(
            question=question,
            chat_history=[self.system_message(system_msg)]
        )

# --- Core Execution Flow --- 
vn = MyVanna(config=None)

# Database connection setup remains unchanged
# --- Connect to Database ---
print(f"Connecting to {db_type} database '{mysql_dbname}' on {mysql_host}...")
try:
    if db_type == "mysql":
        vn.connect_to_mysql(
            host=mysql_host,
            port=mysql_port,
            user=mysql_user,
            password=mysql_password,
            dbname=mysql_dbname
        )
    else:
        print(f"Database type '{db_type}' not configured.")
        sys.exit(1)
    print("Database connection successful.")
except Exception as e:
    print(f"Error connecting to database: {e}")
    print("Please check MySQL server status, credentials, permissions, and required packages.")
    sys.exit(1)

# --- Training Vanna (Run deliberately when needed) ---
print("\n--- Vanna Training ---")
print("Loads metadata (DDL, docs, SQL examples) into Vanna's local ChromaDB vector store.")
train_vanna = True # Set to True to run training, False to skip for normal use
if train_vanna:
    df_information_schema = vn.run_sql("SELECT * FROM INFORMATION_SCHEMA.COLUMNS")
    Plan = vn.get_training_plan_generic(df_information_schema)
    vn.train(plan=Plan)
    print("Running Vanna training...")

    # === Add your specific training calls here ===
    print("  Example: Fetching DDL (Customize for your setup)...")
    # df_ddl = vn.run_sql(f"SHOW CREATE TABLE your_table_name;")
    # if not df_ddl.empty: vn.train(ddl=df_ddl.iloc['Create Table'])

    print("  Example: Adding Documentation (Add yours)...")
    # --- Customer Table Documentation ---
    vn.train(documentation="Table CUSTOMER contains information about business customers.")
    vn.train(documentation="CUSTOMER.id is the unique identifier for each customer.")
    vn.train(documentation="CUSTOMER.name is the company name of the customer.")
    vn.train(documentation="CUSTOMER.addr_str contains the street address for the customer.")
    vn.train(documentation="CUSTOMER.addr_city contains the city for the customer.")
    vn.train(documentation="CUSTOMER.addr_ste contains the state for the customer.")
    vn.train(documentation="CUSTOMER.addr_ctry contains the country for the customer.")
    vn.train(documentation="CUSTOMER.addr_zip contains the zip-code for the customer.")

    # --- Vendor Table Documentation ---
    vn.train(documentation="Table VENDOR lists suppliers of parts.")
    vn.train(documentation="VENDOR.id is the unique identifier for each vendor.")
    vn.train(documentation="VENDOR.name is the name of the vendor/supplier.")
    vn.train(documentation="CUSTOMER.addr_str contains the street address for the vendor.")
    vn.train(documentation="CUSTOMER.addr_city contains the city for the vendor.")
    vn.train(documentation="CUSTOMER.addr_ste contains the state for the vendor.")
    vn.train(documentation="CUSTOMER.addr_ctry contains the country for the vendor.")
    vn.train(documentation="CUSTOMER.addr_zip contains the zip-code for the vendor.")

    # --- Part Master Table Documentation ---
    vn.train(documentation="Table PART_MST is the master list of all parts.")
    vn.train(documentation="PART_MST.id is the unique identifier which indicates the unique id for the part.")
    vn.train(documentation="PART_MST.p_code is the unique product code for the part.")
    vn.train(documentation="PART_MST.procurement indicates if a part is manufactured internally ('M') or purchased ('B'). It must be either 'M' or 'B'.")
    vn.train(documentation="PART_MST.unit_price represents the standard price per unit of the part.")
    vn.train(documentation="PART_MST.pref_order_qty specifies The preferred quantity to order or manufacture at once, depending on if the part is M or B.")
    vn.train(documentation="PART_MST.unit_meas the unit of measure for this part; usually 'EA' for 'eaches'.")
    vn.train(documentation="PART_MST.mfg_lt is the lead time to manufacture this part, if it is manufactured. This must an 'M' part and positive.")
    vn.train(documentation="PART_MST.curr_stock the current amount of this part in stock in inventory")

    # --- Add Documentation for BOM_MST, RTG_MST, JOB_OPER, JOB_MATL, RECV_MST, RECV_LINE ---
    # Example:
    vn.train(documentation="Table bill_of_m defines the Bill of Materials, showing parent-child part relationships.")
    vn.train(documentation="bill_of_m.m_part links to the parent part code in PART_MST. The manufactured part which consumes c_part.")
    vn.train(documentation="BOM_MST.c_part links to the child component part code in PART_MST. The component part consumed by m_part.")
    vn.train(documentation="BOM_MST.qty_req is The quantity of c_part required to make one unit of m_part.")

    # --- Job Master Table Documentation ---
    vn.train(documentation="Table job_mst represents the master data for all the jobs.")
    vn.train(documentation="job_mst.id is the unique identifier for a job.")
    vn.train(documentation="job_mst.job_stat signifies the current status of the job as follows: 'Q' (in queue), 'C' (completed), 'O' (opened), 'X' (cancelled), or 'H' (on hold).")
    vn.train(documentation="job_mst.part is the part that this job is manufacturing. This can be found in the PART_MST table.")
    vn.train(documentation="job_mst.qty is the quantity of the part that this job is manufacturing.")
    vn.train(documentation="job_mst.job_rls is the time at which this job began work.")
    vn.train(documentation="job_mst.job_cls is the time at which this job was completed.")

    # --- Nonconform master table documentation ---
    vn.train(documentation="Table NONCONFORM_MST defines the Nonconformance report master data.")
    vn.train(documentation="NONCONFORM_MST.id is a unique id for this nonconformance report.")
    vn.train(documentation="NONCONFORM_MST.job signifies if this nonconformance report was issued for a job, the job it was issued for.")
    vn.train(documentation="NONCONFORM_MST.po_no signifies if this nonconformance report was issued for a purchase order, the purchase order it was issued for.")
    vn.train(documentation="NONCONFORM_MST.qty signifies the quantity of parts on this job/purchase order which were flagged for nonconformance issues.")




    # --- Sales Order Header Table Documentation ---
    vn.train(documentation="Table SALES_MST represents the header information for a customer sales order.")
    vn.train(documentation="SALES_MST.order_no is the unique identifier for a sales order.")
    vn.train(documentation="SALES_MST.order_date is the date the sales order was placed.")
    vn.train(documentation="SALES_MST.cust links to the CUSTOMER table for the customer who placed the order.")

    # --- Sales Order Line Table Documentation ---
    vn.train(documentation="Table SALES_LINE contains individual line items for each sales order in SALES_MST.")
    vn.train(documentation="SALES_LINE.order_no links back to the SALES_MST table.")
    vn.train(documentation="SALES_LINE.line_no is the line number of the line item under the given order_no.")
    vn.train(documentation="SALES_LINE.order_stat is the current status of the line item: 'C' (completed), 'O' (opened), or 'X' (cancelled).")
    vn.train(documentation="SALES_LINE.part links to the PART_MST table for the specific part ordered.")
    vn.train(documentation="SALES_LINE.due is the requested delivery date for this line item.")
    vn.train(documentation="SALES_LINE.unit_price is the agreed price per unit for the part on this order line.")
    vn.train(documentation="SALES_LINE.qty is the number of units ordered for this part on this line item.")
    vn.train(documentation="SALES_LINE.line_cls is the date on which this line item was closed.")

    # --- Purchase Order Header Table Documentation ---
    vn.train(documentation="Table PURCHASE_MST represents the header information for a purchase order sent to a vendor.")
    vn.train(documentation="PURCHASE_MST.order_no is the unique identifier for a purchase order.")
    vn.train(documentation="PURCHASE_MST.order_date is the date the purchase order was created.")
    vn.train(documentation="PURCHASE_MST.vendor links to the VENDOR table for the supplier receiving the order.")

    # --- Purchase Order Line Table Documentation ---
    vn.train(documentation="Table PURCHASE_LINE contains individual line items for each purchase order in PURCHASE_MST.")
    vn.train(documentation="PURCHASE_LINE.order_no links back to the PURCHASE_MST table.")
    vn.train(documentation="PURCHASE_LINE.part links to the PART_MST table for the specific part being purchased.")
    vn.train(documentation="PURCHASE_LINE.due is the expected delivery date for this purchased part.")
    vn.train(documentation="PURCHASE_LINE.unit_cost is the cost per unit for the part on this purchase order line.")
    vn.train(documentation="PURCHASE_LINE.qty is the number of units purchased. It must be a positive number.")
    vn.train(documentation="PURCHASE_LINE.order_stat is the current status of the purchase order: 'C' (completed), 'O' (opened), or 'X' (cancelled).")
    vn.train(documentation="PURCHASE_LINE.order_type is the type of purchase order: either an 'S' for service order or 'M' for material order.")
    vn.train(documentation="PURCHASE_LINE.line_no is the line number of this purchase order line under the given order_no.")
    vn.train(documentation="PURCHASE_LINE.line_cls is the date on which this purchase order was closed.")


    # --- Vendor Part Table Documentation ---
    vn.train(documentation="Table VEND_PART represents vendor and part relationship.")
    vn.train(documentation="VEND_PART.vendor is the unique identifier for each vendor.")
    vn.train(documentation="VEND_PART.part is the part that can be procured from the vendor.")
    vn.train(documentation="VEND_PART.unit_cost is typically the creation or start date of the job.")
    vn.train(documentation="VEND_PART.part_lt is The standard lead time for which to place orders for part to vendor")
    

    # --- General Business Rules/Concepts ---
    # vn.train(documentation="'Lead Time' generally refers to the time required to procure or produce a part (PART_MST.sys_lt is a default).")
    # vn.train(documentation="'On-time Delivery' can be assessed by comparing SALES_LINE.due_date with actual delivery dates (potentially in another table like deliveries if it exists).")



# --- Enhanced Training Section ---

    # Add classification-aware training examples
    # classification_examples = [
    #     ("What's current revenue?", "descriptive", "SELECT SUM(unit_price*qty) FROM sales_line"),
    #     ("Why was order PO-123 late?", "judgement", "Analyze purchase_line JOIN vendor ON..."),
    #     ("Improve delivery times?", "suggestion", "SELECT sys_lt FROM part_mst WHERE...")
    # ]

    # for q, t, s in classification_examples:
    #     vn.train(
    #         documentation=f"{t.upper()}: {q}",
    #         question=q,
    #         sql=s
    #     )
        
    # # Add type-specific documentation
    # vn.train(documentation="DESCRIPTIVE: Revenue projection query")
    # vn.train(documentation="JUDGEMENT: Lead time accuracy analysis")
    # vn.train(documentation="SUGGESTION: Vendor performance improvement")

def extract_sql_from_response(response: str) -> str:
    """Extract SQL code from LLM response using regex patterns"""
    # Match multi-line SQL between `````` with flexible whitespace
    response.split(":\n\n")[1].replace("\n"," ")

# --- Main Interaction Loop ---
# --- Modified Main Interaction Loop ---
while True:
    try:
        question = input("Whats the total sales value till now?")
        if question.lower() == 'exit':
            break
            
        print("\nAgent thinking...")
        start_time = time.time()
        
        # Get classified response
        llm_response = vn.generate_contextual_response(question)
        category = vn.classify_question(question)
        
        # Extract SQL using implemented function
        extracted_sql = extract_sql_from_response(llm_response)
        
        if not extracted_sql:
            print("\nCould not extract executable SQL from response")
            continue
            
        print(f"\n--- Extracted SQL ({category.upper()}) ---")
        print(extracted_sql)
        print("--- End of Extracted SQL ---")
        
        # Unified execution using Vanna's method
        execute = input(f"\nExecute {category.upper()} SQL query? (y/n): ").lower().strip()
        
        if execute == 'y':
            try:
                print("Executing query...")
                exec_start_time = time.time()
                
                df_result = vn.run_sql(sql=extracted_sql)
                
                # Category-specific post-processing
                if category == "judgement":
                    print("\n** Analytical Insights **")
                    print(df_result.describe())
                elif category == "suggestion":
                    print("\n** Recommended Actions **")
                    print("Consider these vendor lead times:")
                    print(df_result.head())
                else:  # descriptive
                    print("\n** Query Results **")
                
                # Display results
                with pd.option_context('display.max_rows', 20, 
                                     'display.max_columns', None,
                                     'display.width', 1000):
                    if len(df_result) > 20:
                        print(df_result.head(20))
                        print(f"... (truncated, {len(df_result)} rows total)")
                    else:
                        print(df_result)
                        
            except Exception as exec_e:
                print(f"\nError executing SQL: {type(exec_e).__name__} - {exec_e}")
                print(f"Failed SQL:\n{extracted_sql}")

    except KeyboardInterrupt:
        print("\nExiting...")
        break

Connecting to mysql database 'ft_database' on 127.0.0.1...
Database connection successful.

--- Vanna Training ---
Loads metadata (DDL, docs, SQL examples) into Vanna's local ChromaDB vector store.


Add of existing embedding ID: 31c39d68-1b22-5545-97e0-353dc991f251-doc
Insert of existing embedding ID: 31c39d68-1b22-5545-97e0-353dc991f251-doc
Add of existing embedding ID: fff92c80-4899-53ea-a8e4-a18c0415b820-doc
Insert of existing embedding ID: fff92c80-4899-53ea-a8e4-a18c0415b820-doc
Add of existing embedding ID: 020be5d7-5209-56ca-8b6a-5962efd95682-doc
Insert of existing embedding ID: 020be5d7-5209-56ca-8b6a-5962efd95682-doc
Add of existing embedding ID: 2f420d2e-9850-51f8-ab1a-2bf1e77e2d3b-doc
Insert of existing embedding ID: 2f420d2e-9850-51f8-ab1a-2bf1e77e2d3b-doc
Add of existing embedding ID: 9aebcf7e-04a6-5368-b307-25d7ca8a1741-doc
Insert of existing embedding ID: 9aebcf7e-04a6-5368-b307-25d7ca8a1741-doc
Add of existing embedding ID: d27aa358-78b0-58f6-8dc7-4f50d0c2b8e2-doc
Insert of existing embedding ID: d27aa358-78b0-58f6-8dc7-4f50d0c2b8e2-doc
Add of existing embedding ID: 97dd033f-f6c1-574a-b60f-242668753672-doc
Insert of existing embedding ID: 97dd033f-f6c1-574a-b60f-24

Running Vanna training...
  Example: Fetching DDL (Customize for your setup)...
  Example: Adding Documentation (Add yours)...
Adding documentation....
Adding documentation....
Adding documentation....
Adding documentation....


Add of existing embedding ID: 5710f1b2-2513-59b1-a22a-06ca846f3335-doc
Insert of existing embedding ID: 5710f1b2-2513-59b1-a22a-06ca846f3335-doc
Add of existing embedding ID: f62258fc-5052-5064-8778-17d64b48fb75-doc
Insert of existing embedding ID: f62258fc-5052-5064-8778-17d64b48fb75-doc
Add of existing embedding ID: 3a1ce7ab-9179-5232-8d27-603f1c7fc185-doc
Insert of existing embedding ID: 3a1ce7ab-9179-5232-8d27-603f1c7fc185-doc
Add of existing embedding ID: 118f8eeb-9139-5bb3-aa51-ffde0d892907-doc
Insert of existing embedding ID: 118f8eeb-9139-5bb3-aa51-ffde0d892907-doc


Adding documentation....
Adding documentation....
Adding documentation....
Adding documentation....


Add of existing embedding ID: 8c8564bf-5f18-5fca-b590-917e64a7c696-doc
Insert of existing embedding ID: 8c8564bf-5f18-5fca-b590-917e64a7c696-doc
Add of existing embedding ID: 1e2fad2c-8b40-5101-b01a-eb7cb27e9093-doc
Insert of existing embedding ID: 1e2fad2c-8b40-5101-b01a-eb7cb27e9093-doc
Add of existing embedding ID: adeabba7-8bbd-5156-97a3-d1e27d0e8f47-doc
Insert of existing embedding ID: adeabba7-8bbd-5156-97a3-d1e27d0e8f47-doc
Add of existing embedding ID: 380c2a34-55c2-5fe6-aaf9-5b98fc734a1d-doc
Insert of existing embedding ID: 380c2a34-55c2-5fe6-aaf9-5b98fc734a1d-doc


Adding documentation....
Adding documentation....
Adding documentation....
Adding documentation....


Add of existing embedding ID: 2e795c42-c6f8-5c44-aa1c-eb628fdee4fc-doc
Insert of existing embedding ID: 2e795c42-c6f8-5c44-aa1c-eb628fdee4fc-doc
Add of existing embedding ID: 3a25bf9f-42c8-5156-823b-ce6a42754b95-doc
Insert of existing embedding ID: 3a25bf9f-42c8-5156-823b-ce6a42754b95-doc
Add of existing embedding ID: fa75db61-9760-51e8-b436-32ed46777d85-doc
Insert of existing embedding ID: fa75db61-9760-51e8-b436-32ed46777d85-doc
Add of existing embedding ID: 808d1279-ae6d-5a3f-b2a9-6a42245a4f28-doc
Insert of existing embedding ID: 808d1279-ae6d-5a3f-b2a9-6a42245a4f28-doc


Adding documentation....
Adding documentation....
Adding documentation....
Adding documentation....


Add of existing embedding ID: deab258b-8c49-5858-8f76-a52a6269500e-doc
Insert of existing embedding ID: deab258b-8c49-5858-8f76-a52a6269500e-doc
Add of existing embedding ID: 2db3b251-253c-55b3-99aa-12540571472c-doc
Insert of existing embedding ID: 2db3b251-253c-55b3-99aa-12540571472c-doc
Add of existing embedding ID: 417933a5-e35b-5dc9-a0d1-bf5bf8a4d2ce-doc
Insert of existing embedding ID: 417933a5-e35b-5dc9-a0d1-bf5bf8a4d2ce-doc
Add of existing embedding ID: 628de61e-0ed2-5b90-898f-38e558ac180f-doc
Insert of existing embedding ID: 628de61e-0ed2-5b90-898f-38e558ac180f-doc
Add of existing embedding ID: 80065a35-16cd-5fd7-a372-512bd06427e5-doc
Insert of existing embedding ID: 80065a35-16cd-5fd7-a372-512bd06427e5-doc


Adding documentation....
Adding documentation....
Adding documentation....
Adding documentation....


Add of existing embedding ID: aae9e2ec-a925-599b-a250-2f0b2410a291-doc
Insert of existing embedding ID: aae9e2ec-a925-599b-a250-2f0b2410a291-doc
Add of existing embedding ID: e9263c31-5c79-5360-816b-5f7f98684880-doc
Insert of existing embedding ID: e9263c31-5c79-5360-816b-5f7f98684880-doc
Add of existing embedding ID: 5f751b55-e510-53da-a888-5ecefa26fee9-doc
Insert of existing embedding ID: 5f751b55-e510-53da-a888-5ecefa26fee9-doc


Adding documentation....
Adding documentation....
Adding documentation....
Adding documentation....


Add of existing embedding ID: 7ed5330d-ac46-5eb4-89b5-9971d2414c73-doc
Insert of existing embedding ID: 7ed5330d-ac46-5eb4-89b5-9971d2414c73-doc
Add of existing embedding ID: 23909f8f-6287-589a-9018-c962c6900982-doc
Insert of existing embedding ID: 23909f8f-6287-589a-9018-c962c6900982-doc
Add of existing embedding ID: a3dff5a7-f7ae-5291-9b03-a0b09282c724-doc
Insert of existing embedding ID: a3dff5a7-f7ae-5291-9b03-a0b09282c724-doc
Add of existing embedding ID: 595aefd0-fcf8-5f8f-8068-5e83d009e593-doc
Insert of existing embedding ID: 595aefd0-fcf8-5f8f-8068-5e83d009e593-doc


Adding documentation....
Adding documentation....
Adding documentation....
Adding documentation....


Add of existing embedding ID: bbeecc4f-d710-575a-8f54-5c94c81562f7-doc
Insert of existing embedding ID: bbeecc4f-d710-575a-8f54-5c94c81562f7-doc
Add of existing embedding ID: 8386d710-25b6-51b9-91d8-30785a276e62-doc
Insert of existing embedding ID: 8386d710-25b6-51b9-91d8-30785a276e62-doc
Add of existing embedding ID: 6910f4a8-91ea-598d-9b25-57242d313d65-doc
Insert of existing embedding ID: 6910f4a8-91ea-598d-9b25-57242d313d65-doc
Add of existing embedding ID: ab40f9fb-5a08-5f54-8d12-0e9d43477249-doc
Insert of existing embedding ID: ab40f9fb-5a08-5f54-8d12-0e9d43477249-doc


Adding documentation....
Adding documentation....
Adding documentation....
Adding documentation....


Add of existing embedding ID: dea8e4e4-e47c-5c8e-9a6a-7166bc5e54dd-doc
Insert of existing embedding ID: dea8e4e4-e47c-5c8e-9a6a-7166bc5e54dd-doc
Add of existing embedding ID: 49ed9ddb-37cb-5687-ab29-5364cc05e755-doc
Insert of existing embedding ID: 49ed9ddb-37cb-5687-ab29-5364cc05e755-doc
Add of existing embedding ID: 76378bf1-3f39-52d7-8589-31832805c77b-doc
Insert of existing embedding ID: 76378bf1-3f39-52d7-8589-31832805c77b-doc
Add of existing embedding ID: 77937f1f-1c06-5c8e-9b2a-dbaecbb40383-doc
Insert of existing embedding ID: 77937f1f-1c06-5c8e-9b2a-dbaecbb40383-doc


Adding documentation....
Adding documentation....
Adding documentation....
Adding documentation....


Add of existing embedding ID: 2bae7fcb-36a7-5286-b8c6-71208a314226-doc
Insert of existing embedding ID: 2bae7fcb-36a7-5286-b8c6-71208a314226-doc
Add of existing embedding ID: 49333a1c-ca97-5b18-a978-eb9f9f22d43c-doc
Insert of existing embedding ID: 49333a1c-ca97-5b18-a978-eb9f9f22d43c-doc
Add of existing embedding ID: 24e2eb51-a625-5ba2-bd1e-ceb1b1df0182-doc
Insert of existing embedding ID: 24e2eb51-a625-5ba2-bd1e-ceb1b1df0182-doc
Add of existing embedding ID: c1d11163-ce12-54e8-a638-426f870aa7f4-doc
Insert of existing embedding ID: c1d11163-ce12-54e8-a638-426f870aa7f4-doc


Adding documentation....
Adding documentation....
Adding documentation....
Adding documentation....


Add of existing embedding ID: 0551b27b-7308-52ed-9abd-4415dd77ec81-doc
Insert of existing embedding ID: 0551b27b-7308-52ed-9abd-4415dd77ec81-doc
Add of existing embedding ID: fb6f1cbe-b884-51e1-a624-7ef41fea05e1-doc
Insert of existing embedding ID: fb6f1cbe-b884-51e1-a624-7ef41fea05e1-doc
Add of existing embedding ID: 5257415b-4130-5a26-946a-d31f4069503e-doc
Insert of existing embedding ID: 5257415b-4130-5a26-946a-d31f4069503e-doc
Add of existing embedding ID: 96bde749-8d97-5d6e-88d2-dac226a05810-doc
Insert of existing embedding ID: 96bde749-8d97-5d6e-88d2-dac226a05810-doc


Adding documentation....
Adding documentation....
Adding documentation....
Adding documentation....


Add of existing embedding ID: c02733e3-c35a-53a8-b5ce-996106d1b55d-doc
Insert of existing embedding ID: c02733e3-c35a-53a8-b5ce-996106d1b55d-doc
Add of existing embedding ID: af9febcb-31be-5b14-8ce8-0fac9252681a-doc
Insert of existing embedding ID: af9febcb-31be-5b14-8ce8-0fac9252681a-doc
Add of existing embedding ID: b03e0de0-ae76-5be7-b66c-ba00c99ac122-doc
Insert of existing embedding ID: b03e0de0-ae76-5be7-b66c-ba00c99ac122-doc
Add of existing embedding ID: 26430856-5ae1-5ecf-867b-b933387d0461-doc
Insert of existing embedding ID: 26430856-5ae1-5ecf-867b-b933387d0461-doc


Adding documentation....
Adding documentation....
Adding documentation....
Adding documentation....


Add of existing embedding ID: 242db09b-2597-5c29-b559-99eb35e6645d-doc
Insert of existing embedding ID: 242db09b-2597-5c29-b559-99eb35e6645d-doc
Add of existing embedding ID: 53f4e256-e5f4-5793-b2fc-d15fe96aed80-doc
Insert of existing embedding ID: 53f4e256-e5f4-5793-b2fc-d15fe96aed80-doc
Add of existing embedding ID: e3a1bdc3-429f-5a7f-9e80-a667d434142c-doc
Insert of existing embedding ID: e3a1bdc3-429f-5a7f-9e80-a667d434142c-doc
Add of existing embedding ID: 128533d2-05c4-5167-8867-de34e7743520-doc
Insert of existing embedding ID: 128533d2-05c4-5167-8867-de34e7743520-doc


Adding documentation....
Adding documentation....
Adding documentation....
Adding documentation....


Add of existing embedding ID: 37a60c7d-71e5-5999-a7a9-d5062627d6db-doc
Insert of existing embedding ID: 37a60c7d-71e5-5999-a7a9-d5062627d6db-doc
Add of existing embedding ID: 0300c6b3-f8f8-52bb-9d2e-08d83a1e973c-doc
Insert of existing embedding ID: 0300c6b3-f8f8-52bb-9d2e-08d83a1e973c-doc
Add of existing embedding ID: 5b4e09c7-3aa4-5d97-918a-570f5b07fc5b-doc
Insert of existing embedding ID: 5b4e09c7-3aa4-5d97-918a-570f5b07fc5b-doc
Add of existing embedding ID: 5ee3dc07-0388-522f-8277-f9abf7399e4b-doc
Insert of existing embedding ID: 5ee3dc07-0388-522f-8277-f9abf7399e4b-doc


Adding documentation....
Adding documentation....
Adding documentation....
Adding documentation....


Add of existing embedding ID: 7a1d58d5-2171-565f-9edf-3cb8a125ef66-doc
Insert of existing embedding ID: 7a1d58d5-2171-565f-9edf-3cb8a125ef66-doc
Add of existing embedding ID: 7d9e084d-20aa-5ab5-b619-4ce79a69131f-doc
Insert of existing embedding ID: 7d9e084d-20aa-5ab5-b619-4ce79a69131f-doc
Add of existing embedding ID: 0dc60cc5-f32b-578b-9708-dfdadc907edf-doc
Insert of existing embedding ID: 0dc60cc5-f32b-578b-9708-dfdadc907edf-doc
Add of existing embedding ID: 1f322c62-5226-5ee3-86f8-695e4ab976b0-doc
Insert of existing embedding ID: 1f322c62-5226-5ee3-86f8-695e4ab976b0-doc


Adding documentation....
Adding documentation....
Adding documentation....
Adding documentation....


Add of existing embedding ID: bc909e47-ed1f-5b15-be2c-0c1789202c68-doc
Insert of existing embedding ID: bc909e47-ed1f-5b15-be2c-0c1789202c68-doc
Add of existing embedding ID: 04b42da7-2f59-5af1-9379-f9c8854b943c-doc
Insert of existing embedding ID: 04b42da7-2f59-5af1-9379-f9c8854b943c-doc
Add of existing embedding ID: 87135ea3-a863-589b-8d9e-207c0bf9e368-doc
Insert of existing embedding ID: 87135ea3-a863-589b-8d9e-207c0bf9e368-doc
Add of existing embedding ID: 6dafd24f-2d1b-5b34-a48a-7c52345f8fd3-doc
Insert of existing embedding ID: 6dafd24f-2d1b-5b34-a48a-7c52345f8fd3-doc


Adding documentation....
Adding documentation....
Adding documentation....
Adding documentation....


Add of existing embedding ID: b7c247a6-3fc3-599f-9eb4-6d2ab1920097-doc
Insert of existing embedding ID: b7c247a6-3fc3-599f-9eb4-6d2ab1920097-doc
Add of existing embedding ID: e200dc7d-15c2-55aa-9452-92526bca6476-doc
Insert of existing embedding ID: e200dc7d-15c2-55aa-9452-92526bca6476-doc
Add of existing embedding ID: 3a2b3b3c-d62e-57c9-9017-e00b1dfacf1e-doc
Insert of existing embedding ID: 3a2b3b3c-d62e-57c9-9017-e00b1dfacf1e-doc
Add of existing embedding ID: 0a712900-a404-5806-ac70-6554fa3d6e27-doc
Insert of existing embedding ID: 0a712900-a404-5806-ac70-6554fa3d6e27-doc


Adding documentation....
Adding documentation....
Adding documentation....
Adding documentation....


Add of existing embedding ID: eaf17db6-7e22-5ab6-b635-0eca04ab8db1-doc
Insert of existing embedding ID: eaf17db6-7e22-5ab6-b635-0eca04ab8db1-doc
Add of existing embedding ID: d83ce6c1-5e5e-5b58-bde5-219d2811b351-doc
Insert of existing embedding ID: d83ce6c1-5e5e-5b58-bde5-219d2811b351-doc
Add of existing embedding ID: 9d2cfeb6-376b-52b4-911e-ff90b1d01409-doc
Insert of existing embedding ID: 9d2cfeb6-376b-52b4-911e-ff90b1d01409-doc
Add of existing embedding ID: d47f3274-ac0c-5782-b821-3f086922139f-doc
Insert of existing embedding ID: d47f3274-ac0c-5782-b821-3f086922139f-doc


Adding documentation....
Adding documentation....
Adding documentation....
Adding documentation....


Add of existing embedding ID: 49f130b8-13f2-5c3c-8e60-20acda411259-doc
Insert of existing embedding ID: 49f130b8-13f2-5c3c-8e60-20acda411259-doc
Add of existing embedding ID: 82f36a27-c95f-598f-ba45-7c4b36d5fce6-doc
Insert of existing embedding ID: 82f36a27-c95f-598f-ba45-7c4b36d5fce6-doc


Adding documentation....

Agent thinking...
SQL Prompt: [{'role': 'system', 'content': "You are a SQL expert. Please help to generate a SQL query to answer the question. Your response should ONLY be based on the given context and follow the response guidelines and format instructions. \n===Additional Context \n\nCUSTOMER.name is the company name of the customer.\n\nDESCRIPTIVE: What's current revenue?\n\nCUSTOMER.id is the unique identifier for each customer.\n\nPURCHASE_LINE.due is the expected delivery date for this purchased part.\n\nSUGGESTION: Vendor performance improvement\n\nJUDGEMENT: Lead time accuracy analysis\n\nSALES_LINE.due is the requested delivery date for this line item.\n\nSUGGESTION: Improve delivery times?\n\nVENDOR.name is the name of the vendor/supplier.\n\nPURCHASE_LINE.qty is the number of units purchased. It must be a positive number.\n\n===Response Guidelines \n1. If the provided context is sufficient, please generate a valid SQL query without any explanations

In [3]:
import time
import os
import sys
import warnings
import json
import re
from typing import Dict, List
from collections import defaultdict

# --- Environment Setup ---
os.environ["TOKENIZERS_PARALLELISM"] = "false"

# --- Package Imports ---
try:
    import vanna
    from vanna.base import VannaBase
    from vanna.chromadb.chromadb_vector import ChromaDB_VectorStore
    import openai
    import mysql.connector
    import pandas as pd
except ImportError as e:
    print(f"Import Error: {e}")
    sys.exit(1)

# --- Configuration ---
db_type = "mysql"
mysql_host = "127.0.0.1"
mysql_port = 3306
mysql_user = "root"
mysql_password = "Raekwon_wtc$36" 
mysql_dbname = "ft_database"
llm_model_name = 'DeepSeek-R1-Distill-Llama-8B-Q4_K_M.gguf'
local_server_url = 'http://127.0.0.1:8000/v1'
placeholder_api_key = "sk-no-key-required"

# --- Question Classification Constants ---
QUESTION_TYPES = {
    "descriptive": "Direct factual retrieval from database",
    "judgement": "Requires analysis of multiple data points", 
    "suggestion": "Needs recommendations with external context"
}

CONTEXT_PROMPTS = {
    "descriptive": "Return ONLY SQL without explanation",
    "judgement": "Include trend analysis and correlations",
    "suggestion": "Reference vendor lead time APIs"
}

CLASSIFICATION_METRICS = defaultdict(int)

# --- Custom LLM Class with MCP Features ---
class MyLocalLlm(VannaBase):
    def __init__(self, config=None):
        super().__init__(config=config)
        self.model = llm_model_name

    # Implement required abstract methods
    def assistant_message(self, content: str) -> Dict:
        return {"role": "assistant", "content": content}

    def system_message(self, content: str) -> Dict:
        return {"role": "system", "content": content}

    def user_message(self, content: str) -> Dict:
        return {"role": "user", "content": content}
            
    def classify_question(self, question: str) -> str:
        classification_prompt = f"""Classify this manufacturing query:
        {json.dumps(QUESTION_TYPES, indent=2)}
        Return ONLY the category name. Query: {question}"""
        
        response = self.submit_prompt(classification_prompt)
        category = response.strip().lower()
        return category if category in QUESTION_TYPES else "descriptive"

    def submit_prompt(self, prompt, **kwargs) -> str:
        try:
            local_client = openai.OpenAI(
                base_url=local_server_url,
                api_key=placeholder_api_key,
                timeout=kwargs.get('request_timeout', 180.0)
            )
            
            messages = prompt if isinstance(prompt, list) else [{"role": "user", "content": str(prompt)}]
            
            response = local_client.chat.completions.create(
                model=self.model,
                messages=messages,
                temperature=kwargs.get('temperature', 0.1),
                max_tokens=kwargs.get('max_tokens', 1536),
                stop=kwargs.get('stop', ["``````"])
            )
            
            if response.choices and response.choices[0].message.content:
                return response.choices[0].message.content.strip()
            return "Error: No response generated"

        except Exception as e:
            return f"Error: {str(e)}"

# --- Enhanced Vanna Class ---
class MyVanna(ChromaDB_VectorStore, MyLocalLlm):
    def __init__(self, config=None):
        ChromaDB_VectorStore.__init__(self, config=config)
        MyLocalLlm.__init__(self, config=config)
        
    def generate_contextual_response(self, question: str) -> str:
        category = self.classify_question(question)
        CLASSIFICATION_METRICS[category] += 1
        
        system_msg = f"""You are a manufacturing analyst. Follow these rules:
        - DESCRIPTIVE: {CONTEXT_PROMPTS['descriptive']}
        - JUDGEMENT: {CONTEXT_PROMPTS['judgement']}
        - SUGGESTION: {CONTEXT_PROMPTS['suggestion']}"""
        
        return self.generate_sql(
            question=question,
            chat_history=[self.system_message(system_msg)]
        )

# --- Core Execution Flow --- 
vn = MyVanna(config=None)

# Database connection setup remains unchanged
# --- Connect to Database ---
print(f"Connecting to {db_type} database '{mysql_dbname}' on {mysql_host}...")
try:
    if db_type == "mysql":
        vn.connect_to_mysql(
            host=mysql_host,
            port=mysql_port,
            user=mysql_user,
            password=mysql_password,
            dbname=mysql_dbname
        )
    else:
        print(f"Database type '{db_type}' not configured.")
        sys.exit(1)
    print("Database connection successful.")
except Exception as e:
    print(f"Error connecting to database: {e}")
    print("Please check MySQL server status, credentials, permissions, and required packages.")
    sys.exit(1)

# --- Training Vanna (Run deliberately when needed) ---
print("\n--- Vanna Training ---")
print("Loads metadata (DDL, docs, SQL examples) into Vanna's local ChromaDB vector store.")
train_vanna = True # Set to True to run training, False to skip for normal use
if train_vanna:
    df_information_schema = vn.run_sql("SELECT * FROM INFORMATION_SCHEMA.COLUMNS")
    Plan = vn.get_training_plan_generic(df_information_schema)
    vn.train(plan=Plan)
    print("Running Vanna training...")

    # === Add your specific training calls here ===
    print("  Example: Fetching DDL (Customize for your setup)...")
    # df_ddl = vn.run_sql(f"SHOW CREATE TABLE your_table_name;")
    # if not df_ddl.empty: vn.train(ddl=df_ddl.iloc['Create Table'])

    print("  Example: Adding Documentation (Add yours)...")
    # --- Customer Table Documentation ---
    vn.train(documentation="Table CUSTOMER contains information about business customers.")
    vn.train(documentation="CUSTOMER.id is the unique identifier for each customer.")
    vn.train(documentation="CUSTOMER.name is the company name of the customer.")
    vn.train(documentation="CUSTOMER.addr_str contains the street address for the customer.")
    vn.train(documentation="CUSTOMER.addr_city contains the city for the customer.")
    vn.train(documentation="CUSTOMER.addr_ste contains the state for the customer.")
    vn.train(documentation="CUSTOMER.addr_ctry contains the country for the customer.")
    vn.train(documentation="CUSTOMER.addr_zip contains the zip-code for the customer.")

    # --- Vendor Table Documentation ---
    vn.train(documentation="Table VENDOR lists suppliers of parts.")
    vn.train(documentation="VENDOR.id is the unique identifier for each vendor.")
    vn.train(documentation="VENDOR.name is the name of the vendor/supplier.")
    vn.train(documentation="CUSTOMER.addr_str contains the street address for the vendor.")
    vn.train(documentation="CUSTOMER.addr_city contains the city for the vendor.")
    vn.train(documentation="CUSTOMER.addr_ste contains the state for the vendor.")
    vn.train(documentation="CUSTOMER.addr_ctry contains the country for the vendor.")
    vn.train(documentation="CUSTOMER.addr_zip contains the zip-code for the vendor.")

    # --- Part Master Table Documentation ---
    vn.train(documentation="Table PART_MST is the master list of all parts.")
    vn.train(documentation="PART_MST.id is the unique identifier which indicates the unique id for the part.")
    vn.train(documentation="PART_MST.p_code is the unique product code for the part.")
    vn.train(documentation="PART_MST.procurement indicates if a part is manufactured internally ('M') or purchased ('B'). It must be either 'M' or 'B'.")
    vn.train(documentation="PART_MST.unit_price represents the standard price per unit of the part.")
    vn.train(documentation="PART_MST.pref_order_qty specifies The preferred quantity to order or manufacture at once, depending on if the part is M or B.")
    vn.train(documentation="PART_MST.unit_meas the unit of measure for this part; usually 'EA' for 'eaches'.")
    vn.train(documentation="PART_MST.mfg_lt is the lead time to manufacture this part, if it is manufactured. This must an 'M' part and positive.")
    vn.train(documentation="PART_MST.curr_stock the current amount of this part in stock in inventory")

    # --- Add Documentation for BOM_MST, RTG_MST, JOB_OPER, JOB_MATL, RECV_MST, RECV_LINE ---
    # Example:
    vn.train(documentation="Table bill_of_m defines the Bill of Materials, showing parent-child part relationships.")
    vn.train(documentation="bill_of_m.m_part links to the parent part code in PART_MST. The manufactured part which consumes c_part.")
    vn.train(documentation="BOM_MST.c_part links to the child component part code in PART_MST. The component part consumed by m_part.")
    vn.train(documentation="BOM_MST.qty_req is The quantity of c_part required to make one unit of m_part.")

    # --- Job Master Table Documentation ---
    vn.train(documentation="Table job_mst represents the master data for all the jobs.")
    vn.train(documentation="job_mst.id is the unique identifier for a job.")
    vn.train(documentation="job_mst.job_stat signifies the current status of the job as follows: 'Q' (in queue), 'C' (completed), 'O' (opened), 'X' (cancelled), or 'H' (on hold).")
    vn.train(documentation="job_mst.part is the part that this job is manufacturing. This can be found in the PART_MST table.")
    vn.train(documentation="job_mst.qty is the quantity of the part that this job is manufacturing.")
    vn.train(documentation="job_mst.job_rls is the time at which this job began work.")
    vn.train(documentation="job_mst.job_cls is the time at which this job was completed.")

    # --- Nonconform master table documentation ---
    vn.train(documentation="Table NONCONFORM_MST defines the Nonconformance report master data.")
    vn.train(documentation="NONCONFORM_MST.id is a unique id for this nonconformance report.")
    vn.train(documentation="NONCONFORM_MST.job signifies if this nonconformance report was issued for a job, the job it was issued for.")
    vn.train(documentation="NONCONFORM_MST.po_no signifies if this nonconformance report was issued for a purchase order, the purchase order it was issued for.")
    vn.train(documentation="NONCONFORM_MST.qty signifies the quantity of parts on this job/purchase order which were flagged for nonconformance issues.")




    # --- Sales Order Header Table Documentation ---
    vn.train(documentation="Table SALES_MST represents the header information for a customer sales order.")
    vn.train(documentation="SALES_MST.order_no is the unique identifier for a sales order.")
    vn.train(documentation="SALES_MST.order_date is the date the sales order was placed.")
    vn.train(documentation="SALES_MST.cust links to the CUSTOMER table for the customer who placed the order.")

    # --- Sales Order Line Table Documentation ---
    vn.train(documentation="Table SALES_LINE contains individual line items for each sales order in SALES_MST.")
    vn.train(documentation="SALES_LINE.order_no links back to the SALES_MST table.")
    vn.train(documentation="SALES_LINE.line_no is the line number of the line item under the given order_no.")
    vn.train(documentation="SALES_LINE.order_stat is the current status of the line item: 'C' (completed), 'O' (opened), or 'X' (cancelled).")
    vn.train(documentation="SALES_LINE.part links to the PART_MST table for the specific part ordered.")
    vn.train(documentation="SALES_LINE.due is the requested delivery date for this line item.")
    vn.train(documentation="SALES_LINE.unit_price is the agreed price per unit for the part on this order line.")
    vn.train(documentation="SALES_LINE.qty is the number of units ordered for this part on this line item.")
    vn.train(documentation="SALES_LINE.line_cls is the date on which this line item was closed.")

    # --- Purchase Order Header Table Documentation ---
    vn.train(documentation="Table PURCHASE_MST represents the header information for a purchase order sent to a vendor.")
    vn.train(documentation="PURCHASE_MST.order_no is the unique identifier for a purchase order.")
    vn.train(documentation="PURCHASE_MST.order_date is the date the purchase order was created.")
    vn.train(documentation="PURCHASE_MST.vendor links to the VENDOR table for the supplier receiving the order.")

    # --- Purchase Order Line Table Documentation ---
    vn.train(documentation="Table PURCHASE_LINE contains individual line items for each purchase order in PURCHASE_MST.")
    vn.train(documentation="PURCHASE_LINE.order_no links back to the PURCHASE_MST table.")
    vn.train(documentation="PURCHASE_LINE.part links to the PART_MST table for the specific part being purchased.")
    vn.train(documentation="PURCHASE_LINE.due is the expected delivery date for this purchased part.")
    vn.train(documentation="PURCHASE_LINE.unit_cost is the cost per unit for the part on this purchase order line.")
    vn.train(documentation="PURCHASE_LINE.qty is the number of units purchased. It must be a positive number.")
    vn.train(documentation="PURCHASE_LINE.order_stat is the current status of the purchase order: 'C' (completed), 'O' (opened), or 'X' (cancelled).")
    vn.train(documentation="PURCHASE_LINE.order_type is the type of purchase order: either an 'S' for service order or 'M' for material order.")
    vn.train(documentation="PURCHASE_LINE.line_no is the line number of this purchase order line under the given order_no.")
    vn.train(documentation="PURCHASE_LINE.line_cls is the date on which this purchase order was closed.")


    # --- Vendor Part Table Documentation ---
    vn.train(documentation="Table VEND_PART represents vendor and part relationship.")
    vn.train(documentation="VEND_PART.vendor is the unique identifier for each vendor.")
    vn.train(documentation="VEND_PART.part is the part that can be procured from the vendor.")
    vn.train(documentation="VEND_PART.unit_cost is typically the creation or start date of the job.")
    vn.train(documentation="VEND_PART.part_lt is The standard lead time for which to place orders for part to vendor")
    


training_data = vn.get_training_data()
print(training_data)

Connecting to mysql database 'ft_database' on 127.0.0.1...
Database connection successful.

--- Vanna Training ---
Loads metadata (DDL, docs, SQL examples) into Vanna's local ChromaDB vector store.


Add of existing embedding ID: c99b0f25-24aa-5e8d-9f79-31b6c7bdab17-doc
Add of existing embedding ID: 7c7b8ba1-c4c8-5de5-ae11-3fdd75f98be0-doc
Add of existing embedding ID: f0faa21e-d190-5cca-ad8c-7edad5eab84e-doc
Add of existing embedding ID: 59a3b734-f3d5-5862-9b6c-841ca075ab06-doc
Add of existing embedding ID: d508bfc4-0768-5358-bb2e-0fcebb79d057-doc
Add of existing embedding ID: 5a93c965-ca6b-5cb2-ac0a-b5028d37064e-doc
Add of existing embedding ID: 513eff27-a528-502a-8bd7-f8e286de22bb-doc
Add of existing embedding ID: c6e71686-47d3-58e2-8fa8-96fcfd051f56-doc
Add of existing embedding ID: 57b485a2-180e-5fa0-b00f-c5d9a94f730f-doc
Add of existing embedding ID: 4a8c8128-84b2-5708-9fe6-441ab5e6afc0-doc
Add of existing embedding ID: ae7b0b2d-0b41-56c8-88d0-081fe0de268e-doc
Add of existing embedding ID: cccd1fec-961a-5dcf-9a33-45e509249880-doc
Add of existing embedding ID: 9c68a495-285e-5a1f-b026-f634caae3e3c-doc
Add of existing embedding ID: 193501f8-8442-568c-b640-f30f626ae0b1-doc
Add of

Running Vanna training...
  Example: Fetching DDL (Customize for your setup)...
  Example: Adding Documentation (Add yours)...
Adding documentation....
Adding documentation....
Adding documentation....
Adding documentation....


Insert of existing embedding ID: 5710f1b2-2513-59b1-a22a-06ca846f3335-doc
Add of existing embedding ID: 5710f1b2-2513-59b1-a22a-06ca846f3335-doc
Insert of existing embedding ID: f62258fc-5052-5064-8778-17d64b48fb75-doc
Add of existing embedding ID: f62258fc-5052-5064-8778-17d64b48fb75-doc
Insert of existing embedding ID: 3a1ce7ab-9179-5232-8d27-603f1c7fc185-doc
Add of existing embedding ID: 3a1ce7ab-9179-5232-8d27-603f1c7fc185-doc
Insert of existing embedding ID: 118f8eeb-9139-5bb3-aa51-ffde0d892907-doc
Add of existing embedding ID: 118f8eeb-9139-5bb3-aa51-ffde0d892907-doc


Adding documentation....
Adding documentation....
Adding documentation....
Adding documentation....


Insert of existing embedding ID: 8c8564bf-5f18-5fca-b590-917e64a7c696-doc
Add of existing embedding ID: 8c8564bf-5f18-5fca-b590-917e64a7c696-doc
Insert of existing embedding ID: 1e2fad2c-8b40-5101-b01a-eb7cb27e9093-doc
Add of existing embedding ID: 1e2fad2c-8b40-5101-b01a-eb7cb27e9093-doc
Insert of existing embedding ID: adeabba7-8bbd-5156-97a3-d1e27d0e8f47-doc
Add of existing embedding ID: adeabba7-8bbd-5156-97a3-d1e27d0e8f47-doc
Insert of existing embedding ID: 380c2a34-55c2-5fe6-aaf9-5b98fc734a1d-doc
Add of existing embedding ID: 380c2a34-55c2-5fe6-aaf9-5b98fc734a1d-doc


Adding documentation....
Adding documentation....
Adding documentation....
Adding documentation....


Insert of existing embedding ID: 2e795c42-c6f8-5c44-aa1c-eb628fdee4fc-doc
Add of existing embedding ID: 2e795c42-c6f8-5c44-aa1c-eb628fdee4fc-doc
Insert of existing embedding ID: 3a25bf9f-42c8-5156-823b-ce6a42754b95-doc
Add of existing embedding ID: 3a25bf9f-42c8-5156-823b-ce6a42754b95-doc
Insert of existing embedding ID: fa75db61-9760-51e8-b436-32ed46777d85-doc
Add of existing embedding ID: fa75db61-9760-51e8-b436-32ed46777d85-doc
Insert of existing embedding ID: 808d1279-ae6d-5a3f-b2a9-6a42245a4f28-doc
Add of existing embedding ID: 808d1279-ae6d-5a3f-b2a9-6a42245a4f28-doc


Adding documentation....
Adding documentation....
Adding documentation....
Adding documentation....


Insert of existing embedding ID: deab258b-8c49-5858-8f76-a52a6269500e-doc
Add of existing embedding ID: deab258b-8c49-5858-8f76-a52a6269500e-doc
Insert of existing embedding ID: 2db3b251-253c-55b3-99aa-12540571472c-doc
Add of existing embedding ID: 2db3b251-253c-55b3-99aa-12540571472c-doc
Insert of existing embedding ID: 417933a5-e35b-5dc9-a0d1-bf5bf8a4d2ce-doc
Add of existing embedding ID: 417933a5-e35b-5dc9-a0d1-bf5bf8a4d2ce-doc
Insert of existing embedding ID: 628de61e-0ed2-5b90-898f-38e558ac180f-doc
Add of existing embedding ID: 628de61e-0ed2-5b90-898f-38e558ac180f-doc


Adding documentation....
Adding documentation....
Adding documentation....
Adding documentation....


Insert of existing embedding ID: 80065a35-16cd-5fd7-a372-512bd06427e5-doc
Add of existing embedding ID: 80065a35-16cd-5fd7-a372-512bd06427e5-doc
Insert of existing embedding ID: aae9e2ec-a925-599b-a250-2f0b2410a291-doc
Add of existing embedding ID: aae9e2ec-a925-599b-a250-2f0b2410a291-doc
Insert of existing embedding ID: e9263c31-5c79-5360-816b-5f7f98684880-doc
Add of existing embedding ID: e9263c31-5c79-5360-816b-5f7f98684880-doc
Insert of existing embedding ID: 5f751b55-e510-53da-a888-5ecefa26fee9-doc
Add of existing embedding ID: 5f751b55-e510-53da-a888-5ecefa26fee9-doc


Adding documentation....
Adding documentation....
Adding documentation....
Adding documentation....


Insert of existing embedding ID: 7ed5330d-ac46-5eb4-89b5-9971d2414c73-doc
Add of existing embedding ID: 7ed5330d-ac46-5eb4-89b5-9971d2414c73-doc
Insert of existing embedding ID: 23909f8f-6287-589a-9018-c962c6900982-doc
Add of existing embedding ID: 23909f8f-6287-589a-9018-c962c6900982-doc
Insert of existing embedding ID: a3dff5a7-f7ae-5291-9b03-a0b09282c724-doc
Add of existing embedding ID: a3dff5a7-f7ae-5291-9b03-a0b09282c724-doc
Insert of existing embedding ID: 595aefd0-fcf8-5f8f-8068-5e83d009e593-doc
Add of existing embedding ID: 595aefd0-fcf8-5f8f-8068-5e83d009e593-doc


Adding documentation....
Adding documentation....
Adding documentation....
Adding documentation....


Insert of existing embedding ID: bbeecc4f-d710-575a-8f54-5c94c81562f7-doc
Add of existing embedding ID: bbeecc4f-d710-575a-8f54-5c94c81562f7-doc
Insert of existing embedding ID: 8386d710-25b6-51b9-91d8-30785a276e62-doc
Add of existing embedding ID: 8386d710-25b6-51b9-91d8-30785a276e62-doc
Insert of existing embedding ID: 6910f4a8-91ea-598d-9b25-57242d313d65-doc
Add of existing embedding ID: 6910f4a8-91ea-598d-9b25-57242d313d65-doc
Insert of existing embedding ID: ab40f9fb-5a08-5f54-8d12-0e9d43477249-doc
Add of existing embedding ID: ab40f9fb-5a08-5f54-8d12-0e9d43477249-doc


Adding documentation....
Adding documentation....
Adding documentation....
Adding documentation....


Insert of existing embedding ID: dea8e4e4-e47c-5c8e-9a6a-7166bc5e54dd-doc
Add of existing embedding ID: dea8e4e4-e47c-5c8e-9a6a-7166bc5e54dd-doc
Insert of existing embedding ID: 49ed9ddb-37cb-5687-ab29-5364cc05e755-doc
Add of existing embedding ID: 49ed9ddb-37cb-5687-ab29-5364cc05e755-doc
Insert of existing embedding ID: 76378bf1-3f39-52d7-8589-31832805c77b-doc
Add of existing embedding ID: 76378bf1-3f39-52d7-8589-31832805c77b-doc
Insert of existing embedding ID: 77937f1f-1c06-5c8e-9b2a-dbaecbb40383-doc
Add of existing embedding ID: 77937f1f-1c06-5c8e-9b2a-dbaecbb40383-doc


Adding documentation....
Adding documentation....
Adding documentation....
Adding documentation....


Insert of existing embedding ID: 2bae7fcb-36a7-5286-b8c6-71208a314226-doc
Add of existing embedding ID: 2bae7fcb-36a7-5286-b8c6-71208a314226-doc
Insert of existing embedding ID: 49333a1c-ca97-5b18-a978-eb9f9f22d43c-doc
Add of existing embedding ID: 49333a1c-ca97-5b18-a978-eb9f9f22d43c-doc
Insert of existing embedding ID: 24e2eb51-a625-5ba2-bd1e-ceb1b1df0182-doc
Add of existing embedding ID: 24e2eb51-a625-5ba2-bd1e-ceb1b1df0182-doc
Insert of existing embedding ID: c1d11163-ce12-54e8-a638-426f870aa7f4-doc
Add of existing embedding ID: c1d11163-ce12-54e8-a638-426f870aa7f4-doc


Adding documentation....
Adding documentation....
Adding documentation....
Adding documentation....


Insert of existing embedding ID: 0551b27b-7308-52ed-9abd-4415dd77ec81-doc
Add of existing embedding ID: 0551b27b-7308-52ed-9abd-4415dd77ec81-doc
Insert of existing embedding ID: fb6f1cbe-b884-51e1-a624-7ef41fea05e1-doc
Add of existing embedding ID: fb6f1cbe-b884-51e1-a624-7ef41fea05e1-doc
Insert of existing embedding ID: 5257415b-4130-5a26-946a-d31f4069503e-doc
Add of existing embedding ID: 5257415b-4130-5a26-946a-d31f4069503e-doc
Insert of existing embedding ID: 96bde749-8d97-5d6e-88d2-dac226a05810-doc
Add of existing embedding ID: 96bde749-8d97-5d6e-88d2-dac226a05810-doc


Adding documentation....
Adding documentation....
Adding documentation....
Adding documentation....


Insert of existing embedding ID: c02733e3-c35a-53a8-b5ce-996106d1b55d-doc
Add of existing embedding ID: c02733e3-c35a-53a8-b5ce-996106d1b55d-doc
Insert of existing embedding ID: af9febcb-31be-5b14-8ce8-0fac9252681a-doc
Add of existing embedding ID: af9febcb-31be-5b14-8ce8-0fac9252681a-doc
Insert of existing embedding ID: b03e0de0-ae76-5be7-b66c-ba00c99ac122-doc
Add of existing embedding ID: b03e0de0-ae76-5be7-b66c-ba00c99ac122-doc
Insert of existing embedding ID: 26430856-5ae1-5ecf-867b-b933387d0461-doc
Add of existing embedding ID: 26430856-5ae1-5ecf-867b-b933387d0461-doc


Adding documentation....
Adding documentation....
Adding documentation....
Adding documentation....


Insert of existing embedding ID: 242db09b-2597-5c29-b559-99eb35e6645d-doc
Add of existing embedding ID: 242db09b-2597-5c29-b559-99eb35e6645d-doc
Insert of existing embedding ID: 53f4e256-e5f4-5793-b2fc-d15fe96aed80-doc
Add of existing embedding ID: 53f4e256-e5f4-5793-b2fc-d15fe96aed80-doc
Insert of existing embedding ID: e3a1bdc3-429f-5a7f-9e80-a667d434142c-doc
Add of existing embedding ID: e3a1bdc3-429f-5a7f-9e80-a667d434142c-doc
Insert of existing embedding ID: 128533d2-05c4-5167-8867-de34e7743520-doc
Add of existing embedding ID: 128533d2-05c4-5167-8867-de34e7743520-doc


Adding documentation....
Adding documentation....
Adding documentation....
Adding documentation....


Insert of existing embedding ID: 37a60c7d-71e5-5999-a7a9-d5062627d6db-doc
Add of existing embedding ID: 37a60c7d-71e5-5999-a7a9-d5062627d6db-doc
Insert of existing embedding ID: 0300c6b3-f8f8-52bb-9d2e-08d83a1e973c-doc
Add of existing embedding ID: 0300c6b3-f8f8-52bb-9d2e-08d83a1e973c-doc
Insert of existing embedding ID: 5b4e09c7-3aa4-5d97-918a-570f5b07fc5b-doc
Add of existing embedding ID: 5b4e09c7-3aa4-5d97-918a-570f5b07fc5b-doc
Insert of existing embedding ID: 5ee3dc07-0388-522f-8277-f9abf7399e4b-doc
Add of existing embedding ID: 5ee3dc07-0388-522f-8277-f9abf7399e4b-doc


Adding documentation....
Adding documentation....
Adding documentation....
Adding documentation....


Insert of existing embedding ID: 7a1d58d5-2171-565f-9edf-3cb8a125ef66-doc
Add of existing embedding ID: 7a1d58d5-2171-565f-9edf-3cb8a125ef66-doc
Insert of existing embedding ID: 7d9e084d-20aa-5ab5-b619-4ce79a69131f-doc
Add of existing embedding ID: 7d9e084d-20aa-5ab5-b619-4ce79a69131f-doc
Insert of existing embedding ID: 0dc60cc5-f32b-578b-9708-dfdadc907edf-doc
Add of existing embedding ID: 0dc60cc5-f32b-578b-9708-dfdadc907edf-doc
Insert of existing embedding ID: 1f322c62-5226-5ee3-86f8-695e4ab976b0-doc
Add of existing embedding ID: 1f322c62-5226-5ee3-86f8-695e4ab976b0-doc


Adding documentation....
Adding documentation....
Adding documentation....
Adding documentation....


Insert of existing embedding ID: bc909e47-ed1f-5b15-be2c-0c1789202c68-doc
Add of existing embedding ID: bc909e47-ed1f-5b15-be2c-0c1789202c68-doc
Insert of existing embedding ID: 04b42da7-2f59-5af1-9379-f9c8854b943c-doc
Add of existing embedding ID: 04b42da7-2f59-5af1-9379-f9c8854b943c-doc
Insert of existing embedding ID: 87135ea3-a863-589b-8d9e-207c0bf9e368-doc
Add of existing embedding ID: 87135ea3-a863-589b-8d9e-207c0bf9e368-doc
Insert of existing embedding ID: 6dafd24f-2d1b-5b34-a48a-7c52345f8fd3-doc
Add of existing embedding ID: 6dafd24f-2d1b-5b34-a48a-7c52345f8fd3-doc


Adding documentation....
Adding documentation....
Adding documentation....
Adding documentation....


Insert of existing embedding ID: b7c247a6-3fc3-599f-9eb4-6d2ab1920097-doc
Add of existing embedding ID: b7c247a6-3fc3-599f-9eb4-6d2ab1920097-doc
Insert of existing embedding ID: e200dc7d-15c2-55aa-9452-92526bca6476-doc
Add of existing embedding ID: e200dc7d-15c2-55aa-9452-92526bca6476-doc
Insert of existing embedding ID: 3a2b3b3c-d62e-57c9-9017-e00b1dfacf1e-doc
Add of existing embedding ID: 3a2b3b3c-d62e-57c9-9017-e00b1dfacf1e-doc
Insert of existing embedding ID: 0a712900-a404-5806-ac70-6554fa3d6e27-doc
Add of existing embedding ID: 0a712900-a404-5806-ac70-6554fa3d6e27-doc


Adding documentation....
Adding documentation....
Adding documentation....
Adding documentation....


Insert of existing embedding ID: eaf17db6-7e22-5ab6-b635-0eca04ab8db1-doc
Add of existing embedding ID: eaf17db6-7e22-5ab6-b635-0eca04ab8db1-doc
Insert of existing embedding ID: d83ce6c1-5e5e-5b58-bde5-219d2811b351-doc
Add of existing embedding ID: d83ce6c1-5e5e-5b58-bde5-219d2811b351-doc
Insert of existing embedding ID: 9d2cfeb6-376b-52b4-911e-ff90b1d01409-doc
Add of existing embedding ID: 9d2cfeb6-376b-52b4-911e-ff90b1d01409-doc
Insert of existing embedding ID: d47f3274-ac0c-5782-b821-3f086922139f-doc
Add of existing embedding ID: d47f3274-ac0c-5782-b821-3f086922139f-doc


Adding documentation....
Adding documentation....
Adding documentation....
Adding documentation....


Insert of existing embedding ID: 49f130b8-13f2-5c3c-8e60-20acda411259-doc
Add of existing embedding ID: 49f130b8-13f2-5c3c-8e60-20acda411259-doc
Insert of existing embedding ID: 82f36a27-c95f-598f-ba45-7c4b36d5fce6-doc
Add of existing embedding ID: 82f36a27-c95f-598f-ba45-7c4b36d5fce6-doc


Adding documentation....
                                           id  \
0    9739146c-f94b-59bb-9b26-d76f778e77eb-sql   
1    86b5b45f-c2a2-57b5-8a18-ffbfe18b9739-sql   
2    1fe23c85-50b3-54ed-964c-154ff04cdea7-sql   
3    62081552-c963-51ca-9bb6-7f9013ea7097-sql   
4    a54ce698-1275-5bb2-82e0-c04e1cdc0cd3-sql   
..                                        ...   
465  87135ea3-a863-589b-8d9e-207c0bf9e368-doc   
466  e200dc7d-15c2-55aa-9452-92526bca6476-doc   
467  3a2b3b3c-d62e-57c9-9017-e00b1dfacf1e-doc   
468  0a712900-a404-5806-ac70-6554fa3d6e27-doc   
469  eaf17db6-7e22-5ab6-b635-0eca04ab8db1-doc   

                                          question  \
0                What is the total sales till now?   
1           How many orders are currently pending?   
2                Can order #123 meet its due date?   
3    Which suppliers should we focus on improving?   
4                                Get all customers   
..                                             ...   
465     

In [6]:
training_data.to_csv("training_data.csv")

In [7]:
df = pd.read_csv("training_data.csv")
df

Unnamed: 0,id
0,9739146c-f94b-59bb-9b26-d76f778e77eb-sql
1,86b5b45f-c2a2-57b5-8a18-ffbfe18b9739-sql
2,1fe23c85-50b3-54ed-964c-154ff04cdea7-sql
3,62081552-c963-51ca-9bb6-7f9013ea7097-sql
4,a54ce698-1275-5bb2-82e0-c04e1cdc0cd3-sql
...,...
496,87135ea3-a863-589b-8d9e-207c0bf9e368-doc
497,e200dc7d-15c2-55aa-9452-92526bca6476-doc
498,3a2b3b3c-d62e-57c9-9017-e00b1dfacf1e-doc
499,0a712900-a404-5806-ac70-6554fa3d6e27-doc


In [9]:
id_list = list(df.id.unique())


In [11]:
for i in id_list:
    print(i)    
    vn.remove_training_data(id=i)
# Replace 'your-training-data-id' with the actual ID from step 2. Repeat this for each entry you want to remove.
# 4. (Optional) Verify Removal
# After deletion, you can check again:
# python

Add of existing embedding ID: 9739146c-f94b-59bb-9b26-d76f778e77eb-sql
Add of existing embedding ID: 86b5b45f-c2a2-57b5-8a18-ffbfe18b9739-sql
Add of existing embedding ID: 1fe23c85-50b3-54ed-964c-154ff04cdea7-sql
Add of existing embedding ID: 62081552-c963-51ca-9bb6-7f9013ea7097-sql
Add of existing embedding ID: 9739146c-f94b-59bb-9b26-d76f778e77eb-sql
Add of existing embedding ID: 86b5b45f-c2a2-57b5-8a18-ffbfe18b9739-sql
Add of existing embedding ID: 1fe23c85-50b3-54ed-964c-154ff04cdea7-sql
Add of existing embedding ID: 62081552-c963-51ca-9bb6-7f9013ea7097-sql
Add of existing embedding ID: 9739146c-f94b-59bb-9b26-d76f778e77eb-sql
Add of existing embedding ID: 86b5b45f-c2a2-57b5-8a18-ffbfe18b9739-sql
Add of existing embedding ID: 1fe23c85-50b3-54ed-964c-154ff04cdea7-sql
Add of existing embedding ID: 62081552-c963-51ca-9bb6-7f9013ea7097-sql
Add of existing embedding ID: 9739146c-f94b-59bb-9b26-d76f778e77eb-sql
Add of existing embedding ID: 86b5b45f-c2a2-57b5-8a18-ffbfe18b9739-sql
Add of

9739146c-f94b-59bb-9b26-d76f778e77eb-sql
86b5b45f-c2a2-57b5-8a18-ffbfe18b9739-sql
1fe23c85-50b3-54ed-964c-154ff04cdea7-sql
62081552-c963-51ca-9bb6-7f9013ea7097-sql
a54ce698-1275-5bb2-82e0-c04e1cdc0cd3-sql
4293528b-c9df-5b02-9c5e-6ef4c6217979-sql
d4c9bf2f-3442-51bc-9334-9f2131659f51-sql
41e60ad4-c6fe-5d80-b53d-e176f814a6cf-sql
1b18671a-c5fe-5288-ab9c-2f843422a01d-sql
f261db6f-0489-5795-abc3-f6ea78655ebd-sql
0a85a0c1-8e67-5516-8991-6d619cb2904c-sql
1af897fe-fe24-511a-8002-4a56ca7980c1-sql
ce73e887-d8e1-5ca9-aa6c-797c7e410212-sql
8b230bce-2570-547d-991c-474d5c438162-sql
b9a02b4a-3f38-50ad-b293-5f36a79d5370-sql
81759c39-654f-5840-a43b-bc38f5e9bad2-sql
99aa0d08-288b-50fc-8256-4c0033538229-sql
75131bde-8356-51d8-9b85-30f298a51275-sql
513224c4-63e3-5f5b-8d73-a0e9228b0357-sql
c6364fb4-6fa6-5f8a-b091-ef277b06555d-sql
3029e92b-fc01-5245-bd92-85c5b0ed35ff-sql
a9c4c948-7e96-5c07-aad7-021b8bd39494-sql
3108b1d4-dc85-5d2d-94f4-09f0aaef57ac-sql
6ff1198a-2811-544a-9255-2fc225e9d2b6-sql
3b9429ec-afa5-59

In [17]:
new_training_data = vn.get_training_data()
new_training_data

Unnamed: 0,id,question,content,training_data_type
0,a54ce698-1275-5bb2-82e0-c04e1cdc0cd3-sql,Get all customers,SELECT * FROM customer;,sql
1,4293528b-c9df-5b02-9c5e-6ef4c6217979-sql,Get all vendors in a specific state,SELECT * FROM vendor WHERE 'addr_ste' = 'TX';,sql
2,d4c9bf2f-3442-51bc-9334-9f2131659f51-sql,Join sales orders with customer info,"SELECT s.order_no, s.order_date, c.name, c.add...",sql
3,41e60ad4-c6fe-5d80-b53d-e176f814a6cf-sql,Parts with their vendor and unit cost,"SELECT P.p_code, V.name AS vendor_name, VP.uni...",sql
4,1b18671a-c5fe-5288-ab9c-2f843422a01d-sql,Total number of sales per customer,"SELECT C.name, COUNT(S.order_no) AS total_orde...",sql
...,...,...,...,...
428,d83ce6c1-5e5e-5b58-bde5-219d2811b351-doc,,Table VEND_PART represents vendor and part rel...,documentation
429,9d2cfeb6-376b-52b4-911e-ff90b1d01409-doc,,VEND_PART.vendor is the unique identifier for ...,documentation
430,d47f3274-ac0c-5782-b821-3f086922139f-doc,,VEND_PART.part is the part that can be procure...,documentation
431,49f130b8-13f2-5c3c-8e60-20acda411259-doc,,VEND_PART.unit_cost is typically the creation ...,documentation


In [18]:
new_training_data.training_data_type.unique()

array(['sql', 'documentation'], dtype=object)

In [30]:
new_training_data[new_training_data.content.str.contains('sales')]

Unnamed: 0,id,question,content,training_data_type
2,d4c9bf2f-3442-51bc-9334-9f2131659f51-sql,Join sales orders with customer info,"SELECT s.order_no, s.order_date, c.name, c.add...",sql
4,1b18671a-c5fe-5288-ab9c-2f843422a01d-sql,Total number of sales per customer,"SELECT C.name, COUNT(S.order_no) AS total_orde...",sql
9,8b230bce-2570-547d-991c-474d5c438162-sql,All sales lines with ‘COMPLETE’ status,SELECT * FROM sales_line WHERE order_stat = 'C';,sql
21,43ec2853-0cbb-5d06-821c-a6e9da0d9fd2-sql,Monthly Sales Revenue Trend,"SELECT DATE_FORMAT(s.order_date, '%Y-%m') AS s...",sql
27,3ef57a2e-aa71-5e16-a27d-4ac45cb717b9-sql,whats the total sales value till now?,SELECT SUM(sl.qty * sl.unit_price) AS total_s...,sql
7,cf81a43b-c3f7-5282-b3cd-2adcb2b0a3fb-doc,,The following columns are in the sales_line ta...,documentation
8,b675c221-5c95-550d-9eb7-ed55de111f03-doc,,The following columns are in the sales_mst tab...,documentation
401,5257415b-4130-5a26-946a-d31f4069503e-doc,,Table SALES_MST represents the header informat...,documentation
402,96bde749-8d97-5d6e-88d2-dac226a05810-doc,,SALES_MST.order_no is the unique identifier fo...,documentation
403,c02733e3-c35a-53a8-b5ce-996106d1b55d-doc,,SALES_MST.order_date is the date the sales ord...,documentation


In [29]:
 vn.remove_training_data(id="3ef57a2e-aa71-5e16-a27d-4ac45cb717b9-sql")

Delete of nonexisting embedding ID: 3ef57a2e-aa71-5e16-a27d-4ac45cb717b9-sql
Delete of nonexisting embedding ID: 3ef57a2e-aa71-5e16-a27d-4ac45cb717b9-sql


True

In [33]:
training_data = vn.get_training_data()
training_data.id.to_list()

['a54ce698-1275-5bb2-82e0-c04e1cdc0cd3-sql',
 '4293528b-c9df-5b02-9c5e-6ef4c6217979-sql',
 'd4c9bf2f-3442-51bc-9334-9f2131659f51-sql',
 '41e60ad4-c6fe-5d80-b53d-e176f814a6cf-sql',
 '1b18671a-c5fe-5288-ab9c-2f843422a01d-sql',
 'f261db6f-0489-5795-abc3-f6ea78655ebd-sql',
 '0a85a0c1-8e67-5516-8991-6d619cb2904c-sql',
 '1af897fe-fe24-511a-8002-4a56ca7980c1-sql',
 'ce73e887-d8e1-5ca9-aa6c-797c7e410212-sql',
 '8b230bce-2570-547d-991c-474d5c438162-sql',
 'b9a02b4a-3f38-50ad-b293-5f36a79d5370-sql',
 '81759c39-654f-5840-a43b-bc38f5e9bad2-sql',
 '99aa0d08-288b-50fc-8256-4c0033538229-sql',
 '75131bde-8356-51d8-9b85-30f298a51275-sql',
 '513224c4-63e3-5f5b-8d73-a0e9228b0357-sql',
 'c6364fb4-6fa6-5f8a-b091-ef277b06555d-sql',
 '3029e92b-fc01-5245-bd92-85c5b0ed35ff-sql',
 'a9c4c948-7e96-5c07-aad7-021b8bd39494-sql',
 '3108b1d4-dc85-5d2d-94f4-09f0aaef57ac-sql',
 '6ff1198a-2811-544a-9255-2fc225e9d2b6-sql',
 '3b9429ec-afa5-5912-b50b-f42354a2fb00-sql',
 '43ec2853-0cbb-5d06-821c-a6e9da0d9fd2-sql',
 '483b8731

In [34]:
for i in training_data.id.to_list():
    print(i)    
    vn.remove_training_data(id=i)


a54ce698-1275-5bb2-82e0-c04e1cdc0cd3-sql
4293528b-c9df-5b02-9c5e-6ef4c6217979-sql
d4c9bf2f-3442-51bc-9334-9f2131659f51-sql
41e60ad4-c6fe-5d80-b53d-e176f814a6cf-sql
1b18671a-c5fe-5288-ab9c-2f843422a01d-sql
f261db6f-0489-5795-abc3-f6ea78655ebd-sql
0a85a0c1-8e67-5516-8991-6d619cb2904c-sql
1af897fe-fe24-511a-8002-4a56ca7980c1-sql
ce73e887-d8e1-5ca9-aa6c-797c7e410212-sql
8b230bce-2570-547d-991c-474d5c438162-sql
b9a02b4a-3f38-50ad-b293-5f36a79d5370-sql
81759c39-654f-5840-a43b-bc38f5e9bad2-sql
99aa0d08-288b-50fc-8256-4c0033538229-sql
75131bde-8356-51d8-9b85-30f298a51275-sql
513224c4-63e3-5f5b-8d73-a0e9228b0357-sql
c6364fb4-6fa6-5f8a-b091-ef277b06555d-sql
3029e92b-fc01-5245-bd92-85c5b0ed35ff-sql
a9c4c948-7e96-5c07-aad7-021b8bd39494-sql
3108b1d4-dc85-5d2d-94f4-09f0aaef57ac-sql
6ff1198a-2811-544a-9255-2fc225e9d2b6-sql


Delete of nonexisting embedding ID: 1a9a3ff1-80b9-59d4-aec5-9ec9baff0fe8-sql
Delete of nonexisting embedding ID: abf090f5-351d-5d3d-aa04-e003678090d7-sql
Delete of nonexisting embedding ID: c459050c-e31a-5cc5-99c3-e8c6173f8f96-sql
Delete of nonexisting embedding ID: eb16448e-7427-557c-bb5e-9f676f63dcc3-sql
Delete of nonexisting embedding ID: e29c8891-6cf4-5429-a4ed-448136766228-sql
Delete of nonexisting embedding ID: 3306e237-8bfd-50f2-bdb4-8b80483242a6-sql
Delete of nonexisting embedding ID: 31c39d68-1b22-5545-97e0-353dc991f251-doc
Delete of nonexisting embedding ID: fff92c80-4899-53ea-a8e4-a18c0415b820-doc
Delete of nonexisting embedding ID: 020be5d7-5209-56ca-8b6a-5962efd95682-doc
Delete of nonexisting embedding ID: 2f420d2e-9850-51f8-ab1a-2bf1e77e2d3b-doc
Delete of nonexisting embedding ID: 9aebcf7e-04a6-5368-b307-25d7ca8a1741-doc
Delete of nonexisting embedding ID: d27aa358-78b0-58f6-8dc7-4f50d0c2b8e2-doc
Delete of nonexisting embedding ID: 97dd033f-f6c1-574a-b60f-242668753672-doc

3b9429ec-afa5-5912-b50b-f42354a2fb00-sql
43ec2853-0cbb-5d06-821c-a6e9da0d9fd2-sql
483b8731-5bdb-5ec8-a0c2-462c407b6c3a-sql
1a9a3ff1-80b9-59d4-aec5-9ec9baff0fe8-sql
abf090f5-351d-5d3d-aa04-e003678090d7-sql
c459050c-e31a-5cc5-99c3-e8c6173f8f96-sql
eb16448e-7427-557c-bb5e-9f676f63dcc3-sql
e29c8891-6cf4-5429-a4ed-448136766228-sql
3306e237-8bfd-50f2-bdb4-8b80483242a6-sql
31c39d68-1b22-5545-97e0-353dc991f251-doc
fff92c80-4899-53ea-a8e4-a18c0415b820-doc
020be5d7-5209-56ca-8b6a-5962efd95682-doc
2f420d2e-9850-51f8-ab1a-2bf1e77e2d3b-doc
9aebcf7e-04a6-5368-b307-25d7ca8a1741-doc
d27aa358-78b0-58f6-8dc7-4f50d0c2b8e2-doc
97dd033f-f6c1-574a-b60f-242668753672-doc
cf81a43b-c3f7-5282-b3cd-2adcb2b0a3fb-doc
b675c221-5c95-550d-9eb7-ed55de111f03-doc
ff1bb159-0fcf-57c1-a002-26d9f3ba2d9b-doc
17ee22e9-46e2-576b-a47f-d83bb127202a-doc
ccccd389-106e-5873-a5e7-bb551d47143b-doc
790aa9fd-077e-5561-9448-4a536b8d21ed-doc
11920293-959e-507e-908e-9bf4351debd6-doc
9fbb831a-dc25-56dd-bdad-ec21ef346209-doc
4f344eeb-0e41-5a

Delete of nonexisting embedding ID: 0e24c4e7-9eab-5b3e-8b8a-68746535867a-doc
Delete of nonexisting embedding ID: 7b755478-e277-5b0c-9a64-76f51186e584-doc
Delete of nonexisting embedding ID: 46be15dd-0c58-59bd-aaa7-828bc361eb30-doc
Delete of nonexisting embedding ID: e24edd7b-0a51-5094-86a3-8dd11e7e4ac7-doc
Delete of nonexisting embedding ID: f5b38104-9cea-5651-bc2a-bec1e6b0ce58-doc
Delete of nonexisting embedding ID: 8449863f-e235-5987-9f80-b06a6f8824d9-doc
Delete of nonexisting embedding ID: 0048ff4b-9b75-5e0d-bb39-79e0e1fa01fe-doc
Delete of nonexisting embedding ID: 25291357-0afa-5e4d-b92c-ca9a80879c08-doc
Delete of nonexisting embedding ID: 29ae90fa-99cf-5361-8eb5-c1055944a2de-doc
Delete of nonexisting embedding ID: 8a3aa77a-a718-5e6f-866d-b72ca8bab847-doc
Delete of nonexisting embedding ID: 8948ef21-852e-5c4a-b906-00eeb2065db7-doc
Delete of nonexisting embedding ID: ca16f4ee-3243-557a-a117-927f81b555c2-doc
Delete of nonexisting embedding ID: c0ca1bf9-ffc3-5aa3-908b-652a79aebebb-doc

0e24c4e7-9eab-5b3e-8b8a-68746535867a-doc
7b755478-e277-5b0c-9a64-76f51186e584-doc
46be15dd-0c58-59bd-aaa7-828bc361eb30-doc
e24edd7b-0a51-5094-86a3-8dd11e7e4ac7-doc
f5b38104-9cea-5651-bc2a-bec1e6b0ce58-doc
8449863f-e235-5987-9f80-b06a6f8824d9-doc
0048ff4b-9b75-5e0d-bb39-79e0e1fa01fe-doc
25291357-0afa-5e4d-b92c-ca9a80879c08-doc
29ae90fa-99cf-5361-8eb5-c1055944a2de-doc
8a3aa77a-a718-5e6f-866d-b72ca8bab847-doc
8948ef21-852e-5c4a-b906-00eeb2065db7-doc
ca16f4ee-3243-557a-a117-927f81b555c2-doc
c0ca1bf9-ffc3-5aa3-908b-652a79aebebb-doc
ba41a41b-baf3-5e56-a7bc-65a4bb3ee143-doc
b6ef8765-f766-5111-a6bf-7a03b3b44c01-doc
b01d08aa-eb2c-56a9-9f54-831b6c522c21-doc
00efd320-c26c-58f8-943d-235583460b93-doc
4fff930d-59d0-5618-bb73-d6c606e40e72-doc
7559cdad-21f4-5b38-ba2d-52ce33486ec6-doc
5f2c67ea-aef8-54a2-bfb0-91d837987bff-doc
b4d7be60-bc0e-5403-a8d3-b895179709be-doc
061c8781-1fe2-5591-8c40-75eb09e9bd56-doc
e00e5d4d-7ddc-51fb-b630-9fd79dc5373d-doc
0ef01bba-c5c7-5faa-820e-dffb472ce3c3-doc
e84b0939-bef0-51

Delete of nonexisting embedding ID: fa09e36d-e360-5ba9-bde8-bf6104dd0c9f-doc
Delete of nonexisting embedding ID: 116d0b47-6601-5a82-9a8a-5b2efc130a0b-doc
Delete of nonexisting embedding ID: 48fd49ee-a5a5-5be3-967e-661ea8c75d93-doc
Delete of nonexisting embedding ID: b28bb7f8-88e5-5f65-9e6e-a60d5d70817f-doc
Delete of nonexisting embedding ID: efa4a31c-4d75-5e1f-95b0-c3f56bfcd848-doc
Delete of nonexisting embedding ID: 475bda8b-f953-5225-ba49-7503859cf4f1-doc
Delete of nonexisting embedding ID: cdd635b1-b4b3-5d46-98b6-c5c2261f48b8-doc
Delete of nonexisting embedding ID: 5e627887-8606-5857-99c3-c56fb25cafa1-doc
Delete of nonexisting embedding ID: 3ea1d8d9-ca8d-535f-9c12-0a6d909b2442-doc
Delete of nonexisting embedding ID: d44b80b1-57e0-545b-ab9e-6256e8c4a35d-doc
Delete of nonexisting embedding ID: 5fb1370b-c653-5a6e-b4b2-546ace9de3ae-doc
Delete of nonexisting embedding ID: ea5377a5-d3a8-5ad8-bd18-53a257b60bfa-doc
Delete of nonexisting embedding ID: 3348ac83-bddf-5b21-92c4-b74ca1ed1b7c-doc

116d0b47-6601-5a82-9a8a-5b2efc130a0b-doc
48fd49ee-a5a5-5be3-967e-661ea8c75d93-doc
b28bb7f8-88e5-5f65-9e6e-a60d5d70817f-doc
efa4a31c-4d75-5e1f-95b0-c3f56bfcd848-doc
475bda8b-f953-5225-ba49-7503859cf4f1-doc
cdd635b1-b4b3-5d46-98b6-c5c2261f48b8-doc
5e627887-8606-5857-99c3-c56fb25cafa1-doc
3ea1d8d9-ca8d-535f-9c12-0a6d909b2442-doc
d44b80b1-57e0-545b-ab9e-6256e8c4a35d-doc
5fb1370b-c653-5a6e-b4b2-546ace9de3ae-doc
ea5377a5-d3a8-5ad8-bd18-53a257b60bfa-doc
3348ac83-bddf-5b21-92c4-b74ca1ed1b7c-doc
bc61a01d-655e-5ac6-8cee-3bf0d765e1ae-doc
08a4df50-9e0f-5019-9b64-896db71e69a9-doc
b26ce05b-1380-5839-9069-e1bbb47a234f-doc
2fe10e81-1a42-5341-ac05-5122eed6d95a-doc
feab956c-3922-53b1-9962-efdd9be268cc-doc
85cf37af-a7b2-5d45-8ae9-aa7a4f6986ba-doc
a575f189-db6f-5601-b2ee-d45fb6fd2278-doc
ddae42d8-9716-5500-a67d-01d7ee066c6f-doc
07445c7c-d4b3-5dcc-8c96-c9bf5ce3c3f4-doc
d2758d80-68c0-520b-bd35-cf6d63435383-doc
0d2a5c75-91b5-55b5-abad-0d72b00398f6-doc
61656c00-81ed-59bd-bd32-2be91463ecd5-doc
656e1937-2992-5f

Delete of nonexisting embedding ID: 33656bdd-8640-574d-ad31-002ab0c471df-doc
Delete of nonexisting embedding ID: 94a5b3af-d24a-5376-a221-3c51523646ef-doc
Delete of nonexisting embedding ID: 71c60d91-8eb3-54e4-9673-93bbb426e6bb-doc
Delete of nonexisting embedding ID: 3a413db9-01e1-5a29-ba6c-3ff3afe6086b-doc
Delete of nonexisting embedding ID: 41e39095-95d3-5296-a49d-1fb833eaa8bd-doc
Delete of nonexisting embedding ID: c5f276a6-40f6-5991-8a8a-83f3ecba2fd4-doc
Delete of nonexisting embedding ID: b9a08e7b-533d-51df-9b9d-df6907e4dfe8-doc
Delete of nonexisting embedding ID: 624e6432-bc60-5965-82ce-3ce4dbcec2a7-doc
Delete of nonexisting embedding ID: 232e7f4e-2668-578b-a906-3715959e2270-doc
Delete of nonexisting embedding ID: d979dc51-664a-51e3-a0a7-a8bc0da4bb2e-doc
Delete of nonexisting embedding ID: 2c476863-01e0-5580-8c7f-2b4add0287b7-doc
Delete of nonexisting embedding ID: 1ec82aa6-8493-5064-aa78-06c2ae0f6e06-doc
Delete of nonexisting embedding ID: 454e9f7a-5af1-5ef7-9702-4b4edea19a37-doc

94a5b3af-d24a-5376-a221-3c51523646ef-doc
71c60d91-8eb3-54e4-9673-93bbb426e6bb-doc
3a413db9-01e1-5a29-ba6c-3ff3afe6086b-doc
41e39095-95d3-5296-a49d-1fb833eaa8bd-doc
c5f276a6-40f6-5991-8a8a-83f3ecba2fd4-doc
b9a08e7b-533d-51df-9b9d-df6907e4dfe8-doc
624e6432-bc60-5965-82ce-3ce4dbcec2a7-doc
232e7f4e-2668-578b-a906-3715959e2270-doc
d979dc51-664a-51e3-a0a7-a8bc0da4bb2e-doc
2c476863-01e0-5580-8c7f-2b4add0287b7-doc
1ec82aa6-8493-5064-aa78-06c2ae0f6e06-doc
454e9f7a-5af1-5ef7-9702-4b4edea19a37-doc
f45d78fa-471e-550c-a117-c940ab8b32ab-doc
6e9905a8-d350-5e58-873f-1305ab08733f-doc
9ff7bf21-408f-5df1-a949-612dc7166f66-doc
0a756c9c-1a7c-51d6-bdfb-b8897c643e2b-doc
0be0e85f-5df6-566c-9e7a-f8ca1bd56557-doc
70d67e51-ec55-5cb3-aa76-e7cda9212220-doc
43a855c5-b585-520f-bbe2-d48ef9f31b7a-doc
feea2399-ab6c-5d2e-91cb-35290b6452ed-doc
36e947ed-7e0e-58d4-a6ad-1d4e7a2dda6c-doc
c0ea6d40-d3ee-59b7-a586-3b2d83cf72c6-doc
7330d108-16da-5f86-85ec-6982ecf23d96-doc
2aa30ffa-8163-54af-abac-e6baf548615c-doc
00492a7e-ea95-50

Delete of nonexisting embedding ID: 516ccf63-2414-5bcb-b38d-40eb6e379d89-doc
Delete of nonexisting embedding ID: 3b03ebbf-278b-5016-9fa0-ac44013530a1-doc
Delete of nonexisting embedding ID: 3fee41a8-11a5-5348-b781-eb11890b596c-doc
Delete of nonexisting embedding ID: 05ac8afe-e45f-5b51-a4c6-2da5bfdf712f-doc
Delete of nonexisting embedding ID: 2374cb2b-4000-547b-af82-f5805e1ee43a-doc
Delete of nonexisting embedding ID: 812e104b-ec23-5542-b714-5b052526d0ec-doc
Delete of nonexisting embedding ID: 81d5eaa5-575c-5758-b958-a3e49f0cb9e2-doc
Delete of nonexisting embedding ID: a6dbb38d-2a6d-5613-97f0-9735598458ee-doc
Delete of nonexisting embedding ID: e773d757-7a93-5321-bada-30cd02c044e7-doc
Delete of nonexisting embedding ID: 91fec02c-cdde-5d1f-8cbb-b131dfce029a-doc
Delete of nonexisting embedding ID: ba501acf-9434-5763-bcf6-b85e362f67d0-doc
Delete of nonexisting embedding ID: ba1ded7c-96ca-5a22-9ad6-f51b29158a3e-doc
Delete of nonexisting embedding ID: 85011bc2-1153-5652-9833-b7ef59eb876e-doc

516ccf63-2414-5bcb-b38d-40eb6e379d89-doc
3b03ebbf-278b-5016-9fa0-ac44013530a1-doc
3fee41a8-11a5-5348-b781-eb11890b596c-doc
05ac8afe-e45f-5b51-a4c6-2da5bfdf712f-doc
2374cb2b-4000-547b-af82-f5805e1ee43a-doc
812e104b-ec23-5542-b714-5b052526d0ec-doc
81d5eaa5-575c-5758-b958-a3e49f0cb9e2-doc
a6dbb38d-2a6d-5613-97f0-9735598458ee-doc
e773d757-7a93-5321-bada-30cd02c044e7-doc
91fec02c-cdde-5d1f-8cbb-b131dfce029a-doc
ba501acf-9434-5763-bcf6-b85e362f67d0-doc
ba1ded7c-96ca-5a22-9ad6-f51b29158a3e-doc
85011bc2-1153-5652-9833-b7ef59eb876e-doc
67d79418-b267-58a1-a153-944d2e8a7ac8-doc
7ca377e3-a291-535c-80a5-ca0c4b1766e5-doc
9ee0dff4-5e4d-556b-bd1b-2e2a44deedde-doc
a2fd523e-4f83-5f71-81e9-3f61d93b2a8d-doc
d4f6306d-0f2f-5689-b7c1-b1d968c1eca0-doc
75ddeb69-8776-5ab4-9d47-f018d90ac3f0-doc
3f557ec6-5d09-5094-af2d-0e9d21533b71-doc
bac177d1-d929-5238-9adb-86975edbf2c5-doc
26081d07-e0f7-5b44-9fa6-cbb2a78a4c2c-doc
409d6ab6-65f7-51d1-a99b-17fbef14b3db-doc
25e3cae6-4c4e-52c0-b06e-fd9900c5717f-doc
a36dc66e-8b2f-51

Delete of nonexisting embedding ID: f5740b3d-bd76-5c9b-9312-32fb3cd9a59d-doc
Delete of nonexisting embedding ID: bd053944-9bd1-5828-a495-7950583f44c3-doc
Delete of nonexisting embedding ID: 07b35c21-5903-554f-af5e-9c8351674543-doc
Delete of nonexisting embedding ID: 92e775e2-30be-5fa3-80d2-d71b8b6e15aa-doc
Delete of nonexisting embedding ID: 1c911181-1718-5ad7-bfe7-fbce324a3da9-doc
Delete of nonexisting embedding ID: 2c82302d-6454-5183-8bdf-23ce51a6fbce-doc
Delete of nonexisting embedding ID: 1eda123b-27a6-5742-8e82-d1993ea599b6-doc
Delete of nonexisting embedding ID: 9944c8e6-1afc-5f5c-82f1-a1d204605daa-doc
Delete of nonexisting embedding ID: 603ecc5c-0132-51f1-b6dd-30a4c4a8922a-doc
Delete of nonexisting embedding ID: 280cb13c-b603-5c0e-82e2-ff46575a4bda-doc
Delete of nonexisting embedding ID: f7db6b5f-14e5-5d18-86c0-bc7c944c838f-doc
Delete of nonexisting embedding ID: 01a99045-f948-51b8-ae58-f4821d6bfc00-doc
Delete of nonexisting embedding ID: ab70b98c-0c8d-5dee-8767-be79cd077788-doc

f5740b3d-bd76-5c9b-9312-32fb3cd9a59d-doc
bd053944-9bd1-5828-a495-7950583f44c3-doc
07b35c21-5903-554f-af5e-9c8351674543-doc
92e775e2-30be-5fa3-80d2-d71b8b6e15aa-doc
1c911181-1718-5ad7-bfe7-fbce324a3da9-doc
2c82302d-6454-5183-8bdf-23ce51a6fbce-doc
1eda123b-27a6-5742-8e82-d1993ea599b6-doc
9944c8e6-1afc-5f5c-82f1-a1d204605daa-doc
603ecc5c-0132-51f1-b6dd-30a4c4a8922a-doc
280cb13c-b603-5c0e-82e2-ff46575a4bda-doc
f7db6b5f-14e5-5d18-86c0-bc7c944c838f-doc
01a99045-f948-51b8-ae58-f4821d6bfc00-doc
ab70b98c-0c8d-5dee-8767-be79cd077788-doc
226f733f-b856-5fac-914e-e3e6c95140b5-doc
ca5bf901-fcec-56a2-a4d7-0976dd8a4f47-doc
3df18a5c-cea4-5163-927d-fbd2d1b04b3c-doc
9e6c6bf2-b139-5f01-ab29-6a01bf961073-doc
13e50ea5-097e-531c-85a9-9f509c347a63-doc
071d133f-62b4-578f-8251-d49273c6b9ae-doc
49fc978f-7bd9-5b53-ac3e-4f0ee9f48467-doc
9357cbcd-ce38-5839-ad40-b7fce3ee1c03-doc
afa4486a-5c3a-5f91-821d-9edf5ea2229b-doc
81a187ad-d000-534a-8540-4304c4c95632-doc
a1b804cb-afe8-5641-9a4f-89bcba8b5942-doc
afa74d98-9616-51

Delete of nonexisting embedding ID: c866486d-70d8-53d4-a042-ae0b223bafd0-doc
Delete of nonexisting embedding ID: 971df317-6f95-5574-960b-8617ebb320e8-doc
Delete of nonexisting embedding ID: 06f173d4-10dc-5912-8e3c-690b36769eab-doc
Delete of nonexisting embedding ID: c7737722-5406-50ec-a2e3-457631201e43-doc
Delete of nonexisting embedding ID: 5710f1b2-2513-59b1-a22a-06ca846f3335-doc
Delete of nonexisting embedding ID: f62258fc-5052-5064-8778-17d64b48fb75-doc
Delete of nonexisting embedding ID: 3a1ce7ab-9179-5232-8d27-603f1c7fc185-doc
Delete of nonexisting embedding ID: 118f8eeb-9139-5bb3-aa51-ffde0d892907-doc
Delete of nonexisting embedding ID: 8c8564bf-5f18-5fca-b590-917e64a7c696-doc
Delete of nonexisting embedding ID: 1e2fad2c-8b40-5101-b01a-eb7cb27e9093-doc
Delete of nonexisting embedding ID: adeabba7-8bbd-5156-97a3-d1e27d0e8f47-doc
Delete of nonexisting embedding ID: 380c2a34-55c2-5fe6-aaf9-5b98fc734a1d-doc
Delete of nonexisting embedding ID: 2e795c42-c6f8-5c44-aa1c-eb628fdee4fc-doc

c866486d-70d8-53d4-a042-ae0b223bafd0-doc
971df317-6f95-5574-960b-8617ebb320e8-doc
06f173d4-10dc-5912-8e3c-690b36769eab-doc
c7737722-5406-50ec-a2e3-457631201e43-doc
5710f1b2-2513-59b1-a22a-06ca846f3335-doc
f62258fc-5052-5064-8778-17d64b48fb75-doc
3a1ce7ab-9179-5232-8d27-603f1c7fc185-doc
118f8eeb-9139-5bb3-aa51-ffde0d892907-doc
8c8564bf-5f18-5fca-b590-917e64a7c696-doc
1e2fad2c-8b40-5101-b01a-eb7cb27e9093-doc
adeabba7-8bbd-5156-97a3-d1e27d0e8f47-doc
380c2a34-55c2-5fe6-aaf9-5b98fc734a1d-doc
2e795c42-c6f8-5c44-aa1c-eb628fdee4fc-doc
3a25bf9f-42c8-5156-823b-ce6a42754b95-doc
fa75db61-9760-51e8-b436-32ed46777d85-doc
808d1279-ae6d-5a3f-b2a9-6a42245a4f28-doc
deab258b-8c49-5858-8f76-a52a6269500e-doc
2db3b251-253c-55b3-99aa-12540571472c-doc
417933a5-e35b-5dc9-a0d1-bf5bf8a4d2ce-doc
628de61e-0ed2-5b90-898f-38e558ac180f-doc
80065a35-16cd-5fd7-a372-512bd06427e5-doc
aae9e2ec-a925-599b-a250-2f0b2410a291-doc
e9263c31-5c79-5360-816b-5f7f98684880-doc
5f751b55-e510-53da-a888-5ecefa26fee9-doc
7ed5330d-ac46-5e

Delete of nonexisting embedding ID: 7ed5330d-ac46-5eb4-89b5-9971d2414c73-doc
Delete of nonexisting embedding ID: 23909f8f-6287-589a-9018-c962c6900982-doc
Delete of nonexisting embedding ID: a3dff5a7-f7ae-5291-9b03-a0b09282c724-doc
Delete of nonexisting embedding ID: 595aefd0-fcf8-5f8f-8068-5e83d009e593-doc
Delete of nonexisting embedding ID: bbeecc4f-d710-575a-8f54-5c94c81562f7-doc
Delete of nonexisting embedding ID: 8386d710-25b6-51b9-91d8-30785a276e62-doc
Delete of nonexisting embedding ID: 6910f4a8-91ea-598d-9b25-57242d313d65-doc
Delete of nonexisting embedding ID: ab40f9fb-5a08-5f54-8d12-0e9d43477249-doc
Delete of nonexisting embedding ID: dea8e4e4-e47c-5c8e-9a6a-7166bc5e54dd-doc
Delete of nonexisting embedding ID: 49ed9ddb-37cb-5687-ab29-5364cc05e755-doc
Delete of nonexisting embedding ID: 76378bf1-3f39-52d7-8589-31832805c77b-doc
Delete of nonexisting embedding ID: 77937f1f-1c06-5c8e-9b2a-dbaecbb40383-doc
Delete of nonexisting embedding ID: 2bae7fcb-36a7-5286-b8c6-71208a314226-doc

23909f8f-6287-589a-9018-c962c6900982-doc
a3dff5a7-f7ae-5291-9b03-a0b09282c724-doc
595aefd0-fcf8-5f8f-8068-5e83d009e593-doc
bbeecc4f-d710-575a-8f54-5c94c81562f7-doc
8386d710-25b6-51b9-91d8-30785a276e62-doc
6910f4a8-91ea-598d-9b25-57242d313d65-doc
ab40f9fb-5a08-5f54-8d12-0e9d43477249-doc
dea8e4e4-e47c-5c8e-9a6a-7166bc5e54dd-doc
49ed9ddb-37cb-5687-ab29-5364cc05e755-doc
76378bf1-3f39-52d7-8589-31832805c77b-doc
77937f1f-1c06-5c8e-9b2a-dbaecbb40383-doc
2bae7fcb-36a7-5286-b8c6-71208a314226-doc
49333a1c-ca97-5b18-a978-eb9f9f22d43c-doc
24e2eb51-a625-5ba2-bd1e-ceb1b1df0182-doc
c1d11163-ce12-54e8-a638-426f870aa7f4-doc
0551b27b-7308-52ed-9abd-4415dd77ec81-doc
fb6f1cbe-b884-51e1-a624-7ef41fea05e1-doc
5257415b-4130-5a26-946a-d31f4069503e-doc
96bde749-8d97-5d6e-88d2-dac226a05810-doc
c02733e3-c35a-53a8-b5ce-996106d1b55d-doc
af9febcb-31be-5b14-8ce8-0fac9252681a-doc
b03e0de0-ae76-5be7-b66c-ba00c99ac122-doc
26430856-5ae1-5ecf-867b-b933387d0461-doc
242db09b-2597-5c29-b559-99eb35e6645d-doc
53f4e256-e5f4-57

In [35]:
vn.get_training_data()

Unnamed: 0,id,question,content,training_data_type


In [36]:
vn.train(documentation="Table CUSTOMER contains information about business customers.")
vn.train(documentation="CUSTOMER.id is the unique identifier for each customer.")
vn.train(documentation="CUSTOMER.name is the company name of the customer.")
vn.train(documentation="CUSTOMER.addr_str contains the street address for the customer.")
vn.train(documentation="CUSTOMER.addr_city contains the city for the customer.")
vn.train(documentation="CUSTOMER.addr_ste contains the state for the customer.")
vn.train(documentation="CUSTOMER.addr_ctry contains the country for the customer.")
vn.train(documentation="CUSTOMER.addr_zip contains the zip-code for the customer.")

# --- Vendor Table Documentation ---
vn.train(documentation="Table VENDOR lists suppliers of parts.")
vn.train(documentation="VENDOR.id is the unique identifier for each vendor.")
vn.train(documentation="VENDOR.name is the name of the vendor/supplier.")
vn.train(documentation="CUSTOMER.addr_str contains the street address for the vendor.")
vn.train(documentation="CUSTOMER.addr_city contains the city for the vendor.")
vn.train(documentation="CUSTOMER.addr_ste contains the state for the vendor.")
vn.train(documentation="CUSTOMER.addr_ctry contains the country for the vendor.")
vn.train(documentation="CUSTOMER.addr_zip contains the zip-code for the vendor.")

# --- Part Master Table Documentation ---
vn.train(documentation="Table PART_MST is the master list of all parts.")
vn.train(documentation="PART_MST.id is the unique identifier which indicates the unique id for the part.")
vn.train(documentation="PART_MST.p_code is the unique product code for the part.")
vn.train(documentation="PART_MST.procurement indicates if a part is manufactured internally ('M') or purchased ('B'). It must be either 'M' or 'B'.")
vn.train(documentation="PART_MST.unit_price represents the standard price per unit of the part.")
vn.train(documentation="PART_MST.pref_order_qty specifies The preferred quantity to order or manufacture at once, depending on if the part is M or B.")
vn.train(documentation="PART_MST.unit_meas the unit of measure for this part; usually 'EA' for 'eaches'.")
vn.train(documentation="PART_MST.mfg_lt is the lead time to manufacture this part, if it is manufactured. This must an 'M' part and positive.")
vn.train(documentation="PART_MST.curr_stock the current amount of this part in stock in inventory")

# --- Add Documentation for BOM_MST, RTG_MST, JOB_OPER, JOB_MATL, RECV_MST, RECV_LINE ---
# Example:
vn.train(documentation="Table bill_of_m defines the Bill of Materials, showing parent-child part relationships.")
vn.train(documentation="bill_of_m.m_part links to the parent part code in PART_MST. The manufactured part which consumes c_part.")
vn.train(documentation="BOM_MST.c_part links to the child component part code in PART_MST. The component part consumed by m_part.")
vn.train(documentation="BOM_MST.qty_req is The quantity of c_part required to make one unit of m_part.")

# --- Job Master Table Documentation ---
vn.train(documentation="Table job_mst represents the master data for all the jobs.")
vn.train(documentation="job_mst.id is the unique identifier for a job.")
vn.train(documentation="job_mst.job_stat signifies the current status of the job as follows: 'Q' (in queue), 'C' (completed), 'O' (opened), 'X' (cancelled), or 'H' (on hold).")
vn.train(documentation="job_mst.part is the part that this job is manufacturing. This can be found in the PART_MST table.")
vn.train(documentation="job_mst.qty is the quantity of the part that this job is manufacturing.")
vn.train(documentation="job_mst.job_rls is the time at which this job began work.")
vn.train(documentation="job_mst.job_cls is the time at which this job was completed.")

# --- Nonconform master table documentation ---
vn.train(documentation="Table NONCONFORM_MST defines the Nonconformance report master data.")
vn.train(documentation="NONCONFORM_MST.id is a unique id for this nonconformance report.")
vn.train(documentation="NONCONFORM_MST.job signifies if this nonconformance report was issued for a job, the job it was issued for.")
vn.train(documentation="NONCONFORM_MST.po_no signifies if this nonconformance report was issued for a purchase order, the purchase order it was issued for.")
vn.train(documentation="NONCONFORM_MST.qty signifies the quantity of parts on this job/purchase order which were flagged for nonconformance issues.")



# --- Sales Order Header Table Documentation ---
vn.train(documentation="Table SALES_MST represents the header information for a customer sales order.")
vn.train(documentation="SALES_MST.order_no is the unique identifier for a sales order.")
vn.train(documentation="SALES_MST.order_date is the date the sales order was placed.")
vn.train(documentation="SALES_MST.cust links to the CUSTOMER table for the customer who placed the order.")

# --- Sales Order Line Table Documentation ---
vn.train(documentation="Table SALES_LINE contains individual line items for each sales order in SALES_MST.")
vn.train(documentation="SALES_LINE.order_no links back to the SALES_MST table.")
vn.train(documentation="SALES_LINE.line_no is the line number of the line item under the given order_no.")
vn.train(documentation="SALES_LINE.order_stat is the current status of the line item: 'C' (completed), 'O' (opened), or 'X' (cancelled).")
vn.train(documentation="SALES_LINE.part links to the PART_MST table for the specific part ordered.")
vn.train(documentation="SALES_LINE.due is the requested delivery date for this line item.")
vn.train(documentation="SALES_LINE.unit_price is the agreed price per unit for the part on this order line.")
vn.train(documentation="SALES_LINE.qty is the number of units ordered for this part on this line item.")
vn.train(documentation="SALES_LINE.line_cls is the date on which this line item was closed.")

# --- Purchase Order Header Table Documentation ---
vn.train(documentation="Table PURCHASE_MST represents the header information for a purchase order sent to a vendor.")
vn.train(documentation="PURCHASE_MST.order_no is the unique identifier for a purchase order.")
vn.train(documentation="PURCHASE_MST.order_date is the date the purchase order was created.")
vn.train(documentation="PURCHASE_MST.vendor links to the VENDOR table for the supplier receiving the order.")

# --- Purchase Order Line Table Documentation ---
vn.train(documentation="Table PURCHASE_LINE contains individual line items for each purchase order in PURCHASE_MST.")
vn.train(documentation="PURCHASE_LINE.order_no links back to the PURCHASE_MST table.")
vn.train(documentation="PURCHASE_LINE.part links to the PART_MST table for the specific part being purchased.")
vn.train(documentation="PURCHASE_LINE.due is the expected delivery date for this purchased part.")
vn.train(documentation="PURCHASE_LINE.unit_cost is the cost per unit for the part on this purchase order line.")
vn.train(documentation="PURCHASE_LINE.qty is the number of units purchased. It must be a positive number.")
vn.train(documentation="PURCHASE_LINE.order_stat is the current status of the purchase order: 'C' (completed), 'O' (opened), or 'X' (cancelled).")
vn.train(documentation="PURCHASE_LINE.order_type is the type of purchase order: either an 'S' for service order or 'M' for material order.")
vn.train(documentation="PURCHASE_LINE.line_no is the line number of this purchase order line under the given order_no.")
vn.train(documentation="PURCHASE_LINE.line_cls is the date on which this purchase order was closed.")


# --- Vendor Part Table Documentation ---
vn.train(documentation="Table VEND_PART represents vendor and part relationship.")
vn.train(documentation="VEND_PART.vendor is the unique identifier for each vendor.")
vn.train(documentation="VEND_PART.part is the part that can be procured from the vendor.")
vn.train(documentation="VEND_PART.unit_cost is typically the creation or start date of the job.")
vn.train(documentation="VEND_PART.part_lt is The standard lead time for which to place orders for part to vendor")


print("Adding SQL examples...")
vn.train(question="Get all customers", 
        sql="SELECT * FROM customer;")

vn.train(question="Get all vendors in a specific state", 
        sql="SELECT *  FROM vendor WHERE 'addr_ste' = 'TX';")

vn.train(question="Join sales orders with customer info", 
        sql="""SELECT s.order_no, s.order_date, c.name, c.addr_city FROM sales_mst s JOIN customer c ON s.cust = c.id;""")

vn.train(question="Parts with their vendor and unit cost", 
        sql="""SELECT P.p_code, V.name AS vendor_name, VP.unit_cost FROM vend_part VP JOIN vendor  V ON VP.vendor = V.id JOIN part_mst P ON VP.part = P.id;""")

vn.train(question="Total number of sales per customer", 
        sql="""SELECT C.name, COUNT(S.order_no) AS total_orders FROM customer C JOIN sales_mst S ON S.cust = C.id GROUP BY C.name order by COUNT(S.order_no) desc; """)

vn.train(question="Average unit price of each part", 
        sql="""SELECT p_code, AVG(unit_price) AS avg_price FROM part_mst GROUP BY p_code order by AVG(unit_price);""")

vn.train(question="Jobs released in last 30 days", 
        sql="""SELECT * FROM job_mst where date(job_rls)>= date_sub(curdate() , interval 30 day);""")

vn.train(question="Purchase orders due next week", 
        sql="""SELECT * FROM purchase_line where week(due) = week(date_add(curdate() , interval 7 day));""")

vn.train(question="All open jobs", 
        sql="""SELECT * FROM JOB_MST  WHERE job_stat = 'Q';""")

vn.train(question="All sales lines with ‘COMPLETE’ status", 
        sql="""SELECT * FROM sales_line WHERE order_stat = 'C';""")

vn.train(question="Get purchase line for specific order and line", 
        sql="""SELECT * FROM purchase_lineWHERE order_no = 'PO-5436934';""")

vn.train(question="Customers with more than 3 orders", 
        sql="""SELECT name FROM CUSTOMER WHERE id IN (SELECT cust FROM SALES_MST GROUP BY cust HAVING COUNT(order_no) > 3);""")

vn.train(question="List Sales Orders with Customer Names", 
        sql="""SELECT s.order_no, s.order_date, c.name AS customer_name, c.addr_city AS customer_city FROM SALES_MST s JOIN CUSTOMER c ON s.cust = c.id;""")

vn.train(question="Show Details of Parts on a Specific Sales Order", 
        sql="""SELECT sl.order_no, sl.line_no,p.p_code AS part_code,p.unit_meas,sl.qty AS quantity_ordered,sl.unit_price AS price_on_order, p.unit_price AS current_part_price, sl.order_stat AS line_status FROM SALES_LINE sl JOIN PART_MST p ON sl.part = p.id where order_no = "SO-1490003"; """)

vn.train(question="Calculate Total Quantity Sold for Each Part", 
        sql="""SELECT p.p_code, p.id AS part_id, SUM(sl.qty) AS total_quantity_sold FROM SALES_LINE sl JOIN PART_MST p ON sl.part = p.id WHERE sl.order_stat = 'C'  GROUP BY p.p_code, p.id ORDER BY total_quantity_sold DESC;""")

vn.train(question="Calculate Total Value of Purchases per Vendor", 
        sql="""SELECT v.name AS vendor_name, COUNT(DISTINCT pl.order_no) AS number_of_pos, SUM(pl.qty * pl.unit_cost) AS total_purchase_value FROM PURCHASE_LINE pl JOIN PURCHASE_MST pm ON pl.order_no = pm.order_no JOIN VENDOR v ON pm.vendor = v.id GROUP BY v.name ORDER BY total_purchase_value DESC;""")

vn.train(question="Find Customers and Vendors Located in the Same State", 
        sql="""SELECT c.name AS customer_name, v.name AS vendor_name, c.addr_ste AS state FROM CUSTOMER c JOIN VENDOR v ON c.addr_ste = v.addr_ste ORDER BY c.addr_ste, c.name, v.name;""")

vn.train(question="Compare Current Stock vs. Total Purchased vs. Total Sold (Simplified)", 
        sql="""SELECT p.id, p.p_code, p.curr_stock AS current_stock_on_hand, COALESCE(pp.total_purchased, 0) AS total_units_purchased, COALESCE(ps.total_sold, 0) AS total_units_sold, (p.curr_stock + COALESCE(pp.total_purchased, 0) - COALESCE(ps.total_sold, 0)) AS calculated_stock_balance  FROM PART_MST p LEFT JOIN PartPurchases pp ON p.id = pp.part LEFT JOIN PartSales ps ON p.id = ps.part ORDER BY p.p_code; """)

vn.train(question="List Parts Supplied by a Specific Vendor", 
        sql="""SELECT v.id, v.name AS vendor_name, p.p_code AS part_code, vp.unit_cost AS vendor_unit_cost, vp.part_lt AS vendor_lead_time FROM VEND_PART vp left JOIN VENDOR v ON vp.vendor = v.id left JOIN PART_MST p ON vp.part = p.id where v.id = 7 ORDER BY p.p_code;""")

vn.train(question="Top 5 Customers by Sales Value in the Last 6 Months", 
        sql="""SELECT c.id AS customer_id, c.name AS customer_name, SUM(sl.qty * sl.unit_price) AS total_spent FROM CUSTOMER c JOIN SALES_MST s ON c.id = s.cust JOIN SALES_LINE sl ON s.order_no = sl.order_no WHERE s.order_date >= DATE_SUB(CURDATE(), INTERVAL 6 MONTH) AND sl.order_stat <> 'X' GROUP BY c.id, c.name ORDER BY total_spent DESC LIMIT 5;""")

vn.train(question="Parts Below Preferred Order Quantity", 
        sql="""SELECT id,p_code,curr_stock, pref_order_qty FROM PART_MST WHERE curr_stock < pref_order_qty AND procurement <> 'S' ORDER BY (pref_order_qty - curr_stock) DESC;""")

vn.train(question="Monthly Sales Revenue Trend", 
        sql="""SELECT DATE_FORMAT(s.order_date, '%Y-%m') AS sales_month, round(SUM(sl.qty * sl.unit_price),2) AS monthly_revenue FROM SALES_MST s JOIN SALES_LINE sl ON s.order_no = sl.order_no WHERE sl.order_stat <> 'X' GROUP BY sales_month ORDER BY sales_month;""")

vn.train(question="Direct Components for a Manufactured Part (Bill of Materials)", 
        sql="""SELECT mp.p_code AS manufactured_part_code, cp.p_code AS component_part_code, bom.qty_req AS quantity_required, cp.unit_meas AS component_unit FROM BILL_OF_M bom JOIN PART_MST mp ON bom.m_part = mp.id  JOIN PART_MST cp ON bom.c_part = cp.id  WHERE bom.m_part = 5638;""")

vn.train(question="Total Component Quantity Required for Open Production Jobs", 
            sql="""SELECT bom.c_part AS component_part_id, p.p_code AS component_part_code, SUM(j.qty * bom.qty_req) AS total_required_for_open_jobs FROM JOB_MST j JOIN BILL_OF_M bom ON j.part = bom.m_part JOIN PART_MST p ON bom.c_part = p.id   WHERE j.job_stat IN ('O', 'Q') GROUP BY bom.c_part, p.p_code ORDER BY total_required_for_open_jobs DESC;""")

vn.train(question="Nonconformance Rate per Part (Simplified: NC Qty / Qty Purchased)", 
            sql="""WITH PartPurchasesReceived AS (SELECT pl.part, SUM(pl.qty) AS total_received FROM PURCHASE_LINE pl WHERE pl.order_stat = 'C' GROUP BY pl.part),PartNonconformance AS (SELECT pl.part, SUM(nc.qty) AS total_nonconforming FROM NONCONFORM_MST nc JOIN PURCHASE_MST pm ON nc.po_no = pm.order_no  JOIN PURCHASE_LINE pl ON pm.order_no = pl.order_no AND nc.job IS NULL  GROUP BY pl.part)SELECT p.id AS part_id, p.p_code, COALESCE(ppr.total_received, 0) AS total_received, COALESCE(pnc.total_nonconforming, 0) AS total_nonconforming, CASE WHEN COALESCE(ppr.total_received, 0) > 0 THEN (COALESCE(pnc.total_nonconforming, 0) / ppr.total_received) * 100 ELSE 0 END AS nonconformance_rate_percent FROM PART_MST p LEFT JOIN PartPurchasesReceived ppr ON p.id = ppr.part LEFT JOIN PartNonconformance pnc ON p.id = pnc.part WHERE p.procurement = 'B'  ORDER BY nonconformance_rate_percent DESC;""")

vn.train(question="Customers Who Haven't Ordered in Over a Year", 
            sql="""SELECT c.id, c.name, MAX(s.order_date) AS last_order_date  FROM CUSTOMER c LEFT JOIN SALES_MST s ON c.id = s.cust AND s.order_date >= DATE_SUB(CURDATE(), INTERVAL 1 YEAR) WHERE s.order_no IS NULL  GROUP BY c.id, c.name  ORDER BY c.name;""")

vn.train(question="Parts Purchased from Multiple Vendors with Cost Comparison", 
            sql="""SELECT p.id AS part_id, p.p_code, COUNT(DISTINCT vp.vendor) AS number_of_vendors, MIN(vp.unit_cost) AS min_vendor_cost, MAX(vp.unit_cost) AS max_vendor_cost, (MAX(vp.unit_cost) - MIN(vp.unit_cost)) AS cost_difference FROM PART_MST p JOIN VEND_PART vp ON p.id = vp.part GROUP BY p.id, p.p_code HAVING COUNT(DISTINCT vp.vendor) > 1  ORDER BY cost_difference DESC, p.p_code;""")




Adding documentation....
Adding documentation....
Adding documentation....
Adding documentation....
Adding documentation....
Adding documentation....
Adding documentation....
Adding documentation....
Adding documentation....
Adding documentation....
Adding documentation....
Adding documentation....
Adding documentation....
Adding documentation....
Adding documentation....
Adding documentation....
Adding documentation....
Adding documentation....
Adding documentation....
Adding documentation....
Adding documentation....
Adding documentation....
Adding documentation....
Adding documentation....
Adding documentation....
Adding documentation....
Adding documentation....
Adding documentation....
Adding documentation....
Adding documentation....
Adding documentation....
Adding documentation....
Adding documentation....
Adding documentation....
Adding documentation....
Adding documentation....
Adding documentation....
Adding documentation....
Adding documentation....
Adding documentation....


'eb16448e-7427-557c-bb5e-9f676f63dcc3-sql'

In [37]:
training_data = vn.get_training_data()
training_data

Unnamed: 0,id,question,content,training_data_type
0,a54ce698-1275-5bb2-82e0-c04e1cdc0cd3-sql,Get all customers,SELECT * FROM customer;,sql
1,4293528b-c9df-5b02-9c5e-6ef4c6217979-sql,Get all vendors in a specific state,SELECT * FROM vendor WHERE 'addr_ste' = 'TX';,sql
2,d4c9bf2f-3442-51bc-9334-9f2131659f51-sql,Join sales orders with customer info,"SELECT s.order_no, s.order_date, c.name, c.add...",sql
3,41e60ad4-c6fe-5d80-b53d-e176f814a6cf-sql,Parts with their vendor and unit cost,"SELECT P.p_code, V.name AS vendor_name, VP.uni...",sql
4,1b18671a-c5fe-5288-ab9c-2f843422a01d-sql,Total number of sales per customer,"SELECT C.name, COUNT(S.order_no) AS total_orde...",sql
...,...,...,...,...
68,d83ce6c1-5e5e-5b58-bde5-219d2811b351-doc,,Table VEND_PART represents vendor and part rel...,documentation
69,9d2cfeb6-376b-52b4-911e-ff90b1d01409-doc,,VEND_PART.vendor is the unique identifier for ...,documentation
70,d47f3274-ac0c-5782-b821-3f086922139f-doc,,VEND_PART.part is the part that can be procure...,documentation
71,49f130b8-13f2-5c3c-8e60-20acda411259-doc,,VEND_PART.unit_cost is typically the creation ...,documentation


In [40]:
training_data[(training_data.training_data_type=='sql')].content

0                               SELECT * FROM customer;
1        SELECT *  FROM vendor WHERE 'addr_ste' = 'TX';
2     SELECT s.order_no, s.order_date, c.name, c.add...
3     SELECT P.p_code, V.name AS vendor_name, VP.uni...
4     SELECT C.name, COUNT(S.order_no) AS total_orde...
5     SELECT p_code, AVG(unit_price) AS avg_price FR...
6     SELECT * FROM job_mst where date(job_rls)>= da...
7     SELECT * FROM purchase_line where week(due) = ...
8          SELECT * FROM JOB_MST  WHERE job_stat = 'Q';
9      SELECT * FROM sales_line WHERE order_stat = 'C';
10    SELECT * FROM purchase_lineWHERE order_no = 'P...
11    SELECT name FROM CUSTOMER WHERE id IN (SELECT ...
12    SELECT s.order_no, s.order_date, c.name AS cus...
13    SELECT sl.order_no, sl.line_no,p.p_code AS par...
14    SELECT p.p_code, p.id AS part_id, SUM(sl.qty) ...
15    SELECT v.name AS vendor_name, COUNT(DISTINCT p...
16    SELECT c.name AS customer_name, v.name AS vend...
17    SELECT p.id, p.p_code, p.curr_stock AS cur

In [43]:
from datetime import datetime
datetime.now().strftime("%Y-%m-%d")

'2025-04-26'

In [None]:
while True:
    try:
        question = (input("\nUser: "))
        
        if question.lower() == 'exit':
            break
        
        else:
            question = (f"""Today is {current_date}. You are a manufacturing analyst. "
                    Generate ONLY the SQL query (no comments), "
                    "for the following question:""" + f"{question}")
                    
        while True:    
            print("\nAgent thinking...")
            start_time = time.time()
            
            # Get classified response
            llm_response = vn.generate_contextual_response(question)
            category = vn.classify_question(question)
            
            # Extract SQL using implemented function
            extracted_sql = extract_sql_from_response(llm_response)
            try:
                extracted_sql = extracted_sql.split("```sql")[1].replace("```","")
            except:
                continue    
            if not extracted_sql:
                print("\nCould not extract executable SQL from response")
                continue
                
            print(f"\n--- Extracted SQL ({category.upper()}) ---")
            print(extracted_sql)
            print("--- End of Extracted SQL ---")
            
            # Unified execution using Vanna's method
            execute = input(f"\nExecute {category.upper()} SQL query? (y/n): ").lower().strip()
            
            if execute == 'y':
                try:
                    print("Executing query...")
                    exec_start_time = time.time()
                    
                    df_result = vn.run_sql(sql=extracted_sql)
                    # Category-specific post-processing
                    if category == "judgement":
                        print("\n** Analytical Insights **")
                        # create_plot(df_result)
                        print(df_result.describe())
                    elif category == "suggestion":
                        print("\n** Recommended Actions **")
                        # create_plot(df_result)
                        print(df_result.head())
                    else:  # descriptive
                        print("\n** Query Results **")
                        create_plot(df_result)
                    
                    # Display results
                    with pd.option_context('display.max_rows', 20, 
                                        'display.max_columns', None,
                                        'display.width', 1000):
                        if len(df_result) > 20:
                            print(df_result.head(20))
                            print(f"... (truncated, {len(df_result)} rows total)")
                        else:
                            print(df_result)
                    
                    print(f"Query executed successfully in {time.time() - exec_start_time:.2f} seconds")
                    re_run = input(f"\Should I re-think {question.upper()} for a better response? (y/n): ").lower().strip()
                    if re_run == 'n':
                        save = input(f"\nDo You want to save this {category.upper()} SQL query for future use? (y/n): ").lower().strip()
                        if save == "y":
                            print("Saving results...")
                            vn.train(question=question, sql=extracted_sql)
                        else:
                            print("SQL query not saved.")    
                    else:
                        print("Retrying...")
                        continue        

                except Exception as exec_e:
                    print(f"\nError executing SQL: {type(exec_e).__name__} - {exec_e}")
                    print(f"Failed SQL:\n{extracted_sql}")
                    re_run = input(f"\Should I re-think {question.upper()} for a better response? (y/n): ").lower().strip()
                    
                    if re_run == 'n':
                        break
                    else:
                        print("Retryiing...")
                        continue

    except KeyboardInterrupt:
        print("\nExiting...")
        break



In [21]:
import time
import os
import sys
import warnings
import json
import re
from typing import Dict, List
from collections import defaultdict
from datetime import datetime

# --- Environment Setup ---
os.environ["TOKENIZERS_PARALLELISM"] = "false"

# --- Package Imports ---
try:
    import vanna
    from vanna.base import VannaBase
    from vanna.chromadb.chromadb_vector import ChromaDB_VectorStore
    import openai
    import mysql.connector
    import pandas as pd
except ImportError as e:
    print(f"Import Error: {e}")
    sys.exit(1)

# --- Configuration ---
db_type = "mysql"
mysql_host = "127.0.0.1"
mysql_port = 3306
mysql_user = "root"
mysql_password = "Raekwon_wtc$36" 
mysql_dbname = "ft_database"
llm_model_name = 'DeepSeek-R1-Distill-Llama-8B-Q4_K_M.gguf'
local_server_url = 'http://127.0.0.1:8000/v1'
placeholder_api_key = "sk-no-key-required"

# --- Question Classification Constants ---

FEW_SHOT_PROMPT = """
You are a manufacturing domain expert. Classify each question into one of three categories based on these criteria:

DESCRIPTIVE: 
- Questions that ask for direct data or facts
- Usually start with: what, how many, list, show, where, when
- Focus on current or historical state
- Example keywords: count, total, list, find, show, what is, where are

JUDGEMENT:
- Questions that require analysis and evaluation
- Usually start with: why, can, will, should, is it
- Need reasoning about causes or effects
- Example keywords: why, evaluate, assess, analyze, is it possible, what caused

SUGGESTION:
- Questions that ask for recommendations or improvements
- Usually start with: how can, what should, recommend
- Focus on future actions or changes
- Example keywords: improve, optimize, suggest, recommend, how can, what should

Examples:
Q1: "What is the current inventory level?" 
A1: DESCRIPTIVE (asks for current state)

Q2: "Why did we miss our delivery target?"
A2: JUDGEMENT (requires analysis of causes)

Q3: "How can we reduce late deliveries?"
A3: SUGGESTION (asks for improvement recommendations)

Q4: "List all overdue orders"
A4: DESCRIPTIVE (asks for direct data)

Q5: "Should we increase safety stock?"
A5: JUDGEMENT (requires evaluation)

Q6: "What changes should we make to improve efficiency?"
A6: SUGGESTION (asks for improvement recommendations)

Now classify this question:
Q: {question}
A: """

QUESTION_TYPES = ["descriptive", "judgement", "suggestion"]  

CONTEXT_PROMPTS = {
    "descriptive": "Return ONLY SQL with some explanation with plots.",
    "judgement": "When responding, return the required SQL query, provide a detailed analysis of the data including relevant plots, and offer your Judgement on the findings. Clearly state that it is your Judgement and advise the user to independently verify it.",
    "suggestion": "When responding, return the required SQL query, provide a detailed analysis of the data including relevant plots, and offer your suggestion on the findings. Clearly state that it is your Suggestion and advise the user to independently verify it."
}

CLASSIFICATION_METRICS = defaultdict(int)

# --- Keyword-based Classification Helper ---
def keyword_based_classification(question: str) -> str:
    question = question.lower()
    
    descriptive_keywords = ['what is', 'how many', 'list', 'show', 'find', 'where', 'when', 'which', 'count']
    judgement_keywords = ['why', 'could', 'would', 'should', 'evaluate', 'analyze', 'assess', 'is it possible']
    suggestion_keywords = ['how can', 'what should', 'improve', 'recommend', 'suggest', 'advise', 'propose']
    
    # Check for suggestion first (highest priority)
    for keyword in suggestion_keywords:
        if keyword in question:
            return "suggestion"
            
    # Check for judgement second
    for keyword in judgement_keywords:
        if keyword in question:
            return "judgement"
            
    # Default to descriptive
    for keyword in descriptive_keywords:
        if keyword in question:
            return "descriptive"
            
    return None

# --- Pattern-based Classification Function ---
def pattern_based_classification(question: str) -> str:
    """Use regex patterns to classify questions."""
    question = question.lower()
    
    # Descriptive patterns
    descriptive_patterns = [
        r"^what is", r"^how many", r"^list", r"^show", r"^tell me", 
        r"^find", r"^count", r"^where", r"^when", r"^who"
    ]
    
    # Judgment patterns
    judgment_patterns = [
        r"^can", r"^will", r"^is it possible", r"^why (was|did|is)", 
        r"^what caused", r"^is this", r"evaluate", r"assess",r"analyse"
    ]
    
    # Advice patterns
    advice_patterns = [
        r"^how (do|can|should) (i|we)", r"^what should", r"improve", 
        r"optimize", r"recommend", r"suggest", r"focus on"
    ]
    
    # Check patterns
    for pattern in descriptive_patterns:
        if re.search(pattern, question):
            return "descriptive"
    
    for pattern in judgment_patterns:
        if re.search(pattern, question):
            return "judgment"
    
    for pattern in advice_patterns:
        if re.search(pattern, question):
            return "advice"
    
    # Default to advice as the most complex type
    return "advice"


# --- Custom LLM Class with MCP Features ---
class MyLocalLlm(VannaBase):
    def __init__(self, config=None):
        super().__init__(config=config)
        self.model = llm_model_name

    # Implement required abstract methods
    def assistant_message(self, content: str) -> Dict:
        return {"role": "assistant", "content": content}

    def system_message(self, content: str) -> Dict:
        return {"role": "system", "content": content}

    def user_message(self, content: str) -> Dict:
        return {"role": "user", "content": content}
            
    def classify_question(self, question: str) -> str:
        # First try keyword-based classification
        keyword_result = keyword_based_classification(question)
        if keyword_result:
            return keyword_result
        
        # If keyword classification fails, use LLM
        classification_prompt = FEW_SHOT_PROMPT.format(question=question)
        response = self.submit_prompt(classification_prompt)
        
        # Clean up and validate the response
        response = response.strip().lower()
        if response in QUESTION_TYPES:
            return response
        
        # Default to descriptive if everything fails
        return "descriptive"

    def submit_prompt(self, prompt, **kwargs) -> str:
        try:
            local_client = openai.OpenAI(
                base_url=local_server_url,
                api_key=placeholder_api_key,
                timeout=kwargs.get('request_timeout', 180.0)
            )
            
            messages = prompt if isinstance(prompt, list) else [{"role": "user", "content": str(prompt)}]
            
            response = local_client.chat.completions.create(
                model=self.model,
                messages=messages,
                temperature=kwargs.get('temperature', 0.1),
                max_tokens=kwargs.get('max_tokens', 1536),
                stop=kwargs.get('stop', ["``````"])
            )
            
            if response.choices and response.choices[0].message.content:
                return response.choices[0].message.content.strip()
            return "Error: No response generated"

        except Exception as e:
            return f"Error: {str(e)}"

# --- Enhanced Vanna Class ---
class MyVanna(ChromaDB_VectorStore, MyLocalLlm):
    def __init__(self, config=None):
        ChromaDB_VectorStore.__init__(self, config=config)
        MyLocalLlm.__init__(self, config=config)
        
    def generate_contextual_response(self, question: str) -> str:
        category = self.classify_question(question)
        CLASSIFICATION_METRICS[category] += 1
        
        system_msg = f"""You are a manufacturing analyst. Follow these rules:
        - DESCRIPTIVE: {CONTEXT_PROMPTS['descriptive']}
        - JUDGEMENT: {CONTEXT_PROMPTS['judgement']}
        - SUGGESTION: {CONTEXT_PROMPTS['suggestion']}"""
        
        return self.generate_sql(
            question=question,
            chat_history=[self.system_message(system_msg)]
        )

# --- Core Execution Flow --- 
vn = MyVanna(config=None)

# Database connection setup remains unchanged
# --- Connect to Database ---
print(f"Connecting to {db_type} database '{mysql_dbname}' on {mysql_host}...")
try:
    if db_type == "mysql":
        vn.connect_to_mysql(
            host=mysql_host,
            port=mysql_port,
            user=mysql_user,
            password=mysql_password,
            dbname=mysql_dbname
        )
    else:
        print(f"Database type '{db_type}' not configured.")
        sys.exit(1)
    print("Database connection successful.")
except Exception as e:
    print(f"Error connecting to database: {e}")
    print("Please check MySQL server status, credentials, permissions, and required packages.")
    sys.exit(1)

# --- Training Vanna (Run deliberately when needed) ---
print("\n--- Vanna Training ---")
print("Loads metadata (DDL, docs, SQL examples) into Vanna's local ChromaDB vector store.")
train_vanna = True # Set to True to run training, False to skip for normal use
if train_vanna:
    df_information_schema = vn.run_sql("SELECT * FROM INFORMATION_SCHEMA.COLUMNS")
    Plan = vn.get_training_plan_generic(df_information_schema)
    vn.train(plan=Plan)
    print("Running Vanna training...")

    # === Add your specific training calls here ===
    print("  Example: Fetching DDL (Customize for your setup)...")
    # df_ddl = vn.run_sql(f"SHOW CREATE TABLE your_table_name;")
    # if not df_ddl.empty: vn.train(ddl=df_ddl.iloc['Create Table'])

    print("  Example: Adding Documentation (Add yours)...")
    # --- Customer Table Documentation ---
    vn.train(documentation="Table CUSTOMER contains information about business customers.")
    vn.train(documentation="CUSTOMER.id is the unique identifier for each customer.")
    vn.train(documentation="CUSTOMER.name is the company name of the customer.")
    vn.train(documentation="CUSTOMER.addr_str contains the street address for the customer.")
    vn.train(documentation="CUSTOMER.addr_city contains the city for the customer.")
    vn.train(documentation="CUSTOMER.addr_ste contains the state for the customer.")
    vn.train(documentation="CUSTOMER.addr_ctry contains the country for the customer.")
    vn.train(documentation="CUSTOMER.addr_zip contains the zip-code for the customer.")

    # --- Vendor Table Documentation ---
    vn.train(documentation="Table VENDOR lists suppliers of parts.")
    vn.train(documentation="VENDOR.id is the unique identifier for each vendor.")
    vn.train(documentation="VENDOR.name is the name of the vendor/supplier.")
    vn.train(documentation="CUSTOMER.addr_str contains the street address for the vendor.")
    vn.train(documentation="CUSTOMER.addr_city contains the city for the vendor.")
    vn.train(documentation="CUSTOMER.addr_ste contains the state for the vendor.")
    vn.train(documentation="CUSTOMER.addr_ctry contains the country for the vendor.")
    vn.train(documentation="CUSTOMER.addr_zip contains the zip-code for the vendor.")

    # --- Part Master Table Documentation ---
    vn.train(documentation="Table PART_MST is the master list of all parts.")
    vn.train(documentation="PART_MST.id is the unique identifier which indicates the unique id for the part.")
    vn.train(documentation="PART_MST.p_code is the unique product code for the part.")
    vn.train(documentation="PART_MST.procurement indicates if a part is manufactured internally ('M') or purchased ('B'). It must be either 'M' or 'B'.")
    vn.train(documentation="PART_MST.unit_price represents the standard price per unit of the part.")
    vn.train(documentation="PART_MST.pref_order_qty specifies The preferred quantity to order or manufacture at once, depending on if the part is M or B.")
    vn.train(documentation="PART_MST.unit_meas the unit of measure for this part; usually 'EA' for 'eaches'.")
    vn.train(documentation="PART_MST.mfg_lt is the lead time to manufacture this part, if it is manufactured. This must an 'M' part and positive.")
    vn.train(documentation="PART_MST.curr_stock the current amount of this part in stock in inventory")

    # --- Add Documentation for BOM_MST, RTG_MST, JOB_OPER, JOB_MATL, RECV_MST, RECV_LINE ---
    # Example:
    vn.train(documentation="Table bill_of_m defines the Bill of Materials, showing parent-child part relationships.")
    vn.train(documentation="bill_of_m.m_part links to the parent part code in PART_MST. The manufactured part which consumes c_part.")
    vn.train(documentation="BOM_MST.c_part links to the child component part code in PART_MST. The component part consumed by m_part.")
    vn.train(documentation="BOM_MST.qty_req is The quantity of c_part required to make one unit of m_part.")

    # --- Job Master Table Documentation ---
    vn.train(documentation="Table job_mst represents the master data for all the jobs.")
    vn.train(documentation="job_mst.id is the unique identifier for a job.")
    vn.train(documentation="job_mst.job_stat signifies the current status of the job as follows: 'Q' (in queue), 'C' (completed), 'O' (opened), 'X' (cancelled), or 'H' (on hold).")
    vn.train(documentation="job_mst.part is the part that this job is manufacturing. This can be found in the PART_MST table.")
    vn.train(documentation="job_mst.qty is the quantity of the part that this job is manufacturing.")
    vn.train(documentation="job_mst.job_rls is the time at which this job began work.")
    vn.train(documentation="job_mst.job_cls is the time at which this job was completed.")

    # --- Nonconform master table documentation ---
    vn.train(documentation="Table NONCONFORM_MST defines the Nonconformance report master data.")
    vn.train(documentation="NONCONFORM_MST.id is a unique id for this nonconformance report.")
    vn.train(documentation="NONCONFORM_MST.job signifies if this nonconformance report was issued for a job, the job it was issued for.")
    vn.train(documentation="NONCONFORM_MST.po_no signifies if this nonconformance report was issued for a purchase order, the purchase order it was issued for.")
    vn.train(documentation="NONCONFORM_MST.qty signifies the quantity of parts on this job/purchase order which were flagged for nonconformance issues.")



    # --- Sales Order Header Table Documentation ---
    vn.train(documentation="Table SALES_MST represents the header information for a customer sales order.")
    vn.train(documentation="SALES_MST.order_no is the unique identifier for a sales order.")
    vn.train(documentation="SALES_MST.order_date is the date the sales order was placed.")
    vn.train(documentation="SALES_MST.cust links to the CUSTOMER table for the customer who placed the order.")

    # --- Sales Order Line Table Documentation ---
    vn.train(documentation="Table SALES_LINE contains individual line items for each sales order in SALES_MST.")
    vn.train(documentation="SALES_LINE.order_no links back to the SALES_MST table.")
    vn.train(documentation="SALES_LINE.line_no is the line number of the line item under the given order_no.")
    vn.train(documentation="SALES_LINE.order_stat is the current status of the line item: 'C' (completed), 'O' (opened), or 'X' (cancelled).")
    vn.train(documentation="SALES_LINE.part links to the PART_MST table for the specific part ordered.")
    vn.train(documentation="SALES_LINE.due is the requested delivery date for this line item.")
    vn.train(documentation="SALES_LINE.unit_price is the agreed price per unit for the part on this order line.")
    vn.train(documentation="SALES_LINE.qty is the number of units ordered for this part on this line item.")
    vn.train(documentation="SALES_LINE.line_cls is the date on which this line item was closed.")

    # --- Purchase Order Header Table Documentation ---
    vn.train(documentation="Table PURCHASE_MST represents the header information for a purchase order sent to a vendor.")
    vn.train(documentation="PURCHASE_MST.order_no is the unique identifier for a purchase order.")
    vn.train(documentation="PURCHASE_MST.order_date is the date the purchase order was created.")
    vn.train(documentation="PURCHASE_MST.vendor links to the VENDOR table for the supplier receiving the order.")

    # --- Purchase Order Line Table Documentation ---
    vn.train(documentation="Table PURCHASE_LINE contains individual line items for each purchase order in PURCHASE_MST.")
    vn.train(documentation="PURCHASE_LINE.order_no links back to the PURCHASE_MST table.")
    vn.train(documentation="PURCHASE_LINE.part links to the PART_MST table for the specific part being purchased.")
    vn.train(documentation="PURCHASE_LINE.due is the expected delivery date for this purchased part.")
    vn.train(documentation="PURCHASE_LINE.unit_cost is the cost per unit for the part on this purchase order line.")
    vn.train(documentation="PURCHASE_LINE.qty is the number of units purchased. It must be a positive number.")
    vn.train(documentation="PURCHASE_LINE.order_stat is the current status of the purchase order: 'C' (completed), 'O' (opened), or 'X' (cancelled).")
    vn.train(documentation="PURCHASE_LINE.order_type is the type of purchase order: either an 'S' for service order or 'M' for material order.")
    vn.train(documentation="PURCHASE_LINE.line_no is the line number of this purchase order line under the given order_no.")
    vn.train(documentation="PURCHASE_LINE.line_cls is the date on which this purchase order was closed.")


    # --- Vendor Part Table Documentation ---
    vn.train(documentation="Table VEND_PART represents vendor and part relationship.")
    vn.train(documentation="VEND_PART.vendor is the unique identifier for each vendor.")
    vn.train(documentation="VEND_PART.part is the part that can be procured from the vendor.")
    vn.train(documentation="VEND_PART.unit_cost is typically the creation or start date of the job.")
    vn.train(documentation="VEND_PART.part_lt is The standard lead time for which to place orders for part to vendor")


    print("Adding SQL examples...")
    vn.train(question="Get all customers", 
                sql="SELECT * FROM customer;")
    
    vn.train(question="Get all vendors in a specific state", 
                sql="SELECT *  FROM vendor WHERE 'addr_ste' = 'TX';")
    
    vn.train(question="Join sales orders with customer info", 
                sql="""SELECT s.order_no, s.order_date, c.name, c.addr_city FROM sales_mst s JOIN customer c ON s.cust = c.id;""")
    
    vn.train(question="Parts with their vendor and unit cost", 
                sql="""SELECT P.p_code, V.name AS vendor_name, VP.unit_cost FROM vend_part VP JOIN vendor  V ON VP.vendor = V.id JOIN part_mst P ON VP.part = P.id;""")
    
    vn.train(question="Total number of sales per customer", 
                sql="""SELECT C.name, COUNT(S.order_no) AS total_orders FROM customer C JOIN sales_mst S ON S.cust = C.id GROUP BY C.name order by COUNT(S.order_no) desc; """)
    
    vn.train(question="Average unit price of each part", 
                sql="""SELECT p_code, AVG(unit_price) AS avg_price FROM part_mst GROUP BY p_code order by AVG(unit_price);""")
    
    vn.train(question="Jobs released in last 30 days", 
                sql="""SELECT * FROM job_mst where date(job_rls)>= date_sub(curdate() , interval 30 day);""")
    
    vn.train(question="Purchase orders due next week", 
                sql="""SELECT * FROM purchase_line where week(due) = week(date_add(curdate() , interval 7 day));""")
    
    vn.train(question="All open jobs", 
                sql="""SELECT * FROM JOB_MST  WHERE job_stat = 'Q';""")
    
    vn.train(question="All sales lines with ‘COMPLETE’ status", 
                sql="""SELECT * FROM sales_line WHERE order_stat = 'C';""")
    
    vn.train(question="Get purchase line for specific order and line", 
                sql="""SELECT * FROM purchase_lineWHERE order_no = 'PO-5436934';""")
    
    vn.train(question="Customers with more than 3 orders", 
                sql="""SELECT name FROM CUSTOMER WHERE id IN (SELECT cust FROM SALES_MST GROUP BY cust HAVING COUNT(order_no) > 3);""")
    
    vn.train(question="List Sales Orders with Customer Names", 
                sql="""SELECT s.order_no, s.order_date, c.name AS customer_name, c.addr_city AS customer_city FROM SALES_MST s JOIN CUSTOMER c ON s.cust = c.id;""")
    
    vn.train(question="Show Details of Parts on a Specific Sales Order", 
                sql="""SELECT sl.order_no, sl.line_no,p.p_code AS part_code,p.unit_meas,sl.qty AS quantity_ordered,sl.unit_price AS price_on_order, p.unit_price AS current_part_price, sl.order_stat AS line_status FROM SALES_LINE sl JOIN PART_MST p ON sl.part = p.id where order_no = "SO-1490003"; """)
    
    vn.train(question="Calculate Total Quantity Sold for Each Part", 
                sql="""SELECT p.p_code, p.id AS part_id, SUM(sl.qty) AS total_quantity_sold FROM SALES_LINE sl JOIN PART_MST p ON sl.part = p.id WHERE sl.order_stat = 'C'  GROUP BY p.p_code, p.id ORDER BY total_quantity_sold DESC;""")
    
    vn.train(question="Calculate Total Value of Purchases per Vendor", 
                sql="""SELECT v.name AS vendor_name, COUNT(DISTINCT pl.order_no) AS number_of_pos, SUM(pl.qty * pl.unit_cost) AS total_purchase_value FROM PURCHASE_LINE pl JOIN PURCHASE_MST pm ON pl.order_no = pm.order_no JOIN VENDOR v ON pm.vendor = v.id GROUP BY v.name ORDER BY total_purchase_value DESC;""")
    
    vn.train(question="Find Customers and Vendors Located in the Same State", 
                sql="""SELECT c.name AS customer_name, v.name AS vendor_name, c.addr_ste AS state FROM CUSTOMER c JOIN VENDOR v ON c.addr_ste = v.addr_ste ORDER BY c.addr_ste, c.name, v.name;""")
    
    vn.train(question="Compare Current Stock vs. Total Purchased vs. Total Sold (Simplified)", 
                sql="""SELECT p.id, p.p_code, p.curr_stock AS current_stock_on_hand, COALESCE(pp.total_purchased, 0) AS total_units_purchased, COALESCE(ps.total_sold, 0) AS total_units_sold, (p.curr_stock + COALESCE(pp.total_purchased, 0) - COALESCE(ps.total_sold, 0)) AS calculated_stock_balance  FROM PART_MST p LEFT JOIN PartPurchases pp ON p.id = pp.part LEFT JOIN PartSales ps ON p.id = ps.part ORDER BY p.p_code; """)
    
    vn.train(question="List Parts Supplied by a Specific Vendor", 
                sql="""SELECT v.id, v.name AS vendor_name, p.p_code AS part_code, vp.unit_cost AS vendor_unit_cost, vp.part_lt AS vendor_lead_time FROM VEND_PART vp left JOIN VENDOR v ON vp.vendor = v.id left JOIN PART_MST p ON vp.part = p.id where v.id = 7 ORDER BY p.p_code;""")
    
    vn.train(question="Top 5 Customers by Sales Value in the Last 6 Months", 
                sql="""SELECT c.id AS customer_id, c.name AS customer_name, SUM(sl.qty * sl.unit_price) AS total_spent FROM CUSTOMER c JOIN SALES_MST s ON c.id = s.cust JOIN SALES_LINE sl ON s.order_no = sl.order_no WHERE s.order_date >= DATE_SUB(CURDATE(), INTERVAL 6 MONTH) AND sl.order_stat <> 'X' GROUP BY c.id, c.name ORDER BY total_spent DESC LIMIT 5;""")
    
    vn.train(question="Parts Below Preferred Order Quantity", 
                sql="""SELECT id,p_code,curr_stock, pref_order_qty FROM PART_MST WHERE curr_stock < pref_order_qty AND procurement <> 'S' ORDER BY (pref_order_qty - curr_stock) DESC;""")
    
    vn.train(question="Monthly Sales Revenue Trend", 
                sql="""SELECT DATE_FORMAT(s.order_date, '%Y-%m') AS sales_month, round(SUM(sl.qty * sl.unit_price),2) AS monthly_revenue FROM SALES_MST s JOIN SALES_LINE sl ON s.order_no = sl.order_no WHERE sl.order_stat <> 'X' GROUP BY sales_month ORDER BY sales_month;""")

    vn.train(question="Direct Components for a Manufactured Part (Bill of Materials)", 
                sql="""SELECT mp.p_code AS manufactured_part_code, cp.p_code AS component_part_code, bom.qty_req AS quantity_required, cp.unit_meas AS component_unit FROM BILL_OF_M bom JOIN PART_MST mp ON bom.m_part = mp.id  JOIN PART_MST cp ON bom.c_part = cp.id  WHERE bom.m_part = 5638;""")
    
    vn.train(question="Total Component Quantity Required for Open Production Jobs", 
                    sql="""SELECT bom.c_part AS component_part_id, p.p_code AS component_part_code, SUM(j.qty * bom.qty_req) AS total_required_for_open_jobs FROM JOB_MST j JOIN BILL_OF_M bom ON j.part = bom.m_part JOIN PART_MST p ON bom.c_part = p.id   WHERE j.job_stat IN ('O', 'Q') GROUP BY bom.c_part, p.p_code ORDER BY total_required_for_open_jobs DESC;""")
        
    vn.train(question="Nonconformance Rate per Part (Simplified: NC Qty / Qty Purchased)", 
                    sql="""WITH PartPurchasesReceived AS (SELECT pl.part, SUM(pl.qty) AS total_received FROM PURCHASE_LINE pl WHERE pl.order_stat = 'C' GROUP BY pl.part),PartNonconformance AS (SELECT pl.part, SUM(nc.qty) AS total_nonconforming FROM NONCONFORM_MST nc JOIN PURCHASE_MST pm ON nc.po_no = pm.order_no  JOIN PURCHASE_LINE pl ON pm.order_no = pl.order_no AND nc.job IS NULL  GROUP BY pl.part)SELECT p.id AS part_id, p.p_code, COALESCE(ppr.total_received, 0) AS total_received, COALESCE(pnc.total_nonconforming, 0) AS total_nonconforming, CASE WHEN COALESCE(ppr.total_received, 0) > 0 THEN (COALESCE(pnc.total_nonconforming, 0) / ppr.total_received) * 100 ELSE 0 END AS nonconformance_rate_percent FROM PART_MST p LEFT JOIN PartPurchasesReceived ppr ON p.id = ppr.part LEFT JOIN PartNonconformance pnc ON p.id = pnc.part WHERE p.procurement = 'B'  ORDER BY nonconformance_rate_percent DESC;""")
    
    vn.train(question="Customers Who Haven't Ordered in Over a Year", 
                    sql="""SELECT c.id, c.name, MAX(s.order_date) AS last_order_date  FROM CUSTOMER c LEFT JOIN SALES_MST s ON c.id = s.cust AND s.order_date >= DATE_SUB(CURDATE(), INTERVAL 1 YEAR) WHERE s.order_no IS NULL  GROUP BY c.id, c.name  ORDER BY c.name;""")
    
    vn.train(question="Parts Purchased from Multiple Vendors with Cost Comparison", 
                    sql="""SELECT p.id AS part_id, p.p_code, COUNT(DISTINCT vp.vendor) AS number_of_vendors, MIN(vp.unit_cost) AS min_vendor_cost, MAX(vp.unit_cost) AS max_vendor_cost, (MAX(vp.unit_cost) - MIN(vp.unit_cost)) AS cost_difference FROM PART_MST p JOIN VEND_PART vp ON p.id = vp.part GROUP BY p.id, p.p_code HAVING COUNT(DISTINCT vp.vendor) > 1  ORDER BY cost_difference DESC, p.p_code;""")
    
# Add this at the bottom of your script, after the training section
def test_classify_question():
    # Use MyVanna instance instead of MyLocalLlm
    llm = MyVanna(config=None)
    
    # Test cases with expected classifications
    test_cases = [
        ("What is my projected revenue over the next month?", "descriptive"),
        ("Why was order PO-123 late?", "judgement"),
        ("How can I improve on-time delivery for customer X?", "suggestion"),
        ("List all parts in inventory", "descriptive"),
        ("Should I modify my system lead times?", "judgement"),
        ("What should I do about late deliveries?", "suggestion")
    ]
    
    print("\n=== Testing classify_question function ===")
    for question, expected in test_cases:
        print(f"\nQuestion: {question}")
        print(f"Expected classification: {expected}")
        try:
            result = llm.classify_question(question)
            print(f"Actual classification: {result}")
            print(f"Match: {'✓' if result == expected else '✗'}")
        except Exception as e:
            print(f"Error during classification: {str(e)}")
    
    print("\n=== End of testing ===")

# Run the test function
if __name__ == "__main__":
    test_classify_question()

Connecting to mysql database 'ft_database' on 127.0.0.1...
Database connection successful.

--- Vanna Training ---
Loads metadata (DDL, docs, SQL examples) into Vanna's local ChromaDB vector store.


Add of existing embedding ID: 31c39d68-1b22-5545-97e0-353dc991f251-doc
Insert of existing embedding ID: 31c39d68-1b22-5545-97e0-353dc991f251-doc
Add of existing embedding ID: fff92c80-4899-53ea-a8e4-a18c0415b820-doc
Insert of existing embedding ID: fff92c80-4899-53ea-a8e4-a18c0415b820-doc
Add of existing embedding ID: 020be5d7-5209-56ca-8b6a-5962efd95682-doc
Insert of existing embedding ID: 020be5d7-5209-56ca-8b6a-5962efd95682-doc
Add of existing embedding ID: 2f420d2e-9850-51f8-ab1a-2bf1e77e2d3b-doc
Insert of existing embedding ID: 2f420d2e-9850-51f8-ab1a-2bf1e77e2d3b-doc
Add of existing embedding ID: 9aebcf7e-04a6-5368-b307-25d7ca8a1741-doc
Insert of existing embedding ID: 9aebcf7e-04a6-5368-b307-25d7ca8a1741-doc
Add of existing embedding ID: d27aa358-78b0-58f6-8dc7-4f50d0c2b8e2-doc
Insert of existing embedding ID: d27aa358-78b0-58f6-8dc7-4f50d0c2b8e2-doc
Add of existing embedding ID: 97dd033f-f6c1-574a-b60f-242668753672-doc
Insert of existing embedding ID: 97dd033f-f6c1-574a-b60f-24

Running Vanna training...
  Example: Fetching DDL (Customize for your setup)...
  Example: Adding Documentation (Add yours)...
Adding documentation....
Adding documentation....
Adding documentation....
Adding documentation....


Add of existing embedding ID: 5710f1b2-2513-59b1-a22a-06ca846f3335-doc
Insert of existing embedding ID: 5710f1b2-2513-59b1-a22a-06ca846f3335-doc
Add of existing embedding ID: f62258fc-5052-5064-8778-17d64b48fb75-doc
Insert of existing embedding ID: f62258fc-5052-5064-8778-17d64b48fb75-doc
Add of existing embedding ID: 3a1ce7ab-9179-5232-8d27-603f1c7fc185-doc
Insert of existing embedding ID: 3a1ce7ab-9179-5232-8d27-603f1c7fc185-doc
Add of existing embedding ID: 118f8eeb-9139-5bb3-aa51-ffde0d892907-doc
Insert of existing embedding ID: 118f8eeb-9139-5bb3-aa51-ffde0d892907-doc


Adding documentation....
Adding documentation....
Adding documentation....
Adding documentation....


Add of existing embedding ID: 8c8564bf-5f18-5fca-b590-917e64a7c696-doc
Insert of existing embedding ID: 8c8564bf-5f18-5fca-b590-917e64a7c696-doc
Add of existing embedding ID: 1e2fad2c-8b40-5101-b01a-eb7cb27e9093-doc
Insert of existing embedding ID: 1e2fad2c-8b40-5101-b01a-eb7cb27e9093-doc
Add of existing embedding ID: adeabba7-8bbd-5156-97a3-d1e27d0e8f47-doc
Insert of existing embedding ID: adeabba7-8bbd-5156-97a3-d1e27d0e8f47-doc
Add of existing embedding ID: 380c2a34-55c2-5fe6-aaf9-5b98fc734a1d-doc
Insert of existing embedding ID: 380c2a34-55c2-5fe6-aaf9-5b98fc734a1d-doc


Adding documentation....
Adding documentation....
Adding documentation....
Adding documentation....


Add of existing embedding ID: 2e795c42-c6f8-5c44-aa1c-eb628fdee4fc-doc
Insert of existing embedding ID: 2e795c42-c6f8-5c44-aa1c-eb628fdee4fc-doc
Add of existing embedding ID: 3a25bf9f-42c8-5156-823b-ce6a42754b95-doc
Insert of existing embedding ID: 3a25bf9f-42c8-5156-823b-ce6a42754b95-doc
Add of existing embedding ID: fa75db61-9760-51e8-b436-32ed46777d85-doc
Insert of existing embedding ID: fa75db61-9760-51e8-b436-32ed46777d85-doc
Add of existing embedding ID: 808d1279-ae6d-5a3f-b2a9-6a42245a4f28-doc
Insert of existing embedding ID: 808d1279-ae6d-5a3f-b2a9-6a42245a4f28-doc


Adding documentation....
Adding documentation....
Adding documentation....
Adding documentation....


Add of existing embedding ID: deab258b-8c49-5858-8f76-a52a6269500e-doc
Insert of existing embedding ID: deab258b-8c49-5858-8f76-a52a6269500e-doc
Add of existing embedding ID: 2db3b251-253c-55b3-99aa-12540571472c-doc
Insert of existing embedding ID: 2db3b251-253c-55b3-99aa-12540571472c-doc
Add of existing embedding ID: 417933a5-e35b-5dc9-a0d1-bf5bf8a4d2ce-doc
Insert of existing embedding ID: 417933a5-e35b-5dc9-a0d1-bf5bf8a4d2ce-doc
Add of existing embedding ID: 628de61e-0ed2-5b90-898f-38e558ac180f-doc
Insert of existing embedding ID: 628de61e-0ed2-5b90-898f-38e558ac180f-doc


Adding documentation....
Adding documentation....
Adding documentation....
Adding documentation....


Add of existing embedding ID: 80065a35-16cd-5fd7-a372-512bd06427e5-doc
Insert of existing embedding ID: 80065a35-16cd-5fd7-a372-512bd06427e5-doc
Add of existing embedding ID: aae9e2ec-a925-599b-a250-2f0b2410a291-doc
Insert of existing embedding ID: aae9e2ec-a925-599b-a250-2f0b2410a291-doc
Add of existing embedding ID: e9263c31-5c79-5360-816b-5f7f98684880-doc
Insert of existing embedding ID: e9263c31-5c79-5360-816b-5f7f98684880-doc
Add of existing embedding ID: 5f751b55-e510-53da-a888-5ecefa26fee9-doc
Insert of existing embedding ID: 5f751b55-e510-53da-a888-5ecefa26fee9-doc


Adding documentation....
Adding documentation....
Adding documentation....
Adding documentation....


Add of existing embedding ID: 7ed5330d-ac46-5eb4-89b5-9971d2414c73-doc
Insert of existing embedding ID: 7ed5330d-ac46-5eb4-89b5-9971d2414c73-doc
Add of existing embedding ID: 23909f8f-6287-589a-9018-c962c6900982-doc
Insert of existing embedding ID: 23909f8f-6287-589a-9018-c962c6900982-doc
Add of existing embedding ID: a3dff5a7-f7ae-5291-9b03-a0b09282c724-doc
Insert of existing embedding ID: a3dff5a7-f7ae-5291-9b03-a0b09282c724-doc


Adding documentation....
Adding documentation....
Adding documentation....


Add of existing embedding ID: 595aefd0-fcf8-5f8f-8068-5e83d009e593-doc
Insert of existing embedding ID: 595aefd0-fcf8-5f8f-8068-5e83d009e593-doc
Add of existing embedding ID: bbeecc4f-d710-575a-8f54-5c94c81562f7-doc
Insert of existing embedding ID: bbeecc4f-d710-575a-8f54-5c94c81562f7-doc
Add of existing embedding ID: 8386d710-25b6-51b9-91d8-30785a276e62-doc
Insert of existing embedding ID: 8386d710-25b6-51b9-91d8-30785a276e62-doc
Add of existing embedding ID: 6910f4a8-91ea-598d-9b25-57242d313d65-doc
Insert of existing embedding ID: 6910f4a8-91ea-598d-9b25-57242d313d65-doc


Adding documentation....
Adding documentation....
Adding documentation....
Adding documentation....


Add of existing embedding ID: ab40f9fb-5a08-5f54-8d12-0e9d43477249-doc
Insert of existing embedding ID: ab40f9fb-5a08-5f54-8d12-0e9d43477249-doc
Add of existing embedding ID: dea8e4e4-e47c-5c8e-9a6a-7166bc5e54dd-doc
Insert of existing embedding ID: dea8e4e4-e47c-5c8e-9a6a-7166bc5e54dd-doc
Add of existing embedding ID: 49ed9ddb-37cb-5687-ab29-5364cc05e755-doc
Insert of existing embedding ID: 49ed9ddb-37cb-5687-ab29-5364cc05e755-doc
Add of existing embedding ID: 76378bf1-3f39-52d7-8589-31832805c77b-doc
Insert of existing embedding ID: 76378bf1-3f39-52d7-8589-31832805c77b-doc


Adding documentation....
Adding documentation....
Adding documentation....
Adding documentation....


Add of existing embedding ID: 77937f1f-1c06-5c8e-9b2a-dbaecbb40383-doc
Insert of existing embedding ID: 77937f1f-1c06-5c8e-9b2a-dbaecbb40383-doc
Add of existing embedding ID: 2bae7fcb-36a7-5286-b8c6-71208a314226-doc
Insert of existing embedding ID: 2bae7fcb-36a7-5286-b8c6-71208a314226-doc
Add of existing embedding ID: 49333a1c-ca97-5b18-a978-eb9f9f22d43c-doc
Insert of existing embedding ID: 49333a1c-ca97-5b18-a978-eb9f9f22d43c-doc
Add of existing embedding ID: 24e2eb51-a625-5ba2-bd1e-ceb1b1df0182-doc
Insert of existing embedding ID: 24e2eb51-a625-5ba2-bd1e-ceb1b1df0182-doc


Adding documentation....
Adding documentation....
Adding documentation....
Adding documentation....


Add of existing embedding ID: c1d11163-ce12-54e8-a638-426f870aa7f4-doc
Insert of existing embedding ID: c1d11163-ce12-54e8-a638-426f870aa7f4-doc
Add of existing embedding ID: 0551b27b-7308-52ed-9abd-4415dd77ec81-doc
Insert of existing embedding ID: 0551b27b-7308-52ed-9abd-4415dd77ec81-doc
Add of existing embedding ID: fb6f1cbe-b884-51e1-a624-7ef41fea05e1-doc
Insert of existing embedding ID: fb6f1cbe-b884-51e1-a624-7ef41fea05e1-doc
Add of existing embedding ID: 5257415b-4130-5a26-946a-d31f4069503e-doc
Insert of existing embedding ID: 5257415b-4130-5a26-946a-d31f4069503e-doc


Adding documentation....
Adding documentation....
Adding documentation....
Adding documentation....


Add of existing embedding ID: 96bde749-8d97-5d6e-88d2-dac226a05810-doc
Insert of existing embedding ID: 96bde749-8d97-5d6e-88d2-dac226a05810-doc
Add of existing embedding ID: c02733e3-c35a-53a8-b5ce-996106d1b55d-doc
Insert of existing embedding ID: c02733e3-c35a-53a8-b5ce-996106d1b55d-doc
Add of existing embedding ID: af9febcb-31be-5b14-8ce8-0fac9252681a-doc
Insert of existing embedding ID: af9febcb-31be-5b14-8ce8-0fac9252681a-doc
Add of existing embedding ID: b03e0de0-ae76-5be7-b66c-ba00c99ac122-doc
Insert of existing embedding ID: b03e0de0-ae76-5be7-b66c-ba00c99ac122-doc


Adding documentation....
Adding documentation....
Adding documentation....
Adding documentation....


Add of existing embedding ID: 26430856-5ae1-5ecf-867b-b933387d0461-doc
Insert of existing embedding ID: 26430856-5ae1-5ecf-867b-b933387d0461-doc
Add of existing embedding ID: 242db09b-2597-5c29-b559-99eb35e6645d-doc
Insert of existing embedding ID: 242db09b-2597-5c29-b559-99eb35e6645d-doc
Add of existing embedding ID: 53f4e256-e5f4-5793-b2fc-d15fe96aed80-doc
Insert of existing embedding ID: 53f4e256-e5f4-5793-b2fc-d15fe96aed80-doc
Add of existing embedding ID: e3a1bdc3-429f-5a7f-9e80-a667d434142c-doc
Insert of existing embedding ID: e3a1bdc3-429f-5a7f-9e80-a667d434142c-doc


Adding documentation....
Adding documentation....
Adding documentation....
Adding documentation....


Add of existing embedding ID: 128533d2-05c4-5167-8867-de34e7743520-doc
Insert of existing embedding ID: 128533d2-05c4-5167-8867-de34e7743520-doc
Add of existing embedding ID: 37a60c7d-71e5-5999-a7a9-d5062627d6db-doc
Insert of existing embedding ID: 37a60c7d-71e5-5999-a7a9-d5062627d6db-doc
Add of existing embedding ID: 0300c6b3-f8f8-52bb-9d2e-08d83a1e973c-doc
Insert of existing embedding ID: 0300c6b3-f8f8-52bb-9d2e-08d83a1e973c-doc
Add of existing embedding ID: 5b4e09c7-3aa4-5d97-918a-570f5b07fc5b-doc
Insert of existing embedding ID: 5b4e09c7-3aa4-5d97-918a-570f5b07fc5b-doc


Adding documentation....
Adding documentation....
Adding documentation....
Adding documentation....


Add of existing embedding ID: 5ee3dc07-0388-522f-8277-f9abf7399e4b-doc
Insert of existing embedding ID: 5ee3dc07-0388-522f-8277-f9abf7399e4b-doc
Add of existing embedding ID: 7a1d58d5-2171-565f-9edf-3cb8a125ef66-doc
Insert of existing embedding ID: 7a1d58d5-2171-565f-9edf-3cb8a125ef66-doc
Add of existing embedding ID: 7d9e084d-20aa-5ab5-b619-4ce79a69131f-doc
Insert of existing embedding ID: 7d9e084d-20aa-5ab5-b619-4ce79a69131f-doc
Add of existing embedding ID: 0dc60cc5-f32b-578b-9708-dfdadc907edf-doc
Insert of existing embedding ID: 0dc60cc5-f32b-578b-9708-dfdadc907edf-doc


Adding documentation....
Adding documentation....
Adding documentation....
Adding documentation....


Add of existing embedding ID: 1f322c62-5226-5ee3-86f8-695e4ab976b0-doc
Insert of existing embedding ID: 1f322c62-5226-5ee3-86f8-695e4ab976b0-doc
Add of existing embedding ID: bc909e47-ed1f-5b15-be2c-0c1789202c68-doc
Insert of existing embedding ID: bc909e47-ed1f-5b15-be2c-0c1789202c68-doc


Adding documentation....
Adding documentation....


Add of existing embedding ID: 04b42da7-2f59-5af1-9379-f9c8854b943c-doc
Insert of existing embedding ID: 04b42da7-2f59-5af1-9379-f9c8854b943c-doc
Add of existing embedding ID: 87135ea3-a863-589b-8d9e-207c0bf9e368-doc
Insert of existing embedding ID: 87135ea3-a863-589b-8d9e-207c0bf9e368-doc
Add of existing embedding ID: 6dafd24f-2d1b-5b34-a48a-7c52345f8fd3-doc
Insert of existing embedding ID: 6dafd24f-2d1b-5b34-a48a-7c52345f8fd3-doc
Add of existing embedding ID: b7c247a6-3fc3-599f-9eb4-6d2ab1920097-doc
Insert of existing embedding ID: b7c247a6-3fc3-599f-9eb4-6d2ab1920097-doc


Adding documentation....
Adding documentation....
Adding documentation....
Adding documentation....


Add of existing embedding ID: e200dc7d-15c2-55aa-9452-92526bca6476-doc
Insert of existing embedding ID: e200dc7d-15c2-55aa-9452-92526bca6476-doc
Add of existing embedding ID: 3a2b3b3c-d62e-57c9-9017-e00b1dfacf1e-doc
Insert of existing embedding ID: 3a2b3b3c-d62e-57c9-9017-e00b1dfacf1e-doc
Add of existing embedding ID: 0a712900-a404-5806-ac70-6554fa3d6e27-doc
Insert of existing embedding ID: 0a712900-a404-5806-ac70-6554fa3d6e27-doc


Adding documentation....
Adding documentation....
Adding documentation....


Add of existing embedding ID: eaf17db6-7e22-5ab6-b635-0eca04ab8db1-doc
Insert of existing embedding ID: eaf17db6-7e22-5ab6-b635-0eca04ab8db1-doc
Add of existing embedding ID: d83ce6c1-5e5e-5b58-bde5-219d2811b351-doc
Insert of existing embedding ID: d83ce6c1-5e5e-5b58-bde5-219d2811b351-doc
Add of existing embedding ID: 9d2cfeb6-376b-52b4-911e-ff90b1d01409-doc
Insert of existing embedding ID: 9d2cfeb6-376b-52b4-911e-ff90b1d01409-doc
Add of existing embedding ID: d47f3274-ac0c-5782-b821-3f086922139f-doc
Insert of existing embedding ID: d47f3274-ac0c-5782-b821-3f086922139f-doc


Adding documentation....
Adding documentation....
Adding documentation....
Adding documentation....


Add of existing embedding ID: 49f130b8-13f2-5c3c-8e60-20acda411259-doc
Insert of existing embedding ID: 49f130b8-13f2-5c3c-8e60-20acda411259-doc
Add of existing embedding ID: 82f36a27-c95f-598f-ba45-7c4b36d5fce6-doc
Insert of existing embedding ID: 82f36a27-c95f-598f-ba45-7c4b36d5fce6-doc
Add of existing embedding ID: a54ce698-1275-5bb2-82e0-c04e1cdc0cd3-sql
Insert of existing embedding ID: a54ce698-1275-5bb2-82e0-c04e1cdc0cd3-sql
Add of existing embedding ID: 4293528b-c9df-5b02-9c5e-6ef4c6217979-sql
Insert of existing embedding ID: 4293528b-c9df-5b02-9c5e-6ef4c6217979-sql


Adding documentation....
Adding SQL examples...


Add of existing embedding ID: d4c9bf2f-3442-51bc-9334-9f2131659f51-sql
Insert of existing embedding ID: d4c9bf2f-3442-51bc-9334-9f2131659f51-sql
Add of existing embedding ID: 41e60ad4-c6fe-5d80-b53d-e176f814a6cf-sql
Insert of existing embedding ID: 41e60ad4-c6fe-5d80-b53d-e176f814a6cf-sql
Add of existing embedding ID: 1b18671a-c5fe-5288-ab9c-2f843422a01d-sql
Insert of existing embedding ID: 1b18671a-c5fe-5288-ab9c-2f843422a01d-sql
Add of existing embedding ID: f261db6f-0489-5795-abc3-f6ea78655ebd-sql
Insert of existing embedding ID: f261db6f-0489-5795-abc3-f6ea78655ebd-sql
Add of existing embedding ID: 0a85a0c1-8e67-5516-8991-6d619cb2904c-sql
Insert of existing embedding ID: 0a85a0c1-8e67-5516-8991-6d619cb2904c-sql
Add of existing embedding ID: 1af897fe-fe24-511a-8002-4a56ca7980c1-sql
Insert of existing embedding ID: 1af897fe-fe24-511a-8002-4a56ca7980c1-sql
Add of existing embedding ID: ce73e887-d8e1-5ca9-aa6c-797c7e410212-sql
Insert of existing embedding ID: ce73e887-d8e1-5ca9-aa6c-79


=== Testing classify_question function ===

Question: What is my projected revenue over the next month?
Expected classification: descriptive
Actual classification: descriptive
Match: ✓

Question: Why was order PO-123 late?
Expected classification: judgement
Actual classification: judgement
Match: ✓

Question: How can I improve on-time delivery for customer X?
Expected classification: suggestion
Actual classification: suggestion
Match: ✓

Question: List all parts in inventory
Expected classification: descriptive
Actual classification: descriptive
Match: ✓

Question: Should I modify my system lead times?
Expected classification: judgement
Actual classification: judgement
Match: ✓

Question: What should I do about late deliveries?
Expected classification: suggestion
Actual classification: suggestion
Match: ✓

=== End of testing ===


In [20]:
# Update the FEW_SHOT_PROMPT with more specific examples and clearer definitions
FEW_SHOT_PROMPT = """
You are a manufacturing domain expert. Classify each question into one of three categories based on these criteria:

DESCRIPTIVE: 
- Questions that ask for direct data or facts
- Usually start with: what, how many, list, show, where, when
- Focus on current or historical state
- Example keywords: count, total, list, find, show, what is, where are

JUDGEMENT:
- Questions that require analysis and evaluation
- Usually start with: why, can, will, should, is it
- Need reasoning about causes or effects
- Example keywords: why, evaluate, assess, analyze, is it possible, what caused

SUGGESTION:
- Questions that ask for recommendations or improvements
- Usually start with: how can, what should, recommend
- Focus on future actions or changes
- Example keywords: improve, optimize, suggest, recommend, how can, what should

Examples:
Q1: "What is the current inventory level?" 
A1: DESCRIPTIVE (asks for current state)

Q2: "Why did we miss our delivery target?"
A2: JUDGEMENT (requires analysis of causes)

Q3: "How can we reduce late deliveries?"
A3: SUGGESTION (asks for improvement recommendations)

Q4: "List all overdue orders"
A4: DESCRIPTIVE (asks for direct data)

Q5: "Should we increase safety stock?"
A5: JUDGEMENT (requires evaluation)

Q6: "What changes should we make to improve efficiency?"
A6: SUGGESTION (asks for improvement recommendations)

Now classify this question:
Q: {question}
A: """

# Add a keyword-based classification helper
def keyword_based_classification(question: str) -> str:
    question = question.lower()
    
    descriptive_keywords = ['what is', 'how many', 'list', 'show', 'find', 'where', 'when', 'which', 'count']
    judgement_keywords = ['why', 'could', 'would', 'should', 'evaluate', 'analyze', 'assess', 'is it possible']
    suggestion_keywords = ['how can', 'what should', 'improve', 'recommend', 'suggest', 'advise', 'propose']
    
    # Check for suggestion first (highest priority)
    for keyword in suggestion_keywords:
        if keyword in question:
            return "suggestion"
            
    # Check for judgement second
    for keyword in judgement_keywords:
        if keyword in question:
            return "judgement"
            
    # Default to descriptive
    for keyword in descriptive_keywords:
        if keyword in question:
            return "descriptive"
            
    return None

# Update the classify_question method in MyLocalLlm class
def classify_question(self, question: str) -> str:
    # First try keyword-based classification
    keyword_result = keyword_based_classification(question)
    if keyword_result:
        return keyword_result
        
    # If keyword classification fails, use LLM
    classification_prompt = FEW_SHOT_PROMPT.format(question=question)
    response = self.submit_prompt(classification_prompt)
    
    # Clean up and validate the response
    response = response.strip().lower()
    if response in QUESTION_TYPES:
        return response
    
    # Default to descriptive if everything fails
    return "descriptive"

# Add this at the bottom of your script, after the training section
def test_classify_question():
    # Create an instance of MyLocalLlm
    llm = MyLocalLlm(config=None)
    
    # Test cases with expected classifications
    test_cases = [
        ("What is my projected revenue over the next month?", "descriptive"),
        ("Why was order PO-123 late?", "judgement"),
        ("How can I improve on-time delivery for customer X?", "suggestion"),
        ("List all parts in inventory", "descriptive"),
        ("Should I modify my system lead times?", "judgement"),
        ("What should I do about late deliveries?", "suggestion")
    ]
    
    print("\n=== Testing classify_question function ===")
    for question, expected in test_cases:
        print(f"\nQuestion: {question}")
        print(f"Expected classification: {expected}")
        try:
            result = llm.classify_question(question)
            print(f"Actual classification: {result}")
            print(f"Match: {'✓' if result == expected else '✗'}")
        except Exception as e:
            print(f"Error during classification: {str(e)}")
    
    print("\n=== End of testing ===")

# Run the test function
if __name__ == "__main__":
    test_classify_question()

TypeError: Can't instantiate abstract class MyLocalLlm with abstract methods add_ddl, add_documentation, add_question_sql, generate_embedding, get_related_ddl, get_related_documentation, get_similar_question_sql, get_training_data, remove_training_data

In [15]:
llm = MyLocalLlm()
llm.classify_question("how can I address the root causes of late delivery?")

TypeError: Can't instantiate abstract class MyLocalLlm with abstract methods add_ddl, add_documentation, add_question_sql, generate_embedding, get_related_ddl, get_related_documentation, get_similar_question_sql, get_training_data, remove_training_data

In [22]:
# --- Question Classification Constants ---

FEW_SHOT_PROMPT = """

Examples:
Q1: What is my projected revenue over the next month?
A1: DESCRIPTIVE

Q2: Why was order PO-123 late?
A2: JUDGEMENT 

Q3: How can I improve on-time delivery for customer X?
A3: SUGGESTION

Q4: Which parts have the worst on-time delivery in last quarter?
A4: DESCRIPTIVE

Q5: Should I modify my system lead times?
A5: JUDGEMENT

Q6: What should my new system lead time be for part Y?
A6: SUGGESTION

Q7: Which parts/part families have above average demand in the next 'time period'?
A7: DESCRIPTIVE

Q8: What is the most common root cause of my late deliveries overall?
A8: DESCRIPTIVE

Q9: Which parts/part families have above average demand in the next 'time period'?
A9: DESCRIPTIVE

Q10: Can I make my due date for 'line item'?
A10: JUDGEMENT

Q11: Should I modify my system lead times?
A11: JUDGEMENT

Q12: How can I fix/mitigate 'specific root cause' of a late delivery?
A12: SUGGESTION

Q13: Which parts/part families should I focus on improving on-time delivery for?
A13: SUGGESTION

Classify this manufacturing prompt as one of:
- DESCRIPTIVE: These questions are queries for information which can be derived directly from the data
                without further analysis. Many of these questions will have a simple, objective answer, 
                for example: Q1,Q4 . Other questions may require some minor reasonable assumptions to be made on the part of the model; 
                for example, Q9 may require the model to decide. 

- JUDGEMENT: Here, the user is asking for the agent’s opinion. It will still have to query data, as for a descriptive question, 
             but it will also have to provide some further analysis and present its conclusion and reasoning to the user. 
             The user is not necessarily asking for a solution (yet) but instead asking for direction and confirmation. For Example Q2,Q5

- SUGGESTION: These are likely to be the most challenging questions for the agent to answer in a satisfactory manner.
                Here, the user is asking for broader guidance on a more fundamental problem. In these cases, it is likely going to be 
                useful for the agent to direct the user to parts of the software which will assist in answering the question. For Example Q3,Q6,Q12,Q13


Now classify this question:
Q: {question}
A: """

QUESTION_TYPES = ["descriptive", "judgement", "suggestion"]  



CONTEXT_PROMPTS = {
    "descriptive": "Return ONLY SQL with some explanation with plots.",
    "judgement": "When responding, return the required SQL query, provide a detailed analysis of the data including relevant plots, and offer your Judgement on the findings. Clearly state that it is your Judgement and advise the user to independently verify it.",
    "suggestion": "When responding, return the required SQL query, provide a detailed analysis of the data including relevant plots, and offer your suggestion on the findings. Clearly state that it is your Suggestion and advise the user to independently verify it."
}

CLASSIFICATION_METRICS = defaultdict(int)

# --- Pattern-based Classification Function ---
def pattern_based_classification(question: str) -> str:
    """Use regex patterns to classify questions."""
    question = question.lower()
    
    # Descriptive patterns
    descriptive_patterns = [
        r"^what is", r"^how many", r"^list", r"^show", r"^tell me", 
        r"^find", r"^count", r"^where", r"^when", r"^who"
    ]
    
    # Judgment patterns
    judgment_patterns = [
        r"^can", r"^will", r"^is it possible", r"^why (was|did|is)", 
        r"^what caused", r"^is this", r"evaluate", r"assess",r"analyse"
    ]
    
    # Advice patterns
    advice_patterns = [
        r"^how (do|can|should) (i|we)", r"^what should", r"improve", 
        r"optimize", r"recommend", r"suggest", r"focus on"
    ]
    
    # Check patterns
    for pattern in descriptive_patterns:
        if re.search(pattern, question):
            return "descriptive"
    
    for pattern in judgment_patterns:
        if re.search(pattern, question):
            return "judgment"
    
    for pattern in advice_patterns:
        if re.search(pattern, question):
            return "advice"
    
    # Default to advice as the most complex type
    return "advice"



In [1]:
def clear_traiining_data():
    """Clear the training data in the vector store."""    
    training_data = vn.get_training_data()
    for i in training_data.id.to_list():
        vn.remove_training_data(id=i)
    return vn.get_training_data()        

In [2]:
clear_traiining_data()

NameError: name 'vn' is not defined

In [29]:
def refine_question(original_question: str) -> str:
    refinement_prompt = f"""Rephrase and expand this manufacturing question to be more specific and actionable. 
    Maintain original intent while adding context about tables/columns. Return ONLY the improved question.

    Original: {original_question}
    Refined: """
    
    refined = vn.submit_prompt(refinement_prompt).strip()
    return refined if len(refined) > len(original_question) else original_question

In [30]:
refine_question("SHOULD WE CHANGE OUR VENDORS BASED ON THEIR LATE DELIVERIES?")

'<think>\nOkay, so I need to figure out how to rephrase and expand the question "SHOULD WE CHANGE OUR VENDORS BASED ON THEIR LATE DELIVERIES?" into something more specific and actionable. The user also mentioned adding context about tables/columns, but I\'m not entirely sure how that fits in yet.\n\nFirst, let me understand the original intent. It\'s asking whether the company should consider changing its vendors because some of them are delivering products late. That makes sense because late deliveries can cause problems like stockouts or unhappy customers.\n\nNow, to make this more specific and actionable, I need to add details that clarify what exactly is being considered. The user mentioned tables and columns, so maybe they want the question to reference specific data points or metrics from these structures. Perhaps they have a table of vendors with their performance data, including delivery times.\n\nSo, I should structure the refined question in a way that references this data. M

In [31]:
Plan

Train on Information Schema: def.ft_database bill_of_m
Train on Information Schema: def.ft_database customer
Train on Information Schema: def.ft_database job_mst
Train on Information Schema: def.ft_database nonconform_mst
Train on Information Schema: def.ft_database part_mst
Train on Information Schema: def.ft_database purchase_line
Train on Information Schema: def.ft_database purchase_mst
Train on Information Schema: def.ft_database sales_line
Train on Information Schema: def.ft_database sales_mst
Train on Information Schema: def.ft_database vend_part
Train on Information Schema: def.ft_database vendor
Train on Information Schema: def.information_schema ADMINISTRABLE_ROLE_AUTHORIZATIONS
Train on Information Schema: def.information_schema APPLICABLE_ROLES
Train on Information Schema: def.information_schema CHARACTER_SETS
Train on Information Schema: def.information_schema CHECK_CONSTRAINTS
Train on Information Schema: def.information_schema COLLATION_CHARACTER_SET_APPLICABILITY
Train o

In [32]:
df_information_schema

Unnamed: 0,TABLE_CATALOG,TABLE_SCHEMA,TABLE_NAME,COLUMN_NAME,ORDINAL_POSITION,COLUMN_DEFAULT,IS_NULLABLE,DATA_TYPE,CHARACTER_MAXIMUM_LENGTH,CHARACTER_OCTET_LENGTH,...,DATETIME_PRECISION,CHARACTER_SET_NAME,COLLATION_NAME,COLUMN_TYPE,COLUMN_KEY,EXTRA,PRIVILEGES,COLUMN_COMMENT,GENERATION_EXPRESSION,SRS_ID
0,def,ft_database,bill_of_m,m_part,1,,YES,int,,,...,,,,int,,,"select,insert,update,references",,,
1,def,ft_database,bill_of_m,c_part,2,,YES,int,,,...,,,,int,,,"select,insert,update,references",,,
2,def,ft_database,bill_of_m,qty_req,3,,YES,int,,,...,,,,int,,,"select,insert,update,references",,,
3,def,ft_database,customer,id,1,,YES,int,,,...,,,,int,,,"select,insert,update,references",,,
4,def,ft_database,customer,name,2,,YES,text,65535.0,65535.0,...,,utf8mb4,utf8mb4_0900_ai_ci,text,,,"select,insert,update,references",,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3957,def,sys,x$waits_global_by_latency,events,1,,NO,varchar,128.0,512.0,...,,utf8mb4,utf8mb4_0900_ai_ci,varchar(128),,,"select,insert,update,references",,,
3958,def,sys,x$waits_global_by_latency,total,2,,NO,bigint,,,...,,,,bigint unsigned,,,"select,insert,update,references",,,
3959,def,sys,x$waits_global_by_latency,total_latency,3,,NO,bigint,,,...,,,,bigint unsigned,,,"select,insert,update,references",,,
3960,def,sys,x$waits_global_by_latency,avg_latency,4,,NO,bigint,,,...,,,,bigint unsigned,,,"select,insert,update,references",,,


In [34]:
df_information_schema = vn.run_sql("SELECT * FROM INFORMATION_SCHEMA.COLUMNS where TABLE_SCHEMA = 'ft_database';")
df_information_schema

Unnamed: 0,TABLE_CATALOG,TABLE_SCHEMA,TABLE_NAME,COLUMN_NAME,ORDINAL_POSITION,COLUMN_DEFAULT,IS_NULLABLE,DATA_TYPE,CHARACTER_MAXIMUM_LENGTH,CHARACTER_OCTET_LENGTH,...,DATETIME_PRECISION,CHARACTER_SET_NAME,COLLATION_NAME,COLUMN_TYPE,COLUMN_KEY,EXTRA,PRIVILEGES,COLUMN_COMMENT,GENERATION_EXPRESSION,SRS_ID
0,def,ft_database,bill_of_m,m_part,1,,YES,int,,,...,,,,int,,,"select,insert,update,references",,,
1,def,ft_database,bill_of_m,c_part,2,,YES,int,,,...,,,,int,,,"select,insert,update,references",,,
2,def,ft_database,bill_of_m,qty_req,3,,YES,int,,,...,,,,int,,,"select,insert,update,references",,,
3,def,ft_database,customer,id,1,,YES,int,,,...,,,,int,,,"select,insert,update,references",,,
4,def,ft_database,customer,name,2,,YES,text,65535.0,65535.0,...,,utf8mb4,utf8mb4_0900_ai_ci,text,,,"select,insert,update,references",,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
57,def,ft_database,vendor,addr_str,3,,YES,text,65535.0,65535.0,...,,utf8mb4,utf8mb4_0900_ai_ci,text,,,"select,insert,update,references",,,
58,def,ft_database,vendor,addr_city,4,,YES,text,65535.0,65535.0,...,,utf8mb4,utf8mb4_0900_ai_ci,text,,,"select,insert,update,references",,,
59,def,ft_database,vendor,addr_ste,5,,YES,text,65535.0,65535.0,...,,utf8mb4,utf8mb4_0900_ai_ci,text,,,"select,insert,update,references",,,
60,def,ft_database,vendor,addr_ctry,6,,YES,text,65535.0,65535.0,...,,utf8mb4,utf8mb4_0900_ai_ci,text,,,"select,insert,update,references",,,


In [35]:
df_information_schema.TABLE_NAME.unique().tolist()

['bill_of_m',
 'customer',
 'job_mst',
 'nonconform_mst',
 'part_mst',
 'purchase_line',
 'purchase_mst',
 'sales_line',
 'sales_mst',
 'vend_part',
 'vendor']

In [43]:
dict(df_information_schema[['TABLE_NAME','COLUMN_NAME']])

{'TABLE_NAME': 0     bill_of_m
 1     bill_of_m
 2     bill_of_m
 3      customer
 4      customer
         ...    
 57       vendor
 58       vendor
 59       vendor
 60       vendor
 61       vendor
 Name: TABLE_NAME, Length: 62, dtype: object,
 'COLUMN_NAME': 0        m_part
 1        c_part
 2       qty_req
 3            id
 4          name
         ...    
 57     addr_str
 58    addr_city
 59     addr_ste
 60    addr_ctry
 61     addr_zip
 Name: COLUMN_NAME, Length: 62, dtype: object}

In [50]:
import time
import os
import sys
import warnings
import json
import re
from typing import Dict, List
from collections import defaultdict
from datetime import datetime

# --- Environment Setup ---
os.environ["TOKENIZERS_PARALLELISM"] = "false"

# --- Package Imports ---
try:
    import vanna
    from vanna.base import VannaBase
    from vanna.chromadb.chromadb_vector import ChromaDB_VectorStore
    import openai
    import mysql.connector
    import pandas as pd
except ImportError as e:
    print(f"Import Error: {e}")
    sys.exit(1)

# --- Configuration ---
db_type = "mysql"
mysql_host = "127.0.0.1"
mysql_port = 3306
mysql_user = "root"
mysql_password = "Raekwon_wtc$36" 
mysql_dbname = "ft_database"
llm_model_name = 'DeepSeek-R1-Distill-Llama-8B-Q4_K_M.gguf'
local_server_url = 'http://127.0.0.1:8000/v1'
placeholder_api_key = "sk-no-key-required"

# --- Question Classification Constants ---

FEW_SHOT_PROMPT = """
You are a manufacturing domain expert. Classify each question into one of three categories based on these criteria:

DESCRIPTIVE: 
- Questions that ask for direct data or facts
- Usually start with: what, how many, list, show, where, when
- Focus on current or historical state
- Example keywords: count, total, list, find, show, what is, where are

JUDGEMENT:
- Questions that require analysis and evaluation
- Usually start with: why, can, will, should, is it
- Need reasoning about causes or effects
- Example keywords: why, evaluate, assess, analyze, is it possible, what caused

SUGGESTION:
- Questions that ask for recommendations or improvements
- Usually start with: how can, what should, recommend
- Focus on future actions or changes
- Example keywords: improve, optimize, suggest, recommend, how can, what should

Examples:
Q1: "What is the current inventory level?" 
A1: DESCRIPTIVE (asks for current state)

Q2: "Why did we miss our delivery target?"
A2: JUDGEMENT (requires analysis of causes)

Q3: "How can we reduce late deliveries?"
A3: SUGGESTION (asks for improvement recommendations)

Q4: "List all overdue orders"
A4: DESCRIPTIVE (asks for direct data)

Q5: "Should we increase safety stock?"
A5: JUDGEMENT (requires evaluation)

Q6: "What changes should we make to improve efficiency?"
A6: SUGGESTION (asks for improvement recommendations)

Now classify this question:
Q: {question}
A: """

CONTEXT_PROMPTS = {
    "descriptive": "Return ONLY SQL with some explanation with plots.",
    "judgement": "When responding, return the required SQL query, provide a detailed analysis of the data including relevant plots, and offer your Judgement on the findings. Clearly state that it is your Judgement and advise the user to independently verify it.",
    "suggestion": "When responding, return the required SQL query, provide a detailed analysis of the data including relevant plots, and offer your suggestion on the findings. Clearly state that it is your Suggestion and advise the user to independently verify it."
}
QUESTION_TYPES = ["descriptive", "judgement", "suggestion"]  
# Add a keyword-based classification helper
def keyword_based_classification(question: str) -> str:
    question = question.lower()
    
    descriptive_keywords = ['what is', 'how many', 'list', 'show', 'find', 'where', 'when', 'which', 'count']
    judgement_keywords = ['why', 'could', 'would', 'should', 'evaluate', 'analyze', 'assess', 'is it possible']
    suggestion_keywords = ['how can', 'what should', 'improve', 'recommend', 'suggest', 'advise', 'propose']
    
    # Check for suggestion first (highest priority)
    for keyword in suggestion_keywords:
        if keyword in question:
            return "suggestion"
            
    # Check for judgement second
    for keyword in judgement_keywords:
        if keyword in question:
            return "judgement"
            
    # Default to descriptive
    for keyword in descriptive_keywords:
        if keyword in question:
            return "descriptive"
            
    return None

# Update the classify_question method in MyLocalLlm class

CLASSIFICATION_METRICS = defaultdict(int)


# --- Custom LLM Class with MCP Features ---
class MyLocalLlm(VannaBase):
    def __init__(self, config=None):
        super().__init__(config=config)
        self.model = llm_model_name

    # Implement required abstract methods
    def assistant_message(self, content: str) -> Dict:
        return {"role": "assistant", "content": content}

    def system_message(self, content: str) -> Dict:
        return {"role": "system", "content": content}

    def user_message(self, content: str) -> Dict:
        return {"role": "user", "content": content}
            
    def classify_question(self, question: str) -> str:
        # First try keyword-based classification
        keyword_result = keyword_based_classification(question)
        if keyword_result:
            return keyword_result
            
        # If keyword classification fails, use LLM
        classification_prompt = FEW_SHOT_PROMPT.format(question=question)
        response = self.submit_prompt(classification_prompt)
        
        # Clean up and validate the response
        response = response.strip().lower()
        if response in QUESTION_TYPES:
            return response
        # Default to descriptive if everything fails
        return "descriptive"

    def submit_prompt(self, prompt, **kwargs) -> str:
        try:
            local_client = openai.OpenAI(
                base_url=local_server_url,
                api_key=placeholder_api_key,
                timeout=kwargs.get('request_timeout', 180.0)
            )
            
            messages = prompt if isinstance(prompt, list) else [{"role": "user", "content": str(prompt)}]
            
            response = local_client.chat.completions.create(
                model=self.model,
                messages=messages,
                temperature=kwargs.get('temperature', 0.1),
                max_tokens=kwargs.get('max_tokens', 1536),
                stop=kwargs.get('stop', ["``````"])
            )
            
            if response.choices and response.choices[0].message.content:
                return response.choices[0].message.content.strip()
            return "Error: No response generated"

        except Exception as e:
            return f"Error: {str(e)}"

# --- Enhanced Vanna Class ---
class MyVanna(ChromaDB_VectorStore, MyLocalLlm):
    def __init__(self, config=None):
        ChromaDB_VectorStore.__init__(self, config=config)
        MyLocalLlm.__init__(self, config=config)
        
    def generate_contextual_response(self, question: str) -> str:
        category = self.classify_question(question)
        CLASSIFICATION_METRICS[category] += 1
        
        system_msg = f"""You are a manufacturing analyst. Follow these rules:
        - DESCRIPTIVE: {CONTEXT_PROMPTS['descriptive']}
        - JUDGEMENT: {CONTEXT_PROMPTS['judgement']}
        - SUGGESTION: {CONTEXT_PROMPTS['suggestion']}"""
        
        return self.generate_sql(
            question=question,
            chat_history=[self.system_message(system_msg)]
        )

# --- Core Execution Flow --- 
vn = MyVanna(config=None)

# Database connection setup remains unchanged
# --- Connect to Database ---
print(f"Connecting to {db_type} database '{mysql_dbname}' on {mysql_host}...")
try:
    if db_type == "mysql":
        vn.connect_to_mysql(
            host=mysql_host,
            port=mysql_port,
            user=mysql_user,
            password=mysql_password,
            dbname=mysql_dbname
        )
    else:
        print(f"Database type '{db_type}' not configured.")
        sys.exit(1)
    print("Database connection successful.")
except Exception as e:
    print(f"Error connecting to database: {e}")
    print("Please check MySQL server status, credentials, permissions, and required packages.")
    sys.exit(1)

# --- Training Vanna (Run deliberately when needed) ---
print("\n--- Vanna Training ---")
print("Loads metadata (DDL, docs, SQL examples) into Vanna's local ChromaDB vector store.")
train_vanna = True # Set to True to run training, False to skip for normal use
# train_vanna:
df_information_schema = vn.run_sql(f"SELECT * FROM INFORMATION_SCHEMA.COLUMNS where TABLE_SCHEMA = '{mysql_dbname}';")
Plan = vn.get_training_plan_generic(df_information_schema)
vn.train(plan=Plan)
table_list = df_information_schema['TABLE_NAME'].unique().tolist()
column_list = df_information_schema['COLUMN_NAME'].unique().tolist()


Connecting to mysql database 'ft_database' on 127.0.0.1...
Database connection successful.

--- Vanna Training ---
Loads metadata (DDL, docs, SQL examples) into Vanna's local ChromaDB vector store.


In [51]:
table_list

['bill_of_m',
 'customer',
 'job_mst',
 'nonconform_mst',
 'part_mst',
 'purchase_line',
 'purchase_mst',
 'sales_line',
 'sales_mst',
 'vend_part',
 'vendor']

In [47]:
df_information_schema

Unnamed: 0,TABLE_CATALOG,TABLE_SCHEMA,TABLE_NAME,COLUMN_NAME,ORDINAL_POSITION,COLUMN_DEFAULT,IS_NULLABLE,DATA_TYPE,CHARACTER_MAXIMUM_LENGTH,CHARACTER_OCTET_LENGTH,...,DATETIME_PRECISION,CHARACTER_SET_NAME,COLLATION_NAME,COLUMN_TYPE,COLUMN_KEY,EXTRA,PRIVILEGES,COLUMN_COMMENT,GENERATION_EXPRESSION,SRS_ID


In [49]:
vn.run_sql("SELECT * FROM INFORMATION_SCHEMA.COLUMNS where TABLE_SCHEMA;")

Unnamed: 0,TABLE_CATALOG,TABLE_SCHEMA,TABLE_NAME,COLUMN_NAME,ORDINAL_POSITION,COLUMN_DEFAULT,IS_NULLABLE,DATA_TYPE,CHARACTER_MAXIMUM_LENGTH,CHARACTER_OCTET_LENGTH,...,DATETIME_PRECISION,CHARACTER_SET_NAME,COLLATION_NAME,COLUMN_TYPE,COLUMN_KEY,EXTRA,PRIVILEGES,COLUMN_COMMENT,GENERATION_EXPRESSION,SRS_ID


In [53]:
import time
import os
import sys
import warnings
import json
import re
from typing import Dict, List
from collections import defaultdict
from datetime import datetime

# --- Environment Setup ---
os.environ["TOKENIZERS_PARALLELISM"] = "false"

# --- Package Imports ---
try:
    import vanna
    from vanna.base import VannaBase
    from vanna.chromadb.chromadb_vector import ChromaDB_VectorStore
    import openai
    import mysql.connector
    import pandas as pd
except ImportError as e:
    print(f"Import Error: {e}")
    sys.exit(1)

# --- Configuration ---
db_type = "mysql"
mysql_host = "127.0.0.1"
mysql_port = 3306
mysql_user = "root"
mysql_password = "Raekwon_wtc$36" 
mysql_dbname = "ft_database"
llm_model_name = 'mistral-7b-instruct-v0.1.Q8_0.gguf'
local_server_url = 'http://127.0.0.1:8000/v1'
placeholder_api_key = "sk-no-key-required"

# --- Question Classification Constants ---

FEW_SHOT_PROMPT = """
You are a manufacturing domain expert. Classify each question into one of three categories based on these criteria:

DESCRIPTIVE: 
- Questions that ask for direct data or facts
- Usually start with: what, how many, list, show, where, when
- Focus on current or historical state
- Example keywords: count, total, list, find, show, what is, where are

JUDGEMENT:
- Questions that require analysis and evaluation
- Usually start with: why, can, will, should, is it
- Need reasoning about causes or effects
- Example keywords: why, evaluate, assess, analyze, is it possible, what caused

SUGGESTION:
- Questions that ask for recommendations or improvements
- Usually start with: how can, what should, recommend
- Focus on future actions or changes
- Example keywords: improve, optimize, suggest, recommend, how can, what should

Examples:
Q1: "What is the current inventory level?" 
A1: DESCRIPTIVE (asks for current state)

Q2: "Why did we miss our delivery target?"
A2: JUDGEMENT (requires analysis of causes)

Q3: "How can we reduce late deliveries?"
A3: SUGGESTION (asks for improvement recommendations)

Q4: "List all overdue orders"
A4: DESCRIPTIVE (asks for direct data)

Q5: "Should we increase safety stock?"
A5: JUDGEMENT (requires evaluation)

Q6: "What changes should we make to improve efficiency?"
A6: SUGGESTION (asks for improvement recommendations)

Now classify this question:
Q: {question}
A: """

CONTEXT_PROMPTS = {
    "descriptive": "Return ONLY SQL with some explanation with plots.",
    "judgement": "When responding, return the required SQL query, provide a detailed analysis of the data including relevant plots, and offer your Judgement on the findings. Clearly state that it is your Judgement and advise the user to independently verify it.",
    "suggestion": "When responding, return the required SQL query, provide a detailed analysis of the data including relevant plots, and offer your suggestion on the findings. Clearly state that it is your Suggestion and advise the user to independently verify it."
}
QUESTION_TYPES = ["descriptive", "judgement", "suggestion"]  
# Add a keyword-based classification helper
def keyword_based_classification(question: str) -> str:
    question = question.lower()
    
    descriptive_keywords = ['what is', 'how many', 'list', 'show', 'find', 'where', 'when', 'which', 'count']
    judgement_keywords = ['why', 'could', 'would', 'should', 'evaluate', 'analyze', 'assess', 'is it possible']
    suggestion_keywords = ['how can', 'what should', 'improve', 'recommend', 'suggest', 'advise', 'propose']
    
    # Check for suggestion first (highest priority)
    for keyword in suggestion_keywords:
        if keyword in question:
            return "suggestion"
            
    # Check for judgement second
    for keyword in judgement_keywords:
        if keyword in question:
            return "judgement"
            
    # Default to descriptive
    for keyword in descriptive_keywords:
        if keyword in question:
            return "descriptive"
            
    return None

# Update the classify_question method in MyLocalLlm class

CLASSIFICATION_METRICS = defaultdict(int)


# --- Custom LLM Class with MCP Features ---
class MyLocalLlm(VannaBase):
    def __init__(self, config=None):
        super().__init__(config=config)
        self.model = llm_model_name

    # Implement required abstract methods
    def assistant_message(self, content: str) -> Dict:
        return {"role": "assistant", "content": content}

    def system_message(self, content: str) -> Dict:
        return {"role": "system", "content": content}

    def user_message(self, content: str) -> Dict:
        return {"role": "user", "content": content}
            
    def classify_question(self, question: str) -> str:
        # First try keyword-based classification
        keyword_result = keyword_based_classification(question)
        if keyword_result:
            return keyword_result
            
        # If keyword classification fails, use LLM
        classification_prompt = FEW_SHOT_PROMPT.format(question=question)
        response = self.submit_prompt(classification_prompt)
        
        # Clean up and validate the response
        response = response.strip().lower()
        if response in QUESTION_TYPES:
            return response
        # Default to descriptive if everything fails
        return "descriptive"

    def submit_prompt(self, prompt, **kwargs) -> str:
        try:
            local_client = openai.OpenAI(
                base_url=local_server_url,
                api_key=placeholder_api_key,
                timeout=kwargs.get('request_timeout', 180.0)
            )
            
            messages = prompt if isinstance(prompt, list) else [{"role": "user", "content": str(prompt)}]
            
            response = local_client.chat.completions.create(
                model=self.model,
                messages=messages,
                temperature=kwargs.get('temperature', 0.1),
                max_tokens=kwargs.get('max_tokens', 1536),
                stop=kwargs.get('stop', ["``````"])
            )
            
            if response.choices and response.choices[0].message.content:
                return response.choices[0].message.content.strip()
            return "Error: No response generated"

        except Exception as e:
            return f"Error: {str(e)}"

# --- Enhanced Vanna Class ---
class MyVanna(ChromaDB_VectorStore, MyLocalLlm):
    def __init__(self, config=None):
        ChromaDB_VectorStore.__init__(self, config=config)
        MyLocalLlm.__init__(self, config=config)
        
    def generate_contextual_response(self, question: str) -> str:
        category = self.classify_question(question)
        CLASSIFICATION_METRICS[category] += 1
        
        system_msg = f"""You are a manufacturing analyst. Follow these rules:
        - DESCRIPTIVE: {CONTEXT_PROMPTS['descriptive']}
        - JUDGEMENT: {CONTEXT_PROMPTS['judgement']}
        - SUGGESTION: {CONTEXT_PROMPTS['suggestion']}"""
        
        return self.generate_sql(
            question=question,
            chat_history=[self.system_message(system_msg)]
        )

# --- Core Execution Flow --- 
vn = MyVanna(config=None)

# Database connection setup remains unchanged
# --- Connect to Database ---
print(f"Connecting to {db_type} database '{mysql_dbname}' on {mysql_host}...")
try:
    if db_type == "mysql":
        vn.connect_to_mysql(
            host=mysql_host,
            port=mysql_port,
            user=mysql_user,
            password=mysql_password,
            dbname=mysql_dbname
        )
    else:
        print(f"Database type '{db_type}' not configured.")
        sys.exit(1)
    print("Database connection successful.")
except Exception as e:
    print(f"Error connecting to database: {e}")
    print("Please check MySQL server status, credentials, permissions, and required packages.")
    sys.exit(1)

# --- Training Vanna (Run deliberately when needed) ---
print("\n--- Vanna Training ---")
print("Loads metadata (DDL, docs, SQL examples) into Vanna's local ChromaDB vector store.")
train_vanna = True # Set to True to run training, False to skip for normal use
if train_vanna:
    df_information_schema = vn.run_sql(f"SELECT * FROM INFORMATION_SCHEMA.COLUMNS where TABLE_SCHEMA = '{mysql_dbname}';")
    Plan = vn.get_training_plan_generic(df_information_schema)
    vn.train(plan=Plan)
    table_list = df_information_schema['TABLE_NAME'].unique().tolist()
    column_list = df_information_schema['COLUMN_NAME'].unique().tolist()
    print("Running Vanna training...")

    # === Add your specific training calls here ===
    print("  Example: Fetching DDL (Customize for your setup)...")
    # df_ddl = vn.run_sql(f"SHOW CREATE TABLE your_table_name;")
    # if not df_ddl.empty: vn.train(ddl=df_ddl.iloc['Create Table'])

    print("  Example: Adding Documentation (Add yours)...")
    # --- Customer Table Documentation ---
    vn.train(documentation="Table CUSTOMER contains information about business customers.")
    vn.train(documentation="CUSTOMER.id is the unique identifier for each customer.")
    vn.train(documentation="CUSTOMER.name is the company name of the customer.")
    vn.train(documentation="CUSTOMER.addr_str contains the street address for the customer.")
    vn.train(documentation="CUSTOMER.addr_city contains the city for the customer.")
    vn.train(documentation="CUSTOMER.addr_ste contains the state for the customer.")
    vn.train(documentation="CUSTOMER.addr_ctry contains the country for the customer.")
    vn.train(documentation="CUSTOMER.addr_zip contains the zip-code for the customer.")

    # --- Vendor Table Documentation ---
    vn.train(documentation="Table VENDOR lists suppliers of parts.")
    vn.train(documentation="VENDOR.id is the unique identifier for each vendor.")
    vn.train(documentation="VENDOR.name is the name of the vendor/supplier.")
    vn.train(documentation="CUSTOMER.addr_str contains the street address for the vendor.")
    vn.train(documentation="CUSTOMER.addr_city contains the city for the vendor.")
    vn.train(documentation="CUSTOMER.addr_ste contains the state for the vendor.")
    vn.train(documentation="CUSTOMER.addr_ctry contains the country for the vendor.")
    vn.train(documentation="CUSTOMER.addr_zip contains the zip-code for the vendor.")

    # --- Part Master Table Documentation ---
    vn.train(documentation="Table PART_MST is the master list of all parts.")
    vn.train(documentation="PART_MST.id is the unique identifier which indicates the unique id for the part.")
    vn.train(documentation="PART_MST.p_code is the unique product code for the part.")
    vn.train(documentation="PART_MST.procurement indicates if a part is manufactured internally ('M') or purchased ('B'). It must be either 'M' or 'B'.")
    vn.train(documentation="PART_MST.unit_price represents the standard price per unit of the part.")
    vn.train(documentation="PART_MST.pref_order_qty specifies The preferred quantity to order or manufacture at once, depending on if the part is M or B.")
    vn.train(documentation="PART_MST.unit_meas the unit of measure for this part; usually 'EA' for 'eaches'.")
    vn.train(documentation="PART_MST.mfg_lt is the lead time to manufacture this part, if it is manufactured. This must an 'M' part and positive.")
    vn.train(documentation="PART_MST.curr_stock the current amount of this part in stock in inventory")

    # --- Add Documentation for BOM_MST, RTG_MST, JOB_OPER, JOB_MATL, RECV_MST, RECV_LINE ---
    # Example:
    vn.train(documentation="Table bill_of_m defines the Bill of Materials, showing parent-child part relationships.")
    vn.train(documentation="bill_of_m.m_part links to the parent part code in PART_MST. The manufactured part which consumes c_part.")
    vn.train(documentation="BOM_MST.c_part links to the child component part code in PART_MST. The component part consumed by m_part.")
    vn.train(documentation="BOM_MST.qty_req is The quantity of c_part required to make one unit of m_part.")

    # --- Job Master Table Documentation ---
    vn.train(documentation="Table job_mst represents the master data for all the jobs.")
    vn.train(documentation="job_mst.id is the unique identifier for a job.")
    vn.train(documentation="job_mst.job_stat signifies the current status of the job as follows: 'Q' (in queue), 'C' (completed), 'O' (opened), 'X' (cancelled), or 'H' (on hold).")
    vn.train(documentation="job_mst.part is the part that this job is manufacturing. This can be found in the PART_MST table.")
    vn.train(documentation="job_mst.qty is the quantity of the part that this job is manufacturing.")
    vn.train(documentation="job_mst.job_rls is the time at which this job began work.")
    vn.train(documentation="job_mst.job_cls is the time at which this job was completed.")

    # --- Nonconform master table documentation ---
    vn.train(documentation="Table NONCONFORM_MST defines the Nonconformance report master data.")
    vn.train(documentation="NONCONFORM_MST.id is a unique id for this nonconformance report.")
    vn.train(documentation="NONCONFORM_MST.job signifies if this nonconformance report was issued for a job, the job it was issued for.")
    vn.train(documentation="NONCONFORM_MST.po_no signifies if this nonconformance report was issued for a purchase order, the purchase order it was issued for.")
    vn.train(documentation="NONCONFORM_MST.qty signifies the quantity of parts on this job/purchase order which were flagged for nonconformance issues.")



    # --- Sales Order Header Table Documentation ---
    vn.train(documentation="Table SALES_MST represents the header information for a customer sales order.")
    vn.train(documentation="SALES_MST.order_no is the unique identifier for a sales order.")
    vn.train(documentation="SALES_MST.order_date is the date the sales order was placed.")
    vn.train(documentation="SALES_MST.cust links to the CUSTOMER table for the customer who placed the order.")

    # --- Sales Order Line Table Documentation ---
    vn.train(documentation="Table SALES_LINE contains individual line items for each sales order in SALES_MST.")
    vn.train(documentation="SALES_LINE.order_no links back to the SALES_MST table.")
    vn.train(documentation="SALES_LINE.line_no is the line number of the line item under the given order_no.")
    vn.train(documentation="SALES_LINE.order_stat is the current status of the line item: 'C' (completed), 'O' (opened), or 'X' (cancelled).")
    vn.train(documentation="SALES_LINE.part links to the PART_MST table for the specific part ordered.")
    vn.train(documentation="SALES_LINE.due is the requested delivery date for this line item.")
    vn.train(documentation="SALES_LINE.unit_price is the agreed price per unit for the part on this order line.")
    vn.train(documentation="SALES_LINE.qty is the number of units ordered for this part on this line item.")
    vn.train(documentation="SALES_LINE.line_cls is the date on which this line item was closed.")

    # --- Purchase Order Header Table Documentation ---
    vn.train(documentation="Table PURCHASE_MST represents the header information for a purchase order sent to a vendor.")
    vn.train(documentation="PURCHASE_MST.order_no is the unique identifier for a purchase order.")
    vn.train(documentation="PURCHASE_MST.order_date is the date the purchase order was created.")
    vn.train(documentation="PURCHASE_MST.vendor links to the VENDOR table for the supplier receiving the order.")

    # --- Purchase Order Line Table Documentation ---
    vn.train(documentation="Table PURCHASE_LINE contains individual line items for each purchase order in PURCHASE_MST.")
    vn.train(documentation="PURCHASE_LINE.order_no links back to the PURCHASE_MST table.")
    vn.train(documentation="PURCHASE_LINE.part links to the PART_MST table for the specific part being purchased.")
    vn.train(documentation="PURCHASE_LINE.due is the expected delivery date for this purchased part.")
    vn.train(documentation="PURCHASE_LINE.unit_cost is the cost per unit for the part on this purchase order line.")
    vn.train(documentation="PURCHASE_LINE.qty is the number of units purchased. It must be a positive number.")
    vn.train(documentation="PURCHASE_LINE.order_stat is the current status of the purchase order: 'C' (completed), 'O' (opened), or 'X' (cancelled).")
    vn.train(documentation="PURCHASE_LINE.order_type is the type of purchase order: either an 'S' for service order or 'M' for material order.")
    vn.train(documentation="PURCHASE_LINE.line_no is the line number of this purchase order line under the given order_no.")
    vn.train(documentation="PURCHASE_LINE.line_cls is the date on which this purchase order was closed.")


    # --- Vendor Part Table Documentation ---
    vn.train(documentation="Table VEND_PART represents vendor and part relationship.")
    vn.train(documentation="VEND_PART.vendor is the unique identifier for each vendor.")
    vn.train(documentation="VEND_PART.part is the part that can be procured from the vendor.")
    vn.train(documentation="VEND_PART.unit_cost is typically the creation or start date of the job.")
    vn.train(documentation="VEND_PART.part_lt is The standard lead time for which to place orders for part to vendor")


    print("Adding SQL examples...")
    vn.train(question="Get all customers", 
                sql="SELECT * FROM customer;")
    
    vn.train(question="Get all vendors in a specific state", 
                sql="SELECT *  FROM vendor WHERE 'addr_ste' = 'TX';")
    
    vn.train(question="Join sales orders with customer info", 
                sql="""SELECT s.order_no, s.order_date, c.name, c.addr_city FROM sales_mst s JOIN customer c ON s.cust = c.id;""")
    
    vn.train(question="Parts with their vendor and unit cost", 
                sql="""SELECT P.p_code, V.name AS vendor_name, VP.unit_cost FROM vend_part VP JOIN vendor  V ON VP.vendor = V.id JOIN part_mst P ON VP.part = P.id;""")
    
    vn.train(question="Total number of sales per customer", 
                sql="""SELECT C.name, COUNT(S.order_no) AS total_orders FROM customer C JOIN sales_mst S ON S.cust = C.id GROUP BY C.name order by COUNT(S.order_no) desc; """)
    
    vn.train(question="Average unit price of each part", 
                sql="""SELECT p_code, AVG(unit_price) AS avg_price FROM part_mst GROUP BY p_code order by AVG(unit_price);""")
    
    vn.train(question="Jobs released in last 30 days", 
                sql="""SELECT * FROM job_mst where date(job_rls)>= date_sub(curdate() , interval 30 day);""")
    
    vn.train(question="Purchase orders due next week", 
                sql="""SELECT * FROM purchase_line where week(due) = week(date_add(curdate() , interval 7 day));""")
    
    vn.train(question="All open jobs", 
                sql="""SELECT * FROM JOB_MST  WHERE job_stat = 'Q';""")
    
    vn.train(question="All sales lines with ‘COMPLETE’ status", 
                sql="""SELECT * FROM sales_line WHERE order_stat = 'C';""")
    
    vn.train(question="Get purchase line for specific order and line", 
                sql="""SELECT * FROM purchase_lineWHERE order_no = 'PO-5436934';""")
    
    vn.train(question="Customers with more than 3 orders", 
                sql="""SELECT name FROM CUSTOMER WHERE id IN (SELECT cust FROM SALES_MST GROUP BY cust HAVING COUNT(order_no) > 3);""")
    
    vn.train(question="List Sales Orders with Customer Names", 
                sql="""SELECT s.order_no, s.order_date, c.name AS customer_name, c.addr_city AS customer_city FROM SALES_MST s JOIN CUSTOMER c ON s.cust = c.id;""")
    
    vn.train(question="Show Details of Parts on a Specific Sales Order", 
                sql="""SELECT sl.order_no, sl.line_no,p.p_code AS part_code,p.unit_meas,sl.qty AS quantity_ordered,sl.unit_price AS price_on_order, p.unit_price AS current_part_price, sl.order_stat AS line_status FROM SALES_LINE sl JOIN PART_MST p ON sl.part = p.id where order_no = "SO-1490003"; """)
    
    vn.train(question="Calculate Total Quantity Sold for Each Part", 
                sql="""SELECT p.p_code, p.id AS part_id, SUM(sl.qty) AS total_quantity_sold FROM SALES_LINE sl JOIN PART_MST p ON sl.part = p.id WHERE sl.order_stat = 'C'  GROUP BY p.p_code, p.id ORDER BY total_quantity_sold DESC;""")
    
    vn.train(question="Calculate Total Value of Purchases per Vendor", 
                sql="""SELECT v.name AS vendor_name, COUNT(DISTINCT pl.order_no) AS number_of_pos, SUM(pl.qty * pl.unit_cost) AS total_purchase_value FROM PURCHASE_LINE pl JOIN PURCHASE_MST pm ON pl.order_no = pm.order_no JOIN VENDOR v ON pm.vendor = v.id GROUP BY v.name ORDER BY total_purchase_value DESC;""")
    
    vn.train(question="Find Customers and Vendors Located in the Same State", 
                sql="""SELECT c.name AS customer_name, v.name AS vendor_name, c.addr_ste AS state FROM CUSTOMER c JOIN VENDOR v ON c.addr_ste = v.addr_ste ORDER BY c.addr_ste, c.name, v.name;""")
    
    vn.train(question="Compare Current Stock vs. Total Purchased vs. Total Sold (Simplified)", 
                sql="""SELECT p.id, p.p_code, p.curr_stock AS current_stock_on_hand, COALESCE(pp.total_purchased, 0) AS total_units_purchased, COALESCE(ps.total_sold, 0) AS total_units_sold, (p.curr_stock + COALESCE(pp.total_purchased, 0) - COALESCE(ps.total_sold, 0)) AS calculated_stock_balance  FROM PART_MST p LEFT JOIN PartPurchases pp ON p.id = pp.part LEFT JOIN PartSales ps ON p.id = ps.part ORDER BY p.p_code; """)
    
    vn.train(question="List Parts Supplied by a Specific Vendor", 
                sql="""SELECT v.id, v.name AS vendor_name, p.p_code AS part_code, vp.unit_cost AS vendor_unit_cost, vp.part_lt AS vendor_lead_time FROM VEND_PART vp left JOIN VENDOR v ON vp.vendor = v.id left JOIN PART_MST p ON vp.part = p.id where v.id = 7 ORDER BY p.p_code;""")
    
    vn.train(question="Top 5 Customers by Sales Value in the Last 6 Months", 
                sql="""SELECT c.id AS customer_id, c.name AS customer_name, SUM(sl.qty * sl.unit_price) AS total_spent FROM CUSTOMER c JOIN SALES_MST s ON c.id = s.cust JOIN SALES_LINE sl ON s.order_no = sl.order_no WHERE s.order_date >= DATE_SUB(CURDATE(), INTERVAL 6 MONTH) AND sl.order_stat <> 'X' GROUP BY c.id, c.name ORDER BY total_spent DESC LIMIT 5;""")
    
    vn.train(question="Parts Below Preferred Order Quantity", 
                sql="""SELECT id,p_code,curr_stock, pref_order_qty FROM PART_MST WHERE curr_stock < pref_order_qty AND procurement <> 'S' ORDER BY (pref_order_qty - curr_stock) DESC;""")
    
    vn.train(question="Monthly Sales Revenue Trend", 
                sql="""SELECT DATE_FORMAT(s.order_date, '%Y-%m') AS sales_month, round(SUM(sl.qty * sl.unit_price),2) AS monthly_revenue FROM SALES_MST s JOIN SALES_LINE sl ON s.order_no = sl.order_no WHERE sl.order_stat <> 'X' GROUP BY sales_month ORDER BY sales_month;""")

    vn.train(question="Direct Components for a Manufactured Part (Bill of Materials)", 
                sql="""SELECT mp.p_code AS manufactured_part_code, cp.p_code AS component_part_code, bom.qty_req AS quantity_required, cp.unit_meas AS component_unit FROM BILL_OF_M bom JOIN PART_MST mp ON bom.m_part = mp.id  JOIN PART_MST cp ON bom.c_part = cp.id  WHERE bom.m_part = 5638;""")
    
    vn.train(question="Total Component Quantity Required for Open Production Jobs", 
                    sql="""SELECT bom.c_part AS component_part_id, p.p_code AS component_part_code, SUM(j.qty * bom.qty_req) AS total_required_for_open_jobs FROM JOB_MST j JOIN BILL_OF_M bom ON j.part = bom.m_part JOIN PART_MST p ON bom.c_part = p.id   WHERE j.job_stat IN ('O', 'Q') GROUP BY bom.c_part, p.p_code ORDER BY total_required_for_open_jobs DESC;""")
        
    vn.train(question="Nonconformance Rate per Part (Simplified: NC Qty / Qty Purchased)", 
                    sql="""WITH PartPurchasesReceived AS (SELECT pl.part, SUM(pl.qty) AS total_received FROM PURCHASE_LINE pl WHERE pl.order_stat = 'C' GROUP BY pl.part),PartNonconformance AS (SELECT pl.part, SUM(nc.qty) AS total_nonconforming FROM NONCONFORM_MST nc JOIN PURCHASE_MST pm ON nc.po_no = pm.order_no  JOIN PURCHASE_LINE pl ON pm.order_no = pl.order_no AND nc.job IS NULL  GROUP BY pl.part)SELECT p.id AS part_id, p.p_code, COALESCE(ppr.total_received, 0) AS total_received, COALESCE(pnc.total_nonconforming, 0) AS total_nonconforming, CASE WHEN COALESCE(ppr.total_received, 0) > 0 THEN (COALESCE(pnc.total_nonconforming, 0) / ppr.total_received) * 100 ELSE 0 END AS nonconformance_rate_percent FROM PART_MST p LEFT JOIN PartPurchasesReceived ppr ON p.id = ppr.part LEFT JOIN PartNonconformance pnc ON p.id = pnc.part WHERE p.procurement = 'B'  ORDER BY nonconformance_rate_percent DESC;""")
    
    vn.train(question="Customers Who Haven't Ordered in Over a Year", 
                    sql="""SELECT c.id, c.name, MAX(s.order_date) AS last_order_date  FROM CUSTOMER c LEFT JOIN SALES_MST s ON c.id = s.cust AND s.order_date >= DATE_SUB(CURDATE(), INTERVAL 1 YEAR) WHERE s.order_no IS NULL  GROUP BY c.id, c.name  ORDER BY c.name;""")
    
    vn.train(question="Parts Purchased from Multiple Vendors with Cost Comparison", 
                    sql="""SELECT p.id AS part_id, p.p_code, COUNT(DISTINCT vp.vendor) AS number_of_vendors, MIN(vp.unit_cost) AS min_vendor_cost, MAX(vp.unit_cost) AS max_vendor_cost, (MAX(vp.unit_cost) - MIN(vp.unit_cost)) AS cost_difference FROM PART_MST p JOIN VEND_PART vp ON p.id = vp.part GROUP BY p.id, p.p_code HAVING COUNT(DISTINCT vp.vendor) > 1  ORDER BY cost_difference DESC, p.p_code;""")
    
    vn.train(question="Implement simple exponential smoothing forecast",
             sql = "WITH MonthlySales AS (SELECT  DATE_FORMAT(order_date, '%Y-%m') AS month, SUM(qty) AS sales FROM SALES_LINE join sales_mst on SALES_LINE.order_no = sales_mst.order_no GROUP BY DATE_FORMAT(order_date, '%Y-%m')),Smoothing AS (SELECT month, sales, @forecast := COALESCE(@forecast * 0.8 + sales * 0.2, sales) AS forecast FROM MonthlySales CROSS JOIN (SELECT @forecast := NULL) init ORDER BY month)SELECT * FROM Smoothing;" )
    
    vn.train(question="Compare actual purchase costs vs vendor quoted prices.",
             sql="SELECT pl.order_no,p.p_code,pl.unit_cost AS actual_cost,vp.unit_cost AS quoted_cost,(pl.unit_cost - vp.unit_cost) AS variance FROM PURCHASE_LINE pl JOIN VEND_PART vp ON pl.part = vp.part AND pl.order_no IN (SELECT order_no FROM PURCHASE_MST WHERE vendor in (select vendor from vend_part))JOIN PART_MST p ON pl.part = p.id;")
    
    vn.train(question="Analyze sales distribution by customer state.",
             sql="SELECT c.addr_ste AS state,COUNT(DISTINCT s.order_no) AS order_count,SUM(sl.qty * sl.unit_price) AS total_revenue FROM CUSTOMER c JOIN SALES_MST s ON c.id = s.cust JOIN SALES_LINE sl ON s.order_no = sl.order_no GROUP BY c.addr_ste ORDER BY total_revenue DESC;")
    
    vn.train(question="Calculate yield loss percentage from non-conformances.",
                sql="SELECT p.p_code, COALESCE(SUM(n.qty),0) AS nc_qty, SUM(j.qty) AS total_produced, (COALESCE(SUM(n.qty),0) / SUM(j.qty)) * 100 AS yield_loss_percent FROM JOB_MST j LEFT JOIN NONCONFORM_MST n ON j.id = n.job JOIN PART_MST p ON j.part = p.id GROUP BY p.id,p.p_code HAVING yield_loss_percent > 5;")
    
    vn.train(question="Identify single-source vendors with high-risk in terms of part quantity.",
                sql="select name,p_code,curr_stock,pref_order_qty from (SELECT  p.id, p.p_code, COUNT(DISTINCT vp.vendor) AS vendor_count, sum(p.curr_stock) curr_stock, sum(p.pref_order_qty) pref_order_qty FROM VEND_PART vp JOIN PART_MST p ON vp.part = p.id JOIN VENDOR v ON vp.vendor = v.id GROUP BY p.id,p.p_code HAVING vendor_count = 1 ) t join vend_part on t.id = vend_part.part join vendor on vend_part.vendor = vendor.id where curr_stock < pref_order_qty;")
    
    vn.train(question="Categorize inventory by last movement date.",
                    sql="SELECT p.p_code, sum(p.curr_stock) curr_stock, CASE WHEN MAX(s.order_date) IS NULL THEN 'No Sales' WHEN MAX(s.order_date) < DATE_SUB(NOW(), INTERVAL 6 MONTH) THEN 'Slow Moving' ELSE 'Active' END AS inventory_status FROM PART_MST p LEFT JOIN SALES_LINE sl ON p.id = sl.part LEFT JOIN SALES_MST s ON sl.order_no = s.order_no GROUP BY p.p_code;")
    
    vn.train(question="Find frequently paired parts in customer orders.",
                    sql="SELECT a.part AS part1, b.part AS part2, COUNT(*) AS pair_count FROM SALES_LINE a JOIN SALES_LINE b ON a.order_no = b.order_no AND a.part < b.part GROUP BY part1, part2 HAVING pair_count > 1 ORDER BY pair_count DESC;")
    
    vn.train(question="Calculate average delay between job release and completion.",
                    sql="SELECT p.p_code,AVG(DATEDIFF(j.job_cls, j.job_rls)) AS avg_delay_days, COUNT(*) AS job_count FROM JOB_MST j JOIN PART_MST p ON j.part = p.id WHERE j.job_stat = 'C' GROUP BY p.p_code HAVING avg_delay_days > 7;")
    
    vn.train(question="Identify customers with decreasing order frequency over quarters.",
                    sql="WITH QuarterlyOrders AS (SELECT c.id, QUARTER(s.order_date) AS qtr, COUNT(*) AS orders FROM CUSTOMER c JOIN SALES_MST s ON c.id = s.cust GROUP BY c.id, QUARTER(s.order_date))SELECT id AS customer_id,qtr,orders, LAG(orders) OVER(PARTITION BY id ORDER BY qtr) AS prev_qtr_orders,(orders - LAG(orders) OVER(PARTITION BY id ORDER BY qtr)) AS trend FROM QuarterlyOrders;")
    
    vn.train(question="Find parts with no sales in the last 6 months but maintained inventory.",
                    sql="SELECT p.p_code, p.curr_stock, MAX(s.order_date) AS last_sale_date FROM PART_MST p LEFT JOIN SALES_LINE sl ON p.id = sl.part LEFT JOIN SALES_MST s ON sl.order_no = s.order_no GROUP BY p.p_code,p.curr_stock HAVING last_sale_date < DATE_SUB(NOW(), INTERVAL 6 MONTH) OR last_sale_date IS NULL;")
    
    vn.train(question="Predict next month's sales using 3-month moving average.",
                    sql="WITH MonthlySales AS (SELECT DATE_FORMAT(s.order_date, '%Y-%m') AS month, SUM(sl.qty) AS total_sales FROM SALES_MST s JOIN SALES_LINE sl ON s.order_no = sl.order_no GROUP BY DATE_FORMAT(s.order_date, '%Y-%m'))SELECT month, total_sales, AVG(total_sales) OVER(ORDER BY month ROWS BETWEEN 2 PRECEDING AND CURRENT ROW) AS forecast FROM MonthlySales;")
    
    vn.train(question="Identify parts with inconsistent vendor lead times exceeding 20% variability.",
                    sql="SELECT p.p_code,v.id, v.name AS vendor,AVG(CAST(SUBSTRING_INDEX(vp.part_lt, ' ', 1) AS UNSIGNED)) AS avg_lead_days,STDDEV(CAST(SUBSTRING_INDEX(vp.part_lt, ' ', 1) AS UNSIGNED)) AS std_deviation, (STDDEV(CAST(SUBSTRING_INDEX(vp.part_lt, ' ', 1) AS UNSIGNED)) /  AVG(CAST(SUBSTRING_INDEX(vp.part_lt, ' ', 1) AS UNSIGNED))) AS variability_ratio FROM VEND_PART vp JOIN VENDOR v ON vp.vendor = v.id JOIN PART_MST p ON vp.part = p.id GROUP BY v.id,p.p_code, v.name;")
    
    vn.train(question="Suggest price adjustments based on vendor cost changes and sales performance.",
                    sql="SELECT p.id, p.p_code, p.unit_price AS current_price,MIN(vp.unit_cost) AS lowest_vendor_cost,AVG(sl.unit_price) AS avg_sale_price, CASE WHEN MIN(vp.unit_cost) > p.unit_price * 0.9 THEN 'Increase Price' WHEN AVG(sl.unit_price) < p.unit_price * 0.8 THEN 'Decrease Price' ELSE 'Maintain Price' END AS recommendation FROM PART_MST p JOIN VEND_PART vp ON p.id = vp.part JOIN SALES_LINE sl ON p.id = sl.part GROUP BY p.id,p.p_code, p.unit_price;")
    
    vn.train(question="Find components with the highest demand from open production jobs compared to current stock.",
             sql="WITH ComponentDemand AS (SELECT bom.c_part, SUM(j.qty * bom.qty_req) AS required_qty FROM JOB_MST j JOIN BILL_OF_M bom ON j.part = bom.m_part WHERE j.job_stat IN ('Q','O') GROUP BY bom.c_part)SELECT p.p_code,cd.required_qty,p.curr_stock,(cd.required_qty - p.curr_stock) AS deficit FROM ComponentDemand cd JOIN PART_MST p ON cd.c_part = p.id WHERE cd.required_qty > p.curr_stock;")
    
    vn.train(question="Calculate the financial impact of non-conformances by linking them to purchase order costs.",
             sql = "SELECT n.id AS nc_id,pl.order_no,p.p_code,n.qty AS nc_quantity,pl.unit_cost,(n.qty * pl.unit_cost) AS financial_impact FROM NONCONFORM_MST n JOIN PURCHASE_MST pm ON n.po_no = pm.order_no JOIN PURCHASE_LINE pl ON pm.order_no = pl.order_no JOIN PART_MST p ON pl.part = p.idWHERE n.po_no IS NOT NULL;")
    
    vn.train(question="Rank vendors by average delivery speed (time between order date and line closure) for completed purchase orders.",
             sql="SELECT v.name,AVG(DATEDIFF(pl.line_cls, pm.order_date)) AS avg_delivery_days,DENSE_RANK() OVER(ORDER BY AVG(DATEDIFF(pl.line_cls, pm.order_date))) AS performance_rank FROM PURCHASE_MST pm JOIN PURCHASE_LINE pl ON pm.order_no = pl.order_no JOIN VENDOR v ON pm.vendor = v.id WHERE pl.order_stat = 'C' GROUP BY v.id,v.name HAVING avg_delivery_days IS NOT NULL;")
    # --- General Business Rules/Concepts ---
    # vn.train(documentation="'Lead Time' generally refers to the time required to procure or produce a part (PART_MST.sys_lt is a default).")
    # vn.train(documentation="'On-time Delivery' can be assessed by comparing SALES_LINE.due_date with actual delivery dates (potentially in another table like deliveries if it exists).")



# def extract_sql_from_response(response: str) -> str:
#     """Extract SQL code from LLM response using regex patterns"""
#     # Match multi-line SQL between `````` with flexible whitespace
#     response.split(":\n\n")[1].replace("\n"," ")
# def create_plot(question,extracted_sql,df_result):
#     plotly_code = vn.generate_plotly_code(question = question,
#                                           sql = extracted_sql,
#                                           df = df_result)
#     fig = vn.get_plotly_figure(plotly_code=plotly_code,
#                                df=df_result)
#     fig.show()

# --- SQL Extraction Function ---
def extract_sql_from_response(response: str) -> str:
    """Extract SQL code from LLM response using multi-pattern approach"""
    if not response:
        return None
        
    # Clean up the response
    response = response.strip()
    
    # Pattern 1: Look for SQL between ```sql and ``` markers
    sql_block_pattern = r'```sql\s*(.*?)\s*```'
    sql_matches = re.findall(sql_block_pattern, response, re.DOTALL)
    if sql_matches:
        return sql_matches[0].strip()
    
    # Pattern 2: Look for SQL between ``` markers
    code_block_pattern = r'```\s*(.*?)\s*```'
    code_matches = re.findall(code_block_pattern, response, re.DOTALL)
    if code_matches:
        for match in code_matches:
            if re.search(r'\b(SELECT|INSERT|UPDATE|DELETE|WITH)\b', match, re.IGNORECASE):
                return match.strip()
    
    # Pattern 3: Direct SQL pattern
    sql_pattern = r'\b(SELECT|WITH|INSERT|UPDATE|DELETE)[\s\S]+?;'
    direct_match = re.search(sql_pattern, response, re.IGNORECASE)
    if direct_match:
        return direct_match.group(0).strip()
    
    return None

# --- complex query architecture --- 
def generate_complex_sql(question: str) -> str:
    """Handles multi-step manufacturing queries"""
    system_msg = """You are an SQL expert. Follow these steps:
    1. Identify required tables from: sales_line, part_mst, purchase_line
    2. Determine necessary joins and filters
    3. Apply temporal constraints using CURDATE()
    4. Include proper aggregations (SUM, AVG, COUNT)
    5. Validate against database schema"""
    
    return vn.generate_sql(
        question=new_question,
        chat_history=[vn.system_message(system_msg)]
    )


# --- Question Refinement Function ---
def refine_question(original_question: str) -> str:
    refinement_prompt = f"""You are an expert Prompt Engineer. Rephrase and expand this manufacturing question to be more specific and actionable. 
    Maintain original intent while adding context about tables from {table_list} and columns from {column_list}. Return ONLY the improved question.

    Original: {original_question}
    Refined: """
    
    refined = vn.submit_prompt(refinement_prompt).strip()
    return refined if len(refined) > len(original_question) else original_question
    

# --- LLM Analysis and Explanation ---
def analyze_and_explain(question, df_result, category):
    """
    For judgement/suggestion: Use LLM to analyze the DataFrame and give a judgement or recommendation.
    For descriptive: Optionally summarize the data.
    """
    # Convert DataFrame to CSV or dict for LLM context (limit rows for brevity)
    data_sample = df_result.head(20).to_csv(index=False)
    if category == "judgement":
        prompt = f"""
You are a manufacturing analytics expert.
Given the following question:
"{question}"
And the following data (CSV format):
{data_sample}

Analyze the data, provide a clear judgement about the situation, and explain your reasoning. Clearly state: "Judgement: ...". Advise the user to independently verify your judgement.
"""
    elif category == "suggestion":
        prompt = f"""
You are a manufacturing analytics expert.
Given the following question:
"{question}"
And the following data (CSV format):
{data_sample}

Analyze the data and provide a concrete, actionable recommendation. Clearly state: "Suggestion: ...". Advise the user to independently verify your suggestion.
"""
    else:
        # For descriptive, just summarize (optional)
        prompt = f"""
You are a manufacturing analytics expert.
Given the following question:
"{question}"
And the following data (CSV format):
{data_sample}

Briefly summarize the main findings from the data.
"""
    # Call your LLM (reuse submit_prompt)
    response = vn.submit_prompt(prompt)
    return response.strip()


current_date = datetime.now().strftime("%Y-%m-%d")
# --- Main Interaction Loop ---
# --- Modified Main Interaction Loop ---
while True:
    try:
        question = "whats the total sales value till now?"
        
        if question.lower() == 'exit':
            break
        else:
            # new_question = refine_question(question)
            new_question = f"""Today is {current_date}. You are a manufacturing analyst. "
                    Generate ONLY the SQL query (no comments) using these tables only {table_list}, "
                    "for the following new_question:""" + f" {question}"
                    
        print("\nAgent thinking...")          
        start_time = time.time()

        # Get classified response
        category = vn.classify_question(new_question)
        print(f"\n--- Classified Category: {category.upper()} ---")

        llm_response = generate_complex_sql(new_question)

        # Extract SQL using implemented function
        extracted_sql = extract_sql_from_response(llm_response)
        try:
            extracted_sql = extracted_sql.split("```sql")[1].replace("```","")
            print(f"\n--- Extracted SQL ({category.upper()}) ---")
            print(extracted_sql)
            print("--- End of Extracted SQL ---")
        except:
            print("\nCould not extract executable SQL from response")
            continue

        execute = input(f"\nExecute {category.upper()} SQL query? (y/n): ").lower().strip()
        
        if execute == 'y':
            try:
                print("Executing query...")
                exec_start_time = time.time()
                
                df_result = vn.run_sql(sql=extracted_sql)
                # Category-specific post-processing
                if category == "judgement":
                    print("\n** Analytical Insights **")
                    print(df_result.describe())
                    # print(f"\n=== {category.upper()} EXPLANATION ===\n{explanation}")

                elif category == "suggestion":
                    print("\n** Recommended Actions **")
                    print(df_result.head())
                    # explanation = analyze_and_explain(question, df_result, category)
                    

                else:  # descriptive
                    print("\n** Query Results **")
                    
                
                # Display results
                with pd.option_context('display.max_rows', 20, 
                                    'display.max_columns', None,
                                    'display.width', 1000):
                    if len(df_result) > 20:
                        print(df_result.head(20))
                        print(f"... (truncated, {len(df_result)} rows total)")
                    else:
                        print(df_result)


                # create_plot(df_result,extracted_sql,question)
                print(f"Query executed successfully in {time.time() - exec_start_time:.2f} seconds")
                explanation = analyze_and_explain(question, df_result, category)
                print(f"\n=== {category.upper()} EXPLANATION ===\n{explanation}")
                re_run = input(f"\Should I re-think {question.upper()} for a better response? (y/n): ").lower().strip()
                if re_run == 'n':
                    save = input(f"\nDo You want to save this {category.upper()} SQL query for future use? (y/n): ").lower().strip()
                    if save == "y":
                        print("Saving results...")
                        vn.train(question=question, sql=extracted_sql)
                        break
                    else:
                        print("SQL query not saved.")
                        break    
                else:
                    print("Retrying...")
                    new_question = (f"""The query is giving this error:  {type(exec_e).__name__} - {exec_e} 
                                    resolve the error and provide a better response for this question: """
                                    + new_question)
                    continue
                    

            except Exception as exec_e:
                print(f"\nError executing SQL: {type(exec_e).__name__} - {exec_e}")
                print(f"Failed SQL:\n{extracted_sql}")
                re_run = input(f"\nShould I re-think {question.upper()} for a better response? (y/n): ").lower().strip()
                
                if re_run == 'n':
                    break
                else:
                    print("Retrying...")
                    new_question = (f"""The query is giving this error:  {type(exec_e).__name__} - {exec_e} 
                                    resolve the error and provide a better response for this question: """
                                    + new_question)
                    continue

        else:
            break

    except KeyboardInterrupt:
        print("\nExiting...")
        break


Connecting to mysql database 'ft_database' on 127.0.0.1...
Database connection successful.

--- Vanna Training ---
Loads metadata (DDL, docs, SQL examples) into Vanna's local ChromaDB vector store.


Add of existing embedding ID: 31c39d68-1b22-5545-97e0-353dc991f251-doc
Insert of existing embedding ID: 31c39d68-1b22-5545-97e0-353dc991f251-doc
Add of existing embedding ID: fff92c80-4899-53ea-a8e4-a18c0415b820-doc
Insert of existing embedding ID: fff92c80-4899-53ea-a8e4-a18c0415b820-doc
Add of existing embedding ID: 020be5d7-5209-56ca-8b6a-5962efd95682-doc
Insert of existing embedding ID: 020be5d7-5209-56ca-8b6a-5962efd95682-doc
Add of existing embedding ID: 2f420d2e-9850-51f8-ab1a-2bf1e77e2d3b-doc
Insert of existing embedding ID: 2f420d2e-9850-51f8-ab1a-2bf1e77e2d3b-doc
Add of existing embedding ID: 9aebcf7e-04a6-5368-b307-25d7ca8a1741-doc
Insert of existing embedding ID: 9aebcf7e-04a6-5368-b307-25d7ca8a1741-doc
Add of existing embedding ID: d27aa358-78b0-58f6-8dc7-4f50d0c2b8e2-doc
Insert of existing embedding ID: d27aa358-78b0-58f6-8dc7-4f50d0c2b8e2-doc
Add of existing embedding ID: 97dd033f-f6c1-574a-b60f-242668753672-doc
Insert of existing embedding ID: 97dd033f-f6c1-574a-b60f-24

Running Vanna training...
  Example: Fetching DDL (Customize for your setup)...
  Example: Adding Documentation (Add yours)...
Adding documentation....
Adding documentation....


Add of existing embedding ID: 06f173d4-10dc-5912-8e3c-690b36769eab-doc
Insert of existing embedding ID: 06f173d4-10dc-5912-8e3c-690b36769eab-doc
Add of existing embedding ID: c7737722-5406-50ec-a2e3-457631201e43-doc
Insert of existing embedding ID: c7737722-5406-50ec-a2e3-457631201e43-doc


Adding documentation....
Adding documentation....


Add of existing embedding ID: 5710f1b2-2513-59b1-a22a-06ca846f3335-doc
Insert of existing embedding ID: 5710f1b2-2513-59b1-a22a-06ca846f3335-doc
Add of existing embedding ID: f62258fc-5052-5064-8778-17d64b48fb75-doc
Insert of existing embedding ID: f62258fc-5052-5064-8778-17d64b48fb75-doc


Adding documentation....
Adding documentation....


Add of existing embedding ID: 3a1ce7ab-9179-5232-8d27-603f1c7fc185-doc
Insert of existing embedding ID: 3a1ce7ab-9179-5232-8d27-603f1c7fc185-doc
Add of existing embedding ID: 118f8eeb-9139-5bb3-aa51-ffde0d892907-doc
Insert of existing embedding ID: 118f8eeb-9139-5bb3-aa51-ffde0d892907-doc


Adding documentation....
Adding documentation....


Add of existing embedding ID: 8c8564bf-5f18-5fca-b590-917e64a7c696-doc
Insert of existing embedding ID: 8c8564bf-5f18-5fca-b590-917e64a7c696-doc
Add of existing embedding ID: 1e2fad2c-8b40-5101-b01a-eb7cb27e9093-doc
Insert of existing embedding ID: 1e2fad2c-8b40-5101-b01a-eb7cb27e9093-doc


Adding documentation....
Adding documentation....


Add of existing embedding ID: adeabba7-8bbd-5156-97a3-d1e27d0e8f47-doc
Insert of existing embedding ID: adeabba7-8bbd-5156-97a3-d1e27d0e8f47-doc
Add of existing embedding ID: 380c2a34-55c2-5fe6-aaf9-5b98fc734a1d-doc
Insert of existing embedding ID: 380c2a34-55c2-5fe6-aaf9-5b98fc734a1d-doc
Add of existing embedding ID: 2e795c42-c6f8-5c44-aa1c-eb628fdee4fc-doc
Insert of existing embedding ID: 2e795c42-c6f8-5c44-aa1c-eb628fdee4fc-doc


Adding documentation....
Adding documentation....
Adding documentation....


Add of existing embedding ID: 3a25bf9f-42c8-5156-823b-ce6a42754b95-doc
Insert of existing embedding ID: 3a25bf9f-42c8-5156-823b-ce6a42754b95-doc
Add of existing embedding ID: fa75db61-9760-51e8-b436-32ed46777d85-doc
Insert of existing embedding ID: fa75db61-9760-51e8-b436-32ed46777d85-doc
Add of existing embedding ID: 808d1279-ae6d-5a3f-b2a9-6a42245a4f28-doc
Insert of existing embedding ID: 808d1279-ae6d-5a3f-b2a9-6a42245a4f28-doc


Adding documentation....
Adding documentation....
Adding documentation....


Add of existing embedding ID: deab258b-8c49-5858-8f76-a52a6269500e-doc
Insert of existing embedding ID: deab258b-8c49-5858-8f76-a52a6269500e-doc
Add of existing embedding ID: 2db3b251-253c-55b3-99aa-12540571472c-doc
Insert of existing embedding ID: 2db3b251-253c-55b3-99aa-12540571472c-doc


Adding documentation....
Adding documentation....


Add of existing embedding ID: 417933a5-e35b-5dc9-a0d1-bf5bf8a4d2ce-doc
Insert of existing embedding ID: 417933a5-e35b-5dc9-a0d1-bf5bf8a4d2ce-doc
Add of existing embedding ID: 628de61e-0ed2-5b90-898f-38e558ac180f-doc
Insert of existing embedding ID: 628de61e-0ed2-5b90-898f-38e558ac180f-doc
Add of existing embedding ID: 80065a35-16cd-5fd7-a372-512bd06427e5-doc
Insert of existing embedding ID: 80065a35-16cd-5fd7-a372-512bd06427e5-doc


Adding documentation....
Adding documentation....
Adding documentation....


Add of existing embedding ID: aae9e2ec-a925-599b-a250-2f0b2410a291-doc
Insert of existing embedding ID: aae9e2ec-a925-599b-a250-2f0b2410a291-doc
Add of existing embedding ID: e9263c31-5c79-5360-816b-5f7f98684880-doc
Insert of existing embedding ID: e9263c31-5c79-5360-816b-5f7f98684880-doc
Add of existing embedding ID: 5f751b55-e510-53da-a888-5ecefa26fee9-doc
Insert of existing embedding ID: 5f751b55-e510-53da-a888-5ecefa26fee9-doc


Adding documentation....
Adding documentation....
Adding documentation....


Add of existing embedding ID: 7ed5330d-ac46-5eb4-89b5-9971d2414c73-doc
Insert of existing embedding ID: 7ed5330d-ac46-5eb4-89b5-9971d2414c73-doc
Add of existing embedding ID: 23909f8f-6287-589a-9018-c962c6900982-doc
Insert of existing embedding ID: 23909f8f-6287-589a-9018-c962c6900982-doc
Add of existing embedding ID: a3dff5a7-f7ae-5291-9b03-a0b09282c724-doc
Insert of existing embedding ID: a3dff5a7-f7ae-5291-9b03-a0b09282c724-doc


Adding documentation....
Adding documentation....
Adding documentation....


Add of existing embedding ID: 595aefd0-fcf8-5f8f-8068-5e83d009e593-doc
Insert of existing embedding ID: 595aefd0-fcf8-5f8f-8068-5e83d009e593-doc
Add of existing embedding ID: bbeecc4f-d710-575a-8f54-5c94c81562f7-doc
Insert of existing embedding ID: bbeecc4f-d710-575a-8f54-5c94c81562f7-doc


Adding documentation....
Adding documentation....


Add of existing embedding ID: 8386d710-25b6-51b9-91d8-30785a276e62-doc
Insert of existing embedding ID: 8386d710-25b6-51b9-91d8-30785a276e62-doc
Add of existing embedding ID: 6910f4a8-91ea-598d-9b25-57242d313d65-doc
Insert of existing embedding ID: 6910f4a8-91ea-598d-9b25-57242d313d65-doc


Adding documentation....
Adding documentation....


Add of existing embedding ID: ab40f9fb-5a08-5f54-8d12-0e9d43477249-doc
Insert of existing embedding ID: ab40f9fb-5a08-5f54-8d12-0e9d43477249-doc
Add of existing embedding ID: dea8e4e4-e47c-5c8e-9a6a-7166bc5e54dd-doc
Insert of existing embedding ID: dea8e4e4-e47c-5c8e-9a6a-7166bc5e54dd-doc


Adding documentation....
Adding documentation....


Add of existing embedding ID: 49ed9ddb-37cb-5687-ab29-5364cc05e755-doc
Insert of existing embedding ID: 49ed9ddb-37cb-5687-ab29-5364cc05e755-doc
Insert of existing embedding ID: 76378bf1-3f39-52d7-8589-31832805c77b-doc


Adding documentation....
Adding documentation....


Insert of existing embedding ID: 77937f1f-1c06-5c8e-9b2a-dbaecbb40383-doc
Insert of existing embedding ID: 2bae7fcb-36a7-5286-b8c6-71208a314226-doc
Insert of existing embedding ID: 49333a1c-ca97-5b18-a978-eb9f9f22d43c-doc


Adding documentation....
Adding documentation....
Adding documentation....


Insert of existing embedding ID: 24e2eb51-a625-5ba2-bd1e-ceb1b1df0182-doc
Insert of existing embedding ID: c1d11163-ce12-54e8-a638-426f870aa7f4-doc


Adding documentation....
Adding documentation....


Insert of existing embedding ID: 0551b27b-7308-52ed-9abd-4415dd77ec81-doc
Insert of existing embedding ID: fb6f1cbe-b884-51e1-a624-7ef41fea05e1-doc
Insert of existing embedding ID: 5257415b-4130-5a26-946a-d31f4069503e-doc


Adding documentation....
Adding documentation....
Adding documentation....


Insert of existing embedding ID: 96bde749-8d97-5d6e-88d2-dac226a05810-doc
Insert of existing embedding ID: c02733e3-c35a-53a8-b5ce-996106d1b55d-doc
Insert of existing embedding ID: af9febcb-31be-5b14-8ce8-0fac9252681a-doc


Adding documentation....
Adding documentation....
Adding documentation....


Insert of existing embedding ID: b03e0de0-ae76-5be7-b66c-ba00c99ac122-doc
Insert of existing embedding ID: 26430856-5ae1-5ecf-867b-b933387d0461-doc


Adding documentation....
Adding documentation....


Insert of existing embedding ID: 242db09b-2597-5c29-b559-99eb35e6645d-doc
Insert of existing embedding ID: 53f4e256-e5f4-5793-b2fc-d15fe96aed80-doc


Adding documentation....
Adding documentation....


Insert of existing embedding ID: e3a1bdc3-429f-5a7f-9e80-a667d434142c-doc
Insert of existing embedding ID: 128533d2-05c4-5167-8867-de34e7743520-doc
Insert of existing embedding ID: 37a60c7d-71e5-5999-a7a9-d5062627d6db-doc


Adding documentation....
Adding documentation....
Adding documentation....


Insert of existing embedding ID: 0300c6b3-f8f8-52bb-9d2e-08d83a1e973c-doc
Insert of existing embedding ID: 5b4e09c7-3aa4-5d97-918a-570f5b07fc5b-doc
Insert of existing embedding ID: 5ee3dc07-0388-522f-8277-f9abf7399e4b-doc


Adding documentation....
Adding documentation....
Adding documentation....


Insert of existing embedding ID: 7a1d58d5-2171-565f-9edf-3cb8a125ef66-doc
Insert of existing embedding ID: 7d9e084d-20aa-5ab5-b619-4ce79a69131f-doc
Insert of existing embedding ID: 0dc60cc5-f32b-578b-9708-dfdadc907edf-doc


Adding documentation....
Adding documentation....
Adding documentation....


Insert of existing embedding ID: 1f322c62-5226-5ee3-86f8-695e4ab976b0-doc
Insert of existing embedding ID: bc909e47-ed1f-5b15-be2c-0c1789202c68-doc
Insert of existing embedding ID: 04b42da7-2f59-5af1-9379-f9c8854b943c-doc


Adding documentation....
Adding documentation....
Adding documentation....


Insert of existing embedding ID: 87135ea3-a863-589b-8d9e-207c0bf9e368-doc
Insert of existing embedding ID: 6dafd24f-2d1b-5b34-a48a-7c52345f8fd3-doc
Insert of existing embedding ID: b7c247a6-3fc3-599f-9eb4-6d2ab1920097-doc


Adding documentation....
Adding documentation....
Adding documentation....


Insert of existing embedding ID: e200dc7d-15c2-55aa-9452-92526bca6476-doc
Insert of existing embedding ID: 3a2b3b3c-d62e-57c9-9017-e00b1dfacf1e-doc


Adding documentation....
Adding documentation....


Insert of existing embedding ID: 0a712900-a404-5806-ac70-6554fa3d6e27-doc
Insert of existing embedding ID: eaf17db6-7e22-5ab6-b635-0eca04ab8db1-doc
Insert of existing embedding ID: d83ce6c1-5e5e-5b58-bde5-219d2811b351-doc


Adding documentation....
Adding documentation....


Insert of existing embedding ID: 9d2cfeb6-376b-52b4-911e-ff90b1d01409-doc


Adding documentation....
Adding documentation....


Insert of existing embedding ID: d47f3274-ac0c-5782-b821-3f086922139f-doc
Insert of existing embedding ID: 49f130b8-13f2-5c3c-8e60-20acda411259-doc


Adding documentation....
Adding documentation....


Insert of existing embedding ID: 82f36a27-c95f-598f-ba45-7c4b36d5fce6-doc
Insert of existing embedding ID: a54ce698-1275-5bb2-82e0-c04e1cdc0cd3-sql


Adding SQL examples...


Insert of existing embedding ID: 4293528b-c9df-5b02-9c5e-6ef4c6217979-sql
Insert of existing embedding ID: d4c9bf2f-3442-51bc-9334-9f2131659f51-sql
Insert of existing embedding ID: 41e60ad4-c6fe-5d80-b53d-e176f814a6cf-sql
Insert of existing embedding ID: 1b18671a-c5fe-5288-ab9c-2f843422a01d-sql
Insert of existing embedding ID: f261db6f-0489-5795-abc3-f6ea78655ebd-sql
Insert of existing embedding ID: 0a85a0c1-8e67-5516-8991-6d619cb2904c-sql
Insert of existing embedding ID: 1af897fe-fe24-511a-8002-4a56ca7980c1-sql
Insert of existing embedding ID: ce73e887-d8e1-5ca9-aa6c-797c7e410212-sql
Insert of existing embedding ID: 8b230bce-2570-547d-991c-474d5c438162-sql
Insert of existing embedding ID: b9a02b4a-3f38-50ad-b293-5f36a79d5370-sql
Insert of existing embedding ID: 81759c39-654f-5840-a43b-bc38f5e9bad2-sql
Insert of existing embedding ID: 99aa0d08-288b-50fc-8256-4c0033538229-sql
Insert of existing embedding ID: 75131bde-8356-51d8-9b85-30f298a51275-sql
Insert of existing embedding ID: 51322


Agent thinking...

--- Classified Category: DESCRIPTIVE ---
SQL Prompt: [{'role': 'system', 'content': "You are a SQL expert. Please help to generate a SQL query to answer the question. Your response should ONLY be based on the given context and follow the response guidelines and format instructions. \n===Additional Context \n\nThe following columns are in the part_mst table in the def database:\n\n|    | TABLE_CATALOG   | TABLE_SCHEMA   | TABLE_NAME   | COLUMN_NAME    | DATA_TYPE   | COLUMN_COMMENT   |\n|---:|:----------------|:---------------|:-------------|:---------------|:------------|:-----------------|\n| 20 | def             | ft_database    | part_mst     | id             | int         |                  |\n| 21 | def             | ft_database    | part_mst     | p_code         | text        |                  |\n| 22 | def             | ft_database    | part_mst     | procurement    | text        |                  |\n| 23 | def             | ft_database    | part_mst     | 

In [54]:
llm_response

"SELECT SUM(sl.qty * sl.unit_price) AS total_sales_value FROM sales_line sl JOIN sales_mst sm ON sl.order_no = sm.order_no WHERE sm.order_date <= '2025-04-30';"

In [55]:
extract_sql_from_response(llm_response)

"SELECT SUM(sl.qty * sl.unit_price) AS total_sales_value FROM sales_line sl JOIN sales_mst sm ON sl.order_no = sm.order_no WHERE sm.order_date <= '2025-04-30';"

In [56]:
import vanna

In [59]:
import streamlit as st
import pandas as pd
import time
from datetime import datetime
from mistral_v2 import (
    vn,  # your MyVanna instance
    extract_sql_from_response,
    generate_complex_sql,
    analyze_and_explain,
)

current_date = datetime.now().strftime("%Y-%m-%d")

st.title("FactoryTwin AI Assistant",)

if "messages" not in st.session_state:
    st.session_state.messages = []
if "step" not in st.session_state:
    st.session_state.step = 0
if "pending" not in st.session_state:
    st.session_state.pending = {}

# Show chat history
for message in st.session_state.messages:
    with st.chat_message(message["role"]):
        if isinstance(message["content"], pd.DataFrame):
            st.dataframe(message["content"])
        else:
            st.markdown(message["content"])

def add_message(role, content):
    st.session_state.messages.append({"role": role, "content": content})

def main():
    # Step 0: Get user question
    if st.session_state.step == 0:
        prompt = "whats the total sales value till now?"
        if prompt:
            if prompt.lower() == "exit":
                add_message("assistant", "Session ended. Goodbye!")
                st.session_state.step = -1
                return
            st.session_state.pending["question"] = prompt
            add_message("user", prompt)
            new_question = (
                f"Remember Today's date for date related queries {current_date}. "
                f"Answer this question: {prompt}"
            )
            st.session_state.pending["new_question"] = new_question
            # Classify question
            category = vn.classify_question(prompt)
            st.session_state.pending["category"] = category
            add_message("assistant", f"**Classified Category:** `{category.upper()}`")
            st.session_state.step = 1
            st.rerun()

    # Step 1: Generate SQL and show agent thinking
    elif st.session_state.step == 1:
        new_question = st.session_state.pending["new_question"]
        category = st.session_state.pending["category"]
        with st.chat_message("assistant"):
            st.markdown("🧠 **Agent thinking...**")
            st.code(new_question)
        start_time = time.time()
        llm_response = generate_complex_sql(new_question)
        extracted_sql = extract_sql_from_response(llm_response)
        st.session_state.pending["llm_response"] = llm_response
        st.session_state.pending["extracted_sql"] = extracted_sql
        if not extracted_sql:
            add_message("assistant", "❌ Could not extract executable SQL from response.")
            st.session_state.step = 0
            return
        add_message("assistant", f"**Extracted SQL ({category.upper()}):**\n``````")
        if category != "suggestion":
            st.session_state.step = 2
        else:
            st.session_state.step = 4
        st.rerun()

    # Step 2: Ask user to execute SQL
    elif st.session_state.step == 2:
        category = st.session_state.pending["category"]
        execute = st.radio(
            f"Execute {category.upper()} SQL query?",
            ("Yes", "No"),
            key=f"execute_{len(st.session_state.messages)}"
        )
        if execute:
            if execute == "Yes":
                st.session_state.step = 3
            else:
                add_message("assistant", f"Query execution cancelled.")
                st.session_state.step = 0
            st.rerun()

    # Step 3: Execute SQL, show results, ask for re-think/save
    elif st.session_state.step == 3:
        extracted_sql = st.session_state.pending["extracted_sql"]
        question = st.session_state.pending["question"]
        category = st.session_state.pending["category"]
        try:
            with st.spinner("Executing query..."):
                exec_start_time = time.time()
                df_result = vn.run_sql(sql=extracted_sql)
            if category == "judgement":
                add_message("assistant", "**Analytical Insights**")
                add_message("assistant", df_result.describe())
            else:
                add_message("assistant", "**Query Results**")
            if len(df_result) > 20:
                add_message("assistant", df_result.head(20))
                add_message("assistant", f"... (truncated, {len(df_result)} rows total)")
            else:
                add_message("assistant", df_result)
            add_message("assistant", f"Query executed successfully in {time.time() - exec_start_time:.2f} seconds")
            explanation = analyze_and_explain(question, df_result, category)
            add_message("assistant", f"**{category.upper()} EXPLANATION**\n{explanation}")
            re_run = st.radio(
                f"Should I re-think '{question.upper()}' for a better response?",
                ("No", "Yes"),
                key=f"rethink_{len(st.session_state.messages)}"
            )
            if re_run == "No":
                save = st.radio(
                    f"Do you want to save this {category.upper()} SQL query for future use?",
                    ("Yes", "No"),
                    key=f"save_{len(st.session_state.messages)}"
                )
                if save == "Yes":
                    add_message("assistant", "Saving results...")
                    vn.train(question=question, sql=extracted_sql)
                else:
                    add_message("assistant", "SQL query not saved.")
                st.session_state.step = 0
            else:
                add_message("assistant", "Retrying...")
                st.session_state.step = 1  # Go back to regenerate
            st.rerun()
        except Exception as exec_e:
            add_message(
                "assistant",
                f"❌ **Error executing SQL:** {type(exec_e).__name__} - {exec_e}\n"
                f"**Failed SQL:**\n``````"
            )
            re_run = st.radio(
                f"Should I re-think '{question.upper()}' for a better response?",
                ("No", "Yes"),
                key=f"rethink_err_{len(st.session_state.messages)}"
            )
            if re_run == "No":
                st.session_state.step = 0
            else:
                add_message("assistant", "Retrying...")
                st.session_state.step = 1
            st.rerun()

    # Step 4: Suggestion category: ask for satisfaction
    elif st.session_state.step == 4:
        question = st.session_state.pending["question"]
        s_rethink = st.radio(
            f"Are you satisfied with the SUGGESTION or do you want me to re-think?",
            ("Yes", "No"),
            key=f"suggestion_{len(st.session_state.messages)}"
        )
        if s_rethink == "Yes":
            st.session_state.step = 0
        else:
            add_message("assistant", "Retrying...")
            st.session_state.step = 1
        st.rerun()

    # Step -1: Session ended
    elif st.session_state.step == -1:
        st.stop()

main()




In [61]:
new_question

'Today is 2025-04-30. You are a manufacturing analyst. "\n                    Generate ONLY the SQL query (no comments) using these tables only [\'bill_of_m\', \'customer\', \'job_mst\', \'nonconform_mst\', \'part_mst\', \'purchase_line\', \'purchase_mst\', \'sales_line\', \'sales_mst\', \'vend_part\', \'vendor\'], "\n                    "for the following new_question: whats the total sales value till now?'

In [63]:
print("The query is giving this error: {type(exec_e).__name__} - {exec_e}. "
                        "Resolve the error and provide a better response for this question: "
                        + question)

The query is giving this error: {type(exec_e).__name__} - {exec_e}. Resolve the error and provide a better response for this question: whats the total sales value till now?


In [65]:
training_data = vn.get_training_data()
training_data[training_data.training_data_type=='documentation']

Unnamed: 0,id,question,content,training_data_type
0,971df317-6f95-5574-960b-8617ebb320e8-doc,,Table CUSTOMER contains information about busi...,documentation
1,06f173d4-10dc-5912-8e3c-690b36769eab-doc,,CUSTOMER.id is the unique identifier for each ...,documentation
2,c7737722-5406-50ec-a2e3-457631201e43-doc,,CUSTOMER.name is the company name of the custo...,documentation
3,5710f1b2-2513-59b1-a22a-06ca846f3335-doc,,CUSTOMER.addr_str contains the street address ...,documentation
4,f62258fc-5052-5064-8778-17d64b48fb75-doc,,CUSTOMER.addr_city contains the city for the c...,documentation
...,...,...,...,...
79,97dd033f-f6c1-574a-b60f-242668753672-doc,,The following columns are in the purchase_mst ...,documentation
80,cf81a43b-c3f7-5282-b3cd-2adcb2b0a3fb-doc,,The following columns are in the sales_line ta...,documentation
81,b675c221-5c95-550d-9eb7-ed55de111f03-doc,,The following columns are in the sales_mst tab...,documentation
82,ff1bb159-0fcf-57c1-a002-26d9f3ba2d9b-doc,,The following columns are in the vend_part tab...,documentation


In [66]:
clear_traiining_data()

Delete of nonexisting embedding ID: b1a9a879-7e82-5fec-8dcc-0af5cfeccde6-sql
Delete of nonexisting embedding ID: 40fc65d2-13a6-5705-bc41-e7f05a866702-sql
Delete of nonexisting embedding ID: 4676fe4d-a800-590a-a6e2-b413597a190f-sql
Delete of nonexisting embedding ID: fb7fcae4-2c70-5677-8311-58ee801d538f-sql
Delete of nonexisting embedding ID: 6ff1198a-2811-544a-9255-2fc225e9d2b6-sql
Delete of nonexisting embedding ID: 3b9429ec-afa5-5912-b50b-f42354a2fb00-sql
Delete of nonexisting embedding ID: 43ec2853-0cbb-5d06-821c-a6e9da0d9fd2-sql
Delete of nonexisting embedding ID: 1a9a3ff1-80b9-59d4-aec5-9ec9baff0fe8-sql
Delete of nonexisting embedding ID: abf090f5-351d-5d3d-aa04-e003678090d7-sql
Delete of nonexisting embedding ID: a3f58cf6-a0a4-52bf-968e-31a4c01ba6c6-doc
Delete of nonexisting embedding ID: 5d79f594-e95b-5c85-85d8-b288956afbc6-doc
Delete of nonexisting embedding ID: dbc129a2-896e-54d7-8cf5-5144b538a5a6-doc
Delete of nonexisting embedding ID: ff987249-dd4e-5859-abf0-9aa631d36fac-doc

Unnamed: 0,id,question,content,training_data_type


In [67]:
vn.get_training_data()

Unnamed: 0,id,question,content,training_data_type
0,a54ce698-1275-5bb2-82e0-c04e1cdc0cd3-sql,Get all customers,SELECT * FROM customer;,sql
1,4293528b-c9df-5b02-9c5e-6ef4c6217979-sql,Get all vendors in a specific state,SELECT * FROM vendor WHERE 'addr_ste' = 'TX';,sql
2,d4c9bf2f-3442-51bc-9334-9f2131659f51-sql,Join sales orders with customer info,"SELECT s.order_no, s.order_date, c.name, c.add...",sql
3,41e60ad4-c6fe-5d80-b53d-e176f814a6cf-sql,Parts with their vendor and unit cost,"SELECT P.p_code, V.name AS vendor_name, VP.uni...",sql
4,1b18671a-c5fe-5288-ab9c-2f843422a01d-sql,Total number of sales per customer,"SELECT C.name, COUNT(S.order_no) AS total_orde...",sql
...,...,...,...,...
80,9d2cfeb6-376b-52b4-911e-ff90b1d01409-doc,,VEND_PART.vendor is the unique identifier for ...,documentation
81,d47f3274-ac0c-5782-b821-3f086922139f-doc,,VEND_PART.part is the part that can be procure...,documentation
82,49f130b8-13f2-5c3c-8e60-20acda411259-doc,,VEND_PART.unit_cost is typically the creation ...,documentation
83,82f36a27-c95f-598f-ba45-7c4b36d5fce6-doc,,VEND_PART.part_lt is The standard lead time fo...,documentation


In [68]:
LLM_Response= """SELECT * FROM purchase_line where week(due) = week(date_add(2025-05-04 , interval 7 day)); 

To optimize the supply chain, consider implementing the following strategies:

1. Improve demand forecasting: Accurate demand forecasting can help in better inventory management and reduce safety stock requirements.
2. Implement a vendor relationship management (VRM) system: This can help in negotiating better prices with vendors and ensure timely delivery of goods.
3. Optimize transportation routes: Efficient transportation routes can reduce shipping costs and lead times.
4. Implement lean manufacturing principles: This can help in reducing waste and improving efficiency in the production process.
5. Utilize technology: Implementing technologies such as RFID, IoT, and AI-based systems can improve supply chain visibility and enable real-time decision making.
6. Foster collaboration: Collaboration between different departments and stakeholders can help in identifying bottlenecks and optimizing the entire supply chain process."""

In [70]:
LLM_Response.split(";")[1].replace("\n"," ")

'   To optimize the supply chain, consider implementing the following strategies:  1. Improve demand forecasting: Accurate demand forecasting can help in better inventory management and reduce safety stock requirements. 2. Implement a vendor relationship management (VRM) system: This can help in negotiating better prices with vendors and ensure timely delivery of goods. 3. Optimize transportation routes: Efficient transportation routes can reduce shipping costs and lead times. 4. Implement lean manufacturing principles: This can help in reducing waste and improving efficiency in the production process. 5. Utilize technology: Implementing technologies such as RFID, IoT, and AI-based systems can improve supply chain visibility and enable real-time decision making. 6. Foster collaboration: Collaboration between different departments and stakeholders can help in identifying bottlenecks and optimizing the entire supply chain process.'

In [4]:
import time
import os
import sys
import warnings
import json
import re
from typing import Dict, List
from collections import defaultdict
from datetime import datetime

# --- Environment Setup ---
os.environ["TOKENIZERS_PARALLELISM"] = "false"

# --- Package Imports ---
# try:
# import vanna
from vanna.base import VannaBase
from vanna.chromadb.chromadb_vector import ChromaDB_VectorStore
import openai
import mysql.connector
import pandas as pd
# except ImportError as e:
# print(f"Import Error: {e}")
# sys.exit(1)

# --- Configuration ---
db_type = "mysql"
mysql_host = "127.0.0.1"
mysql_port = 3306
mysql_user = "root"
mysql_password = "Raekwon_wtc$36" 
mysql_dbname = "ft_database"
llm_model_name = 'mistral-7b-instruct-v0.1.Q8_0.gguf'
local_server_url = 'http://127.0.0.1:8000/v1'
placeholder_api_key = "sk-no-key-required"

# --- Question Classification Constants ---

FEW_SHOT_PROMPT = """
You are a manufacturing domain expert. Classify each question into one of three categories based on these criteria:

DESCRIPTIVE: 
- Questions that ask for direct data or facts
- Usually start with: what, how many, list, show, where, when
- Focus on current or historical state
- Example keywords: count, total, list, find, show, what is, where are

JUDGEMENT:
- Questions that require analysis and evaluation
- Usually start with: why, can, will, should, is it
- Need reasoning about causes or effects
- Example keywords: why, evaluate, assess, analyze, is it possible, what caused

SUGGESTION:
- Questions that ask for recommendations or improvements
- Usually start with: how can, what should, recommend
- Focus on future actions or changes
- Example keywords: improve, optimize, suggest, recommend, how can, what should

Examples:
Q1: "What is the current inventory level?" 
A1: DESCRIPTIVE (asks for current state)

Q2: "Why did we miss our delivery target?"
A2: JUDGEMENT (requires analysis of causes)

Q3: "How can we reduce late deliveries?"
A3: SUGGESTION (asks for improvement recommendations)

Q4: "List all overdue orders"
A4: DESCRIPTIVE (asks for direct data)

Q5: "Should we increase safety stock?"
A5: JUDGEMENT (requires evaluation)

Q6: "What changes should we make to improve efficiency?"
A6: SUGGESTION (asks for improvement recommendations)

Now classify this question:
Q: {question}
A: """

CONTEXT_PROMPTS = {
    "descriptive": "Return ONLY SQL with some explanation with plots.",
    "judgement": "When responding, return the required SQL query, provide a detailed analysis of the data including relevant plots, and offer your Judgement on the findings. Clearly state that it is your Judgement and advise the user to independently verify it.",
    "suggestion": "When responding, return the required SQL query, provide a detailed analysis of the data including relevant plots, and offer your suggestion on the findings. Clearly state that it is your Suggestion and advise the user to independently verify it."
}
QUESTION_TYPES = ["descriptive", "judgement", "suggestion"]  
# Add a keyword-based classification helper
def keyword_based_classification(question: str) -> str:
    question = question.lower()
    
    descriptive_keywords = ['what is', 'how many', 'list', 'show', 'find', 'where', 'when', 'which', 'count']
    judgement_keywords = ['why', 'could', 'would', 'should', 'evaluate', 'analyze', 'assess', 'is it possible']
    suggestion_keywords = ['how can', 'what should', 'improve', 'recommend', 'suggest', 'advise', 'propose']
    
    # Check for suggestion first (highest priority)
    for keyword in suggestion_keywords:
        if keyword in question:
            return "suggestion"
            
    # Check for judgement second
    for keyword in judgement_keywords:
        if keyword in question:
            return "judgement"
            
    # Default to descriptive
    for keyword in descriptive_keywords:
        if keyword in question:
            return "descriptive"
            
    return None

# Update the classify_question method in MyLocalLlm class

CLASSIFICATION_METRICS = defaultdict(int)


# --- Custom LLM Class with MCP Features ---
class MyLocalLlm(VannaBase):
    def __init__(self, config=None):
        super().__init__(config=config)
        self.model = llm_model_name

    # Implement required abstract methods
    def assistant_message(self, content: str) -> Dict:
        return {"role": "assistant", "content": content}

    def system_message(self, content: str) -> Dict:
        return {"role": "system", "content": content}

    def user_message(self, content: str) -> Dict:
        return {"role": "user", "content": content}
            
    def classify_question(self, question: str) -> str:
        # First try keyword-based classification
        keyword_result = keyword_based_classification(question)
        if keyword_result:
            return keyword_result
            
        # If keyword classification fails, use LLM
        classification_prompt = FEW_SHOT_PROMPT.format(question=question)
        response = self.submit_prompt(classification_prompt)
        
        # Clean up and validate the response
        response = response.strip().lower()
        if response in QUESTION_TYPES:
            return response
        # Default to descriptive if everything fails
        return "descriptive"

    def submit_prompt(self, prompt, **kwargs) -> str:
        try:
            local_client = openai.OpenAI(
                base_url=local_server_url,
                api_key=placeholder_api_key,
                timeout=kwargs.get('request_timeout', 180.0)
            )
            
            messages = prompt if isinstance(prompt, list) else [{"role": "user", "content": str(prompt)}]
            
            response = local_client.chat.completions.create(
                model=self.model,
                messages=messages,
                temperature=kwargs.get('temperature', 0.1),
                max_tokens=kwargs.get('max_tokens', 5000),
                stop=kwargs.get('stop', ["``````"])
            )
            
            if response.choices and response.choices[0].message.content:
                return response.choices[0].message.content.strip()
            return "Error: No response generated"

        except Exception as e:
            return f"Error: {str(e)}"

# --- Enhanced Vanna Class ---
class MyVanna(ChromaDB_VectorStore, MyLocalLlm):
    def __init__(self, config=None):
        ChromaDB_VectorStore.__init__(self, config=config)
        MyLocalLlm.__init__(self, config=config)
        
    def generate_contextual_response(self, question: str) -> str:
        category = self.classify_question(question)
        CLASSIFICATION_METRICS[category] += 1
        
        system_msg = f"""You are a manufacturing analyst. Follow these rules:
        - DESCRIPTIVE: {CONTEXT_PROMPTS['descriptive']}
        - JUDGEMENT: {CONTEXT_PROMPTS['judgement']}
        - SUGGESTION: {CONTEXT_PROMPTS['suggestion']}"""
        
        return self.generate_sql(
            question=question,
            chat_history=[self.system_message(system_msg)]
        )

# --- Core Execution Flow --- 
vn = MyVanna(config=None)

# Database connection setup remains unchanged
# --- Connect to Database ---
print(f"Connecting to {db_type} database '{mysql_dbname}' on {mysql_host}...")
try:
    if db_type == "mysql":
        vn.connect_to_mysql(
            host=mysql_host,
            port=mysql_port,
            user=mysql_user,
            password=mysql_password,
            dbname=mysql_dbname
        )
    else:
        print(f"Database type '{db_type}' not configured.")
        sys.exit(1)
    print("Database connection successful.")
except Exception as e:
    print(f"Error connecting to database: {e}")
    print("Please check MySQL server status, credentials, permissions, and required packages.")
    sys.exit(1)

# --- Training Vanna (Run deliberately when needed) ---
print("\n--- Vanna Training ---")
print("Loads metadata (DDL, docs, SQL examples) into Vanna's local ChromaDB vector store.")
train_vanna = True # Set to True to run training, False to skip for normal use
if train_vanna:
    df_information_schema = vn.run_sql(f"SELECT * FROM INFORMATION_SCHEMA.COLUMNS where TABLE_SCHEMA = '{mysql_dbname}';")
    Plan = vn.get_training_plan_generic(df_information_schema)
    vn.train(plan=Plan)
    table_list = df_information_schema['TABLE_NAME'].unique().tolist()
    column_list = df_information_schema['COLUMN_NAME'].unique().tolist()
    print("Running Vanna training...")

    # === Add your specific training calls here ===
    print("  Example: Fetching DDL (Customize for your setup)...")
    # df_ddl = vn.run_sql(f"SHOW CREATE TABLE your_table_name;")
    # if not df_ddl.empty: vn.train(ddl=df_ddl.iloc['Create Table'])

    print("  Example: Adding Documentation (Add yours)...")
    # --- Customer Table Documentation ---
    vn.train(documentation="Table CUSTOMER contains information about business customers.")
    vn.train(documentation="CUSTOMER.id is the unique identifier for each customer.")
    vn.train(documentation="CUSTOMER.name is the company name of the customer.")
    vn.train(documentation="CUSTOMER.addr_str contains the street address for the customer.")
    vn.train(documentation="CUSTOMER.addr_city contains the city for the customer.")
    vn.train(documentation="CUSTOMER.addr_ste contains the state for the customer.")
    vn.train(documentation="CUSTOMER.addr_ctry contains the country for the customer.")
    vn.train(documentation="CUSTOMER.addr_zip contains the zip-code for the customer.")

    # --- Vendor Table Documentation ---
    vn.train(documentation="Table VENDOR lists suppliers of parts.")
    vn.train(documentation="VENDOR.id is the unique identifier for each vendor.")
    vn.train(documentation="VENDOR.name is the name of the vendor/supplier.")
    vn.train(documentation="CUSTOMER.addr_str contains the street address for the vendor.")
    vn.train(documentation="CUSTOMER.addr_city contains the city for the vendor.")
    vn.train(documentation="CUSTOMER.addr_ste contains the state for the vendor.")
    vn.train(documentation="CUSTOMER.addr_ctry contains the country for the vendor.")
    vn.train(documentation="CUSTOMER.addr_zip contains the zip-code for the vendor.")

    # --- Part Master Table Documentation ---
    vn.train(documentation="Table PART_MST is the master list of all parts.")
    vn.train(documentation="PART_MST.id is the unique identifier which indicates the unique id for the part.")
    vn.train(documentation="PART_MST.p_code is the unique product code for the part.")
    vn.train(documentation="PART_MST.procurement indicates if a part is manufactured internally ('M') or purchased ('B'). It must be either 'M' or 'B'.")
    vn.train(documentation="PART_MST.unit_price represents the standard price per unit of the part.")
    vn.train(documentation="PART_MST.pref_order_qty specifies The preferred quantity to order or manufacture at once, depending on if the part is M or B.")
    vn.train(documentation="PART_MST.unit_meas the unit of measure for this part; usually 'EA' for 'eaches'.")
    vn.train(documentation="PART_MST.mfg_lt is the lead time to manufacture this part, if it is manufactured. This must an 'M' part and positive.")
    vn.train(documentation="PART_MST.curr_stock the current amount of this part in stock in inventory")

    # --- Add Documentation for BOM_MST, RTG_MST, JOB_OPER, JOB_MATL, RECV_MST, RECV_LINE ---
    # Example:
    vn.train(documentation="Table bill_of_m defines the Bill of Materials, showing parent-child part relationships.")
    vn.train(documentation="bill_of_m.m_part links to the parent part code in PART_MST. The manufactured part which consumes c_part.")
    vn.train(documentation="BOM_MST.c_part links to the child component part code in PART_MST. The component part consumed by m_part.")
    vn.train(documentation="BOM_MST.qty_req is The quantity of c_part required to make one unit of m_part.")

    # --- Job Master Table Documentation ---
    vn.train(documentation="Table job_mst represents the master data for all the jobs.")
    vn.train(documentation="job_mst.id is the unique identifier for a job.")
    vn.train(documentation="job_mst.job_stat signifies the current status of the job as follows: 'Q' (in queue), 'C' (completed), 'O' (opened), 'X' (cancelled), or 'H' (on hold).")
    vn.train(documentation="job_mst.part is the part that this job is manufacturing. This can be found in the PART_MST table.")
    vn.train(documentation="job_mst.qty is the quantity of the part that this job is manufacturing.")
    vn.train(documentation="job_mst.job_rls is the time at which this job began work.")
    vn.train(documentation="job_mst.job_cls is the time at which this job was completed.")

    # --- Nonconform master table documentation ---
    vn.train(documentation="Table NONCONFORM_MST defines the Nonconformance report master data.")
    vn.train(documentation="NONCONFORM_MST.id is a unique id for this nonconformance report.")
    vn.train(documentation="NONCONFORM_MST.job signifies if this nonconformance report was issued for a job, the job it was issued for.")
    vn.train(documentation="NONCONFORM_MST.po_no signifies if this nonconformance report was issued for a purchase order, the purchase order it was issued for.")
    vn.train(documentation="NONCONFORM_MST.qty signifies the quantity of parts on this job/purchase order which were flagged for nonconformance issues.")



    # --- Sales Order Header Table Documentation ---
    vn.train(documentation="Table SALES_MST represents the header information for a customer sales order.")
    vn.train(documentation="SALES_MST.order_no is the unique identifier for a sales order.")
    vn.train(documentation="SALES_MST.order_date is the date the sales order was placed.")
    vn.train(documentation="SALES_MST.cust links to the CUSTOMER table for the customer who placed the order.")

    # --- Sales Order Line Table Documentation ---
    vn.train(documentation="Table SALES_LINE contains individual line items for each sales order in SALES_MST.")
    vn.train(documentation="SALES_LINE.order_no links back to the SALES_MST table.")
    vn.train(documentation="SALES_LINE.line_no is the line number of the line item under the given order_no.")
    vn.train(documentation="SALES_LINE.order_stat is the current status of the line item: 'C' (completed), 'O' (opened), or 'X' (cancelled).")
    vn.train(documentation="SALES_LINE.part links to the PART_MST table for the specific part ordered.")
    vn.train(documentation="SALES_LINE.due is the requested delivery date for this line item.")
    vn.train(documentation="SALES_LINE.unit_price is the agreed price per unit for the part on this order line.")
    vn.train(documentation="SALES_LINE.qty is the number of units ordered for this part on this line item.")
    vn.train(documentation="SALES_LINE.line_cls is the date on which this line item was closed.")

    # --- Purchase Order Header Table Documentation ---
    vn.train(documentation="Table PURCHASE_MST represents the header information for a purchase order sent to a vendor.")
    vn.train(documentation="PURCHASE_MST.order_no is the unique identifier for a purchase order.")
    vn.train(documentation="PURCHASE_MST.order_date is the date the purchase order was created.")
    vn.train(documentation="PURCHASE_MST.vendor links to the VENDOR table for the supplier receiving the order.")

    # --- Purchase Order Line Table Documentation ---
    vn.train(documentation="Table PURCHASE_LINE contains individual line items for each purchase order in PURCHASE_MST.")
    vn.train(documentation="PURCHASE_LINE.order_no links back to the PURCHASE_MST table.")
    vn.train(documentation="PURCHASE_LINE.part links to the PART_MST table for the specific part being purchased.")
    vn.train(documentation="PURCHASE_LINE.due is the expected delivery date for this purchased part.")
    vn.train(documentation="PURCHASE_LINE.unit_cost is the cost per unit for the part on this purchase order line.")
    vn.train(documentation="PURCHASE_LINE.qty is the number of units purchased. It must be a positive number.")
    vn.train(documentation="PURCHASE_LINE.order_stat is the current status of the purchase order: 'C' (completed), 'O' (opened), or 'X' (cancelled).")
    vn.train(documentation="PURCHASE_LINE.order_type is the type of purchase order: either an 'S' for service order or 'M' for material order.")
    vn.train(documentation="PURCHASE_LINE.line_no is the line number of this purchase order line under the given order_no.")
    vn.train(documentation="PURCHASE_LINE.line_cls is the date on which this purchase order was closed.")


    # --- Vendor Part Table Documentation ---
    vn.train(documentation="Table VEND_PART represents vendor and part relationship.")
    vn.train(documentation="VEND_PART.vendor is the unique identifier for each vendor.")
    vn.train(documentation="VEND_PART.part is the part that can be procured from the vendor.")
    vn.train(documentation="VEND_PART.unit_cost is typically the creation or start date of the job.")
    vn.train(documentation="VEND_PART.part_lt is The standard lead time for which to place orders for part to vendor")


    vn.train(documentation="""Comprehensive Chapter-by-Chapter Report on Factory Physics by Wallace J. Hopp and Mark L. Spearman

Factory Physics provides a rigorous framework for understanding manufacturing and supply chain operations through scientific principles. Below is a detailed summary of each chapter, structured to reflect the book’s three-part organization: historical context, foundational theories, and practical applications. Key formulas and laws are described in narrative form to align with the requested format.
Part I: The Lessons of History
Chapter 1: Manufacturing in America
This chapter traces the evolution of U.S. manufacturing from the Industrial Revolution to modern practices. It critiques the decline of American manufacturing dominance in the late 20th century, attributing it to short-term financial strategies, overreliance on technology, and fragmented management approaches. The authors emphasize the need for a scientific foundation to restore competitiveness, setting the stage for the "Factory Physics" framework.
Chapter 2: Inventory Control: From EOQ to ROP
The chapter reviews classical inventory models, including the Economic Order Quantity (EOQ) model, which balances ordering and holding costs to determine optimal batch sizes. The Reorder Point (ROP) method is introduced as a way to manage stochastic demand by setting inventory thresholds based on lead times and service levels. Criticisms of these models include their reliance on static assumptions and neglect of variability.
Chapter 3: The MRP Crusade
Material Requirements Planning (MRP) and its evolution into Enterprise Resource Planning (ERP) are analyzed. While MRP improved coordination of production schedules, its limitations-such as infinite capacity assumptions and sensitivity to demand fluctuations-are highlighted. The authors argue that MRP’s rigidity often leads to inefficiencies in dynamic environments.
Chapter 4: From the JIT Revolution to Lean Manufacturing
The Just-in-Time (JIT) philosophy and its successor, Lean Manufacturing, are explored. Key principles like waste reduction, pull systems, and Total Quality Management (TQM) are discussed. However, the chapter warns against dogmatic adherence to Lean without understanding underlying variability. The Kanban system is presented as a method to limit work-in-process (WIP) but is noted to falter under high variability.
Chapter 5: What Went Wrong
This chapter diagnoses the failures of 20th-century manufacturing strategies, including overemphasis on financial metrics, disjointed improvement initiatives, and inadequate integration of human factors. The authors advocate for a systems-oriented approach grounded in the laws of factory physics.
Part II: Factory Physics – The Science of Manufacturing
Chapter 6: A Science of Manufacturing
The authors introduce the core philosophy of Factory Physics: manufacturing systems obey natural laws akin to physical systems. Key objectives-throughput, inventory, and cycle time-are defined, and the need for predictive models to balance these metrics is emphasized.
Chapter 7: Basic Factory Dynamics
Foundational relationships are established:
Little’s Law: Work-in-process (WIP) equals the product of throughput and cycle time.
Bottleneck Rate: The slowest process in a system determines maximum throughput.
Critical WIP: The minimum WIP required to achieve bottleneck throughput.
Chapter 8: Variability Basics
Variability is quantified using the coefficient of variation (CV). The chapter explains how randomness in processing times and demand propagates through systems, degrading performance. The Kingman equation (VUT) is introduced, linking cycle time to variability, utilization, and process time.
Chapter 9: The Corrupting Influence of Variability
The Variability Law states that increased variability always degrades system performance, necessitating buffers in inventory, capacity, or time. The Worst-Case Performance Law shows that deterministic systems can exhibit poor performance due to misaligned scheduling, challenging Lean’s aversion to all variability.
Chapter 10: Push and Pull Production Systems
Push systems (e.g., MRP) and pull systems (e.g., CONWIP, Kanban) are compared. The CONWIP method, which limits WIP while maintaining flexibility, is advocated as a hybrid approach. The authors demonstrate how pull systems reduce cycle time variability by controlling WIP.
Chapter 11: The Human Element
Human factors-such as motivation, decision-making biases, and the balance of responsibility-are analyzed. The chapter argues that effective systems design must align incentives with operational goals to avoid counterproductive behaviors.
Chapter 12: Total Quality Manufacturing
Quality management principles, including Statistical Process Control (SPC) and Six Sigma, are integrated into the Factory Physics framework. The cost of quality is framed as a trade-off between prevention costs and failure costs.
Part III: Principles in Practice
Chapter 13: A Pull Planning Framework
A hierarchical planning approach is proposed, combining long-term forecasts with short-term pull signals. The quota-setting model aligns production targets with capacity constraints to avoid overburdening bottlenecks.
Chapter 14: Shop Floor Control
Practical strategies for managing shop floors include CONWIP configurations and Statistical Throughput Control (STC), which monitors output to detect deviations from planned performance.
Chapter 15: Production Scheduling
Scheduling techniques prioritize bottleneck resources using dynamic dispatching rules. The earliest due date (EDD) and critical ratio (CR) methods are evaluated for their ability to balance throughput and timely delivery.
Chapter 16: Aggregate and Workforce Planning
The chapter integrates capacity planning with workforce management. Linear programming models optimize product mix and labor allocation under demand uncertainty.
Chapter 17: Supply Chain Management
Inventory management strategies for raw materials, WIP, and finished goods are detailed. The multi-echelon supply chain model emphasizes coordination across stages to minimize bullwhip effects.
Chapter 18: Capacity Management
Methods for capacity expansion and line balancing are discussed. The line-of-balance problem is solved to synchronize production rates across stations, ensuring smooth flow.
Chapter 19: Synthesis – Pulling It All Together
The final chapter synthesizes concepts into a unified strategy. Case studies illustrate how combining Lean, Six Sigma, and Factory Physics principles achieves sustainable improvements. The authors stress the importance of experimentation and data-driven decision-making.
Key Formulas in Narrative Form
Little’s Law: The average number of items in a system equals the average arrival rate multiplied by the average time each item spends in the system.
Kingman’s Equation (VUT): Cycle time increases with variability, utilization, and process time.
Bottleneck Rate: The maximum throughput of a system is determined by its slowest process.
Variability Buffering: Systems buffer variability using inventory, capacity, or time.
CONWIP WIP Limit: The optimal WIP level balances throughput and cycle time.
By grounding manufacturing management in scientific principles, Factory Physics provides a timeless framework for optimizing complex operations. Its integration of theory and practice makes it indispensable for academics and industry professionals alike.""")


    print("Adding SQL examples...")
    vn.train(question="Get all customers", 
                sql="SELECT * FROM customer;")
    
    vn.train(question="Get all vendors in a specific state", 
                sql="SELECT *  FROM vendor WHERE 'addr_ste' = 'TX';")
    
    vn.train(question="Join sales orders with customer info", 
                sql="""SELECT s.order_no, s.order_date, c.name, c.addr_city FROM sales_mst s JOIN customer c ON s.cust = c.id;""")
    
    vn.train(question="Parts with their vendor and unit cost", 
                sql="""SELECT P.p_code, V.name AS vendor_name, VP.unit_cost FROM vend_part VP JOIN vendor  V ON VP.vendor = V.id JOIN part_mst P ON VP.part = P.id;""")
    
    vn.train(question="Total number of sales per customer", 
                sql="""SELECT C.name, COUNT(S.order_no) AS total_orders FROM customer C JOIN sales_mst S ON S.cust = C.id GROUP BY C.name order by COUNT(S.order_no) desc; """)
    
    vn.train(question="Average unit price of each part", 
                sql="""SELECT p_code, AVG(unit_price) AS avg_price FROM part_mst GROUP BY p_code order by AVG(unit_price);""")
    
    vn.train(question="Jobs released in last 30 days", 
                sql="""SELECT * FROM job_mst where date(job_rls)>= date_sub(curdate() , interval 30 day);""")
    
    vn.train(question="Purchase orders due next week", 
                sql="""SELECT * FROM purchase_line where week(due) = week(date_add(curdate() , interval 7 day));""")
    
    vn.train(question="All open jobs", 
                sql="""SELECT * FROM JOB_MST  WHERE job_stat = 'Q';""")
    
    vn.train(question="All sales lines with ‘COMPLETE’ status", 
                sql="""SELECT * FROM sales_line WHERE order_stat = 'C';""")
    
    vn.train(question="Get purchase line for specific order and line", 
                sql="""SELECT * FROM purchase_lineWHERE order_no = 'PO-5436934';""")
    
    vn.train(question="Customers with more than 3 orders", 
                sql="""SELECT name FROM CUSTOMER WHERE id IN (SELECT cust FROM SALES_MST GROUP BY cust HAVING COUNT(order_no) > 3);""")
    
    vn.train(question="List Sales Orders with Customer Names", 
                sql="""SELECT s.order_no, s.order_date, c.name AS customer_name, c.addr_city AS customer_city FROM SALES_MST s JOIN CUSTOMER c ON s.cust = c.id;""")
    
    vn.train(question="Show Details of Parts on a Specific Sales Order", 
                sql="""SELECT sl.order_no, sl.line_no,p.p_code AS part_code,p.unit_meas,sl.qty AS quantity_ordered,sl.unit_price AS price_on_order, p.unit_price AS current_part_price, sl.order_stat AS line_status FROM SALES_LINE sl JOIN PART_MST p ON sl.part = p.id where order_no = "SO-1490003"; """)
    
    vn.train(question="Calculate Total Quantity Sold for Each Part", 
                sql="""SELECT p.p_code, p.id AS part_id, SUM(sl.qty) AS total_quantity_sold FROM SALES_LINE sl JOIN PART_MST p ON sl.part = p.id WHERE sl.order_stat = 'C'  GROUP BY p.p_code, p.id ORDER BY total_quantity_sold DESC;""")
    
    vn.train(question="Calculate Total Value of Purchases per Vendor", 
                sql="""SELECT v.name AS vendor_name, COUNT(DISTINCT pl.order_no) AS number_of_pos, SUM(pl.qty * pl.unit_cost) AS total_purchase_value FROM PURCHASE_LINE pl JOIN PURCHASE_MST pm ON pl.order_no = pm.order_no JOIN VENDOR v ON pm.vendor = v.id GROUP BY v.name ORDER BY total_purchase_value DESC;""")
    
    vn.train(question="Find Customers and Vendors Located in the Same State", 
                sql="""SELECT c.name AS customer_name, v.name AS vendor_name, c.addr_ste AS state FROM CUSTOMER c JOIN VENDOR v ON c.addr_ste = v.addr_ste ORDER BY c.addr_ste, c.name, v.name;""")
    
    vn.train(question="Compare Current Stock vs. Total Purchased vs. Total Sold (Simplified)", 
                sql="""SELECT p.id, p.p_code, p.curr_stock AS current_stock_on_hand, COALESCE(pp.total_purchased, 0) AS total_units_purchased, COALESCE(ps.total_sold, 0) AS total_units_sold, (p.curr_stock + COALESCE(pp.total_purchased, 0) - COALESCE(ps.total_sold, 0)) AS calculated_stock_balance  FROM PART_MST p LEFT JOIN PartPurchases pp ON p.id = pp.part LEFT JOIN PartSales ps ON p.id = ps.part ORDER BY p.p_code; """)
    
    vn.train(question="List Parts Supplied by a Specific Vendor", 
                sql="""SELECT v.id, v.name AS vendor_name, p.p_code AS part_code, vp.unit_cost AS vendor_unit_cost, vp.part_lt AS vendor_lead_time FROM VEND_PART vp left JOIN VENDOR v ON vp.vendor = v.id left JOIN PART_MST p ON vp.part = p.id where v.id = 7 ORDER BY p.p_code;""")
    
    vn.train(question="Top 5 Customers by Sales Value in the Last 6 Months", 
                sql="""SELECT c.id AS customer_id, c.name AS customer_name, SUM(sl.qty * sl.unit_price) AS total_spent FROM CUSTOMER c JOIN SALES_MST s ON c.id = s.cust JOIN SALES_LINE sl ON s.order_no = sl.order_no WHERE s.order_date >= DATE_SUB(CURDATE(), INTERVAL 6 MONTH) AND sl.order_stat <> 'X' GROUP BY c.id, c.name ORDER BY total_spent DESC LIMIT 5;""")
    
    vn.train(question="Parts Below Preferred Order Quantity", 
                sql="""SELECT id,p_code,curr_stock, pref_order_qty FROM PART_MST WHERE curr_stock < pref_order_qty AND procurement <> 'S' ORDER BY (pref_order_qty - curr_stock) DESC;""")
    
    vn.train(question="Monthly Sales Revenue Trend", 
                sql="""SELECT DATE_FORMAT(s.order_date, '%Y-%m') AS sales_month, round(SUM(sl.qty * sl.unit_price),2) AS monthly_revenue FROM SALES_MST s JOIN SALES_LINE sl ON s.order_no = sl.order_no WHERE sl.order_stat <> 'X' GROUP BY sales_month ORDER BY sales_month;""")

    vn.train(question="Direct Components for a Manufactured Part (Bill of Materials)", 
                sql="""SELECT mp.p_code AS manufactured_part_code, cp.p_code AS component_part_code, bom.qty_req AS quantity_required, cp.unit_meas AS component_unit FROM BILL_OF_M bom JOIN PART_MST mp ON bom.m_part = mp.id  JOIN PART_MST cp ON bom.c_part = cp.id  WHERE bom.m_part = 5638;""")
    
    vn.train(question="Total Component Quantity Required for Open Production Jobs", 
                    sql="""SELECT bom.c_part AS component_part_id, p.p_code AS component_part_code, SUM(j.qty * bom.qty_req) AS total_required_for_open_jobs FROM JOB_MST j JOIN BILL_OF_M bom ON j.part = bom.m_part JOIN PART_MST p ON bom.c_part = p.id   WHERE j.job_stat IN ('O', 'Q') GROUP BY bom.c_part, p.p_code ORDER BY total_required_for_open_jobs DESC;""")
        
    vn.train(question="Nonconformance Rate per Part (Simplified: NC Qty / Qty Purchased)", 
                    sql="""WITH PartPurchasesReceived AS (SELECT pl.part, SUM(pl.qty) AS total_received FROM PURCHASE_LINE pl WHERE pl.order_stat = 'C' GROUP BY pl.part),PartNonconformance AS (SELECT pl.part, SUM(nc.qty) AS total_nonconforming FROM NONCONFORM_MST nc JOIN PURCHASE_MST pm ON nc.po_no = pm.order_no  JOIN PURCHASE_LINE pl ON pm.order_no = pl.order_no AND nc.job IS NULL  GROUP BY pl.part)SELECT p.id AS part_id, p.p_code, COALESCE(ppr.total_received, 0) AS total_received, COALESCE(pnc.total_nonconforming, 0) AS total_nonconforming, CASE WHEN COALESCE(ppr.total_received, 0) > 0 THEN (COALESCE(pnc.total_nonconforming, 0) / ppr.total_received) * 100 ELSE 0 END AS nonconformance_rate_percent FROM PART_MST p LEFT JOIN PartPurchasesReceived ppr ON p.id = ppr.part LEFT JOIN PartNonconformance pnc ON p.id = pnc.part WHERE p.procurement = 'B'  ORDER BY nonconformance_rate_percent DESC;""")
    
    vn.train(question="Customers Who Haven't Ordered in Over a Year", 
                    sql="""SELECT c.id, c.name, MAX(s.order_date) AS last_order_date  FROM CUSTOMER c LEFT JOIN SALES_MST s ON c.id = s.cust AND s.order_date >= DATE_SUB(CURDATE(), INTERVAL 1 YEAR) WHERE s.order_no IS NULL  GROUP BY c.id, c.name  ORDER BY c.name;""")
    
    vn.train(question="Parts Purchased from Multiple Vendors with Cost Comparison", 
                    sql="""SELECT p.id AS part_id, p.p_code, COUNT(DISTINCT vp.vendor) AS number_of_vendors, MIN(vp.unit_cost) AS min_vendor_cost, MAX(vp.unit_cost) AS max_vendor_cost, (MAX(vp.unit_cost) - MIN(vp.unit_cost)) AS cost_difference FROM PART_MST p JOIN VEND_PART vp ON p.id = vp.part GROUP BY p.id, p.p_code HAVING COUNT(DISTINCT vp.vendor) > 1  ORDER BY cost_difference DESC, p.p_code;""")
    
    vn.train(question="Implement simple exponential smoothing forecast",
            sql = "WITH MonthlySales AS (SELECT  DATE_FORMAT(order_date, '%Y-%m') AS month, SUM(qty) AS sales FROM SALES_LINE join sales_mst on SALES_LINE.order_no = sales_mst.order_no GROUP BY DATE_FORMAT(order_date, '%Y-%m')),Smoothing AS (SELECT month, sales, @forecast := COALESCE(@forecast * 0.8 + sales * 0.2, sales) AS forecast FROM MonthlySales CROSS JOIN (SELECT @forecast := NULL) init ORDER BY month)SELECT * FROM Smoothing;" )
    
    vn.train(question="Compare actual purchase costs vs vendor quoted prices.",
            sql="SELECT pl.order_no,p.p_code,pl.unit_cost AS actual_cost,vp.unit_cost AS quoted_cost,(pl.unit_cost - vp.unit_cost) AS variance FROM PURCHASE_LINE pl JOIN VEND_PART vp ON pl.part = vp.part AND pl.order_no IN (SELECT order_no FROM PURCHASE_MST WHERE vendor in (select vendor from vend_part))JOIN PART_MST p ON pl.part = p.id;")
    
    vn.train(question="Analyze sales distribution by customer state.",
            sql="SELECT c.addr_ste AS state,COUNT(DISTINCT s.order_no) AS order_count,SUM(sl.qty * sl.unit_price) AS total_revenue FROM CUSTOMER c JOIN SALES_MST s ON c.id = s.cust JOIN SALES_LINE sl ON s.order_no = sl.order_no GROUP BY c.addr_ste ORDER BY total_revenue DESC;")
    
    vn.train(question="Calculate yield loss percentage from non-conformances.",
                sql="SELECT p.p_code, COALESCE(SUM(n.qty),0) AS nc_qty, SUM(j.qty) AS total_produced, (COALESCE(SUM(n.qty),0) / SUM(j.qty)) * 100 AS yield_loss_percent FROM JOB_MST j LEFT JOIN NONCONFORM_MST n ON j.id = n.job JOIN PART_MST p ON j.part = p.id GROUP BY p.id,p.p_code HAVING yield_loss_percent > 5;")
    
    vn.train(question="Identify single-source vendors with high-risk in terms of part quantity.",
                sql="select name,p_code,curr_stock,pref_order_qty from (SELECT  p.id, p.p_code, COUNT(DISTINCT vp.vendor) AS vendor_count, sum(p.curr_stock) curr_stock, sum(p.pref_order_qty) pref_order_qty FROM VEND_PART vp JOIN PART_MST p ON vp.part = p.id JOIN VENDOR v ON vp.vendor = v.id GROUP BY p.id,p.p_code HAVING vendor_count = 1 ) t join vend_part on t.id = vend_part.part join vendor on vend_part.vendor = vendor.id where curr_stock < pref_order_qty;")
    
    vn.train(question="Categorize inventory by last movement date.",
                    sql="SELECT p.p_code, sum(p.curr_stock) curr_stock, CASE WHEN MAX(s.order_date) IS NULL THEN 'No Sales' WHEN MAX(s.order_date) < DATE_SUB(NOW(), INTERVAL 6 MONTH) THEN 'Slow Moving' ELSE 'Active' END AS inventory_status FROM PART_MST p LEFT JOIN SALES_LINE sl ON p.id = sl.part LEFT JOIN SALES_MST s ON sl.order_no = s.order_no GROUP BY p.p_code;")
    
    vn.train(question="Find frequently paired parts in customer orders.",
                    sql="SELECT a.part AS part1, b.part AS part2, COUNT(*) AS pair_count FROM SALES_LINE a JOIN SALES_LINE b ON a.order_no = b.order_no AND a.part < b.part GROUP BY part1, part2 HAVING pair_count > 1 ORDER BY pair_count DESC;")
    
    vn.train(question="Calculate average delay between job release and completion.",
                    sql="SELECT p.p_code,AVG(DATEDIFF(j.job_cls, j.job_rls)) AS avg_delay_days, COUNT(*) AS job_count FROM JOB_MST j JOIN PART_MST p ON j.part = p.id WHERE j.job_stat = 'C' GROUP BY p.p_code HAVING avg_delay_days > 7;")
    
    vn.train(question="Identify customers with decreasing order frequency over quarters.",
                    sql="WITH QuarterlyOrders AS (SELECT c.id, QUARTER(s.order_date) AS qtr, COUNT(*) AS orders FROM CUSTOMER c JOIN SALES_MST s ON c.id = s.cust GROUP BY c.id, QUARTER(s.order_date))SELECT id AS customer_id,qtr,orders, LAG(orders) OVER(PARTITION BY id ORDER BY qtr) AS prev_qtr_orders,(orders - LAG(orders) OVER(PARTITION BY id ORDER BY qtr)) AS trend FROM QuarterlyOrders;")
    
    vn.train(question="Find parts with no sales in the last 6 months but maintained inventory.",
                    sql="SELECT p.p_code, p.curr_stock, MAX(s.order_date) AS last_sale_date FROM PART_MST p LEFT JOIN SALES_LINE sl ON p.id = sl.part LEFT JOIN SALES_MST s ON sl.order_no = s.order_no GROUP BY p.p_code,p.curr_stock HAVING last_sale_date < DATE_SUB(NOW(), INTERVAL 6 MONTH) OR last_sale_date IS NULL;")
    
    vn.train(question="Predict next month's sales using 3-month moving average.",
                    sql="WITH MonthlySales AS (SELECT DATE_FORMAT(s.order_date, '%Y-%m') AS month, SUM(sl.qty) AS total_sales FROM SALES_MST s JOIN SALES_LINE sl ON s.order_no = sl.order_no GROUP BY DATE_FORMAT(s.order_date, '%Y-%m'))SELECT month, total_sales, AVG(total_sales) OVER(ORDER BY month ROWS BETWEEN 2 PRECEDING AND CURRENT ROW) AS forecast FROM MonthlySales;")
    
    vn.train(question="Identify parts with inconsistent vendor lead times exceeding 20% variability.",
                    sql="SELECT p.p_code,v.id, v.name AS vendor,AVG(CAST(SUBSTRING_INDEX(vp.part_lt, ' ', 1) AS UNSIGNED)) AS avg_lead_days,STDDEV(CAST(SUBSTRING_INDEX(vp.part_lt, ' ', 1) AS UNSIGNED)) AS std_deviation, (STDDEV(CAST(SUBSTRING_INDEX(vp.part_lt, ' ', 1) AS UNSIGNED)) /  AVG(CAST(SUBSTRING_INDEX(vp.part_lt, ' ', 1) AS UNSIGNED))) AS variability_ratio FROM VEND_PART vp JOIN VENDOR v ON vp.vendor = v.id JOIN PART_MST p ON vp.part = p.id GROUP BY v.id,p.p_code, v.name;")
    
    vn.train(question="Suggest price adjustments based on vendor cost changes and sales performance.",
                    sql="SELECT p.id, p.p_code, p.unit_price AS current_price,MIN(vp.unit_cost) AS lowest_vendor_cost,AVG(sl.unit_price) AS avg_sale_price, CASE WHEN MIN(vp.unit_cost) > p.unit_price * 0.9 THEN 'Increase Price' WHEN AVG(sl.unit_price) < p.unit_price * 0.8 THEN 'Decrease Price' ELSE 'Maintain Price' END AS recommendation FROM PART_MST p JOIN VEND_PART vp ON p.id = vp.part JOIN SALES_LINE sl ON p.id = sl.part GROUP BY p.id,p.p_code, p.unit_price;")
    
    vn.train(question="Find components with the highest demand from open production jobs compared to current stock.",
            sql="WITH ComponentDemand AS (SELECT bom.c_part, SUM(j.qty * bom.qty_req) AS required_qty FROM JOB_MST j JOIN BILL_OF_M bom ON j.part = bom.m_part WHERE j.job_stat IN ('Q','O') GROUP BY bom.c_part)SELECT p.p_code,cd.required_qty,p.curr_stock,(cd.required_qty - p.curr_stock) AS deficit FROM ComponentDemand cd JOIN PART_MST p ON cd.c_part = p.id WHERE cd.required_qty > p.curr_stock;")
    
    vn.train(question="Calculate the financial impact of non-conformances by linking them to purchase order costs.",
            sql = "SELECT n.id AS nc_id,pl.order_no,p.p_code,n.qty AS nc_quantity,pl.unit_cost,(n.qty * pl.unit_cost) AS financial_impact FROM NONCONFORM_MST n JOIN PURCHASE_MST pm ON n.po_no = pm.order_no JOIN PURCHASE_LINE pl ON pm.order_no = pl.order_no JOIN PART_MST p ON pl.part = p.idWHERE n.po_no IS NOT NULL;")
    
    vn.train(question="Rank vendors by average delivery speed (time between order date and line closure) for completed purchase orders.",
            sql="SELECT v.name,AVG(DATEDIFF(pl.line_cls, pm.order_date)) AS avg_delivery_days,DENSE_RANK() OVER(ORDER BY AVG(DATEDIFF(pl.line_cls, pm.order_date))) AS performance_rank FROM PURCHASE_MST pm JOIN PURCHASE_LINE pl ON pm.order_no = pl.order_no JOIN VENDOR v ON pm.vendor = v.id WHERE pl.order_stat = 'C' GROUP BY v.id,v.name HAVING avg_delivery_days IS NOT NULL;")
    # --- General Business Rules/Concepts ---
    # vn.train(documentation="'Lead Time' generally refers to the time required to procure or produce a part (PART_MST.sys_lt is a default).")
    # vn.train(documentation="'On-time Delivery' can be assessed by comparing SALES_LINE.due_date with actual delivery dates (potentially in another table like deliveries if it exists).")



# def extract_sql_from_response(response: str) -> str:
#     """Extract SQL code from LLM response using regex patterns"""
#     # Match multi-line SQL between `````` with flexible whitespace
#     response.split(":\n\n")[1].replace("\n"," ")
# def create_plot(question,extracted_sql,df_result):
#     plotly_code = vn.generate_plotly_code(question = question,
#                                           sql = extracted_sql,
#                                           df = df_result)
#     fig = vn.get_plotly_figure(plotly_code=plotly_code,
#                                df=df_result)
#     fig.show()

# --- SQL Extraction Function ---
def extract_sql_from_response(response: str) -> str:
    """Extract SQL code from LLM response using multi-pattern approach"""
    if not response:
        return None
        
    # Clean up the response
    response = response.strip()
    
    # Pattern 1: Look for SQL between ```sql and ``` markers
    sql_block_pattern = r'```sql\s*(.*?)\s*```'
    sql_matches = re.findall(sql_block_pattern, response, re.DOTALL)
    if sql_matches:
        return sql_matches[0].strip()
    
    # Pattern 2: Look for SQL between ``` markers
    code_block_pattern = r'```\s*(.*?)\s*```'
    code_matches = re.findall(code_block_pattern, response, re.DOTALL)
    if code_matches:
        for match in code_matches:
            if re.search(r'\b(SELECT|INSERT|UPDATE|DELETE|WITH)\b', match, re.IGNORECASE):
                return match.strip()
    
    # Pattern 3: Direct SQL pattern
    sql_pattern = r'\b(SELECT|WITH|INSERT|UPDATE|DELETE)[\s\S]+?;'
    direct_match = re.search(sql_pattern, response, re.IGNORECASE)
    if direct_match:
        return direct_match.group(0).strip()
    
    return None

# --- complex query architecture --- 
def generate_complex_sql(question: str) -> str:
    """Handles multi-step manufacturing queries"""
    system_msg = """You are an SQL expert. Follow these steps:
    1. Identify required tables from: sales_line, part_mst, purchase_line
    2. Determine necessary joins and filters
    3. Apply temporal constraints using CURDATE()
    4. Include proper aggregations (SUM, AVG, COUNT)
    5. Validate against database schema"""
    
    return vn.generate_sql(
        question=question,
        chat_history=[vn.system_message(system_msg)]
    )


# --- Question Refinement Function ---
def refine_question(original_question: str) -> str:
    refinement_prompt = f"""You are an expert Prompt Engineer. Rephrase and expand this manufacturing question to be more specific and actionable. 
    Maintain original intent while adding context about tables from {table_list} and columns from {column_list}. Return ONLY the improved question.

    Original: {original_question}
    Refined: """
    
    refined = vn.submit_prompt(refinement_prompt).strip()
    return refined if len(refined) > len(original_question) else original_question
    

# --- LLM Analysis and Explanation ---
def analyze_and_explain(question, df_result, category):
    """
    For judgement/suggestion: Use LLM to analyze the DataFrame and give a judgement or recommendation.
    For descriptive: Optionally summarize the data.
    """
    # Convert DataFrame to CSV or dict for LLM context (limit rows for brevity)
    data_sample = df_result.head(20).to_csv(index=False)
    if category == "judgement":
        prompt = f"""
You are a manufacturing analytics expert.
Given the following question:
"{question}"
And the following data (CSV format):
{data_sample}

Analyze the data, provide a clear judgement about the situation, and explain your reasoning. Clearly state: "Judgement: ...". Advise the user to independently verify your judgement.
"""
    elif category == "suggestion":
        prompt = f"""
You are a manufacturing analytics expert.
Given the following question:
"{question}"
And the following data (CSV format):
{data_sample}

Analyze the data and provide a concrete, actionable recommendation. Clearly state: "Suggestion: ...". Advise the user to independently verify your suggestion.
"""
    else:
        # For descriptive, just summarize (optional)
        prompt = f"""
You are a manufacturing analytics expert.
Given the following question:
"{question}"
And the following data (CSV format):
{data_sample}

Briefly summarize the main findings from the data.
"""
    # Call your LLM (reuse submit_prompt)
    response = vn.submit_prompt(prompt)
    return response.strip()


current_date = datetime.now().strftime("%Y-%m-%d")

Connecting to mysql database 'ft_database' on 127.0.0.1...
Database connection successful.

--- Vanna Training ---
Loads metadata (DDL, docs, SQL examples) into Vanna's local ChromaDB vector store.


Add of existing embedding ID: 3a2b3b3c-d62e-57c9-9017-e00b1dfacf1e-doc
Add of existing embedding ID: 0a712900-a404-5806-ac70-6554fa3d6e27-doc
Add of existing embedding ID: eaf17db6-7e22-5ab6-b635-0eca04ab8db1-doc
Add of existing embedding ID: d83ce6c1-5e5e-5b58-bde5-219d2811b351-doc
Add of existing embedding ID: 9d2cfeb6-376b-52b4-911e-ff90b1d01409-doc
Add of existing embedding ID: d47f3274-ac0c-5782-b821-3f086922139f-doc
Add of existing embedding ID: 49f130b8-13f2-5c3c-8e60-20acda411259-doc
Add of existing embedding ID: 82f36a27-c95f-598f-ba45-7c4b36d5fce6-doc
Add of existing embedding ID: 72c8046f-9699-56b9-9f95-d61740148535-doc
Add of existing embedding ID: 31c39d68-1b22-5545-97e0-353dc991f251-doc
Add of existing embedding ID: fff92c80-4899-53ea-a8e4-a18c0415b820-doc
Add of existing embedding ID: 020be5d7-5209-56ca-8b6a-5962efd95682-doc
Add of existing embedding ID: 2f420d2e-9850-51f8-ab1a-2bf1e77e2d3b-doc
Add of existing embedding ID: 9aebcf7e-04a6-5368-b307-25d7ca8a1741-doc
Add of

Running Vanna training...
  Example: Fetching DDL (Customize for your setup)...
  Example: Adding Documentation (Add yours)...
Adding documentation....
Adding documentation....
Adding documentation....


Add of existing embedding ID: c7737722-5406-50ec-a2e3-457631201e43-doc
Insert of existing embedding ID: c7737722-5406-50ec-a2e3-457631201e43-doc
Add of existing embedding ID: 5710f1b2-2513-59b1-a22a-06ca846f3335-doc
Insert of existing embedding ID: 5710f1b2-2513-59b1-a22a-06ca846f3335-doc
Add of existing embedding ID: f62258fc-5052-5064-8778-17d64b48fb75-doc
Insert of existing embedding ID: f62258fc-5052-5064-8778-17d64b48fb75-doc


Adding documentation....
Adding documentation....
Adding documentation....


Add of existing embedding ID: 3a1ce7ab-9179-5232-8d27-603f1c7fc185-doc
Insert of existing embedding ID: 3a1ce7ab-9179-5232-8d27-603f1c7fc185-doc
Add of existing embedding ID: 118f8eeb-9139-5bb3-aa51-ffde0d892907-doc
Insert of existing embedding ID: 118f8eeb-9139-5bb3-aa51-ffde0d892907-doc
Add of existing embedding ID: 8c8564bf-5f18-5fca-b590-917e64a7c696-doc
Insert of existing embedding ID: 8c8564bf-5f18-5fca-b590-917e64a7c696-doc


Adding documentation....
Adding documentation....
Adding documentation....


Add of existing embedding ID: 1e2fad2c-8b40-5101-b01a-eb7cb27e9093-doc
Insert of existing embedding ID: 1e2fad2c-8b40-5101-b01a-eb7cb27e9093-doc
Add of existing embedding ID: adeabba7-8bbd-5156-97a3-d1e27d0e8f47-doc
Insert of existing embedding ID: adeabba7-8bbd-5156-97a3-d1e27d0e8f47-doc
Add of existing embedding ID: 380c2a34-55c2-5fe6-aaf9-5b98fc734a1d-doc
Insert of existing embedding ID: 380c2a34-55c2-5fe6-aaf9-5b98fc734a1d-doc


Adding documentation....
Adding documentation....
Adding documentation....


Add of existing embedding ID: 2e795c42-c6f8-5c44-aa1c-eb628fdee4fc-doc
Insert of existing embedding ID: 2e795c42-c6f8-5c44-aa1c-eb628fdee4fc-doc
Add of existing embedding ID: 3a25bf9f-42c8-5156-823b-ce6a42754b95-doc
Insert of existing embedding ID: 3a25bf9f-42c8-5156-823b-ce6a42754b95-doc
Add of existing embedding ID: fa75db61-9760-51e8-b436-32ed46777d85-doc
Insert of existing embedding ID: fa75db61-9760-51e8-b436-32ed46777d85-doc


Adding documentation....
Adding documentation....
Adding documentation....


Add of existing embedding ID: 808d1279-ae6d-5a3f-b2a9-6a42245a4f28-doc
Insert of existing embedding ID: 808d1279-ae6d-5a3f-b2a9-6a42245a4f28-doc
Add of existing embedding ID: deab258b-8c49-5858-8f76-a52a6269500e-doc
Insert of existing embedding ID: deab258b-8c49-5858-8f76-a52a6269500e-doc
Add of existing embedding ID: 2db3b251-253c-55b3-99aa-12540571472c-doc
Insert of existing embedding ID: 2db3b251-253c-55b3-99aa-12540571472c-doc


Adding documentation....
Adding documentation....
Adding documentation....


Add of existing embedding ID: 417933a5-e35b-5dc9-a0d1-bf5bf8a4d2ce-doc
Insert of existing embedding ID: 417933a5-e35b-5dc9-a0d1-bf5bf8a4d2ce-doc
Add of existing embedding ID: 628de61e-0ed2-5b90-898f-38e558ac180f-doc
Insert of existing embedding ID: 628de61e-0ed2-5b90-898f-38e558ac180f-doc
Add of existing embedding ID: 80065a35-16cd-5fd7-a372-512bd06427e5-doc
Insert of existing embedding ID: 80065a35-16cd-5fd7-a372-512bd06427e5-doc


Adding documentation....
Adding documentation....
Adding documentation....


Add of existing embedding ID: aae9e2ec-a925-599b-a250-2f0b2410a291-doc
Insert of existing embedding ID: aae9e2ec-a925-599b-a250-2f0b2410a291-doc
Add of existing embedding ID: e9263c31-5c79-5360-816b-5f7f98684880-doc
Insert of existing embedding ID: e9263c31-5c79-5360-816b-5f7f98684880-doc
Add of existing embedding ID: 5f751b55-e510-53da-a888-5ecefa26fee9-doc
Insert of existing embedding ID: 5f751b55-e510-53da-a888-5ecefa26fee9-doc
Add of existing embedding ID: 7ed5330d-ac46-5eb4-89b5-9971d2414c73-doc
Insert of existing embedding ID: 7ed5330d-ac46-5eb4-89b5-9971d2414c73-doc


Adding documentation....
Adding documentation....
Adding documentation....
Adding documentation....


Add of existing embedding ID: 23909f8f-6287-589a-9018-c962c6900982-doc
Insert of existing embedding ID: 23909f8f-6287-589a-9018-c962c6900982-doc
Add of existing embedding ID: a3dff5a7-f7ae-5291-9b03-a0b09282c724-doc
Insert of existing embedding ID: a3dff5a7-f7ae-5291-9b03-a0b09282c724-doc
Add of existing embedding ID: 595aefd0-fcf8-5f8f-8068-5e83d009e593-doc
Insert of existing embedding ID: 595aefd0-fcf8-5f8f-8068-5e83d009e593-doc


Adding documentation....
Adding documentation....
Adding documentation....


Add of existing embedding ID: bbeecc4f-d710-575a-8f54-5c94c81562f7-doc
Insert of existing embedding ID: bbeecc4f-d710-575a-8f54-5c94c81562f7-doc
Add of existing embedding ID: 8386d710-25b6-51b9-91d8-30785a276e62-doc
Insert of existing embedding ID: 8386d710-25b6-51b9-91d8-30785a276e62-doc
Add of existing embedding ID: 6910f4a8-91ea-598d-9b25-57242d313d65-doc
Insert of existing embedding ID: 6910f4a8-91ea-598d-9b25-57242d313d65-doc


Adding documentation....
Adding documentation....
Adding documentation....


Add of existing embedding ID: ab40f9fb-5a08-5f54-8d12-0e9d43477249-doc
Insert of existing embedding ID: ab40f9fb-5a08-5f54-8d12-0e9d43477249-doc
Add of existing embedding ID: dea8e4e4-e47c-5c8e-9a6a-7166bc5e54dd-doc
Insert of existing embedding ID: dea8e4e4-e47c-5c8e-9a6a-7166bc5e54dd-doc
Add of existing embedding ID: 49ed9ddb-37cb-5687-ab29-5364cc05e755-doc
Insert of existing embedding ID: 49ed9ddb-37cb-5687-ab29-5364cc05e755-doc


Adding documentation....
Adding documentation....
Adding documentation....


Add of existing embedding ID: 76378bf1-3f39-52d7-8589-31832805c77b-doc
Insert of existing embedding ID: 76378bf1-3f39-52d7-8589-31832805c77b-doc
Add of existing embedding ID: 77937f1f-1c06-5c8e-9b2a-dbaecbb40383-doc
Insert of existing embedding ID: 77937f1f-1c06-5c8e-9b2a-dbaecbb40383-doc
Add of existing embedding ID: 2bae7fcb-36a7-5286-b8c6-71208a314226-doc
Insert of existing embedding ID: 2bae7fcb-36a7-5286-b8c6-71208a314226-doc
Add of existing embedding ID: 49333a1c-ca97-5b18-a978-eb9f9f22d43c-doc


Adding documentation....
Adding documentation....
Adding documentation....


Insert of existing embedding ID: 49333a1c-ca97-5b18-a978-eb9f9f22d43c-doc
Add of existing embedding ID: 24e2eb51-a625-5ba2-bd1e-ceb1b1df0182-doc
Insert of existing embedding ID: 24e2eb51-a625-5ba2-bd1e-ceb1b1df0182-doc
Add of existing embedding ID: c1d11163-ce12-54e8-a638-426f870aa7f4-doc
Insert of existing embedding ID: c1d11163-ce12-54e8-a638-426f870aa7f4-doc


Adding documentation....
Adding documentation....
Adding documentation....


Add of existing embedding ID: 0551b27b-7308-52ed-9abd-4415dd77ec81-doc
Insert of existing embedding ID: 0551b27b-7308-52ed-9abd-4415dd77ec81-doc
Add of existing embedding ID: fb6f1cbe-b884-51e1-a624-7ef41fea05e1-doc
Insert of existing embedding ID: fb6f1cbe-b884-51e1-a624-7ef41fea05e1-doc
Add of existing embedding ID: 5257415b-4130-5a26-946a-d31f4069503e-doc
Insert of existing embedding ID: 5257415b-4130-5a26-946a-d31f4069503e-doc
Add of existing embedding ID: 96bde749-8d97-5d6e-88d2-dac226a05810-doc
Insert of existing embedding ID: 96bde749-8d97-5d6e-88d2-dac226a05810-doc


Adding documentation....
Adding documentation....
Adding documentation....
Adding documentation....


Add of existing embedding ID: c02733e3-c35a-53a8-b5ce-996106d1b55d-doc
Insert of existing embedding ID: c02733e3-c35a-53a8-b5ce-996106d1b55d-doc
Add of existing embedding ID: af9febcb-31be-5b14-8ce8-0fac9252681a-doc
Insert of existing embedding ID: af9febcb-31be-5b14-8ce8-0fac9252681a-doc
Add of existing embedding ID: b03e0de0-ae76-5be7-b66c-ba00c99ac122-doc
Insert of existing embedding ID: b03e0de0-ae76-5be7-b66c-ba00c99ac122-doc


Adding documentation....
Adding documentation....
Adding documentation....


Add of existing embedding ID: 26430856-5ae1-5ecf-867b-b933387d0461-doc
Insert of existing embedding ID: 26430856-5ae1-5ecf-867b-b933387d0461-doc
Add of existing embedding ID: 242db09b-2597-5c29-b559-99eb35e6645d-doc
Insert of existing embedding ID: 242db09b-2597-5c29-b559-99eb35e6645d-doc
Add of existing embedding ID: 53f4e256-e5f4-5793-b2fc-d15fe96aed80-doc
Insert of existing embedding ID: 53f4e256-e5f4-5793-b2fc-d15fe96aed80-doc


Adding documentation....
Adding documentation....
Adding documentation....


Add of existing embedding ID: e3a1bdc3-429f-5a7f-9e80-a667d434142c-doc
Insert of existing embedding ID: e3a1bdc3-429f-5a7f-9e80-a667d434142c-doc
Add of existing embedding ID: 128533d2-05c4-5167-8867-de34e7743520-doc
Insert of existing embedding ID: 128533d2-05c4-5167-8867-de34e7743520-doc
Add of existing embedding ID: 37a60c7d-71e5-5999-a7a9-d5062627d6db-doc
Insert of existing embedding ID: 37a60c7d-71e5-5999-a7a9-d5062627d6db-doc


Adding documentation....
Adding documentation....
Adding documentation....


Add of existing embedding ID: 0300c6b3-f8f8-52bb-9d2e-08d83a1e973c-doc
Insert of existing embedding ID: 0300c6b3-f8f8-52bb-9d2e-08d83a1e973c-doc
Add of existing embedding ID: 5b4e09c7-3aa4-5d97-918a-570f5b07fc5b-doc
Insert of existing embedding ID: 5b4e09c7-3aa4-5d97-918a-570f5b07fc5b-doc
Add of existing embedding ID: 5ee3dc07-0388-522f-8277-f9abf7399e4b-doc
Insert of existing embedding ID: 5ee3dc07-0388-522f-8277-f9abf7399e4b-doc


Adding documentation....
Adding documentation....
Adding documentation....


Add of existing embedding ID: 7a1d58d5-2171-565f-9edf-3cb8a125ef66-doc
Insert of existing embedding ID: 7a1d58d5-2171-565f-9edf-3cb8a125ef66-doc
Add of existing embedding ID: 7d9e084d-20aa-5ab5-b619-4ce79a69131f-doc
Insert of existing embedding ID: 7d9e084d-20aa-5ab5-b619-4ce79a69131f-doc
Add of existing embedding ID: 0dc60cc5-f32b-578b-9708-dfdadc907edf-doc
Insert of existing embedding ID: 0dc60cc5-f32b-578b-9708-dfdadc907edf-doc


Adding documentation....
Adding documentation....
Adding documentation....


Add of existing embedding ID: 1f322c62-5226-5ee3-86f8-695e4ab976b0-doc
Insert of existing embedding ID: 1f322c62-5226-5ee3-86f8-695e4ab976b0-doc
Add of existing embedding ID: bc909e47-ed1f-5b15-be2c-0c1789202c68-doc
Insert of existing embedding ID: bc909e47-ed1f-5b15-be2c-0c1789202c68-doc
Add of existing embedding ID: 04b42da7-2f59-5af1-9379-f9c8854b943c-doc
Insert of existing embedding ID: 04b42da7-2f59-5af1-9379-f9c8854b943c-doc


Adding documentation....
Adding documentation....
Adding documentation....


Add of existing embedding ID: 87135ea3-a863-589b-8d9e-207c0bf9e368-doc
Insert of existing embedding ID: 87135ea3-a863-589b-8d9e-207c0bf9e368-doc
Add of existing embedding ID: 6dafd24f-2d1b-5b34-a48a-7c52345f8fd3-doc
Insert of existing embedding ID: 6dafd24f-2d1b-5b34-a48a-7c52345f8fd3-doc
Add of existing embedding ID: b7c247a6-3fc3-599f-9eb4-6d2ab1920097-doc
Insert of existing embedding ID: b7c247a6-3fc3-599f-9eb4-6d2ab1920097-doc


Adding documentation....
Adding documentation....
Adding documentation....


Add of existing embedding ID: e200dc7d-15c2-55aa-9452-92526bca6476-doc
Insert of existing embedding ID: e200dc7d-15c2-55aa-9452-92526bca6476-doc
Add of existing embedding ID: 3a2b3b3c-d62e-57c9-9017-e00b1dfacf1e-doc
Insert of existing embedding ID: 3a2b3b3c-d62e-57c9-9017-e00b1dfacf1e-doc
Add of existing embedding ID: 0a712900-a404-5806-ac70-6554fa3d6e27-doc
Insert of existing embedding ID: 0a712900-a404-5806-ac70-6554fa3d6e27-doc


Adding documentation....
Adding documentation....
Adding documentation....


Add of existing embedding ID: eaf17db6-7e22-5ab6-b635-0eca04ab8db1-doc
Insert of existing embedding ID: eaf17db6-7e22-5ab6-b635-0eca04ab8db1-doc
Add of existing embedding ID: d83ce6c1-5e5e-5b58-bde5-219d2811b351-doc
Insert of existing embedding ID: d83ce6c1-5e5e-5b58-bde5-219d2811b351-doc
Add of existing embedding ID: 9d2cfeb6-376b-52b4-911e-ff90b1d01409-doc
Insert of existing embedding ID: 9d2cfeb6-376b-52b4-911e-ff90b1d01409-doc


Adding documentation....
Adding documentation....
Adding documentation....


Add of existing embedding ID: d47f3274-ac0c-5782-b821-3f086922139f-doc
Insert of existing embedding ID: d47f3274-ac0c-5782-b821-3f086922139f-doc
Add of existing embedding ID: 49f130b8-13f2-5c3c-8e60-20acda411259-doc
Insert of existing embedding ID: 49f130b8-13f2-5c3c-8e60-20acda411259-doc
Add of existing embedding ID: 82f36a27-c95f-598f-ba45-7c4b36d5fce6-doc
Insert of existing embedding ID: 82f36a27-c95f-598f-ba45-7c4b36d5fce6-doc


Adding documentation....
Adding documentation....
Adding documentation....


Add of existing embedding ID: 72c8046f-9699-56b9-9f95-d61740148535-doc
Insert of existing embedding ID: 72c8046f-9699-56b9-9f95-d61740148535-doc
Add of existing embedding ID: e46ad89f-ba47-5bca-953f-3b2d0f40fbe0-sql
Add of existing embedding ID: 5936eb89-b669-58ea-a84b-d0511579b144-sql
Add of existing embedding ID: 672b6eb5-7282-5382-a6cc-a53e25760475-sql
Add of existing embedding ID: 207c2f0a-c3a4-5525-80cf-930580d71d79-sql
Add of existing embedding ID: a54ce698-1275-5bb2-82e0-c04e1cdc0cd3-sql
Add of existing embedding ID: 4293528b-c9df-5b02-9c5e-6ef4c6217979-sql
Add of existing embedding ID: d4c9bf2f-3442-51bc-9334-9f2131659f51-sql
Add of existing embedding ID: 41e60ad4-c6fe-5d80-b53d-e176f814a6cf-sql
Add of existing embedding ID: 1b18671a-c5fe-5288-ab9c-2f843422a01d-sql
Add of existing embedding ID: f261db6f-0489-5795-abc3-f6ea78655ebd-sql
Add of existing embedding ID: 0a85a0c1-8e67-5516-8991-6d619cb2904c-sql
Add of existing embedding ID: 1af897fe-fe24-511a-8002-4a56ca7980c1-sql
Add

Adding SQL examples...


Add of existing embedding ID: 3108b1d4-dc85-5d2d-94f4-09f0aaef57ac-sql
Add of existing embedding ID: bf78b736-0b5a-5202-a9bc-ebfed9792c82-sql
Add of existing embedding ID: e41f08aa-9932-5ef1-aa57-f0aa2cdaaca5-sql
Add of existing embedding ID: 27ac0de0-c0df-5207-87ff-de29750cc9b0-sql
Add of existing embedding ID: 483b8731-5bdb-5ec8-a0c2-462c407b6c3a-sql
Add of existing embedding ID: 8354ffe5-bdc7-5566-a921-327ccc379e08-sql
Add of existing embedding ID: 00ee5fe5-0399-5776-badb-ba1b70a8e429-sql
Add of existing embedding ID: c459050c-e31a-5cc5-99c3-e8c6173f8f96-sql
Add of existing embedding ID: eb16448e-7427-557c-bb5e-9f676f63dcc3-sql
Add of existing embedding ID: 23a40f8f-7aa0-51ba-b3d3-13e2a32599b6-sql
Add of existing embedding ID: e6bed23d-0ff8-5472-a77c-8d7562a2c490-sql
Add of existing embedding ID: fd8c2455-2d25-5515-9c8c-2bc063cef91f-sql
Add of existing embedding ID: 8cae1de9-4013-5ead-b95f-4b89fbd7371e-sql
Add of existing embedding ID: 155522fd-213d-58f7-8024-9692c600a3a3-sql
Add of

In [5]:
clear_traiining_data()

Unnamed: 0,id,question,content,training_data_type


In [7]:
vn.get_training_data()

Unnamed: 0,id,question,content,training_data_type
0,a54ce698-1275-5bb2-82e0-c04e1cdc0cd3-sql,Get all customers,SELECT * FROM customer;,sql
1,4293528b-c9df-5b02-9c5e-6ef4c6217979-sql,Get all vendors in a specific state,SELECT * FROM vendor WHERE 'addr_ste' = 'TX';,sql
2,d4c9bf2f-3442-51bc-9334-9f2131659f51-sql,Join sales orders with customer info,"SELECT s.order_no, s.order_date, c.name, c.add...",sql
3,41e60ad4-c6fe-5d80-b53d-e176f814a6cf-sql,Parts with their vendor and unit cost,"SELECT P.p_code, V.name AS vendor_name, VP.uni...",sql
4,1b18671a-c5fe-5288-ab9c-2f843422a01d-sql,Total number of sales per customer,"SELECT C.name, COUNT(S.order_no) AS total_orde...",sql
...,...,...,...,...
80,9d2cfeb6-376b-52b4-911e-ff90b1d01409-doc,,VEND_PART.vendor is the unique identifier for ...,documentation
81,d47f3274-ac0c-5782-b821-3f086922139f-doc,,VEND_PART.part is the part that can be procure...,documentation
82,49f130b8-13f2-5c3c-8e60-20acda411259-doc,,VEND_PART.unit_cost is typically the creation ...,documentation
83,82f36a27-c95f-598f-ba45-7c4b36d5fce6-doc,,VEND_PART.part_lt is The standard lead time fo...,documentation
