<a href="https://colab.research.google.com/github/mr-ankit-tech/Coding_with_llm/blob/Workspace/Super_Agent_for_getting_insight.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# REQUIRED INSTALLS (if not already done)
!pip install langchain langchain_openai langchain_community
!pip install transformers accelerate bitsandbytes
!pip install huggingface_hub
!pip install langchain-huggingface

# === Python Imports ===
import os
import sqlite3
import pandas as pd
from langchain.chat_models import ChatOpenAI
from langchain.sql_database import SQLDatabase
from langchain.prompts import PromptTemplate
from google.colab import userdata, data_table
from IPython.display import display
from datetime import datetime # Import datetime

Collecting langchain_openai
  Downloading langchain_openai-0.3.24-py3-none-any.whl.metadata (2.3 kB)
Collecting langchain_community
  Downloading langchain_community-0.3.26-py3-none-any.whl.metadata (2.9 kB)
Collecting langchain-core<1.0.0,>=0.3.58 (from langchain)
  Downloading langchain_core-0.3.66-py3-none-any.whl.metadata (5.8 kB)
Collecting langchain
  Downloading langchain-0.3.26-py3-none-any.whl.metadata (7.8 kB)
Collecting dataclasses-json<0.7,>=0.5.7 (from langchain_community)
  Downloading dataclasses_json-0.6.7-py3-none-any.whl.metadata (25 kB)
Collecting pydantic-settings<3.0.0,>=2.4.0 (from langchain_community)
  Downloading pydantic_settings-2.10.0-py3-none-any.whl.metadata (3.4 kB)
Collecting httpx-sse<1.0.0,>=0.4.0 (from langchain_community)
  Downloading httpx_sse-0.4.0-py3-none-any.whl.metadata (9.0 kB)
Collecting marshmallow<4.0.0,>=3.18.0 (from dataclasses-json<0.7,>=0.5.7->langchain_community)
  Downloading marshmallow-3.26.1-py3-none-any.whl.metadata (7.3 kB)
Coll

In [3]:
# === Load Data into SQLite ===
daily_data_df = pd.read_csv("/content/Heartbeat.csv")
sqlite_connection = sqlite3.connect("ankit_tutorial.db")
db_path = os.path.abspath("ankit_tutorial.db")
daily_data_df.to_sql("Heartbeat", sqlite_connection, if_exists="replace")

daily_data_df = pd.read_csv("/content/Audit.csv")
sqlite_connection = sqlite3.connect("ankit_tutorial.db")
db_path = os.path.abspath("ankit_tutorial.db")
daily_data_df.to_sql("Audit", sqlite_connection, if_exists="replace")


4013

In [4]:

# === Initialize ChatOpenAI ===
OPENAI_API_KEY = userdata.get('OPENAI_API_KEY')
llm = ChatOpenAI(openai_api_key=OPENAI_API_KEY, model_name="gpt-3.5-turbo-1106")

# === Create SQLDatabase Object ===
my_db = SQLDatabase.from_uri(f"sqlite:///{db_path}")
print(f"Tables available to the agent: {my_db.get_table_names()}")

# === Utility: Dynamic Schema Fetch for a Given Table ===
def get_table_schema(table_name: str) -> str:
  db=my_db
  if table_name not in db.get_table_names():
    raise ValueError(f"Table '{table_name}' does not exist in the database.")
  # Pass the table name as a list to get_table_info
  return db.get_table_info([table_name])

# === Prompt Template to Generate SQL ===
sql_prompt_template = PromptTemplate(
    input_variables=["user_inquiry", "background_info"],
    template="""
    You are a SQLite expert. Given an input question, create a syntactically correct SQLite query to run.
    Never query all columns (*).
    Only use the columns you see in the schema below.

    {background_info}
    Question: {user_inquiry}

    Only return the SQL query, nothing else.
    """
    )

# === Full Function to Handle Dynamic Table & Question ===
def Super_Agent(table_name: str, user_inquiry: str) -> pd.DataFrame:
    # Get schema for selected table
    table_schema = get_table_schema(table_name)
    background_info = f"""
    You are a SQLite expert. The following is the schema of the table `{table_name}`.
    Use only the columns listed below when writing SQL queries.
    Never use SELECT *.
    {table_schema}
    """


    # Format prompt
    formatted_prompt = sql_prompt_template.format(
        background_info=background_info,
        user_inquiry=user_inquiry
    )

    # Generate SQL from LLM
    sql_query = llm.invoke(formatted_prompt).content.strip()
    print("Generated SQL query:")
    print(sql_query)

    # Execute SQL
    conn = sqlite3.connect(db_path)
    df_result = pd.read_sql_query(sql_query, conn)
    conn.close()

    # Show Data
    data_table.enable_dataframe_formatter()
    display(df_result)

 # === Generate dynamic CSV filename ===
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    safe_table_name = table_name.replace(" ", "_").lower()
    filename = f"Extract_{safe_table_name}_{timestamp}.csv"

    # Save to CSV
    df_result.to_csv(filename, index=False)
    print(f"✅ Data saved to {filename}")

    #return df_result

  llm = ChatOpenAI(openai_api_key=OPENAI_API_KEY, model_name="gpt-3.5-turbo-1106")


Tables available to the agent: ['Audit', 'Heartbeat']


  print(f"Tables available to the agent: {my_db.get_table_names()}")


In [5]:
# You can dynamically change table name and question here
Super_Agent("Heartbeat","Compare actual revenue and booked revenue by Trade")
Super_Agent("Audit","Can you please share the amount by countries")

# Other Examples:
# query_table(llm, my_db, "Insight", "What is the total Actual_FFE by Year?")
# query_table(llm, my_db, "Insight", "List all shipments from Loading_country = 'India'")


Generated SQL query:
SELECT Trade, SUM(Actual_Revenue) AS Total_Actual_Revenue, SUM(Booked_Revenue) AS Total_Booked_Revenue
FROM Heartbeat
GROUP BY Trade;


Unnamed: 0,Trade,Total_Actual_Revenue,Total_Booked_Revenue
0,B3 - WCSA - North America,37773.0,351384.9
1,C2 - North America-Central America,813791.4,5712081.0
2,I1 - IET,212258.5,1617782.0
3,L7 - Intra NEU,2342156.0,24773170.0
4,R1 - Intra Asia,357290.7,1332669.0
5,U2 - Intra Americas,39646.62,279230.3
6,UG - ECSA - ECSA,12905880.0,115151600.0
7,W5 - Intra Africa,38243910.0,390057700.0
8,X1 - Europe - French Antilles,40722.64,254516.5
9,Z1 - Americas - East Africa,67923270.0,414745700.0


✅ Data saved to Extract_heartbeat_20250622_081339.csv
Generated SQL query:
SELECT Country, SUM(Amount) AS Total_Amount
FROM Audit
GROUP BY Country;


Unnamed: 0,Country,Total_Amount
0,,3898785000.0
1,Australia,1501765.0
2,Belgium,2339197.0
3,Benin,118423.2
4,Brazil,1791583.0
5,Cambodia,-78576.69
6,Cameroon,92014.48
7,Canada,15345800.0
8,Chile,-2.3
9,China,29386840.0


✅ Data saved to Extract_audit_20250622_081339.csv
