In [None]:
import os

from langchain_openai import ChatOpenAI


# Credential for the DeepInfra endpoint. It is read from the environment so a
# real key never has to be saved inside the notebook; the original placeholder
# value is kept as the fallback so existing behavior is unchanged when the
# variable is not set.
DEEPINFRA_API_TOKEN = os.environ.get("DEEPINFRA_API_TOKEN", "xxxxxxx")
# Base URL passed to ChatOpenAI as `base_url` in get_chat_model.
DEEPINFRA_BASE_URL = "https://api.deepinfra.com/v1/openai"
def get_chat_model(model_name: str = "meta-llama/Llama-3.3-70B-Instruct-Turbo", _temp: float = 0) -> ChatOpenAI:
    """Build a ChatOpenAI client configured with the module-level DeepInfra settings.

    :param model_name: value passed to ChatOpenAI as ``model``.
    :param _temp: value passed to ChatOpenAI as ``temperature``.
    :return: a new ChatOpenAI instance using DEEPINFRA_API_TOKEN and
        DEEPINFRA_BASE_URL.
    """
    client_settings = {
        "api_key": DEEPINFRA_API_TOKEN,
        "base_url": DEEPINFRA_BASE_URL,
        "model": model_name,
        "temperature": _temp,
    }
    return ChatOpenAI(**client_settings)

# Client that overrides get_chat_model's default model with the Qwen coder model
# (temperature stays at the default of 0).
llm = get_chat_model(model_name="Qwen/Qwen3-Coder-480B-A35B-Instruct")

In [2]:
from langchain_core.output_parsers import JsonOutputParser
from langchain_core.prompts import PromptTemplate
from langchain_openai import ChatOpenAI
from pydantic import BaseModel, Field

# Client built with get_chat_model's defaults (default model name, temperature 0);
# used as the middle step of the chain below.
model = get_chat_model()


# Define your desired data structure.
# The parser below derives its output schema from this model.
class sql1(BaseModel):
    # Single string field; its description is included in the format
    # instructions that get injected into the prompt.
    sql: str = Field(description="sql query for the user question")

# Schema context inserted into the prompt as plain text: two tables and the
# names of their columns.
db_schema = """
    "table_column_map": {
        "campaign_details": [
        "campaign_id",
        "campaign_name",
        "total_impressions"
        ],
        "campaign_performance": [
        "campaign_id",
        "customer_id",
        "metrics_impressions",
        "segments_date",
        "total_impressions"
        ]
    }
  """
# And a query intended to prompt a language model to populate the data structure.
# NOTE(review): the name `joke_query` does not match its content (a question to
# be answered with SQL) — presumably left over from a template; rename only
# together with its use in the chain invocation.
joke_query = "Which campaign had the maximum impressions on 1 jan 2025?"

# Parser that converts the model's JSON reply into a dict shaped like sql1.
parser = JsonOutputParser(pydantic_object=sql1)

# Prompt with the parser's format instructions pre-filled; the schema text and
# the user query are supplied on each invocation.
prompt = PromptTemplate(
    template="Answer the user query based on given schema : \n{schema}.\n{format_instructions}\n{query}\n",
    partial_variables={"format_instructions": parser.get_format_instructions()},
    input_variables=["query", "schema"],
)

# prompt -> model -> parser pipeline, executed once on the sample question.
chain = prompt | model | parser
ans = chain.invoke({"schema": db_schema, "query": joke_query})

In [7]:
parser.get_format_instructions()

'The output should be formatted as a JSON instance that conforms to the JSON schema below.\n\nAs an example, for the schema {"properties": {"foo": {"title": "Foo", "description": "a list of strings", "type": "array", "items": {"type": "string"}}}, "required": ["foo"]}\nthe object {"foo": ["bar", "baz"]} is a well-formatted instance of the schema. The object {"properties": {"foo": ["bar", "baz"]}} is not well-formatted.\n\nHere is the output schema:\n```\n{"properties": {"sql": {"description": "sql query for the user question", "title": "Sql", "type": "string"}}, "required": ["sql"]}\n```'

In [3]:
ans

{'sql': "SELECT campaign_id FROM campaign_performance WHERE segments_date = '2025-01-01' ORDER BY metrics_impressions DESC LIMIT 1"}

In [4]:
ans['sql']

"SELECT campaign_id FROM campaign_performance WHERE segments_date = '2025-01-01' ORDER BY metrics_impressions DESC LIMIT 1"

In [41]:
prompt

PromptTemplate(input_variables=['query', 'schema'], input_types={}, partial_variables={'format_instructions': 'The output should be formatted as a JSON instance that conforms to the JSON schema below.\n\nAs an example, for the schema {"properties": {"foo": {"title": "Foo", "description": "a list of strings", "type": "array", "items": {"type": "string"}}}, "required": ["foo"]}\nthe object {"foo": ["bar", "baz"]} is a well-formatted instance of the schema. The object {"properties": {"foo": ["bar", "baz"]}} is not well-formatted.\n\nHere is the output schema:\n```\n{"properties": {"sql": {"description": "sql query for the user question", "title": "Sql", "type": "string"}}, "required": ["sql"]}\n```'}, template='Answer the user query based on given schema : \n{schema}.\n{format_instructions}\n{query}\n')

In [55]:
# Prompt template for the single-schema example. Placeholders: {db_schema},
# {format_instructions} and {query}.
# NOTE(review): the `"` after {query} and the lone `.` after
# {format_instructions} are literal characters that reach the model and look
# like typos — confirm before changing the prompt text.
# NOTE(review): "PROPMT" is a misspelling of "PROMPT"; the name is kept because
# other cells reference it, and later cells rebind it to a different template.
SYSTEM_PROPMT_SQL1 = """
Answer the user query based on given schema :

{db_schema}.\n

{format_instructions}\n.

{query}\n"

If the user query is not related to the schema, return 'None' as the sql query.
"""

# Variables supplied at invoke time; {format_instructions} is filled separately
# through partial_variables where the PromptTemplate is built.
sq1_input_variables = ["query", "db_schema"]

def generate_sql1_example(db_schema, user_question):
    """Ask the default chat model for a SQL query answering ``user_question``.

    The prompt is built from the module-level SYSTEM_PROPMT_SQL1 template and
    sq1_input_variables, as they are bound at call time.

    :param db_schema: schema text substituted for ``{db_schema}``.
    :param user_question: text substituted for ``{query}``.
    :return: the value stored under ``'sql'`` in the parsed model reply.
    """
    # Output structure advertised to the model via the format instructions.
    class sql1(BaseModel):
        sql: str = Field(description="sql query for the user question")

    chat_model = get_chat_model()
    json_parser = JsonOutputParser(pydantic_object=sql1)
    sql_prompt = PromptTemplate(
        template=SYSTEM_PROPMT_SQL1,
        input_variables=sq1_input_variables,
        partial_variables={"format_instructions": json_parser.get_format_instructions()},
    )

    pipeline = sql_prompt | chat_model | json_parser
    reply = pipeline.invoke({"db_schema": db_schema, "query": user_question})
    return reply['sql']

In [57]:
# Rebinds `db_schema` to the same two-table column map used in the earlier cell.
db_schema = """
    "table_column_map": {
        "campaign_details": [
        "campaign_id",
        "campaign_name",
        "total_impressions"
        ],
        "campaign_performance": [
        "campaign_id",
        "customer_id",
        "metrics_impressions",
        "segments_date",
        "total_impressions"
        ]
    }
    """
# Input that is unrelated to the schema — presumably a probe of the prompt's
# "return 'None' as the sql query" rule. Note that this rebinds `ans`.
ans = generate_sql1_example(db_schema, "Hi how are yoU")

In [58]:
type(ans)

str

In [62]:
def generate_structured_sql(model, full_schema, pruned_schema, user_question, _instructions, other_info):
    """Run the SQL-generation prompt through ``model`` and return the SQL text.

    The prompt is built from the module-level SYSTEM_PROPMT_SQL1 template and
    sq1_input_variables, as they are bound at call time.

    :param model: chat model used as the middle step of the chain.
    :param full_schema: text substituted for ``{full_schema}``.
    :param pruned_schema: text substituted for ``{pruned_schema}``.
    :param user_question: text substituted for ``{query}``.
    :param _instructions: text substituted for ``{instruction}``.
    :param other_info: text substituted for ``{other_info}``.
    :return: the value stored under ``'sql'`` in the parsed model reply.
    :raises ValueError: if the model answered with the 'None' sentinel that the
        prompt requests for questions unrelated to the schema.
    """
    # Output structure advertised to the model via the format instructions.
    class sql1(BaseModel):
        sql: str = Field(description="sql query for the user question")
    parser = JsonOutputParser(pydantic_object=sql1)

    prompt = PromptTemplate(
        template=SYSTEM_PROPMT_SQL1,
        input_variables=sq1_input_variables,
        partial_variables={"format_instructions": parser.get_format_instructions(), },
    )
    chain = prompt | model | parser
    ans = chain.invoke({
        "query": user_question,
        "full_schema": full_schema,
        "pruned_schema": pruned_schema,
        "instruction": _instructions,
        "other_info": other_info
    })
    sql = ans['sql']
    # The sentinel check must call lower(); comparing the bound method itself
    # to a string is always False, so the error was never raised. Whitespace is
    # stripped as well so that " None " is also recognised.
    if sql.strip().lower() == 'none':
        raise ValueError("The generated SQL query is None. Please check the input query")
    return sql

def generate_sql1(_full_schema, _pruned_schema, _user_question, other_info, instructions):
    """
    Produce the SQL1 query for a user question.

    Builds a chat client for the Qwen coder model and delegates to
    generate_structured_sql with the given schemas, question, instructions and
    extra information.
    :return: whatever generate_structured_sql returns, or None when no client
        is available.
    """
    llm_client = get_chat_model(model_name="Qwen/Qwen3-Coder-480B-A35B-Instruct")
    if llm_client is None:
        return None
    return generate_structured_sql(
        llm_client,
        full_schema=_full_schema,
        pruned_schema=_pruned_schema,
        user_question=_user_question,
        _instructions=instructions,
        other_info=other_info,
    )

In [63]:

# Rebinds the prompt template to the full/pruned-schema variant. Placeholders:
# {full_schema}, {pruned_schema}, {format_instructions}, {instruction}, {query}
# and {other_info}.
# The generate_* functions read this global when they are called, so this cell
# has to run before generate_sql1 is invoked.
# NOTE(review): generate_sql1_example passes only `query` and `db_schema`, so it
# no longer matches this template once the cell has run — confirm whether that
# helper is still needed.
# NOTE(review): the `"` after {query} and the lone `.` after
# {format_instructions} look like typos that reach the model.
SYSTEM_PROPMT_SQL1 = """
Answer the user query based on given schema :
Here is the full schema of the database:
{full_schema}.\n

Here is the schema that you should focus on to answer the user query:
{pruned_schema}.\n

{format_instructions}\n.

{instruction}\n

{query}\n"

If the user query is not related to the schema, return 'None' as the sql query.
Here are some other information that you should consider while generating the sql query:
{other_info}\n

"""

# Variables supplied at invoke time; {format_instructions} is filled separately
# through partial_variables where the PromptTemplate is built.
sq1_input_variables = ["query", "full_schema", "pruned_schema", "instruction", "other_info"]


In [None]:
# End-to-end call with an empty full schema and the two-table column map as the
# pruned schema.
# NOTE: binding the result to `sql1` shadows the `sql1` pydantic class defined
# at module level in an earlier cell.
# NOTE(review): `other_info` describes a table -> {column: datatype} layout,
# while the schema passed here is a table -> [column names] list — confirm
# which format is intended.
sql1 = generate_sql1(
        _full_schema="",
        _pruned_schema="""
        "table_column_map": {
            "campaign_details": [
            "campaign_id",
            "campaign_name",
            "total_impressions"
            ],
            "campaign_performance": [
            "campaign_id",
            "customer_id",
            "metrics_impressions",
            "segments_date",
            "total_impressions"
            ]
        }""",
        _user_question="what is my top performing campaign?",
        instructions="Only generate the SQL query that answers the user question based on the provided schema.",
        other_info="""The provide schema is in following format : 
        {
            'table_name': {
                'column_name_1' : 'datatype OR comma-separated enumerated values',
                'column_name_2' : 'datatype OR comma-separated enumerated values',
                ...
            },
            ...
        }"""  # TODO : Pass other metadata information here
    )

In [65]:
sql1

'SELECT cd.campaign_id, cd.campaign_name, SUM(cp.metrics_impressions) as total_impressions FROM campaign_details cd JOIN campaign_performance cp ON cd.campaign_id = cp.campaign_id GROUP BY cd.campaign_id, cd.campaign_name ORDER BY total_impressions DESC LIMIT 1'

# From Graph code

In [17]:
from langchain_core.output_parsers import JsonOutputParser
from langchain_core.prompts import PromptTemplate
from langchain_openai import ChatOpenAI
from pydantic import BaseModel, Field
# from modules.prompts.sql1_prompts import SYSTEM_PROPMT_SQL1, sq1_input_variables

# Inline copy of the full/pruned-schema prompt template (the import from
# modules.prompts.sql1_prompts is commented out above). Placeholders:
# {full_schema}, {pruned_schema}, {format_instructions}, {instruction}, {query}
# and {other_info}.
# NOTE(review): the `"` after {query} and the lone `.` after
# {format_instructions} look like typos that reach the model.
SYSTEM_PROPMT_SQL1 = """
Answer the user query based on given schema :
Here is the full schema of the database:
{full_schema}.\n

Here is the schema that you should focus on to answer the user query:
{pruned_schema}.\n

{format_instructions}\n.

{instruction}\n

{query}\n"

If the user query is not related to the schema, return 'None' as the sql query.
Here are some other information that you should consider while generating the sql query:
{other_info}\n

"""

# Variables supplied at invoke time; {format_instructions} is filled separately
# through partial_variables where the PromptTemplate is built.
sq1_input_variables = ["query", "full_schema", "pruned_schema", "instruction", "other_info"]



def generate_structured_sql(model, full_schema, pruned_schema, user_question, _instructions, other_info):
    """Run the SQL-generation prompt through ``model`` and return the SQL text.

    The prompt is built from the module-level SYSTEM_PROPMT_SQL1 template and
    sq1_input_variables, as they are bound at call time. The parsed reply is
    printed before it is validated.

    :param model: chat model used as the middle step of the chain.
    :param full_schema: text substituted for ``{full_schema}``.
    :param pruned_schema: text substituted for ``{pruned_schema}``.
    :param user_question: text substituted for ``{query}``.
    :param _instructions: text substituted for ``{instruction}``.
    :param other_info: text substituted for ``{other_info}``.
    :return: the reply itself when it is a string, otherwise the value stored
        under ``'sql'`` (an empty string when the key is absent).
    :raises ValueError: if the reply is the 'None' sentinel requested by the
        prompt, or a JSON null ``sql`` value.
    :raises TypeError: if the parsed reply is neither a str nor a dict.
    """
    # Output structure advertised to the model via the format instructions.
    class sql1(BaseModel):
        sql: str = Field(description="generated sql query for the user question")
    parser = JsonOutputParser(pydantic_object=sql1)

    prompt = PromptTemplate(
        template=SYSTEM_PROPMT_SQL1,
        input_variables=sq1_input_variables,
        partial_variables={"format_instructions": parser.get_format_instructions(), },
    )
    chain = prompt | model | parser
    ans = chain.invoke({
        "query": user_question,
        "full_schema": full_schema,
        "pruned_schema": pruned_schema,
        "instruction": _instructions,
        "other_info": other_info
    }, verbose=True)
    print(f"Answer from LLM : {ans}")

    # Normalise both accepted reply shapes to a single candidate value.
    if isinstance(ans, str):
        sql = ans
    elif isinstance(ans, dict):
        sql = ans.get('sql', '')
    else:
        raise TypeError(f"Unexpected response type from model: {type(ans)}")

    # A reply of {"sql": null} parses to None; treat it like the textual 'None'
    # sentinel instead of failing with AttributeError on None.strip().
    if sql is None or sql.strip().lower() == "none":
        raise ValueError("The generated SQL query is None. Please check the input query")
    return sql


def generate_sql1(_full_schema, _pruned_schema, _user_question, other_info, instructions):
    """
    Produce the SQL1 query for a user question.

    Builds a chat client with get_chat_model's defaults and delegates to
    generate_structured_sql with the given schemas, question, instructions and
    extra information.
    :return: whatever generate_structured_sql returns, or None when no client
        is available.
    """
    # Model names tried previously (currently the get_chat_model default is used):
    #   "Qwen/Qwen3-Coder-480B-A35B-Instruct"
    #   "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8"
    llm_client = get_chat_model()
    if llm_client is None:
        return None
    return generate_structured_sql(
        llm_client,
        full_schema=_full_schema,
        pruned_schema=_pruned_schema,
        user_question=_user_question,
        _instructions=instructions,
        other_info=other_info,
    )

In [None]:
# Inputs for the end-to-end call at the bottom of this cell.
# Full schema is left empty; only the pruned schema carries table information.
fs = ""
# Pruned schema as a Python dict; it is serialised with json.dumps before being
# placed in the prompt.
ps = {
      "table_column_map": {
        "campaign_details": [
          "campaign_id",
          "campaign_name",
          "total_impressions"
        ],
      "campaign_performance": [
        "campaign_id",
        "customer_id",
        "metrics_impressions",
        "segments_date",
        "total_impressions"
        ]
      }
    } 
# No extra instructions for this run.
instructions = ""

# NOTE(review): this text describes a table -> {column: datatype} layout, while
# `ps` maps tables to lists of column names — confirm which format is intended.
other_info="""The provide schema is in following format : 
        {
            'table_name': {
                'column_name_1' : 'datatype OR comma-separated enumerated values',
                'column_name_2' : 'datatype OR comma-separated enumerated values',
                ...
            },
            ...
        }"""
import json

# The return value is not assigned; it is shown only as the cell's output.
generate_sql1(
    _full_schema = fs,
    _pruned_schema = json.dumps(ps, indent=2),
    _user_question = "Which campaign had the maximum impressions on 1 jan 2025?",
    other_info=other_info,
    instructions=instructions
)

# Using pydantic models as tools

In [None]:
from pydantic import BaseModel, Field
from typing import Optional
from langchain_core.messages import HumanMessage, SystemMessage
import json

# Tool schema handed to the model through bind_tools in sql1_gen.
# (The previous comment described a toxic/NSFW guardrail tool, which does not
# match this class.)
# NOTE(review): `sql_query` is declared Optional[bool], and the recorded tool
# call in this notebook returned {'sql_query': True} rather than SQL text — the
# field was probably meant to be a string; confirm before relying on this tool.
class sql_generator(BaseModel):
    """Saves the the SQL query generator based on user input and schema"""
    sql_query : Optional[bool] = Field(
        default=False,
        description="Save the generated SQL query on the user input and schema."
    )
    
def sql1_gen(usr_messages: HumanMessage, model_to_use: str = "meta-llama/Llama-3.3-70B-Instruct-Turbo", temperature: float = 0.3) -> dict:
    """Invoke a tool-bound chat model with a schema-aware system prompt.

    The system prompt is produced with ``str.format`` from a hardcoded
    template, a hardcoded schema dict (serialised via ``json.dumps``), a
    hardcoded schema-format note and the content of ``usr_messages``. The
    model then receives that system message followed by ``usr_messages``
    itself, so the user text appears twice in the request.

    :param usr_messages: the user's message; its ``content`` is also inlined
        into the system prompt.
    :param model_to_use: model name forwarded to get_chat_model.
    :param temperature: temperature forwarded to get_chat_model.
    :return: the object returned by ``invoke`` on the tool-bound model.
        NOTE(review): the annotation says ``dict``, but callers in this
        notebook read ``.tool_calls`` from the result — the annotation looks
        inaccurate; confirm the actual return type.
    """
    # Template for the system message; {pruned_schema}, {query} and {other_info}
    # are filled in below.
    # NOTE(review): the `"` after {query} looks like a typo that reaches the model.
    SQL_SYS_PROMPT = """
    Answer the user query based on given schema :

    Here is the schema that you should focus on to answer the user query:
    {pruned_schema}.\n


    {query}\n"

    If the user query is not related to the schema, return 'None' as the sql query.
    Here are some other information that you should consider while generating the sql query:
    {other_info}\n

    """
    # Hardcoded schema: table name -> list of column names.
    ps = {
        "table_column_map": {
            "campaign_details": [
            "campaign_id",
            "campaign_name",
            "total_impressions"
            ],
        "campaign_performance": [
            "campaign_id",
            "customer_id",
            "metrics_impressions",
            "segments_date",
            "total_impressions"
            ]
        }
        } 


    # NOTE(review): this text describes a table -> {column: datatype} layout,
    # which differs from the list-of-columns layout of `ps` — confirm.
    other_info="""The provide schema is in following format : 
            {
                'table_name': {
                    'column_name_1' : 'datatype OR comma-separated enumerated values',
                    'column_name_2' : 'datatype OR comma-separated enumerated values',
                    ...
                },
                ...
            }"""

    # Braces inside the substituted values are not re-interpreted by format();
    # only the three placeholders in the template are replaced.
    SQL_SYS_PROMPT = SQL_SYS_PROMPT.format(
        pruned_schema = json.dumps(ps, indent=2),
        other_info=other_info,
        query=usr_messages.content
    )
    system_message = SystemMessage(
        content=SQL_SYS_PROMPT
    )
    llm_client = get_chat_model(model_name=model_to_use, _temp=temperature)
    # Expose the sql_generator pydantic model to the LLM as a callable tool.
    llm_client = llm_client.bind_tools([sql_generator])
    response = llm_client.invoke([system_message]+[usr_messages])
    return response


In [34]:
# Sample user question wrapped as a HumanMessage for sql1_gen.
usr = HumanMessage(content="Which campaign had the maximum impressions on 1 jan 2025?")

In [35]:
# Runs the tool-bound model with default model name and temperature; rebinds
# `ans`, which earlier cells used for parsed chain results.
ans = sql1_gen(usr_messages=usr)

In [36]:
ans.tool_calls[0]

{'name': 'sql_generator',
 'args': {'sql_query': True},
 'id': 'call_KKEZEVVGaRvLZnL5luuOTeZ3',
 'type': 'tool_call'}

In [37]:
# Variant of the SQL prompt without a {format_instructions} placeholder.
# Placeholders: {full_schema}, {pruned_schema}, {instruction}, {query} and
# {other_info}.
# NOTE: this name is spelled PROMPT, so it is a different variable from the
# SYSTEM_PROPMT_SQL1 global that the generate_* functions read.
# NOTE(review): the `"` after {query} looks like a typo that reaches the model.
SYSTEM_PROMPT_SQL1 = """
Answer the user query based on given schema :
Here is the full schema of the database:
{full_schema}.\n

Here is the schema that you should focus on to answer the user query:
{pruned_schema}.\n

{instruction}\n

{query}\n"

If the user query is not related to the schema, return 'None' as the sql query.
Here are some other information that you should consider while generating the sql query:
{other_info}\n

"""

In [38]:
# Scratch values: a real bool and the string "True", compared with isinstance
# in the next cell.
a = True
b = "True"

In [39]:
isinstance(a, bool), isinstance(b, bool)

(True, False)