# Text to IRIS SQL with langchain

An experiment on how to use langchain framework, IRIS Vector Search and LLMs to generate IRIS-compatible SQL from users' prompts.

## Setup

In [1]:
!pip install --upgrade --quiet langchain langchain-openai langchain-iris pandas

In [2]:
import os

from sqlalchemy import create_engine

import hashlib

import pandas as pd;

from langchain_core.prompts import PromptTemplate, ChatPromptTemplate
from langchain_core.example_selectors import SemanticSimilarityExampleSelector
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
from langchain.docstore.document import Document
from langchain_community.document_loaders import DataFrameLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain_core.output_parsers import StrOutputParser
from langchain_iris import IRISVector

In [3]:
# IRIS database connection parameters
os.environ["ISC_LOCAL_SQL_HOSTNAME"] = "sqlzilla-iris-1"
os.environ["ISC_LOCAL_SQL_PORT"] = "1972"
os.environ["ISC_LOCAL_SQL_NAMESPACE"] = "IRISAPP"
os.environ["ISC_LOCAL_SQL_USER"] = "_system"
os.environ["ISC_LOCAL_SQL_PWD"] = "SYS"

In [4]:
# IRIS database connection string
args = {
    'hostname': os.getenv("ISC_LOCAL_SQL_HOSTNAME"), 
    'port': os.getenv("ISC_LOCAL_SQL_PORT"), 
    'namespace': os.getenv("ISC_LOCAL_SQL_NAMESPACE"), 
    'username': os.getenv("ISC_LOCAL_SQL_USER"), 
    'password': os.getenv("ISC_LOCAL_SQL_PWD")
}
iris_conn_str = f"iris://{args['username']}:{args['password']}@{args['hostname']}:{args['port']}/{args['namespace']}"

In [5]:
# Connection to IRIS database
engine = create_engine(iris_conn_str)
cnx = engine.connect().connection

In [6]:
# Dict for context information for system prompt
context = {}
context["input"] = "What were the top 3 years with recorded events?"
context["top_k"] = 3

## Prompt creation

In [7]:
iris_sql_template = """
You are an InterSystems IRIS expert. Given an input question, first create a syntactically correct InterSystems IRIS query to run and return the answer to the input question.
Unless the user specifies in the question a specific number of examples to obtain, query for at most {top_k} results using the TOP clause as per InterSystems IRIS. You can order the results to return the most informative data in the database.
Never query for all columns from a table. You must query only the columns that are needed to answer the question. Wrap each column name in single quotes ('') to denote them as delimited identifiers.
Pay attention to use only the column names you can see in the tables below. Be careful to not query for columns that do not exist. Also, pay attention to which column is in which table.
Pay attention to use CAST(CURRENT_DATE as date) function to get the current date, if the question involves "today".
"""

In [8]:
tables_prompt_template = """
Only use the following tables:
{table_info}
"""

In [9]:
prompt_sql_few_shots_template = """
Below are a number of examples of questions and their corresponding SQL queries.

{examples_value}
"""

In [10]:
example_prompt_template = "User input: {input}\nSQL query: {query}"
example_prompt = PromptTemplate.from_template(example_prompt_template)

In [11]:
user_prompt = "\n"+example_prompt.invoke({"input": "{input}", "query": ""}).to_string()

In [12]:
prompt = (
    ChatPromptTemplate.from_messages([("system", iris_sql_template)])
    + ChatPromptTemplate.from_messages([("system", tables_prompt_template)])
    + ChatPromptTemplate.from_messages([("system", prompt_sql_few_shots_template)])
    + ChatPromptTemplate.from_messages([("human", user_prompt)])
)
prompt

ChatPromptTemplate(input_variables=['examples_value', 'input', 'table_info', 'top_k'], messages=[SystemMessagePromptTemplate(prompt=PromptTemplate(input_variables=['top_k'], template='\nYou are an InterSystems IRIS expert. Given an input question, first create a syntactically correct InterSystems IRIS query to run and return the answer to the input question.\nUnless the user specifies in the question a specific number of examples to obtain, query for at most {top_k} results using the TOP clause as per InterSystems IRIS. You can order the results to return the most informative data in the database.\nNever query for all columns from a table. You must query only the columns that are needed to answer the question. Wrap each column name in single quotes (\'\') to denote them as delimited identifiers.\nPay attention to use only the column names you can see in the tables below. Be careful to not query for columns that do not exist. Also, pay attention to which column is in which table.\nPay a

## Auxiliary functions

In [13]:
# todo: add foreign key definitions
def get_table_definitions_array(cnx, schema, table=None):
    """
    Generate SQL `CREATE TABLE` statements for tables in a given schema.

    This function queries the database to retrieve column definitions and 
    constructs SQL `CREATE TABLE` statements for each table within the specified schema. 
    If a specific table name is provided, it generates the statement only for that table.

    Args:
        cnx: An IRIS connection object.
        schema (str): The name of the schema to retrieve table definitions from.
        table (str, optional): The name of a specific table to retrieve the definition for. 
                               If not provided, definitions for all tables in the schema are retrieved.

    Returns:
        list of str: A list of SQL `CREATE TABLE` statements as strings.
    """
    
    cursor = cnx.cursor()

    # Base query to get columns information
    query = """
    SELECT TABLE_SCHEMA, TABLE_NAME, COLUMN_NAME, DATA_TYPE, IS_NULLABLE, COLUMN_DEFAULT, PRIMARY_KEY, null EXTRA
    FROM INFORMATION_SCHEMA.COLUMNS
    WHERE TABLE_SCHEMA = %s
    """
    
    # Parameters for the query
    params = [schema]

    # Adding optional filters
    if table:
        query += " AND TABLE_NAME = %s"
        params.append(table)
    
    # Execute the query
    cursor.execute(query, params)

    # Fetch the results
    rows = cursor.fetchall()
    
    # Process the results to generate the table definition(s)
    table_definitions = {}
    for row in rows:
        table_schema, table_name, column_name, column_type, is_nullable, column_default, column_key, extra = row
        if table_name not in table_definitions:
            table_definitions[table_name] = []
        table_definitions[table_name].append({
            "column_name": column_name,
            "column_type": column_type,
            "is_nullable": is_nullable,
            "column_default": column_default,
            "column_key": column_key,
            "extra": extra
        })

    primary_keys = {}
    
    # Build the output string
    result = []
    for table_name, columns in table_definitions.items():
        table_def = f"CREATE TABLE {schema}.{table_name} (\n"
        column_definitions = []
        for column in columns:
            column_def = f"  {column['column_name']} {column['column_type']}"
            if column['is_nullable'] == "NO":
                column_def += " NOT NULL"
            if column['column_default'] is not None:
                column_def += f" DEFAULT {column['column_default']}"
            if column['extra']:
                column_def += f" {column['extra']}"
            column_definitions.append(column_def)
        if table_name in primary_keys:
            pk_def = f"  PRIMARY KEY ({', '.join(primary_keys[table_name])})"
            column_definitions.append(pk_def)
        table_def += ",\n".join(column_definitions)
        table_def += "\n);"
        result.append(table_def)

    return result

In [14]:
def get_table_definitions(cnx, schema, table=None):
    """
    Generate SQL `CREATE TABLE` statements as a single string for tables in a given schema.

    This function uses `get_table_definitions_array` to retrieve column definitions and 
    constructs SQL `CREATE TABLE` statements for each table within the specified schema. 
    If a specific table name is provided, it generates the statement only for that table.
    The results are concatenated into a single string.

    Args:
        cnx: An IRIS connection object.
        schema (str): The name of the schema to retrieve table definitions from.
        table (str, optional): The name of a specific table to retrieve the definition for. 
                               If not provided, definitions for all tables in the schema are retrieved.

    Returns:
        str: A single string containing SQL `CREATE TABLE` statements separated by double newlines.
    """

    return "\n\n".join(get_table_definitions_array(cnx=cnx, schema=schema, table=table))

In [15]:
def get_ids_from_string_array(array):
    """
    Generate hash IDs for a list of strings to identify documents by their content and avoid duplications in the vector store.

    This function takes an array of strings and returns a list of MD5 hash values for each string. 
    These hash values can be used to identify the content of the documents, helping to prevent duplicate entries in the vector store.

    Args:
        array (list of str): A list of strings to be hashed.

    Returns:
        list of str: A list of MD5 hash values as hexadecimal strings.
    """

    return [str(hashlib.md5(x.encode()).hexdigest()) for x in array]

In [16]:
def exists_in_db(cnx, collection_name, id):
    """
    Check if a record with a specific ID exists in a given database collection.

    This function queries the database to determine whether a record with the specified ID exists 
    in the specified collection (table).

    Args:
        cnx: An IRIS connection object.
        collection_name (str): The name of the collection (table) to search.
        id (str): The ID of the record to check for existence.

    Returns:
        bool: True if a record with the specified ID exists in the collection, False otherwise.
    """
    
    schema_name = "SQLUser"
    
    cursor = cnx.cursor()
    query = f"""
    SELECT TOP 1 id
    FROM INFORMATION_SCHEMA.TABLES 
    WHERE TABLE_SCHEMA = %s and TABLE_NAME = %s
    """
    params = [schema_name, collection_name]
    cursor.execute(query, params)
    rows = cursor.fetchall()
    if len(rows) == 0:
        return False

    del cursor, query, params, rows
    
    cursor = cnx.cursor()
    query = f"""
    SELECT TOP 1 id
    FROM {collection_name}
    WHERE id = %s
    """
    params = [id]
    cursor.execute(query, params)
    rows = cursor.fetchall()
    return len(rows) > 0

In [17]:
def filter_not_in_collection(cnx, collection_name, docs_array, ids_array):
    """
    Filter out documents whose IDs already exist in a specified database collection.

    This function takes an array of documents and their corresponding IDs, and returns a tuple containing 
    two lists: one with documents that do not have matching IDs in the specified collection, and another 
    with their corresponding IDs.

    Args:
        cnx: An IRIS connection object.
        collection_name (str): The name of the collection (table) to check for existing IDs.
        docs_array (list): A list of documents to be checked.
        ids_array (list): A list of IDs corresponding to the documents.

    Returns:
        tuple: A tuple of two lists:
            - The first list contains documents not present in the collection.
            - The second list contains the corresponding IDs of those documents.
    """
    
    filtered = [x for x in zip(docs_array, ids_array) if not exists_in_db(cnx, collection_name, x[1])]
    return list(zip(*filtered)) or ([], [])

## Database schema context management

In [18]:
table_def = get_table_definitions_array(cnx=cnx, schema='Aviation')
table_df = pd.DataFrame(data=table_def, columns=["col_def"])
table_df["id"] = table_df.index + 1
table_df

Unnamed: 0,col_def,id
0,CREATE TABLE Aviation.Aircraft (\n Event bigi...,1
1,CREATE TABLE Aviation.Crew (\n Aircraft varch...,2
2,CREATE TABLE Aviation.Event (\n ID bigint NOT...,3


In [19]:
loader = DataFrameLoader(table_df, page_content_column="col_def")
documents = loader.load()
text_splitter = CharacterTextSplitter(chunk_size=400, chunk_overlap=20, separator="\n")
tables_docs = text_splitter.split_documents(documents)
tables_docs

[Document(metadata={'id': 1}, page_content='CREATE TABLE Aviation.Aircraft (\n  Event bigint NOT NULL,\n  ID varchar NOT NULL,\n  AccidentExplosion varchar,\n  AccidentFire varchar,\n  AirFrameHours varchar,\n  AirFrameHoursSince varchar,\n  AirFrameHoursSinceLastInspection varchar,\n  AircraftCategory varchar,\n  AircraftCertMaxGrossWeight integer,\n  AircraftHomeBuilt varchar,\n  AircraftKey integer NOT NULL,\n  AircraftManufacturer varchar,'),
 Document(metadata={'id': 1}, page_content='AircraftModel varchar,\n  AircraftRegistrationClass varchar,\n  AircraftSerialNo varchar,\n  AircraftSeries varchar,\n  Damage varchar,\n  DepartureAirportId varchar,\n  DepartureCity varchar,\n  DepartureCountry varchar,\n  DepartureSameAsEvent varchar,\n  DepartureState varchar,\n  DepartureTime integer,\n  DepartureTimeZone varchar,\n  DestinationAirportId varchar,\n  DestinationCity varchar,'),
 Document(metadata={'id': 1}, page_content='DestinationCountry varchar,\n  DestinationSameAsLocal varch

In [20]:
new_tables_docs, tables_docs_ids = filter_not_in_collection(
    cnx, 
    "sql_tables", 
    tables_docs, 
    get_ids_from_string_array([x.page_content for x in tables_docs])
)
print("tables yet not stored by IRISVector:\n", new_tables_docs, tables_docs_ids)

tables yet not stored by IRISVector:
 (Document(metadata={'id': 1}, page_content='CREATE TABLE Aviation.Aircraft (\n  Event bigint NOT NULL,\n  ID varchar NOT NULL,\n  AccidentExplosion varchar,\n  AccidentFire varchar,\n  AirFrameHours varchar,\n  AirFrameHoursSince varchar,\n  AirFrameHoursSinceLastInspection varchar,\n  AircraftCategory varchar,\n  AircraftCertMaxGrossWeight integer,\n  AircraftHomeBuilt varchar,\n  AircraftKey integer NOT NULL,\n  AircraftManufacturer varchar,'), Document(metadata={'id': 1}, page_content='AircraftModel varchar,\n  AircraftRegistrationClass varchar,\n  AircraftSerialNo varchar,\n  AircraftSeries varchar,\n  Damage varchar,\n  DepartureAirportId varchar,\n  DepartureCity varchar,\n  DepartureCountry varchar,\n  DepartureSameAsEvent varchar,\n  DepartureState varchar,\n  DepartureTime integer,\n  DepartureTimeZone varchar,\n  DestinationAirportId varchar,\n  DestinationCity varchar,'), Document(metadata={'id': 1}, page_content='DestinationCountry varc

In [21]:
db = IRISVector.from_documents(
    embedding = OpenAIEmbeddings(), 
    # embedding = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2"),
    # embedding = FastEmbeddings(),
    # embedding = FakeEmbeddings(size=123),
    documents = tables_docs,
    connection_string=iris_conn_str,
    collection_name="sql_tables",
    ids=tables_docs_ids
)

In [22]:
relevant_tables_docs = db.similarity_search(context["input"])
relevant_tables_docs

[Document(metadata={'id': 3}, page_content='LocationSiteZipCode varchar,\n  LocationState varchar,\n  MidAir varchar,\n  NTSBId varchar,\n  NarrativeCause varchar,\n  NarrativeFull varchar,\n  NarrativeSummary varchar,\n  OnGroundCollision varchar,\n  SkyConditionCeiling varchar,\n  SkyConditionCeilingHeight integer,\n  SkyConditionNonCeiling varchar,\n  SkyConditionNonCeilingHeight integer,\n  TimeZone varchar,\n  Type varchar,\n  Visibility varchar,'),
 Document(metadata={'id': 1}, page_content='DestinationCountry varchar,\n  DestinationSameAsLocal varchar,\n  DestinationState varchar,\n  EngineCount integer,\n  EvacuationOccurred varchar,\n  EventId varchar NOT NULL,\n  FlightMedical varchar,\n  FlightMedicalType varchar,\n  FlightPhase integer,\n  FlightPlan varchar,\n  FlightPlanActivated varchar,\n  FlightSiteSeeing varchar,\n  FlightType varchar,\n  GearType varchar,'),
 Document(metadata={'id': 3}, page_content='InjuriesGroundSerious integer,\n  InjuriesHighest varchar,\n  Inju

In [23]:
relevant_tables_docs_indices = [x.metadata["id"] for x in relevant_tables_docs]
indices = table_df["id"].isin(relevant_tables_docs_indices)
relevant_tables_array = [x for x in table_df[indices]["col_def"]]
context["table_info"] = "\n\n".join(relevant_tables_array)
print(context["table_info"])

CREATE TABLE Aviation.Aircraft (
  Event bigint NOT NULL,
  ID varchar NOT NULL,
  AccidentExplosion varchar,
  AccidentFire varchar,
  AirFrameHours varchar,
  AirFrameHoursSince varchar,
  AirFrameHoursSinceLastInspection varchar,
  AircraftCategory varchar,
  AircraftCertMaxGrossWeight integer,
  AircraftHomeBuilt varchar,
  AircraftKey integer NOT NULL,
  AircraftManufacturer varchar,
  AircraftModel varchar,
  AircraftRegistrationClass varchar,
  AircraftSerialNo varchar,
  AircraftSeries varchar,
  Damage varchar,
  DepartureAirportId varchar,
  DepartureCity varchar,
  DepartureCountry varchar,
  DepartureSameAsEvent varchar,
  DepartureState varchar,
  DepartureTime integer,
  DepartureTimeZone varchar,
  DestinationAirportId varchar,
  DestinationCity varchar,
  DestinationCountry varchar,
  DestinationSameAsLocal varchar,
  DestinationState varchar,
  EngineCount integer,
  EvacuationOccurred varchar,
  EventId varchar NOT NULL,
  FlightMedical varchar,
  FlightMedicalType va

## Few shots management

In [24]:
examples = [
    {
        "input": "List all aircrafts.", 
        "query": "SELECT * FROM Aviation.Aircraft"
    },
    {
        "input": "Find all incidents for the aircraft with ID 'N12345'.",
        "query": "SELECT * FROM Aviation.Event WHERE EventId IN (SELECT EventId FROM Aviation.Aircraft WHERE ID = 'N12345')"
    },
    {
        "input": "List all incidents in the 'Commercial' operation type.",
        "query": "SELECT * FROM Aviation.Event WHERE EventId IN (SELECT EventId FROM Aviation.Aircraft WHERE OperationType = 'Commercial')"
    },
    {
        "input": "Find the total number of incidents.",
        "query": "SELECT COUNT(*) FROM Aviation.Event"
    },
    {
        "input": "List all incidents that occurred in 'Canada'.",
        "query": "SELECT * FROM Aviation.Event WHERE LocationCountry = 'Canada'"
    },
    {
        "input": "How many incidents are associated with the aircraft with AircraftKey 5?",
        "query": "SELECT COUNT(*) FROM Aviation.Aircraft WHERE AircraftKey = 5"
    },
    {
        "input": "Find the total number of distinct aircrafts involved in incidents.",
        "query": "SELECT COUNT(DISTINCT AircraftKey) FROM Aviation.Aircraft"
    },
    {
        "input": "List all incidents that occurred after 5 PM.",
        "query": "SELECT * FROM Aviation.Event WHERE EventTime > 1700"
    },
    {
        "input": "Who are the top 5 operators by the number of incidents?",
        "query": "SELECT TOP 5 OperatorName, COUNT(*) AS IncidentCount FROM Aviation.Aircraft GROUP BY OperatorName ORDER BY IncidentCount DESC"
    },
    {
        "input": "Which incidents occurred in the year 2020?",
        "query": "SELECT * FROM Aviation.Event WHERE YEAR(EventDate) = '2020'"
    },
    {
        "input": "What was the month with most events in the year 2020?",
        "query": "SELECT TOP 1 MONTH(EventDate) EventMonth, COUNT(*) EventCount FROM Aviation.Event WHERE YEAR(EventDate) = '2020' GROUP BY MONTH(EventDate) ORDER BY EventCount DESC"
    },
    {
        "input": "How many crew members were involved in incidents?",
        "query": "SELECT COUNT(*) FROM Aviation.Crew"
    },
    {
        "input": "List all incidents with detailed aircraft information for incidents that occurred in the year 2012.",
        "query": "SELECT e.EventId, e.EventDate, a.AircraftManufacturer, a.AircraftModel, a.AircraftCategory FROM Aviation.Event e JOIN Aviation.Aircraft a ON e.EventId = a.EventId WHERE Year(e.EventDate) = 2012"
    },
    {
        "input": "Find all incidents where there were more than 5 injuries and include the aircraft manufacturer and model.",
        "query": "SELECT e.EventId, e.InjuriesTotal, a.AircraftManufacturer, a.AircraftModel FROM Aviation.Event e JOIN Aviation.Aircraft a ON e.EventId = a.EventId WHERE e.InjuriesTotal > 5"
    },
    {
        "input": "List all crew members involved in incidents with serious injuries, along with the incident date and location.",
        "query": "SELECT c.CrewNumber, c.Age, c.Sex, e.EventDate, e.LocationCity, e.LocationState FROM Aviation.Crew c JOIN Aviation.Event e ON c.EventId = e.EventId WHERE c.Injury = 'Serious'"
    },
]


In [25]:
new_sql_samples, sql_samples_ids = filter_not_in_collection(
    cnx, 
    "sql_samples", 
    examples, 
    get_ids_from_string_array([x['input'] for x in examples])
)
print("samples yet not stored by IRISVector:\n", new_sql_samples, sql_samples_ids)

samples yet not stored by IRISVector:
 ({'input': 'List all aircrafts.', 'query': 'SELECT * FROM Aviation.Aircraft'}, {'input': "Find all incidents for the aircraft with ID 'N12345'.", 'query': "SELECT * FROM Aviation.Event WHERE EventId IN (SELECT EventId FROM Aviation.Aircraft WHERE ID = 'N12345')"}, {'input': "List all incidents in the 'Commercial' operation type.", 'query': "SELECT * FROM Aviation.Event WHERE EventId IN (SELECT EventId FROM Aviation.Aircraft WHERE OperationType = 'Commercial')"}, {'input': 'Find the total number of incidents.', 'query': 'SELECT COUNT(*) FROM Aviation.Event'}, {'input': "List all incidents that occurred in 'Canada'.", 'query': "SELECT * FROM Aviation.Event WHERE LocationCountry = 'Canada'"}, {'input': 'How many incidents are associated with the aircraft with AircraftKey 5?', 'query': 'SELECT COUNT(*) FROM Aviation.Aircraft WHERE AircraftKey = 5'}, {'input': 'Find the total number of distinct aircrafts involved in incidents.', 'query': 'SELECT COUNT(

In [26]:
example_selector = SemanticSimilarityExampleSelector.from_examples(
    new_sql_samples,
    OpenAIEmbeddings(),
    IRISVector,
    k=5,
    input_keys=["input"],
    connection_string=iris_conn_str,
    collection_name="sql_samples",
    ids=sql_samples_ids
)

In [27]:
example_selector.select_examples({"input": context["input"]})

[{'input': 'Which incidents occurred in the year 2020?',
  'query': "SELECT * FROM Aviation.Event WHERE YEAR(EventDate) = '2020'"},
 {'input': 'What was the month with most events in the year 2020?',
  'query': "SELECT TOP 1 MONTH(EventDate) EventMonth, COUNT(*) EventCount FROM Aviation.Event WHERE YEAR(EventDate) = '2020' GROUP BY MONTH(EventDate) ORDER BY EventCount DESC"},
 {'input': 'How many crew members were involved in incidents?',
  'query': 'SELECT COUNT(*) FROM Aviation.Crew'},
 {'input': 'Who are the top 5 operators by the number of incidents?',
  'query': 'SELECT TOP 5 OperatorName, COUNT(*) AS IncidentCount FROM Aviation.Aircraft GROUP BY OperatorName ORDER BY IncidentCount DESC'},
 {'input': "List all incidents that occurred in 'Canada'.",
  'query': "SELECT * FROM Aviation.Event WHERE LocationCountry = 'Canada'"}]

In [28]:
context["examples_value"] = "\n\n".join([
    example_prompt.invoke(x).to_string() for x in example_selector.select_examples({"input": context["input"]})
])
context["examples_value"]

"User input: Which incidents occurred in the year 2020?\nSQL query: SELECT * FROM Aviation.Event WHERE YEAR(EventDate) = '2020'\n\nUser input: What was the month with most events in the year 2020?\nSQL query: SELECT TOP 1 MONTH(EventDate) EventMonth, COUNT(*) EventCount FROM Aviation.Event WHERE YEAR(EventDate) = '2020' GROUP BY MONTH(EventDate) ORDER BY EventCount DESC\n\nUser input: How many crew members were involved in incidents?\nSQL query: SELECT COUNT(*) FROM Aviation.Crew\n\nUser input: Who are the top 5 operators by the number of incidents?\nSQL query: SELECT TOP 5 OperatorName, COUNT(*) AS IncidentCount FROM Aviation.Aircraft GROUP BY OperatorName ORDER BY IncidentCount DESC\n\nUser input: List all incidents that occurred in 'Canada'.\nSQL query: SELECT * FROM Aviation.Event WHERE LocationCountry = 'Canada'"

## Prompt execution

In [29]:
prompt_value = prompt.invoke({
    "top_k": context["top_k"],
    "table_info": context["table_info"],
    "examples_value": context["examples_value"],
    "input": context["input"]
})
print(prompt_value.to_string())

System: 
You are an InterSystems IRIS expert. Given an input question, first create a syntactically correct InterSystems IRIS query to run and return the answer to the input question.
Unless the user specifies in the question a specific number of examples to obtain, query for at most 3 results using the TOP clause as per InterSystems IRIS. You can order the results to return the most informative data in the database.
Never query for all columns from a table. You must query only the columns that are needed to answer the question. Wrap each column name in single quotes ('') to denote them as delimited identifiers.
Pay attention to use only the column names you can see in the tables below. Be careful to not query for columns that do not exist. Also, pay attention to which column is in which table.
Pay attention to use CAST(CURRENT_DATE as date) function to get the current date, if the question involves "today".

System: 
Only use the following tables:
CREATE TABLE Aviation.Aircraft (
  Ev

In [30]:
model = ChatOpenAI(model="gpt-3.5-turbo", temperature=0)
output_parser = StrOutputParser()
chain_model = prompt | model | output_parser
response = chain_model.invoke({
    "top_k": context["top_k"],
    "table_info": context["table_info"],
    "examples_value": context["examples_value"],
    "input": context["input"]
})
print(response)

SELECT TOP 3 YEAR(EventDate) AS EventYear, COUNT(*) AS EventCount
FROM Aviation.Event
GROUP BY YEAR(EventDate)
ORDER BY EventCount DESC


## References

- https://python.langchain.com/v0.1/docs/expression_language/get_started/
- https://python.langchain.com/v0.1/docs/use_cases/sql/prompting/
- https://python.langchain.com/v0.1/docs/modules/model_io/prompts/composition/