# Airbyte SQL Index Guide

This guide shows how to run raw SQL and natural-language queries against a Snowflake database that was populated by Airbyte.

In [None]:
import logging
import sys

# Send INFO-and-above log records to stdout so they show up in the cell output.
logging.basicConfig(stream=sys.stdout, level=logging.INFO)

# basicConfig() already attaches a stdout StreamHandler when the root logger has
# no handlers. Only add one if it is missing, so each record is printed once and
# re-running this cell does not pile up duplicate handlers.
_root_logger = logging.getLogger()
if not any(
    isinstance(handler, logging.StreamHandler)
    and getattr(handler, "stream", None) is sys.stdout
    for handler in _root_logger.handlers
):
    _root_logger.addHandler(logging.StreamHandler(stream=sys.stdout))

The next cell is a workaround that lets snowflake-sqlalchemy run despite its incompatibility with the installed SQLAlchemy version.

Taken from https://github.com/snowflakedb/snowflake-sqlalchemy/issues/380#issuecomment-1470762025

In [None]:
# Hack to make snowflake-sqlalchemy work until they patch it

def snowflake_sqlalchemy_20_monkey_patches():
    """Monkey-patch SQLAlchemy and the Snowflake dialect in place.

    The function mutates module/class attributes and returns nothing:

    * sets ``sqlalchemy.util.compat.string_types`` to ``(str,)`` and
      ``sqlalchemy.types.String.RETURNS_UNICODE`` to ``True``;
    * sets ``SnowflakeDialect.returns_unicode_strings`` to ``True``;
    * replaces ``SnowflakeDialect.has_table`` with a version that accepts the
      ``info_cache`` keyword (and any other extra keywords).

    Raises:
        ImportError: if ``sqlalchemy`` or ``snowflake.sqlalchemy`` cannot be
            imported.
    """
    import sqlalchemy.types
    import sqlalchemy.util.compat

    # Make strings always return unicode strings.
    # NOTE(review): these attributes are set *before* snowflake.sqlalchemy is
    # imported. Keep that order -- presumably the snowflake package reads them
    # at import time; confirm before reordering.
    sqlalchemy.util.compat.string_types = (str,)
    sqlalchemy.types.String.RETURNS_UNICODE = True

    import snowflake.sqlalchemy.snowdialect

    dialect_cls = snowflake.sqlalchemy.snowdialect.SnowflakeDialect
    dialect_cls.returns_unicode_strings = True

    def has_table(self, connection, table_name, schema=None, info_cache=None, **kw):
        """Check whether the table exists.

        ``info_cache`` and any further keyword arguments are accepted and
        ignored, so callers that pass extra reflection options do not fail.
        """
        return self._has_object(connection, "TABLE", table_name, schema)

    # Make has_table() support the `info_cache` kwarg.
    dialect_cls.has_table = has_table

# usage: call this function before creating an engine.
try:
    snowflake_sqlalchemy_20_monkey_patches()
except ImportError as e:
    # Only a failed import warrants the install hint; any other error is left
    # to propagate unchanged. Chain the original error so the missing module
    # stays visible in the traceback.
    raise ValueError("Please run `pip install snowflake-sqlalchemy`") from e

### Define database

We define the Snowflake URI. It is used below to create the SQLAlchemy engine, which is then passed to the `SQLDatabase` constructor.

In [None]:
from llama_index.langchain_helpers.sql_wrapper import SQLDatabase

# Connection URI template: replace every <...> placeholder with your own values.
snowflake_uri = (
    'snowflake://<user_login_name>:<password>@<account_identifier>'
    '/<database_name>/<schema_name>'
    '?warehouse=<warehouse_name>&role=<role_name>'
)



First, we connect with SQLAlchemy directly to check that the database is reachable.

In [None]:
from sqlalchemy import select, create_engine, MetaData, Table

# View the current table: reflect it from the database and fetch a single row
# to confirm that the connection works.
engine = create_engine(snowflake_uri)
# `MetaData(bind=...)` and `Table(autoload=True)` were removed in SQLAlchemy 2.0;
# `autoload_with=engine` on its own triggers reflection.
metadata = MetaData()
table = Table(
    'ZENDESK_TICKETS',
    metadata,
    autoload_with=engine,
)
# `select(table)` selects every column of the table. Passing the column
# collection as a single argument is the legacy calling style.
stmt = select(table)


with engine.connect() as connection:
    cursor_result = connection.execute(stmt)
    results = cursor_result.fetchone()
    print(results)
    # Take the column names from the result object: `Row.keys()` is a legacy
    # accessor and is not available on 2.0 rows.
    print(list(cursor_result.keys()))


### Build Index

We then build the SQL Index (`SQLStructStoreIndex`).

In [None]:
from llama_index import SQLStructStoreIndex, SQLDatabase, VectorStoreIndex
from llama_index.indices.struct_store import SQLContextContainerBuilder

# NOTE: the table_name specified here is the table that you
# want to extract into from unstructured documents.
# Wrap the SQLAlchemy engine created earlier in llama_index's SQLDatabase.
sql_database = SQLDatabase(engine)
context_builder = SQLContextContainerBuilder(sql_database)
# Derive a VectorStoreIndex from the builder's context.
# NOTE(review): presumably this indexes the table schemas (hence the variable
# name) -- confirm against the SQLContextContainerBuilder docs.
table_schema_index = context_builder.derive_index_from_context(
    VectorStoreIndex,
)
# The natural-language question; it is reused by a later cell for the
# text-to-SQL query.
query_str = "When was the last ticket created?"
# Query the derived index with the question. The return value is discarded;
# store_context_str=True presumably keeps the retrieved context on the builder
# so build_context_container() can pick it up -- TODO confirm.
context_builder.query_index_for_context(table_schema_index, query_str, store_context_str=True)
context_container = context_builder.build_context_container()
# Build the SQL index over the reflected ZENDESK_TICKETS table, passing along
# the context container built above.
index = SQLStructStoreIndex(
    sql_database=sql_database,
    table_name="ZENDESK_TICKETS",
    table=table,
    sql_context_container=context_container,
)

### Query Index

We first show how we can execute a raw SQL query, which directly executes over the table.

In [None]:
# Request the "sql" query mode and hand the engine a SQL statement as the query.
query_engine = index.as_query_engine(query_mode="sql")
response = query_engine.query(
    "SELECT created_at FROM ZENDESK_TICKETS limit 5"
)

In [None]:
from IPython.display import Markdown, display
# Render the response of the raw SQL query in bold as Markdown.
display(Markdown(f"<b>{response}</b>"))

We then show a natural language query, which is translated to a SQL query under the hood with our text-to-SQL prompt.

In [None]:

# Create a query engine without specifying a query mode, for the
# natural-language question.
query_engine = index.as_query_engine()
# Show the context string held by the context container before running the query.
display(Markdown(f"<b>{context_container.context_str}</b>"))
# Ask the question defined when the index was built.
response = query_engine.query(query_str)

In [None]:
# Render the response of the natural-language query in bold as Markdown.
display(Markdown(f"<b>{response}</b>"))

In [None]:
# You can also fetch the raw result from SQLAlchemy!
# It is stored under the "result" key of the response's extra_info mapping.
response.extra_info["result"]