In [1]:
from langchain_community.llms import Ollama 
from dotenv import find_dotenv, load_dotenv
from langchain_core.prompts import ChatPromptTemplate
from langchain_community.utilities import SQLDatabase
import os
from langchain_core.runnables import RunnablePassthrough
from langchain_openai import ChatOpenAI
from langchain_core.output_parsers import StrOutputParser

In [5]:
!pip install python-dotenv psycopg2-binary sqlalchemy sqlalchemy-redshift

Collecting sqlalchemy-redshift
  Using cached sqlalchemy_redshift-0.8.14-py2.py3-none-any.whl (38 kB)
Collecting sqlalchemy
  Using cached SQLAlchemy-1.4.51.tar.gz (8.5 MB)
  Preparing metadata (setup.py) ... [?25ldone
Building wheels for collected packages: sqlalchemy
  Building wheel for sqlalchemy (setup.py) ... [?25ldone
[?25h  Created wheel for sqlalchemy: filename=SQLAlchemy-1.4.51-cp39-cp39-macosx_10_9_x86_64.whl size=1570322 sha256=7816b3e14c45bad9be73b0046f8ef1485d27f441e7c856cf104495df4cf7d9b5
  Stored in directory: /Users/marceloyou/Library/Caches/pip/wheels/ea/1f/c4/709e5d2dadd5fcf063a6eab07bee8692056f8699367111f2b0
Successfully built sqlalchemy
Installing collected packages: sqlalchemy, sqlalchemy-redshift
  Attempting uninstall: sqlalchemy
    Found existing installation: SQLAlchemy 2.0.15
    Uninstalling SQLAlchemy-2.0.15:
      Successfully uninstalled SQLAlchemy-2.0.15
[31mERROR: pip's dependency resolver does not currently take into account all the packages that 

In [2]:
# Locate a .env file with find_dotenv() and load it with load_dotenv().
# The return value is bound to `_` so it does not show up as cell output.
dotenv_file = find_dotenv()
_ = load_dotenv(dotenv_file)

In [3]:
from urllib.parse import quote_plus

# Build the SQLAlchemy connection URI for the Redshift cluster.
# Credentials are read from the environment (a missing variable raises KeyError)
# and percent-encoded before interpolation: characters such as "@", ":" or "/"
# inside a user name or password would otherwise be read as URI delimiters.
encoded_user = quote_plus(os.environ['redshift_user'])
encoded_pass = quote_plus(os.environ['redshift_pass'])
database_uri = (
    f"redshift+psycopg2://{encoded_user}:{encoded_pass}"
    "@redshift-cluster-comp0087-demo.cvliubs5oipw.eu-west-2.redshift.amazonaws.com"
    ":5439/comp0087"
)

In [4]:
# Open the connection through LangChain's SQLDatabase wrapper, scoped to the
# `test` schema.
sqldb = SQLDatabase.from_uri(
    database_uri=database_uri,
    schema="test",
)

In [5]:
# Show the SQL dialect reported for this connection.
sqldb.dialect

'redshift'

In [6]:
# List the table names the SQLDatabase wrapper reports as usable.
sqldb.get_usable_table_names()

['football']

In [7]:
# Display the table description (DDL plus sample rows); this is the same text
# that get_schema() feeds into the prompts.
sqldb.get_table_info()

'\nCREATE TABLE test.football (\n\tseason VARCHAR(256), \n\tleague VARCHAR(256), \n\tdiv VARCHAR(256), \n\tdate DATE, \n\thometeam VARCHAR(256), \n\tawayteam VARCHAR(256), \n\tfthg REAL, \n\tftag REAL, \n\tftr VARCHAR(256), \n\ththg REAL, \n\thtag REAL, \n\thtr VARCHAR(256), \n\treferee VARCHAR(256), \n\ths REAL, \n\t"as" REAL, \n\thst REAL, \n\tast REAL, \n\thf REAL, \n\taf REAL, \n\thc REAL, \n\tac REAL, \n\thy REAL, \n\tay REAL, \n\thr REAL, \n\tar REAL, \n\tmatchname VARCHAR(256)\n)\n\n/*\n3 rows from football table:\nseason\tleague\tdiv\tdate\thometeam\tawayteam\tfthg\tftag\tftr\ththg\thtag\thtr\treferee\ths\tas\thst\tast\thf\taf\thc\tac\thy\tay\thr\tar\tmatchname\n2000-2001\tPremier League\tE0\t2000-08-19\tCharlton\tManchester City\t4.0\t0.0\tH\t2.0\t0.0\tH\tRob Harris\t17.0\t8.0\t14.0\t4.0\t13.0\t12.0\t6.0\t6.0\t1.0\t2.0\t0.0\t0.0\tCharlton VS Manchester City\n2000-2001\tPremier League\tE0\t2000-08-19\tChelsea\tWest Ham\t4.0\t2.0\tH\t1.0\t0.0\tH\tGraham Barber\t17.0\t12.0\t10.0\

In [8]:
# Run a hand-written query directly against the database: the number of
# distinct values in the `hometeam` column of the football table.
sqldb.run("SELECT COUNT(DISTINCT hometeam) FROM football;")

'[(45,)]'

In [9]:
def get_schema(_):
    """Return the table description of the connected database.

    The positional argument is ignored; accepting it lets the function be
    passed to ``RunnablePassthrough.assign(...)`` as a chain step.
    """
    table_info = sqldb.get_table_info()
    return table_info

In [15]:
def run_query(query):
    """Execute ``query`` through ``sqldb.run`` and return whatever it returns."""
    result = sqldb.run(query)
    return result

In [11]:
# Code Llama through the Ollama wrapper. The chains in this notebook do not
# reference `model`; they all use `llm`.
model = Ollama(model="codellama")

# ChatOpenAI with its default settings; used for both SQL generation and the
# final natural-language answer.
llm = ChatOpenAI()

In [16]:
# Prompt for the SQL-generation step.
# The connected database reports the `redshift` dialect, so the instructions
# ask for Amazon Redshift SQL (and CURRENT_DATE for "today") instead of SQLite
# syntax such as date('now'), which Redshift does not accept.
# Placeholders: {schema} receives the table description, {question} the user
# question. The prompt ends on "SQLQuery:" so the model continues with the SQL.
template = """You are an Amazon Redshift expert. Given an input question, first create a syntactically correct Amazon Redshift SQL query to run, then look at the results of the query and return the answer to the input question.
Unless the user specifies in the question a specific number of examples to obtain, query for at most 5 results using the LIMIT clause as per Amazon Redshift. You can order the results to return the most informative data in the database.
Never query for all columns from a table. You must query only the columns that are needed to answer the question. Wrap each column name in double quotes (") to denote them as delimited identifiers.
Pay attention to use only the column names you can see in the tables below. Be careful to not query for columns that do not exist. Also, pay attention to which column is in which table.
Pay attention to use the CURRENT_DATE function to get the current date, if the question involves "today".

Use the following format:

Question: Question here
SQLQuery: SQL Query to run
SQLResult: Result of the SQLQuery
Answer: Final answer here

Only use the following tables:
{schema}

Question: {question}
SQLQuery:"""

# Compile the SQL-generation template into a chat prompt; it expects the
# `schema` and `question` inputs named by the template's placeholders.
prompt = ChatPromptTemplate.from_template(template)

In [17]:
# Step 1: add the table description to the incoming dict under "schema".
with_schema = RunnablePassthrough.assign(schema=get_schema)

# Step 2: stop generation before the model starts writing a "SQLResult:" line,
# so only the SQL statement comes back.
sql_only_llm = llm.bind(stop=["\nSQLResult:"])

# question -> prompt -> LLM -> plain string containing the generated SQL.
sql_response = with_schema | prompt | sql_only_llm | StrOutputParser()

In [18]:
# Try the SQL-generation chain on its own; the cell output is the generated SQL.
sql_question = {"question": "How many number of distinct teams in the football league"}
sql_response.invoke(sql_question)

'SELECT COUNT(DISTINCT hometeam) FROM test.football'

In [19]:
# Prompt for the answer-writing step. It receives the table description
# ({schema}), the user question ({question}), the generated SQL ({query}) and
# the result of running it ({response}), and asks for an "Answer:" / "Query:"
# formatted reply.
# Note: this rebinds `template`; the SQL-generation prompt object built earlier
# is unaffected because it was already constructed from the previous value.
template = """Based on the table schema below, question, sql query, and sql response, write a natural language response 
as well as showing the SQL Query in the following format.

<<Result Format>>
Answer:
Query:

Schema:{schema}
Question: {question}
SQL Query: {query}
SQL Response: {response}
 """
prompt_response = ChatPromptTemplate.from_template(template)

In [20]:
# Stage 1: generate the SQL and store it under "query".
with_query = RunnablePassthrough.assign(query=sql_response)

# Stage 2: add the table description and the result of executing that SQL.
# The generated statement is run against the database exactly as the model
# wrote it.
with_results = with_query.assign(
    schema=get_schema,
    response=lambda state: run_query(state["query"]),
)

# Stage 3: have the LLM phrase the final answer from question, SQL and result.
full_chain = with_results | prompt_response | llm

In [21]:
# Run the end-to-end chain; the cell output is the model's message object.
team_count_question = {"question": "How many distinct teams in the football league"}
full_chain.invoke(team_count_question)

AIMessage(content='Answer: There are 45 distinct teams in the football league.\n\nQuery: SELECT COUNT(DISTINCT hometeam) FROM football;')

In [1]:
from langchain_community.llms import Ollama 
from dotenv import find_dotenv, load_dotenv
from langchain_core.prompts import ChatPromptTemplate
from langchain_community.utilities import SQLDatabase
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser
from langchain_openai import ChatOpenAI
import os 


from urllib.parse import quote_plus

# Load variables from a .env file into the process environment; the return
# value is bound to `_` so it is not echoed.
_ = load_dotenv(find_dotenv())

# Raw Redshift credentials from the environment (KeyError if either is unset).
redshift_user = os.environ['redshift_user']
redshift_pass = os.environ['redshift_pass']

# `llm` drives both chains below; `model` is not referenced by them.
llm = ChatOpenAI()
model = Ollama(model = 'codellama')

# Credentials are percent-encoded before being placed in the URI: characters
# such as "@", ":" or "/" in a user name or password would otherwise be read as
# URI delimiters and break the connection string.
database_uri = (
    f"redshift+psycopg2://{quote_plus(redshift_user)}:{quote_plus(redshift_pass)}"
    "@redshift-cluster-comp0087-demo.cvliubs5oipw.eu-west-2.redshift.amazonaws.com"
    ":5439/comp0087"
)

# Connect through LangChain's SQLDatabase wrapper, scoped to the `test` schema.
sqldb = SQLDatabase.from_uri(database_uri=database_uri, schema = 'test')



# Prompt for the SQL-generation step.
# The database is reached through the `redshift+psycopg2` driver, so the
# instructions ask for Amazon Redshift SQL (and CURRENT_DATE for "today")
# instead of SQLite syntax such as date('now'), which Redshift does not accept.
# Placeholders: {schemas} receives the table description, {question} the user
# question. The prompt ends on "SQLQuery:" so the model continues with the SQL.
# The variable name keeps its existing spelling because the prompt construction
# further down refers to it.
query_generation_tempalte = """You are an Amazon Redshift expert. Given an input question, first create a syntactically correct Amazon Redshift SQL query to run, then look at the results of the query and return the answer to the input question.
Unless the user specifies in the question a specific number of examples to obtain, query for at most 5 results using the LIMIT clause as per Amazon Redshift. You can order the results to return the most informative data in the database.
Never query for all columns from a table. You must query only the columns that are needed to answer the question. Wrap each column name in double quotes (") to denote them as delimited identifiers.
Pay attention to use only the column names you can see in the tables below. Be careful to not query for columns that do not exist. Also, pay attention to which column is in which table.
Pay attention to use the CURRENT_DATE function to get the current date, if the question involves "today".

Use the following format:

Question: Question here
SQLQuery: SQL Query to run
SQLResult: Result of the SQLQuery
Answer: Final answer here

Only use the following tables:
{schemas}

Question: {question}
SQLQuery:"""

# Prompt for the answer-writing step. It receives the table description
# ({schemas}), the user question ({question}), the generated SQL ({query}) and
# the result of running it ({response}), and asks for an "Answer:" / "Query:"
# formatted reply.
answer_generation_template = """Based on the table schema below, question, sql query, and sql response, write a natural language response 
as well as showing the SQL Query in the following format.

<<Result Format>>
Answer:
Query:

Schema:{schemas}
Question: {question}
SQL Query: {query}
SQL Response: {response}
 """

# Compile both templates into chat prompt objects.
# `query_promot` (sic) keeps its spelling because the sql_response chain refers
# to it by that name.
query_promot = ChatPromptTemplate.from_template(query_generation_tempalte)
answer_prompt = ChatPromptTemplate.from_template(answer_generation_template)


def get_schema(_):
    """Return the table description of the connected database.

    The positional argument is ignored; accepting it lets the function be
    passed to ``RunnablePassthrough.assign(...)`` as a chain step.
    """
    table_info = sqldb.get_table_info()
    return table_info

def run_query(query):
    """Execute ``query`` through ``sqldb.run`` and return whatever it returns."""
    result = sqldb.run(query)
    return result

# Step 1: add the table description to the incoming dict under "schemas".
with_schemas = RunnablePassthrough.assign(schemas=get_schema)

# Step 2: stop generation before the model starts writing a "SQLResult:" line,
# so only the SQL statement comes back.
sql_only_llm = llm.bind(stop=["\nSQLResult:"])

# question -> prompt -> LLM -> plain string containing the generated SQL.
sql_response = with_schemas | query_promot | sql_only_llm | StrOutputParser()

# Stage 1: generate the SQL and store it under "query".
with_query = RunnablePassthrough.assign(query=sql_response)

# Stage 2: add the table description and the result of executing that SQL.
# The generated statement is run against the database exactly as the model
# wrote it.
with_results = with_query.assign(
    schemas=get_schema,
    response=lambda state: run_query(state["query"]),
)

# Stage 3: have the LLM phrase the final answer and reduce it to a plain string.
full_chain = with_results | answer_prompt | llm | StrOutputParser()

# Run the end-to-end chain and print the formatted answer text.
final_answer = full_chain.invoke({"question": "How many distinct teams in the football league"})
print(final_answer)



Answer: There are 45 distinct teams in the football league.

Query: 
SELECT COUNT(DISTINCT hometeam) AS distinct_teams
FROM football
