In [1]:
import re
import time
import json
import requests
import langchain
import langchain_community 
from langchain.llms import Huggi
from langchain.prompts import PromptTemplate
from langchain_community.utilities import SQLDatabase
from langchain.schema.runnable import RunnablePassthrough
from sqlalchemy import create_engine, inspect, MetaData, text

In [10]:
# Location of the Chinook SQLite file and the SQLAlchemy URL that points at it.
db_path = "./data/Chinook_Sqlite.sqlite"
uri = "sqlite:///./data/Chinook_Sqlite.sqlite"

# LangChain database wrapper (not a raw cursor); get_schema() reads table info from it.
db = SQLDatabase.from_uri(uri)

def get_schema(_=None):
    """Return the database schema text with ``/* ... */`` blocks removed.

    Parameters
    ----------
    _ : object, optional
        Ignored. It exists so the function can be used as a one-argument
        callable, e.g. ``RunnablePassthrough.assign(schema=get_schema)``,
        which passes the chain input. Defaults to ``None`` so the function
        can also be called directly without a dummy argument.

    Returns
    -------
    str
        The text from ``db.get_table_info()`` with comment blocks removed,
        surrounding whitespace trimmed, and blank-line runs collapsed.
    """
    schema = db.get_table_info()

    # Drop every /* ... */ block; DOTALL lets one block span several lines.
    schema_cleaned = re.sub(r'/\*.*?\*/', '', schema, flags=re.DOTALL)

    # Trim leading/trailing whitespace left behind by the removed blocks.
    schema_cleaned = schema_cleaned.strip()

    # Collapse any run of blank (or whitespace-only) lines into one blank line.
    schema_cleaned = re.sub(r'\n\s*\n+', '\n\n', schema_cleaned)

    # NOTE: an earlier version split on '\nCREATE', re-prefixed "CREATE" and
    # joined with '\n'. That reproduces its input exactly, so it was removed.
    return schema_cleaned

In [35]:
# Prompt template for the SQL-generation step. It has two placeholders,
# {schema} and {user_question}. A backslash at the very end of a line joins
# that line with the next one inside the string, so nothing may follow the
# backslash: a trailing space turns it into a literal "\ " in the prompt
# (and an invalid-escape warning).
prompt = """
            You are an expert SQL query generator. You will be provided with a database schema and user instructions.\
            Your task is to generate accurate and efficient SQL queries based on the given schema and user requirements. \
            Follow these steps:\
     \
     ***instructions*** \
     1.Understand the Schema: \
        Thoroughly examine the provided database schema. \
        Note the tables, columns, data types, and relationships between tables (e.g., primary keys, foreign keys).\
     2.Interpret User Instructions: \
        Carefully read and understand the user's requirements for the SQL query. \
        Identify the tables and columns involved, the conditions for filtering data, and the desired output.\
    3.Generate SQL Queries:\
        Write SQL queries that accurately reflect the user's requirements.\
        Ensure the queries are syntactically correct and optimized for performance.\
        Return the Query:\

    Provide the generated SQL query as the result. \
    {schema}
    
    User Instruction:\
    {user_question}
    
    ***strict instructions***
    Just provide only and only SQL query do not use escape sequeces in output and \
    also not write Here is the generated SQL query:
    Do not add anything extra.
    """

In [36]:
# `Ollama` is used below but no import in this notebook brings it into scope,
# so without this line the cell raises NameError on a fresh kernel.
from langchain_community.llms import Ollama

# Template for the SQL-generation prompt; both variables are filled by the chain.
prompt_temp = PromptTemplate(template=prompt, input_variables=['schema', 'user_question'])

# LLM client pointed at an Ollama server on localhost, using the `llama3` model.
llm = Ollama(
    base_url='http://localhost:11434',
    model='llama3',
)

# Pipeline: add `schema` to the input dict -> render the prompt -> call the LLM.
sql_chain = (
    RunnablePassthrough.assign(schema=get_schema)
    | prompt_temp
    | llm
)

In [82]:
def run_query(sql_query='SELECT * FROM Album limit 1;', db_uri='sqlite:///./data/Chinook_Sqlite.sqlite'):
    """Execute one SQL statement and return every row as a column-name mapping.

    Parameters
    ----------
    sql_query : str
        SQL text to execute.
    db_uri : str
        SQLAlchemy database URL to connect to.

    Returns
    -------
    list
        The result of ``result.mappings().all()``: one mapping per row,
        keyed by column name.

    Notes
    -----
    SECURITY: the statement is executed verbatim. In this notebook it is
    produced by the LLM, so it should be treated as untrusted input (it
    could be a write or DDL statement).
    """
    engine = create_engine(db_uri)
    try:
        with engine.connect() as conn:
            # .all() materializes the rows before the connection is closed.
            return conn.execute(text(sql_query)).mappings().all()
    finally:
        # A fresh Engine is built on every call; dispose it so its
        # connection pool does not accumulate across calls.
        engine.dispose()

In [85]:
# Prompt for the answer step. It is filled with str.format() using three
# placeholders: {schema}, {response} (the rows returned by the database)
# and {user_question}.
prompt_t = """
            You have given a query response as an input it will be dictionary containing colname : value your task is to use that and 
            Based on given schema and answer the user's question:
            {schema}
            
            recieved response from database:
            {response}
            
            user question:
            {user_question}   
            
            '''strict instrucations'''
            Don't print response and question which I have provided to you.
    """

In [83]:
def full_chain(user_question='how many albums are there in the database?'):
    """Answer a natural-language question about the database.

    Steps: ask ``sql_chain`` for a SQL query, run it with ``run_query``,
    then ask the LLM to phrase an answer from the returned rows.

    Parameters
    ----------
    user_question : str, optional
        The question to answer. Defaults to the question that was previously
        hard-coded, so ``full_chain()`` behaves as before.

    Returns
    -------
    The LLM's response to the filled-in ``prompt_t`` prompt.
    """
    # Step 1: question -> SQL text.
    sql_query = sql_chain.invoke({"user_question": user_question})

    # Step 2: execute the generated SQL.
    rows = run_query(sql_query)

    # Step 3: rows -> natural-language answer.
    # get_schema ignores its argument, so pass None explicitly rather than
    # relying on IPython's `_` (last cell output), which does not exist
    # outside an interactive session.
    answer_prompt = prompt_t.format(
        user_question=user_question,
        schema=get_schema(None),
        response=rows,
    )
    # Use .invoke (as sql_chain does) rather than calling the LLM object directly.
    return llm.invoke(answer_prompt)

In [84]:
print(full_chain())

SQL Query: SELECT COUNT(*) 
FROM "Album";
result_as_dict: [{'COUNT(*)': 347}]
response : SELECT COUNT(*) 
FROM "Album";
Based on the schema and the received response from the database, the answer to the user's question "how many albums are there in the database?" would be:

347
