https://python.langchain.com/docs/use_cases/sql/quickstart/

### **Test the sqldb**

In [1]:
from langchain_community.utilities import SQLDatabase
from pyprojroot import here
import warnings
# Ignore every Python warning from here on, presumably to keep the notebook
# output uncluttered.
# NOTE(review): this also hides deprecation warnings raised by the libraries
# used below -- consider narrowing the filter.
warnings.filterwarnings("ignore")

**Connecting to the sqldb**

In [2]:
# Locate the SQLite file inside the project's "data" folder and open it
# through LangChain's SQLDatabase wrapper.
db_path = f"{here('data')}/csv_xlsx_sqldb.db"
db = SQLDatabase.from_uri("sqlite:///" + db_path)

In [None]:
# Display the SQLDatabase object as the cell output to confirm it was created.
db

In [None]:
# Validate the connection to the SQL database: print its dialect and the
# usable table names, then fetch the first 10 rows of the actions_rows table
# (shown as the cell output).
print(db.dialect)
print(db.get_usable_table_names())
db.run("SELECT * FROM actions_rows LIMIT 10;")

### **Test the access to the environment variables**

In [None]:
from dotenv import load_dotenv
import os

# Load the .env file and report the value returned by load_dotenv().
dotenv_loaded = load_dotenv()
print("Environment variables are loaded:", dotenv_loaded)

# Read one of the expected settings back as a sanity check.
deployment_name = os.getenv("gpt_deployment_name")
print("test by reading a variable:", deployment_name)

### **Test your GPT model**

In [None]:
from openai import OpenAI

# Minimal system + user exchange, used only to confirm that the API key and
# the configured model name work.
messages = [
    {"role": "system", "content": "You are a helpful assistant"},
    {"role": "user", "content": "hello"},
]

client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))

response = client.chat.completions.create(
    model=os.getenv("gpt_deployment_name"),
    messages=messages,
)

# Show the text of the first returned choice.
print(response.choices[0].message.content)

### **1. SQL query chain**

In [5]:
# Load the LLM
import os

from dotenv import load_dotenv
from langchain_openai import ChatOpenAI

# Make this cell runnable on its own: it needs `os` and the values from the
# .env file, so it must not depend on an optional "test" cell having been
# executed beforehand.
load_dotenv()

model_name = os.getenv("gpt_deployment_name")
openai_api_key = os.getenv("OPENAI_API_KEY")

# Fail early with a clear message instead of an opaque client error later.
if not model_name or not openai_api_key:
    raise RuntimeError(
        "Both 'gpt_deployment_name' and 'OPENAI_API_KEY' must be set "
        "(e.g. in the project's .env file)."
    )

# NOTE(review): temperature=1.0 makes the generated SQL non-deterministic;
# confirm this is intended for query generation.
llm = ChatOpenAI(
    openai_api_key=openai_api_key,
    model_name=model_name,
    temperature=1.0,
)


In [None]:
from langchain.chains import create_sql_query_chain

# Build a chain from the LLM and the database, then ask it one question and
# print what it returns.
chain = create_sql_query_chain(llm, db)
question = {"question": "How many actions are there?"}
response = chain.invoke(question)
print(response)

Execute the query to make sure it’s valid

In [None]:
# Run the text returned by the query chain against the database; the result
# is shown as the cell output.
db.run(response)

In [None]:
# Pretty-print the first prompt of the current chain for inspection.
chain.get_prompts()[0].pretty_print()

### **Add QuerySQLDataBaseTool to the chain**
Execute SQL query

**This is the most dangerous part of creating a SQL chain.** Consider carefully if it is OK to run automated queries over your data. Minimize the database connection permissions as much as possible. Consider adding a human approval step to your chains before query execution (see below).

We can use the QuerySQLDataBaseTool to easily add query execution to our chain:

In [None]:
from langchain_community.tools.sql_database.tool import QuerySQLDataBaseTool

# Two stages: the first produces a query from the question, the second runs
# that query against db.
execute_query = QuerySQLDataBaseTool(db=db)
write_query = create_sql_query_chain(llm, db)

# Pipe the generated query straight into the execution tool.
chain = write_query | execute_query

chain.invoke({"question": "How many actions are there"})

### **Answer the question in a user friendly manner**

In [None]:
from operator import itemgetter
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import PromptTemplate
from langchain_core.runnables import RunnablePassthrough

# Prompt that combines the question, the generated query and its result.
answer_prompt = PromptTemplate.from_template(
    """Given the following user question, corresponding SQL query, and SQL result, answer the user question.

Question: {question}
SQL Query: {query}
SQL Result: {result}
Answer: """
)

# Final stage: fill the prompt, call the LLM, keep only the text.
answer = answer_prompt | llm | StrOutputParser()

# Step 1: add the generated query under the "query" key.
with_query = RunnablePassthrough.assign(query=write_query)
# Step 2: execute that query and add its output under the "result" key.
with_result = with_query.assign(result=itemgetter("query") | execute_query)
# Step 3: hand question, query and result to the answer stage.
chain = with_result | answer

chain.invoke({"question": "How many actions are there"})

### **2. Agents**

The SQL Agent provides a more flexible way of interacting with SQL databases. The main advantages of using the SQL Agent are:

- It can answer questions based on the databases’ schema as well as on the databases’ content (like describing a specific table).
- It can recover from errors by running a generated query, catching the traceback and regenerating it correctly.
- It can answer questions that require multiple dependent queries.
- It will save tokens by only considering the schema from relevant tables.

To initialize the agent, we use the create_sql_agent function. This agent contains the SQLDatabaseToolkit, which contains tools to:

- Create and execute queries
- Check query syntax
- Retrieve table descriptions
- …

In [7]:
from langchain_community.agent_toolkits import create_sql_agent

# Build the SQL agent on top of the LLM and the database connection.
agent_executor = create_sql_agent(
    llm,
    db=db,
    agent_type="openai-tools",
    verbose=True,
    max_execution_time=10,
)

In [None]:
# Ask the agent a question; the value it returns is shown as the cell output.
agent_executor.invoke({"input": "How many actions have to do with Blockchains?"})

In [None]:
# Ask the agent for the most recent action together with related details.
agent_executor.invoke(
    {
        "input": (
            "Provide me only the most recent action, its description, its comment "
            "and the static event description (from static_event_id) in a list."
        )
    }
)
# agent_executor.invoke("Describe the playlisttrack table")

Insert a temporary table into the database to test uploading new records

In [None]:
import pandas as pd

# Load the CSV file
file_path = str(here("data")) + "/input_data/RAG_sample_data_no_match_column_names.csv"
data = pd.read_csv(file_path)

# Insert the data into a temporary table, replacing any previous upload.
# NOTE(review): this reaches into the private `_engine` attribute of
# SQLDatabase; it may break if the wrapper's internals change.
data.to_sql("temp_table", con=db._engine, if_exists="replace", index=False)
print("CSV data has been uploaded to the database.")

# Re-create the SQLDatabase wrapper, presumably so that the new table is
# picked up by it.
# NOTE: might be able to do this in a more efficient way
db_path = str(here("data")) + "/csv_xlsx_sqldb.db"
db = SQLDatabase.from_uri(f"sqlite:///{db_path}")

# Print the row count: a bare expression in the middle of a cell is silently
# discarded, so without print() this check would show nothing.
print(db.run("SELECT COUNT(*) FROM temp_table;"))
print(db.dialect)
print(db.get_usable_table_names())

Test the new temp_table and the ability to insert its rows as records into the other tables

In [None]:

import json

# Re-create the agent so that it works with the reloaded database object.
agent_executor = create_sql_agent(
    llm,
    db=db,
    agent_type="openai-tools",
    verbose=True,
    max_execution_time=10,
)

# Alternative prompts kept for experimentation:
# agent_executor.invoke({"input": "You are a Data Analyst with SQL expertise. I need you to provide me with the proper SQL insert statements code (with no specific values) for some new data that is stored in the temp_table. Use the schema as a reference to know which column from the temp_table goes into which column from the other tables. Using the data from the temp_table, provide me only the proper SQL insert statements to insert them into the appropriate table (except the temp_table)."})

# agent_executor.invoke({"input": "You are a Data Analyst with SQL expertise. We've added new data that is stored in the temp_table. Programmatically speaking, how would you generate the proper SQL insert statements to insert the records in temp_table into the other tables?"})

# response = agent_executor.invoke({"input": "You are a Data Analyst with SQL expertise. Pull 20 records from the temp_table and match each column to the appropriate column from the other tables (can only match 1). Then, provide me a mapping of all columns from the temp_table to the other tables. Provide me only the answer in JSON format(without new lines) using the following example: {'temp_table.column_name': 'other_table.column_name'}"})

# response = agent_executor.invoke({"input": "Which table column from either actions_rows or static_actions_rows matches the temp_table announcement_timestamp column?"})

# The example in the prompt uses double quotes so that it is valid JSON;
# a single-quoted example invites output that json cannot parse.
mapping_prompt = (
    "You are a Data Analyst with SQL expertise. Provide me a mapping of all "
    "columns from the temp_table to the other tables by analyzing the existing "
    "records. Provide me only the answer in JSON format(without new lines) "
    'using the following example: {"temp_table.column_name": "other_table.column_name"}'
)
response = agent_executor.invoke({"input": mapping_prompt})

output_raw = response["output"]
print(output_raw)

# Extract the mapping from the answer. raw_decode() starts at the first "{"
# and stops at the end of that JSON value, so explanatory text or code fences
# around the object do not break the parse.
try:
    json_start_index = output_raw.index("{")
    mapping, _ = json.JSONDecoder().raw_decode(output_raw, json_start_index)
except (ValueError, AttributeError) as e:
    # ValueError covers both "no '{' found" and json.JSONDecodeError.
    print("Error parsing JSON:", e)
else:
    print(mapping)
