In [1]:
import pandas as pd
from langchain_community.utilities import SQLDatabase
from langchain.chains import create_sql_query_chain
from langchain_community.tools.sql_database.tool import QuerySQLDataBaseTool
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_community.agent_toolkits import create_sql_agent
from sqlalchemy import create_engine
from dotenv import load_dotenv
import os

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# Load the raw Titanic passenger data from the project's data directory.
df = pd.read_csv("data/titanic.csv")

# Print column dtypes and non-null counts as a quick schema check.
df.info()

# Preview the first five rows (head() defaults to 5); shown as the cell output.
df.head()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 887 entries, 0 to 886
Data columns (total 8 columns):
 #   Column                   Non-Null Count  Dtype  
---  ------                   --------------  -----  
 0   Survived                 887 non-null    int64  
 1   Pclass                   887 non-null    int64  
 2   Name                     887 non-null    object 
 3   Sex                      887 non-null    object 
 4   Age                      887 non-null    float64
 5   Siblings/Spouses Aboard  887 non-null    int64  
 6   Parents/Children Aboard  887 non-null    int64  
 7   Fare                     887 non-null    float64
dtypes: float64(2), int64(4), object(2)
memory usage: 55.6+ KB


Unnamed: 0,Survived,Pclass,Name,Sex,Age,Siblings/Spouses Aboard,Parents/Children Aboard,Fare
0,0,3,Mr. Owen Harris Braund,male,22.0,1,0,7.25
1,1,1,Mrs. John Bradley (Florence Briggs Thayer) Cum...,female,38.0,1,0,71.2833
2,1,3,Miss. Laina Heikkinen,female,26.0,0,0,7.925
3,1,1,Mrs. Jacques Heath (Lily May Peel) Futrelle,female,35.0,1,0,53.1
4,0,3,Mr. William Henry Allen,male,35.0,0,0,8.05


***Converting CSV to SQL***

In [4]:
# Create a SQLAlchemy engine backed by a local SQLite file.
# (Plain string: the original f-string had no placeholders.)
engine = create_engine("sqlite:///data/sqldb.db")

# Write the DataFrame to the "titanic" table without the pandas index column.
# if_exists="replace" keeps this cell re-runnable: the pandas default ("fail")
# raises ValueError as soon as data/sqldb.db already contains a "titanic" table,
# which breaks Restart Kernel -> Run All after the first execution.
df.to_sql("titanic", engine, index=False, if_exists="replace")

887

In [5]:
# Wrap the SQLAlchemy engine in LangChain's SQLDatabase utility.
db = SQLDatabase(engine=engine)

# Sanity checks: show the SQL dialect and the tables LangChain can use.
print(db.dialect)
print(db.get_usable_table_names())

# Smoke-test query run directly against the database; being the last
# expression of the cell, its result is displayed as the cell output.
under_two_query = "SELECT * FROM titanic WHERE Age < 2;"
db.run(under_two_query)

sqlite
['titanic']


"[(1, 2, 'Master. Alden Gates Caldwell', 'male', 0.83, 0, 2, 29.0), (0, 3, 'Master. Eino Viljami Panula', 'male', 1.0, 4, 1, 39.6875), (1, 3, 'Miss. Eleanor Ileen Johnson', 'female', 1.0, 1, 1, 11.1333), (1, 2, 'Master. Richard F Becker', 'male', 1.0, 2, 1, 39.0), (1, 1, 'Master. Hudson Trevor Allison', 'male', 0.92, 1, 2, 151.55), (1, 3, 'Miss. Maria Nakid', 'female', 1.0, 0, 2, 15.7417), (0, 3, 'Master. Sidney Leonard Goodwin', 'male', 1.0, 5, 2, 46.9), (1, 3, 'Miss. Helene Barbara Baclini', 'female', 0.75, 2, 1, 19.2583), (1, 3, 'Miss. Eugenie Baclini', 'female', 0.75, 2, 1, 19.2583), (1, 2, 'Master. Viljo Hamalainen', 'male', 0.67, 1, 1, 14.5), (1, 3, 'Master. Bertram Vere Dean', 'male', 1.0, 1, 2, 20.575), (1, 3, 'Master. Assad Alexander Thomas', 'male', 0.42, 0, 1, 8.5167), (1, 2, 'Master. Andre Mallet', 'male', 1.0, 0, 2, 37.0042), (1, 2, 'Master. George Sibley Richards', 'male', 0.83, 1, 1, 18.75)]"

***Interacting with database***

In [24]:
# Load environment variables from a local .env file (presumably the API key
# needed by the Gemini LLM configured later -- confirm against your .env).
# load_dotenv() is called exactly once; the original cell invoked it twice
# (once bare, once inside print) just to display the result.
env_loaded = load_dotenv()

# Per the recorded output this prints True when the .env file was loaded.
print(env_loaded)

True


In [20]:
# Instantiate the Gemini chat model that drives the SQL agent.
llm = ChatGoogleGenerativeAI(
    model="gemini-1.5-pro",
    temperature=0,     # zero sampling temperature
    max_tokens=None,   # no explicit output-token cap set here
    timeout=None,      # no explicit request timeout set here
    max_retries=2,     # retry budget passed to the client
)

In [22]:
# Build a SQL agent over the Titanic database; verbose=True prints the
# agent's intermediate thoughts, tool calls, and observations.
agent_executor = create_sql_agent(llm, db=db, verbose=True)

# Free-text lookup of a single passenger by name.
agent_executor.invoke({"input": "Tell me more about Anders Johan Anderson"})



[1m> Entering new SQL Agent Executor chain...[0m
[32;1m[1;3mThought: I need to find out what table Anders Johan Anderson might be in. To do that, I need to know what tables are available.
Action: sql_db_list_tables
Action Input: [0m[38;5;200m[1;3mtitanic[0m[32;1m[1;3mThought: I should check the schema of the titanic table to see if it contains information about passengers.
Action: sql_db_schema
Action Input: titanic[0m[33;1m[1;3m
CREATE TABLE titanic (
	"Survived" BIGINT, 
	"Pclass" BIGINT, 
	"Name" TEXT, 
	"Sex" TEXT, 
	"Age" FLOAT, 
	"Siblings/Spouses Aboard" BIGINT, 
	"Parents/Children Aboard" BIGINT, 
	"Fare" FLOAT
)

/*
3 rows from titanic table:
Survived	Pclass	Name	Sex	Age	Siblings/Spouses Aboard	Parents/Children Aboard	Fare
0	3	Mr. Owen Harris Braund	male	22.0	1	0	7.25
1	1	Mrs. John Bradley (Florence Briggs Thayer) Cumings	female	38.0	1	0	71.2833
1	3	Miss. Laina Heikkinen	female	26.0	0	0	7.925
*/[0m[32;1m[1;3mThought: The titanic table has information about pa

Retrying langchain_google_genai.chat_models._chat_with_retry.<locals>._chat_with_retry in 2.0 seconds as it raised ResourceExhausted: 429 Resource has been exhausted (e.g. check quota)..
Retrying langchain_google_genai.chat_models._chat_with_retry.<locals>._chat_with_retry in 2.0 seconds as it raised ResourceExhausted: 429 Resource has been exhausted (e.g. check quota)..


[36;1m[1;3m```sql
SELECT * FROM titanic WHERE Name = 'Anders Johan Andersson'
```[0m

Retrying langchain_google_genai.chat_models._chat_with_retry.<locals>._chat_with_retry in 4.0 seconds as it raised ResourceExhausted: 429 Resource has been exhausted (e.g. check quota)..
Retrying langchain_google_genai.chat_models._chat_with_retry.<locals>._chat_with_retry in 8.0 seconds as it raised ResourceExhausted: 429 Resource has been exhausted (e.g. check quota)..
Retrying langchain_google_genai.chat_models._chat_with_retry.<locals>._chat_with_retry in 16.0 seconds as it raised ResourceExhausted: 429 Resource has been exhausted (e.g. check quota)..
Retrying langchain_google_genai.chat_models._chat_with_retry.<locals>._chat_with_retry in 32.0 seconds as it raised ResourceExhausted: 429 Resource has been exhausted (e.g. check quota)..


[32;1m[1;3mAction: sql_db_query
Action Input: SELECT * FROM titanic WHERE Name = 'Anders Johan Andersson'[0m[36;1m[1;3m[0m[32;1m[1;3mThought: The query returned no results, so Anders Johan Anderson might not be in the database, or there might be a slight misspelling in the name. I can try searching for similar names to see if I can find a match.
Action: sql_db_query_checker
Action Input: SELECT * FROM titanic WHERE Name LIKE '%Anders%'[0m[36;1m[1;3m```sql
SELECT * FROM titanic WHERE Name LIKE '%Anders%'
```[0m

Retrying langchain_google_genai.chat_models._chat_with_retry.<locals>._chat_with_retry in 2.0 seconds as it raised ResourceExhausted: 429 Resource has been exhausted (e.g. check quota)..


[32;1m[1;3mAction: sql_db_query
Action Input: SELECT * FROM titanic WHERE Name LIKE '%Anders%'[0m[36;1m[1;3m[(0, 3, 'Mr. Anders Johan Andersson', 'male', 39.0, 1, 5, 31.275), (1, 3, 'Miss. Erna Alexandra Andersson', 'female', 17.0, 4, 2, 7.925), (0, 3, 'Mr. Anders Vilhelm Gustafsson', 'male', 37.0, 2, 0, 7.925), (0, 3, 'Miss. Ellis Anna Maria Andersson', 'female', 2.0, 4, 2, 31.275), (1, 3, 'Mr. August Edvard Andersson', 'male', 27.0, 0, 0, 7.7958), (1, 3, 'Miss. Carla Christine Nielsine Andersen-Jensen', 'female', 19.0, 1, 0, 7.8542), (1, 1, 'Mr. Harry Anderson', 'male', 48.0, 0, 0, 26.55), (0, 1, 'Mr. William Anderson Walker', 'male', 47.0, 0, 0, 34.0208), (0, 3, 'Miss. Ingeborg Constanzia Andersson', 'female', 9.0, 4, 2, 31.275), (0, 3, 'Miss. Sigrid Elisabeth Andersson', 'female', 11.0, 4, 2, 31.275), (0, 3, 'Mrs. Anders Johan (Alfrida Konstantia Brogren) Andersson', 'female', 39.0, 1, 5, 31.275), (0, 3, 'Mr. Thor Anderson Olsvigen', 'male', 20.0, 0, 0, 9.225), (0, 3, 'Mr. Pet

Retrying langchain_google_genai.chat_models._chat_with_retry.<locals>._chat_with_retry in 4.0 seconds as it raised ResourceExhausted: 429 Resource has been exhausted (e.g. check quota)..
Retrying langchain_google_genai.chat_models._chat_with_retry.<locals>._chat_with_retry in 8.0 seconds as it raised ResourceExhausted: 429 Resource has been exhausted (e.g. check quota)..
Retrying langchain_google_genai.chat_models._chat_with_retry.<locals>._chat_with_retry in 16.0 seconds as it raised ResourceExhausted: 429 Resource has been exhausted (e.g. check quota)..
Retrying langchain_google_genai.chat_models._chat_with_retry.<locals>._chat_with_retry in 32.0 seconds as it raised ResourceExhausted: 429 Resource has been exhausted (e.g. check quota)..


[32;1m[1;3mThought: I now have a list of passengers with 'Anders' in their name. I can see that 'Mr. Anders Johan Andersson' is present in the list.
Final Answer: Mr. Anders Johan Andersson was a passenger on the Titanic. He was traveling with his wife, Mrs. Anders Johan (Alfrida Konstantia Brogren) Andersson, and their five children. He was 39 years old and unfortunately, he did not survive the sinking. 
[0m

[1m> Finished chain.[0m


{'input': 'Tell me more about Anders Johan Anderson',
 'output': 'Mr. Anders Johan Andersson was a passenger on the Titanic. He was traveling with his wife, Mrs. Anders Johan (Alfrida Konstantia Brogren) Andersson, and their five children. He was 39 years old and unfortunately, he did not survive the sinking.'}

In [23]:
# Aggregate question: the agent has to translate this into a SQL query.
agent_executor.invoke(
    {
        "input": "what's the average age of survivors"
    }
)



[1m> Entering new SQL Agent Executor chain...[0m
[32;1m[1;3mThought: I need to find out which table contains the age and survival status information. To do that, I need to list all tables first.
Action: sql_db_list_tables
Action Input: [0m[38;5;200m[1;3mtitanic[0m[32;1m[1;3mThought: There's only one table: 'titanic'. I need to check its schema to see if it has age and survival status.
Action: sql_db_schema
Action Input: titanic[0m[33;1m[1;3m
CREATE TABLE titanic (
	"Survived" BIGINT, 
	"Pclass" BIGINT, 
	"Name" TEXT, 
	"Sex" TEXT, 
	"Age" FLOAT, 
	"Siblings/Spouses Aboard" BIGINT, 
	"Parents/Children Aboard" BIGINT, 
	"Fare" FLOAT
)

/*
3 rows from titanic table:
Survived	Pclass	Name	Sex	Age	Siblings/Spouses Aboard	Parents/Children Aboard	Fare
0	3	Mr. Owen Harris Braund	male	22.0	1	0	7.25
1	1	Mrs. John Bradley (Florence Briggs Thayer) Cumings	female	38.0	1	0	71.2833
1	3	Miss. Laina Heikkinen	female	26.0	0	0	7.925
*/[0m

Retrying langchain_google_genai.chat_models._chat_with_retry.<locals>._chat_with_retry in 2.0 seconds as it raised ResourceExhausted: 429 Resource has been exhausted (e.g. check quota)..


[32;1m[1;3mThought: The 'titanic' table has both 'Survived' and 'Age' columns. I can write a query to calculate the average age of survivors.
Action: sql_db_query_checker
Action Input: SELECT AVG("Age") FROM titanic WHERE "Survived" = 1[0m

Retrying langchain_google_genai.chat_models._chat_with_retry.<locals>._chat_with_retry in 4.0 seconds as it raised ResourceExhausted: 429 Resource has been exhausted (e.g. check quota)..
Retrying langchain_google_genai.chat_models._chat_with_retry.<locals>._chat_with_retry in 2.0 seconds as it raised ResourceExhausted: 429 Resource has been exhausted (e.g. check quota)..


[36;1m[1;3m```sql
SELECT AVG("Age") FROM "titanic" WHERE "Survived" = 1
```[0m

Retrying langchain_google_genai.chat_models._chat_with_retry.<locals>._chat_with_retry in 4.0 seconds as it raised ResourceExhausted: 429 Resource has been exhausted (e.g. check quota)..
Retrying langchain_google_genai.chat_models._chat_with_retry.<locals>._chat_with_retry in 8.0 seconds as it raised ResourceExhausted: 429 Resource has been exhausted (e.g. check quota)..


[32;1m[1;3mThought: The query looks good. I can execute it now.
Action: sql_db_query
Action Input: SELECT AVG("Age") FROM titanic WHERE "Survived" = 1[0m[36;1m[1;3m[(28.408391812865496,)][0m[32;1m[1;3mThought: I now know the final answer
Final Answer: The average age of survivors is 28.41. 
[0m

[1m> Finished chain.[0m


{'input': "what's the average age of survivors",
 'output': 'The average age of survivors is 28.41.'}