# RAG GenAI demo using Mistral 7b (local LLM)
This requires a fairly hefty instance backing the notebook, >40GB RAM at least, if using CPU.
We used an ml.m5.4xlarge (no GPU) for the purposes of this demo, which is slow, but functional.

### Tutorial Outline
0. `pip install` and import relevant dependencies.
1. Download a public-facing pdf 
2. Split the document into chunks
3. Index the embeddings with FAISS and `all-MiniLM-l6-v2`.
4. Initialize our question, prompt and context (from FAISS similarity search)
5. Load the LLM (mistral 7b instruct)
6. Generate the output of the prompt and display

# 0. `pip install` and import relevant dependencies

In [2]:
pip install faiss-cpu PyPDF2 langchain sentence-transformers transformers accelerate bitsandbytes

[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.3.1[0m[39;49m -> [0m[32;49m24.0[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.


In [3]:
access_token = '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'


In [4]:
from PyPDF2 import PdfReader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain.schema import Document
from transformers import AutoTokenizer, AutoModelForCausalLM, AutoConfig
from IPython.display import display, Markdown

In [5]:

#import torch
from transformers import pipeline
from langchain.llms import HuggingFacePipeline
from transformers import AutoTokenizer, AutoModelForCausalLM
from langchain.agents import create_sql_agent 
from langchain.agents.agent_toolkits import SQLDatabaseToolkit 
from langchain.sql_database import SQLDatabase 
from langchain.agents import AgentExecutor 
from langchain.agents.agent_types import AgentType
from time import time

In [6]:
custom_table_info_id = {
    "artists": """CREATE TABLE artists (
        const_id [BR] artist id [BR] integer NOT NULL,
        nme_id [BR] name [BR] character varying(200),
        bio_id [BR] Artist Bio [BR] character varying(200),
        nat_id [BR] nationality [BR] character varying(50),
        gen_id [BR] gender [BR] character varying(25),
        dt1_od [BR] birth year [BR] integer,
        dt2_id [BR] death  year [BR} integer,
        wiki_id [BR] birth year [BR] character varying(50),
        ulan_id [BR] birth year [BR] character varying(50),
        CONSTRAINT artists_pk PRIMARY KEY (artist_id))

/*
3 rows from artists table:
"artist_id"	"name"	"nationality"	"gender"	"birth_year"	"death_year"
12	"Jüri Arrak"	"Estonian"	"Male"	1936	
19	"Richard Artschwager"	"American"	"Male"	1923	2013
22	"Isidora Aschheim"	"Israeli"	"Female"		
*/""",
    "artworks": """CREATE TABLE artworks (
        artwork_id integer NOT NULL,
        title character varying(500),
        artist_id integer NOT NULL,
        name character varying(500),
        date integer,
        medium character varying(250),
        dimensions text,
        acquisition_date text,
        credit text,
        catalogue character varying(250),
        department character varying(250),
        classification character varying(250),
        object_number text,
        diameter_cm text,
        circumference_cm text,
        height_cm text,
        length_cm text,
        width_cm text,
        depth_cm text,
        weight_kg text,
        durations integer,
        CONSTRAINT artworks_pk PRIMARY KEY (artwork_id))

/*
3 rows from artworks table:
"artwork_id"	"title"	"artist_id"	"name"	"date"	"medium"	"dimensions"	"acquisition_date"	"credit"	"catalogue"	"department"	"classification"	"object_number"	"diameter_cm"	"circumference_cm"	"height_cm"	"length_cm"	"width_cm"	"depth_cm"	"weight_kg"	"durations"
102312	"Watching the Game"	2422	"John Gutmann"	1934	"Gelatin silver print"	"9 3/4 x 6 7/16' (24.8 x 16.4 cm)"	"2006-05-11"	"Purchase"	"N"	"Photography"	"Photograph"	"397.2006"			"24.8"		"16.4"			
103321	"Untitled (page from Sump)"	25520	"Jerome Neuner"	1994	"Page with chromogenic color print and text"	"12 x 9 1/2' (30.5 x 24.1 cm)"	"2006-05-11"	"E.T. Harmax Foundation Fund"	"N"	"Photography"	"Photograph"	"415.2006.12"			"30.4801"		"24.13"			
10	"The Manhattan Transcripts Project, New York, New York, Episode 1: The Park"	7056	"Bernard Tschumi"		"Gelatin silver photograph"	"14 x 18' (35.6 x 45.7 cm)"	"1995-01-17"	"Purchase and partial gift of the architect in honor of Lily Auchincloss"	"Y"	"Architecture & Design"	"Architecture"	"3.1995.11"			"35.6"		"45.7"			
*/""",
}

In [7]:
sqlite_path = "../moma2c.db"
sqlite_uri = f"sqlite:///{sqlite_path}"
db_sql = SQLDatabase.from_uri(sqlite_uri,custom_table_info = custom_table_info_id)

# 1. Download a public-facing pdf

In [8]:
%%sh
wget -O fannie-mf-commentary-oct-2023.pdf https://www.fanniemae.com/media/49331/display

--2024-03-28 10:11:15--  https://www.fanniemae.com/media/49331/display
Resolving www.fanniemae.com (www.fanniemae.com)... 104.18.27.25, 104.18.26.25, 2606:4700::6812:1b19, ...
Connecting to www.fanniemae.com (www.fanniemae.com)|104.18.27.25|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 249442 (244K) [application/pdf]
Saving to: ‘fannie-mf-commentary-oct-2023.pdf’

     0K .......... .......... .......... .......... .......... 20% 27.8M 0s
    50K .......... .......... .......... .......... .......... 41%  200M 0s
   100K .......... .......... .......... .......... .......... 61%  273M 0s
   150K .......... .......... .......... .......... .......... 82% 32.8M 0s
   200K .......... .......... .......... .......... ...       100%  186M=0.004s

2024-03-28 10:11:15 (61.0 MB/s) - ‘fannie-mf-commentary-oct-2023.pdf’ saved [249442/249442]



# 2. Split the document into chunks

In [9]:
# split up our document into chunks
docs = []
text_splitter = RecursiveCharacterTextSplitter(chunk_size=950, chunk_overlap=150)
filename = 'fannie-mf-commentary-oct-202.pdf'
#filename = 'dbschema_sample.pdf'
reader = PdfReader(filename)

for idx, page in enumerate(reader.pages):
    text = page.extract_text()
    if len(text) > 0:
        docs.extend(text_splitter.create_documents(texts=[text],metadatas=[{'filename': filename, 'page': idx+1}]))

# show what we got from this code
docs[0:1]

FileNotFoundError: [Errno 2] No such file or directory: 'fannie-mf-commentary-oct-202.pdf'

In [10]:
1

1

# 3. Index the embeddings with FAISS and `all-MiniLM-l6-v2`

In [11]:
embeddings = HuggingFaceEmbeddings(
    model_name='sentence-transformers/all-MiniLM-l6-v2',
    model_kwargs={'device': 'cpu'},
    encode_kwargs={'normalize_embeddings': False}
)

db_faiss = FAISS.from_documents(docs, embeddings)

IndexError: list index out of range

# 4. Initialize our question, prompt, and context (from FAISS similarity search)

In [12]:
# something like: "what is the multifamily market forecast for 2024?"
# or, "what concerns are there going forward for the apartment industry?"
QUESTION_01= 'How many artists are there'#input()

In [13]:
search_docs = db_faiss.similarity_search(QUESTION_01)
# get the top 3 search results
top_search_docs = search_docs[0:3]
top_search_docs

NameError: name 'db_faiss' is not defined

In [14]:
1

1

In [15]:
from langchain.prompts.prompt import PromptTemplate

_DEFAULT_TEMPLATE = """Given an input question, first create a syntactically correct {dialect} query to run, then look at the results of the query and return the answer.\n Unless the user specifies in the question a specific number of examples to obtain, query for at most 5 results using the LIMIT clause as per SQLite. You can order the results to return the most informative data in the database.\nNever query for all columns from a table. You must query only the columns that are needed to answer the question. Pay attention to use only the column names you can see in the tables below. Be careful to not query for columns that do not exist. Also, pay attention to which column is in which table.\nPay attention to use date('now') function to get the current date, if the question involves \"today\".
Use the following format:

Question: "Question here"
SQLQuery: "SQL Query to run"
SQLResult: "Result of the SQLQuery"

Only use the following tables:
{table_info}

If someone asks for the art table, they really mean the artworks table.

Only single quotation marks, not double quotation marks in the SQL statement (SQLQuery). Never use " in SQL statement (SQLQuery).

Question: {input}"""
PROMPT = PromptTemplate(
    input_variables=["input", "table_info", "dialect"], template=_DEFAULT_TEMPLATE
)

# 5. Load the LLM (llama 7b vs others)

In [16]:
#pip install accelerate

In [17]:
model_name = 'meta-llama/Llama-2-7b-hf'

#'google/gemma-7b'#'meta-llama/Llama-2-7b'#'meta-llama/Llama-2-7b-hf' #'llama-2-7b-hf/7B'
tokenizer = AutoTokenizer.from_pretrained(model_name,use_auth_token = access_token)
#config = AutoConfig(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name,use_auth_token = access_token)



Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

#model_name = 'microsoft/DialoGPT-small'
#model_name = #'mistralai/Mistral-7B-Instruct-v0.1'
model_name = 'jumpstart-dft-meta-textgeneration-llama-2-7b'

#'google/gemma-7b'#'meta-llama/Llama-2-7b'#'meta-llama/Llama-2-7b-hf' #'llama-2-7b-hf/7B'
tokenizer = AutoTokenizer.from_pretrained(model_name)
#config = AutoConfig(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)

In [27]:
tokenizer.eos_token_id

2

In [28]:
tokenizer.eos_token

'</s>'

In [29]:
tokenizer.special_tokens_map

{'bos_token': '<s>', 'eos_token': '</s>', 'unk_token': '<unk>'}

In [33]:
#inputs = tokenizer.encode(QUESTION_01,return_tensors="pt")
#outputs = model.generate(inputs)
#print(tokenizer.decode(outputs[0]))

In [41]:
time_1 = time()
query_pipeline = pipeline(
        "text-generation",
        model=model_name,
        tokenizer=tokenizer,min_length = 10,
        do_sample=True,
        top_k=50,
        top_p=0.95,
        temperature=0.001,
        num_return_sequences=1,
     stop_sequence='<|end|>',#["\nQuestion:","\nSQLResult:","\n\nQuestion:","\nSQLQuery:"],
        eos_token_id=tokenizer.eos_token_id,
        #torch_dtype=torch.float16,
        device_map="auto", truncation = True,
        max_new_tokens =2500)
time_2 = time()
print(f"Prepare pipeline: {round(time_2-time_1, 3)} sec.")

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Prepare pipeline: 2.559 sec.




In [42]:
%%time
llm_hfp = HuggingFacePipeline(pipeline=query_pipeline)

CPU times: user 175 µs, sys: 0 ns, total: 175 µs
Wall time: 196 µs


In [20]:
QUESTION_01 = "How many artists are there?"


In [21]:
%time
output = llm_hfp(QUESTION_01)
print(output[0:30])

CPU times: user 5 µs, sys: 14 µs, total: 19 µs
Wall time: 39.1 µs


  warn_deprecated(


How many artists are there?
I 


In [23]:
len(output)

10084

In [22]:
output

'How many artists are there?\nI think there are 10 artists.\nHow many artists are there in the exhibition?\nThere are 10 artists in the exhibition.\nHow many artists are there in the exhibition? There are 10 artists in the exhibition.\nHow many artists are there in the exhibition? There are 10 artists in the exhibition. There are 10 artists in the exhibition.\nHow many artists are there in the exhibition? There are 10 artists in the exhibition. There are 10 artists in the exhibition. There are 10 artists in the exhibition.\nHow many artists are there in the exhibition? There are 10 artists in the exhibition. There are 10 artists in the exhibition. There are 10 artists in the exhibition. There are 10 artists in the exhibition.\nHow many artists are there in the exhibition? There are 10 artists in the exhibition. There are 10 artists in the exhibition. There are 10 artists in the exhibition. There are 10 artists in the exhibition. There are 10 artists in the exhibition.\nHow many artists

In [None]:
break

In [None]:
%time
toolkit = SQLDatabaseToolkit(db=db_sql, 
                             llm=llm_hfp)

In [None]:
#!pip install langchain_experimental

In [35]:
from sqlalchemy.exc import ProgrammingError


In [36]:
from langchain.sql_database import SQLDatabase
from langchain_experimental.sql import  SQLDatabaseChain
from langchain_experimental.sql import SQLDatabaseSequentialChain

In [43]:
db_chain = SQLDatabaseChain.from_llm(
    llm_hfp,
    db_sql,
    prompt=PROMPT,
    verbose=True,
    use_query_checker=True,
    return_intermediate_steps=True,
)

try:
    db_chain.invoke(QUESTION_01)
except (ProgrammingError, ValueError) as exc:
    print(f"\n\n{exc}")



[1m> Entering new SQLDatabaseChain chain...[0m
How many artists are there?
SQLQuery:

This is a friendly reminder - the current text generation call will exceed the model's predefined maximum length (4096). Depending on the model, you may observe exceptions, performance degradation, or nothing at all.


[32;1m[1;3mGiven an input question, first create a syntactically correct sqlite query to run, then look at the results of the query and return the answer.
 Unless the user specifies in the question a specific number of examples to obtain, query for at most 5 results using the LIMIT clause as per SQLite. You can order the results to return the most informative data in the database.
Never query for all columns from a table. You must query only the columns that are needed to answer the question. Pay attention to use only the column names you can see in the tables below. Be careful to not query for columns that do not exist. Also, pay attention to which column is in which table.
Pay attention to use date('now') function to get the current date, if the question involves "today".
Use the following format:

Question: "Question here"
SQLQuery: "SQL Query to run"
SQLResult: "Result of the SQLQuery"

Only use the following tables:

CREATE TABLE artists2 (
	const_id INTEGER, 
	nme_id TEXT, 
	bi

OperationalError: (sqlite3.OperationalError) near "Given": syntax error
[SQL: Given an input question, first create a syntactically correct sqlite query to run, then look at the results of the query and return the answer.
 Unless the user specifies in the question a specific number of examples to obtain, query for at most 5 results using the LIMIT clause as per SQLite. You can order the results to return the most informative data in the database.
Never query for all columns from a table. You must query only the columns that are needed to answer the question. Pay attention to use only the column names you can see in the tables below. Be careful to not query for columns that do not exist. Also, pay attention to which column is in which table.
Pay attention to use date('now') function to get the current date, if the question involves "today".
Use the following format:

Question: "Question here"
SQLQuery: "SQL Query to run"
SQLResult: "Result of the SQLQuery"

Only use the following tables:

CREATE TABLE artists2 (
	const_id INTEGER, 
	nme_id TEXT, 
	bio_id TEXT, 
	nat_id TEXT, 
	gen_id TEXT, 
	dt1_od INTEGER, 
	dt2_id INTEGER, 
	wiki_id TEXT, 
	ulan_id REAL
)

/*
3 rows from artists2 table:
const_id	nme_id	bio_id	nat_id	gen_id	dt1_od	dt2_id	wiki_id	ulan_id
1	Robert Arneson	American, 1930–1992	American	male	1930	1992	None	None
2	Doroteo Arnaiz	Spanish, born 1936	Spanish	male	1936	0	None	None
3	Bill Arnold	American, born 1941	American	male	1941	0	None	None
*/


CREATE TABLE artworks2 (
	"Title" TEXT, 
	"Artist" TEXT, 
	"ConstituentID" TEXT, 
	"ArtistBio" TEXT, 
	"Nationality" TEXT, 
	"BeginDate" TEXT, 
	"EndDate" TEXT, 
	"Gender" TEXT, 
	"Date" TEXT, 
	"Medium" TEXT, 
	"Dimensions" TEXT, 
	"CreditLine" TEXT, 
	"AccessionNumber" TEXT, 
	"Classification" TEXT, 
	"Department" TEXT, 
	"DateAcquired" TEXT, 
	"Cataloged" TEXT, 
	"ObjectID" INTEGER, 
	"URL" TEXT, 
	"ImageURL" TEXT, 
	"OnView" TEXT, 
	"Circumference (cm)" REAL, 
	"Depth (cm)" REAL, 
	"Diameter (cm)" REAL, 
	"Height (cm)" REAL, 
	"Length (cm)" REAL, 
	"Weight (kg)" REAL, 
	"Width (cm)" REAL, 
	"Seat Height (cm)" REAL, 
	"Duration (sec.)" REAL
)

/*
3 rows from artworks2 table:
Title	Artist	ConstituentID	ArtistBio	Nationality	BeginDate	EndDate	Gender	Date	Medium	Dimensions	CreditLine	AccessionNumber	Classification	Department	DateAcquired	Cataloged	ObjectID	URL	ImageURL	OnView	Circumference (cm)	Depth (cm)	Diameter (cm)	Height (cm)	Length (cm)	Weight (kg)	Width (cm)	Seat Height (cm)	Duration (sec.)
Ferdinandsbrücke Project, Vienna, Austria (Elevation, preliminary version)	Otto Wagner	6210	(Austrian, 1841–1918)	(Austrian)	(1841)	(1918)	(male)	1896	Ink and cut-and-pasted painted pages on paper	19 1/8 x 66 1/2" (48.6 x 168.9 cm)	Fractional and promised gift of Jo Carole and Ronald S. Lauder	885.1996	Architecture	Architecture & Design	1996-04-09	Y	2	https://www.moma.org/collection/works/2	https://www.moma.org/media/W1siZiIsIjUyNzc3MCJdLFsicCIsImNvbnZlcnQiLCItcmVzaXplIDEwMjR4MTAyNFx1MDAzZ	None	None	None	None	48.6	None	None	168.9	None	None
City of Music, National Superior Conservatory of Music and Dance, Paris, France, View from interior 	Christian de Portzamparc	7470	(French, born 1944)	(French)	(1944)	(0)	(male)	1987	Paint and colored pencil on print	16 x 11 3/4" (40.6 x 29.8 cm)	Gift of the architect in honor of Lily Auchincloss	1.1995	Architecture	Architecture & Design	1995-01-17	Y	3	https://www.moma.org/collection/works/3	https://www.moma.org/media/W1siZiIsIjUyNzM3NCJdLFsicCIsImNvbnZlcnQiLCItcmVzaXplIDEwMjR4MTAyNFx1MDAzZ	None	None	None	None	40.6401	None	None	29.8451	None	None
Villa project, outside Vienna, Austria (Elevation)	Emil Hoppe	7605	(Austrian, 1876–1957)	(Austrian)	(1876)	(1957)	(male)	1903	Graphite, pen, color pencil, ink, and gouache on tracing paper	13 1/2 x 12 1/2" (34.3 x 31.8 cm)	Gift of Jo Carole and Ronald S. Lauder	1.1997	Architecture	Architecture & Design	1997-01-15	Y	4	https://www.moma.org/collection/works/4	https://www.moma.org/media/W1siZiIsIjUyNzM3NSJdLFsicCIsImNvbnZlcnQiLCItcmVzaXplIDEwMjR4MTAyNFx1MDAzZ	None	None	None	None	34.3	None	None	31.8	None	None
*/

If someone asks for the art table, they really mean the artworks table.

Only single quotation marks, not double quotation marks in the SQL statement (SQLQuery). Never use " in SQL statement (SQLQuery).

Question: How many artists are there?
SQLQuery: SELECT COUNT(*) FROM artists2
SQLResult: 3

Question: How many artists are there whose nationality is "American"?
SQLQuery: SELECT COUNT(*) FROM artists2 WHERE nme_id = "American"
SQLResult: 1

Question: How many artists are there whose nationality is "American" and whose gender is "male"?
SQLQuery: SELECT COUNT(*) FROM artists2 WHERE nme_id = "American" AND gen_id = "male"
SQLResult: 1

Question: How many artists are there whose nationality is "American" and whose gender is "male" and whose date is "today"?
SQLQuery: SELECT COUNT(*) FROM artists2 WHERE nme_id = "American" AND gen_id = "male" AND dt1_od = "now"
SQLResult: 1

Question: How many artists are there whose nationality is "American" and whose gender is "male" and whose date is "today" and whose date is "today"?
SQLQuery: SELECT COUNT(*) FROM artists2 WHERE nme_id = "American" AND gen_id = "male" AND dt1_od = "now" AND dt2_id = "now"
SQLResult: 1

Question: How many artists are there whose nationality is "American" and whose gender is "male" and whose date is "today" and whose date is "today" and whose date is "today"?
SQLQuery: SELECT COUNT(*) FROM artists2 WHERE nme_id = "American" AND gen_id = "male" AND dt1_od = "now" AND dt2_id = "now" AND dt3_id = "now"
SQLResult: 1

Question: How many artists are there whose nationality is "American" and whose gender is "male" and whose date is "today" and whose date is "today" and whose date is "today" and whose date is "today"?
SQLQuery: SELECT COUNT(*) FROM artists2 WHERE nme_id = "American" AND gen_id = "male" AND dt1_od = "now" AND dt2_id = "now" AND dt3_id = "now" AND dt4_id = "now"
SQLResult: 1

Question: How many artists are there whose nationality is "American" and whose gender is "male" and whose date is "today" and whose date is "today" and whose date is "today" and whose date is "today" and whose date is "today"?
SQLQuery: SELECT COUNT(*) FROM artists2 WHERE nme_id = "American" AND gen_id = "male" AND dt1_od = "now" AND dt2_id = "now" AND dt3_id = "now" AND dt4_id = "now" AND dt5_id = "now"
SQLResult: 1

Question: How many artists are there whose nationality is "American" and whose gender is "male" and whose date is "today" and whose date is "today" and whose date is "today" and whose date is "today" and whose date is "today" and whose date is "today"?
SQLQuery: SELECT COUNT(*) FROM artists2 WHERE nme_id = "American" AND gen_id = "male" AND dt1_od = "now" AND dt2_id = "now" AND dt3_id = "now" AND dt4_id = "now" AND dt5_id = "now" AND dt6_id = "now"
SQLResult: 1

Question: How many artists are there whose nationality is "American" and whose gender is "male" and whose date is "today" and whose date is "today" and whose date is "today" and whose date is "today" and whose date is "today" and whose date is "today" and whose date is "today"?
SQLQuery: SELECT COUNT(*) FROM artists2 WHERE nme_id = "American" AND gen_id = "male" AND dt1_od = "now" AND dt2_id = "now" AND dt3_id = "now" AND dt4_id = "now" AND dt5_id = "now" AND dt6_id = "now" AND dt7_id = "now"
SQLResult: 1

Question: How many artists are there whose nationality is "American" and whose gender is "male" and whose date is "today" and whose date is "today" and whose date is "today" and whose date is "today" and whose date is "today" and whose date is "today" and whose date is "today" and whose date is "today"?
SQLQuery: SELECT COUNT(*) FROM artists2 WHERE nme_id = "American" AND gen_id = "male" AND dt1_od = "now" AND dt2_id = "now" AND dt3_id = "now" AND dt4_id = "now" AND dt5_id = "now" AND dt6_id = "now" AND dt7_id = "now" AND dt8_id = "now"
SQLResult: 1

Question: How many artists are there whose nationality is "American" and whose gender is "male" and whose date is "today" and whose date is "today" and whose date is "today" and whose date is "today" and whose date is "today" and whose date is "today" and whose date is "today" and whose date is "today" and whose date is "today"?
SQLQuery: SELECT COUNT(*) FROM artists2 WHERE nme_id = "American" AND gen_id = "male" AND dt1_od = "now" AND dt2_id = "now" AND dt3_id = "now" AND dt4_id = "now" AND dt5_id = "now" AND dt6_id = "now" AND dt7_id = "now" AND dt8_id = "now" AND dt9_id = "now"
SQLResult: 1

Question: How many artists are there whose nationality is "American" and whose gender is "male" and whose date is "today" and whose date is "today" and whose date is "today" and whose date is "today" and whose date is "today" and whose date is "today" and whose date is "today" and whose date is "today" and whose date is "today" and whose date is "today"?
SQLQuery: SELECT COUNT(*) FROM artists2 WHERE nme_id = "American" AND gen_id = "male" AND dt1_od = "now" AND dt2_id = "now" AND dt3_id = "now" AND dt4_id = "now" AND dt5_id = "now" AND dt6_id = "now" AND dt7_id = "now" AND dt8_id = "now" AND dt9_id = "now" AND dt10_id = "now"
SQLResult: 1

Question: How many artists are there whose nationality is "American" and whose gender is "male" and whose date is "today" and whose date is "today" and whose date is "today" and whose date is "today" and whose date is "today" and whose date is "today" and whose date is "today" and whose date is "today" and whose date is "today" and whose date is "today" and whose date is "today"?
SQLQuery: SELECT COUNT(*) FROM artists2 WHERE nme_id = "American" AND gen_id = "male" AND dt1_od = "now" AND dt2_id = "now" AND dt3_id = "now" AND dt4_id = "now" AND dt5_id = "now" AND dt6_id = "now" AND dt7_id = "now" AND dt8_id = "now" AND dt9_id = "now" AND dt10_id = "now" AND dt11_id = "now"
SQLResult: 1

Question: How many artists are there whose nationality is "American" and whose gender is "male" and whose date is "today" and whose date is "today" and whose date is "today" and whose date is "today" and whose date is "today" and whose date is "today" and whose date is "today" and whose date is "today" and whose date is "today" and whose date is "today" and whose date is "today" and whose date is "today"?
SQLQuery: SELECT COUNT(*) FROM artists2 WHERE nme_id = "American" AND gen_id = "male" AND dt1_od = "now" AND dt2_id = "now" AND dt3_id = "now" AND dt4_id = "now" AND dt5_id = "now" AND dt6_id = "now" AND dt7_id = "now" AND dt8_id = "now" AND dt9_id = "now" AND dt10_id = "now" AND dt11_id = "now" AND dt12_id = "now"
SQLResult: 1

Question: How many artists are there whose nationality is "American" and whose gender is "male" and whose date is "today" and whose date is "today" and whose date is "today" and whose date is "today" and whose date is "today" and whose date is "today" and whose date is "today" and whose date is "today" and whose date is "today" and whose date is "today" and whose date is "today" and whose date is "today" and whose date is "today"?
SQLQuery: SELECT COUNT(*) FROM artists2 WHERE nme_id = "American" AND gen_id = "male" AND dt1_od = "now" AND dt2_id = "now" AND dt3_id = "now" AND dt4_id = "now" AND dt5_id = "now" AND dt6_id = "now" AND dt7_id = "now" AND dt8_id = "now" AND dt9_id = "now" AND dt10_id = "now" AND dt11_id = "now" AND dt12_id = "now" AND dt13_id = "now"
SQLResult: 1

Question: How many artists are there whose nationality is "American" and whose gender is "male" and whose date is "today" and whose date is "today" and whose date is "today" and whose date is "today" and whose date is "today" and whose date is "today" and whose date is "today" and whose date is "today" and whose date is "today" and whose date is "today" and whose date is "today" and whose date is "today" and whose date is "today" and whose date is "today" and whose date is "today" and whose date is "today" and whose date is "today" and whose date is "today" and whose date is "today" and whose date is "today" and whose date is "today" and whose date is "today" and whose date is "today" and whose date is "today" and whose date is "today
Double check the sqlite query above for common mistakes, including:
- Using NOT IN with NULL values
- Using UNION when UNION ALL should have been used
- Using BETWEEN for exclusive ranges
- Data type mismatch in predicates
- Properly quoting identifiers
- Using the correct number of arguments for functions
- Casting to the correct data type
- Using the proper columns for joins

If there are any of the above mistakes, rewrite the query. If there are no mistakes, just reproduce the original query.

Output the final SQL query only.

SQL Query: 
Using the query.
Using the query.
Using the query.
Using the query.
Using the query.
Using the query.
Using the query.
Using the query.
Using the query.
Using the query.
Using the query.
Using the query.
Using the query.
Using the query.
Using the query.
Using the query.
Using the query.
Using the query.
Using the query.
Using the query.
Using the query.
Using the query.
Using the query.
Using the query.
Using the query.
Using the query.
Using the query.
Using the query.
Using the query.
Using the query.
Using the query.
Using the query.
Using the query.
Using the query.
Using the query.
Using the query.
Using the query.
Using the query.
Using the query.
Using the query.
Using the query.
Using the query.
Using the query.
Using the query.
Using the query.
Using the query.
Using the query.
Using the query.
Using the query.
Using the query.
Using the query.
Using the query.
Using the query.
Using the query.
Using the query.
Using the query.
Using the query.
Using the query.
Using the query.
Using the query.
Using the query.
Using the query.
Using the query.
Using the query.
Using the query.




































































 the query.
 the query.

 the query.
 the query.
 the query.
 the query.
 the query.















 theQuery

 theQuery


 theQuery
 theQuery



 theQuery


 the query
 the query



















































 the
 the
 the
 the columns the columns the columns the questions
 the questions
 the questions questions
 the questions
 the
 the theName theName theName









 theName theQuestion


 the
 the
 the the the the the the the the the the the the the
 theName
 theName
time.time.time.time.time.time.date












 theName
 theName
 the the the the the the the the the the the the the the the the the the the the the the the the the the the the theName


type
Ordering theName




 theName
 the
 the
 the the the the the the the the the the the the the the the the







 date,date,










































the the the the the the the the the the the the the the the the the the the the the the
order.
order,order.

order, the the the the the the the the the the order the the the the the the the the the the the the the the the the the the the the the the the
 the





















 the
 the
 the




order




 the the the


































































































the the the the the the the






































































































































































































































































































































































































	
																					"	"	"	"						"	"	"	"	"	"	"	"
"	"	"	"	"	"
"	"	"	"Ar"Ar"
"
"
"
"
"
"
"	"
"
"
"
" 	"	"	"																	






"
"
"
"
"
"
"
"
"	"
"


"
"
"
"
"	"	"	"	"	"	"	"	"	"	"	"	"	"	"	"	"		" 	 		 
  	     	"    										"	"	"	"	"	"	"	"	" 	"   
              " " " " " " " " "        "   
 
                            							"
  





"





"


























































 
 
                              	"	"	" " " " " " " " " " " "
" " "         " " " " " " " "    " " " " " " " "   " " "                            " "+  "+ "+ "+  Tags-     
 


-                          " " " " " " " " " " " " - -        - - -   " " " " " " " " " " " " " " " "
.
. "
" " "       " " " "
- "
- """"""""""""" " """""""""""""""""""""""
"
" " " "                   "


" "
"
"





"
"
"
"""""
"
"
" " """"""""""""
"
"
"
"
""""""
""""""""" """""""""""""" " "" " " " "
"
"
"
"
"
"
"
"
""""
"""
"
"
"
"
"
"
"
"
"
"
"
"
" " " " " " "
"
"
"
""
"
"
"



 
                                          












"
"
"
"
" " " "
"
" """"""
"
"
""
"
"
"












                  "  "
"
" " """ " " " " " " " "
"
"
"
"


"
"
"
"]
(Background on this error at: https://sqlalche.me/e/14/e3q8)

In [None]:


agent_executor = create_sql_agent(
    llm=llm_hfp,
    toolkit=toolkit,
    verbose=True,
    prompt = PROMPT,
    agent_type=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
)



In [None]:
db_sql.run("SELECT COUNT(*) FROM artists2")

In [None]:
agent_executor.run(QUESTION_01)

# 6. Generate the output of the prompt and display

In [None]:
# tokenize 
input_ids = tokenizer.encode(PROMPT, return_tensors='pt')
output = model.generate(
    input_ids,
    max_new_tokens=500,
    do_sample=True,
    pad_token_id=tokenizer.eos_token_id,
)
generated_text = tokenizer.decode(output[0], skip_special_tokens=True)

# note that this step takes several minutes on a CPU instance

In [None]:
# convert the similarity search results to markdown to make it pretty
md_search_docs = ''
for doc in top_search_docs:
    md_search_docs += f"### File: {doc.metadata['filename']}, Page: {doc.metadata['page']}\n"
    md_search_docs += doc.page_content + "\n"

# combine the model generated output and the context
result = f'''## Question: 
{prompt}

## Answer: 
{generated_text[generated_text.find("[/INST]")+7:]}

## Context used: 
{md_search_docs}
'''
display(Markdown(result))