<a href="https://colab.research.google.com/github/loni9164/text_sql/blob/main/sql_starcoder_2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Install libraries

In [None]:
!pip install langchain langchain-experimental
!pip install -q  langchain
!pip install sentence-transformers
!pip install chromadb

!pip3 install transformers optimum
!pip3 install auto-gptq --extra-index-url https://huggingface.github.io/autogptq-index/whl/cu121/  # Use cu117 if on CUDA 11.7

In [None]:
!apt-get -qq update
!apt-get -qq -y install postgresql
!pip install psycopg2-binary

# Imports

In [1]:
import psycopg2
import sqlite3
import time
import pickle
import re

# from langchain.llms import CTransformers
from langchain import PromptTemplate, LLMChain
from langchain.callbacks.manager import CallbackManager
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain.utilities import SQLDatabase
from langchain.prompts import PromptTemplate
from langchain_experimental.sql import SQLDatabaseChain

# Create PostgreSQL

In [2]:
!service postgresql start
!sudo -u postgres psql -U postgres -c "ALTER USER postgres PASSWORD '12345';"
!service postgresql restart

 * Starting PostgreSQL 14 database server
   ...done.
ALTER ROLE
 * Restarting PostgreSQL 14 database server
   ...done.


In [3]:
conn = psycopg2.connect(
    host="localhost",
    user="postgres",
    password="12345",  # Use the correct password here
    dbname="postgres"
)
conn.autocommit = True
cursor = conn.cursor()

In [4]:
# Creating a new database
cursor.execute("CREATE DATABASE credit_card_system")
cursor.close()
conn.close()

DuplicateDatabase: ignored

In [None]:
# Connecting to the new database
conn = psycopg2.connect(
    host="localhost",
    user="postgres",
    password="12345",
    dbname="credit_card_system"
)
cursor = conn.cursor()

In [None]:
# SQL statements to create tables
create_table_statements = [
    """
    CREATE TABLE branch (
        branch_id INTEGER,
        branch_name TEXT,
        branch_address TEXT,
        branch_phone TEXT,
        branch_manager TEXT,
        branch_email TEXT,
        established_date DATE,
        number_of_employees INTEGER,
        PRIMARY KEY (branch_id)
    )
    """,
    """
    CREATE TABLE category (
        category_id INTEGER,
        category_name TEXT,
        PRIMARY KEY (category_id)
    )
    """,

    """
    CREATE TABLE users (
        user_id INTEGER,
        user_name TEXT,
        user_email TEXT,
        user_address TEXT,
        user_phone TEXT,
        date_of_birth DATE,
        registration_date DATE,
        status TEXT,
        branch_id INTEGER,
        PRIMARY KEY (user_id),
        FOREIGN KEY(branch_id) REFERENCES branch (branch_id)
    )
    """,

    """
    CREATE TABLE credit_card (
        card_id INTEGER,
        user_id INTEGER,
        card_number TEXT,
        card_type TEXT,
        expiry_date DATE,
        cvv INTEGER,
        issue_date DATE,
        credit_limit REAL,
        current_balance REAL,
        statement_balance REAL,
        PRIMARY KEY (card_id),
        FOREIGN KEY(user_id) REFERENCES users (user_id)
    )
    """,

    """
    CREATE TABLE transactions (
        transaction_id INTEGER,
        card_id INTEGER,
        transaction_date DATE,
        amount REAL,
        merchant TEXT,
        category_id INTEGER,
        transaction_type TEXT,
        transaction_status TEXT,
        description TEXT,
        PRIMARY KEY (transaction_id),
        FOREIGN KEY(card_id) REFERENCES credit_card (card_id),
        FOREIGN KEY(category_id) REFERENCES category (category_id)
    )
    """,

    """
    CREATE TABLE credit_card_financial (
        financial_id INTEGER,
        card_id INTEGER,
        overdue_charges REAL,
        loan_amount REAL,
        emi_amount REAL,
        emi_due_date DATE,
        interest_rate REAL,
        payment_due_date DATE,
        minimum_payment REAL,
        PRIMARY KEY (financial_id),
        FOREIGN KEY(card_id) REFERENCES credit_card (card_id)
    )
    """,

    """
    CREATE TABLE reward (
        reward_id INTEGER,
        transaction_id INTEGER,
        points_earned INTEGER,
        points_redeemed INTEGER,
        current_balance INTEGER,
        PRIMARY KEY (reward_id),
        FOREIGN KEY(transaction_id) REFERENCES transactions (transaction_id)
    )
    """
]


# Execute each CREATE TABLE statement
for statement in create_table_statements:
    cursor.execute(statement)

conn.commit()

In [None]:
import pandas as pd

# Function to load data from CSV to a table
def load_csv_to_table(csv_file_path, table_name):
    data = pd.read_csv(csv_file_path)
    for i, row in data.iterrows():
        insert_query = "INSERT INTO {} VALUES %s".format(table_name)
        cursor.execute(insert_query, (tuple(row),))

In [None]:
!git clone https://github.com/loni9164/text_sql.git

In [None]:
# Load data from CSV files in the correct order
load_csv_to_table('text_sql/csv_files/branch.csv', 'branch')
load_csv_to_table('text_sql/csv_files/category.csv', 'category')
load_csv_to_table('text_sql/csv_files/users.csv', 'users')
load_csv_to_table('text_sql/csv_files/credit_card.csv', 'credit_card')
load_csv_to_table('text_sql/csv_files/transactions.csv', 'transactions')
load_csv_to_table('text_sql/csv_files/credit_card_financial.csv', 'credit_card_financial')
load_csv_to_table('text_sql/csv_files/reward.csv', 'reward')

conn.commit()

# DB connection

In [2]:
# Connecting to the new database
conn = psycopg2.connect(
    host="localhost",
    user="postgres",
    password="12345",
    dbname="credit_card_system"
)

In [3]:
def query_db(query):
  cursor = conn.cursor()
  cursor.execute("ROLLBACK")
  cursor.execute(query)
  return cursor.fetchall()

query_db('SELECT user_id FROM users LIMIT 5;')

[(1,), (2,), (3,), (4,), (5,)]

In [4]:
db_user = "postgres"
db_password = "12345"
db_host = "localhost"
db_name = "credit_card_system"

cursor = conn.cursor()

connection_string = f"postgresql://{db_user}:{db_password}@{db_host}/{db_name}"
db = SQLDatabase.from_uri(connection_string)
table_info = db.table_info
# print(table_info)

# Load LLM model

In [11]:
import os
os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'max_split_size_mb:250'  # Set desired value (e.g., 512)

In [5]:
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

model_name_or_path = "TheBloke/sqlcoder2-GPTQ"
# To use a different branch, change revision
# For example: revision="gptq-4bit-128g-actorder_True"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path,
                                             device_map="auto",
                                             trust_remote_code=False,
                                             revision="main")

In [6]:
from auto_gptq import exllama_set_max_input_length
model = exllama_set_max_input_length(model, max_input_length=3000)
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, use_fast=True)

In [7]:
from transformers import GenerationConfig
generation_config = GenerationConfig(
    # max_length=1000,  # Adjust as needed
    max_new_tokens=512,
    do_sample=True,
    temperature=0.1,
    top_p=0.95,
    top_k=40,
    repetition_penalty=1.1
)

In [8]:
# print("\n\n*** Generate:")



# Inference can also be done using transformers' pipeline

# print("*** Pipeline:")
# pipe = pipeline(
#     "text-generation",
#     model=model,
#     tokenizer=tokenizer,
#     generation_config=generation_config
# )


# print(pipe(prompt_template)[0]['generated_text'])

# Quey generation without longchain

In [27]:
def update_prompt_template(prompt):
  template = f'''
  ## Task
  Generate a SQL query to answer the following question:
  `{prompt}`

  ### Database Schema
  This query will run on a database whose schema is represented in this string:
  CREATE TABLE branch (
        branch_id INTEGER,
        branch_name TEXT,
        branch_address TEXT,
        branch_phone TEXT,
        branch_manager TEXT,
        branch_email TEXT,
        established_date DATE,
        number_of_employees INTEGER,
        PRIMARY KEY (branch_id)
    )


    CREATE TABLE category (
        category_id INTEGER,
        category_name TEXT,
        PRIMARY KEY (category_id)
    )

    CREATE TABLE users (
        user_id INTEGER,
        user_name TEXT,
        user_email TEXT,
        user_address TEXT,
        user_phone TEXT,
        date_of_birth DATE,
        registration_date DATE,
        status TEXT,
        branch_id INTEGER,
        PRIMARY KEY (user_id),
        FOREIGN KEY(branch_id) REFERENCES branch (branch_id)
    )


    CREATE TABLE credit_card (
        card_id INTEGER,
        user_id INTEGER,
        card_number TEXT,
        card_type TEXT,
        expiry_date DATE,
        cvv INTEGER,
        issue_date DATE,
        credit_limit REAL,
        current_balance REAL,
        statement_balance REAL,
        PRIMARY KEY (card_id),
        FOREIGN KEY(user_id) REFERENCES users (user_id)
    )


    CREATE TABLE transactions (
        transaction_id INTEGER,
        card_id INTEGER,
        transaction_date DATE,
        amount REAL,
        merchant TEXT,
        category_id INTEGER,
        transaction_type TEXT,
        transaction_status TEXT,
        description TEXT,
        PRIMARY KEY (transaction_id),
        FOREIGN KEY(card_id) REFERENCES credit_card (card_id),
        FOREIGN KEY(category_id) REFERENCES category (category_id)
    )


    CREATE TABLE credit_card_financial (
        financial_id INTEGER,
        card_id INTEGER,
        overdue_charges REAL,
        loan_amount REAL,
        emi_amount REAL,
        emi_due_date DATE,
        interest_rate REAL,
        payment_due_date DATE,
        minimum_payment REAL,
        PRIMARY KEY (financial_id),
        FOREIGN KEY(card_id) REFERENCES credit_card (card_id)
    )

    CREATE TABLE reward (
        reward_id INTEGER,
        transaction_id INTEGER,
        points_earned INTEGER,
        points_redeemed INTEGER,
        current_balance INTEGER,
        PRIMARY KEY (reward_id),
        FOREIGN KEY(transaction_id) REFERENCES transactions (transaction_id)
    )

  ### SQL
  Given the database schema, here is the SQL query that answers `{prompt}`:
  ```sql
  '''
  return template


In [28]:
prompt_template = update_prompt_template("How many users do we have")

In [None]:
input_ids = tokenizer(prompt_template, return_tensors='pt').input_ids.cuda()
output = model.generate(inputs=input_ids, temperature=0.1, do_sample=True, top_p=0.95, top_k=40, max_new_tokens=512)
print(tokenizer.decode(output[0]))

In [55]:
def generate_sql_pipe(prompt):
  updated_prompt = update_prompt_template(prompt)
  sql_query = pipe(updated_prompt)[0]['generated_text'].split("```sql")[-1].strip()
  return sql_query

In [71]:
print(generate_sql_pipe('how many users do we have'))

SELECT COUNT(DISTINCT users.user_id) AS total_users FROM users;


In [None]:
questions = ['List all the cards going to expire by sep 2026',
 'what is the total amount spent by user 2 for food and groceries?',
 'How many unique users do we have in transactions',
 'What is the total amount lended to the user 1',
 'List all branches established after 2010.',
 'List the user names and total reward points earned by each user.',
 "Find the total amount spent in 'Movies and Entertainment' category by User 1.",
 'What is the total loan amount issued to users of Branch 3?',
 "List all transactions made in the 'Shopping' category."]

for qus in questions:
  print(generate_sql_pipe(qus))


# Test queries

In [10]:
questions = [
    'What is the total number of active credit cards?',
    "How many transactions were made in the 'Shopping' category?",
    "Which user has the most number of credit cards?",
    "Find the total amount spent on fuel by all users.",
    "What is the oldest branch of the bank?",
    "Find the user with the oldest credit card.",
    "How many rewards were earned for transactions above $1000?",
    "Which category has the lowest average transaction amount?",
    "What is the total outstanding balance for all credit cards?",
    "How many branches have less than 20 employees?",
    "List the top 3 users by total transaction amount.",
    "What is the average number of transactions per user?"
]


answers = ["6", "93", "Michael Baldwin", "23960.57", "Branch 3", "Gabrielle Anderson", "0", "Shopping", "14090.2296532", "2", "Gabrielle Anderson, Michael Baldwin", "166.66666666666666"]

test_data = [{'qury_text':q, 'result': a} for q ,a in zip(questions, answers)]

In [None]:
few_shot_examples2 = []

for i, data in enumerate(test_data):
  if i in (0,3,5,6,10):
    item = {}
    question = data['qury_text']
    print(question)
    print(data['result'])
    item['Question'] = question
    sqlquery = generate_sql_pipe(data['qury_text'])
    item['SQLQuery'] = sqlquery['query']
    print(sqlquery['query'])
    item['SQLResult'] = sqlquery['result']
    print(item['SQLResult'])
    item['Answer'] = ''
    few_shot_examples2.append(item)
    print('---------------------------------')

# Longchain

## longchain Template

In [11]:
template = f'''
  ## Task
  Generate a SQL query to answer the following question:
  `{input}`

  ### Database Schema
  This query will run on a database whose schema is represented in this string:
  CREATE TABLE branch (
        branch_id INTEGER,
        branch_name TEXT,
        branch_address TEXT,
        branch_phone TEXT,
        branch_manager TEXT,
        branch_email TEXT,
        established_date DATE,
        number_of_employees INTEGER,
        PRIMARY KEY (branch_id)
    )


    CREATE TABLE category (
        category_id INTEGER,
        category_name TEXT,
        PRIMARY KEY (category_id)
    )

    CREATE TABLE users (
        user_id INTEGER,
        user_name TEXT,
        user_email TEXT,
        user_address TEXT,
        user_phone TEXT,
        date_of_birth DATE,
        registration_date DATE,
        status TEXT,
        branch_id INTEGER,
        PRIMARY KEY (user_id),
        FOREIGN KEY(branch_id) REFERENCES branch (branch_id)
    )


    CREATE TABLE credit_card (
        card_id INTEGER,
        user_id INTEGER,
        card_number TEXT,
        card_type TEXT,
        expiry_date DATE,
        cvv INTEGER,
        issue_date DATE,
        credit_limit REAL,
        current_balance REAL,
        statement_balance REAL,
        PRIMARY KEY (card_id),
        FOREIGN KEY(user_id) REFERENCES users (user_id)
    )


    CREATE TABLE transactions (
        transaction_id INTEGER,
        card_id INTEGER,
        transaction_date DATE,
        amount REAL,
        merchant TEXT,
        category_id INTEGER,
        transaction_type TEXT,
        transaction_status TEXT,
        description TEXT,
        PRIMARY KEY (transaction_id),
        FOREIGN KEY(card_id) REFERENCES credit_card (card_id),
        FOREIGN KEY(category_id) REFERENCES category (category_id)
    )


    CREATE TABLE credit_card_financial (
        financial_id INTEGER,
        card_id INTEGER,
        overdue_charges REAL,
        loan_amount REAL,
        emi_amount REAL,
        emi_due_date DATE,
        interest_rate REAL,
        payment_due_date DATE,
        minimum_payment REAL,
        PRIMARY KEY (financial_id),
        FOREIGN KEY(card_id) REFERENCES credit_card (card_id)
    )

    CREATE TABLE reward (
        reward_id INTEGER,
        transaction_id INTEGER,
        points_earned INTEGER,
        points_redeemed INTEGER,
        current_balance INTEGER,
        PRIMARY KEY (reward_id),
        FOREIGN KEY(transaction_id) REFERENCES transactions (transaction_id)
    )

  ### SQL
  Given the database schema, here is the SQL query that answers `{input}`:
  ```sql
  '''

# Integration

In [9]:
from langchain.llms import HuggingFacePipeline
from langchain import PromptTemplate, LLMChain
llm = HuggingFacePipeline(pipeline=pipe)

## Dummpy template

In [23]:
PROMPT_SUFFIX = """Given an input question, first create a syntactically correct PostgreSQL query to run, then look at the results of the query and return the answer.
Use the following format:

Only use the following tables:

	CREATE TABLE branch (
        branch_id INTEGER,
        branch_name TEXT,
        branch_address TEXT,
        branch_phone TEXT,
        branch_manager TEXT,
        branch_email TEXT,
        established_date DATE,
        number_of_employees INTEGER,
        PRIMARY KEY (branch_id)
    )


    CREATE TABLE category (
        category_id INTEGER,
        category_name TEXT,
        PRIMARY KEY (category_id)
    )

    CREATE TABLE users (
        user_id INTEGER,
        user_name TEXT,
        user_email TEXT,
        user_address TEXT,
        user_phone TEXT,
        date_of_birth DATE,
        registration_date DATE,
        status TEXT,
        branch_id INTEGER,
        PRIMARY KEY (user_id),
        FOREIGN KEY(branch_id) REFERENCES branch (branch_id)
    )


    CREATE TABLE credit_card (
        card_id INTEGER,
        user_id INTEGER,
        card_number TEXT,
        card_type TEXT,
        expiry_date DATE,
        cvv INTEGER,
        issue_date DATE,
        credit_limit REAL,
        current_balance REAL,
        statement_balance REAL,
        PRIMARY KEY (card_id),
        FOREIGN KEY(user_id) REFERENCES users (user_id)
    )


    CREATE TABLE transactions (
        transaction_id INTEGER,
        card_id INTEGER,
        transaction_date DATE,
        amount REAL,
        merchant TEXT,
        category_id INTEGER,
        transaction_type TEXT,
        transaction_status TEXT,
        description TEXT,
        PRIMARY KEY (transaction_id),
        FOREIGN KEY(card_id) REFERENCES credit_card (card_id),
        FOREIGN KEY(category_id) REFERENCES category (category_id)
    )


    CREATE TABLE credit_card_financial (
        financial_id INTEGER,
        card_id INTEGER,
        overdue_charges REAL,
        loan_amount REAL,
        emi_amount REAL,
        emi_due_date DATE,
        interest_rate REAL,
        payment_due_date DATE,
        minimum_payment REAL,
        PRIMARY KEY (financial_id),
        FOREIGN KEY(card_id) REFERENCES credit_card (card_id)
    )


    CREATE TABLE reward (
        reward_id INTEGER,
        transaction_id INTEGER,
        points_earned INTEGER,
        points_redeemed INTEGER,
        current_balance INTEGER,
        PRIMARY KEY (reward_id),
        FOREIGN KEY(transaction_id) REFERENCES transactions (transaction_id)
    )

Question: {input}"""

## Chain

In [24]:
from langchain.chains.sql_database.prompt import SQLITE_PROMPT, POSTGRES_PROMPT, PROMPT_SUFFIX

In [None]:
db_chain = SQLDatabaseChain.from_llm(llm, db, verbose=True, return_sql=True,
                                     prompt= POSTGRES_PROMPT)

In [None]:
db_chain('what is the name of user for userid 2')

In [None]:
db_chain.run(test_data[6]['qury_text'])

In [None]:
db_chain = SQLDatabaseChain.from_llm(llm, db, verbose=True, return_sql=False,
                                     prompt=PromptTemplate(input_variables=["input", "table_info"],
                                     template=PROMPT_SUFFIX))

db_chain.run('how many cards do we have')

In [None]:
db_chain3 = SQLDatabaseChain.from_llm(llm, db, verbose=True, return_sql=False)


db_chain3.run('what is the name of user for userid 2')

In [None]:
db_chain.run("How many employees are there?")

# # Test sqlcoder without few shots learning

In [None]:
db_chain.run('how many cards do we have')

# Comaparing base sqlcoder model, with prompt and without prompt

In [None]:
res_lst = []
failed_cases = []

for i, item in enumerate(test_data):
  print(i)
  print("Question:")
  question = item['qury_text']
  print(question)
  print('--------------------')
  print(item['result'])
  print('--------------------')
  # print("Gen_sql_wto_longchain:")
  # print(gen_sql_wto_longchain(question))
  # print('--------------------')
  print("Longchain1")
  try:
    res = db_chain.run(question)
    print(res)
  except:
    failed_cases.append(question)
    print("Longchain1 Failed")
  print('--------------------')
  # print("Longchain3")
  # try:
  #   res = db_chain3.run(question)
  #   print(res)
  # except:
  #   print("Longchain2 Failed")

  # print('***********************')
  # print('\n')

In [None]:
failed_cases

In [None]:
db_chain.run('What is the total number of active credit cards?')

In [None]:
with open('text_sql/few_shot_examples', 'rb') as f:
  few_shot_examples = pickle.load(f)

In [None]:
len(few_shot_examples)

In [None]:
few_shot_examples[0]

In [None]:
for i, item in enumerate(few_shots):
  print(i)
  question = item['Question']
  print('Question:')
  print(question)
  print('------------------------------------------')
  print('SQL query:')
  print(item['SQLQuery'])
  print('------------------------------------------')
  print('Expected result:')
  print(item['Answer'])
  print('------------------------------------------')
  print('sqlcoder_34b:')
  try:
    sql_query_formatted = format_to_sqlite(item['sqlcoder_34b'])
    print(sql_query_formatted)
    print('------')
    print(query_db(sql_query_formatted))
    print('------------------------------------------')
  except:
    print('Failed', i)
  print('sqlcoder2 result:')
  try:
    prompt_template = update_prompt_template(question)
    sql_query = llm_pipe(prompt_template)
    sql_query = format_to_sqlite(sql_query)
    print(sql_query)
    print('------')
    print(query_db(sql_query))
  except:
    print('Failed', i)
  print("Longchain:")
  try:
    sql_query = chain.invoke({"question": question})
    sql_query = format_to_sqlite(sql_query)
    print(sql_query)
    print('------')
    print(db.run(sql_query))
    print('------')
  except:
    print('Failed', i)
  print('****************************************************************************************')
  print('\n')

In [None]:
db_chain = SQLDatabaseChain.from_llm(llm, db, verbose=True)

In [None]:
db_chain.run( "What is the total number of  credit cards?")

In [None]:
dir(db_chain)

In [None]:
# from langchain.llms import GooglePalm

# api_key = 'AIzaSyCEP46MCrbkUR0AENTGOJzqYRXTm6NUd7Q'

# llm = GooglePalm(google_api_key=api_key, temperature=0.2)

In [None]:
from langchain.utilities import SQLDatabase
from langchain_experimental.sql import SQLDatabaseChain
db_chain = SQLDatabaseChain.from_llm(llm, db, verbose=True)

In [None]:
db_chain.run("Find the total amount spent on fuel by all users.")

# Few shot learning

We will use few shot learning to fix issues we have seen so far

### Creating Semantic Similarity Based example selector

- create embedding on the few_shots
- Store the embeddings in Chroma DB
- Retrieve the the top most Semantically close example from the vector store

In [11]:
from langchain.embeddings import HuggingFaceEmbeddings

embeddings = HuggingFaceEmbeddings(model_name='sentence-transformers/all-MiniLM-L6-v2')

In [12]:
few_shot_examples = [
  {'Question': 'What is the total number of active credit cards?',
  'SQLQuery': 'SELECT COUNT(DISTINCT card_id) AS number_of_cards FROM credit_card WHERE expiry_date > CURRENT_DATE;',
  'SQLResult': '[(6,)]',
  'Answer': 'There are six active credit cards in the system'},
 {'Question': 'Find the total amount spent on fuel by all users.',
  'SQLQuery': "SELECT SUM(transactions.amount::FLOAT) AS total_amount FROM transactions JOIN category ON transactions.category_id = category.category_id WHERE category.category_name ilike '%fuel%' AND transactions.transaction_status ILIKE '%completed%' AND transactions.transaction_type ILIKE '%Debit%';",
  'SQLResult': '[(4508.70002746582,)]',
  'Answer': 'Total amount spent on fuel by all users for completed transactions in 4508.7'},
 {'Question': 'Find the user with the oldest credit card.',
  'SQLQuery': 'SELECT users.user_name FROM users JOIN credit_card ON users.user_id = credit_card.user_id ORDER BY credit_card.expiry_date ASC LIMIT 1;',
  'SQLResult': "[('Gabrielle Anderson',)]",
  'Answer': 'The user with oldest credit card is Gabrielle Anderson'},
 {'Question': 'How many rewards were earned for transactions above 1000?',
  'SQLQuery': 'SELECT COUNT(DISTINCT r.reward_id) AS total_rewards FROM transactions t JOIN reward r ON t.transaction_id = r.transaction_id WHERE t.amount > 1000;',
  'SQLResult': '[(None,)]',
  'Answer': 'There were no rewards earned for transactions above 1000'},
 {'Question': 'List the top 3 users by total transaction amount.',
  'SQLQuery': 'SELECT u.user_name, SUM(t.amount) AS total_amount FROM users u JOIN credit_card c ON u.user_id = c.user_id JOIN transactions t ON c.card_id = t.card_id GROUP BY u.user_name ORDER BY total_amount DESC NULLS LAST LIMIT 3;',
  'SQLResult': "[('Gabrielle Anderson', 128059.26), ('Michael Baldwin', 121319.25)]",
  'Answer': 'Top 3 users by total transaction amount are Gabrielle Anderson (with 128059.26) and Michael Baldwin (With 121319.25).'}]

In [13]:
to_vectorize = [" ".join(example.values()) for example in few_shot_examples]

In [14]:
from langchain.vectorstores import Chroma
vectorstore = Chroma.from_texts(to_vectorize, embeddings, metadatas=few_shot_examples)

In [15]:
from langchain.prompts import SemanticSimilarityExampleSelector

example_selector = SemanticSimilarityExampleSelector(
    vectorstore=vectorstore,
    k=2,
)

In [16]:
example_selector.select_examples({"Question": "How many total rewards do we have"})

[{'Answer': 'There were no rewards earned for transactions above 1000',
  'Question': 'How many rewards were earned for transactions above 1000?',
  'SQLQuery': 'SELECT COUNT(DISTINCT r.reward_id) AS total_rewards FROM transactions t JOIN reward r ON t.transaction_id = r.transaction_id WHERE t.amount > 1000;',
  'SQLResult': '[(None,)]'},
 {'Answer': 'There are six active credit cards in the system',
  'Question': 'What is the total number of active credit cards?',
  'SQLQuery': 'SELECT COUNT(DISTINCT card_id) AS number_of_cards FROM credit_card WHERE expiry_date > CURRENT_DATE;',
  'SQLResult': '[(6,)]'}]

In [17]:
from langchain.chains.sql_database.prompt import SQLITE_PROMPT, POSTGRES_PROMPT, PROMPT, PROMPT_SUFFIX, PromptTemplate
from langchain.prompts import FewShotPromptTemplate

### Setting up PromptTemplete using input variables

In [18]:
from langchain.prompts.prompt import PromptTemplate

example_prompt = PromptTemplate(
    input_variables=["Question", "SQLQuery", "SQLResult","Answer",],
    template="\nQuestion: {Question}\nSQLQuery: {SQLQuery}\nSQLResult: {SQLResult}\nAnswer: {Answer}",
)

In [19]:
prefix = '''
You are a PostgreSQL expert. Given an input question, first create a syntactically correct PostgreSQL query to run, then look at the results of the query and return the answer to the input question.
Unless the user specifies in the question a specific number of examples to obtain, query for at most {top_k} results using the LIMIT clause as per PostgreSQL. You can order the results to return the most informative data in the database.
Never query for all columns from a table. You must query only the columns that are needed to answer the question. Wrap each column name in double quotes (") to denote them as delimited identifiers.
Pay attention to use only the column names you can see in the tables below. Be careful to not query for columns that do not exist. Also, pay attention to which column is in which table.
Pay attention to use CURRENT_DATE function to get the current date, if the question involves "today".

Use the following format:

Question: Question here
SQLQuery: SQL Query to run
SQLResult: Result of the SQLQuery
Answer: Final answer here

No pre-amble.
'''

In [20]:
few_shot_prompt = FewShotPromptTemplate(
    example_selector=example_selector,
    example_prompt=example_prompt,
    prefix= prefix,
    suffix=PROMPT_SUFFIX,
    input_variables=["input", "table_info", "top_k"], #These variables are used in the prefix and suffix
)

db_chain2 = SQLDatabaseChain.from_llm(llm, db, verbose=True, prompt=few_shot_prompt)

In [24]:
few_shot_prompt = FewShotPromptTemplate(
    example_selector=example_selector,
    example_prompt=example_prompt,
    # prefix= prefix,
    suffix=PROMPT_SUFFIX,
    input_variables=["input", "table_info", "top_k"], #These variables are used in the prefix and suffix
)

db_chain3 = SQLDatabaseChain.from_llm(llm, db, verbose=True, prompt=few_shot_prompt)

In [25]:
failed_questions = ['List all the cards going to expire by sep 2026',
                    'what is the total amount spent by user 2 for food and groceries?',
                    'How many unique users do we have in transactions',
                    'What is the total amount lended to the user 1',
                    'List all branches established after 2010.',
                    'List the user names and total reward points earned by each user.',
                    "Find the total amount spent in 'Movies and Entertainment' category by User 1.",
                    'What is the total loan amount issued to users of Branch 3?',
                    "List all transactions made in the 'Shopping' category."]

for que in failed_questions:
  db_chain3.run(que)



[1m> Entering new SQLDatabaseChain chain...[0m
List all the cards going to expire by sep 2026
SQLQuery:

ValueError: ignored

In [77]:
db_chain3.run("List all transactions made in the 'Shopping' category.")



[1m> Entering new SQLDatabaseChain chain...[0m
List all transactions made in the 'Shopping' category.
SQLQuery:



OutOfMemoryError: ignored

In [51]:
import time
start_time = time.time()
db_chain2.run("How many users do we have in the transactions")
end_time = time.time()
print(end_time-start_time)



[1m> Entering new SQLDatabaseChain chain...[0m
How many users do we have in the transactions
SQLQuery:



[32;1m[1;3mSELECT COUNT(DISTINCT users.user_id) AS number_of_users FROM transactions JOIN credit_card ON transactions.card_id = credit_card.card_id JOIN users ON credit_card.user_id = users.user_id;[0m
SQLResult: [33;1m[1;3m[(2,)][0m
Answer:



[32;1m[1;3mWe have 2 different users involved within the transactions[0m
[1m> Finished chain.[0m
6.400899410247803


In [None]:
failed_cases

['What is the oldest branch of the bank?']

In [None]:
res_lst = []
failed_cases2 = []
failed_cases3 = []
for i, item in enumerate(test_data):
  print(i)
  print("Question:")
  question = item['qury_text']
  print(question)
  print('--------------------')
  print(item['result'])
  print('--------------------')
  # print("Gen_sql_wto_longchain:")
  # print(gen_sql_wto_longchain(question))
  # print('--------------------')
  print("Longchain1")
  try:
    res = db_chain2.run(question)
    print(res)
  except:
    failed_cases2.append(question)
    print("Longchain2 Failed")
  print('--------------------')
  print("Longchain3")
  try:
    res = db_chain3.run(question)
    print(res)
  except:
    failed_cases3.append(question)
    print("Longchain3 Failed")

  print('***********************')
  print('\n')

0
Question:
What is the total number of active credit cards?
--------------------
6
--------------------
Longchain1


[1m> Entering new SQLDatabaseChain chain...[0m
What is the total number of active credit cards?
SQLQuery:



[32;1m[1;3mSELECT COUNT(DISTINCT card_id) AS number_of_cards FROM credit_card WHERE expiry_date > CURRENT_DATE;[0m
SQLResult: [33;1m[1;3m[(6,)][0m
Answer:



[32;1m[1;3mThere are six active credit cards registered within the system today.[0m
[1m> Finished chain.[0m
There are six active credit cards registered within the system today.
--------------------
Longchain3


[1m> Entering new SQLDatabaseChain chain...[0m
What is the total number of active credit cards?
SQLQuery:



[32;1m[1;3mSELECT COUNT(DISTINCT card_id) AS number_of_cards FROM credit_card WHERE expiry_date > CURRENT_DATE AND user_id IN (SELECT user_id FROM transactions);[0m
SQLResult: [33;1m[1;3m[(6,)][0m
Answer:



[32;1m[1;3mThere are six active credit cards registered to a customer account connected to one or more transactions[0m
[1m> Finished chain.[0m
There are six active credit cards registered to a customer account connected to one or more transactions
***********************


1
Question:
How many transactions were made in the 'Shopping' category?
--------------------
93
--------------------
Longchain1


[1m> Entering new SQLDatabaseChain chain...[0m
How many transactions were made in the 'Shopping' category?
SQLQuery:



[32;1m[1;3mSELECT COUNT(transaction_id) AS number_of_transactions FROM transactions WHERE category_id IN (SELECT category_id FROM category WHERE category_name ILIKE '%shopping%');[0m
SQLResult: [33;1m[1;3m[(93,)][0m
Answer:



[32;1m[1;3mNumber of transactions done under shopping category - 93[0m
[1m> Finished chain.[0m
Number of transactions done under shopping category - 93
--------------------
Longchain3


[1m> Entering new SQLDatabaseChain chain...[0m
How many transactions were made in the 'Shopping' category?
SQLQuery:



[32;1m[1;3mSELECT COUNT(transaction_id) AS number_of_transactions FROM transactions JOIN category ON transactions.category_id = category.category_id WHERE category.category_name ILIKE '%shopping%';[0m
SQLResult: [33;1m[1;3m[(93,)][0m
Answer:



[32;1m[1;3mNumber of transactions done under shopping is 93[0m
[1m> Finished chain.[0m
Number of transactions done under shopping is 93
***********************


2
Question:
Which user has the most number of credit cards?
--------------------
Michael Baldwin
--------------------
Longchain1


[1m> Entering new SQLDatabaseChain chain...[0m
Which user has the most number of credit cards?
SQLQuery:



[32;1m[1;3mSELECT users.user_name, COUNT(DISTINCT credit_card.card_id) AS num_cards FROM users JOIN credit_card ON users.user_id = credit_card.user_id GROUP BY users.user_name ORDER BY num_cards DESC NULLS LAST LIMIT 1;[0m
SQLResult: [33;1m[1;3m[('Gabrielle Anderson', 3)][0m
Answer:



[32;1m[1;3mGabrielle Anderson has the most number of credit cards (3).[0m
[1m> Finished chain.[0m
Gabrielle Anderson has the most number of credit cards (3).
--------------------
Longchain3


[1m> Entering new SQLDatabaseChain chain...[0m
Which user has the most number of credit cards?
SQLQuery:



[32;1m[1;3mSELECT users.user_name, COUNT(DISTINCT credit_card.card_id) AS num_cards FROM users JOIN credit_card ON users.user_id = credit_card.user_id GROUP BY users.user_name ORDER BY num_cards DESC NULLS LAST LIMIT 1;[0m
SQLResult: [33;1m[1;3m[('Gabrielle Anderson', 3)][0m
Answer:



[32;1m[1;3mGabrielle Anderson has the most number of credit cards[0m
[1m> Finished chain.[0m
Gabrielle Anderson has the most number of credit cards
***********************


3
Question:
Find the total amount spent on fuel by all users.
--------------------
23960.57
--------------------
Longchain1


[1m> Entering new SQLDatabaseChain chain...[0m
Find the total amount spent on fuel by all users.
SQLQuery:



[32;1m[1;3mSELECT SUM(transactions.amount::FLOAT) AS total_amount FROM transactions JOIN category ON transactions.category_id = category.category_id WHERE category.category_name ilike '%fuel%' AND transactions.transaction_status ILIKE '%completed%' AND transactions.transaction_type ILIKE '%Debit%' AND transactions.transaction_date >= CURRENT_DATE - interval '7 days';[0m
SQLResult: [33;1m[1;3m[(None,)][0m
Answer:



[32;1m[1;3mTotal amount spent on fuel by all users within the last week has not yet been done.[0m
[1m> Finished chain.[0m
Total amount spent on fuel by all users within the last week has not yet been done.
--------------------
Longchain3


[1m> Entering new SQLDatabaseChain chain...[0m
Find the total amount spent on fuel by all users.
SQLQuery:



[32;1m[1;3mSELECT SUM(transactions.amount::FLOAT) AS total_amount FROM transactions JOIN category ON transactions.category_id = category.category_id WHERE category.category_name ilike '%fuel%' AND transactions.transaction_status ILIKE '%completed%' AND transactions.transaction_type ILIKE '%Debit%';[0m
SQLResult: [33;1m[1;3m[(4508.70002746582,)][0m
Answer:



[32;1m[1;3mTotal amount spent on fuel by all users for completed transactions in 4508.7[0m
[1m> Finished chain.[0m
Total amount spent on fuel by all users for completed transactions in 4508.7
***********************


4
Question:
What is the oldest branch of the bank?
--------------------
Branch 3
--------------------
Longchain1


[1m> Entering new SQLDatabaseChain chain...[0m
What is the oldest branch of the bank?
SQLQuery:



[32;1m[1;3mSELECT branches.branch_name, branches.established_date::TEXT AS branch_establishment_year::TEXT FROM branches WHERE branches.established_date::TEXT IN (SELECT MIN(branches.established_date::TEXT) FROM branches);[0mLongchain2 Failed
--------------------
Longchain3


[1m> Entering new SQLDatabaseChain chain...[0m
What is the oldest branch of the bank?
SQLQuery:



[32;1m[1;3mSELECT MIN(b.branch_id::TEXT) AS min_branch_id, b.branch_name, to_char(MIN(to_timestamp(b.established_date::TEXT,'YYYYMMDD')), 'YYYY-MM-DD') AS min_established_date, MAX(to_timestamp(b.established_date::TEXT,'YYYYMMDD')) - MIN(to_timestamp(b.established_date::TEXT,'YYYYMMDD')) AS age_of_longest_branch FROM branch b;[0mLongchain3 Failed
***********************


5
Question:
Find the user with the oldest credit card.
--------------------
Gabrielle Anderson
--------------------
Longchain1


[1m> Entering new SQLDatabaseChain chain...[0m
Find the user with the oldest credit card.
SQLQuery:



[32;1m[1;3mSELECT users.user_name FROM users JOIN credit_card ON users.user_id = credit_card.user_id ORDER BY credit_card.expiry_date ASC LIMIT 1;[0m
SQLResult: [33;1m[1;3m[('Gabrielle Anderson',)][0m
Answer:



[32;1m[1;3mThe user with oldest credit card is Gabrielle Anderson[0m
[1m> Finished chain.[0m
The user with oldest credit card is Gabrielle Anderson
--------------------
Longchain3


[1m> Entering new SQLDatabaseChain chain...[0m
Find the user with the oldest credit card.
SQLQuery:



[32;1m[1;3mSELECT users.user_name FROM users JOIN credit_card ON users.user_id = credit_card.user_id ORDER BY credit_card.expiry_date ASC LIMIT 1;[0m
SQLResult: [33;1m[1;3m[('Gabrielle Anderson',)][0m
Answer:



[32;1m[1;3mThe user with oldest credit card is Gabrielle Anderson[0m
[1m> Finished chain.[0m
The user with oldest credit card is Gabrielle Anderson
***********************


6
Question:
How many rewards were earned for transactions above $1000?
--------------------
0
--------------------
Longchain1


[1m> Entering new SQLDatabaseChain chain...[0m
How many rewards were earned for transactions above $1000?
SQLQuery:



[32;1m[1;3mSELECT COUNT(DISTINCT r.reward_id) AS total_rewards FROM transactions t JOIN reward r ON t.transaction_id = r.transaction_id WHERE t.amount > 1000 AND t.amount < 2000;[0m
SQLResult: [33;1m[1;3m[(0,)][0m
Answer:



[32;1m[1;3mNo rewards earned within $1000 - $2000 range[0m
[1m> Finished chain.[0m
No rewards earned within $1000 - $2000 range
--------------------
Longchain3


[1m> Entering new SQLDatabaseChain chain...[0m
How many rewards were earned for transactions above $1000?
SQLQuery:



[32;1m[1;3mSELECT COUNT(DISTINCT r.reward_id) AS total_rewards FROM transactions t JOIN reward r ON t.transaction_id = r.transaction_id WHERE t.amount::TEXT::FLOAT > 1000::TEXT::FLOAT;[0m
SQLResult: [33;1m[1;3m[(0,)][0m
Answer:



[32;1m[1;3mThere were no rewards earned for transactions above $1000[0m
[1m> Finished chain.[0m
There were no rewards earned for transactions above $1000
***********************


7
Question:
Which category has the lowest average transaction amount?
--------------------
Shopping
--------------------
Longchain1


[1m> Entering new SQLDatabaseChain chain...[0m
Which category has the lowest average transaction amount?
SQLQuery:



[32;1m[1;3mSELECT category.category_name, AVG(transactions.amount::FLOAT) AS avg_amount FROM transactions JOIN category ON transactions.category_id = category.category_id GROUP BY category.category_name ORDER BY avg_amount ASC NULLS LAST LIMIT 1;[0m
SQLResult: [33;1m[1;3m[('Shopping', 227.74870998628677)][0m
Answer:



[32;1m[1;3mCategory with the lowest average transaction amount is Shopping (AVG Amount - 227.7487).[0m
[1m> Finished chain.[0m
Category with the lowest average transaction amount is Shopping (AVG Amount - 227.7487).
--------------------
Longchain3


[1m> Entering new SQLDatabaseChain chain...[0m
Which category has the lowest average transaction amount?
SQLQuery:



[32;1m[1;3mSELECT category.category_name, AVG(transactions.amount::FLOAT) AS avg_amount FROM transactions JOIN category ON transactions.category_id = category.category_id GROUP BY category.category_name ORDER BY avg_amount ASC NULLS LAST LIMIT 1;[0m
SQLResult: [33;1m[1;3m[('Shopping', 227.74870998628677)][0m
Answer:



[32;1m[1;3mCategory with lowest average transaction amount is Shopping (AVG of 227.74).[0m
[1m> Finished chain.[0m
Category with lowest average transaction amount is Shopping (AVG of 227.74).
***********************


8
Question:
What is the total outstanding balance for all credit cards?
--------------------
14090.2296532
--------------------
Longchain1


[1m> Entering new SQLDatabaseChain chain...[0m
What is the total outstanding balance for all credit cards?
SQLQuery:



[32;1m[1;3mSELECT SUM(cc.current_balance::FLOAT) AS total_outstanding_balance FROM credit_card cc;[0m
SQLResult: [33;1m[1;3m[(14090.229843139648,)][0m
Answer:



[32;1m[1;3mThe total outstanding balance across all credit cards registered with the bank account is $14090.22.[0m
[1m> Finished chain.[0m
The total outstanding balance across all credit cards registered with the bank account is $14090.22.
--------------------
Longchain3


[1m> Entering new SQLDatabaseChain chain...[0m
What is the total outstanding balance for all credit cards?
SQLQuery:



[32;1m[1;3mSELECT SUM(cc.current_balance) AS total_outstanding_balance FROM credit_card cc;[0m
SQLResult: [33;1m[1;3m[(14090.2295,)][0m
Answer:



[32;1m[1;3mTotal outstanding balance across all credit cards is $14090.2295[0m
[1m> Finished chain.[0m
Total outstanding balance across all credit cards is $14090.2295
***********************


9
Question:
How many branches have less than 20 employees?
--------------------
2
--------------------
Longchain1


[1m> Entering new SQLDatabaseChain chain...[0m
How many branches have less than 20 employees?
SQLQuery:



[32;1m[1;3mSELECT COUNT(DISTINCT b.branch_id) AS number_of_branches FROM branch b JOIN users u ON CAST(b.number_of_employees AS integer) < 20;[0m
SQLResult: [33;1m[1;3m[(2,)][0m
Answer:



[32;1m[1;3mNumber of branches with less than 20 employees:: 2[0m
[1m> Finished chain.[0m
Number of branches with less than 20 employees:: 2
--------------------
Longchain3


[1m> Entering new SQLDatabaseChain chain...[0m
How many branches have less than 20 employees?
SQLQuery:



[32;1m[1;3mSELECT COUNT(DISTINCT b.branch_id) AS number_of_branches FROM branch b JOIN users u ON CAST(b.number_of_employees AS integer) < 20;[0m
SQLResult: [33;1m[1;3m[(2,)][0m
Answer:



[32;1m[1;3mTwo branches have fewer then 20 employees[0m
[1m> Finished chain.[0m
Two branches have fewer then 20 employees
***********************


10
Question:
List the top 3 users by total transaction amount.
--------------------
Gabrielle Anderson, Michael Baldwin
--------------------
Longchain1


[1m> Entering new SQLDatabaseChain chain...[0m
List the top 3 users by total transaction amount.
SQLQuery:



[32;1m[1;3mSELECT users.user_name, SUM(transactions.amount::FLOAT) AS total_amount FROM users JOIN credit_card ON users.user_id = credit_card.user_id JOIN transactions ON credit_card.card_id = transactions.card_id GROUP BY users.user_name ORDER BY total_amount DESC NULLS LAST LIMIT 3;[0m
SQLResult: [33;1m[1;3m[('Gabrielle Anderson', 128059.25991678238), ('Michael Baldwin', 121319.25990200043)][0m
Answer:



[32;1m[1;3mTop 3 users by total transaction amount include Gabrielle Anderson (with $128,059.26) and Michael Baldwin (with $121,319.26).[0m
[1m> Finished chain.[0m
Top 3 users by total transaction amount include Gabrielle Anderson (with $128,059.26) and Michael Baldwin (with $121,319.26).
--------------------
Longchain3


[1m> Entering new SQLDatabaseChain chain...[0m
List the top 3 users by total transaction amount.
SQLQuery:



[32;1m[1;3mSELECT users.user_name, SUM(transactions.amount::FLOAT) AS total_amount FROM users JOIN credit_card ON users.user_id = credit_card.user_id JOIN transactions ON credit_card.card_id = transactions.card_id GROUP BY users.user_name ORDER BY total_amount DESC NULLS LAST LIMIT 3;[0m
SQLResult: [33;1m[1;3m[('Gabrielle Anderson', 128059.25991678238), ('Michael Baldwin', 121319.25990200043)][0m
Answer:



[32;1m[1;3mTop 3 users by total transaction amount are Gabrielle Anderson (with 128059.25991678238) and Michael Baldwin (With 121319.25990200043).[0m
[1m> Finished chain.[0m
Top 3 users by total transaction amount are Gabrielle Anderson (with 128059.25991678238) and Michael Baldwin (With 121319.25990200043).
***********************


11
Question:
What is the average number of transactions per user?
--------------------
166.66666666666666
--------------------
Longchain1


[1m> Entering new SQLDatabaseChain chain...[0m
What is the average number of transactions per user?
SQLQuery:



[32;1m[1;3mSELECT AVG(transactions_per_user::FLOAT) AS avg_transactions_per_user FROM (SELECT card_id, COUNT(DISTINCT transaction_id) AS transactions_per_user FROM transactions GROUP BY card_id) AS subquery;[0m
SQLResult: [33;1m[1;3m[(166.66666666666666,)][0m
Answer:



[32;1m[1;3mThe average number of transactions per user across branches is 166.666666666666666[0m
[1m> Finished chain.[0m
The average number of transactions per user across branches is 166.666666666666666
--------------------
Longchain3


[1m> Entering new SQLDatabaseChain chain...[0m
What is the average number of transactions per user?
SQLQuery:



[32;1m[1;3mSELECT AVG(count::INTEGER) AS avg_transactions_per_user FROM (SELECT card_id, COUNT(transaction_id) AS count FROM transactions GROUP BY card_id) AS subquery;[0m
SQLResult: [33;1m[1;3m[(Decimal('166.6666666666666667'),)][0m
Answer:



[32;1m[1;3mThe average number of transactions per user across all branches is 166.6666666666666667[0m
[1m> Finished chain.[0m
The average number of transactions per user across all branches is 166.6666666666666667
***********************




In [None]:
query_db('SELECT u."username", COUNT(cc."cardid") AS card_count FROM "user" u JOIN credit_card cc ON u."userid" = cc."userid" GROUP BY u."username" ORDER BY card_count DESC LIMIT 1')

In [None]:
resp = db_chain2.run("What is the total amount of transactions completed in the last month")

# Test

In [52]:
with open('text_sql/few_shot_examples', 'rb') as f:
  few_shot_examples2 = pickle.load(f)

few_shot_examples2[0]

{'Question': 'List all the cards going to expire by sep 2026',
 'SQLQuery': "SELECT CardNumber FROM credit_card WHERE ExpiryDate <= '2026-09-30';",
 'SQLResult': [('5040000000001',),
  ('35300000000000000',),
  ('52300000000000000',)],
 'Answer': '5040000000001, 35300000000000000, 52300000000000000'}

In [None]:
few_shot_examples2

In [None]:
failed_cases2

In [59]:
db_chain2.run('How many unique users do we have in transactions')



[1m> Entering new SQLDatabaseChain chain...[0m
How many unique users do we have in transactions
SQLQuery:



RuntimeError: ignored

In [None]:
res_lst = []
failed_cases2 = []
failed_cases3 = []
for i, item in enumerate(few_shot_examples2):
  print(i)
  print("Question:")
  question = item['Question']
  print(question)
  print('--------------------')
  print(item['SQLResult'])
  print('--------------------')
  # print("Gen_sql_wto_longchain:")
  # print(gen_sql_wto_longchain(question))
  # print('--------------------')
  # print("Longchain1")
  # try:
  #   res = db_chain.run(question)
  #   print(res)
  # except:
  #   failed_cases2.append(question)
  #   print("Longchain2 Failed")
  # print('--------------------')
  print("Longchain3")
  try:
    res = db_chain2.run(question)
    print(res)
  except:
    failed_cases2.append(question)
    print("Longchain3 Failed")

  print('***********************')
  print('\n')

0
Question:
List all the cards going to expire by sep 2026
--------------------
[('5040000000001',), ('35300000000000000',), ('52300000000000000',)]
--------------------
Longchain3


[1m> Entering new SQLDatabaseChain chain...[0m
List all the cards going to expire by sep 2026
SQLQuery:



Longchain3 Failed
***********************


1
Question:
what is the total amount spent by user 2 for food and groceries?
--------------------
[(11718.400000000003,)]
--------------------
Longchain3


[1m> Entering new SQLDatabaseChain chain...[0m
what is the total amount spent by user 2 for food and groceries?
SQLQuery:



[32;1m[1;3mSELECT SUM(transactions.amount::FLOAT) AS total_amount FROM transactions JOIN category ON transactions.category_id = category.category_id JOIN credit_card ON transactions.card_id = credit_card.card_id JOIN users ON credit_card.user_id = users.user_id WHERE category.category_name ILIKE '%food%and%groceries%' AND transactions.transaction_status ILIKE '%completed%' AND transactions.transaction_type ILIKE '%debit%' AND users.user_id = 2;[0m
SQLResult: [33;1m[1;3m[(1634.1600036621094,)][0m
Answer:



Longchain3 Failed
***********************


2
Question:
How many unique users do we have in transactions
--------------------
[(2,)]
--------------------
Longchain3


[1m> Entering new SQLDatabaseChain chain...[0m
How many unique users do we have in transactions
SQLQuery:



[32;1m[1;3mSELECT COUNT(DISTINCT transactions.user_id) AS distinct_users_count FROM transactions;[0mLongchain3 Failed
***********************


3
Question:
What is the total amount lended to the user 1
--------------------
[(27493.36,)]
--------------------
Longchain3


[1m> Entering new SQLDatabaseChain chain...[0m
What is the total amount lended to the user 1
SQLQuery:



[32;1m[1;3mSELECT SUM(r.loan_amount::FLOAT) AS total_amount_lended FROM reward r JOIN credit_card_financial f ON r.financial_id = f.financial_id JOIN credit_card c ON f.card_id = c.card_id JOIN transactions t ON c.card_id = t.card_id WHERE t.amount < 0 AND c.user_id = 1;[0mLongchain3 Failed
***********************


4
Question:
Total reward points earned by user2
--------------------
[(245701,)]
--------------------
Longchain3


[1m> Entering new SQLDatabaseChain chain...[0m
Total reward points earned by user2
SQLQuery:



[32;1m[1;3mSELECT SUM(r.points_earned::INTEGER) AS total_points_earned FROM reward r JOIN transactions t ON r.transaction_id = t.transaction_id JOIN credit_card c ON t.card_id = c.card_id JOIN users u ON c.user_id = u.user_id WHERE u.user_name ILIKE '%User2%' GROUP BY r.transaction_id HAVING SUM(r.points_earned::INTEGER) >= 5 ORDER BY total_points_earned DESC NULLS LAST LIMIT 5;[0m
SQLResult: [33;1m[1;3m[0m
Answer:



[32;1m[1;3mTotal reward points earned by User2 has exceeded 5 points.[0m
[1m> Finished chain.[0m
Total reward points earned by User2 has exceeded 5 points.
***********************


5
Question:
List all branches established after 2010.
--------------------
[(2, 'Branch 2', '105 Ashley Stravenue, South Paulborough, WY 22569', '245-022-5506', 'Louis James', 'robertspencer@thompson.com', '2010-11-14', 39), (4, 'Branch 4', '29407 Bryan Streets Suite 601, Jasonview, VT 13385', '001-137-016-2399x433', 'Peter Patel', 'campbelljeffery@strickland.org', '2010-12-04', 49), (5, 'Branch 5', '31651 Scott Ranch, East Sydney, HI 99471', '-9482', 'Hailey Newton', 'lorialexander@stevenson.org', '2012-03-02', 16)]
--------------------
Longchain3


[1m> Entering new SQLDatabaseChain chain...[0m
List all branches established after 2010.
SQLQuery:



[32;1m[1;3mSELECT b.*, COUNT(u.user_id) AS number_of_users FROM branch b LEFT JOIN users u ON CAST(b.established_date AS text::integer::date) < CAST('2010' AS integer::text::date) GROUP BY b.branch_id HAVING COUNT(u.user_id) > 0;[0mLongchain3 Failed
***********************


6
Question:
Find the total number of transactions in the 'Food and Groceries' category.
--------------------
[(84,)]
--------------------
Longchain3


[1m> Entering new SQLDatabaseChain chain...[0m
Find the total number of transactions in the 'Food and Groceries' category.
SQLQuery:



[32;1m[1;3mSELECT COUNT(transaction_id) AS total_transactions FROM transactions WHERE category_id IN (SELECT category_id FROM category WHERE category_name ILIKE '%food%and%groceries%');[0m
SQLResult: [33;1m[1;3m[(84,)][0m
Answer:



[32;1m[1;3mThere were 84 transactions done under the Food and Groceries category.[0m
[1m> Finished chain.[0m
There were 84 transactions done under the Food and Groceries category.
***********************


7
Question:
What is the highest credit limit among all credit cards?
--------------------
[(20000.0,)]
--------------------
Longchain3


[1m> Entering new SQLDatabaseChain chain...[0m
What is the highest credit limit among all credit cards?
SQLQuery:



[32;1m[1;3mSELECT MAX(cc.credit_limit::TEXT::FLOAT) AS max_credit_limit FROM credit_card cc;[0m
SQLResult: [33;1m[1;3m[(20000.0,)][0m
Answer:



[32;1m[1;3mThe maximum credit limit among all credit cards is 20000.0[0m
[1m> Finished chain.[0m
The maximum credit limit among all credit cards is 20000.0
***********************


8
Question:
How many users are associated with Branch 1?
--------------------
[(320,)]
--------------------
Longchain3


[1m> Entering new SQLDatabaseChain chain...[0m
How many users are associated with Branch 1?
SQLQuery:



[32;1m[1;3mSELECT COUNT(DISTINCT users.user_id) AS number_of_users FROM users JOIN branch ON users.branch_id = branch.branch_id WHERE branch.branch_name ILIKE '%Branch%1%';[0m
SQLResult: [33;1m[1;3m[(320,)][0m
Answer:



[32;1m[1;3mThere are 320 users registered under Branch 1[0m
[1m> Finished chain.[0m
There are 320 users registered under Branch 1
***********************


9
Question:
List the user names and total reward points earned by each user.
--------------------
[('Gabrielle Anderson', 254799), ('Michael Baldwin', 245701)]
--------------------
Longchain3


[1m> Entering new SQLDatabaseChain chain...[0m
List the user names and total reward points earned by each user.
SQLQuery:



[32;1m[1;3mSELECT users.user_name, SUM(reward.points_earned::INTEGER) AS total_rewards FROM users JOIN reward ON users.user_id = reward.user_id GROUP BY users.user_name;[0mLongchain3 Failed
***********************


10
Question:
Find the average balance of credit cards issued by Branch 2.
--------------------
[(2348.3716088666665,)]
--------------------
Longchain3


[1m> Entering new SQLDatabaseChain chain...[0m
Find the average balance of credit cards issued by Branch 2.
SQLQuery:



[32;1m[1;3mSELECT AVG(cc.current_balance::FLOAT) AS avg_balance FROM credit_card cc JOIN users u ON cc.user_id = u.user_id JOIN branch b ON u.branch_id = b.branch_id WHERE b.branch_name ILIKE '%Branch%2%' AND EXTRACT(MONTH FROM issue_date::DATE) < EXTRACT(MONTH FROM CURRENT_DATE::DATE);[0m
SQLResult: [33;1m[1;3m[(2622.9961547851562,)][0m
Answer:



[32;1m[1;3mAverage balance of credit cards issued by Branch 2 is 2622.9961547851562[0m
[1m> Finished chain.[0m
Average balance of credit cards issued by Branch 2 is 2622.9961547851562
***********************


11
Question:
Which category has the highest number of transactions?
--------------------
[(5, 117)]
--------------------
Longchain3


[1m> Entering new SQLDatabaseChain chain...[0m
Which category has the highest number of transactions?
SQLQuery:



[32;1m[1;3mSELECT c.category_name, COUNT(DISTINCT t.transaction_id) AS number_of_transactions FROM transactions t JOIN category c ON t.category_id = c.category_id GROUP BY c.category_name ORDER BY number_of_transactions DESC NULLS LAST LIMIT 1;[0m
SQLResult: [33;1m[1;3m[('Loan', 117)][0m
Answer:



[32;1m[1;3mLoan has the highest number of transactions (117).[0m
[1m> Finished chain.[0m
Loan has the highest number of transactions (117).
***********************


12
Question:
List all users who have a credit card expiring in 2024.
--------------------
[]
--------------------
Longchain3


[1m> Entering new SQLDatabaseChain chain...[0m
List all users who have a credit card expiring in 2024.
SQLQuery:



[32;1m[1;3mSELECT users.user_name FROM users JOIN credit_card ON users.user_id = credit_card.user_id WHERE credit_card.expiry_date BETWEEN '2024-01-01' AND '2024-12-31';[0m
SQLResult: [33;1m[1;3m[('Gabrielle Anderson',)][0m
Answer:



[32;1m[1;3mTheresi one user whose credit card expires next year - Gabrielle Anderson[0m
[1m> Finished chain.[0m
Theresi one user whose credit card expires next year - Gabrielle Anderson
***********************


13
Question:
Find the total amount spent in 'Movies and Entertainment' category by User 1.
--------------------
[(11754.929999999998,)]
--------------------
Longchain3


[1m> Entering new SQLDatabaseChain chain...[0m
Find the total amount spent in 'Movies and Entertainment' category by User 1.
SQLQuery:



[32;1m[1;3mSELECT SUM(transactions.amount::FLOAT) AS total_amount FROM transactions JOIN category ON transactions.category_id = category.category_id JOIN credit_card ON transactions.card_id = credit_card.card_id JOIN users ON credit_card.user_id = users.user_id WHERE category.category_name ILIKE '%movies%and%entertainment%' AND users.user_id = 1 AND transactions.transaction_status ILIKE '%Completed%' AND transactions.transaction_type ILIKE '%debit%' ;[0m
SQLResult: [33;1m[1;3m[(1664.789981842041,)][0m
Answer:



Longchain3 Failed
***********************


14
Question:
What is the total loan amount issued to users of Branch 3?
--------------------
[(47744.87000000001,)]
--------------------
Longchain3


[1m> Entering new SQLDatabaseChain chain...[0m
What is the total loan amount issued to users of Branch 3?
SQLQuery:



Longchain3 Failed
***********************


15
Question:
List all transactions made in the 'Shopping' category.
--------------------
[(1056, 1, '2022-12-05', 440.17, 'Good, Morgan and Harrison', 2, 'Debit', 'Pending', 'Environmental boy address responsibility.'), (1066, 5, '2023-08-08', 14.99, 'Hill Inc', 2, 'Debit', 'Completed', 'Leg of finish interview why impact.'), (1074, 4, '2023-08-01', 385.88, 'Gonzales PLC', 2, 'Credit', 'Completed', 'Particular PM add consumer.'), (1084, 2, '2023-01-24', 129.73, 'Gonzalez and Sons', 2, 'Credit', 'Failed', 'Thing home white Mrs how travel.'), (1085, 5, '2023-01-24', 276.77, 'Mckinney, Brown and Hawkins', 2, 'Credit', 'Pending', 'Early thousand student Mrs.'), (1086, 1, '2023-11-12', 8.46, 'Cox LLC', 2, 'Credit', 'Pending', 'Build yes debate rest generation idea.'), (1094, 2, '2023-07-17', 12.23, 'Garza-Ross', 2, 'Debit', 'Failed', 'Ask several father game.'), (1108, 4, '2023-02-07', 61.55, 'Vaughan-Graham', 2, 'Credit', 'Pending', 'Way simply s



Longchain3 Failed
***********************


16
Question:
How many credit cards have a balance over $10,000?
--------------------
[(0,)]
--------------------
Longchain3


[1m> Entering new SQLDatabaseChain chain...[0m
How many credit cards have a balance over $10,000?
SQLQuery:



[32;1m[1;3mSELECT COUNT(DISTINCT card_id) AS number_of_cards FROM credit_card WHERE current_balance > 10000 AND expiry_date > CURRENT_DATE;[0m
SQLResult: [33;1m[1;3m[(0,)][0m
Answer:



[32;1m[1;3mNo credit cards have balances greater than $10,000[0m
[1m> Finished chain.[0m
No credit cards have balances greater than $10,000
***********************


17
Question:
What is the total amount of transactions completed in the last month?
--------------------
[(2445.31,)]
--------------------
Longchain3


[1m> Entering new SQLDatabaseChain chain...[0m
What is the total amount of transactions completed in the last month?
SQLQuery:



[32;1m[1;3mSELECT COUNT(DISTINCT transactions.transaction_id) AS number_of_transactions FROM transactions WHERE transactions.transaction_status ILIKE '%Completed%' AND transactions.transaction_date >= (CURRENT_DATE - interval '1 month');[0m
SQLResult: [33;1m[1;3m[(9,)][0m
Answer:



[32;1m[1;3mThe total number of transactions completed within the past month was 9.[0m
[1m> Finished chain.[0m
The total number of transactions completed within the past month was 9.
***********************


