In [1]:
import os

from blendsql import blend, SQLiteDBConnector, init_secrets
from blendsql.ingredients import LLMMap, DT
from blendsql.ingredients.builtin.llm.openai_endpoint import OpenaiEndpoint
import guidance

# Initialise blendsql's secrets from the JSON file one directory above the notebook.
# NOTE(review): presumably this supplies the API credentials used by the endpoints
# created in later cells — confirm against blendsql's `init_secrets`.
init_secrets("../secrets.json")

In [2]:
# Few-shot examples shown to the generator model. Each entry pairs a serialized
# table schema and a natural-language question with the BlendSQL that answers it.

# All three examples are written against the same `transactions` table, so the
# schema text is defined once and shared instead of being repeated per example.
TRANSACTIONS_SCHEMA = """CREATE TABLE "transactions" (
    "index" INTEGER,
    "description" TEXT,
    "amount" REAL,
    "merchant" TEXT,
    "cash_flow" TEXT,
    "parent_category" TEXT,
    "child_category" TEXT,
    "date" TEXT,
)"""

examples = [
    {
        "serialized_db": TRANSACTIONS_SCHEMA,
        "question": "Pizza shops in MA",
        "blendsql": """SELECT DISTINCT description, merchant FROM transactions WHERE
    {{LLMMap('is this a pizza shop?', 'transactions::merchant')}} = 1
    AND {{
       LLMMap(
           'what state is this transaction from? Choose -1 when N.A.',
           'transactions::description',
           binary=0,
           example_outputs='TX;CA;MA;-1;',
           endpoint_name='gpt-4'
       )
    }} = 'MA'
    AND parent_category = 'Food'"""
    },
    {
        "serialized_db": TRANSACTIONS_SCHEMA,
        "question": "Italian restaurants or gifts",
        "blendsql": """SELECT DISTINCT description, merchant, child_category FROM transactions WHERE
    (
       {{LLMMap('is this an italian restaurant?', 'transactions::merchant')}} = 1
       AND child_category = 'this does not exist'
    )
    OR child_category = 'Gifts'
    AND {{DT('transactions::date', start='last year')}}"""
    },
    {
        "serialized_db": TRANSACTIONS_SCHEMA,
        "question": "Show gifts for my kid in Q2",
        # Uses LLMMap (not a bare `LLM`): the prompt grammar and the ingredient set
        # passed to `blend` only know `LLMMap` and `DT`, so every example must stick
        # to those two ingredient names.
        "blendsql": """SELECT DISTINCT description, merchant, child_category, date FROM transactions WHERE
      {{LLMMap('would this gift be appropriate for a child?', 'transactions::description')}} = 1
      AND {{DT('transactions::date', start='q2')}}
      AND child_category = 'Gifts'"""
    }
]

In [3]:
# Few-shot prompt template used to generate a BlendSQL query.
# Template variables: `examples` (iterated; each item exposes `serialized_db`,
# `question` and `blendsql`), `serialized_db` and `question`.
# The model's completion is captured under the key "result".
program = guidance("""
{{#system~}}
Generate BlendSQL given the question, table, passages, image captions to answer the question correctly.
BlendSQL is a superset of SQLite, which adds external function calls for information not found within native SQLite.
These external functions should be wrapped in double curly brackets.

If question-relevant column(s) contents are not suitable for SQL comparisons or calculations, map it to a new column with clean content by a new grammar:
    `LLMMap('question', '{table}::{column}')`

If the questions comment on some relative datetime operation, use the new grammar:
    `DT('{table}::{column}', start='', end='')`

Examples:
{{~#each examples}}
{{this.serialized_db}}
Question: {{this.question}}
BlendSQL: {{this.blendsql}}
{{/each}}

{{~/system}}

{{#user~}}
{{serialized_db}}
Question: {{question}}
BlendSQL:
{{~/user}}

{{#assistant~}}
{{gen "result" temperature=0.0}}
{{~/assistant}}
"""
)

In [9]:
def few_shot_blendsql(
    question: str,
    db_path: str,
    generation_endpoint: str = "code-davinci-002",
    blend_endpoint: str = "gpt-4",
    ignore_tables="fundamentals",
):
    """Generate a BlendSQL query for ``question`` and execute it against a database.

    The few-shot ``program`` template is filled with the module-level ``examples``,
    the serialized database and the question; the text the model produces under
    ``"result"`` is printed and then handed to ``blend`` with the ``LLMMap`` and
    ``DT`` ingredients.

    Args:
        question: Natural-language question to translate into BlendSQL.
        db_path: Path passed to ``SQLiteDBConnector``.
        generation_endpoint: Name given to ``OpenaiEndpoint`` for producing the
            query text. Defaults to the previously hard-coded value.
        blend_endpoint: Value forwarded to ``blend`` as ``use_endpoint``.
            Defaults to the previously hard-coded value.
        ignore_tables: Forwarded to ``db.to_serialized(ignore_tables=...)`` so the
            named table(s) are left out of the prompt.

    Returns:
        Whatever ``blend`` returns (the notebook reads ``.meta.query`` and ``.df``
        from it).
    """
    # NOTE(review): `program` is written with system/user/assistant role blocks while
    # the default generation endpoint name looks like a completion-style model —
    # confirm this pairing is what OpenaiEndpoint expects.
    endpoint = OpenaiEndpoint(generation_endpoint)
    db = SQLiteDBConnector(db_path)
    res = endpoint.predict(
        program=program,
        serialized_db=db.to_serialized(ignore_tables=ignore_tables),
        question=question,
        examples=examples,
    )
    query = res["result"]
    print(f"Parsed query: {query}")
    smoothie = blend(
        query=query,
        db=db,
        ingredients={LLMMap, DT},
        verbose=True,
        use_endpoint=blend_endpoint,
    )
    return smoothie

In [15]:
# Ask for Q2 sushi restaurants, restricted to 'Food' transactions, against the
# transactions test database, then echo the BlendSQL that was executed.
smoothie = few_shot_blendsql(
    question="Which Sushi restaurants have I been to in q2? only show 'Food' transactions",
    db_path="../tests/transactbot.sqlite",
)
print(smoothie.meta.query)

Making calls to LLM with batch_size 20: |          | 3/? [00:00<00:00,  6.07it/s]              | 3/? [00:00<00:00,  5.59it/s]
2023-12-05 14:04:47,378 - 8629432960 - main.py-main:92 - DEBUG: Finished with values ['0', '0', '0', '0', '0', '0', '0', '0', '0', '0']
2023-12-05 14:04:47,414 - 8629432960 - grammar.py-grammar:403 - DEBUG: 
2023-12-05 14:04:47,415 - 8629432960 - grammar.py-grammar:404 - DEBUG: **********************************************************************************
2023-12-05 14:04:47,416 - 8629432960 - grammar.py-grammar:407 - DEBUG: Final Query:
SELECT DISTINCT description, merchant FROM 'c9b8_transactions' WHERE  "is this a sushi restaurant?"  = 1 AND  'c9b8_transactions'."date" > '2022-04-01' AND 'c9b8_transactions'."date" < '2022-06-30'  AND parent_category = 'Food'
2023-12-05 14:04:47,417 - 8629432960 - grammar.py-grammar:408 - DEBUG: **********************************************************************************
2023-12-05 14:04:47,417 - 8629432960 - grammar

SELECT DISTINCT description, merchant FROM transactions WHERE  {{LLMMAP('is this a sushi restaurant?', 'transactions::merchant')}}  = 1 AND  {{DT('transactions::date', start = 'q2')}}  AND parent_category = 'Food'


In [14]:
# Display the rows held in the current `smoothie` result (table rendered below).
smoothie.df

Unnamed: 0,description,merchant
0,DOORDASH NONSTOP SUSHI,Nonstop Sushi
1,TOBU SUSHI CORAL SPRINGS FL,Tobu Thai & Sushi
2,24081629L0V0KQBBX AKASHI SUSHI F,Akashi Sushi


In [10]:
# Ask which tech holdings manufacture cell phones, against the brokerage test
# database, then echo the BlendSQL that was executed.
smoothie = few_shot_blendsql(
    question="Show me tech stocks in my portfolio that manufacture cell phones",
    db_path="../tests/brokeragebot.sqlite",
)
print(smoothie.meta.query)

Making calls to LLM with batch_size 20: |          | 2/? [00:03<00:00,  1.66s/it]              | 2/? [00:03<00:00,  1.86s/it]
2023-12-05 13:54:19,228 - 8629432960 - main.py-main:92 - DEBUG: Finished with values ['0', '0', '0', '0', '0', '0', '0', '0', '0', '0']
2023-12-05 13:54:19,242 - 8629432960 - grammar.py-grammar:403 - DEBUG: 
2023-12-05 13:54:19,242 - 8629432960 - grammar.py-grammar:404 - DEBUG: **********************************************************************************
2023-12-05 13:54:19,243 - 8629432960 - grammar.py-grammar:407 - DEBUG: Final Query:
SELECT DISTINCT 'ac35_portfolio'.Symbol, 'ac35_portfolio'.Description, constituents.Name, constituents.Sector FROM 'ac35_portfolio' JOIN constituents ON 'ac35_portfolio'.Symbol = constituents.Symbol WHERE  "does this company manufacture cell phones?"  = 1 AND constituents.Sector = 'Information Technology'
2023-12-05 13:54:19,243 - 8629432960 - grammar.py-grammar:408 - DEBUG: **************************************************

SELECT DISTINCT portfolio.Symbol, portfolio.Description, constituents.Name, constituents.Sector FROM portfolio JOIN constituents ON portfolio.Symbol = constituents.Symbol WHERE  {{LLMMAP('does this company manufacture cell phones?', 'portfolio::Description')}}  = 1 AND constituents.Sector = 'Information Technology'


In [11]:
# Display the rows held in the current `smoothie` result (table rendered below).
smoothie.df

Unnamed: 0,Symbol,Description,Name,Sector
0,AAPL,APPLE INC,Apple Inc.,Information Technology
