In [1]:
# Authenticate the current Colab user via google.colab.auth.
# NOTE(review): the BigQuery and Vertex AI clients created later presumably rely on
# these credentials — confirm.
from google.colab import auth
auth.authenticate_user()

In [18]:
!pip install langchain langchain_community langchain-google-genai langchain-google-vertexai



In [22]:
from langchain.prompts.chat import SystemMessagePromptTemplate
from langchain.prompts.chat import HumanMessagePromptTemplate
from langchain.prompts.chat import AIMessagePromptTemplate
from langchain.prompts.chat import ChatPromptTemplate
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_google_vertexai import ChatVertexAI
from google.cloud import bigquery
import pandas as pd
import os

In [20]:
# Google Cloud project and region passed to the BigQuery and Vertex AI clients below.
PROJECT = 'pradeep-genai'
LOCATION = 'us-central1'
# NOTE(review): MODEL_NAME is not referenced in the visible cells; the ChatVertexAI
# cell hard-codes "codechat-bison" instead — confirm which value is intended.
MODEL_NAME = 'codechat-bison@latest'

In [4]:
# BigQuery client bound to PROJECT; used later to run the generated SQL.
bq = bigquery.Client(project=PROJECT)

In [5]:
# Read the API key stored under 'GOOGLE_API_KEY' through google.colab.userdata,
# so the key itself never appears in the notebook.
from google.colab import userdata
GOOGLE_API_KEY = userdata.get('GOOGLE_API_KEY')

In [7]:
# Gemini chat model created with the API key read above.
# NOTE(review): `llm` is rebound to a ChatVertexAI instance before the first model call
# in the visible cells, so this client appears unused — confirm whether it is still needed.
llm = ChatGoogleGenerativeAI(google_api_key=GOOGLE_API_KEY,model="gemini-pro")


In [8]:
# Natural-language question that is fed into every prompt built below.
user_query = "Provide a list of all flight reservations from October 10th to October 15th, 2023"

In [9]:
# Mount Google Drive at /content/drive; the CSV and schema paths read below live under it.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [10]:
# Fresh message list for the intent-detection prompt, plus the few-shot examples
# (one prompt/intent pair per CSV row) that will be appended to it.
messages = []
examples = pd.read_csv('/content/drive/MyDrive/SQL Generation/data/few-shot/prompts_intent.csv')
examples.head()

Unnamed: 0,prompt,intent
0,Need all the bookings from 10th to 15th Octobe...,RETRIEVE_RESERVATIONS
1,Could you retrieve reservations for mid-Octobe...,RETRIEVE_RESERVATIONS
2,Let’s see all the October reservations from 10...,RETRIEVE_RESERVATIONS
3,Any reservations from 10/10/2023 to 15/10/2023?,RETRIEVE_RESERVATIONS
4,I’m looking for bookings between the second an...,RETRIEVE_RESERVATIONS


In [11]:
# System message that frames the model as an intent detector; it is the first message of the prompt.
template = "You are a helpful assistant capable of detecting the intent behind a user's query."
system_message_prompt = SystemMessagePromptTemplate.from_template(template)
messages.append(system_message_prompt)

In [12]:
# Turn every few-shot row into a Human/AI message pair appended to `messages`.
# Each row is unpacked positionally, so the CSV must have exactly two columns.
for _, row in examples.iterrows():
    prompt, completion = row
    # from_template() parses its argument as an f-string style template, so a literal
    # "{" or "}" in the example text would be read as a placeholder and make
    # format_prompt() fail on a missing variable. Doubling the braces keeps them literal.
    prompt = prompt.replace('{', '{{').replace('}', '}}')
    completion = completion.replace('{', '{{').replace('}', '}}')
    human_message = HumanMessagePromptTemplate.from_template(prompt)
    messages.append(human_message)
    ai_message = AIMessagePromptTemplate.from_template(completion)
    messages.append(ai_message)

In [13]:
# Final human turn: a template whose only content is the {user_query} placeholder.
human_template = "{user_query}"
human_message = HumanMessagePromptTemplate.from_template(human_template)
messages.append(human_message)

In [14]:
# Combine the system message, few-shot pairs and final human turn into one chat prompt.
chat_prompt = ChatPromptTemplate.from_messages(messages)

In [15]:
# Fill the {user_query} placeholder and render the prompt as a list of chat messages.
request = chat_prompt.format_prompt(user_query=user_query).to_messages()

In [17]:
# Inspect the rendered message list before it is sent to the model.
print(request)

[SystemMessage(content="You are a helpful assistant capable of detecting the intent behind a user's query."), HumanMessage(content='Need all the bookings from 10th to 15th October 2023.'), AIMessage(content='RETRIEVE_RESERVATIONS'), HumanMessage(content='Could you retrieve reservations for mid-October 2023?'), AIMessage(content='RETRIEVE_RESERVATIONS'), HumanMessage(content='Let’s see all the October reservations from 10th to 15th.'), AIMessage(content='RETRIEVE_RESERVATIONS'), HumanMessage(content='Any reservations from 10/10/2023 to 15/10/2023?'), AIMessage(content='RETRIEVE_RESERVATIONS'), HumanMessage(content='I’m looking for bookings between the second and third weeks of October 2023.'), AIMessage(content='RETRIEVE_RESERVATIONS'), HumanMessage(content='Who made a reservation last Wednesday?'), AIMessage(content='IDENTIFY_RECENT_CUSTOMERS'), HumanMessage(content='Name the people who have booked a flight in the last week.'), AIMessage(content='IDENTIFY_RECENT_CUSTOMERS'), HumanMessa

In [24]:
# Vertex AI chat model; this rebinds `llm`, so the model calls in the cells below use it.
# NOTE(review): the model name is hard-coded here although MODEL_NAME is defined earlier
# with a different value ('codechat-bison@latest') — confirm which one is intended.
llm = ChatVertexAI(project=PROJECT,
                   location=LOCATION,
                   model_name="codechat-bison",
                   temperature=0.0,
                   max_output_tokens=256)

In [27]:
%%time

response = llm(request)
intent = response.content.strip()
print(intent)

RETRIEVE_RESERVATIONS
CPU times: user 12.9 ms, sys: 0 ns, total: 12.9 ms
Wall time: 394 ms


In [28]:
# Start over for the entity-extraction prompt: fresh message list and its own
# few-shot examples (one prompt/entities pair per CSV row).
messages = []
examples = pd.read_csv('/content/drive/MyDrive/SQL Generation/data/few-shot/prompts_ner.csv')
examples.head()

Unnamed: 0,prompt,entities
0,Can you show me all the reservations from Octo...,"Start Date:October 10th, 2023|End Date:October..."
1,What bookings do we have from 10/10/2023 to 10...,Start Date:10/10/2023|End Date:10/15/2023
2,Show the reservations occurring between the se...,"Start Date:October 8th, 2023|End Date:October ..."
3,List all bookings that are happening from Octo...,"Start Date:October 10, 2023|End Date:October 1..."
4,Fetch the reservations from the second week of...,"Start Date:October 8th, 2023|End Date:October ..."


In [29]:
# System message that frames the model as a named-entity recognizer.
template = "You are a helpful assistant capable of performing named entity recognition."
system_message_prompt = SystemMessagePromptTemplate.from_template(template)
messages.append(system_message_prompt)

In [30]:
# Turn every few-shot row into a Human/AI message pair appended to `messages`.
# Each row is unpacked positionally, so the CSV must have exactly two columns.
for _, row in examples.iterrows():
    prompt, completion = row
    # from_template() parses its argument as an f-string style template, so a literal
    # "{" or "}" in the example text would be read as a placeholder and make
    # format_prompt() fail on a missing variable. Doubling the braces keeps them literal.
    prompt = prompt.replace('{', '{{').replace('}', '}}')
    completion = completion.replace('{', '{{').replace('}', '}}')
    human_message = HumanMessagePromptTemplate.from_template(prompt)
    messages.append(human_message)
    ai_message = AIMessagePromptTemplate.from_template(completion)
    messages.append(ai_message)

In [31]:
# Final human turn: the user's query followed by an instruction to emit dates as YYYY-MM-DD.
human_template = "{user_query} Standardize the date format to YYYY-MM-DD."
human_message = HumanMessagePromptTemplate.from_template(human_template)
messages.append(human_message)

In [32]:
# Assemble the entity-extraction prompt and render it to chat messages for the user's query.
chat_prompt = ChatPromptTemplate.from_messages(messages)
request = chat_prompt.format_prompt(user_query=user_query).to_messages()

In [33]:
%%time

response = llm(request)
entities = response.content.strip()
print(entities)

Start Date:2023-10-10|End Date:2023-10-15
CPU times: user 17.9 ms, sys: 27 µs, total: 17.9 ms
Wall time: 650 ms


In [34]:
# Start over for the intent-to-table mapping prompt: fresh message list, its few-shot
# examples, and the system message.
messages = []
examples = pd.read_csv('/content/drive/MyDrive/SQL Generation/data/few-shot/intent_to_table_mapping.csv')
# Not the last expression of the cell, so this preview is not displayed.
examples.head()
template = "You are a helpful assistant capable of mapping detected intent to the correct list of BigQuery tables."
system_message_prompt = SystemMessagePromptTemplate.from_template(template)
messages.append(system_message_prompt)

In [35]:
# Turn every few-shot row into a Human/AI message pair appended to `messages`.
# Each row is unpacked positionally, so the CSV must have exactly two columns.
for _, row in examples.iterrows():
    prompt, completion = row
    # from_template() parses its argument as an f-string style template, so a literal
    # "{" or "}" in the example text would be read as a placeholder and make
    # format_prompt() fail on a missing variable. Doubling the braces keeps them literal.
    prompt = prompt.replace('{', '{{').replace('}', '}}')
    completion = completion.replace('{', '{{').replace('}', '}}')
    human_message = HumanMessagePromptTemplate.from_template(prompt)
    messages.append(human_message)
    ai_message = AIMessagePromptTemplate.from_template(completion)
    messages.append(ai_message)

In [36]:
# Final human turn: a template whose only content is the {user_intent} placeholder.
human_template = "{user_intent}"
human_message = HumanMessagePromptTemplate.from_template(human_template)
messages.append(human_message)

In [37]:
# Assemble the mapping prompt and fill {user_intent} with the intent detected earlier.
chat_prompt = ChatPromptTemplate.from_messages(messages)
request = chat_prompt.format_prompt(user_intent=intent).to_messages()

In [38]:
%%time

response = llm(request)
tables = response.content.strip()
print(tables)

reservations|flights
CPU times: user 20.8 ms, sys: 0 ns, total: 20.8 ms
Wall time: 515 ms


In [39]:
def read_files_from_dir(directory):
    """Read every regular file directly inside ``directory`` into a dict.

    Args:
        directory: Path of the directory to scan. Sub-directories are not
            descended into.

    Returns:
        A dict mapping each file's name, with a trailing ``.txt`` extension
        removed, to its UTF-8 decoded content. An empty dict is returned (and
        a message is printed) when ``directory`` does not exist or is not a
        directory.
    """
    if not os.path.exists(directory):
        print(f"The directory {directory} does not exist!")
        return {}
    # A path that exists but is a regular file would make os.listdir() raise.
    if not os.path.isdir(directory):
        print(f"The path {directory} is not a directory!")
        return {}

    files_dict = {}
    suffix = '.txt'

    # sorted() makes the key order independent of the OS listing order.
    for filename in sorted(os.listdir(directory)):
        file_path = os.path.join(directory, filename)

        # Skip sub-directories and other non-file entries.
        if not os.path.isfile(file_path):
            continue

        with open(file_path, 'r', encoding='utf-8') as file:
            content = file.read()

        # Strip only a trailing ".txt"; splitting on the first ".txt" anywhere in
        # the name would truncate names such as "a.txt.bak.txt".
        key = filename[:-len(suffix)] if filename.endswith(suffix) else filename
        files_dict[key] = content

    return files_dict

In [40]:
# Load every schema description file from Drive, keyed by file name without ".txt".
directory_path = '/content/drive/MyDrive/SQL Generation/data/text-schema/'
table_schemas = read_files_from_dir(directory_path)

In [41]:
# `tables` is the model's '|'-separated table list (e.g. "reservations|flights").
# Trim whitespace around each name and skip empty fragments, so a reply such as
# "reservations | flights" still matches the schema keys.
table_names = [name.strip() for name in tables.split('|') if name.strip()]
filtered_table_schemas = {}

# Keep only the schemas of the mapped tables, in the order the model listed them.
for table_name in table_names:
    if table_name in table_schemas:
        filtered_table_schemas[table_name] = table_schemas[table_name]
    else:
        # Surface unknown names instead of silently dropping them from the prompt.
        print(f"No schema file found for table '{table_name}'; skipping it.")

In [42]:
# Concatenate the selected schema descriptions, in dict order and with no separator,
# into the single text block that is embedded in the SQL-generation prompt.
filtered_table_schemas_text = ''.join(filtered_table_schemas.values())
print(filtered_table_schemas_text)

----
Reservations Table:
Description:
The Reservations table keeps track of all flight reservations made by customers. Each record represents a unique reservation, detailing the customer, flight, reservation time, and status.
----
Columns:
--
reservation_id:
Description: A unique identifier for each reservation made on the platform.
Usage: This ID ensures that each reservation is distinct and can be referenced for customer inquiries, modifications, and operational tracking.
Type: INT64
--
customer_id:
Description: A reference to a customer from the Customers table who made the reservation.
Usage: Establishes which customer made a specific reservation, aiding in personalized user experiences, communication, and support.
Type: INT64
--
flight_id:
Description: Refers to a specific flight from the Flights table.
Usage: Ensures that the reservation corresponds to a specific flight, aiding in managing flight capacities and customer communications.
Type: INT64
--
reservation_datetime:
Descrip

In [43]:
# Start the SQL-generation prompt: fresh message list plus the system message.
messages = []
template = "You are a SQL master expert capable of writing complex SQL query in BigQuery."
system_message_prompt = SystemMessagePromptTemplate.from_template(template)
messages.append(system_message_prompt)
# Human message template; its {placeholders} carry the outputs of the earlier steps
# (intent, entities, mapped tables, schema text) together with the user's query.
human_template = """Please construct a SQL query using the information provided below:

Input Parameters:
-----------------
INTENT: {intent}
EXTRACTED_ENTITIES: {entities}
MAPPED_TABLES: {tables}

User Query:
-----------
{user_query}

Table Schemas:
--------------
{filtered_table_schemas_text}

Note:
- Please prefix the table names with `flight_reservations`."""

In [44]:
# Append the human template, assemble the chat prompt, and fill every placeholder
# with the values produced by the previous steps.
human_message = HumanMessagePromptTemplate.from_template(human_template)
messages.append(human_message)
chat_prompt = ChatPromptTemplate.from_messages(messages)
request = chat_prompt.format_prompt(intent=intent,
                                    entities=entities,
                                    tables=tables,
                                    user_query=user_query,
                                    filtered_table_schemas_text=filtered_table_schemas_text).to_messages()

In [45]:
%%time

response = llm(request)
sql = '\n'.join(response.content.strip().split('\n')[1:-1])
print(sql)

SELECT *
FROM flight_reservations.reservations AS r
JOIN flight_reservations.flights AS f
ON r.flight_id = f.flight_id
WHERE r.reservation_datetime BETWEEN '2023-10-10' AND '2023-10-15';
CPU times: user 14 ms, sys: 0 ns, total: 14 ms
Wall time: 870 ms


In [46]:
# Run the generated SQL in BigQuery and load the result into a DataFrame.
# NOTE(review): `sql` is model-generated text executed as-is; consider a dry run or a
# read-only check before running it — confirm what safeguards are expected here.
df = bq.query(sql).to_dataframe()
df

Unnamed: 0,reservation_id,customer_id,flight_id,reservation_datetime,status,flight_id_1,origin,destination,departure_datetime,arrival_datetime,carrier,price
0,6,6,6,2023-10-10 10:00:00,Confirmed,6,SEA,JFK,2023-11-25 06:00:00,2023-11-25 14:30:00,United,550.0
1,7,6,7,2023-10-12 11:30:00,Confirmed,7,JFK,MIA,2023-11-27 20:00:00,2023-11-27 23:30:00,American,380.0


In [47]:
# Render the query result as a Markdown table (without the index) for the final prompt.
# Guarded so that re-running this cell after `df` already holds the Markdown string
# does not fail with AttributeError (a str has no to_markdown()).
if not isinstance(df, str):
    df = df.to_markdown(index=False)

In [48]:
# Start the answer-formatting prompt: fresh message list plus the system message.
messages = []
template = "You are a travel assistant chatbot that can help people make flight reservations."
system_message_prompt = SystemMessagePromptTemplate.from_template(template)
messages.append(system_message_prompt)

In [49]:
# Human message template asking the model to rewrite the query result for the user;
# {user_query} and {bq_response} are filled in when the prompt is formatted.
human_template = """User's Question:
----------------
{user_query}

BigQuery Result:
----------------
{bq_response}

Task:
-----
Please convert the above query result into a human-readable format.

IMPORTANT Notes:
----------------
- The response should be courteous and human-friendly.
- If the answer doesn't require a tabular structure, avoid using it."""
human_message = HumanMessagePromptTemplate.from_template(human_template)
messages.append(human_message)


In [50]:
# Assemble the final prompt. `df` is passed as {bq_response}; an earlier cell rebinds
# `df` to the Markdown rendering of the query result.
chat_prompt = ChatPromptTemplate.from_messages(messages)
request = chat_prompt.format_prompt(user_query=user_query,
                                    bq_response=df).to_messages()

In [51]:
%%time

response = llm(request)
output = response.content.strip()
print(output)

Certainly! Here are the flight reservations from October 10th to October 15th, 2023:

**Reservation ID: 6**
- Customer ID: 6
- Flight ID: 6
- Reservation Date/Time: October 10, 2023, at 10:00 AM
- Status: Confirmed
- Flight ID: 6
- Origin: Seattle (SEA)
- Destination: New York (JFK)
- Departure Date/Time: November 25, 2023, at 06:00 AM
- Arrival Date/Time: November 25, 2023, at 02:30 PM
- Carrier: United
- Price: $550

**Reservation ID: 7**
- Customer ID: 6
- Flight ID: 7
- Reservation Date/Time: October 12, 2023, at 11:30 AM
- Status: Confirmed
- Flight ID: 7
- Origin: New York (JFK)
- Destination: Miami (MIA)
- Departure Date/Time: November 27
CPU times: user 19 ms, sys: 1.96 ms, total: 21 ms
Wall time: 1.74 s
