# Structured search and summarization

In [None]:
!pip install -q -U \
  langchain==0.0.345 \
  google-cloud-aiplatform==1.36.4 \
  google-cloud-bigquery==3.13.0 \
  sqlalchemy==2.0.23 \
  sqlalchemy-bigquery==1.8.0 \
  pyarrow==14.0.1

In [1]:
import os

from langchain.llms import VertexAI
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain
from IPython.display import display, Markdown
from langchain.agents import initialize_agent, Tool

from google.cloud import bigquery
from sqlalchemy import *
from sqlalchemy.engine import create_engine
from sqlalchemy.schema import *
from langchain.agents import create_sql_agent
from langchain.agents.agent_toolkits import SQLDatabaseToolkit
from langchain.sql_database import SQLDatabase
from langchain.llms.openai import OpenAI
from langchain.agents import AgentExecutor
from langchain.agents.agent_types import AgentType

In [2]:
PROJECT_ID = "gcloud-create-customer"
DATASET = "doc_metadata"
SQLALCHEMY_URL = f"bigquery://{PROJECT_ID}/{DATASET}"

In [3]:
import sys

if "google.colab" in sys.modules:
    from google.colab import auth
    auth.authenticate_user()

## SQL agent with SQLDataBaseToolkit

In [5]:
db = SQLDatabase.from_uri(SQLALCHEMY_URL)

llm = VertexAI(
    project="gcloud-create-customer",
    model_name="text-bison@latest",
    # temperature=0.2,
    # max_output_tokens=1024,
    # top_k=40,
    # top_p=0.8,
)

toolkit = SQLDatabaseToolkit(db=db, llm=llm)

agent_executor = create_sql_agent(
    llm=llm,
    toolkit=toolkit,
    verbose=True,
    top_k=1000,
    agent_type=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
)

In [6]:
agent_executor.run("What claims can I make about supplments vs. beauty products on the label in the US?")



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m Action: sql_db_list_tables
Action Input: [0m
Observation: [38;5;200m[1;3mdocuments[0m
Thought:[32;1m[1;3m The documents table seems relevant.  I should query its schema to see what information it contains.
 Action: sql_db_schema
Action Input: documents[0m
Observation: [33;1m[1;3m
CREATE TABLE `documents` (
	`uri` STRING, 
	`text` STRING, 
	`type` STRING, 
	`mentionText` STRING
)

/*
3 rows from documents table:
uri	text	type	mentionText
	Product Profile/Claims Grid
Title: Supporting Document for Nutrilite Chewable Concentrated Fruits an	product-claims	Nutrilite Chewable Concentrated Fruits and Vegetables is a phytonutrient and antioxidant formula tha
	Product Profile/Claims Grid
Title: Supporting Document for Nutrilite Chewable Concentrated Fruits an	product-claims	Fruits and vegetables are an excellent source of antioxidants. Most Americans do not meet the sugges
	Product Profile/Claims Grid
Title: Supporting Docum

ValueError: An output parsing error occurred. In order to pass this error back to the agent and have it try again, pass `handle_parsing_errors=True` to the AgentExecutor. This is the error: Could not parse LLM output: ` Question`

## SequentialChain

In [None]:
from langchain.prompts import PromptTemplate

prompt_1 = PromptTemplate.from_template(
    """You are a claims substantiator. Answer the marketer's question:

    {query}
"""
)

prompt_2 = PromptTemplate.from_template(
    """You are a claims substantiator. You have access to a BigQuery dataset called squad and a table called squad_flat with questions and answers.

    Formulate 5 GoogleSQL queries to search the database for the answer.
    """
)

prompt_3 = PromptTemplate.from_template(
    """You are a claims substantiator. Parse the results of the SQL query to help refine your initial answer.
    """
)

In [None]:
from langchain.llms import VertexAI
from langchain.schema import StrOutputParser

llm = VertexAI()
# chain = (
#     {"query": prompt_1 | llm | StrOutputParser()}
#     | prompt_2 | llm | StrOutputParser()
#     | prompt_3 | llm | StrOutputParser()
# )
# chain.invoke({"query": "What are the benefits, risks, and limitations related to antibiotics?"})

from langchain.schema.runnable import RunnablePassthrough

chain_1 = {"query": prompt_1 | llm | StrOutputParser()}
# chain_1a = db_chain | llm | StrOutputParser()
chain_2 = prompt_2 | llm | StrOutputParser()
chain_3 = prompt_3 | llm | StrOutputParser()

chain = {"query": chain_1} | RunnablePassthrough.assign(review=chain_2) | RunnablePassthrough.assign(review=chain_3)

chain.invoke({"query": "What types of products are there in this dataset?"})