In [None]:
import getpass
import os
import pickle
import re

import jwt
import psycopg2
from langchain.document_loaders import CubeSemanticLoader
from langchain.embeddings.azure_openai import AzureOpenAIEmbeddings
from langchain.llms import AzureOpenAI
from langchain.prompts import PromptTemplate
from langchain.prompts.chat import ChatPromptTemplate
from langchain.vectorstores import FAISS
from langchain_openai import AzureChatOpenAI

In [None]:
# Prompt only when the key is not already present in the environment, so a
# pre-configured kernel can Restart & Run All without blocking on input and an
# exported value is not overwritten. To change the key, delete the variable
# from os.environ and re-run this cell.
if not os.environ.get("AZURE_OPENAI_API_KEY"):
    os.environ["AZURE_OPENAI_API_KEY"] = getpass.getpass(
        "Enter AZURE_OPENAI_API_KEY: "
    )

In [None]:
# Prompt only when the endpoint is not already present in the environment, so a
# pre-configured kernel can Restart & Run All without blocking on input and an
# exported value is not overwritten. To change the endpoint, delete the variable
# from os.environ and re-run this cell.
if not os.environ.get("AZURE_OPENAI_ENDPOINT"):
    os.environ["AZURE_OPENAI_ENDPOINT"] = getpass.getpass(
        "Enter AZURE_OPENAI_ENDPOINT: "
    )

In [None]:
# Prompt only when the secret is not already present in the environment, so a
# pre-configured kernel can Restart & Run All without blocking on input and an
# exported value is not overwritten. To change the secret, delete the variable
# from os.environ and re-run this cell.
if not os.environ.get("CUBE_API_SECRET"):
    os.environ["CUBE_API_SECRET"] = getpass.getpass("Enter CUBE_API_SECRET: ")

In [None]:
# Azure-hosted chat model for this notebook, pointed at the "gpt-4" deployment.
# Presumably picks up the AZURE_OPENAI_* variables set in the cells above --
# confirm against the langchain_openai documentation.
chat_llm = AzureChatOpenAI(
    deployment_name="gpt-4",
    openai_api_version="2023-05-15",
    temperature=0,
    verbose=True,
)

In [None]:
# Embedding model used to index and query the Cube metadata documents.
# NOTE(review): the Azure model is usually named "text-embedding-ada-002";
# confirm the deployment really is called "text-embeddings-ada-002".
embeddings = AzureOpenAIEmbeddings(
    openai_api_version="2023-05-15",
    azure_deployment="text-embeddings-ada-002",
)

In [None]:
# `_postgres_prompt` was referenced by this cell but never defined in the
# notebook, so the cell raised NameError on a fresh kernel. The template below
# uses exactly the five variables declared in `input_variables`. It contains no
# literal braces, because PromptTemplate treats `{name}` as a placeholder.
# NOTE(review): the wording is a reasonable default for generating SQL against
# the Cube SQL API; replace it with the project's own prompt if one exists.
_postgres_prompt = """\
You are a PostgreSQL expert. Given an input question, write one syntactically correct PostgreSQL query that answers it.
Return only the SQL query, with no explanation and no code fences.
Unless the question asks for a specific number of rows, return at most {top_k} rows using a LIMIT clause.
Never select all columns from a table; select only the columns needed to answer the question.
Columns whose member type is measure are aggregates: wrap them in MEASURE() and GROUP BY every other selected column.
If the question cannot be answered with the table and columns below, reply with exactly: {no_answer_text}

Only use the following table: {table_info}

Only use the following columns and pick the relevant ones:

{columns_info}

Question: {input_question}
"""

# Prompt that asks the model for a SQL query given the question, the chosen
# table, and that table's column descriptions.
CUBE_SQL_API_PROMPT = PromptTemplate(
    input_variables=[
        "input_question",
        "table_info",
        "columns_info",
        "top_k",
        "no_answer_text",
    ],
    template=_postgres_prompt,
)

In [None]:
def call_sql_api(sql_query: str):
    """Run ``sql_query`` against the database at ``DATABASE_URL``.

    Args:
        sql_query: SQL text to execute as-is.

    Returns:
        A ``(columns, rows)`` tuple: the column names taken from the cursor
        description and the rows returned by ``fetchall()``. Both are empty
        lists when the statement produces no result set.

    Raises:
        KeyError: if ``DATABASE_URL`` is not set in the environment.
        Any exception raised by psycopg2 while connecting or executing; the
        cursor and connection are closed before it propagates.
    """
    # `load_dotenv` is not imported anywhere in this notebook, so calling it
    # unconditionally raised NameError. Use it when an earlier cell has made it
    # available; otherwise rely on the process environment.
    dotenv_loader = globals().get("load_dotenv")
    if callable(dotenv_loader):
        dotenv_loader()
    conn_str = os.environ["DATABASE_URL"]

    # Initializing Cube SQL API connection
    connection = psycopg2.connect(conn_str)
    try:
        cursor = connection.cursor()
        try:
            cursor.execute(sql_query)
            # description is None when the statement returns no rows;
            # iterating it would raise TypeError.
            if cursor.description is None:
                return [], []
            columns = [desc[0] for desc in cursor.description]
            rows = cursor.fetchall()
        finally:
            cursor.close()
    finally:
        # Close explicitly so a failed query does not leak the connection.
        connection.close()

    return columns, rows

In [None]:
# Sign an empty security context with the Cube API secret (HS256); the
# resulting token is handed to the Cube loader in the next cell.
security_context = {}
token = jwt.encode(
    payload=security_context,
    key=os.environ["CUBE_API_SECRET"],
    algorithm="HS256",
)

In [None]:
# Loader for the Cube REST API of a locally running instance, authenticated
# with the token signed above.
loader = CubeSemanticLoader(
    cube_api_url="http://localhost:4000/cubejs-api/v1",
    cube_api_token=token,
)

In [None]:
# Load the documents from the Cube loader; later cells read their
# `table_name` and `column_*` metadata entries.
documents = loader.load()

In [None]:
# Build a FAISS index over the loaded documents using the Azure embeddings.
vectorstore = FAISS.from_documents(documents=documents, embedding=embeddings)

In [None]:
# Persist the index to vectorstore.pkl in the working directory.
# Only unpickle this file from a trusted location: loading a pickle can
# execute arbitrary code.
with open("vectorstore.pkl", mode="wb") as pickle_file:
    pickle.dump(vectorstore, pickle_file)

In [None]:
# Natural-language question; it drives both the similarity search and the
# SQL-generation prompt in the cells below.
question = "how many orders?"

In [None]:
# Retrieve the indexed documents most similar to the question.
docs = vectorstore.similarity_search(query=question)

In [None]:
# The top-ranked hit is taken as the best guess for which table to query.
best_match = docs[0]
table_name = best_match.metadata["table_name"]

In [None]:
# Fetch up to 15 documents restricted to the chosen table; the generic query
# text is only there to give the similarity search something to rank by.
columns_question = "All available columns"
column_docs = vectorstore.similarity_search(
    query=columns_question,
    k=15,
    filter={"table_name": table_name},
)

In [None]:
# Turn each column document into one description line (title, name, data type,
# member type) and join the lines, blank-line separated, for the prompt.
lines = []
for column_doc in column_docs:
    meta = column_doc.metadata
    column_title, column_name, column_data_type = (
        meta["column_title"],
        meta["column_name"],
        meta["column_data_type"],
    )
    print(column_name)
    lines.append(
        f"title: {column_title}, column name: {column_name}, datatype: {column_data_type}, member type: {column_doc.metadata['column_member_type']}"
    )
columns = "\n\n".join(lines)

In [None]:
# `_NO_ANSWER_TEXT` was used below without being defined anywhere in the
# notebook (NameError on a fresh kernel). It is the sentinel the model is told
# to reply with when it cannot build a query.
# NOTE(review): adjust the wording if the project defines its own sentinel.
_NO_ANSWER_TEXT = "I can't answer this question."

# Fill the SQL-generation prompt with the question, the selected table and its
# column descriptions; top_k caps the number of rows the query may return.
prompt = CUBE_SQL_API_PROMPT.format(
    input_question=question,
    table_info=table_name,
    columns_info=columns,
    top_k=1000,
    no_answer_text=_NO_ANSWER_TEXT,
)

In [None]:
# `llm` was never defined in this notebook; the only model constructed is
# `chat_llm`. A chat model's invoke() returns a message object, so take its
# `.content` to get the answer text as a string.
llm_answer = chat_llm.invoke(prompt).content
# Remove every "Answer:" label (any letter case) and the whitespace after it.
bare_llm_answer = re.sub(r"(?i)Answer:\s*", "", llm_answer)

In [None]:
# Execute the cleaned answer, not the raw model output: the raw text may still
# carry an "Answer:" label, which is not valid SQL.
sql_query = bare_llm_answer.strip()
# NOTE(review): this rebinds `columns`, which earlier held the column
# descriptions string used to build the prompt. Re-run the cell that builds
# that string before re-running the prompt cell.
columns, rows = call_sql_api(sql_query)