In [None]:
from langchain.utilities import SQLDatabase
from langchain.llms import GPT4All
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler


# --- Configuration ----------------------------------------------------------
# URI of the SQLite database that the generated SQL will run against.
db_uri = "sqlite:///db/cargo-sqlite.db"
# Local GPT4All weights file, resolved relative to the notebook's working dir.
model_path = "../privateGPT/models/ggml-model-gpt4all-falcon-q4_0.bin"
VERBOSE = True

# --- Database ----------------------------------------------------------------
# Only the listed tables are exposed to the chain; the table info handed to the
# LLM is configured with 3 sample rows per table.
db = SQLDatabase.from_uri(
    db_uri,
    sample_rows_in_table_info=3,
    include_tables=[
        "cargo",
        "cargo_info",
        "depart_list",
        "purchase_info",
        "sales",
        "supply_company",
    ],
)

# --- Language model ----------------------------------------------------------
# Tokens are streamed through StreamingStdOutCallbackHandler while the model
# generates.
llm = GPT4All(
    model=model_path,
    backend="gptj",
    max_tokens=1000,
    n_batch=8,
    verbose=VERBOSE,
    callbacks=[StreamingStdOutCallbackHandler()],
)

In [1]:
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import Chroma
from chromadb.config import Settings
import chromadb


# --- Vector store configuration ---------------------------------------------
persist_directory = "vectorstore/"             # on-disk location of the Chroma store
embedding_model = "uer/sbert-base-chinese-nli"  # model name handed to HuggingFaceEmbeddings
top_k = 3                                      # number of documents returned per search
CHROMA_SETTINGS = Settings(
    anonymized_telemetry=False,
    persist_directory=persist_directory,
)

# --- Embeddings + persistent Chroma client ----------------------------------
embeddings = HuggingFaceEmbeddings(model_name=embedding_model)
chroma_client = chromadb.PersistentClient(
    path=persist_directory,
    settings=CHROMA_SETTINGS,
)
vectordb = Chroma(
    client=chroma_client,
    client_settings=CHROMA_SETTINGS,
    embedding_function=embeddings,
    persist_directory=persist_directory,
)

# Retriever that returns the `top_k` best matches for a query.
retriever = vectordb.as_retriever(search_kwargs=dict(k=top_k))

In [None]:
from langchain.prompts import PromptTemplate
from langchain.chains.llm import LLMChain
from langchain.chains.combine_documents.stuff import StuffDocumentsChain


# Prompt wrapping the question/query pairs retrieved from the embedding store.
# `{qnqs}` is the slot into which the stuffed documents are inserted.
TEMPLATE_QNQS = """Here are some ground truth queries below, to help query generation:

{qnqs}
"""
document_prompt_template = PromptTemplate(
    input_variables=["qnqs"],
    template=TEMPLATE_QNQS)


# LLMChain requires a prompt; constructing it with only `llm` fails validation.
# TEMPLATE_QNQS is a header followed by the whole list of examples, so it is the
# prompt of the combining LLM chain, and `qnqs` is the variable that receives the
# stuffed documents. The per-document prompt is left at its default, so each
# document contributes its page content.
# NOTE(review): if the stored documents carry a `qnqs` metadata field and the
# template was really meant to format each document individually, pass it as
# `document_prompt=` and supply a separate prompt for the LLMChain instead.
combine_docs_chain = StuffDocumentsChain(
    llm_chain=LLMChain(llm=llm, prompt=document_prompt_template),
    document_variable_name="qnqs",
)

In [None]:


# Prompt for SQL generation. Placeholders:
#   {dialect}           - SQL dialect of the target database
#   {table_info}        - description of the tables the model may use
#   {few_shot_examples} - example question/query pairs to guide generation
#   {input}             - the user's natural-language question
TEMPLATE_ = """Given an input question, first create a syntactically correct {dialect} query to run, then look at the results of the query and return the answer.
Use the following format:

Question: "Question here"
SQLQuery: "SQL Query to run"
SQLResult: "Result of the SQLQuery"
Answer: "Final answer here"

Only use the following tables:

{table_info}.

Some examples of SQL queries that correspond to questions are:

{few_shot_examples}

Question: {input}"""

# The template is bound to the name `TEMPLATE_` above; referencing `TEMPLATE`
# here raised NameError.
CUSTOM_PROMPT = PromptTemplate(
    input_variables=["input", "few_shot_examples", "table_info", "dialect"], template=TEMPLATE_
)

from langchain.schema.output_parser import StrOutputParser

# Create chain with LangChain Expression Language.
# The chain is invoked with a dict containing a "question" key; each entry below
# computes one of the variables CUSTOM_PROMPT expects.
inputs = {
    "table_info": lambda x: db.get_table_info(),
    "input": lambda x: x["question"],
    # Few-shot examples are the documents most similar to the question, joined
    # into one text block. The previous `combine_docs_chain.combine_docs()` call
    # passed no documents (TypeError); given documents it would run an extra LLM
    # call and return a (text, extras) tuple rather than the examples themselves.
    # NOTE(review): assumes the stored documents' page content holds the
    # question/query pairs - confirm against how the vector store was built.
    "few_shot_examples": lambda x: "\n\n".join(
        doc.page_content for doc in retriever.get_relevant_documents(x["question"])
    ),
    "dialect": lambda x: db.dialect,
}
sql_response = (
    inputs
    | CUSTOM_PROMPT  # was the undefined name `prompt`
    # was the undefined name `model`; the stop sequence ends generation before the
    # "SQLResult:" line of the prompt's answer format
    | llm.bind(stop=["\nSQLResult:"])
    | StrOutputParser()
)

In [None]:
from langchain.chains import create_sql_query_chain

# create_sql_query_chain requires the language model and the database wrapper;
# calling it with no arguments raised TypeError. The library's default prompt is
# used here.
chain = create_sql_query_chain(llm, db)