In [3]:
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import Chroma
from chromadb.config import Settings
import chromadb

CHROMA_SETTINGS = Settings(
    persist_directory="db",
    anonymized_telemetry=False
)

embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
chroma_client = chromadb.PersistentClient(settings=CHROMA_SETTINGS , path="db")
db = Chroma(persist_directory="db", embedding_function=embeddings, client_settings=CHROMA_SETTINGS, client=chroma_client)

retriever = db.as_retriever(search_kwargs={"k": 4})


In [6]:
from langchain.llms import GPT4All
from langchain.memory import ConversationBufferMemory
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain.chains import ConversationalRetrievalChain

llm = GPT4All(model="../models/ggml-model-gpt4all-falcon-q4_0.bin", max_tokens=1000, backend='gptj',
                          n_batch=8, callbacks=[StreamingStdOutCallbackHandler()], verbose=False)

memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)

qa = ConversationalRetrievalChain.from_llm(
    llm=llm, 
    retriever=retriever, 
    memory=memory,
    # condense_question_prompt=None,
    # condense_question_llm=None
    )


Found model file at  ../models/ggml-model-gpt4all-falcon-q4_0.bin
falcon_model_load: loading model from '../models/ggml-model-gpt4all-falcon-q4_0.bin' - please wait ...
falcon_model_load: n_vocab   = 65024
falcon_model_load: n_embd    = 4544
falcon_model_load: n_head    = 71
falcon_model_load: n_head_kv = 1
falcon_model_load: n_layer   = 32
falcon_model_load: ftype     = 2
falcon_model_load: qntvr     = 0
falcon_model_load: ggml ctx size = 3872.64 MB
falcon_model_load: memory_size =    32.00 MB, n_mem = 65536
falcon_model_load: ........................ done
falcon_model_load: model size =  3872.59 MB / num tensors = 196


In [7]:
question = "如何计算保有规模"
result = qa({"question": question})

0.00s - make the debugger miss breakpoints. Please pass -Xfrozen_modules=off
0.00s - to python to disable frozen modules.
0.00s - Note: Debugging will proceed. Set PYDEVD_DISABLE_FILE_VALIDATION=1 to disable this validation.




Answer:

SELECT DISTINCT `agencyname`, SUM(`asset`) FROM `fact_retention_model` WHERE `product_type_bi` = '权益类' AND `cdate` = (DATE_FORMAT(CURRENT_DATE(), '%Y-%m-01') - INTERVAL 1 DAY)

Explanation:

The above SQL query will return the names of all agencies that have invested in a 濝募基金 (merchant loan base model) with a product type of 濝益类 (merchant loan base model) and a current date of 2023-01-01.

The DISTINCT keyword is used to remove any duplicated values from the result set.

The SUM function is used to calculate the total asset value for each agency.

The WHERE clause filters the results based on the product type and the current date.

The DATE_FORMAT function is used to format the current date as 'YYYY-MM-DD'.

The INTERVAL function is used to subtract 

In [None]:
result

{'question': '如何计算保有规模',
 'chat_history': [HumanMessage(content='如何计算保有规模', additional_kwargs={}, example=False),
  AIMessage(content="\n\nAnswer:\n\nSELECT DISTINCT `agencyname`, SUM(`asset`) FROM `fact_retention_model` WHERE `product_type_bi` = '权益类' AND `cdate` = (DATE_FORMAT(CURRENT_DATE(), '%Y-%m-01') - INTERVAL 1 DAY)\n\nExplanation:\n\nThe above SQL query will return the names of all agencies that have invested in a 濝募基金 (merchant loan base model) with a product type of 濝益类 (merchant loan base model) and a current date of 2023-01-01.\n\nThe DISTINCT keyword is used to remove any duplicated values from the result set.\n\nThe SUM function is used to calculate the total asset value for each agency.\n\nThe WHERE clause filters the results based on the product type and the current date.\n\nThe DATE_FORMAT function is used to format the current date as 'YYYY-MM-DD'.\n\nThe INTERVAL function is used to subtract ", additional_kwargs={}, example=False)],
 'answer': "\n\nAnswer:\n\nSELECT

In [9]:
question2 = "如何获取所有表的结构"
result2 = qa({"question": question2})



What is the SQL query to retrieve the names of all agencies that have invested in a 濝募基金 (merchant loan base model) with a product type of 濝益类 (merchant loan base model) and a current date of 2023-01-01? 

SELECT agencyname FROM fact_retention_model WHERE product_type_bi='濝益类' AND cdate=(DATE_FORMAT(CURRENT_DATE(), '%Y-%m-01')-INTERVAL 1)

In [11]:
result2.get("chat_history")

[HumanMessage(content='如何计算保有规模', additional_kwargs={}, example=False),
 AIMessage(content="\n\nAnswer:\n\nSELECT DISTINCT `agencyname`, SUM(`asset`) FROM `fact_retention_model` WHERE `product_type_bi` = '权益类' AND `cdate` = (DATE_FORMAT(CURRENT_DATE(), '%Y-%m-01') - INTERVAL 1 DAY)\n\nExplanation:\n\nThe above SQL query will return the names of all agencies that have invested in a 濝募基金 (merchant loan base model) with a product type of 濝益类 (merchant loan base model) and a current date of 2023-01-01.\n\nThe DISTINCT keyword is used to remove any duplicated values from the result set.\n\nThe SUM function is used to calculate the total asset value for each agency.\n\nThe WHERE clause filters the results based on the product type and the current date.\n\nThe DATE_FORMAT function is used to format the current date as 'YYYY-MM-DD'.\n\nThe INTERVAL function is used to subtract ", additional_kwargs={}, example=False),
 HumanMessage(content='如何获取所有表的结构', additional_kwargs={}, example=False),
 AI

In [20]:
result2

{'question': '如何获取所有表的结构',
 'chat_history': [HumanMessage(content='如何计算保有规模', additional_kwargs={}, example=False),
  AIMessage(content="\n\nAnswer:\n\nSELECT DISTINCT `agencyname`, SUM(`asset`) FROM `fact_retention_model` WHERE `product_type_bi` = '权益类' AND `cdate` = (DATE_FORMAT(CURRENT_DATE(), '%Y-%m-01') - INTERVAL 1 DAY)\n\nExplanation:\n\nThe above SQL query will return the names of all agencies that have invested in a 濝募基金 (merchant loan base model) with a product type of 濝益类 (merchant loan base model) and a current date of 2023-01-01.\n\nThe DISTINCT keyword is used to remove any duplicated values from the result set.\n\nThe SUM function is used to calculate the total asset value for each agency.\n\nThe WHERE clause filters the results based on the product type and the current date.\n\nThe DATE_FORMAT function is used to format the current date as 'YYYY-MM-DD'.\n\nThe INTERVAL function is used to subtract ", additional_kwargs={}, example=False),
  HumanMessage(content='如何获取所有表的