# 大数据量数据库查询
## 针对多表、多列或高基数列的查询

## 数据库连接初始化

In [73]:
import os

from langchain_community.utilities import SQLDatabase

# Connection URI for the PostgreSQL database queried in this notebook.
# It is read from the LLM_DB_URI environment variable when that is set, so
# credentials can be supplied without editing the notebook.
# NOTE(review): the fallback literal still embeds a password in source; prefer
# setting LLM_DB_URI and removing the literal once the environment is configured.
db_uri = os.environ.get(
    "LLM_DB_URI",
    "postgresql://postgres:3h1admin@192.168.1.19:5432/llm_full",
)
db = SQLDatabase.from_uri(db_uri)

# Quick sanity check: show the SQL dialect, the table names reported by the
# connection, and a ten-row sample of blade_user.
print(db.dialect)
print(db.get_usable_table_names())
db.run("SELECT * FROM blade_user LIMIT 10;")

postgresql
['act_app_appdef', 'act_app_databasechangelog', 'act_app_databasechangeloglock', 'act_app_deployment', 'act_app_deployment_resource', 'act_cmmn_casedef', 'act_cmmn_databasechangelog', 'act_cmmn_databasechangeloglock', 'act_cmmn_deployment', 'act_cmmn_deployment_resource', 'act_cmmn_hi_case_inst', 'act_cmmn_hi_mil_inst', 'act_cmmn_hi_plan_item_inst', 'act_cmmn_ru_case_inst', 'act_cmmn_ru_mil_inst', 'act_cmmn_ru_plan_item_inst', 'act_cmmn_ru_sentry_part_inst', 'act_co_content_item', 'act_co_databasechangelog', 'act_co_databasechangeloglock', 'act_de_databasechangelog', 'act_de_databasechangeloglock', 'act_de_model', 'act_de_model_history', 'act_de_model_relation', 'act_dmn_databasechangelog', 'act_dmn_databasechangeloglock', 'act_dmn_decision_table', 'act_dmn_deployment', 'act_dmn_deployment_resource', 'act_dmn_hi_decision_execution', 'act_evt_log', 'act_fo_databasechangelog', 'act_fo_databasechangeloglock', 'act_fo_form_definition', 'act_fo_form_deployment', 'act_fo_form_inst

"[(1590171555982729217, '000000', None, 'vdd1', 'adcd7048512e64b48da55b027577886ee5a36350', 'vdd1', 'vdd1', None, None, None, None, None, '1123598816738675202', '1123598813738675201', '1123598817738675208', 1123598821738675201, 1123598813738675201, datetime.datetime(2022, 11, 9, 10, 36, 57, 562000), 1123598821738675201, datetime.datetime(2022, 11, 9, 10, 36, 57, 562000), 1, 0, 1, 0), (1590270173334986754, '000000', None, 'vdd2', 'adcd7048512e64b48da55b027577886ee5a36350', 'vdd2', 'vdd2', None, None, None, None, None, '1123598816738675202', '1123598813738675201', '1123598817738675208', 1123598821738675201, 1123598813738675201, datetime.datetime(2022, 11, 9, 17, 8, 49, 771000), 1123598821738675201, datetime.datetime(2022, 11, 9, 17, 8, 49, 771000), 1, 0, 1, 0), (1638006299304562689, '000000', None, 'gxn', '7ee86ce2c0abff7eae4050dde36dbf2a0d66e607', 'gxn', 'gxn', None, None, None, None, None, '1123598816738675201', '1123598813738675201,1607913441397157890,1595282965993611266,1595737097862

## OpenAI模型构建

In [74]:
from langchain_openai import ChatOpenAI

# Chat model used by the chains built below; constructed with the
# "gpt-3.5-turbo" model name and temperature set to 0.
llm = ChatOpenAI(
    model="gpt-3.5-turbo",
    temperature=0,
)

## 多表查询

In [75]:
from langchain.chains.openai_tools import create_extraction_chain_pydantic
from langchain_core.pydantic_v1 import BaseModel, Field

# Schema passed to create_extraction_chain_pydantic below: each instance
# carries the name of one table.
# NOTE(review): the class docstring and the Field description are presumably
# part of the schema sent to the model, so treat them as runtime text and
# confirm before rewording either.
class Table(BaseModel):
    """Table in SQL database."""

    name: str = Field(description="Name of table in SQL database.")


# One table name per line, taken from db.get_usable_table_names(); printed so
# the list that goes into the prompt can be inspected.
table_names = "\n".join(db.get_usable_table_names())
print(table_names)

# System prompt asking the model to pick every table that might be relevant
# to the user's question from the list above.
system = (
    "Return the names of ALL the SQL tables that MIGHT be relevant to the user question. "
    "The tables are:\n\n"
    f"{table_names}\n\n"
    "Remember to include ALL POTENTIALLY RELEVANT tables, even if you're not sure that they're needed."
)

# Extraction chain built from the Table schema, the chat model and the prompt.
table_chain = create_extraction_chain_pydantic(Table, llm, system_message=system)
# Example call (left disabled): table_chain.invoke({"input": "blade用户"})

act_app_appdef
act_app_databasechangelog
act_app_databasechangeloglock
act_app_deployment
act_app_deployment_resource
act_cmmn_casedef
act_cmmn_databasechangelog
act_cmmn_databasechangeloglock
act_cmmn_deployment
act_cmmn_deployment_resource
act_cmmn_hi_case_inst
act_cmmn_hi_mil_inst
act_cmmn_hi_plan_item_inst
act_cmmn_ru_case_inst
act_cmmn_ru_mil_inst
act_cmmn_ru_plan_item_inst
act_cmmn_ru_sentry_part_inst
act_co_content_item
act_co_databasechangelog
act_co_databasechangeloglock
act_de_databasechangelog
act_de_databasechangeloglock
act_de_model
act_de_model_history
act_de_model_relation
act_dmn_databasechangelog
act_dmn_databasechangeloglock
act_dmn_decision_table
act_dmn_deployment
act_dmn_deployment_resource
act_dmn_hi_decision_execution
act_evt_log
act_fo_databasechangelog
act_fo_databasechangeloglock
act_fo_form_definition
act_fo_form_deployment
act_fo_form_instance
act_fo_form_resource
act_ge_bytearray
act_ge_property
act_hi_actinst
act_hi_attachment
act_hi_comment
act_hi_detail


In [76]:
# Same extraction setup as table_chain, but the candidate list is the two
# hand-written names "User" and "Application" instead of the real table list.
# This rebinds the module-level name `system`.
system = (
    "Return the names of ALL the SQL tables that MIGHT be relevant to the user question. "
    "The tables are:\n\n"
    "User\n"
    "Application\n"
)
category_chain = create_extraction_chain_pydantic(Table, llm, system_message=system)
category_chain.invoke({"input": "应用信息"})

[Table(name='Application')]

In [77]:
from typing import List

def get_tables(tables: List[Table]) -> List[str]:
    """Convert a list of ``Table`` objects into a list of their ``name`` strings.

    The output keeps the order of *tables*; an empty input gives an empty list.
    """
    return [table.name for table in tables]

# Pipe the extraction chain into get_tables so the result is a list of
# table-name strings rather than Table objects.
# NOTE: this rebinds table_chain to a pipeline that contains its previous
# value, so running the cell again appends get_tables a second time.
table_chain = table_chain | get_tables

In [78]:
from langchain_core.runnables import RunnablePassthrough
from operator import itemgetter
from langchain.chains import create_sql_query_chain
from langchain_community.tools.sql_database.tool import QuerySQLDataBaseTool

# Tool that executes a SQL string against `db`.
execute_query = QuerySQLDataBaseTool(db=db)
# Chain that has the model write a SQL query for `db`.
query_chain = create_sql_query_chain(llm, db)

# The outer chain is invoked with a "question" key, while table_chain expects
# "input"; map one onto the other before calling it.
# NOTE: this rebinds table_chain again, so the cell is not safe to re-run
# without first rebuilding table_chain.
table_chain = {"input": itemgetter("question")} | table_chain

# Pipeline: add the selected table names under "table_names_to_use", generate
# the SQL, then run it.
# NOTE(review): the model-written SQL is executed as-is against `db`.
full_chain = (
    RunnablePassthrough.assign(table_names_to_use=table_chain)
    | query_chain
    | execute_query
)

# `query` holds the output of the final (execution) step, which is printed.
query = full_chain.invoke({"question": "输出所有不重复的blade用户名称"})
print(query)

# Optional debugging aid (left disabled):
# prompts = full_chain.get_prompts()
# print(prompts)

[('谷晓宁',), ('gxn',), ('gxnhaha',), ('xh',), ('常文英',), ('李炜',), ('szm2',), ('李小龙',), ('用户123',), ('魏迪',), ('vdd321',), ('vdd1',), ('yf',), ('时泽明',), ('szm1',), ('wp',), ('李辉',), ('gxn11',), ('郜继昌',), ('马德兰',), ('叶翠英',), ('门秀英',), ('用户123456',), ('郭秀英',), ('黄云飞',), ('tttt',), ('vdd2',), ('管理员dev',), ('zyr',), ('test',), ('g1',), ('vdd',), ('feat3.10',), ('sss',), ('cccccc',), ('gxn1',), ('zhtest',), ('魏挺',), ('zhtest1',), ('邓中涛',), ('张雨鹏',), ('常其云',), ('王玉生',), ('王玉兰',), ('szm3',), ('庞亚洲123',), ('dev',), ('贾宝山',), ('xuhui',), ('庞亚洲',)]
