In [1]:
import os
from dotenv import load_dotenv
from datetime import datetime

import openai
from IPython.display import HTML, display
from ipywidgets import widgets
from langchain.chains import ConversationalRetrievalChain
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import DirectoryLoader
from langchain.vectorstores import Chroma

load_dotenv()

True

In [4]:
from langchain.text_splitter import CharacterTextSplitter

text_splitter = CharacterTextSplitter(        
    separator = "\n",
    chunk_size = 600,
    chunk_overlap = 100,
    length_function = len,
)

with open('isrpa-api-full.txt') as f:
    api_document = f.read()
    
docs = text_splitter.create_documents(api_document.split('\n\n'))
print(len(docs))
print(docs[3])

Created a chunk of size 1072, which is longer than the specified 600
Created a chunk of size 948, which is longer than the specified 600
Created a chunk of size 973, which is longer than the specified 600
Created a chunk of size 692, which is longer than the specified 600
Created a chunk of size 1017, which is longer than the specified 600
Created a chunk of size 733, which is longer than the specified 600
Created a chunk of size 781, which is longer than the specified 600
Created a chunk of size 1191, which is longer than the specified 600
Created a chunk of size 630, which is longer than the specified 600
Created a chunk of size 668, which is longer than the specified 600
Created a chunk of size 679, which is longer than the specified 600
Created a chunk of size 637, which is longer than the specified 600
Created a chunk of size 766, which is longer than the specified 600


482
page_content="name: compare_values  parameters: a(The first value to compare.) b(The second value to compare.) \ndescription: Compare the values of a and b. If a is less than b, return True; otherwise, return False. Note that values of different types cannot be compared. examples:compare_values(3, 5)  # returns True \ncompare_values('abc', 'def')  # returns True \ncompare_values(3, 'abc')  # raises an error" metadata={}


In [5]:
import openai
from langchain.embeddings.openai import OpenAIEmbeddings


openai.api_type = os.getenv("OPENAI_API_TYPE")
openai.api_key = os.getenv("OPENAI_API_KEY")
openai.api_base = os.getenv("OPENAI_API_BASE")
openai.api_version = os.getenv("OPENAI_API_VERSION")
print(openai.api_type, openai.api_base, openai.api_version)

embeddings = OpenAIEmbeddings(model='text-embedding-ada-002',
                              deployment='emb0614',
                              openai_api_base=os.getenv("OPENAI_API_BASE"),
                              openai_api_type=os.getenv("OPENAI_API_TYPE"),
                              chunk_size=1)

# txt_docsearch = Chroma.from_documents(docs, embeddings, persist_directory='apidocs')
# txt_docsearch.persist()

azure https://test0406.openai.azure.com/ 2023-03-15-preview


In [6]:
txt_docsearch = None
txt_docsearch = Chroma(persist_directory='apidocs', embedding_function=embeddings)

In [7]:
from langchain.chains import LLMChain
from langchain.chains.question_answering import load_qa_chain
from langchain.prompts.chat import ChatPromptTemplate
from langchain.memory import ConversationBufferMemory, ConversationSummaryBufferMemory

llm = ChatOpenAI(engine='test0406', model_name="gpt-3.5-turbo", temperature=0.5, max_tokens=2048)

prompt_q = """根据聊天记录和任务描述，使用指定的函数来实现这个任务。
聊天记录:
{chat_history}
任务: {question}
生成代码:"""

# 初始化 prompt 对象
prompt = ChatPromptTemplate.from_template(prompt_q)
# 初始化问答链
question_generator = LLMChain(llm=llm, prompt=prompt)
doc_chain = load_qa_chain(llm, chain_type="refine")
memory = ConversationSummaryBufferMemory(llm=llm,
        max_token_limit=1000, memory_key="chat_history", return_messages=True)

chain = ConversationalRetrievalChain(
    retriever=txt_docsearch.as_retriever(),
    question_generator=question_generator,
    combine_docs_chain=doc_chain,
    memory=memory
)

                    engine was transferred to model_kwargs.
                    Please confirm that engine is what you intended.


In [8]:
query = """操作步骤及说明
1.	业务人员配置好银行账号数据表，和邮箱账号数据；
2.	读取账号配置表，根据账号信息自动登录网银系统；
3.	获取前一天历史余额数据，写入资金日报表模板对应银行账户余额栏；
4.	下载前一天电子回单、网银流水，按照固定规则命名（命名规则：公司_收/付_账号_日期，例：X公司_收_账号_20211222），并在日志记录表中写入记录；
5.	将日报excel及回、流水文件夹进行压缩；
6.	邮件发送运行异常提醒，将运行的数据压缩文件发送到指定邮箱；
7.	业务人员接收邮件文件，进行确认操作。
"""
result = chain({"question": query})
print(result)

{'question': '操作步骤及说明\n1.\t业务人员配置好银行账号数据表，和邮箱账号数据；\n2.\t读取账号配置表，根据账号信息自动登录网银系统；\n3.\t获取前一天历史余额数据，写入资金日报表模板对应银行账户余额栏；\n4.\t下载前一天电子回单、网银流水，按照固定规则命名（命名规则：公司_收/付_账号_日期，例：X公司_收_账号_20211222），并在日志记录表中写入记录；\n5.\t将日报excel及回、流水文件夹进行压缩；\n6.\t邮件发送运行异常提醒，将运行的数据压缩文件发送到指定邮箱；\n7.\t业务人员接收邮件文件，进行确认操作。\n', 'chat_history': [HumanMessage(content='操作步骤及说明\n1.\t业务人员配置好银行账号数据表，和邮箱账号数据；\n2.\t读取账号配置表，根据账号信息自动登录网银系统；\n3.\t获取前一天历史余额数据，写入资金日报表模板对应银行账户余额栏；\n4.\t下载前一天电子回单、网银流水，按照固定规则命名（命名规则：公司_收/付_账号_日期，例：X公司_收_账号_20211222），并在日志记录表中写入记录；\n5.\t将日报excel及回、流水文件夹进行压缩；\n6.\t邮件发送运行异常提醒，将运行的数据压缩文件发送到指定邮箱；\n7.\t业务人员接收邮件文件，进行确认操作。\n', additional_kwargs={}, example=False), AIMessage(content='The new context provided does not relate to the original question, which asks for the steps to automate the daily financial report generation and email it to the relevant people. Therefore, the original answer remains the same.', additional_kwargs={}, example=False)], 'answer': 'The new context provided does not relate to the original que