# Env import

In [1]:
from datetime import date, datetime
from typing import Union, List
from apikey import API_KEY, BASE_URL, DEPLOYMENT_NAME, PINECONE_API_KEY, PINECONE_ENV
from langchain.chat_models import AzureChatOpenAI
import functools

from langchain import PromptTemplate
from langchain.prompts.chat import(
    ChatPromptTemplate,
    SystemMessagePromptTemplate,
    HumanMessagePromptTemplate,
    AIMessagePromptTemplate
)

from langchain.output_parsers import ResponseSchema
from langchain.output_parsers import StructuredOutputParser

from langchain.schema import (
    SystemMessage,
    HumanMessage,
    AIMessage
    )

from langchain.memory import ConversationBufferMemory, ConversationBufferWindowMemory

from langchain.chains import LLMChain, APIChain, SequentialChain, SimpleSequentialChain

import os 
from apikey import apikey 


# Publish the key imported from the local `apikey` module under the environment
# variable OPENAI_API_KEY for the rest of the kernel session.
# NOTE(review): presumably read by the OpenAI-backed helpers further down
# (e.g. OpenAIEmbeddings() is created without explicit credentials) — confirm.
os.environ['OPENAI_API_KEY'] = apikey

# Config 

In [2]:
# Dataset schema (JSON text): one {"name", "type"} entry per column. It is
# passed to the chain as the `data_info` prompt variable.
# NOTE(review): "省份" and "平台" are declared as Double, which looks unusual for
# province/platform columns — confirm against the real dataset.
data_info = """
[{"name":"会员ID","type":"String"},
{"name":"销售数量","type":"Long"},
{"name":"销售金额","type":"Long"},
{"name":"省份","type":"Double"},
{"name":"平台","type":"Double"},
{"name":"日期","type":"Date"},
{"name":"渠道","type":"String"},
{"name":"订单ID","type":"String"},
{"name":"品牌","type":"String"},
{"name":"sku","type":"String"}
]
"""

# Current time: the calendar date as YYYY-MM-DD ...
today = date.today()
formatted_date = today.isoformat()

# ... and the wall-clock timestamp as YYYY-MM-DD HH:MM:SS (second precision).
today_dt = datetime.now()
formatted_dt = today_dt.isoformat(sep=" ", timespec="seconds")

formatted_dt

'2023-08-01 13:00:01'

# Model

In [3]:
# Chat model used by the chains in this notebook. The endpoint, deployment and
# key come from the constants imported from `apikey`; temperature is pinned to 0.
azure_chat_settings = {
    "openai_api_type": "azure",
    "openai_api_base": BASE_URL,
    "openai_api_version": "2023-05-15",
    "openai_api_key": API_KEY,
    "deployment_name": DEPLOYMENT_NAME,
    "temperature": 0,
}
chat_llm = AzureChatOpenAI(**azure_chat_settings)

In [4]:
# Smoke test: send one human message to check that the deployment answers.
probe_message = HumanMessage(
    content="Translate this sentence from English to Chinese. I love programming."
)
chat_llm([probe_message])

Retrying langchain.chat_models.openai.ChatOpenAI.completion_with_retry.<locals>._completion_with_retry in 1.0 seconds as it raised APIConnectionError: Error communicating with OpenAI: HTTPSConnectionPool(host='gd-hackathon.openai.azure.com', port=443): Max retries exceeded with url: /openai/deployments/gpt-35-turbo/chat/completions?api-version=2023-05-15 (Caused by SSLError(SSLEOFError(8, '[SSL: UNEXPECTED_EOF_WHILE_READING] EOF occurred in violation of protocol (_ssl.c:1007)'))).
Retrying langchain.chat_models.openai.ChatOpenAI.completion_with_retry.<locals>._completion_with_retry in 2.0 seconds as it raised APIConnectionError: Error communicating with OpenAI: HTTPSConnectionPool(host='gd-hackathon.openai.azure.com', port=443): Max retries exceeded with url: /openai/deployments/gpt-35-turbo/chat/completions?api-version=2023-05-15 (Caused by SSLError(SSLEOFError(8, '[SSL: UNEXPECTED_EOF_WHILE_READING] EOF occurred in violation of protocol (_ssl.c:1007)'))).
Retrying langchain.chat_mode

APIConnectionError: Error communicating with OpenAI: HTTPSConnectionPool(host='gd-hackathon.openai.azure.com', port=443): Max retries exceeded with url: /openai/deployments/gpt-35-turbo/chat/completions?api-version=2023-05-15 (Caused by SSLError(SSLEOFError(8, '[SSL: UNEXPECTED_EOF_WHILE_READING] EOF occurred in violation of protocol (_ssl.c:1007)')))

# Prompt Engineering

## output parser

In [9]:
# Chart types the model may recommend: API enum name -> Chinese label.
# Insertion order matters because the dict repr is embedded in the prompt text.
card_type = dict(
    BASIC_COLUMN="单柱图",
    GROUPED_COLUMN="簇状图，柱图",
    STACKED_COLUMN="堆积图，柱图",
    STACKED_SPLIT_COLUMN="分组堆积图，柱图",
    BASIC_BAR="单条图",
    BASIC_LINE="单线图",
    PIE="饼图",
    RISING_SUN="旭日图",
    FUNNEL="漏斗图",
    HORIZONTAL_FUNNEL="水平漏斗图",
)


# Description of the "chartType" response field; the full dictionary is spelled
# out so the model can see every allowed key together with its label.
description_chartType = (
    "Recommended chart type from you. This is a string.\n"
    "You must choose one from the python dictionary " + str(card_type)
    + "'s key, and it can't be empty.\n"
    "the dictionary key is chart type, and the dictionary value is the describe in Chinese, \n"
    "you should choose the dictionary key as the chart type.\n"
)

# Description of the "row" response field (the chart's dimension columns),
# assembled line by line; the trailing empty item yields the final newline.
description_row = "\n".join((
    'The dimension fields. You can choose them from the Dataset Scheme.',
    'This is a list, each element in the list is a python dictionary. There are one keys in the dictionary: name',
    'The name indicates the dimension field, and you can choose from the Dataset Scheme.',
    'For example:',
    '```[{"name":"column_1"},',
    '    {"name":"column_2"}]```',
    '',
))

# Description of the "filter" response field: a list of
# {name, filterType, filterValue} dictionaries, or [] when nothing is filtered.
# The embedded example is kept valid JSON (comma-separated members, double-quoted
# strings) because the model's reply is parsed as JSON.
description_filter = """Data filter. Based on the text entered by the user, if not returned as [],
if is, then output them as a comma separated Python list, each element in the list is a python dictionary.
There are three keys in the dictionary: name, filterType, filterValue.
1. The name indicates the filter field, you can choose from the Dataset Scheme.
2. The filterType indicates the filter type. This is an enumeration value, the optional values are BT,GT,GE,LT,LE,EQ,NE,IN,NI.
BT means between, GT means Greater Than. You can only select one from these optional values.
3. The filterValue indicates the filter value. It must be a list, Two elements when filterType is BT. Multiple elements when filterType is IN or NI. One element when filterType is other.
For example:
```[{
"filterType": "BT",
"filterValue": ["2023-05-17","2023-06-17"],
"name": "date"
}]```
"""

# Description of the "metric" response field: a list of
# {name, aggrType, advCalc} dictionaries, where advCalc is the optional
# period-over-period (comparative) calculation.
# The sub-field list is numbered i..viii without repeats, and the embedded
# example is valid JSON, because the model's reply is parsed as JSON.
description_metric = """Data metric field. This is a list, each element in the list is a python dictionary.
There are three keys in the dictionary: name, aggrType, advCalc.
1. name: the metric field, you can choose from the Dataset Scheme.
2. aggrType: the aggregation mode. This is an enumeration value, the optional values are SUM,CNT,MIN,MAX,AVG,STDDEV,VAR,CNT_DISTINCT,NUL,MED,PERCENTILE
3. advCalc: the advanced calculation. It is not necessary, unless we need the advanced calculation like MoM or YoY calculation, there are the following subfields:
    a. advType: the advanced calculation type. This is an enumeration value, the optional value is COMPARATIVE.
    b. advValue: There are the fields required for the advanced calculation. The advValue field is a Python dictionary, and there are the following subfields:
        i. growthRateType: This is an enumeration value, the optional values are ABS, NORMAL.
            ABS: percentage change relative to the previous period, and the denominator is the absolute value of the previous period data, just like (本期数据-上期数据)/abs(上期数据) %
            NORMAL: percentage change relative to the previous period, and the denominator is the value of the previous period data, just like (本期数据-上期数据)/上期数据 %
        ii. mode: This is an enumeration value, the optional values are SIMPLE, NORMAL, FILTER_BASED. This field is not necessary, unless the row fields contains a date type field.
        iii. dateFdName: This is a string, you can choose from the Dataset Scheme which is the date type.
        iv. valueType: This is an enumeration value, the optional values are VALUE, RATE, RAWDATA.
            VALUE means growth value, RATE means growth rate, RAWDATA means comparison value. You should choose one based on the text entered by the user.
        v. granularity: This is an enumeration value, the optional values are DAY, WEEK, MONTH, QUARTER, YEAR. You should choose one based on the text entered by the user.
        vi. offsetType: There are different options depending on how granularity changes. You should try to judge and choose one from the following options based on the text entered by the user.
            when granularity is DAY, the optional values are DAY, WEEK, MONTH, QUARTER, YEAR, YEARBYWEEK.
            when granularity is YEAR, the optional values is YEAR.
            when granularity is QUARTER, the optional values are QUARTER, YEAR.
            when granularity is MONTH, the optional values are MONTH, YEAR.
            when granularity is WEEK, the optional values are WEEK, YEAR.
        vii. offset: defaults to 1.
        viii. window: This field is not necessary, unless the row fields does not contain a date type field.
            This is a python dictionary. There are two keys in the dictionary: withMacro, offset.
            withMacro: defaults to false, this is a bool type.
            offset: defaults to 1.

For example:
```[{
"aggrType": "SUM",
"name": "column_1",
"advCalc": {
    "advType": "COMPARATIVE",
      "advValue": {
      "growthRateType": "ABS",
        "mode": "NORMAL",
        "dateFdName": "column_1",
        "valueType": "RATE",
        "offsetType": "YEAR",
        "advType": "COMPARATIVE",
        "granularity": "YEAR",
        "window": {
          "withMacro": false,
          "offset": 1
        },
      "offset": 1
    }
  }
}]```
"""

# Description of the "advices" response field: follow-up suggestions phrased in
# the user's own tone. The example list uses double quotes so that it is valid
# JSON, matching the JSON reply the parser expects.
description_advices = """The other three relevant content or questions adviced by the AI from user's input. The presentation should be in the user's intonation style.
This is a list, each element in the list is a String.
For example:
```["将销量与去年同期做对比", "用品类对销量进行进一步分析"]```
"""


# One ResponseSchema per top-level key of the JSON reply requested from the
# model. List-valued fields are declared with type='array'; the others keep
# ResponseSchema's default type.
chart_type_schema = ResponseSchema(name="chartType",
                             description=description_chartType)

row_schema = ResponseSchema(name="row",
                            type = 'array',
                            description=description_row)

filters_schema = ResponseSchema(name="filter",
                                type = 'array',
                                description=description_filter)

metric_schema = ResponseSchema(name="metric",
                                type = 'array',
                                description=description_metric)

explanation_schema = ResponseSchema(name="explanation",
                            description="The explanation of the chart, expressed in Chinese,")

title_schema = ResponseSchema(name="title",
                            description="The title of the chart, within 15 characters, expressed in Chinese,")

description_schema = ResponseSchema(name="description",
                            description="The description of the chart, within 50 characters, expressed in Chinese")

# "advices" is described as a list of strings, so it is declared as an array
# like the other list-valued fields (row, filter, metric).
advices_schema = ResponseSchema(name="advices",
                                type = 'array',
                                description=description_advices)

# Order here is the order in which the fields are listed to the model.
response_schemas = [
    chart_type_schema,
    row_schema,
    filters_schema,
    metric_schema,
    explanation_schema,
    title_schema,
    description_schema,
    advices_schema
]

response_schemas

[ResponseSchema(name='chartType', description="Recommended chart type from you. This is a string.\nYou must choose one from the python dictionary {'BASIC_COLUMN': '单柱图', 'GROUPED_COLUMN': '簇状图，柱图', 'STACKED_COLUMN': '堆积图，柱图', 'STACKED_SPLIT_COLUMN': '分组堆积图，柱图', 'BASIC_BAR': '单条图', 'BASIC_LINE': '单线图', 'PIE': '饼图', 'RISING_SUN': '旭日图', 'FUNNEL': '漏斗图', 'HORIZONTAL_FUNNEL': '水平漏斗图'}'s key, and it can't be empty.\nthe dictionary key is chart type, and the dictionary value is the describe in Chinese, \nyou should choose the dictionary key as the chart type.\n"),
 ResponseSchema(name='row', description='The dimension fields. You can choose them from the Dataset Scheme.\nThis is a list, each element in the list is a python dictionary. There are one keys in the dictionary: name\nThe name indicates the dimension field, and you can choose from the Dataset Scheme.\nFor example:\n```[{"name":"column_1"},\n    {"name":"column_2"}]```\n'),
 ResponseSchema(name='filter', description='Data filter. Ba

In [10]:
# Create the output parser from the response schemas
output_parser = StructuredOutputParser.from_response_schemas(response_schemas)

# Get the format instructions that are passed to the prompt as {format_instructions}
format_instructions = output_parser.get_format_instructions()
print(format_instructions)

The output should be a markdown code snippet formatted in the following schema, including the leading and trailing "\`\`\`json" and "\`\`\`":

```json
{
	"chartType": string  // Recommended chart type from you. This is a string.
You must choose one from the python dictionary {'BASIC_COLUMN': '单柱图', 'GROUPED_COLUMN': '簇状图，柱图', 'STACKED_COLUMN': '堆积图，柱图', 'STACKED_SPLIT_COLUMN': '分组堆积图，柱图', 'BASIC_BAR': '单条图', 'BASIC_LINE': '单线图', 'PIE': '饼图', 'RISING_SUN': '旭日图', 'FUNNEL': '漏斗图', 'HORIZONTAL_FUNNEL': '水平漏斗图'}'s key, and it can't be empty.
the dictionary key is chart type, and the dictionary value is the describe in Chinese, 
you should choose the dictionary key as the chart type.

	"row": string  // The dimension fields. You can choose them from the Dataset Scheme.
This is a list, each element in the list is a python dictionary. There are one keys in the dictionary: name
The name indicates the dimension field, and you can choose from the Dataset Scheme.
For example:
```[{"name":"column_

## prompt template

In [11]:
# Core API parameters: a skeleton of the chart-request payload. It is inserted
# into the system prompt under "API Core Parameters:".
# NOTE(review): the skeleton lists a "sorting" key that has no matching
# ResponseSchema in this notebook — confirm whether that is intended.
api_parameters = """
{
    "chartType":"",
    "row":[{"name":"A"}],
    "filter":[{"name":"A", "filterType":"IN", "filterValue":["",""]},
        {"name":"A", "filterType":"GT", "filterValue":[""]}],
    "metric":[{"name":"A", "aggrType":"SUM"}],
    "sorting":[{"name":"A", "aggrType":"SUM", "ordering":"desc"}],
    "explanation":"",
    "description":"",
    "title":"",
    "advices": ["advice1", "advice2"]
}
"""

# System prompt. The placeholders {formatted_dt}, {data_info}, {api_parameters}
# and {format_instructions} are template variables supplied when the chain runs.
system_template = """You are an AI assistant designed to help data analysts do their daily work.
Your decisions must always be made independently without seeking user assistance. Play to your strengths as an LLM and pursue simple strategies with no legal complications.

Current Time: {formatted_dt}

GOALS:
1. Check the Dataset Scheme meets user's inputs.
2. Generate the API Core Parameters for charting.
3. Check the API Core Parameters are correct.
4. Generate three other related questions or contents from user's inputs, using the user's intonation style change words into declarative sentences. Put them in the advices of API Parameters.

Dataset Scheme:
{data_info}

API Core Parameters:
{api_parameters}

Response Format: 
{format_instructions}
"""
system_message_prompt = SystemMessagePromptTemplate.from_template(template=system_template)
# Bare expression in the middle of the cell: it is evaluated but not displayed,
# since only a cell's last expression is rendered.
system_message_prompt

# Human turn: the {chat_history} variable followed by the new request ({text}).
human_template = """
{chat_history}

{text}
"""
human_message_prompt = HumanMessagePromptTemplate.from_template(template=human_template)



In [12]:
# Combine the system and human message templates into one chat prompt.
chat_prompt = ChatPromptTemplate.from_messages([system_message_prompt, human_message_prompt])

# Display the assembled prompt (shows its input variables and templates).
chat_prompt

ChatPromptTemplate(input_variables=['data_info', 'formatted_dt', 'format_instructions', 'chat_history', 'api_parameters', 'text'], output_parser=None, partial_variables={}, messages=[SystemMessagePromptTemplate(prompt=PromptTemplate(input_variables=['api_parameters', 'data_info', 'format_instructions', 'formatted_dt'], output_parser=None, partial_variables={}, template="You are an AI assistant designed to help data analysts do their daily work.\nYour decisions must always be made independently without seeking user assistance. Play to your strengths as an LLM and pursue simple strategies with no legal complications.\n\nCurrent Time: {formatted_dt}\n\nGOALS:\n1. Check the Dataset Scheme meets user's inputs.\n2. Generate the API Core Parameters for charting.\n3. Check the API Core Parameters are correct.\n4. Generate three other related questions or contents from user's inputs, using the user's intonation style change words into declarative sentences. Put them in the advices of API Parame

# Memory

In [13]:
# Conversation memory for the chain: stored under the "chat_history" prompt
# variable, with "text" marked as the input key and a window size of k=4.
# NOTE(review): k presumably counts the most recent exchanges kept — confirm.
memory = ConversationBufferWindowMemory(memory_key="chat_history", input_key="text", k=4)


# LLMChain

In [14]:
# Ask one question through the chain. verbose=True makes the chain log the
# formatted prompt (see the cell output).
question = '销量趋势与历史同期对比如何？'
chain = LLMChain(llm=chat_llm, prompt=chat_prompt, memory=memory, verbose=True)

# Values for the prompt variables; {chat_history} is not passed here because
# the chain is built with a memory whose memory_key is "chat_history".
chain_inputs = dict(
    text=question,
    data_info=data_info,
    format_instructions=format_instructions,
    formatted_dt=formatted_dt,
    api_parameters=api_parameters,
)
chain_response = chain.run(chain_inputs)





[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mSystem: You are an AI assistant designed to help data analysts do their daily work.
Your decisions must always be made independently without seeking user assistance. Play to your strengths as an LLM and pursue simple strategies with no legal complications.

Current Time: 2023-07-03 16:39:32

GOALS:
1. Check the Dataset Scheme meets user's inputs.
2. Generate the API Core Parameters for charting.
3. Check the API Core Parameters are correct.
4. Generate three other related questions or contents from user's inputs, using the user's intonation style change words into declarative sentences. Put them in the advices of API Parameters.

Dataset Scheme:

[{"name":"会员ID","type":"String"},
{"name":"销售数量","type":"Long"},
{"name":"销售金额","type":"Long"},
{"name":"省份","type":"Double"},
{"name":"平台","type":"Double"},
{"name":"日期","type":"Date"},
{"name":"渠道","type":"String"},
{"name":"订单ID","type":"String"},
{"name":"品牌",

In [15]:
# Show the raw text returned by the chain.
print(chain_response)

```json
{
    "chartType": "BASIC_LINE",
    "row": [{"name": "日期"}],
    "filter": [{
        "name": "日期",
        "filterType": "BT",
        "filterValue": ["2023-05-17", "2023-06-17"]
    }],
    "metric": [{
        "name": "销售数量",
        "aggrType": "SUM",
        "advCalc": {
            "advType": "COMPARATIVE",
            "advValue": {
                "growthRateType": "ABS",
                "mode": "NORMAL",
                "dateFdName": "日期",
                "valueType": "RATE",
                "offsetType": "YEAR",
                "granularity": "YEAR",
                "offset": 1
            }
        }
    }],
    "explanation": "图表展示了销量趋势与历史同期的对比情况。",
    "title": "销量趋势与历史同期对比",
    "description": "了解销量在不同时间段的表现及与历史同期的对比情况。",
    "advices": ["分析不同品牌的销量趋势", "探讨不同渠道对销量的影响", "研究各省份的销量表现"]
}
```


In [16]:
# Parse the raw reply with the structured output parser and display the result.
output_parser.parse(chain_response)

{'chartType': 'BASIC_LINE',
 'row': [{'name': '日期'}],
 'filter': [{'name': '日期',
   'filterType': 'BT',
   'filterValue': ['2023-05-17', '2023-06-17']}],
 'metric': [{'name': '销售数量',
   'aggrType': 'SUM',
   'advCalc': {'advType': 'COMPARATIVE',
    'advValue': {'growthRateType': 'ABS',
     'mode': 'NORMAL',
     'dateFdName': '日期',
     'valueType': 'RATE',
     'offsetType': 'YEAR',
     'granularity': 'YEAR',
     'offset': 1}}}],
 'explanation': '图表展示了销量趋势与历史同期的对比情况。',
 'title': '销量趋势与历史同期对比',
 'description': '了解销量在不同时间段的表现及与历史同期的对比情况。',
 'advices': ['分析不同品牌的销量趋势', '探讨不同渠道对销量的影响', '研究各省份的销量表现']}

In [17]:
# Parse the reply; when parsing raises ValueError, fall back to a dictionary
# that carries the raw reply text under 'explanation'.
try:
    output_dict = output_parser.parse(chain_response)
except ValueError:
    output_dict = {'explanation': chain_response}

output_dict

{'chartType': 'BASIC_LINE',
 'row': [{'name': '日期'}],
 'filter': [{'name': '日期',
   'filterType': 'BT',
   'filterValue': ['2023-05-17', '2023-06-17']}],
 'metric': [{'name': '销售数量',
   'aggrType': 'SUM',
   'advCalc': {'advType': 'COMPARATIVE',
    'advValue': {'growthRateType': 'ABS',
     'mode': 'NORMAL',
     'dateFdName': '日期',
     'valueType': 'RATE',
     'offsetType': 'YEAR',
     'granularity': 'YEAR',
     'offset': 1}}}],
 'explanation': '图表展示了销量趋势与历史同期的对比情况。',
 'title': '销量趋势与历史同期对比',
 'description': '了解销量在不同时间段的表现及与历史同期的对比情况。',
 'advices': ['分析不同品牌的销量趋势', '探讨不同渠道对销量的影响', '研究各省份的销量表现']}

In [18]:
import json
# Serialise the parsed reply to a JSON string. With the default settings
# non-ASCII characters are written as \uXXXX escapes, as the output shows.
json.dumps(output_dict)

'{"chartType": "BASIC_LINE", "row": [{"name": "\\u65e5\\u671f"}], "filter": [{"name": "\\u65e5\\u671f", "filterType": "BT", "filterValue": ["2023-05-17", "2023-06-17"]}], "metric": [{"name": "\\u9500\\u552e\\u6570\\u91cf", "aggrType": "SUM", "advCalc": {"advType": "COMPARATIVE", "advValue": {"growthRateType": "ABS", "mode": "NORMAL", "dateFdName": "\\u65e5\\u671f", "valueType": "RATE", "offsetType": "YEAR", "granularity": "YEAR", "offset": 1}}}], "explanation": "\\u56fe\\u8868\\u5c55\\u793a\\u4e86\\u9500\\u91cf\\u8d8b\\u52bf\\u4e0e\\u5386\\u53f2\\u540c\\u671f\\u7684\\u5bf9\\u6bd4\\u60c5\\u51b5\\u3002", "title": "\\u9500\\u91cf\\u8d8b\\u52bf\\u4e0e\\u5386\\u53f2\\u540c\\u671f\\u5bf9\\u6bd4", "description": "\\u4e86\\u89e3\\u9500\\u91cf\\u5728\\u4e0d\\u540c\\u65f6\\u95f4\\u6bb5\\u7684\\u8868\\u73b0\\u53ca\\u4e0e\\u5386\\u53f2\\u540c\\u671f\\u7684\\u5bf9\\u6bd4\\u60c5\\u51b5\\u3002", "advices": ["\\u5206\\u6790\\u4e0d\\u540c\\u54c1\\u724c\\u7684\\u9500\\u91cf\\u8d8b\\u52bf", "\\u63a2\\u8b

# Vector stores

## FAISS

## pinecone

In [3]:
import os
import getpass

from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import Pinecone
from langchain.document_loaders import TextLoader


In [4]:

# Load the sample text file and split it into chunks
# (chunk_size=1000, chunk_overlap=0).
loader = TextLoader("data/state_of_the_union.txt")
documents = loader.load()
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
docs = text_splitter.split_documents(documents)

# Embedding model, created without explicit arguments.
# NOTE(review): presumably authenticates via the OPENAI_API_KEY environment
# variable set in the first cell — confirm.
embeddings = OpenAIEmbeddings()

In [5]:
import pinecone

# Initialize the Pinecone client with the key and environment imported from
# the local `apikey` module.
pinecone.init(
    api_key=PINECONE_API_KEY,  # find at app.pinecone.io
    environment=PINECONE_ENV,  # next to api key in console
)

  from tqdm.autonotebook import tqdm


In [7]:
# Name of the Pinecone index used by this notebook.
index_name = "hackathon2"

# Build the vector store from the split chunks and the embedding model.
# NOTE(review): re-running this cell presumably writes the chunks to the index
# again — confirm before re-executing.
docsearch = Pinecone.from_documents(docs, embeddings, index_name=index_name)


In [29]:

# Plain similarity search against the vector store.
query = "What is the explanation of 会员?"
# NOTE(review): this rebinds `docs`, which until now held the split chunks that
# were passed to Pinecone.from_documents; re-running that cell afterwards would
# use the search hits instead. A separate name would avoid this.
docs = docsearch.similarity_search(query)


In [64]:
# Display the documents returned by the similarity search.
docs

[Document(page_content='Vice President Harris and I ran for office with a new economic vision for America. \n\nInvest in America. Educate Americans. Grow the workforce. Build the economy from the bottom up  \nand the middle out, not from the top down.  \n\nBecause we know that when the middle class grows, the poor have a ladder up and the wealthy do very well. \n\nAmerica used to have the best roads, bridges, and airports on Earth. \n\nNow our infrastructure is ranked 13th in the world. \n\ncardid 为 0000001，对应的explanation为"商圈销量占比商圈销量占比商圈销量占比商圈销量占比".\n\nWe won’t be able to compete for the jobs of the 21st Century if we don’t fix that. \n\nThat’s why it was so important to pass the Bipartisan Infrastructure Law—the most sweeping investment to rebuild America in history. \n\nThis was a bipartisan effort, and I want to thank the members of both parties who worked to make it happen. \n\nWe’re done talking about infrastructure weeks. \n\nWe’re going to have an infrastructure decade.', metada

In [32]:
# Print the text of the second hit (index 1).
print(docs[1].page_content)


Last month, I announced our plan to supercharge  
the Cancer Moonshot that President Obama asked me to lead six years ago. 

Our goal is to cut the cancer death rate by at least 50% over the next 25 years, turn more cancers from death sentences into treatable diseases.  

More support for patients and families. 

To get there, I call on Congress to fund ARPA-H, the Advanced Research Projects Agency for Health. 

It’s based on DARPA—the Defense Department project that led to the Internet, GPS, and so much more.  

ARPA-H will have a singular purpose—to drive breakthroughs in cancer, Alzheimer’s, diabetes, and more. 

A unity agenda for the nation. 

We can do this. 

cardid 为 0000002，对应的explanation为"会员销量分布会员销量分布会员销量分布会员销量分布".

My fellow Americans—tonight , we have gathered in a sacred space—the citadel of our democracy. 

In this Capitol, generation after generation, Americans have debated great questions amid great strife, and have done great things.


In [33]:
# Second query, this time through a retriever built from the vector store.
query = "What is the description of 会员?"

# search_type="mmr" selects the store's max-marginal-relevance search mode.
retriever = docsearch.as_retriever(search_type="mmr")
matched_docs = retriever.get_relevant_documents(query)


In [34]:
# Print every retrieved document under a numbered "## Document N" heading
# (numbering starts at 0).
for position, document in enumerate(matched_docs):
    heading = "\n## Document " + str(position) + "\n"
    print(heading)
    print(document.page_content)




## Document 0

Vice President Harris and I ran for office with a new economic vision for America. 

Invest in America. Educate Americans. Grow the workforce. Build the economy from the bottom up  
and the middle out, not from the top down.  

Because we know that when the middle class grows, the poor have a ladder up and the wealthy do very well. 

America used to have the best roads, bridges, and airports on Earth. 

Now our infrastructure is ranked 13th in the world. 

cardid 为 0000001，对应的explanation为"商圈销量占比商圈销量占比商圈销量占比商圈销量占比".

We won’t be able to compete for the jobs of the 21st Century if we don’t fix that. 

That’s why it was so important to pass the Bipartisan Infrastructure Law—the most sweeping investment to rebuild America in history. 

This was a bipartisan effort, and I want to thank the members of both parties who worked to make it happen. 

We’re done talking about infrastructure weeks. 

We’re going to have an infrastructure decade.

## Document 1

Groups of citizens bl

In [35]:
# Max-marginal-relevance search called directly on the vector store:
# fetch_k=10 candidates are requested and k=2 results are returned.
found_docs = docsearch.max_marginal_relevance_search(query, k=2, fetch_k=10)

# Print the results as a 1-based numbered list, each followed by a blank line.
for rank, hit in enumerate(found_docs, start=1):
    print(str(rank) + ".", hit.page_content, "\n")

1. Vice President Harris and I ran for office with a new economic vision for America. 

Invest in America. Educate Americans. Grow the workforce. Build the economy from the bottom up  
and the middle out, not from the top down.  

Because we know that when the middle class grows, the poor have a ladder up and the wealthy do very well. 

America used to have the best roads, bridges, and airports on Earth. 

Now our infrastructure is ranked 13th in the world. 

cardid 为 0000001，对应的explanation为"商圈销量占比商圈销量占比商圈销量占比商圈销量占比".

We won’t be able to compete for the jobs of the 21st Century if we don’t fix that. 

That’s why it was so important to pass the Bipartisan Infrastructure Law—the most sweeping investment to rebuild America in history. 

This was a bipartisan effort, and I want to thank the members of both parties who worked to make it happen. 

We’re done talking about infrastructure weeks. 

We’re going to have an infrastructure decade. 

2. Groups of citizens blocking tanks with their 

# Test

In [68]:
from langchain.chains.question_answering import load_qa_chain
from langchain.llms import OpenAI
from langchain.prompts import PromptTemplate
from langchain.memory import ConversationBufferMemory

In [69]:
# Prompt for the question-answering chain: the {context} variable, then the
# {chat_history} variable, then the new {human_input}.
template = """You are a chatbot having a conversation with a human.

Given the following extracted parts of a long document and a question, create a final answer.

{context}

{chat_history}
Human: {human_input}
Chatbot:"""

prompt = PromptTemplate(
    input_variables=["chat_history", "human_input", "context"], template=template
)

# Display the template object.
prompt

PromptTemplate(input_variables=['chat_history', 'human_input', 'context'], output_parser=None, partial_variables={}, template='You are a chatbot having a conversation with a human.\n\nGiven the following extracted parts of a long document and a question, create a final answer.\n\n{context}\n\n{chat_history}\nHuman: {human_input}\nChatbot:', template_format='f-string', validate_template=True)

In [None]:
# Memory for the QA chain, stored under "chat_history" with "human_input" as
# the input key.
# NOTE(review): this rebinds `memory`, the name the earlier LLMChain cell was
# built with; re-running that cell after this one would pick up this object
# instead of the window memory.
memory = ConversationBufferMemory(memory_key="chat_history", input_key="human_input")


In [None]:
# Build a "stuff"-type question-answering chain with the prompt and memory above.
# NOTE(review): this rebinds `chain`, which earlier referred to the charting
# LLMChain; it also uses OpenAI(...) rather than the Azure chat model — confirm
# that is intended.
chain = load_qa_chain(
    OpenAI(temperature=0), chain_type="stuff", memory=memory, prompt=prompt
)